metrics: fix review findings
This commit is contained in:
parent
ee659f509f
commit
6a032bfaa2
12 changed files with 141 additions and 115 deletions
|
|
@ -16,11 +16,9 @@ package controllers
|
|||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"garm/apiserver/params"
|
||||
|
|
@ -36,12 +34,7 @@ import (
|
|||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) {
|
||||
id, err := r.GetControllerID()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "getting controller ID")
|
||||
}
|
||||
|
||||
func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.Hub, controllerInfo runnerParams.ControllerInfo) (*APIController, error) {
|
||||
return &APIController{
|
||||
r: r,
|
||||
auth: auth,
|
||||
|
|
@ -50,17 +43,16 @@ func NewAPIController(r *runner.Runner, auth *auth.Authenticator, hub *wsWriter.
|
|||
ReadBufferSize: 1024,
|
||||
WriteBufferSize: 16384,
|
||||
},
|
||||
id: id.String(),
|
||||
controllerInfo: controllerInfo,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type APIController struct {
|
||||
r *runner.Runner
|
||||
auth *auth.Authenticator
|
||||
hub *wsWriter.Hub
|
||||
upgrader websocket.Upgrader
|
||||
// holds this controller's id
|
||||
id string
|
||||
r *runner.Runner
|
||||
auth *auth.Authenticator
|
||||
hub *wsWriter.Hub
|
||||
upgrader websocket.Upgrader
|
||||
controllerInfo runnerParams.ControllerInfo
|
||||
}
|
||||
|
||||
func handleError(w http.ResponseWriter, err error) {
|
||||
|
|
@ -97,22 +89,6 @@ func handleError(w http.ResponseWriter, err error) {
|
|||
}
|
||||
}
|
||||
|
||||
// GetControllerInfo returns means to identify this very garm instance.
|
||||
// This is very useful for debugging and monitoring purposes.
|
||||
func (a *APIController) GetControllerInfo() (hostname, controllerId string) {
|
||||
|
||||
// the hostname is neither fixed nor in our control
|
||||
// so we get it every time to avoid confusion
|
||||
var err error
|
||||
hostname, err = os.Hostname()
|
||||
if err != nil {
|
||||
log.Printf("error getting hostname: %q", err)
|
||||
return "", ""
|
||||
}
|
||||
|
||||
return hostname, a.id
|
||||
}
|
||||
|
||||
// metric to count total webhooks received
|
||||
// at this point the webhook is not yet authenticated and
|
||||
// we don't know if it's meant for us or not
|
||||
|
|
@ -139,23 +115,26 @@ func (a *APIController) handleWorkflowJobEvent(w http.ResponseWriter, r *http.Re
|
|||
signature := r.Header.Get("X-Hub-Signature-256")
|
||||
hookType := r.Header.Get("X-Github-Hook-Installation-Target-Type")
|
||||
|
||||
hostname, controllerId := a.GetControllerInfo()
|
||||
controllerInfo, err := a.r.GetControllerInfo(r.Context())
|
||||
if err != nil {
|
||||
log.Printf("failed to get controller info for metics labels: %q", err)
|
||||
}
|
||||
|
||||
if err := a.r.DispatchWorkflowJob(hookType, signature, body); err != nil {
|
||||
if errors.Is(err, gErrors.ErrNotFound) {
|
||||
webhooksReceived.WithLabelValues("false", "owner_unknown", hostname, controllerId).Inc()
|
||||
webhooksReceived.WithLabelValues("false", "owner_unknown", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
|
||||
log.Printf("got not found error from DispatchWorkflowJob. webhook not meant for us?: %q", err)
|
||||
return
|
||||
} else if strings.Contains(err.Error(), "signature") { // TODO: check error type
|
||||
webhooksReceived.WithLabelValues("false", "signature_invalid", hostname, controllerId).Inc()
|
||||
webhooksReceived.WithLabelValues("false", "signature_invalid", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
|
||||
} else {
|
||||
webhooksReceived.WithLabelValues("false", "unknown", hostname, controllerId).Inc()
|
||||
webhooksReceived.WithLabelValues("false", "unknown", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
|
||||
}
|
||||
|
||||
handleError(w, err)
|
||||
return
|
||||
}
|
||||
webhooksReceived.WithLabelValues("true", "", hostname, controllerId).Inc()
|
||||
webhooksReceived.WithLabelValues("true", "", controllerInfo.Hostname, controllerInfo.ControllerID.String()).Inc()
|
||||
}
|
||||
|
||||
func (a *APIController) CatchAll(w http.ResponseWriter, r *http.Request) {
|
||||
|
|
@ -225,14 +204,21 @@ func (a *APIController) NotFoundHandler(w http.ResponseWriter, r *http.Request)
|
|||
func (a *APIController) MetricsTokenHandler(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
|
||||
if !auth.IsAdmin(ctx) {
|
||||
handleError(w, gErrors.ErrUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
token, err := a.auth.GetJWTMetricsToken(ctx)
|
||||
if err != nil {
|
||||
handleError(w, err)
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintf(w, `{"token": "%s"}`, token)
|
||||
err = json.NewEncoder(w).Encode(runnerParams.JWTResponse{Token: token})
|
||||
if err != nil {
|
||||
log.Printf("failed to encode response: %q", err)
|
||||
}
|
||||
}
|
||||
|
||||
// LoginHandler returns a jwt token
|
||||
|
|
|
|||
|
|
@ -4,45 +4,77 @@ import (
|
|||
"log"
|
||||
|
||||
"garm/auth"
|
||||
"garm/runner"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type GarmCollector struct {
|
||||
healthMetric *prometheus.Desc
|
||||
instanceMetric *prometheus.Desc
|
||||
apiController *APIController
|
||||
runner *runner.Runner
|
||||
}
|
||||
|
||||
func NewGarmCollector(a *APIController) *GarmCollector {
|
||||
func NewGarmCollector(r *runner.Runner) *GarmCollector {
|
||||
return &GarmCollector{
|
||||
apiController: a,
|
||||
runner: r,
|
||||
instanceMetric: prometheus.NewDesc(
|
||||
"garm_runner_status",
|
||||
"Status of the runner",
|
||||
[]string{"name", "status", "runner_status", "pool", "pool_type", "hostname", "controller_id"}, nil,
|
||||
)}
|
||||
[]string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id"}, nil,
|
||||
),
|
||||
healthMetric: prometheus.NewDesc(
|
||||
"garm_health",
|
||||
"Health of the runner",
|
||||
[]string{"hostname", "controller_id"}, nil,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- c.instanceMetric
|
||||
ch <- c.healthMetric
|
||||
}
|
||||
|
||||
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
c.CollectInstanceMetric(ch)
|
||||
controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
log.Printf("error on fetching controllerInfo: %s", err)
|
||||
// continue anyway
|
||||
}
|
||||
|
||||
c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
}
|
||||
|
||||
func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
m, err := prometheus.NewConstMetric(
|
||||
c.healthMetric,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
hostname,
|
||||
controllerID,
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("error on creating health metric: %s", err)
|
||||
return
|
||||
}
|
||||
ch <- m
|
||||
}
|
||||
|
||||
// CollectInstanceMetric collects the metrics for the runner instances
|
||||
// reflecting the statuses and the pool they belong to.
|
||||
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
|
||||
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
instances, err := c.apiController.r.ListAllInstances(ctx)
|
||||
instances, err := c.runner.ListAllInstances(ctx)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect metrics, listing instances: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
pools, err := c.apiController.r.ListAllPools(ctx)
|
||||
pools, err := c.runner.ListAllPools(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing pools: %s", err)
|
||||
// continue anyway
|
||||
|
|
@ -58,23 +90,21 @@ func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
|
|||
if pool.EnterpriseName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.EnterpriseName,
|
||||
Type: "enterprise",
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else if pool.OrgName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.OrgName,
|
||||
Type: "organization",
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.RepoName,
|
||||
Type: "repository",
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hostname, controllerID := c.apiController.GetControllerInfo()
|
||||
|
||||
for _, instance := range instances {
|
||||
|
||||
m, err := prometheus.NewConstMetric(
|
||||
|
|
@ -86,6 +116,7 @@ func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric) {
|
|||
string(instance.RunnerStatus),
|
||||
poolNames[instance.PoolID].Name,
|
||||
poolNames[instance.PoolID].Type,
|
||||
instance.PoolID,
|
||||
hostname,
|
||||
controllerID,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -19,17 +19,28 @@ import (
|
|||
"net/http"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
|
||||
"garm/apiserver/controllers"
|
||||
"garm/auth"
|
||||
"garm/config"
|
||||
"garm/util"
|
||||
)
|
||||
|
||||
func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, authMiddleware, initMiddleware, instanceMiddleware auth.Middleware) *mux.Router {
|
||||
func NewAPIRouter(han *controllers.APIController, logWriter io.Writer, cfg *config.Config, authMiddleware, initMiddleware, instanceMiddleware, metricsMiddlerware auth.Middleware) *mux.Router {
|
||||
router := mux.NewRouter()
|
||||
logMiddleware := util.NewLoggingMiddleware(logWriter)
|
||||
router.Use(logMiddleware)
|
||||
|
||||
if cfg.Metrics.Enable {
|
||||
metricsRouter := router.PathPrefix("/metrics").Subrouter()
|
||||
if !cfg.Metrics.DisableAuth {
|
||||
metricsRouter.Use(metricsMiddlerware.Middleware)
|
||||
}
|
||||
metricsRouter.Handle("/", promhttp.Handler()).Methods("GET", "OPTIONS")
|
||||
metricsRouter.Handle("", promhttp.Handler()).Methods("GET", "OPTIONS")
|
||||
}
|
||||
|
||||
// Handles github webhooks
|
||||
webhookRouter := router.PathPrefix("/webhooks").Subrouter()
|
||||
webhookRouter.PathPrefix("/").Handler(http.HandlerFunc(han.CatchAll))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue