From 3e025dda2fd1c22e97ccac10f7c48ec06bd74a7f Mon Sep 17 00:00:00 2001 From: Mario Constanti Date: Tue, 20 Feb 2024 10:23:48 +0100 Subject: [PATCH] feat: define a default duration for metrics update Signed-off-by: Mario Constanti --- cmd/garm/main.go | 2 +- config/config.go | 36 ++++++++++++++++++++++++++++++--- doc/config_metrics.md | 20 ++++++++++++++---- metrics/pool.go | 4 ++++ util/appdefaults/appdefaults.go | 3 +++ 5 files changed, 57 insertions(+), 8 deletions(-) diff --git a/cmd/garm/main.go b/cmd/garm/main.go index 59686fd1..327f09bd 100644 --- a/cmd/garm/main.go +++ b/cmd/garm/main.go @@ -220,7 +220,7 @@ func main() { router = routers.WithMetricsRouter(router, cfg.Metrics.DisableAuth, metricsMiddleware) slog.InfoContext(ctx, "start metrics collection") - runnerMetrics.CollectObjectMetric(ctx, runner, time.NewTicker(cfg.Metrics.Period)) + runnerMetrics.CollectObjectMetric(ctx, runner, time.NewTicker(cfg.Metrics.Duration())) } if cfg.Default.DebugServer { diff --git a/config/config.go b/config/config.go index 7ab850a9..a12b91a9 100644 --- a/config/config.go +++ b/config/config.go @@ -456,9 +456,39 @@ func (t *TLSConfig) Validate() error { } type Metrics struct { - DisableAuth bool `toml:"disable_auth" json:"disable-auth"` - Enable bool `toml:"enable" json:"enable"` - Period time.Duration `toml:"period" json:"period"` + // DisableAuth defines whether JWT authentication is disabled on + // the metrics API endpoint + DisableAuth bool `toml:"disable_auth" json:"disable-auth"` + // Enable defines whether the API endpoint for metrics collection will + // be enabled + Enable bool `toml:"enable" json:"enable"` + // Period defines the interval at which internal metrics are updated + // and propagated to the /metrics endpoint + Period time.Duration `toml:"period" json:"period"` +} + +// ParseDuration parses the configured duration and returns a time.Duration of 0 +// if the duration is invalid. 
+func (m *Metrics) ParseDuration() (time.Duration, error) { + duration, err := time.ParseDuration(fmt.Sprint(m.Period)) + if err != nil { + return 0, err + } + return duration, nil +} + +// Duration returns the configured duration or the default duration if no value +// is configured or the configured value is invalid or not positive. +func (m *Metrics) Duration() time.Duration { + duration, err := m.ParseDuration() + if err != nil { + slog.With(slog.Any("error", err)).Error(fmt.Sprintf("defined duration %s is invalid", m.Period)) + } + if duration <= 0 { + slog.Debug(fmt.Sprintf("using default duration %s for metrics update interval", appdefaults.DefaultMetricsUpdateInterval)) + return appdefaults.DefaultMetricsUpdateInterval + } + return duration } // APIServer holds configuration for the API server diff --git a/doc/config_metrics.md b/doc/config_metrics.md index 8eaeb214..65ae1057 100644 --- a/doc/config_metrics.md +++ b/doc/config_metrics.md @@ -60,15 +60,27 @@ Metrics are disabled by default. To enable them, add the following to your confi ```toml [metrics] -# Toggle metrics. If set to false, the API endpoint for metrics collection will -# be disabled. -enable = true + # Toggle to disable authentication (not recommended) on the metrics endpoint. # If you do disable authentication, I encourage you to put a reverse proxy in front # of garm and limit which systems can access that particular endpoint. Ideally, you # would enable some kind of authentication using the reverse proxy, if the built-in auth # is not sufficient for your needs. -disable_auth = false +# +# Default: false +disable_auth = true + +# Toggle metrics. If set to false, the API endpoint for metrics collection will +# be disabled. +# +# Default: false +enable = true + +# period is the time interval at which the /metrics endpoint will update internal metrics about +# controller specific objects (e.g. runners, pools, etc.) 
+# +# Default: "60s" +period = "30s" ``` You can choose to disable authentication if you wish, however it's not terribly difficult to set up, so I generally advise against disabling it. diff --git a/metrics/pool.go b/metrics/pool.go index 5803af90..764ad706 100644 --- a/metrics/pool.go +++ b/metrics/pool.go @@ -10,6 +10,10 @@ var ( Subsystem: metricsPoolSubsystem, Name: "info", Help: "Info of the pool", + // ConstLabels: prometheus.Labels{ + // "controller_id": metricControllerValue, + // "hostname": metricHostnameValue, + // }, }, []string{"id", "image", "flavor", "prefix", "os_type", "os_arch", "tags", "provider", "pool_owner", "pool_type"}) PoolStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{ diff --git a/util/appdefaults/appdefaults.go b/util/appdefaults/appdefaults.go index d0d86976..26faa667 100644 --- a/util/appdefaults/appdefaults.go +++ b/util/appdefaults/appdefaults.go @@ -27,4 +27,7 @@ const ( // uploadBaseURL is the default URL for guthub uploads. GithubDefaultUploadBaseURL = "https://uploads.github.com/" + + // metrics data update interval + DefaultMetricsUpdateInterval = 60 * time.Second )