feat: define a default duration for metrics update

Signed-off-by: Mario Constanti <mario.constanti@mercedes-benz.com>
This commit is contained in:
Mario Constanti 2024-02-20 10:23:48 +01:00
parent 97f172eb51
commit 3e025dda2f
5 changed files with 57 additions and 8 deletions

View file

@ -220,7 +220,7 @@ func main() {
router = routers.WithMetricsRouter(router, cfg.Metrics.DisableAuth, metricsMiddleware)
slog.InfoContext(ctx, "start metrics collection")
runnerMetrics.CollectObjectMetric(ctx, runner, time.NewTicker(cfg.Metrics.Period))
runnerMetrics.CollectObjectMetric(ctx, runner, time.NewTicker(cfg.Metrics.Duration()))
}
if cfg.Default.DebugServer {

View file

@ -456,9 +456,39 @@ func (t *TLSConfig) Validate() error {
}
// Metrics holds the configuration for the metrics API endpoint and for the
// periodic update of the internal metrics.
type Metrics struct {
DisableAuth bool `toml:"disable_auth" json:"disable-auth"`
Enable bool `toml:"enable" json:"enable"`
Period time.Duration `toml:"period" json:"period"`
// DisableAuth turns off JWT authentication for the metrics API endpoint
// when set to true.
DisableAuth bool `toml:"disable_auth" json:"disable-auth"`
// Enable defines whether the API endpoint for metrics collection is
// enabled.
Enable bool `toml:"enable" json:"enable"`
// Period defines the interval at which internal metrics are updated
// and propagated to the /metrics endpoint. A zero value makes Duration
// fall back to appdefaults.DefaultMetricsUpdateInterval.
Period time.Duration `toml:"period" json:"period"`
}
// ParseDuration renders the configured Period as text and feeds it back
// through time.ParseDuration. When parsing fails, a zero duration is returned
// together with the parse error.
func (m *Metrics) ParseDuration() (time.Duration, error) {
	text := fmt.Sprint(m.Period)
	parsed, parseErr := time.ParseDuration(text)
	if parseErr == nil {
		return parsed, nil
	}
	return 0, parseErr
}
// Duration returns the interval at which metrics should be updated.
//
// The default, appdefaults.DefaultMetricsUpdateInterval, is returned when no
// period is configured (zero value), when the configured value cannot be
// parsed, or when it is negative. The result is therefore always safe to pass
// to time.NewTicker, which panics on non-positive intervals.
func (m *Metrics) Duration() time.Duration {
	duration, err := m.ParseDuration()
	switch {
	case err != nil:
		slog.With(slog.Any("error", err)).Error(fmt.Sprintf("defined duration %s is invalid", m.Period))
	case duration < 0:
		// A negative period parses fine but is unusable as a ticker interval.
		slog.Error(fmt.Sprintf("defined duration %s is invalid", m.Period))
	}
	// ParseDuration yields 0 on error, so a single non-positive check covers
	// the unset, unparsable and negative cases alike.
	if duration <= 0 {
		slog.Debug(fmt.Sprintf("using default duration %s for metrics update interval", appdefaults.DefaultMetricsUpdateInterval))
		return appdefaults.DefaultMetricsUpdateInterval
	}
	return duration
}
// APIServer holds configuration for the API server

View file

@ -60,15 +60,27 @@ Metrics are disabled by default. To enable them, add the following to your confi
```toml
[metrics]
# Toggle metrics. If set to false, the API endpoint for metrics collection will
# be disabled.
enable = true
# Toggle to disable authentication (not recommended) on the metrics endpoint.
# If you do disable authentication, I encourage you to put a reverse proxy in front
# of garm and limit which systems can access that particular endpoint. Ideally, you
# would enable some kind of authentication using the reverse proxy, if the built-in auth
# is not sufficient for your needs.
disable_auth = false
#
# Default: false
disable_auth = true
# Toggle metrics. If set to false, the API endpoint for metrics collection will
# be disabled.
#
# Default: false
enable = true
# period is the interval at which garm updates the internal metrics about
# controller-specific objects (e.g. runners, pools, etc.) that are exposed on the /metrics endpoint.
#
# Default: "60s"
period = "30s"
```
You can choose to disable authentication if you wish, however it's not terribly difficult to set up, so I generally advise against disabling it.

View file

@ -10,6 +10,10 @@ var (
Subsystem: metricsPoolSubsystem,
Name: "info",
Help: "Info of the pool",
// ConstLabels: prometheus.Labels{
// "controller_id": metricControllerValue,
// "hostname": metricHostnameValue,
// },
}, []string{"id", "image", "flavor", "prefix", "os_type", "os_arch", "tags", "provider", "pool_owner", "pool_type"})
PoolStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{

View file

@ -27,4 +27,7 @@ const (
// GithubDefaultUploadBaseURL is the default URL for GitHub uploads.
GithubDefaultUploadBaseURL = "https://uploads.github.com/"
// DefaultMetricsUpdateInterval is the default interval at which metrics data is updated.
DefaultMetricsUpdateInterval = 60 * time.Second
)