feat: add new metrics
add info metrics about providers, enterprises, organizations, repositories and pools. Also expose most of the configurable pool information as metric like e.g. max Runners as garm_pool_max_runners Signed-off-by: Mario Constanti <mario.constanti@mercedes-benz.com>
This commit is contained in:
parent
a48ec0c0a8
commit
58e8b3454c
10 changed files with 579 additions and 96 deletions
|
|
@ -2,11 +2,55 @@
|
|||
|
||||
This is one of the features in GARM that I really love having. For one thing, it's community contributed and for another, it really adds value to the project. It allows us to create some pretty nice visualizations of what is happening with GARM.
|
||||
|
||||
At the moment there are only three meaningful metrics being collected, besides the default ones that the prometheus golang package enables by default. These are:
|
||||
## Common metrics
|
||||
|
||||
* `garm_health` - This is a gauge that is set to 1 if GARM is healthy and 0 if it is not. This is useful for alerting.
|
||||
* `garm_runner_status` - This is a gauge value that gives us details about the runners garm spawns
|
||||
* `garm_webhooks_received` - This is a counter that increments every time GARM receives a webhook from GitHub.
|
||||
| Metric name | Type | Labels | Description |
|
||||
|--------------------------|---------|-------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|
|
||||
| `garm_health` | Gauge | `controller_id`=<controller id> <br>`hostname`=<hostname> | This is a gauge that is set to 1 if GARM is healthy and 0 if it is not. This is useful for alerting. |
|
||||
| `garm_webhook_received` | Counter | `controller_id`=<controller id> <br>`hostname`=<hostname> <br>`reason`=<reason> <br>`valid`=<valid> | This is a counter that increments every time GARM receives a webhook from GitHub. |
|
||||
|
||||
## Enterprise metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|---------------------------------------|-------|-------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
|
||||
| `garm_enterprise_info` | Gauge | `id`=<enterprise id> <br>`name`=<enterprise name> | This is a gauge that is set to 1 and exposes enterprise information |
|
||||
| `garm_enterprise_pool_manager_status` | Gauge | `id`=<enterprise id> <br>`name`=<enterprise name> <br>`running`=<true\|false> | This is a gauge that is set to 1 if the enterprise pool manager is running and set to 0 if not |
|
||||
|
||||
## Organization metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|-----------------------------------------|-------|-----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------|
|
||||
| `garm_organization_info` | Gauge | `id`=<organization id> <br>`name`=<organization name> | This is a gauge that is set to 1 and exposes organization information |
|
||||
| `garm_organization_pool_manager_status` | Gauge | `id`=<organization id> <br>`name`=<organization name> <br>`running`=<true\|false> | This is a gauge that is set to 1 if the organization pool manager is running and set to 0 if not |
|
||||
|
||||
## Repository metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|---------------------------------------|-------|-------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
|
||||
| `garm_repository_info` | Gauge | `id`=<repository id> <br>`name`=<repository name> <br>`owner`=<repository owner> | This is a gauge that is set to 1 and exposes repository information |
|
||||
| `garm_repository_pool_manager_status` | Gauge | `id`=<repository id> <br>`name`=<repository name> <br>`running`=<true\|false> | This is a gauge that is set to 1 if the repository pool manager is running and set to 0 if not |
|
||||
|
||||
## Provider metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|----------------------|-------|-------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------|
|
||||
| `garm_provider_info` | Gauge | `description`=<provider description> <br>`name`=<provider name> <br>`type`=<internal\|external> | This is a gauge that is set to 1 and exposes provider information |
|
||||
|
||||
## Pool metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|-------------------------------|-------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------|
|
||||
| `garm_pool_info` | Gauge | `flavor`=<flavor> <br>`id`=<pool id> <br>`image`=<image name> <br>`os_arch`=<defined OS arch> <br>`os_type`=<defined OS name> <br>`pool_owner`=<owner name> <br>`pool_type`=<repository\|organization\|enterprise> <br>`prefix`=<prefix> <br>`provider`=<provider name> <br>`tags`=<concatenated list of pool tags> <br> | This is a gauge that is set to 1 and exposes pool information |
|
||||
| `garm_pool_status` | Gauge | `enabled`=<true\|false> <br>`id`=<pool id> | This is a gauge that is set to 1 if the pool is enabled and set to 0 if not |
|
||||
| `garm_pool_bootstrap_timeout` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool bootstrap timeout |
|
||||
| `garm_pool_max_runners` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool max runners |
|
||||
| `garm_pool_min_idle_runners` | Gauge | `id`=<pool id> | This is a gauge that is set to the pool min idle runners |
|
||||
|
||||
## Runner metrics
|
||||
|
||||
| Metric name | Type | Labels | Description |
|
||||
|----------------------|-------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------|
|
||||
| `garm_runner_status` | Gauge | `controller_id`=<controller id> <br>`hostname`=<hostname> <br>`name`=<runner name> <br>`pool_id`=<pool id> <br>`pool_owner`=<owner name> <br>`pool_type`=<repository\|organization\|enterprise> <br>`provider`=<provider name> <br>`runner_status`=<running\|stopped\|error\|pending_delete\|deleting\|pending_create\|creating\|unknown> <br>`status`=<idle\|pending\|terminated\|installing\|failed\|active> <br> | This is a gauge value that gives us details about the runners garm spawns |
|
||||
|
||||
More metrics will be added in the future.
|
||||
|
||||
|
|
|
|||
50
metrics/enterprise.go
Normal file
50
metrics/enterprise.go
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strconv"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectOrganizationMetric collects the metrics for the enterprise objects
|
||||
func (c *GarmCollector) CollectEnterpriseMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
enterprises, err := c.runner.ListEnterprises(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing providers: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, enterprise := range enterprises {
|
||||
|
||||
enterpriseInfo, err := prometheus.NewConstMetric(
|
||||
c.enterpriseInfo,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
enterprise.Name, // label: name
|
||||
enterprise.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect enterpriseInfo metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- enterpriseInfo
|
||||
|
||||
enterprisePoolManagerStatus, err := prometheus.NewConstMetric(
|
||||
c.enterprisePoolManagerStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2float64(enterprise.PoolManagerStatus.IsRunning),
|
||||
enterprise.Name, // label: name
|
||||
enterprise.ID, // label: id
|
||||
strconv.FormatBool(enterprise.PoolManagerStatus.IsRunning), // label: running
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect enterprisePoolManagerStatus metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- enterprisePoolManagerStatus
|
||||
}
|
||||
}
|
||||
22
metrics/health.go
Normal file
22
metrics/health.go
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
m, err := prometheus.NewConstMetric(
|
||||
c.healthMetric,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
hostname,
|
||||
controllerID,
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("error on creating health metric: %s", err)
|
||||
return
|
||||
}
|
||||
ch <- m
|
||||
}
|
||||
79
metrics/instance.go
Normal file
79
metrics/instance.go
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectInstanceMetric collects the metrics for the runner instances
|
||||
// reflecting the statuses and the pool they belong to.
|
||||
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
instances, err := c.runner.ListAllInstances(ctx)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect metrics, listing instances: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
pools, err := c.runner.ListAllPools(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing pools: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
type poolInfo struct {
|
||||
Name string
|
||||
Type string
|
||||
ProviderName string
|
||||
}
|
||||
|
||||
poolNames := make(map[string]poolInfo)
|
||||
for _, pool := range pools {
|
||||
if pool.EnterpriseName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.EnterpriseName,
|
||||
Type: string(pool.PoolType()),
|
||||
ProviderName: pool.ProviderName,
|
||||
}
|
||||
} else if pool.OrgName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.OrgName,
|
||||
Type: string(pool.PoolType()),
|
||||
ProviderName: pool.ProviderName,
|
||||
}
|
||||
} else {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.RepoName,
|
||||
Type: string(pool.PoolType()),
|
||||
ProviderName: pool.ProviderName,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, instance := range instances {
|
||||
|
||||
m, err := prometheus.NewConstMetric(
|
||||
c.instanceMetric,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
instance.Name, // label: name
|
||||
string(instance.Status), // label: status
|
||||
string(instance.RunnerStatus), // label: runner_status
|
||||
poolNames[instance.PoolID].Name, // label: pool_owner
|
||||
poolNames[instance.PoolID].Type, // label: pool_type
|
||||
instance.PoolID, // label: pool_id
|
||||
hostname, // label: hostname
|
||||
controllerID, // label: controller_id
|
||||
poolNames[instance.PoolID].ProviderName, // label: provider
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("cannot collect runner metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- m
|
||||
}
|
||||
}
|
||||
|
|
@ -11,6 +11,15 @@ import (
|
|||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Name fragments used to build metric names. A full name is the
// concatenation of the namespace, an optional subsystem and the metric's
// own suffix; every fragment already carries its trailing underscore.
const (
	metricsNamespace = "garm_"

	metricsRunnerSubsystem       = "runner_"
	metricsPoolSubsystem         = "pool_"
	metricsProviderSubsystem     = "provider_"
	metricsOrganizationSubsystem = "organization_"
	metricsRepositorySubsystem   = "repository_"
	metricsEnterpriseSubsystem   = "enterprise_"
	metricsWebhookSubsystem      = "webhook_"
)
|
||||
|
||||
var webhooksReceived *prometheus.CounterVec = nil
|
||||
|
||||
// RecordWebhookWithLabels will increment a webhook metric identified by specific
|
||||
|
|
@ -48,7 +57,7 @@ func RegisterCollectors(runner *runner.Runner) error {
|
|||
// at this point the webhook is not yet authenticated and
|
||||
// we don't know if it's meant for us or not
|
||||
webhooksReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "garm_webhooks_received",
|
||||
Name: metricsNamespace + metricsWebhookSubsystem + "received",
|
||||
Help: "The total number of webhooks received",
|
||||
}, []string{"valid", "reason", "hostname", "controller_id"})
|
||||
|
||||
|
|
@ -59,6 +68,31 @@ func RegisterCollectors(runner *runner.Runner) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
type GarmCollector struct {
|
||||
healthMetric *prometheus.Desc
|
||||
instanceMetric *prometheus.Desc
|
||||
|
||||
// pool metrics
|
||||
poolInfo *prometheus.Desc
|
||||
poolStatus *prometheus.Desc
|
||||
poolMaxRunners *prometheus.Desc
|
||||
poolMinIdleRunners *prometheus.Desc
|
||||
poolBootstrapTimeout *prometheus.Desc
|
||||
|
||||
// provider metrics
|
||||
providerInfo *prometheus.Desc
|
||||
|
||||
organizationInfo *prometheus.Desc
|
||||
organizationPoolManagerStatus *prometheus.Desc
|
||||
repositoryInfo *prometheus.Desc
|
||||
repositoryPoolManagerStatus *prometheus.Desc
|
||||
enterpriseInfo *prometheus.Desc
|
||||
enterprisePoolManagerStatus *prometheus.Desc
|
||||
|
||||
runner *runner.Runner
|
||||
cachedControllerInfo params.ControllerInfo
|
||||
}
|
||||
|
||||
func NewGarmCollector(r *runner.Runner) (*GarmCollector, error) {
|
||||
controllerInfo, err := r.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
|
|
@ -67,29 +101,92 @@ func NewGarmCollector(r *runner.Runner) (*GarmCollector, error) {
|
|||
return &GarmCollector{
|
||||
runner: r,
|
||||
instanceMetric: prometheus.NewDesc(
|
||||
"garm_runner_status",
|
||||
metricsNamespace+metricsRunnerSubsystem+"status",
|
||||
"Status of the runner",
|
||||
[]string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id"}, nil,
|
||||
[]string{"name", "status", "runner_status", "pool_owner", "pool_type", "pool_id", "hostname", "controller_id", "provider"}, nil,
|
||||
),
|
||||
healthMetric: prometheus.NewDesc(
|
||||
"garm_health",
|
||||
metricsNamespace+"health",
|
||||
"Health of the runner",
|
||||
[]string{"hostname", "controller_id"}, nil,
|
||||
),
|
||||
poolInfo: prometheus.NewDesc(
|
||||
metricsNamespace+metricsPoolSubsystem+"info",
|
||||
"Information of the pool",
|
||||
[]string{"id", "image", "flavor", "prefix", "os_type", "os_arch", "tags", "provider", "pool_owner", "pool_type"}, nil,
|
||||
),
|
||||
poolStatus: prometheus.NewDesc(
|
||||
metricsNamespace+metricsPoolSubsystem+"status",
|
||||
"Status of the pool",
|
||||
[]string{"id", "enabled"}, nil,
|
||||
),
|
||||
poolMaxRunners: prometheus.NewDesc(
|
||||
metricsNamespace+metricsPoolSubsystem+"max_runners",
|
||||
"Max runners of the pool",
|
||||
[]string{"id"}, nil,
|
||||
),
|
||||
poolMinIdleRunners: prometheus.NewDesc(
|
||||
metricsNamespace+metricsPoolSubsystem+"min_idle_runners",
|
||||
"Min idle runners of the pool",
|
||||
[]string{"id"}, nil,
|
||||
),
|
||||
poolBootstrapTimeout: prometheus.NewDesc(
|
||||
metricsNamespace+metricsPoolSubsystem+"bootstrap_timeout",
|
||||
"Bootstrap timeout of the pool",
|
||||
[]string{"id"}, nil,
|
||||
),
|
||||
providerInfo: prometheus.NewDesc(
|
||||
metricsNamespace+metricsProviderSubsystem+"info",
|
||||
"Info of the provider",
|
||||
[]string{"name", "type", "description"}, nil,
|
||||
),
|
||||
organizationInfo: prometheus.NewDesc(
|
||||
metricsNamespace+metricsOrganizationSubsystem+"info",
|
||||
"Info of the organization",
|
||||
[]string{"name", "id"}, nil,
|
||||
),
|
||||
organizationPoolManagerStatus: prometheus.NewDesc(
|
||||
metricsNamespace+metricsOrganizationSubsystem+"pool_manager_status",
|
||||
"Status of the organization pool manager",
|
||||
[]string{"name", "id", "running"}, nil,
|
||||
),
|
||||
repositoryInfo: prometheus.NewDesc(
|
||||
metricsNamespace+metricsRepositorySubsystem+"info",
|
||||
"Info of the organization",
|
||||
[]string{"name", "owner", "id"}, nil,
|
||||
),
|
||||
repositoryPoolManagerStatus: prometheus.NewDesc(
|
||||
metricsNamespace+metricsRepositorySubsystem+"pool_manager_status",
|
||||
"Status of the repository pool manager",
|
||||
[]string{"name", "id", "running"}, nil,
|
||||
),
|
||||
enterpriseInfo: prometheus.NewDesc(
|
||||
metricsNamespace+metricsEnterpriseSubsystem+"info",
|
||||
"Info of the organization",
|
||||
[]string{"name", "id"}, nil,
|
||||
),
|
||||
enterprisePoolManagerStatus: prometheus.NewDesc(
|
||||
metricsNamespace+metricsEnterpriseSubsystem+"pool_manager_status",
|
||||
"Status of the enterprise pool manager",
|
||||
[]string{"name", "id", "running"}, nil,
|
||||
),
|
||||
|
||||
cachedControllerInfo: controllerInfo,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type GarmCollector struct {
|
||||
healthMetric *prometheus.Desc
|
||||
instanceMetric *prometheus.Desc
|
||||
runner *runner.Runner
|
||||
cachedControllerInfo params.ControllerInfo
|
||||
}
|
||||
|
||||
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- c.instanceMetric
|
||||
ch <- c.healthMetric
|
||||
ch <- c.poolInfo
|
||||
ch <- c.poolStatus
|
||||
ch <- c.poolMaxRunners
|
||||
ch <- c.poolMinIdleRunners
|
||||
ch <- c.providerInfo
|
||||
ch <- c.organizationInfo
|
||||
ch <- c.organizationPoolManagerStatus
|
||||
ch <- c.enterpriseInfo
|
||||
ch <- c.enterprisePoolManagerStatus
|
||||
}
|
||||
|
||||
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
|
|
@ -98,87 +195,12 @@ func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
|
|||
log.Printf("failed to get controller info: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
}
|
||||
|
||||
func (c *GarmCollector) CollectHealthMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
m, err := prometheus.NewConstMetric(
|
||||
c.healthMetric,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
hostname,
|
||||
controllerID,
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("error on creating health metric: %s", err)
|
||||
return
|
||||
}
|
||||
ch <- m
|
||||
}
|
||||
|
||||
// CollectInstanceMetric collects the metrics for the runner instances
|
||||
// reflecting the statuses and the pool they belong to.
|
||||
func (c *GarmCollector) CollectInstanceMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
instances, err := c.runner.ListAllInstances(ctx)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect metrics, listing instances: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
pools, err := c.runner.ListAllPools(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing pools: %s", err)
|
||||
// continue anyway
|
||||
}
|
||||
|
||||
type poolInfo struct {
|
||||
Name string
|
||||
Type string
|
||||
}
|
||||
|
||||
poolNames := make(map[string]poolInfo)
|
||||
for _, pool := range pools {
|
||||
if pool.EnterpriseName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.EnterpriseName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else if pool.OrgName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.OrgName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.RepoName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, instance := range instances {
|
||||
|
||||
m, err := prometheus.NewConstMetric(
|
||||
c.instanceMetric,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
instance.Name,
|
||||
string(instance.Status),
|
||||
string(instance.RunnerStatus),
|
||||
poolNames[instance.PoolID].Name,
|
||||
poolNames[instance.PoolID].Type,
|
||||
instance.PoolID,
|
||||
hostname,
|
||||
controllerID,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
log.Printf("cannot collect metrics, creating metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- m
|
||||
}
|
||||
c.CollectPoolMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectProviderMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectOrganizationMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectRepositoryMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectEnterpriseMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
}
|
||||
|
|
|
|||
50
metrics/organization.go
Normal file
50
metrics/organization.go
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strconv"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectOrganizationMetric collects the metrics for the organization objects
|
||||
func (c *GarmCollector) CollectOrganizationMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
organizations, err := c.runner.ListOrganizations(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing providers: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, organization := range organizations {
|
||||
|
||||
organizationInfo, err := prometheus.NewConstMetric(
|
||||
c.organizationInfo,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
organization.Name, // label: name
|
||||
organization.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect organizationInfo metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- organizationInfo
|
||||
|
||||
organizationPoolManagerStatus, err := prometheus.NewConstMetric(
|
||||
c.organizationPoolManagerStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2float64(organization.PoolManagerStatus.IsRunning),
|
||||
organization.Name, // label: name
|
||||
organization.ID, // label: id
|
||||
strconv.FormatBool(organization.PoolManagerStatus.IsRunning), // label: running
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect organizationPoolManagerStatus metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- organizationPoolManagerStatus
|
||||
}
|
||||
}
|
||||
121
metrics/pool.go
Normal file
121
metrics/pool.go
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectPoolMetric collects the metrics for the pool objects
|
||||
func (c *GarmCollector) CollectPoolMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
pools, err := c.runner.ListAllPools(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing pools: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
type poolInfo struct {
|
||||
Name string
|
||||
Type string
|
||||
}
|
||||
|
||||
poolNames := make(map[string]poolInfo)
|
||||
for _, pool := range pools {
|
||||
if pool.EnterpriseName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.EnterpriseName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else if pool.OrgName != "" {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.OrgName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
} else {
|
||||
poolNames[pool.ID] = poolInfo{
|
||||
Name: pool.RepoName,
|
||||
Type: string(pool.PoolType()),
|
||||
}
|
||||
}
|
||||
|
||||
var poolTags []string
|
||||
for _, tag := range pool.Tags {
|
||||
poolTags = append(poolTags, tag.Name)
|
||||
}
|
||||
|
||||
poolInfo, err := prometheus.NewConstMetric(
|
||||
c.poolInfo,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
pool.ID, // label: id
|
||||
pool.Image, // label: image
|
||||
pool.Flavor, // label: flavor
|
||||
pool.Prefix, // label: prefix
|
||||
string(pool.OSType), // label: os_type
|
||||
string(pool.OSArch), // label: os_arch
|
||||
strings.Join(poolTags, ","), // label: tags
|
||||
pool.ProviderName, // label: provider
|
||||
poolNames[pool.ID].Name, // label: pool_owner
|
||||
poolNames[pool.ID].Type, // label: pool_type
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect poolInfo metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- poolInfo
|
||||
|
||||
poolStatus, err := prometheus.NewConstMetric(
|
||||
c.poolStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2float64(pool.Enabled),
|
||||
pool.ID, // label: id
|
||||
strconv.FormatBool(pool.Enabled), // label: enabled
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect poolStatus metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- poolStatus
|
||||
|
||||
poolMaxRunners, err := prometheus.NewConstMetric(
|
||||
c.poolMaxRunners,
|
||||
prometheus.GaugeValue,
|
||||
float64(pool.MaxRunners),
|
||||
pool.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect poolMaxRunners metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- poolMaxRunners
|
||||
|
||||
poolMinIdleRunners, err := prometheus.NewConstMetric(
|
||||
c.poolMinIdleRunners,
|
||||
prometheus.GaugeValue,
|
||||
float64(pool.MinIdleRunners),
|
||||
pool.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect poolMinIdleRunners metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- poolMinIdleRunners
|
||||
|
||||
poolBootstrapTimeout, err := prometheus.NewConstMetric(
|
||||
c.poolBootstrapTimeout,
|
||||
prometheus.GaugeValue,
|
||||
float64(pool.RunnerBootstrapTimeout),
|
||||
pool.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect poolBootstrapTimeout metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- poolBootstrapTimeout
|
||||
}
|
||||
}
|
||||
36
metrics/provider.go
Normal file
36
metrics/provider.go
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectPoolMetric collects the metrics for the pool objects
|
||||
func (c *GarmCollector) CollectProviderMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
providers, err := c.runner.ListProviders(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing providers: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, provider := range providers {
|
||||
|
||||
providerInfo, err := prometheus.NewConstMetric(
|
||||
c.providerInfo,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
provider.Name, // label: name
|
||||
string(provider.ProviderType), // label: type
|
||||
provider.Description, // label: description
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect providerInfo metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- providerInfo
|
||||
}
|
||||
}
|
||||
51
metrics/repository.go
Normal file
51
metrics/repository.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strconv"
|
||||
|
||||
"github.com/cloudbase/garm/auth"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// CollectOrganizationMetric collects the metrics for the repository objects
|
||||
func (c *GarmCollector) CollectRepositoryMetric(ch chan<- prometheus.Metric, hostname string, controllerID string) {
|
||||
ctx := auth.GetAdminContext()
|
||||
|
||||
repositories, err := c.runner.ListRepositories(ctx)
|
||||
if err != nil {
|
||||
log.Printf("listing providers: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, repository := range repositories {
|
||||
|
||||
repositoryInfo, err := prometheus.NewConstMetric(
|
||||
c.repositoryInfo,
|
||||
prometheus.GaugeValue,
|
||||
1,
|
||||
repository.Name, // label: name
|
||||
repository.Owner, // label: owner
|
||||
repository.ID, // label: id
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect repositoryInfo metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- repositoryInfo
|
||||
|
||||
repositoryPoolManagerStatus, err := prometheus.NewConstMetric(
|
||||
c.repositoryPoolManagerStatus,
|
||||
prometheus.GaugeValue,
|
||||
bool2float64(repository.PoolManagerStatus.IsRunning),
|
||||
repository.Name, // label: name
|
||||
repository.ID, // label: id
|
||||
strconv.FormatBool(repository.PoolManagerStatus.IsRunning), // label: running
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("cannot collect repositoryPoolManagerStatus metric: %s", err)
|
||||
continue
|
||||
}
|
||||
ch <- repositoryPoolManagerStatus
|
||||
}
|
||||
}
|
||||
8
metrics/util.go
Normal file
8
metrics/util.go
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
package metrics
|
||||
|
||||
// bool2float64 converts a boolean into a gauge value: 1 for true and 0 for
// false.
func bool2float64(b bool) float64 {
	var value float64 // zero value covers the false case
	if b {
		value = 1
	}
	return value
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue