forgejo-runner-optimiser/internal/metrics/aggregator.go
Manuel Ganter ddaf5fbd0f
Some checks failed
ci / goreleaser (push) Failing after 1s
chore: update module path to DevFW-CICD org
Rename module from edp.buildth.ing/DevFW/forgejo-runner-resource-collector
to edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector
2026-02-04 14:25:50 +01:00

225 lines
5.9 KiB
Go

package metrics
import (
"sort"
"time"
"edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/proc"
)
// Aggregator collects and aggregates metrics from processes.
//
// It keeps the snapshots taken during the previous Collect call so that the
// next call can turn cumulative CPU counters into usage over the interval
// between the two calls.
type Aggregator struct {
// procPath is the path handed to the proc package readers.
procPath string
// topN is the maximum number of entries getTopByMetric returns.
topN int
// prevCPU is the most recent system-wide CPU snapshot stored by Collect;
// it is nil until Collect stores the first one.
prevCPU *CPUSnapshot
// prevProcCPU holds the per-process CPU snapshots recorded by the last
// calculateProcessMetrics pass, keyed by PID.
prevProcCPU map[int]*ProcessCPUSnapshot
}
// NewAggregator builds an Aggregator that reads from procPath and reports at
// most topN processes per ranking. The per-process snapshot map starts out
// empty and no system CPU snapshot is recorded yet.
func NewAggregator(procPath string, topN int) *Aggregator {
	agg := new(Aggregator)
	agg.procPath = procPath
	agg.topN = topN
	agg.prevProcCPU = map[int]*ProcessCPUSnapshot{}
	return agg
}
// Collect takes one sample of the system and returns it as a SystemMetrics.
//
// It reads the system info, the system CPU counters and the process list
// through the proc package, in that order, and returns the first error it
// meets unchanged together with a nil result. The system CPU snapshot is
// stored on the Aggregator right after the CPU counters are read, i.e. before
// the process list is read.
func (a *Aggregator) Collect() (*SystemMetrics, error) {
	sampledAt := time.Now()

	info, err := proc.ReadSystemInfo(a.procPath)
	if err != nil {
		return nil, err
	}

	user, nice, system, idle, iowait, irq, softirq, err := proc.ReadSystemCPU(a.procPath)
	if err != nil {
		return nil, err
	}

	// Derive system CPU usage against the previous snapshot, then make this
	// snapshot the baseline for the next call.
	snapshot := &CPUSnapshot{
		Timestamp: sampledAt,
		User:      user,
		Nice:      nice,
		System:    system,
		Idle:      idle,
		IOWait:    iowait,
		IRQ:       irq,
		SoftIRQ:   softirq,
	}
	cpu := a.calculateCPUMetrics(snapshot)
	a.prevCPU = snapshot

	procs, err := proc.ReadAllProcesses(a.procPath)
	if err != nil {
		return nil, err
	}
	perProcess := a.calculateProcessMetrics(procs, sampledAt)

	// Ranking keys for the two "top" lists.
	byCPU := func(p ProcessMetrics) float64 { return p.CPUPercent }
	byRSS := func(p ProcessMetrics) float64 { return float64(p.MemRSS) }

	result := &SystemMetrics{
		Timestamp:      sampledAt,
		TotalProcesses: len(procs),
		CPU:            cpu,
		Memory:         a.calculateMemoryMetrics(info, perProcess),
		TopCPU:         a.getTopByMetric(perProcess, byCPU),
		TopMemory:      a.getTopByMetric(perProcess, byRSS),
	}
	return result, nil
}
// calculateCPUMetrics converts the difference between the previous system CPU
// snapshot (a.prevCPU) and current into percentages of the total counter
// delta.
//
// It returns a zero CPUMetrics when there is no previous snapshot yet, or when
// the total did not advance (including counters that moved backwards).
// It does not update a.prevCPU; the caller does that.
func (a *Aggregator) calculateCPUMetrics(current *CPUSnapshot) CPUMetrics {
	prev := a.prevCPU
	if prev == nil {
		return CPUMetrics{}
	}

	// Subtract in floating point, like the per-field deltas below. If Total()
	// is an unsigned counter, an integer subtraction would wrap around to a
	// huge positive value when the counters go backwards, and the guard below
	// would never fire.
	totalDelta := float64(current.Total()) - float64(prev.Total())
	if totalDelta <= 0 {
		return CPUMetrics{}
	}

	// User time includes nice; system time includes hard and soft IRQ time.
	userDelta := float64(current.User+current.Nice) - float64(prev.User+prev.Nice)
	systemDelta := float64(current.System+current.IRQ+current.SoftIRQ) - float64(prev.System+prev.IRQ+prev.SoftIRQ)
	idleDelta := float64(current.Idle) - float64(prev.Idle)
	iowaitDelta := float64(current.IOWait) - float64(prev.IOWait)

	return CPUMetrics{
		// Everything that was neither idle nor waiting on I/O counts as busy.
		TotalPercent:  (totalDelta - idleDelta - iowaitDelta) / totalDelta * 100,
		UserPercent:   userDelta / totalDelta * 100,
		SystemPercent: systemDelta / totalDelta * 100,
		IdlePercent:   idleDelta / totalDelta * 100,
		IOWaitPercent: iowaitDelta / totalDelta * 100,
	}
}
// calculateProcessMetrics builds one ProcessMetrics entry per process and
// replaces a.prevProcCPU with the snapshots taken during this pass.
//
// CPUPercent is derived from the growth of the process's total CPU ticks since
// its previous snapshot, divided by proc.DefaultClockTicks and by the
// wall-clock seconds between that snapshot and now. It stays 0 when the PID
// had no previous snapshot, when no system CPU snapshot with a non-zero total
// is recorded, or when either the tick delta or the elapsed time is not
// positive. Values above 100 are clamped to 100.
//
// Processes that are absent from processes are dropped from the snapshot map.
func (a *Aggregator) calculateProcessMetrics(processes []*proc.ProcessInfo, now time.Time) []ProcessMetrics {
	newProcCPU := make(map[int]*ProcessCPUSnapshot, len(processes))
	metrics := make([]ProcessMetrics, 0, len(processes))

	// Per-process CPU is only reported once a system CPU snapshot with a
	// non-zero total exists. This does not change inside the loop, so it is
	// evaluated once.
	haveSystemCPU := a.prevCPU != nil && a.prevCPU.Total() > 0

	for _, p := range processes {
		pid := p.Stat.PID

		cpuPercent := 0.0
		if prev, ok := a.prevProcCPU[pid]; ok && haveSystemCPU {
			procDelta := float64(p.Stat.TotalTime()) - float64(prev.Total())
			elapsed := now.Sub(prev.Timestamp).Seconds()
			if procDelta > 0 && elapsed > 0 {
				// CPU percent = (process ticks / clock_ticks_per_sec) / elapsed_time * 100
				cpuPercent = (procDelta / float64(proc.DefaultClockTicks)) / elapsed * 100
				if cpuPercent > 100 {
					cpuPercent = 100 // Cap at 100% per CPU
				}
			}
		}

		// Store current snapshot for next iteration
		newProcCPU[pid] = &ProcessCPUSnapshot{
			PID:       pid,
			Timestamp: now,
			UTime:     p.Stat.UTime,
			STime:     p.Stat.STime,
		}

		// Report "?" when the state converts to an empty string.
		state := string(p.Stat.State)
		if state == "" {
			state = "?"
		}

		metrics = append(metrics, ProcessMetrics{
			PID:        pid,
			Name:       p.Status.Name,
			CPUPercent: cpuPercent,
			MemRSS:     p.Status.VmRSS,
			MemVirtual: p.Status.VmSize,
			Threads:    p.Status.Threads,
			State:      state,
		})
	}

	// Update process CPU snapshots for next iteration
	a.prevProcCPU = newProcCPU
	return metrics
}
// calculateMemoryMetrics combines the system-wide memory figures in sysInfo
// with the summed RSS of processes.
//
// UsedBytes is MemTotal minus MemAvailable, clamped to 0 when MemAvailable is
// larger than MemTotal. UsedPercent and RSSPercent are relative to MemTotal
// and stay 0 when MemTotal is not positive.
func (a *Aggregator) calculateMemoryMetrics(sysInfo *proc.SystemInfo, processes []ProcessMetrics) MemoryMetrics {
	var totalRSS uint64
	for _, p := range processes {
		totalRSS += p.MemRSS
	}

	// Guard the subtraction: if these fields are unsigned, an available value
	// above the total would wrap around to an enormous "used" figure instead
	// of going negative.
	usedBytes := sysInfo.MemTotal - sysInfo.MemAvailable
	if sysInfo.MemAvailable > sysInfo.MemTotal {
		usedBytes = 0
	}

	usedPercent := 0.0
	rssPercent := 0.0
	if sysInfo.MemTotal > 0 {
		usedPercent = float64(usedBytes) / float64(sysInfo.MemTotal) * 100
		rssPercent = float64(totalRSS) / float64(sysInfo.MemTotal) * 100
	}

	return MemoryMetrics{
		TotalBytes:     sysInfo.MemTotal,
		UsedBytes:      usedBytes,
		FreeBytes:      sysInfo.MemFree,
		AvailableBytes: sysInfo.MemAvailable,
		UsedPercent:    usedPercent,
		TotalRSSBytes:  totalRSS,
		RSSPercent:     rssPercent,
	}
}
// getTopByMetric returns up to a.topN entries of metrics with the largest
// getValue results, in descending order. Entries with equal values keep their
// relative order from metrics.
//
// The input slice is not modified; the result is a fresh slice. It returns nil
// for empty input and an empty slice when a.topN is zero or negative.
func (a *Aggregator) getTopByMetric(metrics []ProcessMetrics, getValue func(ProcessMetrics) float64) []ProcessMetrics {
	if len(metrics) == 0 {
		return nil
	}

	// Sort a copy so the caller's slice keeps its order.
	sorted := make([]ProcessMetrics, len(metrics))
	copy(sorted, metrics)
	// Stable sort keeps the output deterministic when values tie.
	sort.SliceStable(sorted, func(i, j int) bool {
		return getValue(sorted[i]) > getValue(sorted[j])
	})

	// Clamp n to [0, len(sorted)]; a negative topN would otherwise panic in
	// the slice expression below.
	n := a.topN
	if n < 0 {
		n = 0
	}
	if n > len(sorted) {
		n = len(sorted)
	}

	// Cap the capacity so an append by the caller cannot write into the
	// remaining (lower-ranked) entries of the backing array.
	return sorted[:n:n]
}