forgejo-runner-optimiser/internal/summary/accumulator.go

185 lines
5.1 KiB
Go
Raw Normal View History

// ABOUTME: Accumulates system metrics samples across a collection run.
// ABOUTME: Computes peak, average, and P95 statistics for CPU and memory on demand.
package summary
import (
"fmt"
"sort"
"time"
"edp.buildth.ing/DevFW-CICD/forgejo-runner-optimiser/internal/metrics"
)
// containerAccumulator tracks metrics for a single container
type containerAccumulator struct {
// Per-sample CPU usage in cores; values are appended only when a valid CPU delta was computed (see Add).
cpuCoresValues []float64
// Per-sample memory usage in bytes (taken from the cgroup's total RSS).
memoryBytesValues []float64
}
// Accumulator collects metric samples and computes run-level statistics
type Accumulator struct {
// Maximum number of processes to report in the top-CPU and top-memory lists.
topN int
// Per-sample system-wide CPU percentage values.
cpuValues []float64
// Per-sample system memory usage in bytes.
memBytesValues []float64
// Per-sample system memory usage as a percentage.
memPctValues []float64
// Peak CPU/memory per process, keyed by "PID:name" (see updateProcessPeak).
processPeaks map[string]*ProcessPeak
// Per-container accumulators, keyed by container name.
containers map[string]*containerAccumulator
// Timestamp of the first sample added.
startTime time.Time
// Timestamp of the most recent sample added.
endTime time.Time
// Total number of samples added via Add.
sampleCount int
}
// NewAccumulator creates an accumulator that tracks the top N processes.
func NewAccumulator(topN int) *Accumulator {
	a := &Accumulator{topN: topN}
	// Initialize the maps eagerly so Add can write into them without checks.
	a.processPeaks = map[string]*ProcessPeak{}
	a.containers = map[string]*containerAccumulator{}
	return a
}
// Add records a single metrics sample, folding it into the running
// series that Summarize later reduces to statistics.
func (a *Accumulator) Add(m *metrics.SystemMetrics) {
	a.sampleCount++
	// The first sample pins the run's start; every sample advances the end.
	if a.sampleCount == 1 {
		a.startTime = m.Timestamp
	}
	a.endTime = m.Timestamp

	// System-wide series.
	a.cpuValues = append(a.cpuValues, m.CPU.TotalPercent)
	a.memBytesValues = append(a.memBytesValues, float64(m.Memory.UsedBytes))
	a.memPctValues = append(a.memPctValues, m.Memory.UsedPercent)

	// Fold process observations from both rankings into the peak map.
	for _, proc := range m.TopCPU {
		a.updateProcessPeak(proc)
	}
	for _, proc := range m.TopMemory {
		a.updateProcessPeak(proc)
	}

	// Per-container (cgroup) series.
	for name, cg := range m.Cgroups {
		acc := a.containers[name]
		if acc == nil {
			acc = &containerAccumulator{}
			a.containers[name] = acc
		}
		// Record CPU only when a valid delta was computed
		// (the first sample and counter underflow are skipped).
		if cg.CPU.HasDelta {
			acc.cpuCoresValues = append(acc.cpuCoresValues, cg.CPU.UsedCores)
		}
		acc.memoryBytesValues = append(acc.memoryBytesValues, float64(cg.Memory.TotalRSSBytes))
	}
}
// Summarize computes and returns the run summary, or nil if no samples
// were added.
func (a *Accumulator) Summarize() *RunSummary {
	if a.sampleCount == 0 {
		return nil
	}

	s := &RunSummary{
		StartTime:       a.startTime,
		EndTime:         a.endTime,
		DurationSeconds: a.endTime.Sub(a.startTime).Seconds(),
		SampleCount:     a.sampleCount,
	}

	// Reduce each recorded series to peak/percentile/average statistics.
	s.CPUTotal = computeStats(a.cpuValues)
	s.MemUsedBytes = computeStats(a.memBytesValues)
	s.MemUsedPercent = computeStats(a.memPctValues)

	// Rank tracked processes once by peak CPU and once by peak memory.
	s.TopCPUProcesses = a.topProcesses(func(p *ProcessPeak) float64 { return p.PeakCPU })
	s.TopMemProcesses = a.topProcesses(func(p *ProcessPeak) float64 { return float64(p.PeakMem) })
	s.Containers = a.containerSummaries()
	return s
}
// containerSummaries computes statistics for every tracked container,
// sorted by name so output is deterministic despite random map order.
func (a *Accumulator) containerSummaries() []ContainerSummary {
	out := make([]ContainerSummary, 0, len(a.containers))
	for name, acc := range a.containers {
		cs := ContainerSummary{Name: name}
		cs.CPUCores = computeStats(acc.cpuCoresValues)
		cs.MemoryBytes = computeStats(acc.memoryBytesValues)
		out = append(out, cs)
	}
	sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name })
	return out
}
// SampleCount returns the number of samples added via Add so far.
func (a *Accumulator) SampleCount() int {
return a.sampleCount
}
// computeStats calculates peak, percentiles (p99, p95, p75, p50), and
// average over values. An empty input yields a zero StatSummary.
func computeStats(values []float64) StatSummary {
	if len(values) == 0 {
		return StatSummary{}
	}

	// Sort a copy so the caller's slice is left untouched.
	sorted := append([]float64(nil), values...)
	sort.Float64s(sorted)

	total := 0.0
	for _, v := range sorted {
		total += v
	}

	n := len(sorted)
	return StatSummary{
		Peak: sorted[n-1],
		P99:  sorted[percentileIndex(n, 0.99)],
		P95:  sorted[percentileIndex(n, 0.95)],
		P75:  sorted[percentileIndex(n, 0.75)],
		P50:  sorted[percentileIndex(n, 0.50)],
		Avg:  total / float64(n),
	}
}
// percentileIndex maps a percentile in [0.0, 1.0] to an index into a
// sorted slice of length n, truncating (n-1)*percentile toward zero.
func percentileIndex(n int, percentile float64) int {
	pos := float64(n-1) * percentile
	return int(pos)
}
// updateProcessPeak merges a process observation into the peak tracking
// map. Entries are keyed by "PID:name" so a reused PID with a different
// command name is tracked as a distinct process.
func (a *Accumulator) updateProcessPeak(p metrics.ProcessMetrics) {
	key := fmt.Sprintf("%d:%s", p.PID, p.Name)
	peak, seen := a.processPeaks[key]
	if !seen {
		// First observation becomes the initial peak.
		a.processPeaks[key] = &ProcessPeak{
			PID:     p.PID,
			Name:    p.Name,
			PeakCPU: p.CPUPercent,
			PeakMem: p.MemRSS,
		}
		return
	}
	// Ratchet each metric upward independently.
	if peak.PeakCPU < p.CPUPercent {
		peak.PeakCPU = p.CPUPercent
	}
	if peak.PeakMem < p.MemRSS {
		peak.PeakMem = p.MemRSS
	}
}
// topProcesses returns up to topN tracked processes, ordered descending
// by the value keyFn extracts from each peak record.
func (a *Accumulator) topProcesses(keyFn func(*ProcessPeak) float64) []ProcessPeak {
	ranked := make([]ProcessPeak, 0, len(a.processPeaks))
	for _, peak := range a.processPeaks {
		ranked = append(ranked, *peak)
	}
	sort.Slice(ranked, func(i, j int) bool {
		return keyFn(&ranked[i]) > keyFn(&ranked[j])
	})
	if len(ranked) <= a.topN {
		return ranked
	}
	return ranked[:a.topN]
}