diff --git a/internal/summary/accumulator.go b/internal/summary/accumulator.go
index bc3b1de..c8f10f1 100644
--- a/internal/summary/accumulator.go
+++ b/internal/summary/accumulator.go
@@ -10,6 +10,12 @@ import (
 	"edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/metrics"
 )
 
+// containerAccumulator tracks metrics for a single container
+type containerAccumulator struct {
+	cpuCoresValues    []float64
+	memoryBytesValues []float64
+}
+
 // Accumulator collects metric samples and computes run-level statistics
 type Accumulator struct {
 	topN           int
@@ -17,6 +23,7 @@ type Accumulator struct {
 	memBytesValues []float64
 	memPctValues   []float64
 	processPeaks   map[string]*ProcessPeak
+	containers     map[string]*containerAccumulator
 	startTime      time.Time
 	endTime        time.Time
 	sampleCount    int
@@ -27,6 +34,7 @@ func NewAccumulator(topN int) *Accumulator {
 	return &Accumulator{
 		topN:         topN,
 		processPeaks: make(map[string]*ProcessPeak),
+		containers:   make(map[string]*containerAccumulator),
 	}
 }
 
@@ -48,6 +56,17 @@ func (a *Accumulator) Add(m *metrics.SystemMetrics) {
 	for _, p := range m.TopMemory {
 		a.updateProcessPeak(p)
 	}
+
+	// Track per-container metrics
+	for name, cgroup := range m.Cgroups {
+		ca, ok := a.containers[name]
+		if !ok {
+			ca = &containerAccumulator{}
+			a.containers[name] = ca
+		}
+		ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores)
+		ca.memoryBytesValues = append(ca.memoryBytesValues, float64(cgroup.Memory.TotalRSSBytes))
+	}
 }
 
 // Summarize computes and returns the run summary, or nil if no samples were added
@@ -66,15 +85,33 @@ func (a *Accumulator) Summarize() *RunSummary {
 		MemUsedPercent:  computeStats(a.memPctValues),
 		TopCPUProcesses: a.topProcesses(func(p *ProcessPeak) float64 { return p.PeakCPU }),
 		TopMemProcesses: a.topProcesses(func(p *ProcessPeak) float64 { return float64(p.PeakMem) }),
+		Containers:      a.containerSummaries(),
 	}
 }
 
+// containerSummaries computes summaries for all tracked containers
+func (a *Accumulator) containerSummaries() []ContainerSummary {
+	summaries := make([]ContainerSummary, 0, len(a.containers))
+	for name, ca := range a.containers {
+		summaries = append(summaries, ContainerSummary{
+			Name:        name,
+			CPUCores:    computeStats(ca.cpuCoresValues),
+			MemoryBytes: computeStats(ca.memoryBytesValues),
+		})
+	}
+	// Sort by name for consistent output
+	sort.Slice(summaries, func(i, j int) bool {
+		return summaries[i].Name < summaries[j].Name
+	})
+	return summaries
+}
+
 // SampleCount returns the number of samples added
 func (a *Accumulator) SampleCount() int {
 	return a.sampleCount
 }
 
-// computeStats calculates peak, average, and P95 from a sorted copy of the values
+// computeStats calculates peak, percentiles (p99, p95, p75, p50), and average from a sorted copy of the values
 func computeStats(values []float64) StatSummary {
 	n := len(values)
 	if n == 0 {
@@ -90,15 +127,21 @@ func computeStats(values []float64) StatSummary {
 		sum += v
 	}
 
-	p95Index := int(float64(n-1) * 0.95)
-
 	return StatSummary{
 		Peak: sorted[n-1],
+		P99:  sorted[percentileIndex(n, 0.99)],
+		P95:  sorted[percentileIndex(n, 0.95)],
+		P75:  sorted[percentileIndex(n, 0.75)],
+		P50:  sorted[percentileIndex(n, 0.50)],
 		Avg:  sum / float64(n),
-		P95:  sorted[p95Index],
 	}
 }
 
+// percentileIndex returns the index for the given percentile (0.0-1.0)
+func percentileIndex(n int, percentile float64) int {
+	return int(float64(n-1) * percentile)
+}
+
 // updateProcessPeak merges a process observation into the peak tracking map
 func (a *Accumulator) updateProcessPeak(p metrics.ProcessMetrics) {
 	key := fmt.Sprintf("%d:%s", p.PID, p.Name)
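Reviewer note (not part of the patch): percentileIndex floors (n-1)*p, i.e. it selects the lower nearest-rank sample with no interpolation, so for small sample counts the reported p50/p75/p95/p99 can sit slightly below interpolated values. A minimal standalone sketch of the same convention, using only the standard library:

package main

import (
	"fmt"
	"sort"
)

// percentileIndex mirrors the patch: floor of (n-1)*p, the lower
// nearest-rank convention with no interpolation between samples.
func percentileIndex(n int, percentile float64) int {
	return int(float64(n-1) * percentile)
}

func main() {
	values := []float64{5, 1, 4, 2, 3} // unsorted samples
	sorted := append([]float64(nil), values...)
	sort.Float64s(sorted)

	// With n=5: p50 -> index int(4*0.50)=2 -> 3; p95 -> index int(4*0.95)=3 -> 4.
	fmt.Println("p50:", sorted[percentileIndex(len(sorted), 0.50)]) // 3
	fmt.Println("p95:", sorted[percentileIndex(len(sorted), 0.95)]) // 4
}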
diff --git a/internal/summary/accumulator_test.go b/internal/summary/accumulator_test.go
index 76f6d47..203fe53 100644
--- a/internal/summary/accumulator_test.go
+++ b/internal/summary/accumulator_test.go
@@ -333,3 +333,201 @@ func TestAccumulator_Duration(t *testing.T) {
 		t.Errorf("DurationSeconds: got %f, want 60", s.DurationSeconds)
 	}
 }
+
+func TestAccumulator_AllPercentiles(t *testing.T) {
+	acc := NewAccumulator(5)
+	// 20 values: 1, 2, 3, ..., 20
+	for i := 1; i <= 20; i++ {
+		acc.Add(&metrics.SystemMetrics{
+			Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
+			CPU:       metrics.CPUMetrics{TotalPercent: float64(i)},
+			Memory:    metrics.MemoryMetrics{},
+		})
+	}
+
+	s := acc.Summarize()
+	if s == nil {
+		t.Fatal("expected non-nil summary")
+	}
+
+	// Peak = 20
+	if s.CPUTotal.Peak != 20 {
+		t.Errorf("CPU peak: got %f, want 20", s.CPUTotal.Peak)
+	}
+	// P99: index=int(19*0.99)=int(18.81)=18, value=19
+	if s.CPUTotal.P99 != 19 {
+		t.Errorf("CPU p99: got %f, want 19", s.CPUTotal.P99)
+	}
+	// P95: index=int(19*0.95)=int(18.05)=18, value=19
+	if s.CPUTotal.P95 != 19 {
+		t.Errorf("CPU p95: got %f, want 19", s.CPUTotal.P95)
+	}
+	// P75: index=int(19*0.75)=int(14.25)=14, value=15
+	if s.CPUTotal.P75 != 15 {
+		t.Errorf("CPU p75: got %f, want 15", s.CPUTotal.P75)
+	}
+	// P50: index=int(19*0.50)=int(9.5)=9, value=10
+	if s.CPUTotal.P50 != 10 {
+		t.Errorf("CPU p50: got %f, want 10", s.CPUTotal.P50)
+	}
+	// Avg = (1+2+...+20)/20 = 210/20 = 10.5
+	if s.CPUTotal.Avg != 10.5 {
+		t.Errorf("CPU avg: got %f, want 10.5", s.CPUTotal.Avg)
+	}
+}
+
+func TestAccumulator_ContainerMetrics(t *testing.T) {
+	acc := NewAccumulator(5)
+
+	// Add samples with container metrics
+	for i := 1; i <= 5; i++ {
+		acc.Add(&metrics.SystemMetrics{
+			Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
+			CPU:       metrics.CPUMetrics{TotalPercent: float64(i * 10)},
+			Memory:    metrics.MemoryMetrics{},
+			Cgroups: map[string]*metrics.CgroupMetrics{
+				"container-a": {
+					Name: "container-a",
+					CPU:  metrics.CgroupCPUMetrics{UsedCores: float64(i)},
+					Memory: metrics.CgroupMemoryMetrics{
+						TotalRSSBytes: uint64(i * 1000),
+					},
+				},
+				"container-b": {
+					Name: "container-b",
+					CPU:  metrics.CgroupCPUMetrics{UsedCores: float64(i * 2)},
+					Memory: metrics.CgroupMemoryMetrics{
+						TotalRSSBytes: uint64(i * 2000),
+					},
+				},
+			},
+		})
+	}
+
+	s := acc.Summarize()
+	if s == nil {
+		t.Fatal("expected non-nil summary")
+	}
+
+	// Should have 2 containers
+	if len(s.Containers) != 2 {
+		t.Fatalf("Containers length: got %d, want 2", len(s.Containers))
+	}
+
+	// Containers should be sorted by name
+	if s.Containers[0].Name != "container-a" {
+		t.Errorf("Containers[0].Name: got %s, want container-a", s.Containers[0].Name)
+	}
+	if s.Containers[1].Name != "container-b" {
+		t.Errorf("Containers[1].Name: got %s, want container-b", s.Containers[1].Name)
+	}
+
+	// Container A: CPU cores [1,2,3,4,5], peak=5, avg=3
+	containerA := s.Containers[0]
+	if containerA.CPUCores.Peak != 5 {
+		t.Errorf("container-a CPUCores.Peak: got %f, want 5", containerA.CPUCores.Peak)
+	}
+	if containerA.CPUCores.Avg != 3 {
+		t.Errorf("container-a CPUCores.Avg: got %f, want 3", containerA.CPUCores.Avg)
+	}
+	// Memory bytes [1000,2000,3000,4000,5000], peak=5000, avg=3000
+	if containerA.MemoryBytes.Peak != 5000 {
+		t.Errorf("container-a MemoryBytes.Peak: got %f, want 5000", containerA.MemoryBytes.Peak)
+	}
+	if containerA.MemoryBytes.Avg != 3000 {
+		t.Errorf("container-a MemoryBytes.Avg: got %f, want 3000", containerA.MemoryBytes.Avg)
+	}
+
+	// Container B: CPU cores [2,4,6,8,10], peak=10, avg=6
+	containerB := s.Containers[1]
+	if containerB.CPUCores.Peak != 10 {
+		t.Errorf("container-b CPUCores.Peak: got %f, want 10", containerB.CPUCores.Peak)
+	}
+	if containerB.CPUCores.Avg != 6 {
+		t.Errorf("container-b CPUCores.Avg: got %f, want 6", containerB.CPUCores.Avg)
+	}
+}
+
+func TestAccumulator_ContainerMetrics_NoContainers(t *testing.T) {
+	acc := NewAccumulator(5)
+	acc.Add(&metrics.SystemMetrics{
+		Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
+		CPU:       metrics.CPUMetrics{TotalPercent: 50},
+		Memory:    metrics.MemoryMetrics{},
+		Cgroups:   nil, // No containers
+	})
+
+	s := acc.Summarize()
+	if s == nil {
+		t.Fatal("expected non-nil summary")
+	}
+
+	if len(s.Containers) != 0 {
+		t.Errorf("Containers length: got %d, want 0", len(s.Containers))
+	}
+}
+
+func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) {
+	acc := NewAccumulator(5)
+
+	// First sample: only container-a
+	acc.Add(&metrics.SystemMetrics{
+		Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC),
+		CPU:       metrics.CPUMetrics{},
+		Memory:    metrics.MemoryMetrics{},
+		Cgroups: map[string]*metrics.CgroupMetrics{
+			"container-a": {
+				Name:   "container-a",
+				CPU:    metrics.CgroupCPUMetrics{UsedCores: 1},
+				Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000},
+			},
+		},
+	})
+
+	// Second sample: both containers
+	acc.Add(&metrics.SystemMetrics{
+		Timestamp: time.Date(2025, 1, 1, 0, 0, 2, 0, time.UTC),
+		CPU:       metrics.CPUMetrics{},
+		Memory:    metrics.MemoryMetrics{},
+		Cgroups: map[string]*metrics.CgroupMetrics{
+			"container-a": {
+				Name:   "container-a",
+				CPU:    metrics.CgroupCPUMetrics{UsedCores: 2},
+				Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 2000},
+			},
+			"container-b": {
+				Name:   "container-b",
+				CPU:    metrics.CgroupCPUMetrics{UsedCores: 5},
+				Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 5000},
+			},
+		},
+	})
+
+	s := acc.Summarize()
+	if s == nil {
+		t.Fatal("expected non-nil summary")
+	}
+
+	// Should have 2 containers
+	if len(s.Containers) != 2 {
+		t.Fatalf("Containers length: got %d, want 2", len(s.Containers))
+	}
+
+	// Container A: 2 samples [1,2]
+	containerA := s.Containers[0]
+	if containerA.CPUCores.Peak != 2 {
+		t.Errorf("container-a CPUCores.Peak: got %f, want 2", containerA.CPUCores.Peak)
+	}
+	if containerA.CPUCores.Avg != 1.5 {
+		t.Errorf("container-a CPUCores.Avg: got %f, want 1.5", containerA.CPUCores.Avg)
+	}
+
+	// Container B: 1 sample [5]
+	containerB := s.Containers[1]
+	if containerB.CPUCores.Peak != 5 {
+		t.Errorf("container-b CPUCores.Peak: got %f, want 5", containerB.CPUCores.Peak)
+	}
+	if containerB.CPUCores.Avg != 5 {
+		t.Errorf("container-b CPUCores.Avg: got %f, want 5", containerB.CPUCores.Avg)
+	}
+}
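Reviewer note (not part of the patch): one behavior TestAccumulator_ContainerMetrics_PartialSamples pins down, worth flagging for consumers of the summary, is that a container's statistics are computed only over the samples in which that container appeared. A short-lived container's Avg is therefore not diluted by zeros from intervals before it started or after it exited, so Avg values are not directly comparable across containers with different lifetimes.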
diff --git a/internal/summary/types.go b/internal/summary/types.go
index ab5ecea..dad0b01 100644
--- a/internal/summary/types.go
+++ b/internal/summary/types.go
@@ -4,11 +4,14 @@ package summary
 
 import "time"
 
-// StatSummary holds peak, average, and P95 for a metric across the run
+// StatSummary holds peak, percentiles, and average for a metric across the run
 type StatSummary struct {
 	Peak float64 `json:"peak"`
-	Avg  float64 `json:"avg"`
+	P99  float64 `json:"p99"`
 	P95  float64 `json:"p95"`
+	P75  float64 `json:"p75"`
+	P50  float64 `json:"p50"`
+	Avg  float64 `json:"avg"`
 }
 
 // ProcessPeak holds the peak CPU and memory observed for a single process
@@ -19,15 +22,23 @@ type ProcessPeak struct {
 	PeakMem uint64 `json:"peak_mem_rss_bytes"`
 }
 
+// ContainerSummary holds statistics for a single container across the run
+type ContainerSummary struct {
+	Name        string      `json:"name"`
+	CPUCores    StatSummary `json:"cpu_cores"`
+	MemoryBytes StatSummary `json:"memory_bytes"`
+}
+
 // RunSummary holds the complete summary of a collection run
 type RunSummary struct {
-	StartTime       time.Time     `json:"start_time"`
-	EndTime         time.Time     `json:"end_time"`
-	DurationSeconds float64       `json:"duration_seconds"`
-	SampleCount     int           `json:"sample_count"`
-	CPUTotal        StatSummary   `json:"cpu_total_percent"`
-	MemUsedBytes    StatSummary   `json:"mem_used_bytes"`
-	MemUsedPercent  StatSummary   `json:"mem_used_percent"`
-	TopCPUProcesses []ProcessPeak `json:"top_cpu_processes"`
-	TopMemProcesses []ProcessPeak `json:"top_mem_processes"`
+	StartTime       time.Time          `json:"start_time"`
+	EndTime         time.Time          `json:"end_time"`
+	DurationSeconds float64            `json:"duration_seconds"`
+	SampleCount     int                `json:"sample_count"`
+	CPUTotal        StatSummary        `json:"cpu_total_percent"`
+	MemUsedBytes    StatSummary        `json:"mem_used_bytes"`
+	MemUsedPercent  StatSummary        `json:"mem_used_percent"`
+	TopCPUProcesses []ProcessPeak      `json:"top_cpu_processes"`
+	TopMemProcesses []ProcessPeak      `json:"top_mem_processes"`
+	Containers      []ContainerSummary `json:"containers"`
 }
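Reviewer note (not part of the patch): for consumers of the summary JSON, a sketch of the per-container output shape implied by the struct tags above. The types are trimmed local copies of the ones in types.go and the numbers are made up; only the shape matters.

package main

import (
	"encoding/json"
	"fmt"
)

// Minimal copies of the types from internal/summary/types.go,
// reduced to the container-related fields for illustration.
type StatSummary struct {
	Peak float64 `json:"peak"`
	P99  float64 `json:"p99"`
	P95  float64 `json:"p95"`
	P75  float64 `json:"p75"`
	P50  float64 `json:"p50"`
	Avg  float64 `json:"avg"`
}

type ContainerSummary struct {
	Name        string      `json:"name"`
	CPUCores    StatSummary `json:"cpu_cores"`
	MemoryBytes StatSummary `json:"memory_bytes"`
}

func main() {
	c := ContainerSummary{
		Name:        "container-a",
		CPUCores:    StatSummary{Peak: 2, P99: 2, P95: 2, P75: 2, P50: 1, Avg: 1.5},
		MemoryBytes: StatSummary{Peak: 2000, P99: 2000, P95: 2000, P75: 2000, P50: 1000, Avg: 1500},
	}
	out, _ := json.MarshalIndent(c, "", "  ")
	// Prints the object that appears as one element of the top-level
	// "containers" array in the run summary.
	fmt.Println(string(out))
}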