From 90c89583a0dc22e802741bb5cb23e65ef7ecdb7a Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 9 Feb 2026 17:37:53 +0100 Subject: [PATCH] Ignore data with no delta (first datapoint or underflow) --- internal/metrics/aggregator.go | 4 +- internal/metrics/types.go | 1 + internal/summary/accumulator.go | 5 +- internal/summary/accumulator_test.go | 77 +++++++++++++++++++++++++--- 4 files changed, 79 insertions(+), 8 deletions(-) diff --git a/internal/metrics/aggregator.go b/internal/metrics/aggregator.go index d8a22b9..2e7c18e 100644 --- a/internal/metrics/aggregator.go +++ b/internal/metrics/aggregator.go @@ -347,6 +347,7 @@ func (a *Aggregator) calculateCgroupMetrics( // Calculate CPU cores used from delta usedCores := 0.0 + hasDelta := false if prev, ok := a.prevCgroupCPU[containerName]; ok && elapsed > 0 { // Guard against underflow: if processes exited and new ones started, // totalTicks could be less than prev. In that case, skip this sample. @@ -354,8 +355,8 @@ func (a *Aggregator) calculateCgroupMetrics( deltaTicks := totalTicks - prev // Convert ticks to cores: deltaTicks / (elapsed_seconds * CLK_TCK) usedCores = float64(deltaTicks) / (elapsed * float64(proc.DefaultClockTicks)) + hasDelta = true } - // If totalTicks < prev, usedCores stays 0 for this sample } a.prevCgroupCPU[containerName] = totalTicks @@ -384,6 +385,7 @@ func (a *Aggregator) calculateCgroupMetrics( UsedCores: usedCores, UsedPercent: cpuPercent, LimitCores: limitCores, + HasDelta: hasDelta, }, Memory: CgroupMemoryMetrics{ TotalRSSBytes: totalRSS, diff --git a/internal/metrics/types.go b/internal/metrics/types.go index b88d4c8..9aa9cb2 100644 --- a/internal/metrics/types.go +++ b/internal/metrics/types.go @@ -51,6 +51,7 @@ type CgroupCPUMetrics struct { UsedCores float64 `json:"used_cores"` UsedPercent float64 `json:"used_percent,omitempty"` LimitCores float64 `json:"limit_cores,omitempty"` + HasDelta bool `json:"-"` // true when a valid delta could be computed } // CgroupMemoryMetrics holds memory metrics for a single cgroup/container diff --git a/internal/summary/accumulator.go b/internal/summary/accumulator.go index c8f10f1..6ffa8cd 100644 --- a/internal/summary/accumulator.go +++ b/internal/summary/accumulator.go @@ -64,7 +64,10 @@ func (a *Accumulator) Add(m *metrics.SystemMetrics) { ca = &containerAccumulator{} a.containers[name] = ca } - ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores) + // Only record CPU when a valid delta was computed (skip first sample and underflow) + if cgroup.CPU.HasDelta { + ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores) + } ca.memoryBytesValues = append(ca.memoryBytesValues, float64(cgroup.Memory.TotalRSSBytes)) } } diff --git a/internal/summary/accumulator_test.go b/internal/summary/accumulator_test.go index 203fe53..cdda597 100644 --- a/internal/summary/accumulator_test.go +++ b/internal/summary/accumulator_test.go @@ -379,7 +379,7 @@ func TestAccumulator_AllPercentiles(t *testing.T) { func TestAccumulator_ContainerMetrics(t *testing.T) { acc := NewAccumulator(5) - // Add samples with container metrics + // Add samples with container metrics (HasDelta=true to indicate valid CPU measurements) for i := 1; i <= 5; i++ { acc.Add(&metrics.SystemMetrics{ Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC), @@ -388,14 +388,14 @@ func TestAccumulator_ContainerMetrics(t *testing.T) { Cgroups: map[string]*metrics.CgroupMetrics{ "container-a": { Name: "container-a", - CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i)}, + CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true}, Memory: metrics.CgroupMemoryMetrics{ TotalRSSBytes: uint64(i * 1000), }, }, "container-b": { Name: "container-b", - CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i * 2)}, + CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i * 2), HasDelta: true}, Memory: metrics.CgroupMemoryMetrics{ TotalRSSBytes: uint64(i * 2000), }, @@ -478,7 +478,7 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) { Cgroups: map[string]*metrics.CgroupMetrics{ "container-a": { Name: "container-a", - CPU: metrics.CgroupCPUMetrics{UsedCores: 1}, + CPU: metrics.CgroupCPUMetrics{UsedCores: 1, HasDelta: true}, Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000}, }, }, @@ -492,12 +492,12 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) { Cgroups: map[string]*metrics.CgroupMetrics{ "container-a": { Name: "container-a", - CPU: metrics.CgroupCPUMetrics{UsedCores: 2}, + CPU: metrics.CgroupCPUMetrics{UsedCores: 2, HasDelta: true}, Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 2000}, }, "container-b": { Name: "container-b", - CPU: metrics.CgroupCPUMetrics{UsedCores: 5}, + CPU: metrics.CgroupCPUMetrics{UsedCores: 5, HasDelta: true}, Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 5000}, }, }, @@ -531,3 +531,68 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) { t.Errorf("container-b CPUCores.Avg: got %f, want 5", containerB.CPUCores.Avg) } } + +func TestAccumulator_ContainerMetrics_InvalidDeltaExcluded(t *testing.T) { + acc := NewAccumulator(5) + + // Sample 1: no valid CPU delta (first sample / underflow) — should be excluded from CPU stats + acc.Add(&metrics.SystemMetrics{ + Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC), + CPU: metrics.CPUMetrics{}, + Memory: metrics.MemoryMetrics{}, + Cgroups: map[string]*metrics.CgroupMetrics{ + "runner": { + Name: "runner", + CPU: metrics.CgroupCPUMetrics{UsedCores: 0, HasDelta: false}, + Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000}, + }, + }, + }) + + // Samples 2-4: valid deltas + for i := 2; i <= 4; i++ { + acc.Add(&metrics.SystemMetrics{ + Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC), + CPU: metrics.CPUMetrics{}, + Memory: metrics.MemoryMetrics{}, + Cgroups: map[string]*metrics.CgroupMetrics{ + "runner": { + Name: "runner", + CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true}, + Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: uint64(i * 1000)}, + }, + }, + }) + } + + s := acc.Summarize() + if s == nil { + t.Fatal("expected non-nil summary") + } + + if len(s.Containers) != 1 { + t.Fatalf("Containers length: got %d, want 1", len(s.Containers)) + } + + runner := s.Containers[0] + // CPU should only include samples 2,3,4 (values 2,3,4) — NOT the invalid zero + // Peak=4, Avg=3, P50=3 + if runner.CPUCores.Peak != 4 { + t.Errorf("CPUCores.Peak: got %f, want 4", runner.CPUCores.Peak) + } + if runner.CPUCores.Avg != 3 { + t.Errorf("CPUCores.Avg: got %f, want 3", runner.CPUCores.Avg) + } + if runner.CPUCores.P50 != 3 { + t.Errorf("CPUCores.P50: got %f, want 3", runner.CPUCores.P50) + } + + // Memory should include all 4 samples (memory is always valid) + // Values: 1000, 2000, 3000, 4000 + if runner.MemoryBytes.Peak != 4000 { + t.Errorf("MemoryBytes.Peak: got %f, want 4000", runner.MemoryBytes.Peak) + } + if runner.MemoryBytes.Avg != 2500 { + t.Errorf("MemoryBytes.Avg: got %f, want 2500", runner.MemoryBytes.Avg) + } +}