Ignore data with no delta (first datapoint or underflow)
All checks were successful
ci / build (push) Successful in 28s

This commit is contained in:
Martin McCaffery 2026-02-09 17:37:53 +01:00
parent 2a4c64bfb0
commit 90c89583a0
Signed by: martin.mccaffery
GPG key ID: 7C4D0F375BCEE533
4 changed files with 79 additions and 8 deletions

View file

@ -347,6 +347,7 @@ func (a *Aggregator) calculateCgroupMetrics(
// Calculate CPU cores used from delta
usedCores := 0.0
hasDelta := false
if prev, ok := a.prevCgroupCPU[containerName]; ok && elapsed > 0 {
// Guard against underflow: if processes exited and new ones started,
// totalTicks could be less than prev. In that case, skip this sample.
@ -354,8 +355,8 @@ func (a *Aggregator) calculateCgroupMetrics(
deltaTicks := totalTicks - prev
// Convert ticks to cores: deltaTicks / (elapsed_seconds * CLK_TCK)
usedCores = float64(deltaTicks) / (elapsed * float64(proc.DefaultClockTicks))
hasDelta = true
}
// If totalTicks < prev, usedCores stays 0 for this sample
}
a.prevCgroupCPU[containerName] = totalTicks
@ -384,6 +385,7 @@ func (a *Aggregator) calculateCgroupMetrics(
UsedCores: usedCores,
UsedPercent: cpuPercent,
LimitCores: limitCores,
HasDelta: hasDelta,
},
Memory: CgroupMemoryMetrics{
TotalRSSBytes: totalRSS,

View file

@ -51,6 +51,7 @@ type CgroupCPUMetrics struct {
UsedCores float64 `json:"used_cores"`
UsedPercent float64 `json:"used_percent,omitempty"`
LimitCores float64 `json:"limit_cores,omitempty"`
HasDelta bool `json:"-"` // true when a valid delta could be computed
}
// CgroupMemoryMetrics holds memory metrics for a single cgroup/container

View file

@ -64,7 +64,10 @@ func (a *Accumulator) Add(m *metrics.SystemMetrics) {
ca = &containerAccumulator{}
a.containers[name] = ca
}
ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores)
// Only record CPU when a valid delta was computed (skip first sample and underflow)
if cgroup.CPU.HasDelta {
ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores)
}
ca.memoryBytesValues = append(ca.memoryBytesValues, float64(cgroup.Memory.TotalRSSBytes))
}
}

View file

@ -379,7 +379,7 @@ func TestAccumulator_AllPercentiles(t *testing.T) {
func TestAccumulator_ContainerMetrics(t *testing.T) {
acc := NewAccumulator(5)
// Add samples with container metrics
// Add samples with container metrics (HasDelta=true to indicate valid CPU measurements)
for i := 1; i <= 5; i++ {
acc.Add(&metrics.SystemMetrics{
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
@ -388,14 +388,14 @@ func TestAccumulator_ContainerMetrics(t *testing.T) {
Cgroups: map[string]*metrics.CgroupMetrics{
"container-a": {
Name: "container-a",
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i)},
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{
TotalRSSBytes: uint64(i * 1000),
},
},
"container-b": {
Name: "container-b",
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i * 2)},
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i * 2), HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{
TotalRSSBytes: uint64(i * 2000),
},
@ -478,7 +478,7 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) {
Cgroups: map[string]*metrics.CgroupMetrics{
"container-a": {
Name: "container-a",
CPU: metrics.CgroupCPUMetrics{UsedCores: 1},
CPU: metrics.CgroupCPUMetrics{UsedCores: 1, HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000},
},
},
@ -492,12 +492,12 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) {
Cgroups: map[string]*metrics.CgroupMetrics{
"container-a": {
Name: "container-a",
CPU: metrics.CgroupCPUMetrics{UsedCores: 2},
CPU: metrics.CgroupCPUMetrics{UsedCores: 2, HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 2000},
},
"container-b": {
Name: "container-b",
CPU: metrics.CgroupCPUMetrics{UsedCores: 5},
CPU: metrics.CgroupCPUMetrics{UsedCores: 5, HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 5000},
},
},
@ -531,3 +531,68 @@ func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) {
t.Errorf("container-b CPUCores.Avg: got %f, want 5", containerB.CPUCores.Avg)
}
}
func TestAccumulator_ContainerMetrics_InvalidDeltaExcluded(t *testing.T) {
acc := NewAccumulator(5)
// Sample 1: no valid CPU delta (first sample / underflow) — should be excluded from CPU stats
acc.Add(&metrics.SystemMetrics{
Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC),
CPU: metrics.CPUMetrics{},
Memory: metrics.MemoryMetrics{},
Cgroups: map[string]*metrics.CgroupMetrics{
"runner": {
Name: "runner",
CPU: metrics.CgroupCPUMetrics{UsedCores: 0, HasDelta: false},
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000},
},
},
})
// Samples 2-4: valid deltas
for i := 2; i <= 4; i++ {
acc.Add(&metrics.SystemMetrics{
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
CPU: metrics.CPUMetrics{},
Memory: metrics.MemoryMetrics{},
Cgroups: map[string]*metrics.CgroupMetrics{
"runner": {
Name: "runner",
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true},
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: uint64(i * 1000)},
},
},
})
}
s := acc.Summarize()
if s == nil {
t.Fatal("expected non-nil summary")
}
if len(s.Containers) != 1 {
t.Fatalf("Containers length: got %d, want 1", len(s.Containers))
}
runner := s.Containers[0]
// CPU should only include samples 2,3,4 (values 2,3,4) — NOT the invalid zero
// Peak=4, Avg=3, P50=3
if runner.CPUCores.Peak != 4 {
t.Errorf("CPUCores.Peak: got %f, want 4", runner.CPUCores.Peak)
}
if runner.CPUCores.Avg != 3 {
t.Errorf("CPUCores.Avg: got %f, want 3", runner.CPUCores.Avg)
}
if runner.CPUCores.P50 != 3 {
t.Errorf("CPUCores.P50: got %f, want 3", runner.CPUCores.P50)
}
// Memory should include all 4 samples (memory is always valid)
// Values: 1000, 2000, 3000, 4000
if runner.MemoryBytes.Peak != 4000 {
t.Errorf("MemoryBytes.Peak: got %f, want 4000", runner.MemoryBytes.Peak)
}
if runner.MemoryBytes.Avg != 2500 {
t.Errorf("MemoryBytes.Avg: got %f, want 2500", runner.MemoryBytes.Avg)
}
}