All checks were successful
ci / build (push) Successful in 30s
Add cgroup-based process grouping to the resource collector. Processes are grouped by their cgroup path, with container names resolved via configurable process-to-container mapping. New features: - Read cgroup info from /proc/[pid]/cgroup (supports v1 and v2) - Parse K8s resource notation (500m, 1Gi, etc.) for CPU/memory limits - Group metrics by container using CGROUP_PROCESS_MAP env var - Calculate usage percentages against limits from CGROUP_LIMITS env var - Output cgroup metrics with CPU cores used, memory RSS, and percentages Environment variables: - CGROUP_PROCESS_MAP: Map process names to container names for discovery - CGROUP_LIMITS: Define CPU/memory limits per container in K8s notation Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
179 lines
4.4 KiB
Go
179 lines
4.4 KiB
Go
package proc
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// DefaultClockTicks is the assumed number of clock ticks per second (SC_CLK_TCK).
// On most Linux systems this is 100; the value here is a hard-coded default,
// not one queried from the running system.
|
|
const DefaultClockTicks = 100
|
|
|
|
// SystemInfo is a snapshot of host-wide memory figures and the CPU count
// gathered from files under /proc.
type SystemInfo struct {
	// MemTotal is the total physical memory, in bytes.
	MemTotal uint64
	// MemFree is the free memory, in bytes.
	MemFree uint64
	// MemAvailable is the available memory, in bytes.
	MemAvailable uint64
	// Buffers is the buffer memory, in bytes.
	Buffers uint64
	// Cached is the cached memory, in bytes.
	Cached uint64
	// CPUCount is the number of CPUs.
	CPUCount int
}
|
|
|
|
// DiscoverPIDs lists procPath and returns the numeric value of every
// subdirectory whose name parses as an integer, in the order os.ReadDir
// yields the entries.
//
// Entries that are not directories, and directories whose names are not
// numeric, are ignored. An error is returned only when procPath itself
// cannot be listed. The result is nil when no matching entry exists.
func DiscoverPIDs(procPath string) ([]int, error) {
	dirEntries, readErr := os.ReadDir(procPath)
	if readErr != nil {
		return nil, fmt.Errorf("reading proc directory: %w", readErr)
	}

	var found []int
	for i := range dirEntries {
		candidate := dirEntries[i]
		if !candidate.IsDir() {
			continue
		}
		// Only names that convert cleanly are treated as PID directories.
		if id, convErr := strconv.Atoi(candidate.Name()); convErr == nil {
			found = append(found, id)
		}
	}

	return found, nil
}
|
|
|
|
// ReadSystemInfo reads system-wide memory information from /proc/meminfo
|
|
func ReadSystemInfo(procPath string) (*SystemInfo, error) {
|
|
info := &SystemInfo{}
|
|
|
|
// Read /proc/meminfo
|
|
data, err := os.ReadFile(fmt.Sprintf("%s/meminfo", procPath))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("reading meminfo: %w", err)
|
|
}
|
|
|
|
lines := strings.Split(string(data), "\n")
|
|
for _, line := range lines {
|
|
parts := strings.SplitN(line, ":", 2)
|
|
if len(parts) != 2 {
|
|
continue
|
|
}
|
|
|
|
key := strings.TrimSpace(parts[0])
|
|
value := parseMemoryValue(strings.TrimSpace(parts[1]))
|
|
|
|
switch key {
|
|
case "MemTotal":
|
|
info.MemTotal = value
|
|
case "MemFree":
|
|
info.MemFree = value
|
|
case "MemAvailable":
|
|
info.MemAvailable = value
|
|
case "Buffers":
|
|
info.Buffers = value
|
|
case "Cached":
|
|
info.Cached = value
|
|
}
|
|
}
|
|
|
|
// Count CPUs from /proc/cpuinfo
|
|
cpuData, err := os.ReadFile(fmt.Sprintf("%s/cpuinfo", procPath))
|
|
if err == nil {
|
|
for _, line := range strings.Split(string(cpuData), "\n") {
|
|
if strings.HasPrefix(line, "processor") {
|
|
info.CPUCount++
|
|
}
|
|
}
|
|
}
|
|
|
|
if info.CPUCount == 0 {
|
|
info.CPUCount = 1 // Default to 1 CPU
|
|
}
|
|
|
|
return info, nil
|
|
}
|
|
|
|
// ReadSystemCPU reads the aggregate CPU time counters from <procPath>/stat.
//
// It locates the first line beginning with "cpu " (the all-CPU summary line)
// and returns its first seven numeric columns, in order: user, nice, system,
// idle, iowait, irq and softirq. Any further columns are ignored.
//
// An error is returned when the file cannot be read, when no "cpu " line is
// present, when that line has fewer than seven counters, or when any of the
// seven counters is not a valid unsigned 64-bit decimal integer. On error all
// counters are returned as zero.
func ReadSystemCPU(procPath string) (user, nice, system, idle, iowait, irq, softirq uint64, err error) {
	data, err := os.ReadFile(fmt.Sprintf("%s/stat", procPath))
	if err != nil {
		return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("reading /proc/stat: %w", err)
	}

	for _, line := range strings.Split(string(data), "\n") {
		// The trailing space excludes the per-CPU lines ("cpu0", "cpu1", ...).
		if !strings.HasPrefix(line, "cpu ") {
			continue
		}

		fields := strings.Fields(line)
		if len(fields) < 8 {
			return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("invalid cpu line format")
		}

		// fields[0] is the "cpu" label; the counters follow it.
		var counters [7]uint64
		for i := range counters {
			v, perr := strconv.ParseUint(fields[i+1], 10, 64)
			if perr != nil {
				// Surface malformed counters instead of silently reporting
				// a bogus value with a nil error.
				return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("parsing cpu field %d (%q): %w", i+1, fields[i+1], perr)
			}
			counters[i] = v
		}

		return counters[0], counters[1], counters[2], counters[3], counters[4], counters[5], counters[6], nil
	}

	return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("cpu line not found in /proc/stat")
}
|
|
|
|
// ProcessInfo combines stat, status, and cgroup information for a process
|
|
type ProcessInfo struct {
|
|
Stat *ProcStat
|
|
Status *ProcStatus
|
|
Cgroup *CgroupInfo
|
|
}
|
|
|
|
// ReadProcess reads stat, status, and cgroup for a single process
|
|
func ReadProcess(procPath string, pid int) (*ProcessInfo, error) {
|
|
stat, err := ReadStat(procPath, pid)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
status, err := ReadStatus(procPath, pid)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Read cgroup info (non-fatal if it fails)
|
|
cgroup, _ := ReadCgroup(procPath, pid)
|
|
|
|
return &ProcessInfo{
|
|
Stat: stat,
|
|
Status: status,
|
|
Cgroup: cgroup,
|
|
}, nil
|
|
}
|
|
|
|
// ReadAllProcesses reads information for all processes
|
|
// It skips processes that disappear during collection (race-safe)
|
|
func ReadAllProcesses(procPath string) ([]*ProcessInfo, error) {
|
|
pids, err := DiscoverPIDs(procPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var processes []*ProcessInfo
|
|
for _, pid := range pids {
|
|
info, err := ReadProcess(procPath, pid)
|
|
if err != nil {
|
|
// Process may have exited, skip it
|
|
continue
|
|
}
|
|
processes = append(processes, info)
|
|
}
|
|
|
|
return processes, nil
|
|
}
|