forgejo-runner-resource-col.../internal/proc/process.go
Manuel Ganter 5e470c33a5
All checks were successful
ci / build (push) Successful in 30s
feat(collector): group CPU and memory metrics by cgroup
Add cgroup-based process grouping to the resource collector. Processes are
grouped by their cgroup path, with container names resolved via configurable
process-to-container mapping.

New features:
- Read cgroup info from /proc/[pid]/cgroup (supports v1 and v2)
- Parse K8s resource notation (500m, 1Gi, etc.) for CPU/memory limits
- Group metrics by container using CGROUP_PROCESS_MAP env var
- Calculate usage percentages against limits from CGROUP_LIMITS env var
- Output cgroup metrics with CPU cores used, memory RSS, and percentages

Environment variables:
- CGROUP_PROCESS_MAP: Map process names to container names for discovery
- CGROUP_LIMITS: Define CPU/memory limits per container in K8s notation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 14:50:36 +01:00

179 lines
4.4 KiB
Go

package proc
import (
"fmt"
"os"
"strconv"
"strings"
)
// DefaultClockTicks is the default number of clock ticks per second
// (SC_CLK_TCK). On most Linux systems this value is 100.
//
// NOTE(review): this is a fixed constant; nothing in this file queries the
// actual SC_CLK_TCK value from the system — confirm whether callers override it.
const DefaultClockTicks = 100
// SystemInfo holds system-wide information from /proc.
//
// The memory fields are filled by ReadSystemInfo from the identically named
// entries of <procPath>/meminfo; an entry that is absent from the file leaves
// its field at zero. Values are whatever parseMemoryValue returns for the
// entry; they are documented as bytes (NOTE(review): the unit conversion
// lives in parseMemoryValue, which is not visible here — confirm).
type SystemInfo struct {
	// Total physical memory in bytes ("MemTotal" entry).
	MemTotal uint64
	// Free memory in bytes ("MemFree" entry).
	MemFree uint64
	// Available memory in bytes ("MemAvailable" entry).
	MemAvailable uint64
	// Buffer memory in bytes ("Buffers" entry).
	Buffers uint64
	// Cached memory in bytes ("Cached" entry).
	Cached uint64
	// Number of CPUs: ReadSystemInfo counts the lines of <procPath>/cpuinfo
	// that start with "processor" and falls back to 1 when it finds none.
	CPUCount int
}
// DiscoverPIDs lists the process IDs visible under procPath.
//
// Every directory entry of procPath whose name parses as an integer is
// treated as a PID; entries that are not directories or whose names are not
// numeric are ignored. PIDs are returned in the order os.ReadDir yields the
// entries. The result is nil when no PID directory is found.
//
// An error is returned only when procPath itself cannot be read.
func DiscoverPIDs(procPath string) ([]int, error) {
	dirEntries, readErr := os.ReadDir(procPath)
	if readErr != nil {
		return nil, fmt.Errorf("reading proc directory: %w", readErr)
	}

	var found []int
	for i := range dirEntries {
		candidate := dirEntries[i]
		// A PID entry must be a directory with a purely numeric name.
		if id, convErr := strconv.Atoi(candidate.Name()); convErr == nil && candidate.IsDir() {
			found = append(found, id)
		}
	}
	return found, nil
}
// ReadSystemInfo collects system-wide memory figures and the CPU count.
//
// Memory: <procPath>/meminfo is split into lines; each "key: value" line has
// its value passed through parseMemoryValue, and the result is stored when
// the key is one of MemTotal, MemFree, MemAvailable, Buffers or Cached.
// Lines without a colon are skipped. Failing to read meminfo is an error.
//
// CPUs: the lines of <procPath>/cpuinfo that start with "processor" are
// counted. This part is best-effort: if the file cannot be read, or no such
// line exists, CPUCount is set to 1.
func ReadSystemInfo(procPath string) (*SystemInfo, error) {
	sys := &SystemInfo{}

	memRaw, err := os.ReadFile(fmt.Sprintf("%s/meminfo", procPath))
	if err != nil {
		return nil, fmt.Errorf("reading meminfo: %w", err)
	}

	// Destination field for each meminfo key we care about.
	slots := map[string]*uint64{
		"MemTotal":     &sys.MemTotal,
		"MemFree":      &sys.MemFree,
		"MemAvailable": &sys.MemAvailable,
		"Buffers":      &sys.Buffers,
		"Cached":       &sys.Cached,
	}

	for _, entry := range strings.Split(string(memRaw), "\n") {
		sep := strings.Index(entry, ":")
		if sep < 0 {
			continue
		}
		name := strings.TrimSpace(entry[:sep])
		amount := parseMemoryValue(strings.TrimSpace(entry[sep+1:]))
		if slot, wanted := slots[name]; wanted {
			*slot = amount
		}
	}

	// CPU count is optional: a read failure simply leaves the counter at zero.
	if cpuRaw, cpuErr := os.ReadFile(fmt.Sprintf("%s/cpuinfo", procPath)); cpuErr == nil {
		for _, entry := range strings.Split(string(cpuRaw), "\n") {
			if strings.HasPrefix(entry, "processor") {
				sys.CPUCount++
			}
		}
	}
	if sys.CPUCount == 0 {
		sys.CPUCount = 1 // assume a single CPU when none could be counted
	}

	return sys, nil
}
// ReadSystemCPU reads the aggregate CPU time counters from <procPath>/stat.
//
// It locates the line starting with "cpu " (the trailing space skips the
// per-core "cpuN" lines) and returns its first seven counters, in order:
// user, nice, system, idle, iowait, irq and softirq (in clock ticks). Any
// further counters on the line are ignored.
//
// An error is returned when the file cannot be read, when no "cpu " line is
// present, when that line carries fewer than seven counters, or when one of
// the seven counters is not a valid unsigned decimal integer. On error every
// counter is returned as zero.
func ReadSystemCPU(procPath string) (user, nice, system, idle, iowait, irq, softirq uint64, err error) {
	data, err := os.ReadFile(fmt.Sprintf("%s/stat", procPath))
	if err != nil {
		return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("reading /proc/stat: %w", err)
	}

	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, "cpu ") {
			continue
		}

		// fields[0] is the "cpu" label; the counters follow it.
		fields := strings.Fields(line)
		if len(fields) < 8 {
			return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("invalid cpu line format")
		}

		var counters [7]uint64
		for i := range counters {
			v, perr := strconv.ParseUint(fields[i+1], 10, 64)
			if perr != nil {
				// Report malformed counters instead of silently reading them as 0,
				// which would corrupt any usage figures derived from the result.
				return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("parsing cpu field %d: %w", i+1, perr)
			}
			counters[i] = v
		}
		return counters[0], counters[1], counters[2], counters[3], counters[4], counters[5], counters[6], nil
	}

	return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("cpu line not found in /proc/stat")
}
// ProcessInfo combines stat, status, and cgroup information for a process.
// ReadProcess builds it from the results of ReadStat, ReadStatus and
// ReadCgroup for a single PID.
type ProcessInfo struct {
	// Stat is the value returned by ReadStat for the process.
	Stat *ProcStat
	// Status is the value returned by ReadStatus for the process.
	Status *ProcStatus
	// Cgroup is the value returned by ReadCgroup. ReadProcess discards
	// ReadCgroup's error, so callers should not assume this is populated
	// (presumably nil on failure — confirm against ReadCgroup).
	Cgroup *CgroupInfo
}
// ReadProcess gathers stat, status and cgroup data for the process pid
// under procPath.
//
// A failure from ReadStat or ReadStatus aborts the read and is returned
// unchanged. Cgroup data is optional: an error from ReadCgroup is ignored
// and whatever value it returned is stored in the result.
func ReadProcess(procPath string, pid int) (*ProcessInfo, error) {
	procStat, statErr := ReadStat(procPath, pid)
	if statErr != nil {
		return nil, statErr
	}

	procStatus, statusErr := ReadStatus(procPath, pid)
	if statusErr != nil {
		return nil, statusErr
	}

	result := &ProcessInfo{Stat: procStat, Status: procStatus}
	// Best effort: a process without readable cgroup info is still reported.
	result.Cgroup, _ = ReadCgroup(procPath, pid)
	return result, nil
}
// ReadAllProcesses returns a ProcessInfo for every process that can be read
// under procPath.
//
// PIDs come from DiscoverPIDs; its error, if any, is returned unchanged.
// A PID for which ReadProcess fails is silently dropped, since the process
// may have exited between discovery and reading. The result is nil when no
// process could be read.
func ReadAllProcesses(procPath string) ([]*ProcessInfo, error) {
	pids, err := DiscoverPIDs(procPath)
	if err != nil {
		return nil, err
	}

	var collected []*ProcessInfo
	for i := range pids {
		// Keep only the processes that were still readable.
		if entry, readErr := ReadProcess(procPath, pids[i]); readErr == nil {
			collected = append(collected, entry)
		}
	}
	return collected, nil
}