Compare commits
32 commits
| SHA1 |
|---|
| bc9d0dd8ea |
| 479c13f596 |
| e38c99acd6 |
| 937e5b814b |
| d0aea88a5b |
| 862fc07328 |
| 7e3a4efb2d |
| a96a1079eb |
| 8101e9b20e |
| e1a4e9c579 |
| d713c25fa5 |
| d0dd209bc9 |
| 042ce77ddc |
| 90c89583a0 |
| 2a4c64bfb0 |
| addab99e5d |
| fd02242d5e |
| 52f1b8b64d |
| d624d46822 |
| eb01c1c842 |
| 0af8c28bc2 |
| 5b983692c8 |
| 6770cfcea7 |
| 5e470c33a5 |
| 0bf7dfee38 |
| 7da7dc138f |
| d99cd1dd56 |
| cfe583fbc4 |
| c309bd810d |
| c5c872a373 |
| 7201a527d8 |
| 54269e8a0e |
56 changed files with 7898 additions and 80 deletions
**`.github/workflows/ci.yaml`** (vendored, 18 changed lines)

```diff
@@ -7,16 +7,16 @@ on:
   pull_request:

 jobs:
-  build:
+  ci:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
        with:
          go-version-file: go.mod
@@ -27,13 +27,23 @@ jobs:
        run: make test

      - name: Install GoReleaser
-        uses: https://github.com/goreleaser/goreleaser-action@v5
+        uses: https://github.com/goreleaser/goreleaser-action@v6
        with:
          install-only: true

+      - name: Sanitize Docker credentials
+        run: |
+          REGISTRY="${{ forgejo.server_url }}"
+          echo "registry=${REGISTRY#https://}" >> "$GITHUB_OUTPUT"
+          ORG="${{ github.repository_owner }}"
+          echo "org=$(echo "$ORG" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
+        id: sanitize_credentials
+
      - name: GoReleaser Check
        run: |
          unset GITHUB_TOKEN
          goreleaser release --snapshot --skip=publish --clean
        env:
          GORELEASER_CURRENT_TAG: v0.0.0
+          DOCKER_REGISTRY: ${{ steps.sanitize_credentials.outputs.registry }}
+          DOCKER_ORG: ${{ steps.sanitize_credentials.outputs.org }}
```
**`.github/workflows/release.yaml`** (vendored, 18 changed lines)

```diff
@@ -10,11 +10,11 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
        with:
          go-version-file: go.mod
      - name: Test code
@@ -25,11 +25,23 @@ jobs:
        with:
          gpg_private_key: ${{ secrets.GPG_PRIVATE_KEY }}
          passphrase: ${{ secrets.GPG_PASSPHRASE }}
+      - name: Set up QEMU
+        uses: https://github.com/docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: https://github.com/docker/setup-buildx-action@v3
+
      - name: Install GoReleaser
-        uses: https://github.com/goreleaser/goreleaser-action@v5
+        uses: https://github.com/goreleaser/goreleaser-action@v6
        with:
          install-only: true

+      - name: Docker login
+        run: |
+          REGISTRY="${GITHUB_SERVER_URL#https://}"
+          echo "${{ secrets.PACKAGES_TOKEN }}" | docker login "$REGISTRY" -u "${{ github.repository_owner }}" --password-stdin
+          echo "DOCKER_REGISTRY=$REGISTRY" >> "$GITHUB_ENV"
+          echo "DOCKER_ORG=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_ENV"
+
      - name: Run GoReleaser
        run: |
          unset GITHUB_TOKEN
```
**`.gitignore`** (vendored, 5 changed lines)

```diff
@@ -1,6 +1,7 @@
 # Binaries (root level only)
-/resource-collector
+/collector
+/receiver

 # Test coverage
 coverage.out
@@ -12,5 +13,9 @@ coverage.html
 *.swp
 *.swo

+dist/
+
+.claude/
+
 # OS
 .DS_Store
```
**GoReleaser configuration**

```diff
@@ -1,20 +1,35 @@
-version: 1
+version: 2

-project_name: resource-collector
+project_name: sizer

+gitea_urls:
+  api: "{{ .Env.GITHUB_SERVER_URL }}/api/v1"
+  download: "{{ .Env.GITHUB_SERVER_URL }}"
+
 before:
   hooks:
     - go mod tidy

 builds:
-  - id: resource-collector
+  - id: collector
     main: ./cmd/collector
-    binary: resource-collector
+    binary: collector
     env:
       - CGO_ENABLED=0
     goos:
       - linux
     goarch:
       - amd64
       - arm64
     ldflags:
       - -s -w
+  - id: receiver
+    main: ./cmd/receiver
+    binary: receiver
+    env:
+      - CGO_ENABLED=0
+    goos:
+      - linux
+      - darwin
+    goarch:
+      - amd64
+      - arm64
@@ -23,14 +38,39 @@ builds:

 archives:
   - id: default
-    format: tar.gz
+    formats:
+      - tar.gz
     name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"

 checksum:
   name_template: checksums.txt

 snapshot:
-  name_template: "{{ incpatch .Version }}-next"
+  version_template: "{{ incpatch .Version }}-next"

+dockers_v2:
+  - id: collector
+    ids:
+      - collector
+    images:
+      - "{{ .Env.DOCKER_REGISTRY }}/{{ .Env.DOCKER_ORG }}/forgejo-runner-sizer-collector"
+    tags:
+      - "{{ .Version }}"
+      - latest
+    dockerfile: Dockerfile.goreleaser
+    build_args:
+      BINARY: collector
+  - id: receiver
+    ids:
+      - receiver
+    images:
+      - "{{ .Env.DOCKER_REGISTRY }}/{{ .Env.DOCKER_ORG }}/forgejo-runner-sizer-receiver"
+    tags:
+      - "{{ .Version }}"
+      - latest
+    dockerfile: Dockerfile.goreleaser
+    build_args:
+      BINARY: receiver
+
 changelog:
   sort: asc
```
**`CLAUDE.md`** (new file, 95 lines)

# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Build and Development Commands

```bash
# Build
make build                           # Build both binaries
go build -o collector ./cmd/collector
go build -o receiver ./cmd/receiver

# Test
make test                            # Run all tests
go test -v ./...                     # Run all tests with verbose output
go test -v ./internal/collector/...  # Run tests for a specific package
make test-coverage                   # Run tests with coverage report

# Code Quality
make fmt            # Format code
make vet            # Run go vet
make lint           # Run golangci-lint (v2.6.2)
make all            # Format, vet, lint, and build

# Git Hooks
make install-hooks  # Install pre-commit and commit-msg hooks
```

## Architecture Overview

A resource sizer for CI/CD environments with shared PID namespaces. It consists of two binaries — a **collector** and a **receiver** (which includes the **sizer**):

### Collector (`cmd/collector`)
Runs alongside CI workloads, periodically reads the `/proc` filesystem, and pushes a summary to the receiver on shutdown (SIGINT/SIGTERM).

**Data Flow:**
1. `metrics.Aggregator` reads `/proc` to collect CPU/memory for all processes
2. `collector.Collector` orchestrates collection at intervals and writes to output
3. `summary.Accumulator` tracks samples across the run, computing peak/avg/percentiles
4. On shutdown, `summary.PushClient` sends the summary to the receiver HTTP endpoint

### Receiver (`cmd/receiver`)
HTTP service that stores metric summaries in SQLite (via GORM), provides a query API, and includes the **sizer** — which computes right-sized Kubernetes resource requests and limits from historical data.

**Key Endpoints:**
- `POST /api/v1/metrics` - Receive metrics from collectors
- `GET /api/v1/metrics/repo/{org}/{repo}/{workflow}/{job}` - Query stored metrics
- `GET /api/v1/sizing/repo/{org}/{repo}/{workflow}/{job}` - Compute container sizes from historical data

### Internal Packages

| Package | Purpose |
|---------|---------|
| `internal/collector` | Orchestrates collection loop, handles shutdown |
| `internal/metrics` | Aggregates system/process metrics from `/proc` |
| `internal/proc` | Low-level `/proc` parsing (stat, status, cgroup) |
| `internal/cgroup` | Parses `CGROUP_LIMITS` and `CGROUP_PROCESS_MAP` env vars |
| `internal/summary` | Accumulates samples, computes stats, pushes to receiver |
| `internal/receiver` | HTTP handlers, SQLite store, and sizer logic |
| `internal/output` | Metrics output formatting (JSON/text) |

### Container Metrics

The collector groups processes by container using cgroup paths. Configuration is via environment variables:
- `CGROUP_PROCESS_MAP`: JSON mapping process names to container names (e.g., `{"node":"runner"}`)
- `CGROUP_LIMITS`: JSON with CPU/memory limits per container for percentage calculations

CPU values in container metrics are reported as **cores** (not percentages), enabling direct comparison with Kubernetes resource limits.
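Both variables are plain JSON and can be read with the standard library. The sketch below is illustrative only: the type and function names are hypothetical and not taken from `internal/cgroup`, but the JSON shapes match the examples above.

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// containerLimit mirrors one entry of CGROUP_LIMITS, e.g. {"cpu":"2","memory":"1Gi"}.
// Hypothetical type; internal/cgroup may model this differently.
type containerLimit struct {
	CPU    string `json:"cpu"`
	Memory string `json:"memory"`
}

// loadEnvConfig reads the two JSON-valued environment variables described above.
func loadEnvConfig() (map[string]string, map[string]containerLimit, error) {
	processMap := map[string]string{}
	if raw := os.Getenv("CGROUP_PROCESS_MAP"); raw != "" {
		if err := json.Unmarshal([]byte(raw), &processMap); err != nil {
			return nil, nil, fmt.Errorf("CGROUP_PROCESS_MAP: %w", err)
		}
	}
	limits := map[string]containerLimit{}
	if raw := os.Getenv("CGROUP_LIMITS"); raw != "" {
		if err := json.Unmarshal([]byte(raw), &limits); err != nil {
			return nil, nil, fmt.Errorf("CGROUP_LIMITS: %w", err)
		}
	}
	return processMap, limits, nil
}

func main() {
	processMap, limits, err := loadEnvConfig()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(processMap, limits)
}
```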
## Commit Message Convention

Uses conventional commits, enforced by a git hook:
```
<type>(<scope>)?: <description>
```

Types: `feat`, `fix`, `chore`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`

Examples:
- `feat: add user authentication`
- `fix(collector): handle nil cgroup paths`
- `feat!: breaking change in API`

## Running Locally

```bash
# Run receiver
./receiver --addr=:8080 --db=metrics.db

# Run collector with push endpoint
./collector --interval=2s --top=10 --push-endpoint=http://localhost:8080/api/v1/metrics

# Docker Compose stress test
docker compose -f test/docker/docker-compose-stress.yaml up -d
```
**`Dockerfile`** (30 changed lines)

```diff
@@ -1,16 +1,34 @@
-FROM golang:1.25-alpine AS builder
+FROM golang:1.25-alpine AS builder-base

 WORKDIR /app

-COPY go.mod ./
+COPY go.mod go.sum ./
 RUN go mod download

 COPY . .

-RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /resource-collector ./cmd/collector
+# Collector build (no CGO needed)
+FROM builder-base AS builder-collector

-FROM alpine:3.19
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /sizer ./cmd/collector

-COPY --from=builder /resource-collector /usr/local/bin/resource-collector
+# Receiver build
+FROM builder-base AS builder-receiver

-ENTRYPOINT ["/usr/local/bin/resource-collector"]
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /metrics-receiver ./cmd/receiver
+
+# Collector image
+FROM alpine:3.19 AS collector
+
+COPY --from=builder-collector /sizer /usr/local/bin/sizer
+
+ENTRYPOINT ["/usr/local/bin/sizer"]
+
+# Receiver image
+FROM alpine:3.19 AS receiver
+
+COPY --from=builder-receiver /metrics-receiver /usr/local/bin/metrics-receiver
+
+EXPOSE 8080
+
+ENTRYPOINT ["/usr/local/bin/metrics-receiver"]
```
**`Dockerfile.goreleaser`** (new file, 5 lines)

```dockerfile
FROM gcr.io/distroless/static:nonroot
ARG TARGETPLATFORM
ARG BINARY
COPY ${TARGETPLATFORM}/${BINARY} /app
ENTRYPOINT ["/app"]
```
**`Makefile`** (36 changed lines)

```diff
@@ -1,11 +1,10 @@
-# ABOUTME: Makefile for forgejo-runner-resource-collector project.
+# ABOUTME: Makefile for forgejo-runner-sizer project.
 # ABOUTME: Provides targets for building, formatting, linting, and testing.

-BINARY_NAME := resource-collector
-CMD_PATH := ./cmd/collector
 GO := go
 GOLANGCI_LINT := $(GO) run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.6.2
 GITLEAKS := $(GO) run github.com/zricethezav/gitleaks/v8@v8.30.0
+OAPI_CODEGEN := $(GO) run github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen@latest

 # Build flags
 LDFLAGS := -s -w
@@ -13,18 +12,23 @@ BUILD_FLAGS := -ldflags "$(LDFLAGS)"

 default: run

-.PHONY: all build clean fmt format lint gitleaks test run help vet tidy install-hooks
+.PHONY: all build build-collector build-receiver clean fmt format lint gitleaks test run-collector run-receiver help vet tidy install-hooks openapi generate-client

 # Default target
 all: fmt vet lint build

 ## Build targets

-build: ## Build the binary
-	$(GO) build $(BUILD_FLAGS) -o $(BINARY_NAME) $(CMD_PATH)
+build: build-collector build-receiver ## Build both binaries
+
+build-collector: ## Build the collector binary
+	$(GO) build $(BUILD_FLAGS) -o collector ./cmd/collector
+
+build-receiver: ## Build the receiver binary
+	$(GO) build $(BUILD_FLAGS) -o receiver ./cmd/receiver

 clean: ## Remove build artifacts
-	rm -f $(BINARY_NAME) coverage.out coverage.html
+	rm -f collector receiver coverage.out coverage.html
 	$(GO) clean

 ## Code quality targets
@@ -46,6 +50,16 @@ gitleaks: ## Check for secrets in git history
 gitleaks-all: ## Check for secrets in git history
 	$(GITLEAKS) git .

+## OpenAPI / Client Generation
+
+openapi: ## Generate OpenAPI spec from Fuego routes
+	$(GO) run scripts/extract-openapi/main.go
+
+generate-client: openapi ## Generate Go client from OpenAPI spec
+	rm -rf pkg/client
+	mkdir -p pkg/client
+	$(OAPI_CODEGEN) -generate types,client -package client docs/openapi.json > pkg/client/client.gen.go
+
 ## Dependency management

 tidy: ## Tidy go modules
@@ -62,11 +76,11 @@ test-coverage: ## Run tests with coverage

 ## Run targets

-run: build ## Build and run with default settings
-	./$(BINARY_NAME)
+run-collector: build-collector ## Build and run the collector
+	./collector

-run-text: build ## Build and run with text output format
-	./$(BINARY_NAME) --log-format text --interval 2s
+run-receiver: build-receiver ## Build and run the receiver
+	./receiver --read-token=secure-read-token --hmac-key=secure-hmac-key

 ## Git hooks
```
**`README.md`** (new file, 246 lines)

# Forgejo Runner Sizer

A resource sizer for CI/CD workloads in shared PID namespace environments. The **collector** reads `/proc` to gather CPU and memory metrics grouped by container/cgroup, and pushes run summaries to the **receiver**. The receiver stores metrics and exposes a **sizer** API that computes right-sized Kubernetes resource requests and limits from historical data.

## Architecture

The system has two binaries — a **collector** and a **receiver** (which includes the sizer):

```
┌─────────────────────────────────────────────┐        ┌──────────────────────────┐
│ CI/CD Pod (shared PID namespace)            │        │  Receiver Service        │
│                                             │        │                          │
│  ┌───────────┐  ┌────────┐  ┌───────────┐   │        │  POST /api/v1/metrics    │
│  │ collector │  │ runner │  │  sidecar  │   │        │        │                 │
│  │           │  │        │  │           │   │        │        ▼                 │
│  │ reads     │  │        │  │           │   │  push  │  ┌────────────┐          │
│  │ /proc for │  │        │  │           │   │──────▶ │  │   SQLite   │          │
│  │ all PIDs  │  │        │  │           │   │        │  └────────────┘          │
│  └───────────┘  └────────┘  └───────────┘   │        │        │                 │
│                                             │        │        ▼                 │
└─────────────────────────────────────────────┘        │  GET /api/v1/metrics/... │
                                                        │  GET /api/v1/sizing/... │
                                                        │        (sizer)          │
                                                        └──────────────────────────┘
```

### Collector

Runs as a sidecar alongside CI workloads. On a configurable interval, it reads `/proc` to collect CPU and memory for all visible processes, groups them by container using cgroup paths, and accumulates samples. On shutdown (SIGINT/SIGTERM), it computes run-level statistics (peak, avg, percentiles) and pushes a single summary to the receiver.

```bash
./collector --interval=2s --top=10 --push-endpoint=http://receiver:8080/api/v1/metrics
```

**Flags:** `--interval`, `--proc-path`, `--log-level`, `--log-format`, `--top`, `--push-endpoint`, `--push-token`

**Environment variables:**

| Variable | Description | Example |
| ------------------------- | ------------------------------------- | ------------------- |
| `GITHUB_REPOSITORY_OWNER` | Organization name | `my-org` |
| `GITHUB_REPOSITORY` | Full repository path | `my-org/my-repo` |
| `GITHUB_WORKFLOW` | Workflow filename | `ci.yml` |
| `GITHUB_JOB` | Job name | `build` |
| `GITHUB_RUN_ID` | Unique run identifier | `run-123` |
| `COLLECTOR_PUSH_TOKEN` | Bearer token for push endpoint auth | — |
| `CGROUP_PROCESS_MAP` | JSON: process name → container name | `{"node":"runner"}` |
| `CGROUP_LIMITS` | JSON: per-container CPU/memory limits | See below |

**CGROUP_LIMITS example:**

```json
{
  "runner": { "cpu": "2", "memory": "1Gi" },
  "sidecar": { "cpu": "500m", "memory": "256Mi" }
}
```

CPU supports Kubernetes notation (`"2"` = 2 cores, `"500m"` = 0.5 cores). Memory supports `Ki`, `Mi`, `Gi`, `Ti` (binary) or `K`, `M`, `G`, `T` (decimal).
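These quantities can be parsed without any Kubernetes dependency. The sketch below is a minimal, hypothetical parser covering exactly the notations listed above; the real parser in `internal/cgroup` may differ in naming and edge-case handling.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseCPU converts Kubernetes CPU notation to cores: "2" -> 2.0, "500m" -> 0.5.
func parseCPU(s string) (float64, error) {
	if strings.HasSuffix(s, "m") {
		milli, err := strconv.ParseFloat(strings.TrimSuffix(s, "m"), 64)
		if err != nil {
			return 0, err
		}
		return milli / 1000, nil
	}
	return strconv.ParseFloat(s, 64)
}

// parseMemory converts "1Gi", "256Mi", "1G", ... to bytes.
// Binary suffixes are checked before decimal ones so "Ki" is not misread as "K".
func parseMemory(s string) (int64, error) {
	units := []struct {
		suffix string
		factor int64
	}{
		{"Ki", 1 << 10}, {"Mi", 1 << 20}, {"Gi", 1 << 30}, {"Ti", 1 << 40},
		{"K", 1e3}, {"M", 1e6}, {"G", 1e9}, {"T", 1e12},
	}
	for _, u := range units {
		if strings.HasSuffix(s, u.suffix) {
			n, err := strconv.ParseInt(strings.TrimSuffix(s, u.suffix), 10, 64)
			if err != nil {
				return 0, err
			}
			return n * u.factor, nil
		}
	}
	return strconv.ParseInt(s, 10, 64) // plain bytes
}

func main() {
	cores, _ := parseCPU("500m")
	bytes, _ := parseMemory("1Gi")
	fmt.Println(cores, bytes) // 0.5 1073741824
}
```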
### Receiver (with sizer)

HTTP service that stores metric summaries in SQLite (via GORM), exposes a query API, and provides a **sizer** endpoint that computes right-sized Kubernetes resource requests and limits from historical run data.
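The sizing algorithm itself lives in `internal/receiver` and is not spelled out here. The sketch below is only one plausible heuristic (request from a high percentile, limit from the observed peak plus a buffer), with hypothetical names, to illustrate the kind of computation the sizing endpoint performs; the `cpu_percentile` and `buffer_percent` fields in the API's `SizingResponse` suggest this general shape.

```go
package main

import "fmt"

// runStats is a hypothetical per-run observation for one container.
type runStats struct {
	CPUPeakCores float64
	CPUp95Cores  float64
	MemPeakBytes int64
}

// suggestSizing derives request/limit pairs from historical runs:
// request from the highest p95 across runs, limit from the highest peak plus a buffer.
// Illustrative only; not the project's actual algorithm.
func suggestSizing(runs []runStats, bufferPercent float64) (cpuReq, cpuLim float64, memReq, memLim int64) {
	for _, r := range runs {
		if r.CPUp95Cores > cpuReq {
			cpuReq = r.CPUp95Cores
		}
		if r.CPUPeakCores > cpuLim {
			cpuLim = r.CPUPeakCores
		}
		if r.MemPeakBytes > memReq {
			memReq = r.MemPeakBytes
		}
	}
	factor := 1 + bufferPercent/100
	cpuLim *= factor
	memLim = int64(float64(memReq) * factor)
	return
}

func main() {
	runs := []runStats{{2.0, 1.6, 900 << 20}, {1.8, 1.5, 850 << 20}}
	cr, cl, mr, ml := suggestSizing(runs, 20)
	fmt.Printf("cpu request=%.2f limit=%.2f, mem request=%d limit=%d\n", cr, cl, mr, ml)
}
```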
```bash
./receiver --addr=:8080 --db=metrics.db --read-token=my-secret-token --hmac-key=my-hmac-key
```

**Flags:**

| Flag | Environment Variable | Description | Default |
| -------------- | --------------------- | ----------------------------------------------------- | ------------ |
| `--addr` | — | HTTP listen address | `:8080` |
| `--db` | — | SQLite database path | `metrics.db` |
| `--read-token` | `RECEIVER_READ_TOKEN` | Pre-shared token for read/admin endpoints (required) | — |
| `--hmac-key` | `RECEIVER_HMAC_KEY` | Secret key for push token generation/validation (required) | — |

**Endpoints:**

- `POST /api/v1/metrics` — receive and store a metric summary (requires scoped push token)
- `POST /api/v1/token` — generate a scoped push token (requires read token auth)
- `GET /api/v1/metrics/repo/{org}/{repo}/{workflow}/{job}` — query stored metrics (requires read token auth)
- `GET /api/v1/sizing/repo/{org}/{repo}/{workflow}/{job}` — compute container sizes from historical data (requires read token auth)

**Authentication:**

All metrics endpoints require authentication:

- The GET endpoints require a Bearer token matching the read token
- The POST metrics endpoint requires a scoped push token (generated via `POST /api/v1/token`)
- The token endpoint itself requires the read token

**Token flow:**

```bash
# 1. Admin generates a scoped push token using the read token
curl -X POST http://localhost:8080/api/v1/token \
  -H "Authorization: Bearer my-secret-token" \
  -H "Content-Type: application/json" \
  -d '{"organization":"my-org","repository":"my-repo","workflow":"ci.yml","job":"build"}'
# → {"token":"<hex-encoded HMAC>"}

# 2. Collector uses the scoped token to push metrics
./collector --push-endpoint=http://localhost:8080/api/v1/metrics \
  --push-token=<token-from-step-1>

# 3. Query metrics with the read token
curl -H "Authorization: Bearer my-secret-token" http://localhost:8080/api/v1/metrics/repo/my-org/my-repo/ci.yml/build  #gitleaks:allow
```

Push tokens are HMAC-SHA256 digests derived from `--hmac-key` and the scope (org/repo/workflow/job). They are stateless — no database storage is needed. The HMAC key is separate from the read token so that compromising a push token does not expose the admin credential.
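A scope-bound token of this kind needs only the standard library. The exact message layout (and how the receiver's token TTL is encoded) is not documented here, so the sketch below assumes a simple `org/repo/workflow/job` message; treat it as an illustration of stateless HMAC tokens rather than a drop-in client for this API.

```go
package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
)

// pushToken derives a stateless, scope-bound token from the shared HMAC key.
// The "org/repo/workflow/job" message format is assumed for illustration.
func pushToken(hmacKey, org, repo, workflow, job string) string {
	mac := hmac.New(sha256.New, []byte(hmacKey))
	mac.Write([]byte(strings.Join([]string{org, repo, workflow, job}, "/")))
	return hex.EncodeToString(mac.Sum(nil))
}

func main() {
	fmt.Println(pushToken("my-hmac-key", "my-org", "my-repo", "ci.yml", "build"))
}
```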
## How Metrics Are Collected

The collector reads `/proc/[pid]/stat` for every visible process to get CPU ticks (`utime` + `stime`) and `/proc/[pid]/status` for memory (RSS). It takes two samples per interval and computes the delta to derive CPU usage rates.
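The rate computation is a tick delta over elapsed time, divided by the clock tick rate. A minimal sketch (with the tick rate hard-coded to 100 Hz, the usual `CLK_TCK` value; a real collector would query it at runtime):

```go
package main

import "fmt"

const clkTck = 100.0 // ticks per second; normally obtained via getconf CLK_TCK

// cpuCores converts two (utime+stime) tick samples taken elapsedSec apart
// into average CPU usage in cores over that window.
func cpuCores(prevTicks, currTicks uint64, elapsedSec float64) float64 {
	if elapsedSec <= 0 || currTicks < prevTicks {
		return 0
	}
	return float64(currTicks-prevTicks) / (elapsedSec * clkTck)
}

func main() {
	// 400 ticks over 2 s at 100 Hz = 2.0 cores busy on average.
	fmt.Println(cpuCores(1000, 1400, 2.0))
}
```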
Processes are grouped into containers by reading `/proc/[pid]/cgroup` and matching cgroup paths against the `CGROUP_PROCESS_MAP`. This is necessary because in shared PID namespace pods, `/proc/stat` only shows host-level aggregates — per-container metrics must be built up from individual process data.

Container CPU is reported in **cores** (not a percentage) for direct comparison with Kubernetes resource limits. System-level CPU is reported as a percentage (0-100%).

Over the course of a run, the `summary.Accumulator` tracks every sample and on shutdown computes:

| Stat | Description |
| -------------------------- | ------------------------------ |
| `peak` | Maximum observed value |
| `p99`, `p95`, `p75`, `p50` | Percentiles across all samples |
| `avg` | Arithmetic mean |

These stats are computed for CPU, memory, and per-container metrics.
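These statistics need nothing more than a sorted copy of the samples. A small sketch of the computation, using nearest-rank percentiles (the exact interpolation used by `summary.Accumulator` may differ):

```go
package main

import (
	"fmt"
	"math"
	"sort"
)

// stats returns peak, average, and the requested nearest-rank percentile
// over a slice of samples.
func stats(samples []float64, pct float64) (peak, avg, percentile float64) {
	if len(samples) == 0 {
		return 0, 0, 0
	}
	sorted := append([]float64(nil), samples...)
	sort.Float64s(sorted)
	peak = sorted[len(sorted)-1]
	var sum float64
	for _, s := range sorted {
		sum += s
	}
	avg = sum / float64(len(sorted))
	rank := int(math.Ceil(pct/100*float64(len(sorted)))) - 1
	if rank < 0 {
		rank = 0
	}
	percentile = sorted[rank]
	return
}

func main() {
	peak, avg, p95 := stats([]float64{0.4, 1.2, 2.0, 1.8, 0.9}, 95)
	fmt.Printf("peak=%.2f avg=%.2f p95=%.2f\n", peak, avg, p95)
}
```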
## API Response

```
GET /api/v1/metrics/repo/my-org/my-repo/ci.yml/build
```

```json
[
  {
    "id": 1,
    "organization": "my-org",
    "repository": "my-org/my-repo",
    "workflow": "ci.yml",
    "job": "build",
    "run_id": "run-123",
    "received_at": "2026-02-06T14:30:23.056Z",
    "payload": {
      "start_time": "2026-02-06T14:30:02.185Z",
      "end_time": "2026-02-06T14:30:22.190Z",
      "duration_seconds": 20.0,
      "sample_count": 11,
      "cpu_total_percent": { "peak": ..., "avg": ..., "p50": ... },
      "mem_used_bytes": { "peak": ..., "avg": ... },
      "containers": [
        {
          "name": "runner",
          "cpu_cores": { "peak": 2.007, "avg": 1.5, "p50": 1.817, "p95": 2.004 },
          "memory_bytes": { "peak": 18567168, "avg": 18567168 }
        }
      ],
      "top_cpu_processes": [ ... ],
      "top_mem_processes": [ ... ]
    }
  }
]
```

**CPU metric distinction:**

- `cpu_total_percent` — system-wide, 0-100%
- `cpu_cores` (containers) — cores used (e.g. `2.0` = two full cores)
- `peak_cpu_percent` (processes) — per-process, where 100% = 1 core

All memory values are in **bytes**.

## Running

### Docker Compose

```bash
# Start the receiver (builds image if needed):
docker compose -f test/docker/docker-compose-stress.yaml up -d --build receiver

# Generate a scoped push token for the collector:
PUSH_TOKEN=$(curl -s -X POST http://localhost:9080/api/v1/token \
  -H "Authorization: Bearer dummyreadtoken" \
  -H "Content-Type: application/json" \
  -d '{"organization":"test-org","repository":"test-org/stress-test","workflow":"stress-test-workflow","job":"heavy-workload"}' \
  | jq -r .token)

# Start the collector and stress workloads with the push token:
COLLECTOR_PUSH_TOKEN=$PUSH_TOKEN \
  docker compose -f test/docker/docker-compose-stress.yaml up -d --build collector

# ... Wait for data collection ...

# Trigger the shutdown summary:
docker compose -f test/docker/docker-compose-stress.yaml stop collector

# Query results with the read token:
curl -H "Authorization: Bearer dummyreadtoken" \
  http://localhost:9080/api/v1/metrics/repo/test-org/test-org%2Fstress-test/stress-test-workflow/heavy-workload
```

### Local

```bash
go build -o collector ./cmd/collector
go build -o receiver ./cmd/receiver

# Start receiver with both keys:
./receiver --addr=:8080 --db=metrics.db \
  --read-token=my-secret-token --hmac-key=my-hmac-key

# Generate a scoped push token:
PUSH_TOKEN=$(curl -s -X POST http://localhost:8080/api/v1/token \
  -H "Authorization: Bearer my-secret-token" \
  -H "Content-Type: application/json" \
  -d '{"organization":"my-org","repository":"my-repo","workflow":"ci.yml","job":"build"}' \
  | jq -r .token)

# Run collector with the push token:
./collector --interval=2s --top=10 \
  --push-endpoint=http://localhost:8080/api/v1/metrics \
  --push-token=$PUSH_TOKEN
```

## Internal Packages

| Package | Purpose |
| -------------------- | ------------------------------------------------------------------- |
| `internal/proc` | Low-level `/proc` parsing (stat, status, cgroup) |
| `internal/metrics` | Aggregates process metrics from `/proc` into system/container views |
| `internal/cgroup` | Parses `CGROUP_PROCESS_MAP` and `CGROUP_LIMITS` env vars |
| `internal/collector` | Orchestrates the collection loop and shutdown |
| `internal/summary` | Accumulates samples, computes stats, pushes to receiver |
| `internal/receiver` | HTTP handlers, SQLite store, and sizer logic |
| `internal/output` | Metrics output formatting (JSON/text) |

## Background

Technical reference on the Linux primitives this project builds on:

- [Identifying process cgroups by PID](docs/background/identify-process-cgroup-by-pid.md) — how to read `/proc/<PID>/cgroup` to determine which container a process belongs to
- [/proc/stat behavior in containers](docs/background/proc-stat-in-containers.md) — why `/proc/stat` shows host-level data in containers, and how to aggregate per-process stats from `/proc/[pid]/stat` instead, including CPU tick conversion and cgroup limit handling
**`cmd/collector/main.go`**

```diff
@@ -10,8 +10,9 @@ import (
 	"syscall"
 	"time"

-	"edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/collector"
-	"edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/output"
+	"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/collector"
+	"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/output"
+	"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
 )

 const (
@@ -29,6 +30,8 @@ func main() {
 	logLevel := flag.String("log-level", defaultLogLevel, "Log level: debug, info, warn, error")
 	logFormat := flag.String("log-format", defaultLogFormat, "Output format: json, text")
 	topN := flag.Int("top", defaultTopN, "Number of top processes to include")
+	pushEndpoint := flag.String("push-endpoint", "", "HTTP endpoint to push metrics to (e.g., http://localhost:8080/api/v1/metrics)")
+	pushToken := flag.String("push-token", os.Getenv("COLLECTOR_PUSH_TOKEN"), "Bearer token for push endpoint authentication (or set COLLECTOR_PUSH_TOKEN)")
 	flag.Parse()

 	// Setup structured logging for application logs
@@ -53,6 +56,25 @@ func main() {
 		TopN: *topN,
 	}, metricsWriter, appLogger)

+	// Attach summary writer to emit run summary on shutdown
+	summaryWriter := summary.NewSummaryWriter(os.Stdout, *logFormat)
+	c.SetSummaryWriter(summaryWriter)
+
+	// Setup push client if endpoint is configured
+	if *pushEndpoint != "" {
+		pushClient := summary.NewPushClient(*pushEndpoint, *pushToken)
+		c.SetPushClient(pushClient)
+		execCtx := pushClient.ExecutionContext()
+		appLogger.Info("push client configured",
+			slog.String("endpoint", *pushEndpoint),
+			slog.String("organization", execCtx.Organization),
+			slog.String("repository", execCtx.Repository),
+			slog.String("workflow", execCtx.Workflow),
+			slog.String("job", execCtx.Job),
+			slog.String("run_id", execCtx.RunID),
+		)
+	}
+
 	// Setup signal handling for graceful shutdown
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
```
**`cmd/receiver/main.go`** (new file, 83 lines)

```go
// ABOUTME: Entry point for the metrics receiver service.
// ABOUTME: HTTP service using Fuego framework with automatic OpenAPI 3.0 generation.
package main

import (
	"flag"
	"fmt"
	"log/slog"
	"os"
	"time"

	"github.com/getkin/kin-openapi/openapi3"
	"github.com/go-fuego/fuego"

	"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver"
)

const (
	defaultAddr   = ":8080"
	defaultDBPath = "metrics.db"
)

func main() {
	addr := flag.String("addr", defaultAddr, "HTTP listen address")
	dbPath := flag.String("db", defaultDBPath, "SQLite database path")
	readToken := flag.String("read-token", os.Getenv("RECEIVER_READ_TOKEN"), "Pre-shared token for read endpoints (or set RECEIVER_READ_TOKEN)")
	hmacKey := flag.String("hmac-key", os.Getenv("RECEIVER_HMAC_KEY"), "Secret key for push token generation/validation (or set RECEIVER_HMAC_KEY)")
	tokenTTL := flag.Duration("token-ttl", 2*time.Hour, "Time-to-live for push tokens (default 2h)")
	flag.Parse()

	logger := slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
		Level: slog.LevelInfo,
	}))

	store, err := receiver.NewStore(*dbPath)
	if err != nil {
		logger.Error("failed to open database", slog.String("error", err.Error()))
		os.Exit(1)
	}
	defer func() { _ = store.Close() }()

	handler := receiver.NewHandler(store, logger, *readToken, *hmacKey, *tokenTTL)

	// Create Fuego server with OpenAPI configuration
	s := fuego.NewServer(
		fuego.WithAddr(*addr),
		fuego.WithEngineOptions(
			fuego.WithOpenAPIConfig(fuego.OpenAPIConfig{
				PrettyFormatJSON: true,
				JSONFilePath:     "docs/openapi.json",
				SwaggerURL:       "/swagger",
				Info: &openapi3.Info{
					Title:       "Forgejo Runner Resource Collector API",
					Version:     "1.0.0",
					Description: "HTTP service that receives and stores CI/CD resource metrics from collectors, providing query and sizing recommendation APIs.",
					Contact: &openapi3.Contact{
						Name: "API Support",
						URL:  "https://edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer",
					},
					License: &openapi3.License{
						Name: "Apache 2.0",
						URL:  "http://www.apache.org/licenses/LICENSE-2.0.html",
					},
				},
			}),
		),
	)

	// Register routes
	handler.RegisterRoutes(s)

	logger.Info("starting metrics receiver",
		slog.String("addr", *addr),
		slog.String("db", *dbPath),
		slog.String("swagger", fmt.Sprintf("http://localhost%s/swagger", *addr)),
	)

	// Run server (handles graceful shutdown)
	if err := s.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
}
```
**`docs/background/identify-process-cgroup-by-pid.md`** (new file, 38 lines)

# Identifying a Process's Cgroup by PID

Read `/proc/<PID>/cgroup` to find which cgroup (and therefore which container) a process belongs to.

## /proc/PID/cgroup

```bash
cat /proc/<PID>/cgroup
```

Shows all cgroup controllers the process belongs to:

```
12:blkio:/user.slice
11:memory:/user.slice/user-1000.slice
...
0::/user.slice/user-1000.slice/session-1.scope
```

On cgroup v2, the path after `::` is the cgroup path under `/sys/fs/cgroup/`.
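The same lookup is easy to do from Go, which is how this repository's `internal/proc` package works with cgroups. A minimal stand-alone sketch of the cgroup v2 case (the function name is hypothetical and error handling is reduced to the essentials):

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// cgroupV2Path returns the unified-hierarchy cgroup path (the "0::<path>" line)
// for a PID, relative to /sys/fs/cgroup.
func cgroupV2Path(pid int) (string, error) {
	f, err := os.Open(fmt.Sprintf("/proc/%d/cgroup", pid))
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		// Each line is "<hierarchy-id>:<controllers>:<path>".
		if rest, ok := strings.CutPrefix(scanner.Text(), "0::"); ok {
			return rest, nil
		}
	}
	return "", fmt.Errorf("no cgroup v2 entry for pid %d", pid)
}

func main() {
	path, err := cgroupV2Path(os.Getpid())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println(path)
}
```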
## Other Methods

```bash
# ps format options
ps -o pid,cgroup -p <PID>

# systemd systems
systemd-cgls --unit <unit-name>
systemd-cgls                     # whole tree
```

## Quick One-Liners

```bash
cat /proc/self/cgroup   # current shell
cat /proc/$$/cgroup     # also current shell
cat /proc/1234/cgroup   # specific PID
```
**`docs/background/proc-stat-in-containers.md`** (new file, 238 lines)

# /proc/stat behavior in containerised environments

`/proc/stat` in containers shows **host-level** statistics, not container-specific data. To get container-aware CPU metrics when processes span multiple cgroups (e.g., sidecars sharing a PID namespace), aggregate `/proc/[pid]/stat` for all visible processes and use cgroup limits from `/sys/fs/cgroup` for normalization.

## Why /proc/stat is wrong in containers

`/proc/stat` reports host-wide values (CPU times, context switches, boot time, process count) because `/proc` is mounted from the host kernel, which has no namespace awareness for these metrics.

This means:
- Tools reading `/proc/stat` (top, htop, etc.) show **host** CPU usage, not container usage
- Cgroup CPU limits (e.g., 2 CPUs) are not reflected — all host CPUs are visible
- In shared environments, containers see each other's aggregate impact

### Alternatives

| Approach | Description |
|----------|-------------|
| **cgroups** | Read `/sys/fs/cgroup/cpu/` for container-specific CPU accounting |
| **LXCFS** | FUSE filesystem providing container-aware `/proc` files |
| **Container runtimes** | Some (like Kata) use VMs with isolated kernels |
| **Metrics APIs** | Docker/Kubernetes APIs instead of `/proc/stat` |

```bash
# cgroups v2:
cat /sys/fs/cgroup/cpu.stat

# cgroups v1:
cat /sys/fs/cgroup/cpu/cpuacct.usage
```

## Aggregating per-Process CPU from /proc/[pid]/stat

When cgroup-level reads aren't an option (sidecars sharing a PID namespace with different cgroups), aggregate individual process stats:

```bash
# Fields 14 (utime) and 15 (stime) in /proc/[pid]/stat
for pid in /proc/[0-9]*; do
  awk '{print $14 + $15}' "$pid/stat" 2>/dev/null
done | awk '{sum += $1} END {print sum}'
```

### Caveats

1. **Race conditions** — processes can spawn/die between reads
2. **Short-lived processes** — missed if they start and exit between samples
3. **Zombie/exited processes** — their CPU time may not be captured
4. **Overhead** — scanning all PIDs repeatedly is expensive
5. **Namespace visibility** — you only see processes in your PID namespace (which is what you want)
6. **Children accounting** — when a process exits, its CPU time is added to the parent's `cutime`/`cstime`, risking double-counting

Cgroups handle these edge cases natively, but **cannot be used when sidecars share the PID namespace with different cgroups** — in that case, per-process aggregation is the best option.

## Parent/Child Process Relationships

Field 4 in `/proc/[pid]/stat` is the PPID (parent process ID):

```bash
awk '{print $4}' /proc/1234/stat   # PPID from stat
grep PPid /proc/1234/status        # more readable
```

### Building a Process Tree

```bash
#!/bin/bash
declare -A parent_of children_of

for stat in /proc/[0-9]*/stat; do
  if read -r line < "$stat" 2>/dev/null; then
    pid="${stat#/proc/}"; pid="${pid%/stat}"
    rest="${line##*) }"; read -ra fields <<< "$rest"
    ppid="${fields[1]}"   # 4th field overall = index 1 after state
    parent_of[$pid]=$ppid
    children_of[$ppid]+="$pid "
  fi
done

print_tree() {
  local pid=$1 indent=$2
  echo "${indent}${pid}"
  for child in ${children_of[$pid]}; do print_tree "$child" "  $indent"; done
}
print_tree 1 ""
```

### Avoiding Double-Counting with cutime/cstime

Only sum `utime` + `stime` per process. The `cutime`/`cstime` fields are cumulative from children that have already exited and been `wait()`ed on — those children no longer exist in `/proc`, so their time is only accessible via the parent.

```bash
#!/bin/bash
declare -A utime stime

for stat in /proc/[0-9]*/stat; do
  if read -r line < "$stat" 2>/dev/null; then
    pid="${stat#/proc/}"; pid="${pid%/stat}"
    rest="${line##*) }"; read -ra f <<< "$rest"
    utime[$pid]="${f[11]}"; stime[$pid]="${f[12]}"
    # cutime=${f[13]} cstime=${f[14]} — don't sum these
  fi
done

total=0
for pid in "${!utime[@]}"; do ((total += utime[$pid] + stime[$pid])); done
echo "Total CPU ticks: $total"
echo "Seconds: $(echo "scale=2; $total / $(getconf CLK_TCK)" | bc)"
```

## Converting Ticks to CPU Percentages

CPU percentage is a rate — you need **two samples** over a time interval.

```
CPU % = (delta_ticks / (elapsed_seconds * CLK_TCK * num_cpus)) * 100
```

- `delta_ticks` = difference in (utime + stime) between samples
- `CLK_TCK` = ticks per second (usually 100, get via `getconf CLK_TCK`)
- `num_cpus` = number of CPUs (omit for per-core percentage)

| Style | Formula | Example |
|-------|---------|---------|
| **Normalized** (0-100%) | `delta / (elapsed * CLK_TCK * num_cpus) * 100` | 50% = half of total capacity |
| **Cores-style** (0-N*100%) | `delta / (elapsed * CLK_TCK) * 100` | 200% = 2 full cores busy |
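The same two conventions expressed in Go, mirroring the formulas above (the helper is illustrative and not code from this repository):

```go
package main

import "fmt"

// cpuUsage converts a tick delta over an elapsed window into both styles:
// normalized percent of total capacity, and cores-style percent (100% = 1 core).
func cpuUsage(deltaTicks uint64, elapsedSec, clkTck float64, numCPUs int) (normalizedPct, coresPct float64) {
	coresPct = float64(deltaTicks) / (elapsedSec * clkTck) * 100
	normalizedPct = coresPct / float64(numCPUs)
	return
}

func main() {
	// 400 ticks over 2 s at 100 Hz on a 4-CPU host:
	norm, cores := cpuUsage(400, 2, 100, 4)
	fmt.Printf("normalized=%.1f%% cores-style=%.1f%%\n", norm, cores) // 50.0% 200.0%
}
```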
### Sampling Script

```bash
#!/bin/bash
CLK_TCK=$(getconf CLK_TCK)
NUM_CPUS=$(nproc)

get_total_ticks() {
  local total=0
  for stat in /proc/[0-9]*/stat; do
    if read -r line < "$stat" 2>/dev/null; then
      rest="${line##*) }"; read -ra f <<< "$rest"
      ((total += f[11] + f[12]))
    fi
  done
  echo "$total"
}

ticks1=$(get_total_ticks); time1=$(date +%s.%N)
sleep 1
ticks2=$(get_total_ticks); time2=$(date +%s.%N)

delta_ticks=$((ticks2 - ticks1))
elapsed=$(echo "$time2 - $time1" | bc)

pct=$(echo "scale=2; ($delta_ticks / ($elapsed * $CLK_TCK * $NUM_CPUS)) * 100" | bc)
echo "CPU usage: ${pct}% of ${NUM_CPUS} CPUs"

cores_pct=$(echo "scale=2; ($delta_ticks / ($elapsed * $CLK_TCK)) * 100" | bc)
echo "CPU usage: ${cores_pct}% (cores-style)"
```

## Respecting Cgroup CPU Limits

The above calculations use `nproc`, which returns the **host** CPU count. If a container is limited to 2 CPUs on an 8-CPU host, `nproc` returns 8 and the percentage is misleading.

### Reading the Effective CPU Limit

```bash
#!/bin/bash
get_effective_cpus() {
  # cgroups v2
  if [[ -f /sys/fs/cgroup/cpu.max ]]; then
    read quota period < /sys/fs/cgroup/cpu.max
    [[ "$quota" != "max" ]] && echo "scale=2; $quota / $period" | bc && return
  fi
  # cgroups v1
  if [[ -f /sys/fs/cgroup/cpu/cpu.cfs_quota_us ]]; then
    quota=$(cat /sys/fs/cgroup/cpu/cpu.cfs_quota_us)
    period=$(cat /sys/fs/cgroup/cpu/cpu.cfs_period_us)
    [[ "$quota" != "-1" ]] && echo "scale=2; $quota / $period" | bc && return
  fi
  nproc   # fallback
}
```

Also check cpuset limits (`cpuset.cpus.effective` for v2, `cpuset/cpuset.cpus` for v1), which restrict which physical CPUs are available.

### Shared PID Namespace Complication

When sidecars share a PID namespace but have different cgroups, there is no single "correct" CPU limit for normalization. Options:

1. **Use the host CPU count** — percentage of total host capacity
2. **Sum the limits** — if you know each sidecar's cgroup, sum their quotas
3. **Report in cores** — skip normalization and show `1.5 cores used` instead of a percentage

## Reading Cgroup Limits for Other Containers

Every process exposes its cgroup membership via `/proc/<PID>/cgroup`. If the cgroup filesystem is mounted, you can read any container's limits:

```bash
get_cgroup_cpu_limit() {
  local pid=$1
  # cgroups v2
  cgroup_path=$(grep -oP '0::\K.*' /proc/$pid/cgroup 2>/dev/null)
  if [[ -n "$cgroup_path" ]]; then
    limit_file="/sys/fs/cgroup${cgroup_path}/cpu.max"
    if [[ -r "$limit_file" ]]; then
      read quota period < "$limit_file"
      [[ "$quota" == "max" ]] && echo "unlimited" || echo "scale=2; $quota / $period" | bc
      return
    fi
  fi
  # cgroups v1
  cgroup_path=$(grep -oP 'cpu.*:\K.*' /proc/$pid/cgroup 2>/dev/null)
  if [[ -n "$cgroup_path" ]]; then
    quota_file="/sys/fs/cgroup/cpu${cgroup_path}/cpu.cfs_quota_us"
    period_file="/sys/fs/cgroup/cpu${cgroup_path}/cpu.cfs_period_us"
    if [[ -r "$quota_file" ]]; then
      quota=$(cat "$quota_file"); period=$(cat "$period_file")
      [[ "$quota" == "-1" ]] && echo "unlimited" || echo "scale=2; $quota / $period" | bc
      return
    fi
  fi
  echo "unknown"
}
```

### Mount Visibility

| Scenario | Can Read Other Cgroups? |
|----------|------------------------|
| Host system | Yes |
| Privileged container | Yes |
| `/sys/fs/cgroup` mounted read-only from host | Yes (common in Kubernetes) |
| Only own cgroup subtree mounted | No |

### Fallbacks When Cgroups Aren't Accessible

1. **Mount the cgroup fs** — volume mount `/sys/fs/cgroup:ro`
2. **Use a sidecar with access** — one privileged container does the monitoring
3. **Accept "unknown" limits** — report raw ticks/cores instead of percentages
4. **Kubernetes Downward API** — inject limits as env vars (own container only)
**`docs/openapi.json`** (new file, 665 lines, generated via `make openapi`)
{
|
||||
"components": {
|
||||
"schemas": {
|
||||
"HTTPError": {
|
||||
"description": "HTTPError schema",
|
||||
"properties": {
|
||||
"detail": {
|
||||
"description": "Human readable error message",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
"errors": {
|
||||
"items": {
|
||||
"nullable": true,
|
||||
"properties": {
|
||||
"more": {
|
||||
"additionalProperties": {
|
||||
"description": "Additional information about the error",
|
||||
"nullable": true
|
||||
},
|
||||
"description": "Additional information about the error",
|
||||
"nullable": true,
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"description": "For example, name of the parameter that caused the error",
|
||||
"type": "string"
|
||||
},
|
||||
"reason": {
|
||||
"description": "Human readable error message",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"nullable": true,
|
||||
"type": "array"
|
||||
},
|
||||
"instance": {
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"description": "HTTP status code",
|
||||
"example": 403,
|
||||
"nullable": true,
|
||||
"type": "integer"
|
||||
},
|
||||
"title": {
|
||||
"description": "Short title of the error",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "URL of the error type. Can be used to lookup the error in a documentation",
|
||||
"nullable": true,
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"HealthResponse": {
|
||||
"description": "HealthResponse schema",
|
||||
"properties": {
|
||||
"status": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"MetricCreatedResponse": {
|
||||
"description": "MetricCreatedResponse schema",
|
||||
"properties": {
|
||||
"id": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"MetricResponse": {
|
||||
"description": "MetricResponse schema",
|
||||
"properties": {
|
||||
"id": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"job": {
|
||||
"type": "string"
|
||||
},
|
||||
"organization": {
|
||||
"type": "string"
|
||||
},
|
||||
"payload": {},
|
||||
"received_at": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
},
|
||||
"repository": {
|
||||
"type": "string"
|
||||
},
|
||||
"run_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"workflow": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"SizingResponse": {
|
||||
"description": "SizingResponse schema",
|
||||
"properties": {
|
||||
"containers": {
|
||||
"items": {
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "string"
|
||||
},
|
||||
"request": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"memory": {
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "string"
|
||||
},
|
||||
"request": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"meta": {
|
||||
"properties": {
|
||||
"buffer_percent": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cpu_percentile": {
|
||||
"type": "string"
|
||||
},
|
||||
"runs_analyzed": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"total": {
|
||||
"properties": {
|
||||
"cpu": {
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "string"
|
||||
},
|
||||
"request": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"memory": {
|
||||
"properties": {
|
||||
"limit": {
|
||||
"type": "string"
|
||||
},
|
||||
"request": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"TokenRequest": {
|
||||
"description": "TokenRequest schema",
|
||||
"properties": {
|
||||
"job": {
|
||||
"type": "string"
|
||||
},
|
||||
"organization": {
|
||||
"type": "string"
|
||||
},
|
||||
"repository": {
|
||||
"type": "string"
|
||||
},
|
||||
"workflow": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"TokenResponse": {
|
||||
"description": "TokenResponse schema",
|
||||
"properties": {
|
||||
"token": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"unknown-interface": {
|
||||
"description": "unknown-interface schema"
|
||||
}
|
||||
}
|
||||
},
|
||||
"info": {
|
||||
"contact": {
|
||||
"name": "API Support",
|
||||
"url": "https://edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer"
|
||||
},
|
||||
"description": "HTTP service that receives and stores CI/CD resource metrics from collectors, providing query and sizing recommendation APIs.",
|
||||
"license": {
|
||||
"name": "Apache 2.0",
|
||||
"url": "http://www.apache.org/licenses/LICENSE-2.0.html"
|
||||
},
|
||||
"title": "Forgejo Runner Resource Collector API",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
"openapi": "3.1.0",
|
||||
"paths": {
|
||||
"/api/v1/metrics": {
|
||||
"post": {
|
||||
"description": "#### Controller: \n\n`edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).ReceiveMetrics`\n\n#### Middlewares:\n\n- `github.com/go-fuego/fuego.defaultLogger.middleware`\n\n---\n\n",
|
||||
"operationId": "POST_/api/v1/metrics",
|
||||
"parameters": [
|
||||
{
|
||||
"in": "header",
|
||||
"name": "Accept",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/MetricCreatedResponse"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/MetricCreatedResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Bad Request _(validation or deserialization error)_"
|
||||
},
|
||||
"500": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Internal Server Error _(panics)_"
|
||||
},
|
||||
"default": {
|
||||
"description": ""
|
||||
}
|
||||
},
|
||||
"summary": "receive metrics",
|
||||
"tags": [
|
||||
"api/v1"
|
||||
]
|
||||
}
|
||||
},
|
||||
"/api/v1/metrics/repo/{org}/{repo}/{workflow}/{job}": {
|
||||
"get": {
|
||||
"description": "#### Controller: \n\n`edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).GetMetricsByWorkflowJob`\n\n#### Middlewares:\n\n- `github.com/go-fuego/fuego.defaultLogger.middleware`\n- `edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).requireReadToken`\n\n---\n\n",
|
||||
"operationId": "GET_/api/v1/metrics/repo/:org/:repo/:workflow/:job",
|
||||
"parameters": [
|
||||
{
|
||||
"in": "header",
|
||||
"name": "Accept",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "org",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "repo",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "workflow",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "job",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricResponse"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricResponse"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Bad Request _(validation or deserialization error)_"
|
||||
},
|
||||
"500": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Internal Server Error _(panics)_"
|
||||
},
|
||||
"default": {
|
||||
"description": ""
|
||||
}
|
||||
},
|
||||
"summary": "get metrics by workflow job",
|
||||
"tags": [
|
||||
"api/v1"
|
||||
]
|
||||
}
|
||||
},
|
||||
"/api/v1/sizing/repo/{org}/{repo}/{workflow}/{job}": {
|
||||
"get": {
|
||||
"description": "#### Controller: \n\n`edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).GetSizing`\n\n#### Middlewares:\n\n- `github.com/go-fuego/fuego.defaultLogger.middleware`\n- `edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).requireReadToken`\n\n---\n\n",
|
||||
"operationId": "GET_/api/v1/sizing/repo/:org/:repo/:workflow/:job",
|
||||
"parameters": [
|
||||
{
|
||||
"in": "header",
|
||||
"name": "Accept",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "org",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "repo",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "workflow",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"in": "path",
|
||||
"name": "job",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SizingResponse"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SizingResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Bad Request _(validation or deserialization error)_"
|
||||
},
|
||||
"500": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Internal Server Error _(panics)_"
|
||||
},
|
||||
"default": {
|
||||
"description": ""
|
||||
}
|
||||
},
|
||||
"summary": "get sizing",
|
||||
"tags": [
|
||||
"api/v1"
|
||||
]
|
||||
}
|
||||
},
|
||||
"/api/v1/token": {
|
||||
"post": {
|
||||
"description": "#### Controller: \n\n`edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).GenerateToken`\n\n#### Middlewares:\n\n- `github.com/go-fuego/fuego.defaultLogger.middleware`\n- `edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).requireReadToken`\n\n---\n\n",
|
||||
"operationId": "POST_/api/v1/token",
|
||||
"parameters": [
|
||||
{
|
||||
"in": "header",
|
||||
"name": "Accept",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"*/*": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/TokenRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Request body for receiver.TokenRequest",
|
||||
"required": true
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/TokenResponse"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/TokenResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Bad Request _(validation or deserialization error)_"
|
||||
},
|
||||
"500": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Internal Server Error _(panics)_"
|
||||
},
|
||||
"default": {
|
||||
"description": ""
|
||||
}
|
||||
},
|
||||
"summary": "generate token",
|
||||
"tags": [
|
||||
"api/v1"
|
||||
]
|
||||
}
|
||||
},
|
||||
"/health": {
|
||||
"get": {
|
||||
"description": "#### Controller: \n\n`edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver.(*Handler).Health`\n\n#### Middlewares:\n\n- `github.com/go-fuego/fuego.defaultLogger.middleware`\n\n---\n\n",
|
||||
"operationId": "GET_/health",
|
||||
"parameters": [
|
||||
{
|
||||
"in": "header",
|
||||
"name": "Accept",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HealthResponse"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HealthResponse"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Bad Request _(validation or deserialization error)_"
|
||||
},
|
||||
"500": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
},
|
||||
"application/xml": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HTTPError"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Internal Server Error _(panics)_"
|
||||
},
|
||||
"default": {
|
||||
"description": ""
|
||||
}
|
||||
},
|
||||
"summary": "health"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
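For orientation, a minimal sketch of a client calling the POST /api/v1/token endpoint described in the spec above. The base URL and the READ_TOKEN environment variable are placeholders, not values from this repository; the request and response fields follow the integration test further down in this diff.

// Hypothetical caller of POST /api/v1/token; URL and secrets are placeholders.
package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
    "os"
)

func main() {
    body, _ := json.Marshal(map[string]string{
        "organization": "my-org",
        "repository":   "my-repo",
        "workflow":     "ci.yml",
        "job":          "build",
    })

    req, _ := http.NewRequest(http.MethodPost, "https://sizer.example/api/v1/token", bytes.NewReader(body))
    req.Header.Set("Authorization", "Bearer "+os.Getenv("READ_TOKEN")) // the read token guards token generation
    req.Header.Set("Content-Type", "application/json")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    var out struct {
        Token string `json:"token"` // scoped push token for this org/repo/workflow/job
    }
    _ = json.NewDecoder(resp.Body).Decode(&out)
    fmt.Println("push token:", out.Token)
}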
48    go.mod
@@ -1,3 +1,49 @@
module edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector
module edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer

go 1.25.6

require (
    github.com/getkin/kin-openapi v0.133.0
    github.com/glebarez/sqlite v1.11.0
    github.com/go-fuego/fuego v0.19.0
    github.com/oapi-codegen/runtime v1.1.2
    gorm.io/gorm v1.31.1
)

require (
    github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
    github.com/dustin/go-humanize v1.0.1 // indirect
    github.com/gabriel-vasile/mimetype v1.4.11 // indirect
    github.com/glebarez/go-sqlite v1.21.2 // indirect
    github.com/go-openapi/jsonpointer v0.22.3 // indirect
    github.com/go-openapi/swag/jsonname v0.25.4 // indirect
    github.com/go-playground/locales v0.14.1 // indirect
    github.com/go-playground/universal-translator v0.18.1 // indirect
    github.com/go-playground/validator/v10 v10.28.0 // indirect
    github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
    github.com/google/uuid v1.6.0 // indirect
    github.com/gorilla/schema v1.4.1 // indirect
    github.com/jinzhu/inflection v1.0.0 // indirect
    github.com/jinzhu/now v1.1.5 // indirect
    github.com/josharian/intern v1.0.0 // indirect
    github.com/kr/pretty v0.3.1 // indirect
    github.com/leodido/go-urn v1.4.0 // indirect
    github.com/mailru/easyjson v0.9.1 // indirect
    github.com/mattn/go-isatty v0.0.20 // indirect
    github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
    github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 // indirect
    github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90 // indirect
    github.com/perimeterx/marshmallow v1.1.5 // indirect
    github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
    github.com/rogpeppe/go-internal v1.14.1 // indirect
    github.com/woodsbury/decimal128 v1.4.0 // indirect
    golang.org/x/crypto v0.45.0 // indirect
    golang.org/x/sys v0.39.0 // indirect
    golang.org/x/text v0.31.0 // indirect
    gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
    gopkg.in/yaml.v3 v3.0.1 // indirect
    modernc.org/libc v1.22.5 // indirect
    modernc.org/mathutil v1.5.0 // indirect
    modernc.org/memory v1.5.0 // indirect
    modernc.org/sqlite v1.23.1 // indirect
)
116    go.sum    Normal file
@@ -0,0 +1,116 @@
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
|
||||
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/gabriel-vasile/mimetype v1.4.11 h1:AQvxbp830wPhHTqc1u7nzoLT+ZFxGY7emj5DR5DYFik=
|
||||
github.com/gabriel-vasile/mimetype v1.4.11/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
|
||||
github.com/getkin/kin-openapi v0.133.0 h1:pJdmNohVIJ97r4AUFtEXRXwESr8b0bD721u/Tz6k8PQ=
|
||||
github.com/getkin/kin-openapi v0.133.0/go.mod h1:boAciF6cXk5FhPqe/NQeBTeenbjqU4LhWBf09ILVvWE=
|
||||
github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo=
|
||||
github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k=
|
||||
github.com/glebarez/sqlite v1.11.0 h1:wSG0irqzP6VurnMEpFGer5Li19RpIRi2qvQz++w0GMw=
|
||||
github.com/glebarez/sqlite v1.11.0/go.mod h1:h8/o8j5wiAsqSPoWELDUdJXhjAhsVliSn7bWZjOhrgQ=
|
||||
github.com/go-fuego/fuego v0.19.0 h1:kxkkBsrbGZP1YnPCAPIdUpMu53nreqN8N86lfi50CJw=
|
||||
github.com/go-fuego/fuego v0.19.0/go.mod h1:O7CLZbvCCBA9ijhN/q8SnyFTzDdMsqYZjUbR82VDHhA=
|
||||
github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8=
|
||||
github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo=
|
||||
github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI=
|
||||
github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag=
|
||||
github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls=
|
||||
github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
|
||||
github.com/go-playground/validator/v10 v10.28.0 h1:Q7ibns33JjyW48gHkuFT91qX48KG0ktULL6FgHdG688=
|
||||
github.com/go-playground/validator/v10 v10.28.0/go.mod h1:GoI6I1SjPBh9p7ykNE/yj3fFYbyDOpwMn5KXd+m2hUU=
|
||||
github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM=
|
||||
github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
|
||||
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/schema v1.4.1 h1:jUg5hUjCSDZpNGLuXQOgIWGdlgrIdYvgQ0wZtdK1M3E=
|
||||
github.com/gorilla/schema v1.4.1/go.mod h1:Dg5SSm5PV60mhF2NFaTV1xuYYj8tV8NOPRo4FggUMnM=
|
||||
github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
|
||||
github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
|
||||
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
|
||||
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8=
|
||||
github.com/mailru/easyjson v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
|
||||
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
|
||||
github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI=
|
||||
github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||
github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 h1:G7ERwszslrBzRxj//JalHPu/3yz+De2J+4aLtSRlHiY=
|
||||
github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037/go.mod h1:2bpvgLBZEtENV5scfDFEtB/5+1M4hkQhDQrccEJ/qGw=
|
||||
github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90 h1:bQx3WeLcUWy+RletIKwUIt4x3t8n2SxavmoclizMb8c=
|
||||
github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90/go.mod h1:y5+oSEHCPT/DGrS++Wc/479ERge0zTFxaF8PbGKcg2o=
|
||||
github.com/perimeterx/marshmallow v1.1.5 h1:a2LALqQ1BlHM8PZblsDdidgv1mWi1DgC2UmX50IvK2s=
|
||||
github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0Vchtcl8we9tYaXw=
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/thejerf/slogassert v0.3.4 h1:VoTsXixRbXMrRSSxDjYTiEDCM4VWbsYPW5rB/hX24kM=
|
||||
github.com/thejerf/slogassert v0.3.4/go.mod h1:0zn9ISLVKo1aPMTqcGfG1o6dWwt+Rk574GlUxHD4rs8=
|
||||
github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA=
|
||||
github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
|
||||
github.com/woodsbury/decimal128 v1.4.0 h1:xJATj7lLu4f2oObouMt2tgGiElE5gO6mSWUjQsBgUlc=
|
||||
github.com/woodsbury/decimal128 v1.4.0/go.mod h1:BP46FUrVjVhdTbKT+XuQh2xfQaGki9LMIRJSFuh6THU=
|
||||
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
|
||||
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
|
||||
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg=
|
||||
gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs=
|
||||
modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE=
|
||||
modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
|
||||
modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
|
||||
modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E=
|
||||
modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds=
|
||||
modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU=
|
||||
modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM=
|
||||
modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk=
|
||||
84    internal/cgroup/config.go    Normal file
@@ -0,0 +1,84 @@
// ABOUTME: Configuration types and parsing for cgroup limits and process mapping.
// ABOUTME: Parses CGROUP_LIMITS and CGROUP_PROCESS_MAP environment variables.
package cgroup

import (
    "encoding/json"
    "fmt"
    "os"
)

// CgroupLimit holds the resource limits for a container/cgroup
type CgroupLimit struct {
    CPUCores    float64 // CPU limit in cores (e.g., 0.5 for "500m", 2.0 for "2")
    MemoryBytes uint64  // Memory limit in bytes
}

// CgroupLimits maps container names to their resource limits
type CgroupLimits map[string]CgroupLimit

// ProcessMapping maps process names to container names (for cgroup path discovery)
type ProcessMapping map[string]string

// CgroupPathMapping maps cgroup paths to container names (built at runtime)
type CgroupPathMapping map[string]string

// rawLimitEntry is the JSON structure for each entry in CGROUP_LIMITS
type rawLimitEntry struct {
    CPU    string `json:"cpu"`
    Memory string `json:"memory"`
}

// ParseCgroupLimitsEnv parses the CGROUP_LIMITS environment variable.
// Expected format: {"container-name": {"cpu": "500m", "memory": "1Gi"}, ...}
func ParseCgroupLimitsEnv() (CgroupLimits, error) {
    raw := os.Getenv("CGROUP_LIMITS")
    if raw == "" {
        return nil, nil // No limits configured
    }

    var parsed map[string]rawLimitEntry
    if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
        return nil, fmt.Errorf("parsing CGROUP_LIMITS: %w", err)
    }

    limits := make(CgroupLimits)
    for name, entry := range parsed {
        var limit CgroupLimit
        var err error

        if entry.CPU != "" {
            limit.CPUCores, err = ParseCPU(entry.CPU)
            if err != nil {
                return nil, fmt.Errorf("parsing CPU for %q: %w", name, err)
            }
        }

        if entry.Memory != "" {
            limit.MemoryBytes, err = ParseMemory(entry.Memory)
            if err != nil {
                return nil, fmt.Errorf("parsing memory for %q: %w", name, err)
            }
        }

        limits[name] = limit
    }

    return limits, nil
}

// ParseProcessMappingEnv parses the CGROUP_PROCESS_MAP environment variable.
// Expected format: {"process-name": "container-name", ...}
func ParseProcessMappingEnv() (ProcessMapping, error) {
    raw := os.Getenv("CGROUP_PROCESS_MAP")
    if raw == "" {
        return nil, nil // No mapping configured
    }

    var parsed map[string]string
    if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
        return nil, fmt.Errorf("parsing CGROUP_PROCESS_MAP: %w", err)
    }

    return ProcessMapping(parsed), nil
}
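For illustration, a small sketch of how these parsers could be exercised; the container names, process names, and limit values are invented, not taken from this repository.

// Hypothetical usage of the CGROUP_LIMITS / CGROUP_PROCESS_MAP parsers.
package main

import (
    "fmt"
    "os"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/cgroup"
)

func main() {
    os.Setenv("CGROUP_LIMITS", `{"runner":{"cpu":"500m","memory":"1Gi"},"dind":{"cpu":"2","memory":"4Gi"}}`)
    os.Setenv("CGROUP_PROCESS_MAP", `{"forgejo-runner":"runner","dockerd":"dind"}`)

    limits, err := cgroup.ParseCgroupLimitsEnv()
    if err != nil {
        panic(err)
    }
    // Prints: runner -> 0.5 cores, 1073741824 bytes
    fmt.Printf("runner -> %v cores, %d bytes\n", limits["runner"].CPUCores, limits["runner"].MemoryBytes)

    procMap, _ := cgroup.ParseProcessMappingEnv()
    fmt.Println("dockerd belongs to container:", procMap["dockerd"])
}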
96    internal/cgroup/parse.go    Normal file
@@ -0,0 +1,96 @@
// ABOUTME: Parses Kubernetes-style resource notation for CPU and memory.
// ABOUTME: CPU: "500m" = 0.5 cores, "2" = 2 cores.
// ABOUTME: Memory: "1Gi" = 1 GiB, "512Mi" = 512 MiB, "1G" = 1 GB.
package cgroup

import (
    "fmt"
    "strconv"
    "strings"
)

// ParseCPU parses Kubernetes CPU notation to cores.
// Examples: "500m" => 0.5, "2" => 2.0, "100m" => 0.1, "2000m" => 2.0
func ParseCPU(value string) (float64, error) {
    value = strings.TrimSpace(value)
    if value == "" {
        return 0, fmt.Errorf("empty CPU value")
    }

    // Handle millicores suffix
    if strings.HasSuffix(value, "m") {
        millis, err := strconv.ParseFloat(strings.TrimSuffix(value, "m"), 64)
        if err != nil {
            return 0, fmt.Errorf("parsing millicores: %w", err)
        }
        return millis / 1000.0, nil
    }

    // Plain number means cores
    cores, err := strconv.ParseFloat(value, 64)
    if err != nil {
        return 0, fmt.Errorf("parsing cores: %w", err)
    }

    return cores, nil
}

// ParseMemory parses Kubernetes memory notation to bytes.
// Supports:
//   - Binary suffixes: Ki, Mi, Gi, Ti (powers of 1024)
//   - Decimal suffixes: K, M, G, T (powers of 1000)
//   - Plain numbers: bytes
func ParseMemory(value string) (uint64, error) {
    value = strings.TrimSpace(value)
    if value == "" {
        return 0, fmt.Errorf("empty memory value")
    }

    // Binary suffixes (powers of 1024)
    binarySuffixes := map[string]uint64{
        "Ki": 1024,
        "Mi": 1024 * 1024,
        "Gi": 1024 * 1024 * 1024,
        "Ti": 1024 * 1024 * 1024 * 1024,
    }

    // Decimal suffixes (powers of 1000)
    decimalSuffixes := map[string]uint64{
        "K": 1000,
        "M": 1000 * 1000,
        "G": 1000 * 1000 * 1000,
        "T": 1000 * 1000 * 1000 * 1000,
    }

    // Try binary suffixes first (2-char)
    for suffix, multiplier := range binarySuffixes {
        if strings.HasSuffix(value, suffix) {
            numStr := strings.TrimSuffix(value, suffix)
            num, err := strconv.ParseFloat(numStr, 64)
            if err != nil {
                return 0, fmt.Errorf("parsing memory value: %w", err)
            }
            return uint64(num * float64(multiplier)), nil
        }
    }

    // Try decimal suffixes (1-char)
    for suffix, multiplier := range decimalSuffixes {
        if strings.HasSuffix(value, suffix) {
            numStr := strings.TrimSuffix(value, suffix)
            num, err := strconv.ParseFloat(numStr, 64)
            if err != nil {
                return 0, fmt.Errorf("parsing memory value: %w", err)
            }
            return uint64(num * float64(multiplier)), nil
        }
    }

    // Plain number (bytes)
    bytes, err := strconv.ParseUint(value, 10, 64)
    if err != nil {
        return 0, fmt.Errorf("parsing bytes: %w", err)
    }

    return bytes, nil
}
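A few worked conversions following the rules above; the input values are illustrative.

// Conversions implied by ParseCPU and ParseMemory:
//   ParseCPU("500m")     -> 0.5 cores      (500 / 1000)
//   ParseCPU("2")        -> 2.0 cores
//   ParseMemory("1Gi")   -> 1073741824     (1 * 1024^3)
//   ParseMemory("512Mi") -> 536870912      (512 * 1024^2)
//   ParseMemory("1G")    -> 1000000000     (1 * 1000^3)
//   ParseMemory("2048")  -> 2048           (plain bytes)
cores, _ := cgroup.ParseCPU("1500m")    // 1.5
bytes, _ := cgroup.ParseMemory("1.5Gi") // 1610612736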
84    internal/cgroup/parse_test.go    Normal file
@@ -0,0 +1,84 @@
package cgroup

import (
    "testing"
)

func TestParseCPU(t *testing.T) {
    tests := []struct {
        name    string
        input   string
        want    float64
        wantErr bool
    }{
        {"millicores 500m", "500m", 0.5, false},
        {"millicores 100m", "100m", 0.1, false},
        {"millicores 2000m", "2000m", 2.0, false},
        {"millicores 1m", "1m", 0.001, false},
        {"cores integer", "2", 2.0, false},
        {"cores decimal", "1.5", 1.5, false},
        {"cores with spaces", " 2 ", 2.0, false},
        {"empty string", "", 0, true},
        {"invalid format", "abc", 0, true},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got, err := ParseCPU(tt.input)
            if (err != nil) != tt.wantErr {
                t.Errorf("ParseCPU() error = %v, wantErr %v", err, tt.wantErr)
                return
            }
            if !tt.wantErr && got != tt.want {
                t.Errorf("ParseCPU() = %v, want %v", got, tt.want)
            }
        })
    }
}

func TestParseMemory(t *testing.T) {
    tests := []struct {
        name    string
        input   string
        want    uint64
        wantErr bool
    }{
        // Binary suffixes (powers of 1024)
        {"Ki", "1Ki", 1024, false},
        {"Mi", "1Mi", 1024 * 1024, false},
        {"Gi", "1Gi", 1024 * 1024 * 1024, false},
        {"Ti", "1Ti", 1024 * 1024 * 1024 * 1024, false},
        {"512Mi", "512Mi", 512 * 1024 * 1024, false},
        {"2Gi", "2Gi", 2 * 1024 * 1024 * 1024, false},

        // Decimal suffixes (powers of 1000)
        {"K", "1K", 1000, false},
        {"M", "1M", 1000000, false},
        {"G", "1G", 1000000000, false},
        {"T", "1T", 1000000000000, false},

        // Plain bytes
        {"bytes", "1024", 1024, false},
        {"large bytes", "1073741824", 1073741824, false},

        // With spaces
        {"with spaces", " 1Gi ", 1024 * 1024 * 1024, false},

        // Error cases
        {"empty", "", 0, true},
        {"invalid", "abc", 0, true},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            got, err := ParseMemory(tt.input)
            if (err != nil) != tt.wantErr {
                t.Errorf("ParseMemory() error = %v, wantErr %v", err, tt.wantErr)
                return
            }
            if !tt.wantErr && got != tt.want {
                t.Errorf("ParseMemory() = %v, want %v", got, tt.want)
            }
        })
    }
}
@@ -6,8 +6,9 @@ import (
    "log/slog"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/metrics"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/output"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/output"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

// Config holds the collector configuration

@@ -19,22 +20,36 @@ type Config struct {

// Collector orchestrates metric collection
type Collector struct {
    config     Config
    aggregator *metrics.Aggregator
    writer     output.Writer
    logger     *slog.Logger
    config        Config
    aggregator    *metrics.Aggregator
    writer        output.Writer
    logger        *slog.Logger
    accumulator   *summary.Accumulator
    summaryWriter *summary.SummaryWriter
    pushClient    *summary.PushClient
}

// New creates a new collector
func New(cfg Config, writer output.Writer, logger *slog.Logger) *Collector {
    return &Collector{
        config:     cfg,
        aggregator: metrics.NewAggregator(cfg.ProcPath, cfg.TopN),
        writer:     writer,
        logger:     logger,
        config:      cfg,
        aggregator:  metrics.NewAggregator(cfg.ProcPath, cfg.TopN),
        writer:      writer,
        logger:      logger,
        accumulator: summary.NewAccumulator(cfg.TopN),
    }
}

// SetSummaryWriter attaches a summary writer for emitting run summaries on shutdown
func (c *Collector) SetSummaryWriter(w *summary.SummaryWriter) {
    c.summaryWriter = w
}

// SetPushClient attaches a push client for sending summaries to the receiver
func (c *Collector) SetPushClient(p *summary.PushClient) {
    c.pushClient = p
}

// Run starts the collector loop and blocks until context is cancelled
func (c *Collector) Run(ctx context.Context) error {
    c.logger.Info("collector starting",

@@ -55,6 +70,7 @@ func (c *Collector) Run(ctx context.Context) error {
        select {
        case <-ctx.Done():
            c.logger.Info("collector stopping")
            c.emitSummary(context.Background()) // Use fresh context for shutdown tasks
            return ctx.Err()
        case <-ticker.C:
            if err := c.collect(); err != nil {

@@ -75,9 +91,37 @@ func (c *Collector) collect() error {
        return fmt.Errorf("writing metrics: %w", err)
    }

    c.accumulator.Add(m)

    return nil
}

// emitSummary computes and writes the run summary if a writer is configured
func (c *Collector) emitSummary(ctx context.Context) {
    s := c.accumulator.Summarize()
    if s == nil {
        c.logger.Info("no samples collected, skipping run summary")
        return
    }

    c.logger.Info("emitting run summary",
        slog.Int("sample_count", s.SampleCount),
        slog.Float64("duration_seconds", s.DurationSeconds),
    )

    if c.summaryWriter != nil {
        c.summaryWriter.Write(s)
    }

    if c.pushClient != nil {
        if err := c.pushClient.Push(ctx, s); err != nil {
            c.logger.Error("failed to push metrics", slog.String("error", err.Error()))
        } else {
            c.logger.Info("metrics pushed successfully")
        }
    }
}

// CollectOnce performs a single collection and returns the metrics
func (c *Collector) CollectOnce() (*metrics.SystemMetrics, error) {
    return c.aggregator.Collect()
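A rough sketch of how these hooks might be wired together in a main function; the endpoint URL, PUSH_TOKEN variable, and interval are placeholders, not values from this repository.

// Hypothetical wiring of the collector with a summary writer and push client.
package main

import (
    "context"
    "log/slog"
    "os"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/collector"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/output"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func main() {
    logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

    writer := output.NewLoggerWriter(output.LoggerConfig{
        Output: os.Stdout,
        Format: output.LogFormatJSON,
        Level:  slog.LevelInfo,
    })

    c := collector.New(collector.Config{
        ProcPath: "/proc",
        Interval: 10 * time.Second,
        TopN:     5,
    }, writer, logger)

    // Emit a run summary on shutdown and, if a token is present, push it to the receiver.
    c.SetSummaryWriter(summary.NewSummaryWriter(os.Stdout, "json"))
    if token := os.Getenv("PUSH_TOKEN"); token != "" {
        c.SetPushClient(summary.NewPushClient("http://sizer.example/api/v1/metrics", token))
    }

    _ = c.Run(context.Background())
}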
98    internal/collector/collector_test.go    Normal file
@@ -0,0 +1,98 @@
// ABOUTME: Tests for the collector's summary integration.
// ABOUTME: Validates that run summaries are emitted on shutdown and handles missing writer gracefully.
package collector

import (
    "bytes"
    "context"
    "log/slog"
    "strings"
    "testing"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/output"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func TestCollector_EmitsSummaryOnShutdown(t *testing.T) {
    // Use testdata/proc as the proc filesystem
    procPath := "testdata/proc"

    // Metrics output (regular collection output)
    var metricsOut bytes.Buffer
    metricsWriter := output.NewLoggerWriter(output.LoggerConfig{
        Output: &metricsOut,
        Format: output.LogFormatJSON,
        Level:  slog.LevelInfo,
    })

    // Summary output
    var summaryOut bytes.Buffer
    sw := summary.NewSummaryWriter(&summaryOut, "json")

    // Silence app logs
    appLogger := slog.New(slog.NewTextHandler(&bytes.Buffer{}, nil))

    c := New(Config{
        ProcPath: procPath,
        Interval: 50 * time.Millisecond,
        TopN:     5,
    }, metricsWriter, appLogger)

    c.SetSummaryWriter(sw)

    // Run collector briefly then cancel
    ctx, cancel := context.WithCancel(context.Background())
    go func() {
        // Let at least 2 collection cycles run
        time.Sleep(150 * time.Millisecond)
        cancel()
    }()

    _ = c.Run(ctx)

    // Verify summary was emitted
    summaryOutput := summaryOut.String()
    if !strings.Contains(summaryOutput, "run_summary") {
        t.Errorf("expected 'run_summary' in output, got: %s", summaryOutput)
    }
    if !strings.Contains(summaryOutput, "duration_seconds") {
        t.Errorf("expected 'duration_seconds' in output, got: %s", summaryOutput)
    }
    if !strings.Contains(summaryOutput, "sample_count") {
        t.Errorf("expected 'sample_count' in output, got: %s", summaryOutput)
    }
}

func TestCollector_NoSummaryWithoutWriter(t *testing.T) {
    procPath := "testdata/proc"

    var metricsOut bytes.Buffer
    metricsWriter := output.NewLoggerWriter(output.LoggerConfig{
        Output: &metricsOut,
        Format: output.LogFormatJSON,
        Level:  slog.LevelInfo,
    })

    appLogger := slog.New(slog.NewTextHandler(&bytes.Buffer{}, nil))

    c := New(Config{
        ProcPath: procPath,
        Interval: 50 * time.Millisecond,
        TopN:     5,
    }, metricsWriter, appLogger)

    // Deliberately do NOT set a summary writer

    ctx, cancel := context.WithCancel(context.Background())
    go func() {
        time.Sleep(100 * time.Millisecond)
        cancel()
    }()

    // Should not panic
    err := c.Run(ctx)
    if err != context.Canceled {
        t.Errorf("expected context.Canceled, got: %v", err)
    }
}
1    internal/collector/testdata/proc/1/stat    vendored    Normal file
@@ -0,0 +1 @@
1 (init) S 0 1 1 0 -1 4194560 1000 0 0 0 100 50 0 0 20 0 1 0 1 10000000 500 18446744073709551615 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

14    internal/collector/testdata/proc/1/status    vendored    Normal file
@@ -0,0 +1,14 @@
Name: init
Uid: 0 0 0 0
Gid: 0 0 0 0
VmPeak: 10000 kB
VmSize: 10000 kB
VmRSS: 5000 kB
VmData: 3000 kB
VmStk: 200 kB
VmExe: 100 kB
VmLib: 1000 kB
RssAnon: 3000 kB
RssFile: 1500 kB
RssShmem: 500 kB
Threads: 1

1    internal/collector/testdata/proc/cpuinfo    vendored    Normal file
@@ -0,0 +1 @@
processor : 0

5    internal/collector/testdata/proc/meminfo    vendored    Normal file
@@ -0,0 +1,5 @@
MemTotal: 16348500 kB
MemFree: 8000000 kB
MemAvailable: 12000000 kB
Buffers: 500000 kB
Cached: 3000000 kB

1    internal/collector/testdata/proc/stat    vendored    Normal file
@@ -0,0 +1 @@
cpu 10000 500 3000 80000 200 50 30 0 0 0
402    internal/integration/integration_test.go    Normal file
@@ -0,0 +1,402 @@
// ABOUTME: Integration tests for collector and receiver interaction.
|
||||
// ABOUTME: Tests that the push client can successfully send metrics to the receiver.
|
||||
package integration
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-fuego/fuego"
|
||||
|
||||
"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver"
|
||||
"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
|
||||
)
|
||||
|
||||
const (
|
||||
testReadToken = "integration-test-token"
|
||||
testHMACKey = "integration-hmac-key"
|
||||
)
|
||||
|
||||
// setupTestReceiver creates a test receiver with SQLite storage, auth, and HTTP server
|
||||
func setupTestReceiver(t *testing.T) (*receiver.Store, *httptest.Server, func()) {
|
||||
t.Helper()
|
||||
dbPath := filepath.Join(t.TempDir(), "test.db")
|
||||
store, err := receiver.NewStore(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore() error = %v", err)
|
||||
}
|
||||
|
||||
handler := receiver.NewHandler(store, slog.New(slog.NewTextHandler(io.Discard, nil)), testReadToken, testHMACKey, 0)
|
||||
s := fuego.NewServer(
|
||||
fuego.WithoutStartupMessages(),
|
||||
fuego.WithEngineOptions(
|
||||
fuego.WithOpenAPIConfig(fuego.OpenAPIConfig{
|
||||
Disabled: true,
|
||||
}),
|
||||
),
|
||||
)
|
||||
handler.RegisterRoutes(s)
|
||||
|
||||
server := httptest.NewServer(s.Mux)
|
||||
|
||||
cleanup := func() {
|
||||
server.Close()
|
||||
_ = store.Close()
|
||||
}
|
||||
|
||||
return store, server, cleanup
|
||||
}
|
||||
|
||||
// generatePushToken generates a push token for an execution context
|
||||
func generatePushToken(exec summary.ExecutionContext) string {
|
||||
return receiver.GenerateToken(testHMACKey, exec.Organization, exec.Repository, exec.Workflow, exec.Job)
|
||||
}
|
||||
|
||||
func TestPushClientToReceiver(t *testing.T) {
|
||||
store, server, cleanup := setupTestReceiver(t)
|
||||
defer cleanup()
|
||||
|
||||
// Test execution context
|
||||
testCtx := summary.ExecutionContext{
|
||||
Organization: "integration-org",
|
||||
Repository: "integration-repo",
|
||||
Workflow: "test.yml",
|
||||
Job: "integration-test",
|
||||
RunID: "run-integration-123",
|
||||
}
|
||||
|
||||
// Create a test summary
|
||||
testSummary := &summary.RunSummary{
|
||||
StartTime: time.Now().Add(-time.Minute),
|
||||
EndTime: time.Now(),
|
||||
DurationSeconds: 60.0,
|
||||
SampleCount: 10,
|
||||
CPUTotal: summary.StatSummary{Peak: 85.5, Avg: 42.3, P95: 78.0},
|
||||
MemUsedBytes: summary.StatSummary{Peak: 4294967296, Avg: 2147483648, P95: 3865470566},
|
||||
MemUsedPercent: summary.StatSummary{Peak: 50.0, Avg: 25.0, P95: 45.0},
|
||||
TopCPUProcesses: []summary.ProcessPeak{
|
||||
{PID: 1234, Name: "test-process", PeakCPU: 45.0, PeakMem: 1073741824},
|
||||
},
|
||||
TopMemProcesses: []summary.ProcessPeak{
|
||||
{PID: 1234, Name: "test-process", PeakCPU: 45.0, PeakMem: 1073741824},
|
||||
},
|
||||
}
|
||||
|
||||
// Build payload matching what push client sends
|
||||
payload := struct {
|
||||
Execution summary.ExecutionContext `json:"execution"`
|
||||
Summary summary.RunSummary `json:"run_summary"`
|
||||
}{
|
||||
Execution: testCtx,
|
||||
Summary: *testSummary,
|
||||
}
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal() error = %v", err)
|
||||
}
|
||||
|
||||
// Send via HTTP client with scoped push token
|
||||
pushToken := generatePushToken(testCtx)
|
||||
req, err := http.NewRequest(http.MethodPost, server.URL+"/api/v1/metrics", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest() error = %v", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer "+pushToken)
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("Do() error = %v", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusCreated {
|
||||
t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusCreated)
|
||||
}
|
||||
|
||||
// Verify metrics were stored
|
||||
metrics, err := store.GetMetricsByWorkflowJob("integration-org", "integration-repo", "test.yml", "integration-test")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
|
||||
if len(metrics) != 1 {
|
||||
t.Fatalf("got %d metrics, want 1", len(metrics))
|
||||
}
|
||||
|
||||
m := metrics[0]
|
||||
if m.Organization != testCtx.Organization {
|
||||
t.Errorf("Organization = %q, want %q", m.Organization, testCtx.Organization)
|
||||
}
|
||||
if m.Repository != testCtx.Repository {
|
||||
t.Errorf("Repository = %q, want %q", m.Repository, testCtx.Repository)
|
||||
}
|
||||
if m.Workflow != testCtx.Workflow {
|
||||
t.Errorf("Workflow = %q, want %q", m.Workflow, testCtx.Workflow)
|
||||
}
|
||||
if m.Job != testCtx.Job {
|
||||
t.Errorf("Job = %q, want %q", m.Job, testCtx.Job)
|
||||
}
|
||||
if m.RunID != testCtx.RunID {
|
||||
t.Errorf("RunID = %q, want %q", m.RunID, testCtx.RunID)
|
||||
}
|
||||
|
||||
// Verify payload was stored correctly
|
||||
var storedSummary summary.RunSummary
|
||||
if err := json.Unmarshal([]byte(m.Payload), &storedSummary); err != nil {
|
||||
t.Fatalf("Unmarshal payload error = %v", err)
|
||||
}
|
||||
|
||||
if storedSummary.SampleCount != testSummary.SampleCount {
|
||||
t.Errorf("SampleCount = %d, want %d", storedSummary.SampleCount, testSummary.SampleCount)
|
||||
}
|
||||
if storedSummary.CPUTotal.Peak != testSummary.CPUTotal.Peak {
|
||||
t.Errorf("CPUTotal.Peak = %f, want %f", storedSummary.CPUTotal.Peak, testSummary.CPUTotal.Peak)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPushClientIntegration(t *testing.T) {
|
||||
store, server, cleanup := setupTestReceiver(t)
|
||||
defer cleanup()
|
||||
|
||||
// Set environment variables for the push client
|
||||
t.Setenv("GITHUB_REPOSITORY_OWNER", "push-client-org")
|
||||
t.Setenv("GITHUB_REPOSITORY", "push-client-repo")
|
||||
t.Setenv("GITHUB_WORKFLOW", "push-test.yml")
|
||||
t.Setenv("GITHUB_JOB", "push-job")
|
||||
t.Setenv("GITHUB_RUN_ID", "push-run-456")
|
||||
|
||||
// Generate push token
|
||||
pushToken := receiver.GenerateToken(testHMACKey, "push-client-org", "push-client-repo", "push-test.yml", "push-job")
|
||||
|
||||
// Create push client with token - it reads execution context from env vars
|
||||
pushClient := summary.NewPushClient(server.URL+"/api/v1/metrics", pushToken)
|
||||
|
||||
// Verify execution context was read from env
|
||||
ctx := pushClient.ExecutionContext()
|
||||
if ctx.Organization != "push-client-org" {
|
||||
t.Errorf("Organization = %q, want %q", ctx.Organization, "push-client-org")
|
||||
}
|
||||
|
||||
// Create and push a summary
|
||||
testSummary := &summary.RunSummary{
|
||||
StartTime: time.Now().Add(-30 * time.Second),
|
||||
EndTime: time.Now(),
|
||||
DurationSeconds: 30.0,
|
||||
SampleCount: 6,
|
||||
CPUTotal: summary.StatSummary{Peak: 50.0, Avg: 25.0, P95: 45.0},
|
||||
MemUsedBytes: summary.StatSummary{Peak: 1000000, Avg: 500000, P95: 900000},
|
||||
MemUsedPercent: summary.StatSummary{Peak: 10.0, Avg: 5.0, P95: 9.0},
|
||||
}
|
||||
|
||||
// Push the summary
|
||||
err := pushClient.Push(context.Background(), testSummary)
|
||||
if err != nil {
|
||||
t.Fatalf("Push() error = %v", err)
|
||||
}
|
||||
|
||||
// Verify it was stored
|
||||
metrics, err := store.GetMetricsByWorkflowJob("push-client-org", "push-client-repo", "push-test.yml", "push-job")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
|
||||
if len(metrics) != 1 {
|
||||
t.Fatalf("got %d metrics, want 1", len(metrics))
|
||||
}
|
||||
|
||||
if metrics[0].RunID != "push-run-456" {
|
||||
t.Errorf("RunID = %q, want %q", metrics[0].RunID, "push-run-456")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiplePushes(t *testing.T) {
|
||||
store, server, cleanup := setupTestReceiver(t)
|
||||
defer cleanup()
|
||||
|
||||
// Simulate multiple workflow runs pushing metrics via direct HTTP POST
|
||||
runs := []summary.ExecutionContext{
|
||||
{Organization: "org-a", Repository: "repo-1", Workflow: "ci.yml", Job: "build", RunID: "run-1"},
|
||||
{Organization: "org-a", Repository: "repo-1", Workflow: "ci.yml", Job: "build", RunID: "run-2"},
|
||||
{Organization: "org-a", Repository: "repo-1", Workflow: "ci.yml", Job: "test", RunID: "run-1"},
|
||||
{Organization: "org-a", Repository: "repo-2", Workflow: "ci.yml", Job: "build", RunID: "run-1"},
|
||||
}
|
||||
|
||||
for _, execCtx := range runs {
|
||||
payload := struct {
|
||||
Execution summary.ExecutionContext `json:"execution"`
|
||||
Summary summary.RunSummary `json:"run_summary"`
|
||||
}{
|
||||
Execution: execCtx,
|
||||
Summary: summary.RunSummary{
|
||||
SampleCount: 5,
|
||||
CPUTotal: summary.StatSummary{Peak: 50.0},
|
||||
},
|
||||
}
|
||||
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal() error = %v", err)
|
||||
}
|
||||
|
||||
pushToken := generatePushToken(execCtx)
|
||||
req, err := http.NewRequest(http.MethodPost, server.URL+"/api/v1/metrics", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
t.Fatalf("NewRequest() error = %v for run %+v", err, execCtx)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer "+pushToken)
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("Do() error = %v for run %+v", err, execCtx)
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusCreated {
|
||||
t.Fatalf("status = %d, want %d for run %+v", resp.StatusCode, http.StatusCreated, execCtx)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify filtering works correctly
|
||||
metrics, err := store.GetMetricsByWorkflowJob("org-a", "repo-1", "ci.yml", "build")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 2 {
|
||||
t.Errorf("got %d metrics for org-a/repo-1/ci.yml/build, want 2", len(metrics))
|
||||
}
|
||||
|
||||
metrics, err = store.GetMetricsByWorkflowJob("org-a", "repo-1", "ci.yml", "test")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 1 {
|
||||
t.Errorf("got %d metrics for org-a/repo-1/ci.yml/test, want 1", len(metrics))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPushClientWithTokenIntegration(t *testing.T) {
|
||||
readToken := "integration-read-secret"
|
||||
hmacKey := "integration-hmac-secret"
|
||||
store, server, cleanup := setupTestReceiverWithToken(t, readToken, hmacKey)
|
||||
defer cleanup()
|
||||
|
||||
// Generate a scoped token via the API
|
||||
tokenReqBody, _ := json.Marshal(map[string]string{
|
||||
"organization": "token-org",
|
||||
"repository": "token-repo",
|
||||
"workflow": "ci.yml",
|
||||
"job": "build",
|
||||
})
|
||||
tokenReq, _ := http.NewRequest(http.MethodPost, server.URL+"/api/v1/token", bytes.NewReader(tokenReqBody))
|
||||
tokenReq.Header.Set("Authorization", "Bearer "+readToken)
|
||||
tokenReq.Header.Set("Content-Type", "application/json")
|
||||
|
||||
tokenResp, err := http.DefaultClient.Do(tokenReq)
|
||||
if err != nil {
|
||||
t.Fatalf("token request error: %v", err)
|
||||
}
|
||||
defer func() { _ = tokenResp.Body.Close() }()
|
||||
|
||||
if tokenResp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("token request status = %d, want %d", tokenResp.StatusCode, http.StatusOK)
|
||||
}
|
||||
|
||||
var tokenBody struct {
|
||||
Token string `json:"token"`
|
||||
}
|
||||
if err := json.NewDecoder(tokenResp.Body).Decode(&tokenBody); err != nil {
|
||||
t.Fatalf("decode token response: %v", err)
|
||||
}
|
||||
|
||||
// Use the scoped token to push metrics
|
||||
t.Setenv("GITHUB_REPOSITORY_OWNER", "token-org")
|
||||
t.Setenv("GITHUB_REPOSITORY", "token-repo")
|
||||
t.Setenv("GITHUB_WORKFLOW", "ci.yml")
|
||||
t.Setenv("GITHUB_JOB", "build")
|
||||
t.Setenv("GITHUB_RUN_ID", "token-run-1")
|
||||
|
||||
pushClient := summary.NewPushClient(server.URL+"/api/v1/metrics", tokenBody.Token)
|
||||
|
||||
testSummary := &summary.RunSummary{
|
||||
StartTime: time.Now().Add(-10 * time.Second),
|
||||
EndTime: time.Now(),
|
||||
DurationSeconds: 10.0,
|
||||
SampleCount: 2,
|
||||
}
|
||||
|
||||
if err := pushClient.Push(context.Background(), testSummary); err != nil {
|
||||
t.Fatalf("Push() error = %v", err)
|
||||
}
|
||||
|
||||
// Verify stored
|
||||
metrics, err := store.GetMetricsByWorkflowJob("token-org", "token-repo", "ci.yml", "build")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 1 {
|
||||
t.Fatalf("got %d metrics, want 1", len(metrics))
|
||||
}
|
||||
if metrics[0].RunID != "token-run-1" {
|
||||
t.Errorf("RunID = %q, want %q", metrics[0].RunID, "token-run-1")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPushClientWithWrongTokenIntegration(t *testing.T) {
|
||||
readToken := "integration-read-secret"
|
||||
hmacKey := "integration-hmac-secret"
|
||||
_, server, cleanup := setupTestReceiverWithToken(t, readToken, hmacKey)
|
||||
defer cleanup()
|
||||
|
||||
t.Setenv("GITHUB_REPOSITORY_OWNER", "token-org")
|
||||
t.Setenv("GITHUB_REPOSITORY", "token-repo")
|
||||
t.Setenv("GITHUB_WORKFLOW", "ci.yml")
|
||||
t.Setenv("GITHUB_JOB", "build")
|
||||
t.Setenv("GITHUB_RUN_ID", "token-run-2")
|
||||
|
||||
pushClient := summary.NewPushClient(server.URL+"/api/v1/metrics", "wrong-token")
|
||||
|
||||
err := pushClient.Push(context.Background(), &summary.RunSummary{SampleCount: 1})
|
||||
if err == nil {
|
||||
t.Error("Push() with wrong token should fail")
|
||||
}
|
||||
}
|
||||
|
||||
func setupTestReceiverWithToken(t *testing.T, readToken, hmacKey string) (*receiver.Store, *httptest.Server, func()) {
|
||||
t.Helper()
|
||||
dbPath := filepath.Join(t.TempDir(), "test.db")
|
||||
store, err := receiver.NewStore(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore() error = %v", err)
|
||||
}
|
||||
|
||||
handler := receiver.NewHandler(store, slog.New(slog.NewTextHandler(io.Discard, nil)), readToken, hmacKey, 0)
|
||||
s := fuego.NewServer(
|
||||
fuego.WithoutStartupMessages(),
|
||||
fuego.WithEngineOptions(
|
||||
fuego.WithOpenAPIConfig(fuego.OpenAPIConfig{
|
||||
Disabled: true,
|
||||
}),
|
||||
),
|
||||
)
|
||||
handler.RegisterRoutes(s)
|
||||
|
||||
server := httptest.NewServer(s.Mux)
|
||||
|
||||
cleanup := func() {
|
||||
server.Close()
|
||||
_ = store.Close()
|
||||
}
|
||||
|
||||
return store, server, cleanup
|
||||
}
|
||||
@@ -4,23 +4,37 @@ import (
    "sort"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/proc"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/cgroup"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/proc"
)

// Aggregator collects and aggregates metrics from processes
type Aggregator struct {
    procPath    string
    topN        int
    prevCPU     *CPUSnapshot
    prevProcCPU map[int]*ProcessCPUSnapshot
    procPath          string
    topN              int
    prevCPU           *CPUSnapshot
    prevProcCPU       map[int]*ProcessCPUSnapshot
    cgroupLimits      cgroup.CgroupLimits      // container name -> limits
    processMapping    cgroup.ProcessMapping    // process name -> container name
    cgroupPathMapping cgroup.CgroupPathMapping // cgroup path -> container name (built at runtime)
    prevCgroupCPU     map[string]uint64        // container name -> previous total ticks
    prevCgroupTime    time.Time                // previous collection time for cgroup CPU calc
}

// NewAggregator creates a new metrics aggregator
func NewAggregator(procPath string, topN int) *Aggregator {
    // Parse cgroup configuration from environment
    limits, _ := cgroup.ParseCgroupLimitsEnv()
    processMap, _ := cgroup.ParseProcessMappingEnv()

    return &Aggregator{
        procPath:    procPath,
        topN:        topN,
        prevProcCPU: make(map[int]*ProcessCPUSnapshot),
        procPath:          procPath,
        topN:              topN,
        prevProcCPU:       make(map[int]*ProcessCPUSnapshot),
        cgroupLimits:      limits,
        processMapping:    processMap,
        cgroupPathMapping: make(cgroup.CgroupPathMapping),
        prevCgroupCPU:     make(map[string]uint64),
    }
}

@@ -77,6 +91,12 @@ func (a *Aggregator) Collect() (*SystemMetrics, error) {
        return float64(p.MemRSS)
    })

    // Discover cgroup path mappings from known processes
    a.discoverCgroupMappings(processes)

    // Calculate per-cgroup metrics
    cgroupMetrics := a.calculateCgroupMetrics(processes, processMetrics, now)

    return &SystemMetrics{
        Timestamp:      now,
        TotalProcesses: len(processes),

@@ -84,6 +104,7 @@ func (a *Aggregator) Collect() (*SystemMetrics, error) {
        Memory:    memMetrics,
        TopCPU:    topCPU,
        TopMemory: topMemory,
        Cgroups:   cgroupMetrics,
    }, nil
}

@@ -158,6 +179,11 @@ func (a *Aggregator) calculateProcessMetrics(processes []*proc.ProcessInfo, now
            state = "?"
        }

        cgroupPath := ""
        if p.Cgroup != nil {
            cgroupPath = p.Cgroup.Path
        }

        metrics = append(metrics, ProcessMetrics{
            PID:  pid,
            Name: p.Status.Name,

@@ -166,6 +192,7 @@ func (a *Aggregator) calculateProcessMetrics(processes []*proc.ProcessInfo, now
            MemVirtual: p.Status.VmSize,
            Threads:    p.Status.Threads,
            State:      state,
            CgroupPath: cgroupPath,
        })
    }

@@ -223,3 +250,152 @@ func (a *Aggregator) getTopByMetric(metrics []ProcessMetrics, getValue func(Proc

    return sorted[:n]
}

// discoverCgroupMappings discovers cgroup path to container name mappings
// by looking for processes that match the configured process mapping.
func (a *Aggregator) discoverCgroupMappings(processes []*proc.ProcessInfo) {
    if len(a.processMapping) == 0 {
        return
    }

    for _, p := range processes {
        if p.Cgroup == nil || p.Cgroup.Path == "" {
            continue
        }

        // Check if this process name is in our mapping
        if containerName, ok := a.processMapping[p.Status.Name]; ok {
            // Map this cgroup path to the container name
            a.cgroupPathMapping[p.Cgroup.Path] = containerName
        }
    }
}

// resolveContainerName returns the container name for a cgroup path,
// or the raw path if no mapping exists.
func (a *Aggregator) resolveContainerName(cgroupPath string) string {
    if name, ok := a.cgroupPathMapping[cgroupPath]; ok {
        return name
    }
    // Use raw path as fallback
    if cgroupPath == "" {
        return "<unknown>"
    }
    return cgroupPath
}

// calculateCgroupMetrics computes metrics grouped by container/cgroup.
func (a *Aggregator) calculateCgroupMetrics(
    processes []*proc.ProcessInfo,
    processMetrics []ProcessMetrics,
    now time.Time,
) map[string]*CgroupMetrics {
    // Build lookup from PID to ProcessMetrics
    pmByPID := make(map[int]ProcessMetrics)
    for _, pm := range processMetrics {
        pmByPID[pm.PID] = pm
    }

    // Group processes by container name
    type cgroupData struct {
        cgroupPath string
        procs      []*proc.ProcessInfo
        metrics    []ProcessMetrics
    }
    containerGroups := make(map[string]*cgroupData)

    for _, p := range processes {
        cgroupPath := ""
        if p.Cgroup != nil {
            cgroupPath = p.Cgroup.Path
        }

        containerName := a.resolveContainerName(cgroupPath)

        if _, ok := containerGroups[containerName]; !ok {
            containerGroups[containerName] = &cgroupData{
                cgroupPath: cgroupPath,
            }
        }

        containerGroups[containerName].procs = append(containerGroups[containerName].procs, p)

        if pm, ok := pmByPID[p.Stat.PID]; ok {
            containerGroups[containerName].metrics = append(containerGroups[containerName].metrics, pm)
        }
    }

    // Calculate elapsed time since last collection
    elapsed := 0.0
    if !a.prevCgroupTime.IsZero() {
        elapsed = now.Sub(a.prevCgroupTime).Seconds()
    }
    a.prevCgroupTime = now

    // Calculate metrics for each container
    result := make(map[string]*CgroupMetrics)

    for containerName, data := range containerGroups {
        // Sum CPU ticks (utime + stime only, not cutime/cstime)
        var totalTicks uint64
        var totalRSS uint64

        for _, p := range data.procs {
            totalTicks += p.Stat.TotalTime()
            totalRSS += p.Status.VmRSS
        }

        // Calculate CPU cores used from delta
        usedCores := 0.0
        hasDelta := false
        if prev, ok := a.prevCgroupCPU[containerName]; ok && elapsed > 0 {
            // Guard against underflow: if processes exited and new ones started,
            // totalTicks could be less than prev. In that case, skip this sample.
            if totalTicks >= prev {
                deltaTicks := totalTicks - prev
                // Convert ticks to cores: deltaTicks / (elapsed_seconds * CLK_TCK)
                usedCores = float64(deltaTicks) / (elapsed * float64(proc.DefaultClockTicks))
                hasDelta = true
            }
        }
        a.prevCgroupCPU[containerName] = totalTicks

        // Calculate percentages against limits if available
        cpuPercent := 0.0
        memPercent := 0.0
        var limitCores float64
        var limitBytes uint64

        if limit, ok := a.cgroupLimits[containerName]; ok {
            limitCores = limit.CPUCores
            limitBytes = limit.MemoryBytes
            if limit.CPUCores > 0 {
                cpuPercent = (usedCores / limit.CPUCores) * 100
            }
            if limit.MemoryBytes > 0 {
                memPercent = (float64(totalRSS) / float64(limit.MemoryBytes)) * 100
            }
        }

        result[containerName] = &CgroupMetrics{
            Name:       containerName,
            CgroupPath: data.cgroupPath,
            CPU: CgroupCPUMetrics{
                TotalTicks:  totalTicks,
                UsedCores:   usedCores,
                UsedPercent: cpuPercent,
                LimitCores:  limitCores,
                HasDelta:    hasDelta,
            },
            Memory: CgroupMemoryMetrics{
                TotalRSSBytes: totalRSS,
                UsedPercent:   memPercent,
                LimitBytes:    limitBytes,
            },
            Processes: data.metrics,
            NumProcs:  len(data.procs),
        }
    }

    return result
}
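For intuition, the cores calculation above works out as follows; the numbers are illustrative and CLK_TCK is assumed to be the usual 100 Hz.

// Suppose a container's processes accumulated 1,250 ticks at the previous
// sample and 1,550 ticks now, with 10 s between collections and CLK_TCK = 100:
//
//   deltaTicks = 1550 - 1250      = 300 ticks
//   usedCores  = 300 / (10 * 100) = 0.3 cores
//
// Against a "500m" (0.5 core) limit, that is (0.3 / 0.5) * 100 = 60% CPU usage.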
@@ -11,6 +11,7 @@ type ProcessMetrics struct {
    MemVirtual uint64 `json:"mem_virtual_bytes"`
    Threads    int    `json:"threads"`
    State      string `json:"state"`
    CgroupPath string `json:"cgroup_path,omitempty"`
}

// CPUMetrics holds aggregated CPU metrics

@@ -35,12 +36,39 @@ type MemoryMetrics struct {

// SystemMetrics holds a complete snapshot of system metrics
type SystemMetrics struct {
    Timestamp      time.Time        `json:"timestamp"`
    TotalProcesses int              `json:"total_processes"`
    CPU            CPUMetrics       `json:"cpu"`
    Memory         MemoryMetrics    `json:"memory"`
    TopCPU         []ProcessMetrics `json:"top_cpu,omitempty"`
    TopMemory      []ProcessMetrics `json:"top_memory,omitempty"`
    Timestamp      time.Time                 `json:"timestamp"`
    TotalProcesses int                       `json:"total_processes"`
    CPU            CPUMetrics                `json:"cpu"`
    Memory         MemoryMetrics             `json:"memory"`
    TopCPU         []ProcessMetrics          `json:"top_cpu,omitempty"`
    TopMemory      []ProcessMetrics          `json:"top_memory,omitempty"`
    Cgroups        map[string]*CgroupMetrics `json:"cgroups,omitempty"`
}

// CgroupCPUMetrics holds CPU metrics for a single cgroup/container
type CgroupCPUMetrics struct {
    TotalTicks  uint64  `json:"total_ticks"`
    UsedCores   float64 `json:"used_cores"`
    UsedPercent float64 `json:"used_percent,omitempty"`
    LimitCores  float64 `json:"limit_cores,omitempty"`
    HasDelta    bool    `json:"-"` // true when a valid delta could be computed
}

// CgroupMemoryMetrics holds memory metrics for a single cgroup/container
type CgroupMemoryMetrics struct {
    TotalRSSBytes uint64  `json:"total_rss_bytes"`
    UsedPercent   float64 `json:"used_percent,omitempty"`
    LimitBytes    uint64  `json:"limit_bytes,omitempty"`
}

// CgroupMetrics holds all metrics for a single cgroup/container
type CgroupMetrics struct {
    Name       string              `json:"name"`
    CgroupPath string              `json:"cgroup_path"`
    CPU        CgroupCPUMetrics    `json:"cpu"`
    Memory     CgroupMemoryMetrics `json:"memory"`
    Processes  []ProcessMetrics    `json:"processes"`
    NumProcs   int                 `json:"num_processes"`
}

// CPUSnapshot holds CPU timing data for calculating percentages between intervals
|||
|
|
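A minimal consumer sketch for the extended SystemMetrics (hypothetical code, not part of this change set; the module path is assumed from the imports used elsewhere in the diff). It shows how the new Cgroups map, which is omitted from the JSON entirely when empty, might be read back:

// package example is an illustrative consumer of the collector's JSON output.
package example

import (
    "encoding/json"
    "fmt"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"
)

func printCgroups(data []byte) error {
    var m metrics.SystemMetrics
    if err := json.Unmarshal(data, &m); err != nil {
        return err
    }
    // Cgroups is nil when no containers were matched ("cgroups,omitempty").
    for name, cg := range m.Cgroups {
        fmt.Printf("%s (%s): %.2f cores, %d bytes RSS, %d procs\n",
            name, cg.CgroupPath, cg.CPU.UsedCores, cg.Memory.TotalRSSBytes, cg.NumProcs)
    }
    return nil
}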
@@ -1,11 +1,12 @@
package output

import (
    "context"
    "io"
    "log/slog"
    "os"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/metrics"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"
)

// LogFormat specifies the log output format

@@ -53,29 +54,44 @@ func NewLoggerWriter(cfg LoggerConfig) *LoggerWriter {
    }
}

// topCPUEntry is a lightweight struct for JSON serialization of top CPU processes
type topCPUEntry struct {
    PID int `json:"pid"`
    Name string `json:"name"`
    CPUPercent float64 `json:"cpu_percent"`
}

// topMemEntry is a lightweight struct for JSON serialization of top memory processes
type topMemEntry struct {
    PID int `json:"pid"`
    Name string `json:"name"`
    RSSBytes uint64 `json:"rss_bytes"`
}

// Write outputs the metrics using structured logging
func (w *LoggerWriter) Write(m *metrics.SystemMetrics) error {
    // Build top CPU process attrs
    topCPUAttrs := make([]any, 0, len(m.TopCPU))
    // Build top CPU process entries
    topCPU := make([]topCPUEntry, 0, len(m.TopCPU))
    for _, p := range m.TopCPU {
        topCPUAttrs = append(topCPUAttrs, slog.Group("",
            slog.Int("pid", p.PID),
            slog.String("name", p.Name),
            slog.Float64("cpu_percent", p.CPUPercent),
        ))
        topCPU = append(topCPU, topCPUEntry{
            PID: p.PID,
            Name: p.Name,
            CPUPercent: p.CPUPercent,
        })
    }

    // Build top memory process attrs
    topMemAttrs := make([]any, 0, len(m.TopMemory))
    // Build top memory process entries
    topMem := make([]topMemEntry, 0, len(m.TopMemory))
    for _, p := range m.TopMemory {
        topMemAttrs = append(topMemAttrs, slog.Group("",
            slog.Int("pid", p.PID),
            slog.String("name", p.Name),
            slog.Uint64("rss_bytes", p.MemRSS),
        ))
        topMem = append(topMem, topMemEntry{
            PID: p.PID,
            Name: p.Name,
            RSSBytes: p.MemRSS,
        })
    }

    w.logger.Info("metrics_collected",
    // Build base attributes
    attrs := []slog.Attr{
        slog.Time("collection_time", m.Timestamp),
        slog.Int("total_processes", m.TotalProcesses),
        slog.Group("cpu",

@@ -94,9 +110,16 @@ func (w *LoggerWriter) Write(m *metrics.SystemMetrics) error {
            slog.Uint64("total_rss_bytes", m.Memory.TotalRSSBytes),
            slog.Float64("rss_percent", m.Memory.RSSPercent),
        ),
        slog.Any("top_cpu", topCPUAttrs),
        slog.Any("top_memory", topMemAttrs),
    )
        slog.Any("top_cpu", topCPU),
        slog.Any("top_memory", topMem),
    }

    // Add cgroups if present
    if len(m.Cgroups) > 0 {
        attrs = append(attrs, slog.Any("cgroups", m.Cgroups))
    }

    w.logger.LogAttrs(context.Background(), slog.LevelInfo, "metrics_collected", attrs...)

    return nil
}
@@ -1,6 +1,6 @@
package output

import "edp.buildth.ing/DevFW-CICD/forgejo-runner-resource-collector/internal/metrics"
import "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"

// Writer defines the interface for outputting metrics
// This allows for different implementations (logging, HTTP push, etc.)
59 internal/proc/cgroup.go Normal file

@@ -0,0 +1,59 @@
// ABOUTME: Reads cgroup information from /proc/[pid]/cgroup.
// ABOUTME: Supports both cgroup v1 and v2 formats.
package proc

import (
    "bufio"
    "fmt"
    "os"
    "strings"
)

// CgroupInfo holds cgroup information for a process
type CgroupInfo struct {
    Path string // The cgroup path (unified for v2, or from memory controller for v1)
}

// ReadCgroup reads /proc/[pid]/cgroup and extracts the cgroup path
func ReadCgroup(procPath string, pid int) (*CgroupInfo, error) {
    path := fmt.Sprintf("%s/%d/cgroup", procPath, pid)

    file, err := os.Open(path)
    if err != nil {
        return nil, fmt.Errorf("opening cgroup file: %w", err)
    }
    defer func() { _ = file.Close() }()

    var cgroupPath string
    scanner := bufio.NewScanner(file)

    for scanner.Scan() {
        line := scanner.Text()

        // Try cgroup v2 first (unified hierarchy)
        // Format: 0::/path
        if path, found := strings.CutPrefix(line, "0::"); found {
            cgroupPath = path
            break
        }

        // Fall back to cgroup v1 - look for memory controller
        // Format: X:memory:/path or X:memory,other:/path
        parts := strings.SplitN(line, ":", 3)
        if len(parts) == 3 {
            controllers := parts[1]
            if strings.Contains(controllers, "memory") {
                cgroupPath = parts[2]
                // Don't break - prefer v2 if found later
            }
        }
    }

    if err := scanner.Err(); err != nil {
        return nil, fmt.Errorf("scanning cgroup file: %w", err)
    }

    return &CgroupInfo{
        Path: cgroupPath,
    }, nil
}
97 internal/proc/cgroup_test.go Normal file

@@ -0,0 +1,97 @@
package proc

import (
    "os"
    "path/filepath"
    "testing"
)

func TestReadCgroup(t *testing.T) {
    tests := []struct {
        name string
        cgroupFile string
        wantPath string
        wantErr bool
    }{
        {
            name: "cgroup v2 unified",
            cgroupFile: `0::/kubepods/pod-abc/container-123
`,
            wantPath: "/kubepods/pod-abc/container-123",
            wantErr: false,
        },
        {
            name: "cgroup v2 with trailing newline",
            cgroupFile: `0::/docker/abc123def456
`,
            wantPath: "/docker/abc123def456",
            wantErr: false,
        },
        {
            name: "cgroup v1 multiple controllers",
            cgroupFile: `12:blkio:/user.slice
11:memory:/docker/abc123
10:cpu,cpuacct:/docker/abc123
9:pids:/docker/abc123
`,
            wantPath: "/docker/abc123",
            wantErr: false,
        },
        {
            name: "cgroup v2 preferred over v1",
            cgroupFile: `11:memory:/docker/old-path
0::/kubepods/new-path
`,
            wantPath: "/kubepods/new-path",
            wantErr: false,
        },
        {
            name: "empty file",
            cgroupFile: "",
            wantPath: "",
            wantErr: false,
        },
        {
            name: "root cgroup",
            cgroupFile: `0::/
`,
            wantPath: "/",
            wantErr: false,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            // Create a temp directory structure mimicking /proc
            tmpDir := t.TempDir()
            procDir := filepath.Join(tmpDir, "proc")
            pidDir := filepath.Join(procDir, "1234")

            if err := os.MkdirAll(pidDir, 0755); err != nil {
                t.Fatalf("Failed to create pid dir: %v", err)
            }

            if err := os.WriteFile(filepath.Join(pidDir, "cgroup"), []byte(tt.cgroupFile), 0644); err != nil {
                t.Fatalf("Failed to write cgroup file: %v", err)
            }

            got, err := ReadCgroup(procDir, 1234)
            if (err != nil) != tt.wantErr {
                t.Errorf("ReadCgroup() error = %v, wantErr %v", err, tt.wantErr)
                return
            }
            if !tt.wantErr && got.Path != tt.wantPath {
                t.Errorf("ReadCgroup() path = %q, want %q", got.Path, tt.wantPath)
            }
        })
    }
}

func TestReadCgroup_FileNotFound(t *testing.T) {
    tmpDir := t.TempDir()

    _, err := ReadCgroup(tmpDir, 1234)
    if err == nil {
        t.Error("ReadCgroup() expected error for missing file, got nil")
    }
}
@@ -128,13 +128,14 @@ func ReadSystemCPU(procPath string) (user, nice, system, idle, iowait, irq, soft
    return 0, 0, 0, 0, 0, 0, 0, fmt.Errorf("cpu line not found in /proc/stat")
}

// ProcessInfo combines stat and status information for a process
// ProcessInfo combines stat, status, and cgroup information for a process
type ProcessInfo struct {
    Stat *ProcStat
    Status *ProcStatus
    Cgroup *CgroupInfo
}

// ReadProcess reads both stat and status for a single process
// ReadProcess reads stat, status, and cgroup for a single process
func ReadProcess(procPath string, pid int) (*ProcessInfo, error) {
    stat, err := ReadStat(procPath, pid)
    if err != nil {

@@ -146,9 +147,13 @@ func ReadProcess(procPath string, pid int) (*ProcessInfo, error) {
        return nil, err
    }

    // Read cgroup info (non-fatal if it fails)
    cgroup, _ := ReadCgroup(procPath, pid)

    return &ProcessInfo{
        Stat: stat,
        Status: status,
        Cgroup: cgroup,
    }, nil
}
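A minimal caller sketch for the extended ReadProcess (hypothetical code, not part of this change set; the module path is assumed from the imports seen elsewhere in the diff). Because the cgroup read is deliberately non-fatal, info.Cgroup can be nil and callers should check it:

package main

import (
    "fmt"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/proc"
)

func main() {
    // PID 1234 is arbitrary; "/proc" is the usual procfs mount point.
    info, err := proc.ReadProcess("/proc", 1234)
    if err != nil {
        fmt.Println("read failed:", err)
        return
    }
    // ReadCgroup errors are swallowed upstream, so Cgroup may be nil.
    if info.Cgroup != nil {
        fmt.Println("cgroup path:", info.Cgroup.Path)
    }
}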
303 internal/receiver/handler.go Normal file

@@ -0,0 +1,303 @@
// ABOUTME: HTTP handlers for the metrics receiver service using Fuego framework.
// ABOUTME: Provides endpoints for receiving and querying metrics with automatic OpenAPI generation.
package receiver

import (
    "crypto/subtle"
    "encoding/json"
    "errors"
    "fmt"
    "log/slog"
    "net/http"
    "strings"
    "time"

    "github.com/go-fuego/fuego"
)

// Handler handles HTTP requests for the metrics receiver
type Handler struct {
    store *Store
    logger *slog.Logger
    readToken string // Pre-shared token for read endpoint authentication
    hmacKey string // Separate key for HMAC-based push token generation/validation
    tokenTTL time.Duration
}

// NewHandler creates a new HTTP handler with the given store.
// readToken authenticates read endpoints and the token generation endpoint.
// hmacKey is the secret used to derive scoped push tokens.
// tokenTTL specifies how long push tokens are valid (0 uses DefaultTokenTTL).
func NewHandler(store *Store, logger *slog.Logger, readToken, hmacKey string, tokenTTL time.Duration) *Handler {
    if tokenTTL == 0 {
        tokenTTL = DefaultTokenTTL
    }
    return &Handler{store: store, logger: logger, readToken: readToken, hmacKey: hmacKey, tokenTTL: tokenTTL}
}

// Common errors
var (
    ErrUnauthorized = errors.New("authorization required")
    ErrInvalidToken = errors.New("invalid token")
    ErrInvalidFormat = errors.New("invalid authorization format")
    ErrMissingHMACKey = errors.New("token generation requires a configured HMAC key")
    ErrMissingFields = errors.New("organization, repository, workflow, and job are required")
    ErrMissingRunID = errors.New("run_id is required")
    ErrInvalidParams = errors.New("org, repo, workflow and job are required")
    ErrNoMetrics = errors.New("no metrics found for this workflow/job")
    ErrInvalidPercent = errors.New("invalid cpu_percentile: must be one of peak, p99, p95, p75, p50, avg")
)

// HealthResponse is the response for the health endpoint
type HealthResponse struct {
    Status string `json:"status"`
}

// MetricCreatedResponse is the response when a metric is successfully created
type MetricCreatedResponse struct {
    ID uint `json:"id"`
    Status string `json:"status"`
}

// GetMetricsRequest contains path parameters for getting metrics
type GetMetricsRequest struct {
    Org string `path:"org"`
    Repo string `path:"repo"`
    Workflow string `path:"workflow"`
    Job string `path:"job"`
}

// GetSizingRequest contains path and query parameters for sizing endpoint
type GetSizingRequest struct {
    Org string `path:"org"`
    Repo string `path:"repo"`
    Workflow string `path:"workflow"`
    Job string `path:"job"`
    Runs int `query:"runs" default:"5" validate:"min=1,max=100" description:"Number of recent runs to analyze"`
    Buffer int `query:"buffer" default:"20" validate:"min=0,max=100" description:"Buffer percentage to add"`
    CPUPercentile string `query:"cpu_percentile" default:"p95" description:"CPU percentile to use (peak, p99, p95, p75, p50, avg)"`
}

// RegisterRoutes registers all HTTP routes on the Fuego server
func (h *Handler) RegisterRoutes(s *fuego.Server) {
    // Health endpoint (no auth)
    fuego.Get(s, "/health", h.Health)

    // API group with authentication
    api := fuego.Group(s, "/api/v1")

    // Token generation (requires read token)
    fuego.Post(api, "/token", h.GenerateToken, fuego.OptionMiddleware(h.requireReadToken))

    // Metrics endpoints
    fuego.Post(api, "/metrics", h.ReceiveMetrics) // Uses push token validated in handler
    fuego.Get(api, "/metrics/repo/{org}/{repo}/{workflow}/{job}", h.GetMetricsByWorkflowJob, fuego.OptionMiddleware(h.requireReadToken))

    // Sizing endpoint
    fuego.Get(api, "/sizing/repo/{org}/{repo}/{workflow}/{job}", h.GetSizing, fuego.OptionMiddleware(h.requireReadToken))
}

// requireReadToken is middleware that validates the read token
func (h *Handler) requireReadToken(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        if h.readToken == "" {
            h.logger.Warn("no read-token configured, rejecting request", slog.String("path", r.URL.Path))
            http.Error(w, "authorization required", http.StatusUnauthorized)
            return
        }

        authHeader := r.Header.Get("Authorization")
        if authHeader == "" {
            h.logger.Warn("missing authorization header", slog.String("path", r.URL.Path))
            http.Error(w, "authorization required", http.StatusUnauthorized)
            return
        }

        const bearerPrefix = "Bearer "
        if !strings.HasPrefix(authHeader, bearerPrefix) {
            h.logger.Warn("invalid authorization format", slog.String("path", r.URL.Path))
            http.Error(w, "invalid authorization format", http.StatusUnauthorized)
            return
        }

        token := strings.TrimPrefix(authHeader, bearerPrefix)
        if subtle.ConstantTimeCompare([]byte(token), []byte(h.readToken)) != 1 {
            h.logger.Warn("invalid token", slog.String("path", r.URL.Path))
            http.Error(w, "invalid token", http.StatusUnauthorized)
            return
        }

        next.ServeHTTP(w, r)
    })
}

// validatePushToken checks push authentication via scoped HMAC token
func (h *Handler) validatePushToken(r *http.Request, exec ExecutionContext) error {
    if h.hmacKey == "" {
        h.logger.Warn("no HMAC key configured, rejecting push", slog.String("path", r.URL.Path))
        return ErrUnauthorized
    }

    authHeader := r.Header.Get("Authorization")
    if authHeader == "" {
        h.logger.Warn("missing push authorization", slog.String("path", r.URL.Path))
        return ErrUnauthorized
    }

    const bearerPrefix = "Bearer "
    if !strings.HasPrefix(authHeader, bearerPrefix) {
        h.logger.Warn("invalid push authorization format", slog.String("path", r.URL.Path))
        return ErrInvalidFormat
    }

    token := strings.TrimPrefix(authHeader, bearerPrefix)
    if !ValidateToken(h.hmacKey, token, exec.Organization, exec.Repository, exec.Workflow, exec.Job, h.tokenTTL) {
        h.logger.Warn("invalid push token", slog.String("path", r.URL.Path))
        return ErrInvalidToken
    }

    return nil
}

// Health returns the service health status
func (h *Handler) Health(c fuego.ContextNoBody) (HealthResponse, error) {
    return HealthResponse{Status: "ok"}, nil
}

// GenerateToken generates a scoped HMAC push token for a workflow/job
func (h *Handler) GenerateToken(c fuego.ContextWithBody[TokenRequest]) (TokenResponse, error) {
    if h.hmacKey == "" {
        return TokenResponse{}, fuego.BadRequestError{Detail: ErrMissingHMACKey.Error()}
    }

    req, err := c.Body()
    if err != nil {
        return TokenResponse{}, fuego.BadRequestError{Detail: "invalid JSON body"}
    }

    if req.Organization == "" || req.Repository == "" || req.Workflow == "" || req.Job == "" {
        return TokenResponse{}, fuego.BadRequestError{Detail: ErrMissingFields.Error()}
    }

    token := GenerateToken(h.hmacKey, req.Organization, req.Repository, req.Workflow, req.Job)
    return TokenResponse{Token: token}, nil
}

// ReceiveMetrics receives and stores metrics from a collector
func (h *Handler) ReceiveMetrics(c fuego.ContextNoBody) (MetricCreatedResponse, error) {
    var payload MetricsPayload
    if err := json.NewDecoder(c.Request().Body).Decode(&payload); err != nil {
        h.logger.Error("failed to decode payload", slog.String("error", err.Error()))
        return MetricCreatedResponse{}, fuego.BadRequestError{Detail: "invalid JSON payload"}
    }

    if payload.Execution.RunID == "" {
        return MetricCreatedResponse{}, fuego.BadRequestError{Detail: ErrMissingRunID.Error()}
    }

    // Validate push token
    if err := h.validatePushToken(c.Request(), payload.Execution); err != nil {
        return MetricCreatedResponse{}, fuego.UnauthorizedError{Detail: err.Error()}
    }

    id, err := h.store.SaveMetric(&payload)
    if err != nil {
        h.logger.Error("failed to save metric", slog.String("error", err.Error()))
        return MetricCreatedResponse{}, fuego.InternalServerError{Detail: "failed to save metric"}
    }

    h.logger.Info("metric saved",
        slog.Uint64("id", uint64(id)),
        slog.String("run_id", payload.Execution.RunID),
        slog.String("repository", payload.Execution.Repository),
    )

    c.SetStatus(http.StatusCreated)
    return MetricCreatedResponse{ID: id, Status: "created"}, nil
}

// GetMetricsByWorkflowJob retrieves all metrics for a specific workflow/job
func (h *Handler) GetMetricsByWorkflowJob(c fuego.ContextNoBody) ([]MetricResponse, error) {
    org := c.PathParam("org")
    repo := c.PathParam("repo")
    workflow := c.PathParam("workflow")
    job := c.PathParam("job")

    if org == "" || repo == "" || workflow == "" || job == "" {
        return nil, fuego.BadRequestError{Detail: ErrInvalidParams.Error()}
    }

    metrics, err := h.store.GetMetricsByWorkflowJob(org, repo, workflow, job)
    if err != nil {
        h.logger.Error("failed to get metrics", slog.String("error", err.Error()))
        return nil, fuego.InternalServerError{Detail: "failed to get metrics"}
    }

    // Convert to response type with Payload as JSON object
    response := make([]MetricResponse, len(metrics))
    for i, m := range metrics {
        response[i] = m.ToResponse()
    }

    return response, nil
}

// GetSizing computes Kubernetes resource sizing recommendations
func (h *Handler) GetSizing(c fuego.ContextNoBody) (SizingResponse, error) {
    org := c.PathParam("org")
    repo := c.PathParam("repo")
    workflow := c.PathParam("workflow")
    job := c.PathParam("job")

    if org == "" || repo == "" || workflow == "" || job == "" {
        return SizingResponse{}, fuego.BadRequestError{Detail: ErrInvalidParams.Error()}
    }

    // Parse query parameters with defaults
    runs := parseIntQueryParamFromContext(c, "runs", 5, 1, 100)
    buffer := parseIntQueryParamFromContext(c, "buffer", 20, 0, 100)
    cpuPercentile := c.QueryParam("cpu_percentile")
    if cpuPercentile == "" {
        cpuPercentile = "p95"
    }
    if !IsValidPercentile(cpuPercentile) {
        return SizingResponse{}, fuego.BadRequestError{Detail: ErrInvalidPercent.Error()}
    }

    metrics, err := h.store.GetRecentMetricsByWorkflowJob(org, repo, workflow, job, runs)
    if err != nil {
        h.logger.Error("failed to get metrics", slog.String("error", err.Error()))
        return SizingResponse{}, fuego.InternalServerError{Detail: "failed to get metrics"}
    }

    if len(metrics) == 0 {
        return SizingResponse{}, fuego.NotFoundError{Detail: ErrNoMetrics.Error()}
    }

    response, err := computeSizing(metrics, buffer, cpuPercentile)
    if err != nil {
        h.logger.Error("failed to compute sizing", slog.String("error", err.Error()))
        return SizingResponse{}, fuego.InternalServerError{Detail: "failed to compute sizing"}
    }

    return *response, nil
}

// parseIntQueryParamFromContext parses an integer query parameter with default, min, and max values
func parseIntQueryParamFromContext(c fuego.ContextNoBody, name string, defaultVal, minVal, maxVal int) int {
    strVal := c.QueryParam(name)
    if strVal == "" {
        return defaultVal
    }
    var val int
    if _, err := fmt.Sscanf(strVal, "%d", &val); err != nil {
        return defaultVal
    }
    if val < minVal {
        return minVal
    }
    if val > maxVal {
        return maxVal
    }
    return val
}
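To make the route and token wiring above concrete, here is a client sketch (illustrative only, not part of this change set; the listen address, token values, and the existence of a cmd-level main in the same module are assumptions). It first obtains a scoped push token with the read token, then pushes a metrics payload with that token:

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver"
    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func main() {
    base := "http://localhost:8080"    // assumed listen address
    readToken := "secret-read-token"   // assumed pre-shared read token

    // 1. Request a push token scoped to org/repo/workflow/job (read token required).
    tokenBody, _ := json.Marshal(receiver.TokenRequest{
        Organization: "org", Repository: "repo", Workflow: "ci.yml", Job: "build",
    })
    req, _ := http.NewRequest(http.MethodPost, base+"/api/v1/token", bytes.NewReader(tokenBody))
    req.Header.Set("Authorization", "Bearer "+readToken)
    req.Header.Set("Content-Type", "application/json")
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    var tok receiver.TokenResponse
    _ = json.NewDecoder(resp.Body).Decode(&tok)

    // 2. Push a metrics payload with the scoped token; the handler re-validates
    //    the token against the execution context carried in the payload.
    payload, _ := json.Marshal(receiver.MetricsPayload{
        Execution: receiver.ExecutionContext{
            Organization: "org", Repository: "repo", Workflow: "ci.yml", Job: "build", RunID: "run-1",
        },
        Summary: summary.RunSummary{},
    })
    push, _ := http.NewRequest(http.MethodPost, base+"/api/v1/metrics", bytes.NewReader(payload))
    push.Header.Set("Authorization", "Bearer "+tok.Token)
    push.Header.Set("Content-Type", "application/json")
    res, err := http.DefaultClient.Do(push)
    if err != nil {
        panic(err)
    }
    defer res.Body.Close()
    fmt.Println("push status:", res.Status) // 201 Created when the token scope matches
}

A mismatched scope (different org/repo/workflow/job) or an expired token is rejected with 401, as exercised in the tests below.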
481 internal/receiver/handler_test.go Normal file

@@ -0,0 +1,481 @@
package receiver

import (
    "bytes"
    "encoding/json"
    "io"
    "log/slog"
    "net/http"
    "net/http/httptest"
    "path/filepath"
    "strings"
    "testing"

    "github.com/go-fuego/fuego"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func TestHandler_ReceiveMetrics(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    exec := ExecutionContext{
        Organization: "test-org",
        Repository: "test-repo",
        Workflow: "ci.yml",
        Job: "build",
        RunID: "run-123",
    }
    pushToken := GenerateToken(readToken, exec.Organization, exec.Repository, exec.Workflow, exec.Job)

    payload := MetricsPayload{
        Execution: exec,
        Summary: summary.RunSummary{
            DurationSeconds: 60.0,
            SampleCount: 12,
        },
    }

    body, _ := json.Marshal(payload)
    req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", bytes.NewReader(body))
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer "+pushToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusCreated {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusCreated)
    }

    var resp map[string]any
    if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }
    if resp["status"] != "created" {
        t.Errorf("response status = %v, want %q", resp["status"], "created")
    }
    if resp["id"] == nil || resp["id"].(float64) == 0 {
        t.Error("response id is missing or zero")
    }
}

func TestHandler_ReceiveMetrics_InvalidJSON(t *testing.T) {
    h, cleanup := newTestHandler(t)
    defer cleanup()

    req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", bytes.NewReader([]byte("not json")))
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusBadRequest {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusBadRequest)
    }
}

func TestHandler_ReceiveMetrics_MissingRunID(t *testing.T) {
    h, cleanup := newTestHandler(t)
    defer cleanup()

    payload := MetricsPayload{
        Execution: ExecutionContext{
            Organization: "test-org",
            Repository: "test-repo",
            // RunID is missing
        },
        Summary: summary.RunSummary{},
    }

    body, _ := json.Marshal(payload)
    req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", bytes.NewReader(body))
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusBadRequest {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusBadRequest)
    }
}

func TestHandler_GetByWorkflowJob(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    // Save metrics for different workflow/job combinations
    payloads := []*MetricsPayload{
        {Execution: ExecutionContext{Organization: "org-x", Repository: "repo-y", Workflow: "ci.yml", Job: "build", RunID: "r1"}},
        {Execution: ExecutionContext{Organization: "org-x", Repository: "repo-y", Workflow: "ci.yml", Job: "build", RunID: "r2"}},
        {Execution: ExecutionContext{Organization: "org-x", Repository: "repo-y", Workflow: "ci.yml", Job: "test", RunID: "r3"}},
    }
    for _, p := range payloads {
        if _, err := h.store.SaveMetric(p); err != nil {
            t.Fatalf("SaveMetric() error = %v", err)
        }
    }

    req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/repo/org-x/repo-y/ci.yml/build", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var metrics []MetricResponse
    if err := json.NewDecoder(rec.Body).Decode(&metrics); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }
    if len(metrics) != 2 {
        t.Errorf("got %d metrics, want 2", len(metrics))
    }
}

func TestHandler_GetByWorkflowJob_NotFound(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/repo/org/repo/workflow/job", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var metrics []MetricResponse
    if err := json.NewDecoder(rec.Body).Decode(&metrics); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }
    if len(metrics) != 0 {
        t.Errorf("got %d metrics, want 0", len(metrics))
    }
}

func TestHandler_GetByWorkflowJob_WithToken(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "secret-token")
    defer cleanup()

    // Save a metric
    payload := &MetricsPayload{
        Execution: ExecutionContext{Organization: "org", Repository: "repo", Workflow: "ci.yml", Job: "build", RunID: "r1"},
    }
    if _, err := h.store.SaveMetric(payload); err != nil {
        t.Fatalf("SaveMetric() error = %v", err)
    }

    s := newTestServer(h)

    tests := []struct {
        name string
        authHeader string
        wantCode int
    }{
        {"no auth header", "", http.StatusUnauthorized},
        {"wrong format", "Basic dXNlcjpwYXNz", http.StatusUnauthorized},
        {"wrong token", "Bearer wrong-token", http.StatusUnauthorized},
        {"valid token", "Bearer secret-token", http.StatusOK},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/repo/org/repo/ci.yml/build", nil)
            if tt.authHeader != "" {
                req.Header.Set("Authorization", tt.authHeader)
            }
            rec := httptest.NewRecorder()
            s.Mux.ServeHTTP(rec, req)

            if rec.Code != tt.wantCode {
                t.Errorf("status = %d, want %d", rec.Code, tt.wantCode)
            }
        })
    }
}

func TestHandler_Health(t *testing.T) {
    h, cleanup := newTestHandler(t)
    defer cleanup()

    req := httptest.NewRequest(http.MethodGet, "/health", nil)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var resp map[string]string
    if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }
    if resp["status"] != "ok" {
        t.Errorf("status = %q, want %q", resp["status"], "ok")
    }
}

func TestHandler_GenerateToken(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "secret-token")
    defer cleanup()

    body, _ := json.Marshal(TokenRequest{
        Organization: "org",
        Repository: "repo",
        Workflow: "ci.yml",
        Job: "build",
    })

    req := httptest.NewRequest(http.MethodPost, "/api/v1/token", bytes.NewReader(body))
    req.Header.Set("Authorization", "Bearer secret-token")
    req.Header.Set("Content-Type", "application/json")
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Fatalf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var resp TokenResponse
    if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }
    if resp.Token == "" {
        t.Error("expected non-empty token")
    }
    // Token format is "timestamp:hmac" where hmac is 64 hex chars
    parts := strings.SplitN(resp.Token, ":", 2)
    if len(parts) != 2 {
        t.Errorf("token should have format 'timestamp:hmac', got %q", resp.Token)
    }
    if len(parts[1]) != 64 {
        t.Errorf("HMAC part length = %d, want 64", len(parts[1]))
    }
}

func TestHandler_GenerateToken_NoAuth(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "secret-token")
    defer cleanup()

    body, _ := json.Marshal(TokenRequest{
        Organization: "org",
        Repository: "repo",
        Workflow: "ci.yml",
        Job: "build",
    })

    req := httptest.NewRequest(http.MethodPost, "/api/v1/token", bytes.NewReader(body))
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusUnauthorized {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusUnauthorized)
    }
}

func TestHandler_GenerateToken_MissingFields(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "secret-token")
    defer cleanup()

    // Missing job field
    body, _ := json.Marshal(TokenRequest{
        Organization: "org",
        Repository: "repo",
        Workflow: "ci.yml",
    })

    req := httptest.NewRequest(http.MethodPost, "/api/v1/token", bytes.NewReader(body))
    req.Header.Set("Authorization", "Bearer secret-token")
    req.Header.Set("Content-Type", "application/json")
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusBadRequest {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusBadRequest)
    }
}

func TestHandler_GenerateToken_NoReadToken(t *testing.T) {
    h, cleanup := newTestHandler(t) // no readToken configured
    defer cleanup()

    body, _ := json.Marshal(TokenRequest{
        Organization: "org",
        Repository: "repo",
        Workflow: "ci.yml",
        Job: "build",
    })

    req := httptest.NewRequest(http.MethodPost, "/api/v1/token", bytes.NewReader(body))
    req.Header.Set("Content-Type", "application/json")
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    // With no read token, the middleware rejects before we reach the handler
    if rec.Code != http.StatusUnauthorized {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusUnauthorized)
    }
}

func TestHandler_ReceiveMetrics_WithPushToken(t *testing.T) {
    readToken := "secret-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    s := newTestServer(h)

    exec := ExecutionContext{
        Organization: "org",
        Repository: "repo",
        Workflow: "ci.yml",
        Job: "build",
        RunID: "run-1",
    }

    validToken := GenerateToken(readToken, exec.Organization, exec.Repository, exec.Workflow, exec.Job)
    wrongScopeToken := GenerateToken(readToken, "other-org", "repo", "ci.yml", "build")

    tests := []struct {
        name string
        authHeader string
        wantCode int
    }{
        {"no auth", "", http.StatusUnauthorized},
        {"wrong token", "Bearer wrong-token", http.StatusUnauthorized},
        {"wrong scope", "Bearer " + wrongScopeToken, http.StatusUnauthorized},
        {"valid token", "Bearer " + validToken, http.StatusCreated},
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            payload := MetricsPayload{
                Execution: exec,
                Summary: summary.RunSummary{SampleCount: 1},
            }
            body, _ := json.Marshal(payload)

            req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", bytes.NewReader(body))
            req.Header.Set("Content-Type", "application/json")
            if tt.authHeader != "" {
                req.Header.Set("Authorization", tt.authHeader)
            }
            rec := httptest.NewRecorder()
            s.Mux.ServeHTTP(rec, req)

            if rec.Code != tt.wantCode {
                t.Errorf("status = %d, want %d", rec.Code, tt.wantCode)
            }
        })
    }
}

func TestHandler_ReceiveMetrics_RejectsWhenNoReadToken(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "") // no readToken configured
    defer cleanup()

    payload := MetricsPayload{
        Execution: ExecutionContext{
            Organization: "org",
            Repository: "repo",
            Workflow: "ci.yml",
            Job: "build",
            RunID: "run-1",
        },
        Summary: summary.RunSummary{SampleCount: 1},
    }
    body, _ := json.Marshal(payload)

    req := httptest.NewRequest(http.MethodPost, "/api/v1/metrics", bytes.NewReader(body))
    req.Header.Set("Content-Type", "application/json")
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusUnauthorized {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusUnauthorized)
    }
}

func TestHandler_GetByWorkflowJob_RejectsWhenNoReadToken(t *testing.T) {
    h, cleanup := newTestHandlerWithToken(t, "") // no readToken configured
    defer cleanup()

    req := httptest.NewRequest(http.MethodGet, "/api/v1/metrics/repo/org/repo/ci.yml/build", nil)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusUnauthorized {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusUnauthorized)
    }
}

func newTestServer(h *Handler) *fuego.Server {
    s := fuego.NewServer(
        fuego.WithoutStartupMessages(),
        fuego.WithEngineOptions(
            fuego.WithOpenAPIConfig(fuego.OpenAPIConfig{
                Disabled: true,
            }),
        ),
    )
    h.RegisterRoutes(s)
    return s
}

func newTestHandler(t *testing.T) (*Handler, func()) {
    t.Helper()
    dbPath := filepath.Join(t.TempDir(), "test.db")
    store, err := NewStore(dbPath)
    if err != nil {
        t.Fatalf("NewStore() error = %v", err)
    }

    logger := slog.New(slog.NewTextHandler(io.Discard, nil))
    handler := NewHandler(store, logger, "", "", 0) // no auth — endpoints will reject

    return handler, func() { _ = store.Close() }
}

func newTestHandlerWithToken(t *testing.T, readToken string) (*Handler, func()) {
    t.Helper()
    return newTestHandlerWithKeys(t, readToken, readToken)
}

func newTestHandlerWithKeys(t *testing.T, readToken, hmacKey string) (*Handler, func()) {
    t.Helper()
    dbPath := filepath.Join(t.TempDir(), "test.db")
    store, err := NewStore(dbPath)
    if err != nil {
        t.Fatalf("NewStore() error = %v", err)
    }

    logger := slog.New(slog.NewTextHandler(io.Discard, nil))
    handler := NewHandler(store, logger, readToken, hmacKey, 0) // 0 uses DefaultTokenTTL

    return handler, func() { _ = store.Close() }
}
221 internal/receiver/sizing.go Normal file

@@ -0,0 +1,221 @@
// ABOUTME: Computes ideal container sizes from historical run data.
// ABOUTME: Provides Kubernetes-style resource sizes.
package receiver

import (
    "encoding/json"
    "fmt"
    "math"
    "sort"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

// ResourceSize holds Kubernetes-formatted resource values
type ResourceSize struct {
    Request string `json:"request"`
    Limit string `json:"limit"`
}

// ContainerSizing holds computed sizing for a single container
type ContainerSizing struct {
    Name string `json:"name"`
    CPU ResourceSize `json:"cpu"`
    Memory ResourceSize `json:"memory"`
}

// SizingMeta provides context about the sizing calculation
type SizingMeta struct {
    RunsAnalyzed int `json:"runs_analyzed"`
    BufferPercent int `json:"buffer_percent"`
    CPUPercentile string `json:"cpu_percentile"`
}

// SizingResponse is the API response for the sizing endpoint
type SizingResponse struct {
    Containers []ContainerSizing `json:"containers"`
    Total struct {
        CPU ResourceSize `json:"cpu"`
        Memory ResourceSize `json:"memory"`
    } `json:"total"`
    Meta SizingMeta `json:"meta"`
}

// validPercentiles lists the allowed percentile values
var validPercentiles = map[string]bool{
    "peak": true,
    "p99": true,
    "p95": true,
    "p75": true,
    "p50": true,
    "avg": true,
}

// IsValidPercentile checks if the given percentile string is valid
func IsValidPercentile(p string) bool {
    return validPercentiles[p]
}

// selectCPUValue extracts the appropriate value from StatSummary based on percentile
func selectCPUValue(stats summary.StatSummary, percentile string) float64 {
    switch percentile {
    case "peak":
        return stats.Peak
    case "p99":
        return stats.P99
    case "p95":
        return stats.P95
    case "p75":
        return stats.P75
    case "p50":
        return stats.P50
    case "avg":
        return stats.Avg
    default:
        return stats.P95 // default to p95
    }
}

// formatMemoryK8s converts bytes to Kubernetes memory format (Mi)
func formatMemoryK8s(bytes float64) string {
    const Mi = 1024 * 1024
    return fmt.Sprintf("%.0fMi", math.Ceil(bytes/Mi))
}

// formatCPUK8s converts cores to Kubernetes CPU format (millicores or whole cores)
func formatCPUK8s(cores float64) string {
    millicores := cores * 1000
    if millicores >= 1000 && math.Mod(millicores, 1000) == 0 {
        return fmt.Sprintf("%.0f", cores)
    }
    return fmt.Sprintf("%.0fm", math.Ceil(millicores))
}

// roundUpMemoryLimit rounds bytes up to the next power of 2 in Mi
func roundUpMemoryLimit(bytes float64) float64 {
    const Mi = 1024 * 1024
    if bytes <= 0 {
        return Mi // minimum 1Mi
    }
    miValue := bytes / Mi
    if miValue <= 1 {
        return Mi // minimum 1Mi
    }
    // Find next power of 2
    power := math.Ceil(math.Log2(miValue))
    return math.Pow(2, power) * Mi
}

// roundUpCPULimit rounds cores up to the next 0.5 increment
func roundUpCPULimit(cores float64) float64 {
    if cores <= 0 {
        return 0.5 // minimum 0.5 cores
    }
    return math.Ceil(cores*2) / 2
}

// containerAggregation holds accumulated stats for a single container across runs
type containerAggregation struct {
    cpuValues []float64
    memoryPeaks []float64
}

// computeSizing calculates ideal container sizes from metrics
func computeSizing(metrics []Metric, bufferPercent int, cpuPercentile string) (*SizingResponse, error) {
    if len(metrics) == 0 {
        return nil, fmt.Errorf("no metrics provided")
    }

    // Aggregate container stats across all runs
    containerStats := make(map[string]*containerAggregation)

    for _, m := range metrics {
        var runSummary summary.RunSummary
        if err := json.Unmarshal([]byte(m.Payload), &runSummary); err != nil {
            continue // skip invalid payloads
        }

        for _, c := range runSummary.Containers {
            if _, exists := containerStats[c.Name]; !exists {
                containerStats[c.Name] = &containerAggregation{
                    cpuValues: make([]float64, 0),
                    memoryPeaks: make([]float64, 0),
                }
            }
            agg := containerStats[c.Name]
            agg.cpuValues = append(agg.cpuValues, selectCPUValue(c.CPUCores, cpuPercentile))
            agg.memoryPeaks = append(agg.memoryPeaks, c.MemoryBytes.Peak)
        }
    }

    // Calculate sizing for each container
    bufferMultiplier := 1.0 + float64(bufferPercent)/100.0
    var containers []ContainerSizing
    var totalCPU, totalMemory float64

    // Sort container names for consistent output
    names := make([]string, 0, len(containerStats))
    for name := range containerStats {
        names = append(names, name)
    }
    sort.Strings(names)

    for _, name := range names {
        agg := containerStats[name]

        // CPU: max of selected percentile values across runs
        maxCPU := 0.0
        for _, v := range agg.cpuValues {
            if v > maxCPU {
                maxCPU = v
            }
        }

        // Memory: peak of peaks
        maxMemory := 0.0
        for _, v := range agg.memoryPeaks {
            if v > maxMemory {
                maxMemory = v
            }
        }

        // Apply buffer
        cpuWithBuffer := maxCPU * bufferMultiplier
        memoryWithBuffer := maxMemory * bufferMultiplier

        containers = append(containers, ContainerSizing{
            Name: name,
            CPU: ResourceSize{
                Request: formatCPUK8s(cpuWithBuffer),
                Limit: formatCPUK8s(roundUpCPULimit(cpuWithBuffer)),
            },
            Memory: ResourceSize{
                Request: formatMemoryK8s(memoryWithBuffer),
                Limit: formatMemoryK8s(roundUpMemoryLimit(memoryWithBuffer)),
            },
        })

        totalCPU += cpuWithBuffer
        totalMemory += memoryWithBuffer
    }

    response := &SizingResponse{
        Containers: containers,
        Meta: SizingMeta{
            RunsAnalyzed: len(metrics),
            BufferPercent: bufferPercent,
            CPUPercentile: cpuPercentile,
        },
    }

    response.Total.CPU = ResourceSize{
        Request: formatCPUK8s(totalCPU),
        Limit: formatCPUK8s(roundUpCPULimit(totalCPU)),
    }
    response.Total.Memory = ResourceSize{
        Request: formatMemoryK8s(totalMemory),
        Limit: formatMemoryK8s(roundUpMemoryLimit(totalMemory)),
    }

    return response, nil
}
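As a concrete illustration of the arithmetic above (numbers chosen for clarity and matching the single-run test below): with cpu_percentile=p95 and buffer=20, a runner container whose P95 CPU is 0.8 cores and whose peak RSS is 512 Mi yields a CPU request of 0.8 × 1.2 = 0.96 cores (960m) with the limit rounded up to the next 0.5-core increment (1), and a memory request of 512 Mi × 1.2 = 614.4 Mi (615Mi) with the limit rounded up to the next power of two (1024Mi).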
489 internal/receiver/sizing_test.go Normal file

@@ -0,0 +1,489 @@
package receiver

import (
    "encoding/json"
    "net/http"
    "net/http/httptest"
    "testing"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func TestFormatMemoryK8s(t *testing.T) {
    tests := []struct {
        bytes float64
        want string
    }{
        {0, "0Mi"},
        {1024 * 1024, "1Mi"},
        {256 * 1024 * 1024, "256Mi"},
        {512 * 1024 * 1024, "512Mi"},
        {1024 * 1024 * 1024, "1024Mi"},
        {2 * 1024 * 1024 * 1024, "2048Mi"},
        {1.5 * 1024 * 1024 * 1024, "1536Mi"},
        {100 * 1024 * 1024, "100Mi"},
    }

    for _, tt := range tests {
        got := formatMemoryK8s(tt.bytes)
        if got != tt.want {
            t.Errorf("formatMemoryK8s(%v) = %q, want %q", tt.bytes, got, tt.want)
        }
    }
}

func TestFormatCPUK8s(t *testing.T) {
    tests := []struct {
        cores float64
        want string
    }{
        {0, "0m"},
        {0.1, "100m"},
        {0.5, "500m"},
        {1.0, "1"},
        {1.5, "1500m"},
        {2.0, "2"},
        {2.5, "2500m"},
        {0.123, "123m"},
    }

    for _, tt := range tests {
        got := formatCPUK8s(tt.cores)
        if got != tt.want {
            t.Errorf("formatCPUK8s(%v) = %q, want %q", tt.cores, got, tt.want)
        }
    }
}

func TestRoundUpMemoryLimit(t *testing.T) {
    Mi := float64(1024 * 1024)
    tests := []struct {
        bytes float64
        want float64
    }{
        {0, Mi}, // minimum 1Mi
        {100, Mi}, // rounds up to 1Mi
        {Mi, Mi}, // exactly 1Mi stays 1Mi
        {1.5 * Mi, 2 * Mi},
        {200 * Mi, 256 * Mi},
        {300 * Mi, 512 * Mi},
        {600 * Mi, 1024 * Mi},
    }

    for _, tt := range tests {
        got := roundUpMemoryLimit(tt.bytes)
        if got != tt.want {
            t.Errorf("roundUpMemoryLimit(%v) = %v, want %v", tt.bytes, got, tt.want)
        }
    }
}

func TestRoundUpCPULimit(t *testing.T) {
    tests := []struct {
        cores float64
        want float64
    }{
        {0, 0.5}, // minimum 0.5
        {0.1, 0.5},
        {0.5, 0.5},
        {0.6, 1.0},
        {1.0, 1.0},
        {1.1, 1.5},
        {1.5, 1.5},
        {2.0, 2.0},
        {2.3, 2.5},
    }

    for _, tt := range tests {
        got := roundUpCPULimit(tt.cores)
        if got != tt.want {
            t.Errorf("roundUpCPULimit(%v) = %v, want %v", tt.cores, got, tt.want)
        }
    }
}

func TestSelectCPUValue(t *testing.T) {
    stats := summary.StatSummary{
        Peak: 10.0,
        P99: 9.0,
        P95: 8.0,
        P75: 6.0,
        P50: 5.0,
        Avg: 4.0,
    }

    tests := []struct {
        percentile string
        want float64
    }{
        {"peak", 10.0},
        {"p99", 9.0},
        {"p95", 8.0},
        {"p75", 6.0},
        {"p50", 5.0},
        {"avg", 4.0},
        {"invalid", 8.0}, // defaults to p95
    }

    for _, tt := range tests {
        got := selectCPUValue(stats, tt.percentile)
        if got != tt.want {
            t.Errorf("selectCPUValue(stats, %q) = %v, want %v", tt.percentile, got, tt.want)
        }
    }
}

func TestIsValidPercentile(t *testing.T) {
    valid := []string{"peak", "p99", "p95", "p75", "p50", "avg"}
    for _, p := range valid {
        if !IsValidPercentile(p) {
            t.Errorf("IsValidPercentile(%q) = false, want true", p)
        }
    }

    invalid := []string{"p80", "p90", "max", ""}
    for _, p := range invalid {
        if IsValidPercentile(p) {
            t.Errorf("IsValidPercentile(%q) = true, want false", p)
        }
    }
}

func TestComputeSizing_SingleRun(t *testing.T) {
    runSummary := summary.RunSummary{
        Containers: []summary.ContainerSummary{
            {
                Name: "runner",
                CPUCores: summary.StatSummary{Peak: 1.0, P99: 0.9, P95: 0.8, P75: 0.6, P50: 0.5, Avg: 0.4},
                MemoryBytes: summary.StatSummary{Peak: 512 * 1024 * 1024}, // 512Mi
            },
        },
    }

    payload, _ := json.Marshal(runSummary)
    metrics := []Metric{{Payload: string(payload)}}

    resp, err := computeSizing(metrics, 20, "p95")
    if err != nil {
        t.Fatalf("computeSizing() error = %v", err)
    }

    if len(resp.Containers) != 1 {
        t.Fatalf("got %d containers, want 1", len(resp.Containers))
    }

    c := resp.Containers[0]
    if c.Name != "runner" {
        t.Errorf("container name = %q, want %q", c.Name, "runner")
    }

    // CPU: 0.8 * 1.2 = 0.96 -> 960m request, 1 limit
    if c.CPU.Request != "960m" {
        t.Errorf("CPU request = %q, want %q", c.CPU.Request, "960m")
    }
    if c.CPU.Limit != "1" {
        t.Errorf("CPU limit = %q, want %q", c.CPU.Limit, "1")
    }

    // Memory: 512Mi * 1.2 = 614.4Mi -> 615Mi request, 1024Mi limit
    if c.Memory.Request != "615Mi" {
        t.Errorf("Memory request = %q, want %q", c.Memory.Request, "615Mi")
    }
    if c.Memory.Limit != "1024Mi" {
        t.Errorf("Memory limit = %q, want %q", c.Memory.Limit, "1024Mi")
    }

    if resp.Meta.RunsAnalyzed != 1 {
        t.Errorf("runs_analyzed = %d, want 1", resp.Meta.RunsAnalyzed)
    }
    if resp.Meta.BufferPercent != 20 {
        t.Errorf("buffer_percent = %d, want 20", resp.Meta.BufferPercent)
    }
    if resp.Meta.CPUPercentile != "p95" {
        t.Errorf("cpu_percentile = %q, want %q", resp.Meta.CPUPercentile, "p95")
    }
}

func TestComputeSizing_MultipleRuns(t *testing.T) {
    // Run 1: lower values
    run1 := summary.RunSummary{
        Containers: []summary.ContainerSummary{
            {
                Name: "runner",
                CPUCores: summary.StatSummary{Peak: 0.5, P95: 0.4},
                MemoryBytes: summary.StatSummary{Peak: 256 * 1024 * 1024},
            },
        },
    }
    // Run 2: higher values (should be used)
    run2 := summary.RunSummary{
        Containers: []summary.ContainerSummary{
            {
                Name: "runner",
                CPUCores: summary.StatSummary{Peak: 1.0, P95: 0.8},
                MemoryBytes: summary.StatSummary{Peak: 512 * 1024 * 1024},
            },
        },
    }

    payload1, _ := json.Marshal(run1)
    payload2, _ := json.Marshal(run2)
    metrics := []Metric{
        {Payload: string(payload1)},
        {Payload: string(payload2)},
    }

    resp, err := computeSizing(metrics, 0, "p95") // no buffer for easier math
    if err != nil {
        t.Fatalf("computeSizing() error = %v", err)
    }

    c := resp.Containers[0]

    // CPU: max(0.4, 0.8) = 0.8
    if c.CPU.Request != "800m" {
        t.Errorf("CPU request = %q, want %q", c.CPU.Request, "800m")
    }

    // Memory: max(256, 512) = 512Mi
    if c.Memory.Request != "512Mi" {
        t.Errorf("Memory request = %q, want %q", c.Memory.Request, "512Mi")
    }

    if resp.Meta.RunsAnalyzed != 2 {
        t.Errorf("runs_analyzed = %d, want 2", resp.Meta.RunsAnalyzed)
    }
}

func TestComputeSizing_MultipleContainers(t *testing.T) {
    runSummary := summary.RunSummary{
        Containers: []summary.ContainerSummary{
            {
                Name: "runner",
                CPUCores: summary.StatSummary{P95: 1.0},
                MemoryBytes: summary.StatSummary{Peak: 512 * 1024 * 1024},
            },
            {
                Name: "dind",
                CPUCores: summary.StatSummary{P95: 0.5},
                MemoryBytes: summary.StatSummary{Peak: 256 * 1024 * 1024},
            },
        },
    }

    payload, _ := json.Marshal(runSummary)
    metrics := []Metric{{Payload: string(payload)}}

    resp, err := computeSizing(metrics, 0, "p95")
    if err != nil {
        t.Fatalf("computeSizing() error = %v", err)
    }

    if len(resp.Containers) != 2 {
        t.Fatalf("got %d containers, want 2", len(resp.Containers))
    }

    // Containers should be sorted alphabetically
    if resp.Containers[0].Name != "dind" {
        t.Errorf("first container = %q, want %q", resp.Containers[0].Name, "dind")
    }
    if resp.Containers[1].Name != "runner" {
        t.Errorf("second container = %q, want %q", resp.Containers[1].Name, "runner")
    }

    // Total should be sum
    if resp.Total.CPU.Request != "1500m" {
        t.Errorf("total CPU request = %q, want %q", resp.Total.CPU.Request, "1500m")
    }
    if resp.Total.Memory.Request != "768Mi" {
        t.Errorf("total memory request = %q, want %q", resp.Total.Memory.Request, "768Mi")
    }
}

func TestComputeSizing_NoMetrics(t *testing.T) {
    _, err := computeSizing([]Metric{}, 20, "p95")
    if err == nil {
        t.Error("computeSizing() with no metrics should return error")
    }
}

func TestHandler_GetSizing(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    // Save metrics with container data
    for i := 0; i < 3; i++ {
        runSummary := summary.RunSummary{
            Containers: []summary.ContainerSummary{
                {
                    Name: "runner",
                    CPUCores: summary.StatSummary{Peak: 1.0, P99: 0.9, P95: 0.8, P75: 0.6, P50: 0.5, Avg: 0.4},
                    MemoryBytes: summary.StatSummary{Peak: 512 * 1024 * 1024},
                },
            },
        }
        payload := &MetricsPayload{
            Execution: ExecutionContext{
                Organization: "org",
                Repository: "repo",
                Workflow: "ci.yml",
                Job: "build",
                RunID: "run-" + string(rune('1'+i)),
            },
            Summary: runSummary,
        }
        if _, err := h.store.SaveMetric(payload); err != nil {
            t.Fatalf("SaveMetric() error = %v", err)
        }
    }

    req := httptest.NewRequest(http.MethodGet, "/api/v1/sizing/repo/org/repo/ci.yml/build", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var resp SizingResponse
    if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }

    if len(resp.Containers) != 1 {
        t.Errorf("got %d containers, want 1", len(resp.Containers))
    }
    if resp.Meta.RunsAnalyzed != 3 {
        t.Errorf("runs_analyzed = %d, want 3", resp.Meta.RunsAnalyzed)
    }
    if resp.Meta.BufferPercent != 20 {
        t.Errorf("buffer_percent = %d, want 20", resp.Meta.BufferPercent)
    }
    if resp.Meta.CPUPercentile != "p95" {
        t.Errorf("cpu_percentile = %q, want %q", resp.Meta.CPUPercentile, "p95")
    }
}

func TestHandler_GetSizing_CustomParams(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    // Save one metric
    runSummary := summary.RunSummary{
        Containers: []summary.ContainerSummary{
            {
                Name: "runner",
                CPUCores: summary.StatSummary{Peak: 1.0, P99: 0.9, P95: 0.8, P75: 0.6, P50: 0.5, Avg: 0.4},
                MemoryBytes: summary.StatSummary{Peak: 512 * 1024 * 1024},
            },
        },
    }
    payload := &MetricsPayload{
        Execution: ExecutionContext{Organization: "org", Repository: "repo", Workflow: "ci.yml", Job: "build", RunID: "run-1"},
        Summary: runSummary,
    }
    if _, err := h.store.SaveMetric(payload); err != nil {
        t.Fatalf("SaveMetric() error = %v", err)
    }

    req := httptest.NewRequest(http.MethodGet, "/api/v1/sizing/repo/org/repo/ci.yml/build?runs=10&buffer=10&cpu_percentile=p75", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusOK {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusOK)
    }

    var resp SizingResponse
    if err := json.NewDecoder(rec.Body).Decode(&resp); err != nil {
        t.Fatalf("failed to decode response: %v", err)
    }

    if resp.Meta.BufferPercent != 10 {
        t.Errorf("buffer_percent = %d, want 10", resp.Meta.BufferPercent)
    }
    if resp.Meta.CPUPercentile != "p75" {
        t.Errorf("cpu_percentile = %q, want %q", resp.Meta.CPUPercentile, "p75")
    }

    // CPU: 0.6 * 1.1 = 0.66
    c := resp.Containers[0]
    if c.CPU.Request != "660m" {
        t.Errorf("CPU request = %q, want %q", c.CPU.Request, "660m")
    }
}

func TestHandler_GetSizing_NotFound(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    req := httptest.NewRequest(http.MethodGet, "/api/v1/sizing/repo/org/repo/ci.yml/build", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusNotFound {
        t.Errorf("status = %d, want %d", rec.Code, http.StatusNotFound)
    }
}

func TestHandler_GetSizing_InvalidPercentile(t *testing.T) {
    const readToken = "test-token"
    h, cleanup := newTestHandlerWithToken(t, readToken)
    defer cleanup()

    req := httptest.NewRequest(http.MethodGet, "/api/v1/sizing/repo/org/repo/ci.yml/build?cpu_percentile=p80", nil)
    req.Header.Set("Authorization", "Bearer "+readToken)
    rec := httptest.NewRecorder()

    s := newTestServer(h)
    s.Mux.ServeHTTP(rec, req)

    if rec.Code != http.StatusBadRequest {
|
||||
t.Errorf("status = %d, want %d", rec.Code, http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandler_GetSizing_AuthRequired(t *testing.T) {
|
||||
const readToken = "test-token"
|
||||
h, cleanup := newTestHandlerWithToken(t, readToken)
|
||||
defer cleanup()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
authHeader string
|
||||
wantCode int
|
||||
}{
|
||||
{"no auth", "", http.StatusUnauthorized},
|
||||
{"wrong token", "Bearer wrong-token", http.StatusUnauthorized},
|
||||
{"valid token", "Bearer " + readToken, http.StatusNotFound}, // no metrics, but auth works
|
||||
}
|
||||
|
||||
s := newTestServer(h)
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/v1/sizing/repo/org/repo/ci.yml/build", nil)
|
||||
if tt.authHeader != "" {
|
||||
req.Header.Set("Authorization", tt.authHeader)
|
||||
}
|
||||
rec := httptest.NewRecorder()
|
||||
s.Mux.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != tt.wantCode {
|
||||
t.Errorf("status = %d, want %d", rec.Code, tt.wantCode)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
123
internal/receiver/store.go
Normal file
123
internal/receiver/store.go
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
// ABOUTME: SQLite storage layer for metrics receiver using GORM.
|
||||
// ABOUTME: Handles database initialization and metric storage/retrieval.
|
||||
package receiver
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/glebarez/sqlite"
|
||||
"gorm.io/gorm"
|
||||
"gorm.io/gorm/logger"
|
||||
)
|
||||
|
||||
// Metric represents a stored metric record in the database
|
||||
type Metric struct {
|
||||
ID uint `gorm:"primaryKey"`
|
||||
Organization string `gorm:"index:idx_org_repo;not null"`
|
||||
Repository string `gorm:"index:idx_org_repo;not null"`
|
||||
Workflow string `gorm:"not null"`
|
||||
Job string `gorm:"not null"`
|
||||
RunID string `gorm:"index;not null"`
|
||||
ReceivedAt time.Time `gorm:"index;not null"`
|
||||
Payload string `gorm:"type:text;not null"` // JSON-encoded RunSummary
|
||||
}
|
||||
|
||||
// MetricResponse is the API response type with Payload as embedded JSON object
|
||||
type MetricResponse struct {
|
||||
ID uint `json:"id"`
|
||||
Organization string `json:"organization"`
|
||||
Repository string `json:"repository"`
|
||||
Workflow string `json:"workflow"`
|
||||
Job string `json:"job"`
|
||||
RunID string `json:"run_id"`
|
||||
ReceivedAt time.Time `json:"received_at"`
|
||||
Payload json.RawMessage `json:"payload"`
|
||||
}
|
||||
|
||||
// ToResponse converts a Metric to a MetricResponse with Payload as JSON object
|
||||
func (m *Metric) ToResponse() MetricResponse {
|
||||
return MetricResponse{
|
||||
ID: m.ID,
|
||||
Organization: m.Organization,
|
||||
Repository: m.Repository,
|
||||
Workflow: m.Workflow,
|
||||
Job: m.Job,
|
||||
RunID: m.RunID,
|
||||
ReceivedAt: m.ReceivedAt,
|
||||
Payload: json.RawMessage(m.Payload),
|
||||
}
|
||||
}
|
||||
|
||||
// Store handles SQLite storage for metrics using GORM
|
||||
type Store struct {
|
||||
db *gorm.DB
|
||||
}
|
||||
|
||||
// NewStore creates a new SQLite store at the given path
|
||||
func NewStore(dbPath string) (*Store, error) {
|
||||
db, err := gorm.Open(sqlite.Open(dbPath), &gorm.Config{
|
||||
Logger: logger.Default.LogMode(logger.Silent),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening database: %w", err)
|
||||
}
|
||||
|
||||
if err := db.AutoMigrate(&Metric{}); err != nil {
|
||||
return nil, fmt.Errorf("migrating schema: %w", err)
|
||||
}
|
||||
|
||||
return &Store{db: db}, nil
|
||||
}
|
||||
|
||||
// SaveMetric stores a metrics payload in the database
|
||||
func (s *Store) SaveMetric(payload *MetricsPayload) (uint, error) {
|
||||
summaryJSON, err := json.Marshal(payload.Summary)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("marshaling summary: %w", err)
|
||||
}
|
||||
|
||||
metric := Metric{
|
||||
Organization: payload.Execution.Organization,
|
||||
Repository: payload.Execution.Repository,
|
||||
Workflow: payload.Execution.Workflow,
|
||||
Job: payload.Execution.Job,
|
||||
RunID: payload.Execution.RunID,
|
||||
ReceivedAt: time.Now().UTC(),
|
||||
Payload: string(summaryJSON),
|
||||
}
|
||||
|
||||
result := s.db.Create(&metric)
|
||||
if result.Error != nil {
|
||||
return 0, fmt.Errorf("inserting metric: %w", result.Error)
|
||||
}
|
||||
|
||||
return metric.ID, nil
|
||||
}
|
||||
|
||||
// GetMetricsByWorkflowJob retrieves all metrics for a specific workflow and job
|
||||
func (s *Store) GetMetricsByWorkflowJob(org, repo, workflow, job string) ([]Metric, error) {
|
||||
var metrics []Metric
|
||||
result := s.db.Where("organization = ? AND repository = ? AND workflow = ? AND job = ?", org, repo, workflow, job).Order("received_at DESC").Find(&metrics)
|
||||
return metrics, result.Error
|
||||
}
|
||||
|
||||
// GetRecentMetricsByWorkflowJob retrieves the last N metrics ordered by received_at DESC
|
||||
func (s *Store) GetRecentMetricsByWorkflowJob(org, repo, workflow, job string, limit int) ([]Metric, error) {
|
||||
var metrics []Metric
|
||||
result := s.db.Where(
|
||||
"organization = ? AND repository = ? AND workflow = ? AND job = ?",
|
||||
org, repo, workflow, job,
|
||||
).Order("received_at DESC").Limit(limit).Find(&metrics)
|
||||
return metrics, result.Error
|
||||
}
|
||||
|
||||
// Close closes the database connection
|
||||
func (s *Store) Close() error {
|
||||
sqlDB, err := s.db.DB()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return sqlDB.Close()
|
||||
}
|
||||
178
internal/receiver/store_test.go
Normal file
178
internal/receiver/store_test.go
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
package receiver
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
|
||||
)
|
||||
|
||||
func TestNewStore(t *testing.T) {
|
||||
dbPath := filepath.Join(t.TempDir(), "test.db")
|
||||
|
||||
store, err := NewStore(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore() error = %v", err)
|
||||
}
|
||||
defer func() { _ = store.Close() }()
|
||||
|
||||
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
|
||||
t.Error("database file was not created")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_SaveMetric(t *testing.T) {
|
||||
store := newTestStore(t)
|
||||
defer func() { _ = store.Close() }()
|
||||
|
||||
payload := &MetricsPayload{
|
||||
Execution: ExecutionContext{
|
||||
Organization: "test-org",
|
||||
Repository: "test-repo",
|
||||
Workflow: "ci.yml",
|
||||
Job: "build",
|
||||
RunID: "run-123",
|
||||
},
|
||||
Summary: summary.RunSummary{
|
||||
StartTime: time.Now().Add(-time.Minute),
|
||||
EndTime: time.Now(),
|
||||
DurationSeconds: 60.0,
|
||||
SampleCount: 12,
|
||||
CPUTotal: summary.StatSummary{Peak: 80.5, Avg: 45.2, P95: 75.0},
|
||||
MemUsedBytes: summary.StatSummary{Peak: 1024000, Avg: 512000, P95: 900000},
|
||||
MemUsedPercent: summary.StatSummary{Peak: 50.0, Avg: 25.0, P95: 45.0},
|
||||
},
|
||||
}
|
||||
|
||||
id, err := store.SaveMetric(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("SaveMetric() error = %v", err)
|
||||
}
|
||||
if id == 0 {
|
||||
t.Error("SaveMetric() returned id = 0, want non-zero")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_GetMetricsByWorkflowJob(t *testing.T) {
|
||||
store := newTestStore(t)
|
||||
defer func() { _ = store.Close() }()
|
||||
|
||||
// Save metrics for different workflow/job combinations
|
||||
payloads := []struct {
|
||||
org string
|
||||
repo string
|
||||
workflow string
|
||||
job string
|
||||
}{
|
||||
{"org-a", "repo-1", "ci.yml", "build"},
|
||||
{"org-a", "repo-1", "ci.yml", "build"},
|
||||
{"org-a", "repo-1", "ci.yml", "test"},
|
||||
{"org-a", "repo-1", "deploy.yml", "build"},
|
||||
}
|
||||
|
||||
for i, p := range payloads {
|
||||
payload := &MetricsPayload{
|
||||
Execution: ExecutionContext{
|
||||
Organization: p.org,
|
||||
Repository: p.repo,
|
||||
Workflow: p.workflow,
|
||||
Job: p.job,
|
||||
RunID: "run-" + string(rune('a'+i)),
|
||||
},
|
||||
Summary: summary.RunSummary{},
|
||||
}
|
||||
if _, err := store.SaveMetric(payload); err != nil {
|
||||
t.Fatalf("SaveMetric() error = %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
metrics, err := store.GetMetricsByWorkflowJob("org-a", "repo-1", "ci.yml", "build")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 2 {
|
||||
t.Errorf("GetMetricsByWorkflowJob() returned %d metrics, want 2", len(metrics))
|
||||
}
|
||||
|
||||
for _, m := range metrics {
|
||||
if m.Organization != "org-a" || m.Repository != "repo-1" || m.Workflow != "ci.yml" || m.Job != "build" {
|
||||
t.Errorf("GetMetricsByWorkflowJob() returned metric with org=%q repo=%q workflow=%q job=%q, want org-a/repo-1/ci.yml/build",
|
||||
m.Organization, m.Repository, m.Workflow, m.Job)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_GetMetricsByWorkflowJob_NotFound(t *testing.T) {
|
||||
store := newTestStore(t)
|
||||
defer func() { _ = store.Close() }()
|
||||
|
||||
metrics, err := store.GetMetricsByWorkflowJob("nonexistent", "repo", "workflow", "job")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 0 {
|
||||
t.Errorf("GetMetricsByWorkflowJob() returned %d metrics, want 0", len(metrics))
|
||||
}
|
||||
}
|
||||
|
||||
func TestStore_SaveMetric_PreservesPayload(t *testing.T) {
|
||||
store := newTestStore(t)
|
||||
defer func() { _ = store.Close() }()
|
||||
|
||||
original := &MetricsPayload{
|
||||
Execution: ExecutionContext{
|
||||
Organization: "test-org",
|
||||
Repository: "test-repo",
|
||||
Workflow: "build.yml",
|
||||
Job: "test",
|
||||
RunID: "run-preserve",
|
||||
},
|
||||
Summary: summary.RunSummary{
|
||||
DurationSeconds: 123.45,
|
||||
SampleCount: 50,
|
||||
CPUTotal: summary.StatSummary{Peak: 99.9, Avg: 55.5, P95: 88.8},
|
||||
},
|
||||
}
|
||||
|
||||
_, err := store.SaveMetric(original)
|
||||
if err != nil {
|
||||
t.Fatalf("SaveMetric() error = %v", err)
|
||||
}
|
||||
|
||||
metrics, err := store.GetMetricsByWorkflowJob("test-org", "test-repo", "build.yml", "test")
|
||||
if err != nil {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() error = %v", err)
|
||||
}
|
||||
if len(metrics) != 1 {
|
||||
t.Fatalf("GetMetricsByWorkflowJob() returned %d metrics, want 1", len(metrics))
|
||||
}
|
||||
|
||||
m := metrics[0]
|
||||
if m.Organization != original.Execution.Organization {
|
||||
t.Errorf("Organization = %q, want %q", m.Organization, original.Execution.Organization)
|
||||
}
|
||||
if m.Repository != original.Execution.Repository {
|
||||
t.Errorf("Repository = %q, want %q", m.Repository, original.Execution.Repository)
|
||||
}
|
||||
if m.Workflow != original.Execution.Workflow {
|
||||
t.Errorf("Workflow = %q, want %q", m.Workflow, original.Execution.Workflow)
|
||||
}
|
||||
if m.Job != original.Execution.Job {
|
||||
t.Errorf("Job = %q, want %q", m.Job, original.Execution.Job)
|
||||
}
|
||||
if m.Payload == "" {
|
||||
t.Error("Payload is empty")
|
||||
}
|
||||
}
|
||||
|
||||
func newTestStore(t *testing.T) *Store {
|
||||
t.Helper()
|
||||
dbPath := filepath.Join(t.TempDir(), "test.db")
|
||||
store, err := NewStore(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStore() error = %v", err)
|
||||
}
|
||||
return store
|
||||
}
|
||||
77
internal/receiver/token.go
Normal file
77
internal/receiver/token.go
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
// ABOUTME: HMAC-SHA256 token generation and validation for scoped push authentication.
|
||||
// ABOUTME: Tokens are derived from a key + scope + timestamp, enabling stateless validation with expiration.
|
||||
package receiver
|
||||
|
||||
import (
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"crypto/subtle"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultTokenTTL is the default time-to-live for push tokens.
|
||||
const DefaultTokenTTL = 2 * time.Hour
|
||||
|
||||
// GenerateToken creates a token with embedded timestamp for expiration support.
|
||||
// Format: "<unix_timestamp>:<hmac_hex>"
|
||||
func GenerateToken(key, org, repo, workflow, job string) string {
|
||||
return GenerateTokenAt(key, org, repo, workflow, job, time.Now())
|
||||
}
|
||||
|
||||
// GenerateTokenAt creates a token with the specified timestamp.
|
||||
// The HMAC input is "v1\x00<org>\x00<repo>\x00<workflow>\x00<job>\x00<timestamp>".
|
||||
func GenerateTokenAt(key, org, repo, workflow, job string, timestamp time.Time) string {
|
||||
ts := strconv.FormatInt(timestamp.Unix(), 10)
|
||||
mac := hmac.New(sha256.New, []byte(key))
|
||||
mac.Write([]byte("v1\x00" + org + "\x00" + repo + "\x00" + workflow + "\x00" + job + "\x00" + ts))
|
||||
return ts + ":" + hex.EncodeToString(mac.Sum(nil))
|
||||
}
|
||||
|
||||
// ValidateToken validates a token and checks expiration.
|
||||
// Returns true if the token is valid and not expired.
|
||||
func ValidateToken(key, token, org, repo, workflow, job string, ttl time.Duration) bool {
|
||||
return ValidateTokenAt(key, token, org, repo, workflow, job, ttl, time.Now())
|
||||
}
|
||||
|
||||
// ValidateTokenAt validates a token against a specific reference time.
|
||||
func ValidateTokenAt(key, token, org, repo, workflow, job string, ttl time.Duration, now time.Time) bool {
|
||||
parts := strings.SplitN(token, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
return false
|
||||
}
|
||||
|
||||
tsStr, hmacHex := parts[0], parts[1]
|
||||
ts, err := strconv.ParseInt(tsStr, 10, 64)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
tokenTime := time.Unix(ts, 0)
|
||||
if now.Sub(tokenTime) > ttl {
|
||||
return false
|
||||
}
|
||||
|
||||
// Recompute expected HMAC
|
||||
mac := hmac.New(sha256.New, []byte(key))
|
||||
mac.Write([]byte("v1\x00" + org + "\x00" + repo + "\x00" + workflow + "\x00" + job + "\x00" + tsStr))
|
||||
expected := hex.EncodeToString(mac.Sum(nil))
|
||||
|
||||
return subtle.ConstantTimeCompare([]byte(hmacHex), []byte(expected)) == 1
|
||||
}
|
||||
|
||||
// ParseTokenTimestamp extracts the timestamp from a timestamped token without validating it.
|
||||
func ParseTokenTimestamp(token string) (time.Time, error) {
|
||||
parts := strings.SplitN(token, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
return time.Time{}, fmt.Errorf("invalid token format")
|
||||
}
|
||||
ts, err := strconv.ParseInt(parts[0], 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, fmt.Errorf("invalid timestamp: %w", err)
|
||||
}
|
||||
return time.Unix(ts, 0), nil
|
||||
}
|
||||
180
internal/receiver/token_test.go
Normal file
180
internal/receiver/token_test.go
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
package receiver
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestGenerateToken_Format(t *testing.T) {
|
||||
token := GenerateToken("key", "org", "repo", "wf", "job")
|
||||
parts := strings.SplitN(token, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("token should have format 'timestamp:hmac', got %q", token)
|
||||
}
|
||||
if len(parts[1]) != 64 {
|
||||
t.Errorf("HMAC part length = %d, want 64", len(parts[1]))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTokenAt_Deterministic(t *testing.T) {
|
||||
ts := time.Unix(1700000000, 0)
|
||||
token1 := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
token2 := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
if token1 != token2 {
|
||||
t.Errorf("tokens differ: %q vs %q", token1, token2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTokenAt_ScopePinning(t *testing.T) {
|
||||
ts := time.Unix(1700000000, 0)
|
||||
base := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
|
||||
variants := []struct {
|
||||
name string
|
||||
org string
|
||||
repo string
|
||||
wf string
|
||||
job string
|
||||
}{
|
||||
{"different org", "other-org", "repo", "wf", "job"},
|
||||
{"different repo", "org", "other-repo", "wf", "job"},
|
||||
{"different workflow", "org", "repo", "other-wf", "job"},
|
||||
{"different job", "org", "repo", "wf", "other-job"},
|
||||
}
|
||||
|
||||
for _, v := range variants {
|
||||
t.Run(v.name, func(t *testing.T) {
|
||||
token := GenerateTokenAt("key", v.org, v.repo, v.wf, v.job, ts)
|
||||
if token == base {
|
||||
t.Errorf("token for %s should differ from base", v.name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTokenAt_DifferentKeys(t *testing.T) {
|
||||
ts := time.Unix(1700000000, 0)
|
||||
token1 := GenerateTokenAt("key-a", "org", "repo", "wf", "job", ts)
|
||||
token2 := GenerateTokenAt("key-b", "org", "repo", "wf", "job", ts)
|
||||
if token1 == token2 {
|
||||
t.Error("different keys should produce different tokens")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateTokenAt_DifferentTimestamps(t *testing.T) {
|
||||
ts1 := time.Unix(1700000000, 0)
|
||||
ts2 := time.Unix(1700000001, 0)
|
||||
token1 := GenerateTokenAt("key", "org", "repo", "wf", "job", ts1)
|
||||
token2 := GenerateTokenAt("key", "org", "repo", "wf", "job", ts2)
|
||||
if token1 == token2 {
|
||||
t.Error("different timestamps should produce different tokens")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_Correct(t *testing.T) {
|
||||
ts := time.Now()
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
if !ValidateToken("key", token, "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should accept correct token")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_WrongToken(t *testing.T) {
|
||||
if ValidateToken("key", "12345:deadbeef", "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject wrong token")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_WrongScope(t *testing.T) {
|
||||
ts := time.Now()
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
if ValidateToken("key", token, "org", "repo", "wf", "other-job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject token for different scope")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_Expired(t *testing.T) {
|
||||
ts := time.Now().Add(-10 * time.Minute)
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
if ValidateToken("key", token, "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject expired token")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateTokenAt_NotExpired(t *testing.T) {
|
||||
tokenTime := time.Unix(1700000000, 0)
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", tokenTime)
|
||||
|
||||
// Validate at 4 minutes later (within 5 minute TTL)
|
||||
now := tokenTime.Add(4 * time.Minute)
|
||||
if !ValidateTokenAt("key", token, "org", "repo", "wf", "job", 5*time.Minute, now) {
|
||||
t.Error("ValidateTokenAt should accept token within TTL")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateTokenAt_JustExpired(t *testing.T) {
|
||||
tokenTime := time.Unix(1700000000, 0)
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", tokenTime)
|
||||
|
||||
// Validate at 6 minutes later (beyond 5 minute TTL)
|
||||
now := tokenTime.Add(6 * time.Minute)
|
||||
if ValidateTokenAt("key", token, "org", "repo", "wf", "job", 5*time.Minute, now) {
|
||||
t.Error("ValidateTokenAt should reject token beyond TTL")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_InvalidFormat(t *testing.T) {
|
||||
if ValidateToken("key", "no-colon-here", "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject token without colon")
|
||||
}
|
||||
if ValidateToken("key", "not-a-number:abc123", "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject token with invalid timestamp")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTokenTimestamp(t *testing.T) {
|
||||
ts := time.Unix(1700000000, 0)
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
|
||||
parsed, err := ParseTokenTimestamp(token)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseTokenTimestamp failed: %v", err)
|
||||
}
|
||||
if !parsed.Equal(ts) {
|
||||
t.Errorf("parsed timestamp = %v, want %v", parsed, ts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTokenTimestamp_Invalid(t *testing.T) {
|
||||
_, err := ParseTokenTimestamp("no-colon")
|
||||
if err == nil {
|
||||
t.Error("ParseTokenTimestamp should fail on missing colon")
|
||||
}
|
||||
|
||||
_, err = ParseTokenTimestamp("not-a-number:abc123")
|
||||
if err == nil {
|
||||
t.Error("ParseTokenTimestamp should fail on invalid timestamp")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateToken_TamperedTimestamp(t *testing.T) {
|
||||
// Generate a valid token
|
||||
ts := time.Now()
|
||||
token := GenerateTokenAt("key", "org", "repo", "wf", "job", ts)
|
||||
|
||||
parts := strings.SplitN(token, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("unexpected token format: %q", token)
|
||||
}
|
||||
hmacPart := parts[1]
|
||||
|
||||
// Tamper with timestamp (e.g., attacker tries to extend token lifetime)
|
||||
tamperedTimestamp := strconv.FormatInt(time.Now().Add(1*time.Hour).Unix(), 10)
|
||||
tamperedToken := tamperedTimestamp + ":" + hmacPart
|
||||
|
||||
if ValidateToken("key", tamperedToken, "org", "repo", "wf", "job", 5*time.Minute) {
|
||||
t.Error("ValidateToken should reject token with tampered timestamp")
|
||||
}
|
||||
}
|
||||
45
internal/receiver/types.go
Normal file
45
internal/receiver/types.go
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
// ABOUTME: Data types for the metrics receiver service.
|
||||
// ABOUTME: Defines MetricsPayload combining execution metadata with run summary.
|
||||
package receiver
|
||||
|
||||
import "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
|
||||
|
||||
// ExecutionContext holds GitHub Actions style identifiers for a workflow run
|
||||
type ExecutionContext struct {
|
||||
Organization string `json:"organization"`
|
||||
Repository string `json:"repository"`
|
||||
Workflow string `json:"workflow"`
|
||||
Job string `json:"job"`
|
||||
RunID string `json:"run_id"`
|
||||
}
|
||||
|
||||
// MetricsPayload is the complete payload sent to the receiver
|
||||
type MetricsPayload struct {
|
||||
Execution ExecutionContext `json:"execution"`
|
||||
Summary summary.RunSummary `json:"run_summary"`
|
||||
}
|
||||
|
||||
// StoredMetric represents a metric record as stored in the database
|
||||
type StoredMetric struct {
|
||||
ID int64
|
||||
Organization string
|
||||
Repository string
|
||||
Workflow string
|
||||
Job string
|
||||
RunID string
|
||||
ReceivedAt string
|
||||
Payload string // JSON-encoded RunSummary
|
||||
}
|
||||
|
||||
// TokenRequest is the request body for POST /api/v1/token
|
||||
type TokenRequest struct {
|
||||
Organization string `json:"organization"`
|
||||
Repository string `json:"repository"`
|
||||
Workflow string `json:"workflow"`
|
||||
Job string `json:"job"`
|
||||
}
|
||||
|
||||
// TokenResponse is the response body for POST /api/v1/token
|
||||
type TokenResponse struct {
|
||||
Token string `json:"token"`
|
||||
}
|
||||
184
internal/summary/accumulator.go
Normal file
184
internal/summary/accumulator.go
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
// ABOUTME: Accumulates system metrics samples across a collection run.
|
||||
// ABOUTME: Computes peak, average, and P95 statistics for CPU and memory on demand.
|
||||
package summary
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"
|
||||
)
|
||||
|
||||
// containerAccumulator tracks metrics for a single container
|
||||
type containerAccumulator struct {
|
||||
cpuCoresValues []float64
|
||||
memoryBytesValues []float64
|
||||
}
|
||||
|
||||
// Accumulator collects metric samples and computes run-level statistics
|
||||
type Accumulator struct {
|
||||
topN int
|
||||
cpuValues []float64
|
||||
memBytesValues []float64
|
||||
memPctValues []float64
|
||||
processPeaks map[string]*ProcessPeak
|
||||
containers map[string]*containerAccumulator
|
||||
startTime time.Time
|
||||
endTime time.Time
|
||||
sampleCount int
|
||||
}
|
||||
|
||||
// NewAccumulator creates an accumulator that tracks the top N processes
|
||||
func NewAccumulator(topN int) *Accumulator {
|
||||
return &Accumulator{
|
||||
topN: topN,
|
||||
processPeaks: make(map[string]*ProcessPeak),
|
||||
containers: make(map[string]*containerAccumulator),
|
||||
}
|
||||
}
|
||||
|
||||
// Add records a single metrics sample
|
||||
func (a *Accumulator) Add(m *metrics.SystemMetrics) {
|
||||
a.sampleCount++
|
||||
if a.sampleCount == 1 {
|
||||
a.startTime = m.Timestamp
|
||||
}
|
||||
a.endTime = m.Timestamp
|
||||
|
||||
a.cpuValues = append(a.cpuValues, m.CPU.TotalPercent)
|
||||
a.memBytesValues = append(a.memBytesValues, float64(m.Memory.UsedBytes))
|
||||
a.memPctValues = append(a.memPctValues, m.Memory.UsedPercent)
|
||||
|
||||
for _, p := range m.TopCPU {
|
||||
a.updateProcessPeak(p)
|
||||
}
|
||||
for _, p := range m.TopMemory {
|
||||
a.updateProcessPeak(p)
|
||||
}
|
||||
|
||||
// Track per-container metrics
|
||||
for name, cgroup := range m.Cgroups {
|
||||
ca, ok := a.containers[name]
|
||||
if !ok {
|
||||
ca = &containerAccumulator{}
|
||||
a.containers[name] = ca
|
||||
}
|
||||
// Only record CPU when a valid delta was computed (skip first sample and underflow)
|
||||
if cgroup.CPU.HasDelta {
|
||||
ca.cpuCoresValues = append(ca.cpuCoresValues, cgroup.CPU.UsedCores)
|
||||
}
|
||||
ca.memoryBytesValues = append(ca.memoryBytesValues, float64(cgroup.Memory.TotalRSSBytes))
|
||||
}
|
||||
}
|
||||
|
||||
// Summarize computes and returns the run summary, or nil if no samples were added
|
||||
func (a *Accumulator) Summarize() *RunSummary {
|
||||
if a.sampleCount == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &RunSummary{
|
||||
StartTime: a.startTime,
|
||||
EndTime: a.endTime,
|
||||
DurationSeconds: a.endTime.Sub(a.startTime).Seconds(),
|
||||
SampleCount: a.sampleCount,
|
||||
CPUTotal: computeStats(a.cpuValues),
|
||||
MemUsedBytes: computeStats(a.memBytesValues),
|
||||
MemUsedPercent: computeStats(a.memPctValues),
|
||||
TopCPUProcesses: a.topProcesses(func(p *ProcessPeak) float64 { return p.PeakCPU }),
|
||||
TopMemProcesses: a.topProcesses(func(p *ProcessPeak) float64 { return float64(p.PeakMem) }),
|
||||
Containers: a.containerSummaries(),
|
||||
}
|
||||
}
|
||||
|
||||
// containerSummaries computes summaries for all tracked containers
|
||||
func (a *Accumulator) containerSummaries() []ContainerSummary {
|
||||
summaries := make([]ContainerSummary, 0, len(a.containers))
|
||||
for name, ca := range a.containers {
|
||||
summaries = append(summaries, ContainerSummary{
|
||||
Name: name,
|
||||
CPUCores: computeStats(ca.cpuCoresValues),
|
||||
MemoryBytes: computeStats(ca.memoryBytesValues),
|
||||
})
|
||||
}
|
||||
// Sort by name for consistent output
|
||||
sort.Slice(summaries, func(i, j int) bool {
|
||||
return summaries[i].Name < summaries[j].Name
|
||||
})
|
||||
return summaries
|
||||
}
|
||||
|
||||
// SampleCount returns the number of samples added
|
||||
func (a *Accumulator) SampleCount() int {
|
||||
return a.sampleCount
|
||||
}
|
||||
|
||||
// computeStats calculates peak, percentiles (p99, p95, p75, p50), and average from a sorted copy of the values
|
||||
func computeStats(values []float64) StatSummary {
|
||||
n := len(values)
|
||||
if n == 0 {
|
||||
return StatSummary{}
|
||||
}
|
||||
|
||||
sorted := make([]float64, n)
|
||||
copy(sorted, values)
|
||||
sort.Float64s(sorted)
|
||||
|
||||
var sum float64
|
||||
for _, v := range sorted {
|
||||
sum += v
|
||||
}
|
||||
|
||||
return StatSummary{
|
||||
Peak: sorted[n-1],
|
||||
P99: sorted[percentileIndex(n, 0.99)],
|
||||
P95: sorted[percentileIndex(n, 0.95)],
|
||||
P75: sorted[percentileIndex(n, 0.75)],
|
||||
P50: sorted[percentileIndex(n, 0.50)],
|
||||
Avg: sum / float64(n),
|
||||
}
|
||||
}
|
||||
|
||||
// percentileIndex returns the index for the given percentile (0.0-1.0)
|
||||
func percentileIndex(n int, percentile float64) int {
|
||||
return int(float64(n-1) * percentile)
|
||||
}
|
||||
|
||||
// updateProcessPeak merges a process observation into the peak tracking map
|
||||
func (a *Accumulator) updateProcessPeak(p metrics.ProcessMetrics) {
|
||||
key := fmt.Sprintf("%d:%s", p.PID, p.Name)
|
||||
existing, ok := a.processPeaks[key]
|
||||
if !ok {
|
||||
a.processPeaks[key] = &ProcessPeak{
|
||||
PID: p.PID,
|
||||
Name: p.Name,
|
||||
PeakCPU: p.CPUPercent,
|
||||
PeakMem: p.MemRSS,
|
||||
}
|
||||
return
|
||||
}
|
||||
if p.CPUPercent > existing.PeakCPU {
|
||||
existing.PeakCPU = p.CPUPercent
|
||||
}
|
||||
if p.MemRSS > existing.PeakMem {
|
||||
existing.PeakMem = p.MemRSS
|
||||
}
|
||||
}
|
||||
|
||||
// topProcesses returns the top N processes sorted by the given key function (descending)
|
||||
func (a *Accumulator) topProcesses(keyFn func(*ProcessPeak) float64) []ProcessPeak {
|
||||
all := make([]ProcessPeak, 0, len(a.processPeaks))
|
||||
for _, p := range a.processPeaks {
|
||||
all = append(all, *p)
|
||||
}
|
||||
|
||||
sort.Slice(all, func(i, j int) bool {
|
||||
return keyFn(&all[i]) > keyFn(&all[j])
|
||||
})
|
||||
|
||||
if len(all) > a.topN {
|
||||
all = all[:a.topN]
|
||||
}
|
||||
return all
|
||||
}
|
||||
598
internal/summary/accumulator_test.go
Normal file
598
internal/summary/accumulator_test.go
Normal file
|
|
@ -0,0 +1,598 @@
|
|||
// ABOUTME: Tests for the summary accumulator that tracks metrics across a run.
|
||||
// ABOUTME: Validates stats computation (peak/avg/P95), process peak tracking, and edge cases.
|
||||
package summary
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/metrics"
|
||||
)
|
||||
|
||||
func TestAccumulator_NoSamples(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
result := acc.Summarize()
|
||||
if result != nil {
|
||||
t.Errorf("expected nil summary for no samples, got %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_SingleSample(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: 42.5},
|
||||
Memory: metrics.MemoryMetrics{
|
||||
UsedBytes: 1000,
|
||||
UsedPercent: 50.0,
|
||||
},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// With a single sample, peak=avg=p95
|
||||
if s.CPUTotal.Peak != 42.5 {
|
||||
t.Errorf("CPU peak: got %f, want 42.5", s.CPUTotal.Peak)
|
||||
}
|
||||
if s.CPUTotal.Avg != 42.5 {
|
||||
t.Errorf("CPU avg: got %f, want 42.5", s.CPUTotal.Avg)
|
||||
}
|
||||
if s.CPUTotal.P95 != 42.5 {
|
||||
t.Errorf("CPU p95: got %f, want 42.5", s.CPUTotal.P95)
|
||||
}
|
||||
if s.MemUsedBytes.Peak != 1000 {
|
||||
t.Errorf("MemUsedBytes peak: got %f, want 1000", s.MemUsedBytes.Peak)
|
||||
}
|
||||
if s.MemUsedPercent.Peak != 50.0 {
|
||||
t.Errorf("MemUsedPercent peak: got %f, want 50.0", s.MemUsedPercent.Peak)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_Stats(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
cpuValues := []float64{10, 20, 30, 40, 50}
|
||||
for i, v := range cpuValues {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: v},
|
||||
Memory: metrics.MemoryMetrics{
|
||||
UsedBytes: uint64(v * 100),
|
||||
UsedPercent: v,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// Peak = max = 50
|
||||
if s.CPUTotal.Peak != 50 {
|
||||
t.Errorf("CPU peak: got %f, want 50", s.CPUTotal.Peak)
|
||||
}
|
||||
// Avg = (10+20+30+40+50)/5 = 30
|
||||
if s.CPUTotal.Avg != 30 {
|
||||
t.Errorf("CPU avg: got %f, want 30", s.CPUTotal.Avg)
|
||||
}
|
||||
// P95: sorted=[10,20,30,40,50], index=int(4*0.95)=int(3.8)=3, value=40
|
||||
if s.CPUTotal.P95 != 40 {
|
||||
t.Errorf("CPU p95: got %f, want 40", s.CPUTotal.P95)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_P95_LargerDataset(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
// 20 values: 1, 2, 3, ..., 20
|
||||
for i := 1; i <= 20; i++ {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: float64(i)},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// P95: sorted=[1..20], index=int(19*0.95)=int(18.05)=18, value=19
|
||||
if s.CPUTotal.P95 != 19 {
|
||||
t.Errorf("CPU p95: got %f, want 19", s.CPUTotal.P95)
|
||||
}
|
||||
// Avg = (1+2+...+20)/20 = 210/20 = 10.5
|
||||
if s.CPUTotal.Avg != 10.5 {
|
||||
t.Errorf("CPU avg: got %f, want 10.5", s.CPUTotal.Avg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_MemoryStats(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
memBytes := []uint64{1000, 2000, 3000, 4000, 5000}
|
||||
memPercent := []float64{10, 20, 30, 40, 50}
|
||||
|
||||
for i := range memBytes {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: 0},
|
||||
Memory: metrics.MemoryMetrics{
|
||||
UsedBytes: memBytes[i],
|
||||
UsedPercent: memPercent[i],
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// MemUsedBytes: peak=5000, avg=3000, p95=4000
|
||||
if s.MemUsedBytes.Peak != 5000 {
|
||||
t.Errorf("MemUsedBytes peak: got %f, want 5000", s.MemUsedBytes.Peak)
|
||||
}
|
||||
if s.MemUsedBytes.Avg != 3000 {
|
||||
t.Errorf("MemUsedBytes avg: got %f, want 3000", s.MemUsedBytes.Avg)
|
||||
}
|
||||
if s.MemUsedBytes.P95 != 4000 {
|
||||
t.Errorf("MemUsedBytes p95: got %f, want 4000", s.MemUsedBytes.P95)
|
||||
}
|
||||
|
||||
// MemUsedPercent: peak=50, avg=30, p95=40
|
||||
if s.MemUsedPercent.Peak != 50 {
|
||||
t.Errorf("MemUsedPercent peak: got %f, want 50", s.MemUsedPercent.Peak)
|
||||
}
|
||||
if s.MemUsedPercent.Avg != 30 {
|
||||
t.Errorf("MemUsedPercent avg: got %f, want 30", s.MemUsedPercent.Avg)
|
||||
}
|
||||
if s.MemUsedPercent.P95 != 40 {
|
||||
t.Errorf("MemUsedPercent p95: got %f, want 40", s.MemUsedPercent.P95)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ProcessPeaks(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
|
||||
// Same PID across two samples; peaks should be retained
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
TopCPU: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "a", CPUPercent: 10, MemRSS: 100},
|
||||
},
|
||||
TopMemory: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "a", CPUPercent: 10, MemRSS: 100},
|
||||
},
|
||||
})
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
TopCPU: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "a", CPUPercent: 20, MemRSS: 50},
|
||||
},
|
||||
TopMemory: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "a", CPUPercent: 5, MemRSS: 200},
|
||||
},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// Should find PID 1 with peak CPU=20, peak mem=200
|
||||
found := false
|
||||
for _, p := range s.TopCPUProcesses {
|
||||
if p.PID == 1 {
|
||||
found = true
|
||||
if p.PeakCPU != 20 {
|
||||
t.Errorf("PeakCPU: got %f, want 20", p.PeakCPU)
|
||||
}
|
||||
if p.PeakMem != 200 {
|
||||
t.Errorf("PeakMem: got %d, want 200", p.PeakMem)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("PID 1 not found in TopCPUProcesses")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ProcessPeaks_TopN(t *testing.T) {
|
||||
acc := NewAccumulator(2) // Only top 2
|
||||
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
TopCPU: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "low", CPUPercent: 10, MemRSS: 100},
|
||||
{PID: 2, Name: "mid", CPUPercent: 50, MemRSS: 500},
|
||||
{PID: 3, Name: "high", CPUPercent: 90, MemRSS: 900},
|
||||
},
|
||||
TopMemory: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "low", CPUPercent: 10, MemRSS: 100},
|
||||
{PID: 2, Name: "mid", CPUPercent: 50, MemRSS: 500},
|
||||
{PID: 3, Name: "high", CPUPercent: 90, MemRSS: 900},
|
||||
},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// TopCPUProcesses should have at most 2 entries, sorted by PeakCPU descending
|
||||
if len(s.TopCPUProcesses) != 2 {
|
||||
t.Fatalf("TopCPUProcesses length: got %d, want 2", len(s.TopCPUProcesses))
|
||||
}
|
||||
if s.TopCPUProcesses[0].PeakCPU != 90 {
|
||||
t.Errorf("TopCPU[0] PeakCPU: got %f, want 90", s.TopCPUProcesses[0].PeakCPU)
|
||||
}
|
||||
if s.TopCPUProcesses[1].PeakCPU != 50 {
|
||||
t.Errorf("TopCPU[1] PeakCPU: got %f, want 50", s.TopCPUProcesses[1].PeakCPU)
|
||||
}
|
||||
|
||||
// TopMemProcesses should have at most 2 entries, sorted by PeakMem descending
|
||||
if len(s.TopMemProcesses) != 2 {
|
||||
t.Fatalf("TopMemProcesses length: got %d, want 2", len(s.TopMemProcesses))
|
||||
}
|
||||
if s.TopMemProcesses[0].PeakMem != 900 {
|
||||
t.Errorf("TopMem[0] PeakMem: got %d, want 900", s.TopMemProcesses[0].PeakMem)
|
||||
}
|
||||
if s.TopMemProcesses[1].PeakMem != 500 {
|
||||
t.Errorf("TopMem[1] PeakMem: got %d, want 500", s.TopMemProcesses[1].PeakMem)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ProcessPeaks_Dedup(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
|
||||
// A process appears in both TopCPU and TopMemory
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
TopCPU: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "proc", CPUPercent: 80, MemRSS: 100},
|
||||
},
|
||||
TopMemory: []metrics.ProcessMetrics{
|
||||
{PID: 1, Name: "proc", CPUPercent: 30, MemRSS: 500},
|
||||
},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// The internal process map should have merged the peaks
|
||||
// PeakCPU should be 80 (from TopCPU), PeakMem should be 500 (from TopMemory)
|
||||
for _, p := range s.TopCPUProcesses {
|
||||
if p.PID == 1 {
|
||||
if p.PeakCPU != 80 {
|
||||
t.Errorf("PeakCPU: got %f, want 80", p.PeakCPU)
|
||||
}
|
||||
if p.PeakMem != 500 {
|
||||
t.Errorf("PeakMem: got %d, want 500", p.PeakMem)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_SampleCount(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
if acc.SampleCount() != 0 {
|
||||
t.Errorf("initial SampleCount: got %d, want 0", acc.SampleCount())
|
||||
}
|
||||
|
||||
for i := 0; i < 3; i++ {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
})
|
||||
}
|
||||
|
||||
if acc.SampleCount() != 3 {
|
||||
t.Errorf("SampleCount after 3 adds: got %d, want 3", acc.SampleCount())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_Duration(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
start := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
end := time.Date(2025, 1, 1, 0, 1, 0, 0, time.UTC) // 60 seconds later
|
||||
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: start,
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
})
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: end,
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
if !s.StartTime.Equal(start) {
|
||||
t.Errorf("StartTime: got %v, want %v", s.StartTime, start)
|
||||
}
|
||||
if s.DurationSeconds != 60 {
|
||||
t.Errorf("DurationSeconds: got %f, want 60", s.DurationSeconds)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_AllPercentiles(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
// 20 values: 1, 2, 3, ..., 20
|
||||
for i := 1; i <= 20; i++ {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: float64(i)},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// Peak = 20
|
||||
if s.CPUTotal.Peak != 20 {
|
||||
t.Errorf("CPU peak: got %f, want 20", s.CPUTotal.Peak)
|
||||
}
|
||||
// P99: index=int(19*0.99)=int(18.81)=18, value=19
|
||||
if s.CPUTotal.P99 != 19 {
|
||||
t.Errorf("CPU p99: got %f, want 19", s.CPUTotal.P99)
|
||||
}
|
||||
// P95: index=int(19*0.95)=int(18.05)=18, value=19
|
||||
if s.CPUTotal.P95 != 19 {
|
||||
t.Errorf("CPU p95: got %f, want 19", s.CPUTotal.P95)
|
||||
}
|
||||
// P75: index=int(19*0.75)=int(14.25)=14, value=15
|
||||
if s.CPUTotal.P75 != 15 {
|
||||
t.Errorf("CPU p75: got %f, want 15", s.CPUTotal.P75)
|
||||
}
|
||||
// P50: index=int(19*0.50)=int(9.5)=9, value=10
|
||||
if s.CPUTotal.P50 != 10 {
|
||||
t.Errorf("CPU p50: got %f, want 10", s.CPUTotal.P50)
|
||||
}
|
||||
// Avg = (1+2+...+20)/20 = 210/20 = 10.5
|
||||
if s.CPUTotal.Avg != 10.5 {
|
||||
t.Errorf("CPU avg: got %f, want 10.5", s.CPUTotal.Avg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ContainerMetrics(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
|
||||
// Add samples with container metrics (HasDelta=true to indicate valid CPU measurements)
|
||||
for i := 1; i <= 5; i++ {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: float64(i * 10)},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: map[string]*metrics.CgroupMetrics{
|
||||
"container-a": {
|
||||
Name: "container-a",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{
|
||||
TotalRSSBytes: uint64(i * 1000),
|
||||
},
|
||||
},
|
||||
"container-b": {
|
||||
Name: "container-b",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i * 2), HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{
|
||||
TotalRSSBytes: uint64(i * 2000),
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// Should have 2 containers
|
||||
if len(s.Containers) != 2 {
|
||||
t.Fatalf("Containers length: got %d, want 2", len(s.Containers))
|
||||
}
|
||||
|
||||
// Containers should be sorted by name
|
||||
if s.Containers[0].Name != "container-a" {
|
||||
t.Errorf("Containers[0].Name: got %s, want container-a", s.Containers[0].Name)
|
||||
}
|
||||
if s.Containers[1].Name != "container-b" {
|
||||
t.Errorf("Containers[1].Name: got %s, want container-b", s.Containers[1].Name)
|
||||
}
|
||||
|
||||
// Container A: CPU cores [1,2,3,4,5], peak=5, avg=3
|
||||
containerA := s.Containers[0]
|
||||
if containerA.CPUCores.Peak != 5 {
|
||||
t.Errorf("container-a CPUCores.Peak: got %f, want 5", containerA.CPUCores.Peak)
|
||||
}
|
||||
if containerA.CPUCores.Avg != 3 {
|
||||
t.Errorf("container-a CPUCores.Avg: got %f, want 3", containerA.CPUCores.Avg)
|
||||
}
|
||||
// Memory bytes [1000,2000,3000,4000,5000], peak=5000, avg=3000
|
||||
if containerA.MemoryBytes.Peak != 5000 {
|
||||
t.Errorf("container-a MemoryBytes.Peak: got %f, want 5000", containerA.MemoryBytes.Peak)
|
||||
}
|
||||
if containerA.MemoryBytes.Avg != 3000 {
|
||||
t.Errorf("container-a MemoryBytes.Avg: got %f, want 3000", containerA.MemoryBytes.Avg)
|
||||
}
|
||||
|
||||
// Container B: CPU cores [2,4,6,8,10], peak=10, avg=6
|
||||
containerB := s.Containers[1]
|
||||
if containerB.CPUCores.Peak != 10 {
|
||||
t.Errorf("container-b CPUCores.Peak: got %f, want 10", containerB.CPUCores.Peak)
|
||||
}
|
||||
if containerB.CPUCores.Avg != 6 {
|
||||
t.Errorf("container-b CPUCores.Avg: got %f, want 6", containerB.CPUCores.Avg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ContainerMetrics_NoContainers(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{TotalPercent: 50},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: nil, // No containers
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
if len(s.Containers) != 0 {
|
||||
t.Errorf("Containers length: got %d, want 0", len(s.Containers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ContainerMetrics_PartialSamples(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
|
||||
// First sample: only container-a
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: map[string]*metrics.CgroupMetrics{
|
||||
"container-a": {
|
||||
Name: "container-a",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: 1, HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// Second sample: both containers
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 2, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: map[string]*metrics.CgroupMetrics{
|
||||
"container-a": {
|
||||
Name: "container-a",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: 2, HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 2000},
|
||||
},
|
||||
"container-b": {
|
||||
Name: "container-b",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: 5, HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 5000},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
// Should have 2 containers
|
||||
if len(s.Containers) != 2 {
|
||||
t.Fatalf("Containers length: got %d, want 2", len(s.Containers))
|
||||
}
|
||||
|
||||
// Container A: 2 samples [1,2]
|
||||
containerA := s.Containers[0]
|
||||
if containerA.CPUCores.Peak != 2 {
|
||||
t.Errorf("container-a CPUCores.Peak: got %f, want 2", containerA.CPUCores.Peak)
|
||||
}
|
||||
if containerA.CPUCores.Avg != 1.5 {
|
||||
t.Errorf("container-a CPUCores.Avg: got %f, want 1.5", containerA.CPUCores.Avg)
|
||||
}
|
||||
|
||||
// Container B: 1 sample [5]
|
||||
containerB := s.Containers[1]
|
||||
if containerB.CPUCores.Peak != 5 {
|
||||
t.Errorf("container-b CPUCores.Peak: got %f, want 5", containerB.CPUCores.Peak)
|
||||
}
|
||||
if containerB.CPUCores.Avg != 5 {
|
||||
t.Errorf("container-b CPUCores.Avg: got %f, want 5", containerB.CPUCores.Avg)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAccumulator_ContainerMetrics_InvalidDeltaExcluded(t *testing.T) {
|
||||
acc := NewAccumulator(5)
|
||||
|
||||
// Sample 1: no valid CPU delta (first sample / underflow) — should be excluded from CPU stats
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, 1, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: map[string]*metrics.CgroupMetrics{
|
||||
"runner": {
|
||||
Name: "runner",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: 0, HasDelta: false},
|
||||
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: 1000},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
// Samples 2-4: valid deltas
|
||||
for i := 2; i <= 4; i++ {
|
||||
acc.Add(&metrics.SystemMetrics{
|
||||
Timestamp: time.Date(2025, 1, 1, 0, 0, i, 0, time.UTC),
|
||||
CPU: metrics.CPUMetrics{},
|
||||
Memory: metrics.MemoryMetrics{},
|
||||
Cgroups: map[string]*metrics.CgroupMetrics{
|
||||
"runner": {
|
||||
Name: "runner",
|
||||
CPU: metrics.CgroupCPUMetrics{UsedCores: float64(i), HasDelta: true},
|
||||
Memory: metrics.CgroupMemoryMetrics{TotalRSSBytes: uint64(i * 1000)},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
s := acc.Summarize()
|
||||
if s == nil {
|
||||
t.Fatal("expected non-nil summary")
|
||||
}
|
||||
|
||||
if len(s.Containers) != 1 {
|
||||
t.Fatalf("Containers length: got %d, want 1", len(s.Containers))
|
||||
}
|
||||
|
||||
runner := s.Containers[0]
|
||||
// CPU should only include samples 2,3,4 (values 2,3,4) — NOT the invalid zero
|
||||
// Peak=4, Avg=3, P50=3
|
||||
if runner.CPUCores.Peak != 4 {
|
||||
t.Errorf("CPUCores.Peak: got %f, want 4", runner.CPUCores.Peak)
|
||||
}
|
||||
if runner.CPUCores.Avg != 3 {
|
||||
t.Errorf("CPUCores.Avg: got %f, want 3", runner.CPUCores.Avg)
|
||||
}
|
||||
if runner.CPUCores.P50 != 3 {
|
||||
t.Errorf("CPUCores.P50: got %f, want 3", runner.CPUCores.P50)
|
||||
}
|
||||
|
||||
// Memory should include all 4 samples (memory is always valid)
|
||||
// Values: 1000, 2000, 3000, 4000
|
||||
if runner.MemoryBytes.Peak != 4000 {
|
||||
t.Errorf("MemoryBytes.Peak: got %f, want 4000", runner.MemoryBytes.Peak)
|
||||
}
|
||||
if runner.MemoryBytes.Avg != 2500 {
|
||||
t.Errorf("MemoryBytes.Avg: got %f, want 2500", runner.MemoryBytes.Avg)
|
||||
}
|
||||
}
|
||||
112
internal/summary/push.go
Normal file
@ -0,0 +1,112 @@
// ABOUTME: HTTP client for pushing run summaries to the metrics receiver.
// ABOUTME: Reads execution context from GitHub Actions style environment variables.
package summary

import (
    "bytes"
    "context"
    "encoding/json"
    "fmt"
    "net/http"
    "os"
    "time"
)

// ExecutionContext holds GitHub Actions style identifiers for a workflow run
type ExecutionContext struct {
    Organization string `json:"organization"`
    Repository   string `json:"repository"`
    Workflow     string `json:"workflow"`
    Job          string `json:"job"`
    RunID        string `json:"run_id"`
}

// MetricsPayload is the complete payload sent to the receiver
type MetricsPayload struct {
    Execution ExecutionContext `json:"execution"`
    Summary   RunSummary       `json:"run_summary"`
}

// PushClient sends metrics to the receiver service
type PushClient struct {
    endpoint string
    token    string
    client   *http.Client
    ctx      ExecutionContext
}

// NewPushClient creates a new push client configured from environment variables.
// If token is non-empty, it is sent as a Bearer token on each push request.
func NewPushClient(endpoint, token string) *PushClient {
    return &PushClient{
        endpoint: endpoint,
        token:    token,
        client: &http.Client{
            Timeout: 30 * time.Second,
        },
        ctx: ExecutionContextFromEnv(),
    }
}

// ExecutionContextFromEnv reads execution context from GitHub Actions environment variables
func ExecutionContextFromEnv() ExecutionContext {
    return ExecutionContext{
        Organization: getEnvWithFallback("GITHUB_REPOSITORY_OWNER", "GITEA_REPO_OWNER"),
        Repository:   getEnvWithFallback("GITHUB_REPOSITORY", "GITEA_REPO"),
        Workflow:     getEnvWithFallback("GITHUB_WORKFLOW", "GITEA_WORKFLOW"),
        Job:          getEnvWithFallback("GITHUB_JOB", "GITEA_JOB"),
        RunID:        getEnvWithFallback("GITHUB_RUN_ID", "GITEA_RUN_ID"),
    }
}

func getEnvWithFallback(keys ...string) string {
    for _, key := range keys {
        if val := os.Getenv(key); val != "" {
            return val
        }
    }
    return ""
}

// Push sends the run summary to the receiver
func (p *PushClient) Push(ctx context.Context, summary *RunSummary) error {
    if summary == nil {
        return nil
    }

    payload := MetricsPayload{
        Execution: p.ctx,
        Summary:   *summary,
    }

    body, err := json.Marshal(payload)
    if err != nil {
        return fmt.Errorf("marshaling payload: %w", err)
    }

    req, err := http.NewRequestWithContext(ctx, http.MethodPost, p.endpoint, bytes.NewReader(body))
    if err != nil {
        return fmt.Errorf("creating request: %w", err)
    }
    req.Header.Set("Content-Type", "application/json")
    if p.token != "" {
        req.Header.Set("Authorization", "Bearer "+p.token)
    }

    resp, err := p.client.Do(req)
    if err != nil {
        return fmt.Errorf("sending request: %w", err)
    }
    defer func() { _ = resp.Body.Close() }()

    if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        return fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }

    return nil
}

// ExecutionContext returns the current execution context
func (p *PushClient) ExecutionContext() ExecutionContext {
    return p.ctx
}
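For orientation, this is roughly how a caller would drive the push client at collector shutdown. A minimal sketch, assuming it is compiled inside this module (internal/summary cannot be imported from outside it); the pushOnShutdown helper is illustrative and not part of this change. Only NewPushClient and Push come from push.go above, while --push-endpoint and COLLECTOR_PUSH_TOKEN appear in the compose files later in this diff.

// Sketch only: intended call sequence for PushClient at collector shutdown.
package main

import (
    "context"
    "log"
    "os"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

// pushOnShutdown is a hypothetical helper; endpoint would come from the
// --push-endpoint flag and the token from COLLECTOR_PUSH_TOKEN.
func pushOnShutdown(endpoint string, s *summary.RunSummary) {
    if endpoint == "" {
        return // pushing is optional
    }
    client := summary.NewPushClient(endpoint, os.Getenv("COLLECTOR_PUSH_TOKEN"))

    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    if err := client.Push(ctx, s); err != nil {
        log.Printf("failed to push run summary: %v", err)
    }
}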
202
internal/summary/push_test.go
Normal file
@ -0,0 +1,202 @@
package summary

import (
    "context"
    "encoding/json"
    "net/http"
    "net/http/httptest"
    "testing"
    "time"
)

func TestPushClient_Push(t *testing.T) {
    var received MetricsPayload
    server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        if r.Method != http.MethodPost {
            t.Errorf("expected POST, got %s", r.Method)
        }
        if ct := r.Header.Get("Content-Type"); ct != "application/json" {
            t.Errorf("expected Content-Type application/json, got %s", ct)
        }
        if err := json.NewDecoder(r.Body).Decode(&received); err != nil {
            t.Errorf("failed to decode body: %v", err)
        }
        w.WriteHeader(http.StatusCreated)
    }))
    defer server.Close()

    client := NewPushClient(server.URL, "")
    client.ctx = ExecutionContext{
        Organization: "test-org",
        Repository:   "test-repo",
        Workflow:     "ci.yml",
        Job:          "build",
        RunID:        "12345",
    }

    summary := &RunSummary{
        StartTime:       time.Now().Add(-time.Minute),
        EndTime:         time.Now(),
        DurationSeconds: 60.0,
        SampleCount:     10,
        CPUTotal:        StatSummary{Peak: 80.0, Avg: 50.0, P95: 75.0},
    }

    err := client.Push(context.Background(), summary)
    if err != nil {
        t.Fatalf("Push() error = %v", err)
    }

    if received.Execution.Organization != "test-org" {
        t.Errorf("Organization = %q, want %q", received.Execution.Organization, "test-org")
    }
    if received.Execution.RunID != "12345" {
        t.Errorf("RunID = %q, want %q", received.Execution.RunID, "12345")
    }
    if received.Summary.SampleCount != 10 {
        t.Errorf("SampleCount = %d, want %d", received.Summary.SampleCount, 10)
    }
}

func TestPushClient_Push_NilSummary(t *testing.T) {
    client := NewPushClient("http://localhost:9999", "")
    err := client.Push(context.Background(), nil)
    if err != nil {
        t.Errorf("Push(nil) error = %v, want nil", err)
    }
}

func TestPushClient_Push_ServerError(t *testing.T) {
    server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusInternalServerError)
    }))
    defer server.Close()

    client := NewPushClient(server.URL, "")
    client.ctx = ExecutionContext{RunID: "test"}

    err := client.Push(context.Background(), &RunSummary{})
    if err == nil {
        t.Error("Push() expected error for 500 response, got nil")
    }
}

func TestPushClient_Push_ConnectionError(t *testing.T) {
    client := NewPushClient("http://localhost:1", "") // Invalid port
    client.ctx = ExecutionContext{RunID: "test"}

    err := client.Push(context.Background(), &RunSummary{})
    if err == nil {
        t.Error("Push() expected error for connection failure, got nil")
    }
}

func TestExecutionContextFromEnv(t *testing.T) {
    // Save and restore env
    origVars := map[string]string{
        "GITHUB_REPOSITORY_OWNER": "",
        "GITHUB_REPOSITORY":       "",
        "GITHUB_WORKFLOW":         "",
        "GITHUB_JOB":              "",
        "GITHUB_RUN_ID":           "",
    }
    for k := range origVars {
        origVars[k] = getEnvWithFallback(k)
    }
    defer func() {
        for k, v := range origVars {
            if v == "" {
                t.Setenv(k, "")
            }
        }
    }()

    t.Setenv("GITHUB_REPOSITORY_OWNER", "my-org")
    t.Setenv("GITHUB_REPOSITORY", "my-org/my-repo")
    t.Setenv("GITHUB_WORKFLOW", "CI")
    t.Setenv("GITHUB_JOB", "test")
    t.Setenv("GITHUB_RUN_ID", "999")

    ctx := ExecutionContextFromEnv()

    if ctx.Organization != "my-org" {
        t.Errorf("Organization = %q, want %q", ctx.Organization, "my-org")
    }
    if ctx.Repository != "my-org/my-repo" {
        t.Errorf("Repository = %q, want %q", ctx.Repository, "my-org/my-repo")
    }
    if ctx.Workflow != "CI" {
        t.Errorf("Workflow = %q, want %q", ctx.Workflow, "CI")
    }
    if ctx.Job != "test" {
        t.Errorf("Job = %q, want %q", ctx.Job, "test")
    }
    if ctx.RunID != "999" {
        t.Errorf("RunID = %q, want %q", ctx.RunID, "999")
    }
}

func TestExecutionContextFromEnv_GiteaFallback(t *testing.T) {
    t.Setenv("GITHUB_RUN_ID", "")
    t.Setenv("GITEA_RUN_ID", "gitea-123")

    ctx := ExecutionContextFromEnv()

    if ctx.RunID != "gitea-123" {
        t.Errorf("RunID = %q, want %q (Gitea fallback)", ctx.RunID, "gitea-123")
    }
}

func TestPushClient_Push_WithToken(t *testing.T) {
    var gotAuth string
    server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        gotAuth = r.Header.Get("Authorization")
        w.WriteHeader(http.StatusCreated)
    }))
    defer server.Close()

    client := NewPushClient(server.URL, "my-token")
    client.ctx = ExecutionContext{RunID: "test"}

    err := client.Push(context.Background(), &RunSummary{})
    if err != nil {
        t.Fatalf("Push() error = %v", err)
    }
    if gotAuth != "Bearer my-token" {
        t.Errorf("Authorization = %q, want %q", gotAuth, "Bearer my-token")
    }
}

func TestPushClient_Push_WithoutToken(t *testing.T) {
    var gotAuth string
    server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        gotAuth = r.Header.Get("Authorization")
        w.WriteHeader(http.StatusCreated)
    }))
    defer server.Close()

    client := NewPushClient(server.URL, "")
    client.ctx = ExecutionContext{RunID: "test"}

    err := client.Push(context.Background(), &RunSummary{})
    if err != nil {
        t.Fatalf("Push() error = %v", err)
    }
    if gotAuth != "" {
        t.Errorf("Authorization = %q, want empty", gotAuth)
    }
}

func TestPushClient_ExecutionContext(t *testing.T) {
    client := NewPushClient("http://example.com", "")
    client.ctx = ExecutionContext{
        Organization: "org",
        Repository:   "repo",
        RunID:        "run",
    }

    ctx := client.ExecutionContext()
    if ctx.Organization != "org" {
        t.Errorf("Organization = %q, want %q", ctx.Organization, "org")
    }
}
44
internal/summary/types.go
Normal file
@ -0,0 +1,44 @@
// ABOUTME: Data types for run-level summary statistics.
// ABOUTME: Defines StatSummary, ProcessPeak, ContainerSummary, and RunSummary used to report metrics on shutdown.
package summary

import "time"

// StatSummary holds peak, percentiles, and average for a metric across the run
type StatSummary struct {
    Peak float64 `json:"peak"`
    P99  float64 `json:"p99"`
    P95  float64 `json:"p95"`
    P75  float64 `json:"p75"`
    P50  float64 `json:"p50"`
    Avg  float64 `json:"avg"`
}

// ProcessPeak holds the peak CPU and memory observed for a single process
type ProcessPeak struct {
    PID     int     `json:"pid"`
    Name    string  `json:"name"`
    PeakCPU float64 `json:"peak_cpu_percent"`
    PeakMem uint64  `json:"peak_mem_rss_bytes"`
}

// ContainerSummary holds statistics for a single container across the run
type ContainerSummary struct {
    Name        string      `json:"name"`
    CPUCores    StatSummary `json:"cpu_cores"`
    MemoryBytes StatSummary `json:"memory_bytes"`
}

// RunSummary holds the complete summary of a collection run
type RunSummary struct {
    StartTime       time.Time          `json:"start_time"`
    EndTime         time.Time          `json:"end_time"`
    DurationSeconds float64            `json:"duration_seconds"`
    SampleCount     int                `json:"sample_count"`
    CPUTotal        StatSummary        `json:"cpu_total_percent"`
    MemUsedBytes    StatSummary        `json:"mem_used_bytes"`
    MemUsedPercent  StatSummary        `json:"mem_used_percent"`
    TopCPUProcesses []ProcessPeak      `json:"top_cpu_processes"`
    TopMemProcesses []ProcessPeak      `json:"top_mem_processes"`
    Containers      []ContainerSummary `json:"containers"`
}
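As a reading aid, the JSON field names on the wire follow directly from the struct tags above. A small sketch that prints an example payload; the values are made up for illustration, and the snippet assumes it is compiled inside this module so the internal package is importable.

// Illustration only: shows the wire format implied by the struct tags.
package main

import (
    "encoding/json"
    "fmt"
    "time"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

func main() {
    payload := summary.MetricsPayload{
        Execution: summary.ExecutionContext{Organization: "test-org", RunID: "12345"},
        Summary: summary.RunSummary{
            StartTime:       time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
            EndTime:         time.Date(2025, 1, 1, 0, 1, 0, 0, time.UTC),
            DurationSeconds: 60,
            SampleCount:     12,
            CPUTotal:        summary.StatSummary{Peak: 95.5, Avg: 42.0, P95: 88.0},
        },
    }
    out, _ := json.MarshalIndent(payload, "", "  ")
    // Top level has "execution" and "run_summary"; nested keys such as
    // "cpu_total_percent", "mem_used_bytes", and "containers" come from the tags.
    fmt.Println(string(out))
}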
61
internal/summary/writer.go
Normal file
@ -0,0 +1,61 @@
// ABOUTME: Emits a RunSummary as a structured log entry via slog.
// ABOUTME: Follows the same slog pattern as internal/output/logger.go for consistency.
package summary

import (
    "io"
    "log/slog"
)

// SummaryWriter outputs a RunSummary using structured logging
type SummaryWriter struct {
    logger *slog.Logger
}

// NewSummaryWriter creates a writer that emits summaries to the given output in the given format
func NewSummaryWriter(output io.Writer, format string) *SummaryWriter {
    opts := &slog.HandlerOptions{Level: slog.LevelInfo}

    var handler slog.Handler
    switch format {
    case "text":
        handler = slog.NewTextHandler(output, opts)
    default:
        handler = slog.NewJSONHandler(output, opts)
    }

    return &SummaryWriter{
        logger: slog.New(handler),
    }
}

// Write emits the run summary as a single structured log entry
func (w *SummaryWriter) Write(s *RunSummary) {
    if s == nil {
        return
    }

    w.logger.Info("run_summary",
        slog.Time("start_time", s.StartTime),
        slog.Time("end_time", s.EndTime),
        slog.Float64("duration_seconds", s.DurationSeconds),
        slog.Int("sample_count", s.SampleCount),
        slog.Group("cpu_total_percent",
            slog.Float64("peak", s.CPUTotal.Peak),
            slog.Float64("avg", s.CPUTotal.Avg),
            slog.Float64("p95", s.CPUTotal.P95),
        ),
        slog.Group("mem_used_bytes",
            slog.Float64("peak", s.MemUsedBytes.Peak),
            slog.Float64("avg", s.MemUsedBytes.Avg),
            slog.Float64("p95", s.MemUsedBytes.P95),
        ),
        slog.Group("mem_used_percent",
            slog.Float64("peak", s.MemUsedPercent.Peak),
            slog.Float64("avg", s.MemUsedPercent.Avg),
            slog.Float64("p95", s.MemUsedPercent.P95),
        ),
        slog.Any("top_cpu_processes", s.TopCPUProcesses),
        slog.Any("top_mem_processes", s.TopMemProcesses),
    )
}
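And the writer side, sketched under the same in-module assumption: the value of the --log-format flag ("json" or "text") maps onto the switch in NewSummaryWriter, and a nil summary is silently ignored. The emitSummary helper below is illustrative, not part of this change.

// Sketch only: emit the final run summary to stdout at shutdown.
package main

import (
    "os"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/summary"
)

// emitSummary is a hypothetical helper; logFormat would come from --log-format.
func emitSummary(s *summary.RunSummary, logFormat string) {
    w := summary.NewSummaryWriter(os.Stdout, logFormat)
    w.Write(s) // one structured "run_summary" log entry
}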
93
internal/summary/writer_test.go
Normal file
@ -0,0 +1,93 @@
// ABOUTME: Tests for the summary writer that emits run summaries via slog.
// ABOUTME: Validates JSON output, text output, and nil summary handling.
package summary

import (
    "bytes"
    "strings"
    "testing"
    "time"
)

func TestSummaryWriter_JSON(t *testing.T) {
    var buf bytes.Buffer
    w := NewSummaryWriter(&buf, "json")

    s := &RunSummary{
        StartTime:       time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
        EndTime:         time.Date(2025, 1, 1, 0, 1, 0, 0, time.UTC),
        DurationSeconds: 60,
        SampleCount:     12,
        CPUTotal:        StatSummary{Peak: 95.5, Avg: 42.0, P95: 88.0},
        MemUsedBytes:    StatSummary{Peak: 8000000, Avg: 4000000, P95: 7500000},
        MemUsedPercent:  StatSummary{Peak: 80.0, Avg: 40.0, P95: 75.0},
        TopCPUProcesses: []ProcessPeak{
            {PID: 1, Name: "busy", PeakCPU: 95.5, PeakMem: 1000},
        },
        TopMemProcesses: []ProcessPeak{
            {PID: 2, Name: "hungry", PeakCPU: 10.0, PeakMem: 8000000},
        },
    }

    w.Write(s)

    output := buf.String()
    if !strings.Contains(output, "run_summary") {
        t.Errorf("output should contain 'run_summary', got: %s", output)
    }
    if !strings.Contains(output, "duration_seconds") {
        t.Errorf("output should contain 'duration_seconds', got: %s", output)
    }
    if !strings.Contains(output, "sample_count") {
        t.Errorf("output should contain 'sample_count', got: %s", output)
    }
    if !strings.Contains(output, "cpu_total_percent") {
        t.Errorf("output should contain 'cpu_total_percent', got: %s", output)
    }
    if !strings.Contains(output, "mem_used_bytes") {
        t.Errorf("output should contain 'mem_used_bytes', got: %s", output)
    }
    if !strings.Contains(output, "top_cpu_processes") {
        t.Errorf("output should contain 'top_cpu_processes', got: %s", output)
    }
    if !strings.Contains(output, "top_mem_processes") {
        t.Errorf("output should contain 'top_mem_processes', got: %s", output)
    }
}

func TestSummaryWriter_Text(t *testing.T) {
    var buf bytes.Buffer
    w := NewSummaryWriter(&buf, "text")

    s := &RunSummary{
        StartTime:       time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC),
        EndTime:         time.Date(2025, 1, 1, 0, 1, 0, 0, time.UTC),
        DurationSeconds: 60,
        SampleCount:     12,
        CPUTotal:        StatSummary{Peak: 95.5, Avg: 42.0, P95: 88.0},
        MemUsedBytes:    StatSummary{Peak: 8000000, Avg: 4000000, P95: 7500000},
        MemUsedPercent:  StatSummary{Peak: 80.0, Avg: 40.0, P95: 75.0},
    }

    w.Write(s)

    output := buf.String()
    if !strings.Contains(output, "run_summary") {
        t.Errorf("output should contain 'run_summary', got: %s", output)
    }
    if !strings.Contains(output, "duration_seconds") {
        t.Errorf("output should contain 'duration_seconds', got: %s", output)
    }
}

func TestSummaryWriter_NilSummary(t *testing.T) {
    var buf bytes.Buffer
    w := NewSummaryWriter(&buf, "json")

    // Should not panic and should not write anything
    w.Write(nil)

    if buf.Len() != 0 {
        t.Errorf("expected no output for nil summary, got: %s", buf.String())
    }
}
1096
pkg/client/client.gen.go
Normal file
File diff suppressed because it is too large
64
scripts/extract-openapi/main.go
Normal file
@ -0,0 +1,64 @@
//go:build ignore

// ABOUTME: Extracts OpenAPI spec from Fuego server without running it.
// ABOUTME: Run with: go run scripts/extract-openapi/main.go
package main

import (
    "encoding/json"
    "fmt"
    "io"
    "log/slog"
    "os"

    "github.com/getkin/kin-openapi/openapi3"
    "github.com/go-fuego/fuego"

    "edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer/internal/receiver"
)

func main() {
    // Create a minimal handler (store is nil, won't be used)
    logger := slog.New(slog.NewTextHandler(io.Discard, nil))
    handler := receiver.NewHandler(nil, logger, "dummy", "dummy", 0)

    // Create Fuego server with OpenAPI config
    s := fuego.NewServer(
        fuego.WithoutStartupMessages(),
        fuego.WithEngineOptions(
            fuego.WithOpenAPIConfig(fuego.OpenAPIConfig{
                DisableLocalSave: true,
                Info: &openapi3.Info{
                    Title:       "Forgejo Runner Resource Collector API",
                    Version:     "1.0.0",
                    Description: "HTTP service that receives and stores CI/CD resource metrics from collectors, providing query and sizing recommendation APIs.",
                    Contact: &openapi3.Contact{
                        Name: "API Support",
                        URL:  "https://edp.buildth.ing/DevFW-CICD/forgejo-runner-sizer",
                    },
                    License: &openapi3.License{
                        Name: "Apache 2.0",
                        URL:  "http://www.apache.org/licenses/LICENSE-2.0.html",
                    },
                },
            }),
        ),
    )

    // Register routes to populate OpenAPI spec
    handler.RegisterRoutes(s)

    // Output OpenAPI spec as JSON
    spec, err := json.MarshalIndent(s.OpenAPI.Description(), "", " ")
    if err != nil {
        fmt.Fprintf(os.Stderr, "Error marshaling OpenAPI spec: %v\n", err)
        os.Exit(1)
    }

    if err := os.WriteFile("docs/openapi.json", spec, 0644); err != nil {
        fmt.Fprintf(os.Stderr, "Error writing docs/openapi.json: %v\n", err)
        os.Exit(1)
    }

    fmt.Println("Generated docs/openapi.json")
}
131
test/docker/docker-compose-stress.yaml
Normal file
@ -0,0 +1,131 @@
# Docker Compose stress test with receiver
# See README.md "Docker Compose" section for the full token workflow.
#
# This test:
# 1. Starts the metrics receiver (with read-token and hmac-key)
# 2. You generate a scoped push token via POST /api/v1/token
# 3. You start the collector with COLLECTOR_PUSH_TOKEN set
# 4. Runs heavy CPU/memory workloads in multiple containers with a shared PID namespace
# 5. The collector gathers metrics and pushes a summary to the receiver on shutdown
#
# To trigger the push, stop the collector gracefully:
#   docker compose -f test/docker/docker-compose-stress.yaml stop collector

services:
  # Metrics receiver - stores summaries in SQLite
  receiver:
    build:
      context: ../..
      dockerfile: Dockerfile
      target: receiver
    ports:
      - "9080:8080"
    environment:
      - DB_PATH=/data/metrics.db
      - RECEIVER_READ_TOKEN=dummyreadtoken
      - RECEIVER_HMAC_KEY=dummyhmackey
    volumes:
      - receiver-data:/data
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/health"]
      interval: 5s
      timeout: 3s
      retries: 3

  # Heavy CPU workload - uses stress-ng (owns the PID namespace)
  cpu-stress:
    image: alexeiled/stress-ng:latest
    command:
      - --cpu
      - "3"
      - --timeout
      - "300s"
      - --metrics-brief
    deploy:
      resources:
        limits:
          cpus: "2.0"
          memory: 128M
    # This container owns the PID namespace

  # Memory-intensive workload - shares PID namespace with cpu-stress
  mem-stress:
    image: alexeiled/stress-ng:latest
    command:
      - --vm
      - "2"
      - --vm-bytes
      - "64M"
      - --timeout
      - "300s"
      - --metrics-brief
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    pid: "service:cpu-stress"
    depends_on:
      - cpu-stress

  # IO workload - continuous disk writes
  io-stress:
    image: busybox:latest
    command:
      - /bin/sh
      - -c
      - |
        echo "IO stress started"
        # 'dd' will be our identifiable process
        while true; do
          dd if=/dev/zero of=/tmp/testfile bs=1M count=100 2>/dev/null
          rm -f /tmp/testfile
        done
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 128M
    pid: "service:cpu-stress"
    depends_on:
      - cpu-stress

  # Resource collector - pushes to receiver on shutdown
  collector:
    build:
      context: ../..
      dockerfile: Dockerfile
      target: collector
    command:
      - --interval=2s
      - --top=10
      - --log-format=json
      - --push-endpoint=http://receiver:8080/api/v1/metrics
    environment:
      # Push token - pass via COLLECTOR_PUSH_TOKEN from host env
      COLLECTOR_PUSH_TOKEN: "${COLLECTOR_PUSH_TOKEN}"
      # Execution context for the receiver
      GITHUB_REPOSITORY_OWNER: "test-org"
      GITHUB_REPOSITORY: "test-org/stress-test"
      GITHUB_WORKFLOW: "stress-test-workflow"
      GITHUB_JOB: "heavy-workload"
      GITHUB_RUN_ID: "stress-run-001"
      # Cgroup configuration
      # stress-ng-cpu is the worker process name for CPU stress
      # stress-ng-vm is the worker process name for memory stress
      CGROUP_PROCESS_MAP: '{"stress-ng-cpu":"cpu-stress","stress-ng-vm":"mem-stress","dd":"io-stress","sizer":"collector"}'
      CGROUP_LIMITS: '{"cpu-stress":{"cpu":"1","memory":"128Mi"},"mem-stress":{"cpu":"500m","memory":"256Mi"},"io-stress":{"cpu":"500m","memory":"128Mi"},"collector":{"cpu":"200m","memory":"64Mi"}}'
    deploy:
      resources:
        limits:
          cpus: "0.2"
          memory: 64M
    pid: "service:cpu-stress"
    depends_on:
      receiver:
        condition: service_healthy
      cpu-stress:
        condition: service_started

volumes:
  receiver-data:
81
test/docker/docker-compose.yaml
Normal file
@ -0,0 +1,81 @@
# Docker Compose test setup for cgroup grouping verification
# Run with: docker compose -f test/docker/docker-compose.yaml up
#
# NOTE: Docker Compose doesn't have a direct equivalent to K8s shareProcessNamespace.
# Options:
#   1. pid: "host" - sees ALL host processes (not container-specific)
#   2. pid: "service:<name>" - chains PID namespace to another service
#
# For proper testing, use Kubernetes or run containers manually with --pid=container:<id>

services:
  # Simulate a runner workload (this will be the "root" of the shared PID namespace)
  # Uses 'cat' reading from a fifo as a unique identifiable process
  runner:
    image: busybox:latest
    command:
      - /bin/sh
      - -c
      - |
        echo "Runner started (PID 1 in namespace)"
        mkfifo /tmp/runner_fifo
        # 'cat' will be our identifiable runner process (blocks on fifo)
        cat /tmp/runner_fifo &
        CAT_PID=$!
        # Generate CPU load with dd
        while true; do
          dd if=/dev/zero of=/dev/null bs=1M count=50 2>/dev/null
        done
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 256M
    # This container owns the PID namespace

  # Simulate a sidecar service - shares PID namespace with runner
  sidecar:
    image: busybox:latest
    command:
      - /bin/sh
      - -c
      - |
        echo "Sidecar started"
        # List processes to verify shared namespace
        ps aux
        while true; do
          sleep 10
        done
    deploy:
      resources:
        limits:
          cpus: "0.1"
          memory: 128M
    pid: "service:runner" # Share PID namespace with runner
    depends_on:
      - runner

  # Resource collector - shares PID namespace with runner
  collector:
    build:
      context: ../..
      dockerfile: Dockerfile
      target: collector
    command:
      - --interval=3s
      - --top=5
      - --log-format=json
    environment:
      # Map unique process names to container names
      # 'cat' runs only in runner, 'sleep' runs only in sidecar
      CGROUP_PROCESS_MAP: '{"cat":"runner","sleep":"sidecar","sizer":"collector"}'
      CGROUP_LIMITS: '{"runner":{"cpu":"500m","memory":"256Mi"},"sidecar":{"cpu":"100m","memory":"128Mi"},"collector":{"cpu":"100m","memory":"64Mi"}}'
    deploy:
      resources:
        limits:
          cpus: "0.1"
          memory: 64M
    pid: "service:runner" # Share PID namespace with runner
    depends_on:
      - runner
      - sidecar
148
test/k8s/test-cgroup-grouping.yaml
Normal file
@ -0,0 +1,148 @@
# Test manifest to verify cgroup grouping behavior
# This pod runs multiple containers with different resource limits
# and a collector sidecar that groups metrics by cgroup/container
apiVersion: v1
kind: Pod
metadata:
  name: test-cgroup-grouping
  labels:
    app: test-cgroup-grouping
spec:
  # Share PID namespace so collector can see all processes
  shareProcessNamespace: true

  containers:
    # Main workload container - simulates a runner
    - name: runner
      image: busybox:latest
      command:
        - /bin/sh
        - -c
        - |
          echo "Runner container started"
          # Simulate some CPU work
          while true; do
            dd if=/dev/zero of=/dev/null bs=1M count=100 2>/dev/null
            sleep 1
          done
      resources:
        requests:
          cpu: "100m"
          memory: "64Mi"
        limits:
          cpu: "500m"
          memory: "256Mi"

    # Sidecar container - simulates nginx or another service
    - name: sidecar
      image: busybox:latest
      command:
        - /bin/sh
        - -c
        - |
          echo "Sidecar container started"
          # Simulate some lighter work
          while true; do
            sleep 5
          done
      resources:
        requests:
          cpu: "50m"
          memory: "32Mi"
        limits:
          cpu: "100m"
          memory: "128Mi"

    # Resource collector sidecar
    - name: collector
      image: ghcr.io/your-org/forgejo-runner-sizer:latest
      args:
        - --interval=5s
        - --top=3
      env:
        # Map process names to container names
        # "sh" is the main process in busybox containers
        # You may need to adjust based on actual process names
        - name: CGROUP_PROCESS_MAP
          value: |
            {"sh":"runner","sleep":"sidecar","collector":"collector"}
        # Define limits for each container (must match names in CGROUP_PROCESS_MAP)
        - name: CGROUP_LIMITS
          value: |
            {"runner":{"cpu":"500m","memory":"256Mi"},"sidecar":{"cpu":"100m","memory":"128Mi"},"collector":{"cpu":"100m","memory":"64Mi"}}
      resources:
        requests:
          cpu: "50m"
          memory: "32Mi"
        limits:
          cpu: "100m"
          memory: "64Mi"
      # Mount proc read-only for process discovery
      volumeMounts:
        - name: proc
          mountPath: /proc
          readOnly: true

  volumes:
    - name: proc
      hostPath:
        path: /proc
        type: Directory

  restartPolicy: Never
---
# Alternative: a simpler stress-based pod for longer-running tests
apiVersion: v1
kind: Pod
metadata:
  name: test-cgroup-simple
  labels:
    app: test-cgroup-simple
spec:
  shareProcessNamespace: true

  containers:
    # Stress container to generate CPU/memory load
    - name: stress
      image: progrium/stress:latest
      args:
        - --cpu
        - "1"
        - --vm
        - "1"
        - --vm-bytes
        - "64M"
        - --timeout
        - "300s"
      resources:
        limits:
          cpu: "500m"
          memory: "128Mi"

    # Collector
    - name: collector
      image: ghcr.io/your-org/forgejo-runner-sizer:latest
      args:
        - --interval=2s
        - --top=5
      env:
        - name: CGROUP_PROCESS_MAP
          value: '{"stress":"stress","collector":"collector"}'
        - name: CGROUP_LIMITS
          value: '{"stress":{"cpu":"500m","memory":"128Mi"},"collector":{"cpu":"100m","memory":"64Mi"}}'
      resources:
        limits:
          cpu: "100m"
          memory: "64Mi"
      volumeMounts:
        - name: proc
          mountPath: /proc
          readOnly: true

  volumes:
    - name: proc
      hostPath:
        path: /proc
        type: Directory

  restartPolicy: Never
36
test/local-test.sh
Executable file
@ -0,0 +1,36 @@
#!/bin/bash
# Local test script to verify cgroup grouping
# Run from project root: ./test/local-test.sh

set -e

echo "Building collector..."
go build -o bin/collector ./cmd/collector

echo ""
echo "Testing cgroup parsing on current system..."
echo "Current process cgroup:"
cat /proc/self/cgroup 2>/dev/null || echo "Cannot read /proc/self/cgroup (expected on macOS)"

echo ""
echo "Running collector for 10 seconds with cgroup grouping..."
echo "Press Ctrl+C to stop early"
echo ""

# Set up test environment variables
# Map common process names to container names
export CGROUP_PROCESS_MAP='{"bash":"shell","collector":"collector","zsh":"shell"}'
export CGROUP_LIMITS='{"shell":{"cpu":"2","memory":"4Gi"},"collector":{"cpu":"1","memory":"1Gi"}}'

# Run collector
timeout 10 ./bin/collector \
  --interval=2s \
  --top=5 \
  --log-format=json \
  2>/dev/null || true

echo ""
echo "Test complete!"
echo ""
echo "Note: On macOS, cgroup paths will be empty since cgroups are a Linux feature."
echo "To test properly, run in a Linux container or VM."