From 5b983692c88acb950bd203e8de5ca89b8e9b8810 Mon Sep 17 00:00:00 2001
From: Manuel Ganter
Date: Fri, 6 Feb 2026 15:11:22 +0100
Subject: [PATCH] test: add stress test with receiver integration

Docker Compose setup that:
- Runs metrics receiver with SQLite storage
- Spawns CPU and memory stress workloads using stress-ng
- Uses shared PID namespace (pid: service:cpu-stress) for proper isolation
- Collector gathers metrics and pushes summary on shutdown

Known issue: Container CPU summary may show overflow values on first sample
due to delta calculation - to be fixed in accumulator.

Co-Authored-By: Claude Opus 4.5
---
Reviewer notes (free text after the three-dash line; not part of the commit
message and not part of the diff payload):

* cpu-stress is the PID-namespace owner: mem-stress, io-stress and the
  collector each set pid: "service:cpu-stress", and the cpu-stress command
  is bounded by --timeout 300s. NOTE(review): presumably the containers that
  joined the namespace are torn down when the owner exits, so the graceful
  `docker compose ... stop collector` described in the file header would
  have to happen inside that window for the summary push to occur - confirm.
* CGROUP_LIMITS appears to restate the deploy.resources.limits values of
  cpu-stress, mem-stress, io-stress and collector (1 / 500m / 500m / 200m
  CPU and 128 / 256 / 128 / 64 for memory). Keep the two in sync when
  changing either.
* The "resource-collec" key in CGROUP_PROCESS_MAP looks like a 15-character
  truncation of the collector's process name - TODO confirm against the
  collector binary name.
* The receiver healthcheck runs wget inside the container, so the `receiver`
  build target is assumed to ship wget - verify against the Dockerfile.
* The receiver publishes container port 8080 on host port 9080; the
  collector reaches it over the compose network at http://receiver:8080.
* NOTE(review): the From: and Co-Authored-By: lines carry no <email> part
  in this copy - confirm they were not stripped before applying the patch.

 test/docker/docker-compose-stress.yaml | 125 +++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 test/docker/docker-compose-stress.yaml

diff --git a/test/docker/docker-compose-stress.yaml b/test/docker/docker-compose-stress.yaml
new file mode 100644
index 0000000..89ce7e6
--- /dev/null
+++ b/test/docker/docker-compose-stress.yaml
@@ -0,0 +1,125 @@
+# Docker Compose stress test with receiver
+# Run with: docker compose -f test/docker/docker-compose-stress.yaml up
+#
+# This test:
+# 1. Starts the metrics receiver
+# 2. Runs heavy CPU/memory workloads in multiple containers with shared PID namespace
+# 3. Collector gathers metrics and pushes summary to receiver on shutdown
+#
+# To trigger the push, stop the collector gracefully:
+# docker compose -f test/docker/docker-compose-stress.yaml stop collector
+
+services:
+  # Metrics receiver - stores summaries in SQLite
+  receiver:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+      target: receiver
+    ports:
+      - "9080:8080"
+    environment:
+      - DB_PATH=/data/metrics.db
+    volumes:
+      - receiver-data:/data
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/health"]
+      interval: 5s
+      timeout: 3s
+      retries: 3
+
+  # Heavy CPU workload - uses stress-ng (owns the PID namespace)
+  cpu-stress:
+    image: alexeiled/stress-ng:latest
+    command:
+      - --cpu
+      - "2"
+      - --timeout
+      - "300s"
+      - --metrics-brief
+    deploy:
+      resources:
+        limits:
+          cpus: "1.0"
+          memory: 128M
+    # This container owns the PID namespace
+
+  # Memory-intensive workload - shares PID namespace with cpu-stress
+  mem-stress:
+    image: alexeiled/stress-ng:latest
+    command:
+      - --vm
+      - "2"
+      - --vm-bytes
+      - "64M"
+      - --timeout
+      - "300s"
+      - --metrics-brief
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 256M
+    pid: "service:cpu-stress"
+    depends_on:
+      - cpu-stress
+
+  # IO workload - continuous disk writes
+  io-stress:
+    image: busybox:latest
+    command:
+      - /bin/sh
+      - -c
+      - |
+        echo "IO stress started"
+        # 'dd' will be our identifiable process
+        while true; do
+          dd if=/dev/zero of=/tmp/testfile bs=1M count=100 2>/dev/null
+          rm -f /tmp/testfile
+        done
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 128M
+    pid: "service:cpu-stress"
+    depends_on:
+      - cpu-stress
+
+  # Resource collector - pushes to receiver on shutdown
+  collector:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+      target: collector
+    command:
+      - --interval=2s
+      - --top=10
+      - --log-format=json
+      - --push-endpoint=http://receiver:8080/api/v1/metrics
+    environment:
+      # Execution context for the receiver
+      GITHUB_REPOSITORY_OWNER: "test-org"
+      GITHUB_REPOSITORY: "test-org/stress-test"
+      GITHUB_WORKFLOW: "stress-test-workflow"
+      GITHUB_JOB: "heavy-workload"
+      GITHUB_RUN_ID: "stress-run-001"
+      # Cgroup configuration
+      # stress-ng-cpu is the worker process name for CPU stress
+      # stress-ng-vm is the worker process name for memory stress
+      CGROUP_PROCESS_MAP: '{"stress-ng-cpu":"cpu-stress","stress-ng-vm":"mem-stress","dd":"io-stress","resource-collec":"collector"}'
+      CGROUP_LIMITS: '{"cpu-stress":{"cpu":"1","memory":"128Mi"},"mem-stress":{"cpu":"500m","memory":"256Mi"},"io-stress":{"cpu":"500m","memory":"128Mi"},"collector":{"cpu":"200m","memory":"64Mi"}}'
+    deploy:
+      resources:
+        limits:
+          cpus: "0.2"
+          memory: 64M
+    pid: "service:cpu-stress"
+    depends_on:
+      receiver:
+        condition: service_healthy
+      cpu-stress:
+        condition: service_started
+
+volumes:
+  receiver-data: