test: add stress test with receiver integration

Docker Compose setup that:
- Runs the metrics receiver with SQLite storage
- Spawns CPU and memory stress workloads using stress-ng
- Uses a shared PID namespace (pid: service:cpu-stress) for proper isolation
- Has the collector gather metrics and push a summary on shutdown

Known issue: Container CPU summary may show overflow values on the first sample due to the delta calculation - to be fixed in the accumulator.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 15:11:22 +01:00 · 2026-02-06 15:11:22 +01:00 · 5b983692c8
commit 5b983692c8
parent 6770cfcea7
1 changed files with 125 additions and 0 deletions
--- a/test/docker/docker-compose-stress.yaml
+++ b/test/docker/docker-compose-stress.yaml
@@ -0,0 +1,125 @@
+# Docker Compose stress test with receiver
+# Run with: docker compose -f test/docker/docker-compose-stress.yaml up
+#
+# This test:
+# 1. Starts the metrics receiver
+# 2. Runs heavy CPU, memory, and IO workloads in multiple containers with a shared PID namespace
+# 3. Collector gathers metrics and pushes summary to receiver on shutdown
+#
+# To trigger the push, stop the collector gracefully:
+#   docker compose -f test/docker/docker-compose-stress.yaml stop collector
+
+services:
+  # Receiver service: accepts pushed summaries and stores them in SQLite at DB_PATH
+  receiver:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+      target: receiver
+    environment:
+      DB_PATH: /data/metrics.db  # lives on the receiver-data volume mounted below
+    volumes:
+      - receiver-data:/data
+    ports:
+      - '9080:8080'  # host port 9080 -> container port 8080
+    healthcheck:
+      test: ['CMD', 'wget', '-q', '--spider', 'http://localhost:8080/health']
+      interval: 5s
+      timeout: 3s
+      retries: 3
+
+  # Heavy CPU workload (stress-ng). Owns the PID namespace that other services join.
+  cpu-stress:
+    image: alexeiled/stress-ng:latest
+    command:
+      - --cpu
+      - "2"
+      - --timeout
+      - "0"  # 0 = no timeout: keep running until the container is stopped
+      - --metrics-brief
+    deploy:
+      resources:
+        limits:
+          cpus: "1.0"
+          memory: 128M
+    # Must outlive its PID-namespace peers: if this init exits, the kernel SIGKILLs them all.
+
+  # Memory workload (stress-ng --vm); runs inside the PID namespace of cpu-stress
+  mem-stress:
+    image: alexeiled/stress-ng:latest
+    pid: 'service:cpu-stress'
+    depends_on:
+      - cpu-stress  # the namespace owner has to be started first
+    deploy:
+      resources:
+        limits:
+          memory: 256M
+          cpus: '0.5'
+    command:
+      - '--vm'
+      - '2'
+      - '--vm-bytes'
+      - '64M'
+      - '--timeout'
+      - '300s'
+      - '--metrics-brief'
+
+  # Disk IO workload: busybox shell loop that keeps rewriting a 100 MiB file with dd
+  io-stress:
+    image: busybox:latest
+    pid: 'service:cpu-stress'
+    depends_on:
+      - cpu-stress  # the namespace owner has to be started first
+    command:
+      - /bin/sh
+      - '-c'
+      - |
+        echo "IO stress started"
+        # 'dd' will be our identifiable process
+        while true; do
+          dd if=/dev/zero of=/tmp/testfile bs=1M count=100 2>/dev/null
+          rm -f /tmp/testfile
+        done
+    deploy:
+      resources:
+        limits:
+          memory: 128M
+          cpus: '0.5'
+
+  # Collector under test: gathers metrics and pushes a summary to the receiver on shutdown
+  collector:
+    build:
+      context: ../..
+      dockerfile: Dockerfile
+      target: collector
+    pid: 'service:cpu-stress'  # join the PID namespace shared with the workloads
+    depends_on:
+      cpu-stress:
+        condition: service_started
+      receiver:
+        condition: service_healthy
+    command:
+      - '--interval=2s'
+      - '--top=10'
+      - '--log-format=json'
+      - '--push-endpoint=http://receiver:8080/api/v1/metrics'
+    deploy:
+      resources:
+        limits:
+          memory: 64M
+          cpus: '0.2'
+    environment:
+      # Execution context (test values) for the receiver
+      GITHUB_REPOSITORY_OWNER: 'test-org'
+      GITHUB_REPOSITORY: 'test-org/stress-test'
+      GITHUB_WORKFLOW: 'stress-test-workflow'
+      GITHUB_JOB: 'heavy-workload'
+      GITHUB_RUN_ID: 'stress-run-001'
+      # Cgroup configuration, passed as JSON strings.
+      # CGROUP_PROCESS_MAP keys are process names (stress-ng-cpu / stress-ng-vm are the
+      # stress-ng CPU / memory worker names); values are the keys used in CGROUP_LIMITS.
+      CGROUP_PROCESS_MAP: '{"stress-ng-cpu":"cpu-stress","stress-ng-vm":"mem-stress","dd":"io-stress","resource-collec":"collector"}'
+      CGROUP_LIMITS: '{"cpu-stress":{"cpu":"1","memory":"128Mi"},"mem-stress":{"cpu":"500m","memory":"256Mi"},"io-stress":{"cpu":"500m","memory":"128Mi"},"collector":{"cpu":"200m","memory":"64Mi"}}'
+
+volumes:
+  receiver-data: {}  # named volume with default settings; mounted at /data in receiver