# spring-petclinic/.github/workflows/pipeline.yml

# Workflow display name.
name: Enhanced Java Application Pipeline with Metrics

# Run on pushes to, and pull requests targeting, the pipeline-optimization branch.
on:
  push:
    branches:
      - pipeline-optimization
  pull_request:
    branches:
      - pipeline-optimization

jobs:
  # Single job: builds, tests, containerizes and deploys the application while
  # recording per-stage durations and host resource metrics.
  build-with-metrics:
    runs-on: ubuntu-latest
    # Upper bound for the whole job, in minutes.
    timeout-minutes: 60

    # Sidecar containers started for the job. Each is health-checked by polling
    # its /-/healthy endpoint every 10s (5s timeout, 3 retries).
    services:
      # Prometheus server, published on host port 9090.
      # NOTE(review): the image tag is the floating `latest`, and no scrape
      # configuration is supplied in this workflow - confirm both are intended.
      prometheus:
        image: prom/prometheus:latest
        ports:
          - '9090:9090'
        options: >-
          --health-cmd "wget -q -O- http://localhost:9090/-/healthy || exit 1"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 3

      # Pushgateway that the metric-export steps POST to, published on host
      # port 9091.
      pushgateway:
        image: prom/pushgateway:latest
        ports:
          - '9091:9091'
        options: >-
          --health-cmd "wget -q -O- http://localhost:9091/-/healthy || exit 1"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 3

    steps:
      # Check out the repository so later steps can use ./mvnw, the Docker
      # build context (.) and the k8s/ manifests.
      - uses: actions/checkout@v4

      # Install system profiling packages and download the node exporter release
      - name: Setup monitoring tools
        id: setup-monitoring
        timeout-minutes: 5
        run: |
          set -eo pipefail

          echo "::group::Installing system packages"
          sudo apt-get update || { echo "Failed to update package lists"; exit 1; }
          sudo apt-get install -y powerstat linux-tools-common linux-tools-generic || { echo "Failed to install powerstat and linux tools"; exit 1; }
          echo "::endgroup::"

          echo "::group::Setting up node exporter"
          # --fail makes curl exit non-zero on an HTTP error response, so a 404/5xx
          # is reported here as a download failure instead of being written to
          # node_exporter.tar.gz and surfacing later as an extraction error.
          curl --fail -L --retry 3 https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz -o node_exporter.tar.gz || { echo "Failed to download node exporter"; exit 1; }
          tar xvfz node_exporter.tar.gz || { echo "Failed to extract node exporter"; exit 1; }
          echo "::endgroup::"

      # Launch node exporter in the background and record the pipeline start time
      - name: Start monitoring
        id: start-monitoring
        timeout-minutes: 2
        run: |
          set -eo pipefail

          # Run node exporter on port 9100 and expose its PID to later steps
          ./node_exporter-*/node_exporter --web.listen-address=":9100" &
          exporter_pid=$!
          echo "NODE_EXPORTER_PID=${exporter_pid}" >> $GITHUB_ENV

          # Poll the metrics endpoint once per second; give up after 30 seconds
          if ! timeout 30s bash -c 'until curl -s http://localhost:9100/metrics > /dev/null; do sleep 1; done'; then
            echo "Node exporter failed to start"
            exit 1
          fi

          # Persist the start timestamp (nanoseconds since the epoch)
          date +%s%N > pipeline_start_time.txt

      # Install JDK 17 and enable the action's Maven dependency cache.
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          java-version: '17'
          # 'temurin' replaces the legacy 'adopt' (AdoptOpenJDK) distribution,
          # which no longer receives updates.
          distribution: 'temurin'
          cache: maven

      # Run the Maven build and export its duration (ms) as BUILD_TIME,
      # whether the build passes or fails.
      - name: Build with Maven
        id: build
        timeout-minutes: 15
        run: |
          set -eo pipefail
          start_time=$(date +%s%N)
          # Capture the exit code through `||`: with `set -e` a bare failing
          # command would abort the script before the duration is recorded.
          build_status=0
          ./mvnw -B verify || build_status=$?
          end_time=$(date +%s%N)
          # Elapsed nanoseconds converted to milliseconds.
          echo "BUILD_TIME=$(( (end_time - start_time) / 1000000 ))" >> "$GITHUB_ENV"
          exit "$build_status"

      # Run the Maven test phase and export its duration (ms) as TEST_TIME,
      # whether the tests pass or fail. The condition lets this step run even
      # when an earlier step failed (but not when the run was cancelled).
      - name: Run tests
        id: test
        if: success() || failure()
        timeout-minutes: 20
        run: |
          set -eo pipefail
          start_time=$(date +%s%N)
          # Capture the exit code through `||`: with `set -e` a bare failing
          # command would abort the script before the duration is recorded.
          # -B (batch mode) matches the invocation used by the build step.
          test_status=0
          ./mvnw -B test || test_status=$?
          end_time=$(date +%s%N)
          # Elapsed nanoseconds converted to milliseconds.
          echo "TEST_TIME=$(( (end_time - start_time) / 1000000 ))" >> "$GITHUB_ENV"
          exit "$test_status"

      # Build the container image from the repository root without layer cache
      # and export the duration (ms) as DOCKER_BUILD_TIME, pass or fail.
      - name: Build Docker image
        id: docker-build
        if: success()
        timeout-minutes: 10
        run: |
          set -eo pipefail
          start_time=$(date +%s%N)
          # Capture the exit code through `||`: with `set -e` a bare failing
          # command would abort the script before the duration is recorded.
          build_status=0
          docker build -t app:latest . --no-cache || build_status=$?
          end_time=$(date +%s%N)
          # Elapsed nanoseconds converted to milliseconds.
          echo "DOCKER_BUILD_TIME=$(( (end_time - start_time) / 1000000 ))" >> "$GITHUB_ENV"
          exit "$build_status"

      # Create a local kind cluster for the deployment step.
      # NOTE(review): no visible step loads the app:latest image built earlier
      # into this cluster - confirm the k8s/ manifests do not depend on it.
      - name: Setup Kubernetes
        uses: helm/kind-action@v1
        id: k8s-setup
        if: success()
        with:
          wait: '120s'

      # Apply the manifests in k8s/, wait up to 180s for the app=petclinic pods
      # to become ready, and export the duration (ms) as DEPLOY_TIME.
      - name: Deploy to Kubernetes
        id: deploy
        if: success()
        timeout-minutes: 10
        run: |
          set -eo pipefail
          start_time=$(date +%s%N)

          # Append DEPLOY_TIME (milliseconds) to GITHUB_ENV. Called on every exit
          # path so the metric reaches later steps even when the deploy fails.
          record_deploy_time() {
            local end_time
            end_time=$(date +%s%N)
            echo "DEPLOY_TIME=$(( (end_time - start_time) / 1000000 ))" >> "$GITHUB_ENV"
          }

          if ! kubectl apply -f k8s/; then
            echo "Failed to apply Kubernetes manifests"
            record_deploy_time
            exit 1
          fi

          if ! kubectl wait --for=condition=ready pod -l app=petclinic --timeout=180s; then
            echo "::error::Deployment failed - collecting debug information"
            # Diagnostics are best-effort: one failing command must not stop the
            # other from running under `set -e`.
            kubectl describe pods -l app=petclinic || true
            kubectl logs -l app=petclinic --all-containers=true || true
            record_deploy_time
            exit 1
          fi

          record_deploy_time

      # Push the per-stage durations recorded in GITHUB_ENV to the Pushgateway
      - name: Export metrics to Prometheus
        if: always()
        timeout-minutes: 5
        run: |
          set -eo pipefail

          # export_metric NAME VALUE STAGE
          # POSTs one sample, labelled with the stage and project="petclinic", to
          # the Pushgateway job "petclinic-pipeline". An empty VALUE (stage never
          # ran) is skipped; a failed push only emits a workflow warning.
          export_metric() {
            local metric_name=$1
            local metric_value=$2
            local stage=$3

            if [ -n "$metric_value" ]; then
              # --fail turns HTTP 4xx/5xx responses into a non-zero exit status;
              # without it curl exits 0 on a rejected push, so the warning below
              # would never fire and --retry could not act on HTTP errors.
              echo "${metric_name}{stage=\"${stage}\",project=\"petclinic\"} ${metric_value}" | \
                curl --fail --retry 3 --retry-delay 2 --max-time 10 --silent --show-error \
                  --data-binary @- http://localhost:9091/metrics/job/petclinic-pipeline || \
                echo "::warning::Failed to export ${metric_name} for ${stage}"
            fi
          }

          # Export timing metrics
          export_metric "pipeline_build_duration_ms" "${BUILD_TIME}" "build"
          export_metric "pipeline_test_duration_ms" "${TEST_TIME}" "test"
          export_metric "pipeline_docker_build_duration_ms" "${DOCKER_BUILD_TIME}" "docker-build"
          export_metric "pipeline_deploy_duration_ms" "${DEPLOY_TIME}" "deploy"

      # Sample host memory and CPU usage and push them to the Pushgateway
      - name: Collect resource metrics
        if: always()
        timeout-minutes: 2
        run: |
          set -eo pipefail

          # export_metric NAME VALUE STAGE
          # Redefined here because every step runs in its own shell. POSTs one
          # sample, labelled with the stage and project="petclinic", to the
          # Pushgateway job "petclinic-pipeline"; an empty VALUE is skipped and a
          # failed push only emits a workflow warning.
          export_metric() {
            local metric_name=$1
            local metric_value=$2
            local stage=$3

            if [ -n "$metric_value" ]; then
              # --fail turns HTTP 4xx/5xx responses into a non-zero exit status;
              # without it curl exits 0 on a rejected push, so the warning below
              # would never fire and --retry could not act on HTTP errors.
              echo "${metric_name}{stage=\"${stage}\",project=\"petclinic\"} ${metric_value}" | \
                curl --fail --retry 3 --retry-delay 2 --max-time 10 --silent --show-error \
                  --data-binary @- http://localhost:9091/metrics/job/petclinic-pipeline || \
                echo "::warning::Failed to export ${metric_name} for ${stage}"
            fi
          }

          # Used memory in bytes: third column of the "Mem:" row printed by free
          mem_usage=$(free -b | grep Mem: | awk '{print $3}') || echo "::warning::Failed to collect memory usage"
          if [ -n "$mem_usage" ]; then
            export_metric "pipeline_memory_usage_bytes" "$mem_usage" "memory"
          fi

          # CPU usage: second field of the "Cpu(s)" summary line printed by top
          cpu_usage=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}') || echo "::warning::Failed to collect CPU usage"
          if [ -n "$cpu_usage" ]; then
            export_metric "pipeline_cpu_usage_percent" "$cpu_usage" "cpu"
          fi

      # Record the end timestamp, stop node exporter and snapshot host resources
      - name: Collect final metrics
        if: always()
        timeout-minutes: 5
        run: |
          set -eo pipefail

          # Pipeline end time (nanoseconds since the epoch)
          date +%s%N > pipeline_end_time.txt

          # Terminate node exporter if an earlier step recorded its PID
          [ -z "$NODE_EXPORTER_PID" ] || kill $NODE_EXPORTER_PID || echo "::warning::Failed to stop node exporter"

          # Each snapshot writes a heading followed by the tool output; a failing
          # tool only produces a warning and does not fail the step.
          { echo "=== System Resources ==="; top -b -n 1; } > system_metrics.txt \
            || echo "::warning::Failed to collect top metrics"

          { echo "=== Memory Usage ==="; free -m; } > memory_metrics.txt \
            || echo "::warning::Failed to collect memory metrics"

          { echo "=== Disk Usage ==="; df -h; } > disk_metrics.txt \
            || echo "::warning::Failed to collect disk metrics"

      # Upload the collected snapshot and timestamp files as one artifact.
      # Runs regardless of earlier failures; missing files only raise a warning.
      - name: Save metrics
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: pipeline-metrics
          if-no-files-found: warn
          retention-days: 90
          path: |
            system_metrics.txt
            memory_metrics.txt
            disk_metrics.txt
            pipeline_start_time.txt
            pipeline_end_time.txt