From 43d2fd8c2de55d474409f144d4b7ce771508f22e Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Mon, 6 Feb 2023 15:29:01 +0200 Subject: [PATCH] Add grace period to scale-down Add a grace period for idle runners of 5 minutes. A new idle runner will not be taken into consideration for scale-down unless it's older than 5 minutes. This should prevent situations where the scaleDown() routine that runs every minute will evaluate candidates for reaping and erroneously count the new one as well. The in_progress hook that transitions an idle runner to "active" may arrive a long while after the "queued" hook has spun up a runner. Signed-off-by: Gabriel Adrian Samfira --- runner/pool/pool.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/runner/pool/pool.go b/runner/pool/pool.go index 26553ffa..b0ffc5ac 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -394,7 +394,9 @@ func (r *basePoolManager) acquireNewInstance(job params.WorkflowJob) error { } } - if int64(idleWorkers) >= int64(pool.MinIdleRunners) { + // Skip creating a new runner if we have at least one idle runner and the minimum is already satisfied. + // This should work even for pools that define a MinIdleRunners of 0. + if int64(idleWorkers) > 0 && int64(idleWorkers) >= int64(pool.MinIdleRunners) { log.Printf("we have enough min_idle_runners (%d) for pool %s, skipping...", pool.MinIdleRunners, pool.ID) return nil } @@ -786,8 +788,13 @@ func (r *basePoolManager) scaleDownOnePool(pool params.Pool) { idleWorkers := []params.Instance{} for _, inst := range existingInstances { + // Idle runners that have been spawned and are still idle after 5 minutes are taken into + // consideration for scale-down. The 5 minute grace period prevents a situation where a + // "queued" workflow triggers the creation of a new idle runner, and this routine reaps + // an idle runner before it has a chance to pick up a job. 
if providerCommon.RunnerStatus(inst.RunnerStatus) == providerCommon.RunnerIdle && - providerCommon.InstanceStatus(inst.Status) == providerCommon.InstanceRunning { + providerCommon.InstanceStatus(inst.Status) == providerCommon.InstanceRunning && + time.Since(inst.UpdatedAt).Minutes() > 5 { idleWorkers = append(idleWorkers, inst) } }