garm/runner/pool/locking.go
Gabriel Adrian Samfira edbaf47970 Add a backoff mechanism when deleting runners
This change adds a backoff mechanism when deleting github runners.
If the delete operation fails, we record the event and retry with
a geometric progression of 1.5 starting from 5 seconds, which is the
pool consolidation timeout.

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2025-01-28 23:00:10 +00:00

88 lines
1.8 KiB
Go

package pool
import (
"sync"
"time"
"github.com/cloudbase/garm/runner/common"
)
const (
maxBackoffSeconds float64 = 1200 // 20 minutes
)
type keyMutex struct {
muxes sync.Map
}
func (k *keyMutex) TryLock(key string) bool {
mux, _ := k.muxes.LoadOrStore(key, &sync.Mutex{})
keyMux := mux.(*sync.Mutex)
return keyMux.TryLock()
}
func (k *keyMutex) Unlock(key string, remove bool) {
mux, ok := k.muxes.Load(key)
if !ok {
return
}
keyMux := mux.(*sync.Mutex)
if remove {
k.Delete(key)
}
keyMux.Unlock()
}
func (k *keyMutex) Delete(key string) {
k.muxes.Delete(key)
}
type instanceBackOff struct {
backoffSeconds float64
lastRecordedFailureTime time.Time
mux sync.Mutex
}
type instanceDeleteBackoff struct {
muxes sync.Map
}
func (i *instanceDeleteBackoff) ShouldProcess(key string) (bool, time.Time) {
backoff, loaded := i.muxes.LoadOrStore(key, &instanceBackOff{})
if !loaded {
return true, time.Time{}
}
ib := backoff.(*instanceBackOff)
ib.mux.Lock()
defer ib.mux.Unlock()
if ib.lastRecordedFailureTime.IsZero() || ib.backoffSeconds == 0 {
return true, time.Time{}
}
now := time.Now().UTC()
deadline := ib.lastRecordedFailureTime.Add(time.Duration(ib.backoffSeconds) * time.Second)
return deadline.After(now), deadline
}
func (i *instanceDeleteBackoff) Delete(key string) {
i.muxes.Delete(key)
}
func (i *instanceDeleteBackoff) RecordFailure(key string) {
backoff, _ := i.muxes.LoadOrStore(key, &instanceBackOff{})
ib := backoff.(*instanceBackOff)
ib.mux.Lock()
defer ib.mux.Unlock()
ib.lastRecordedFailureTime = time.Now().UTC()
if ib.backoffSeconds == 0 {
ib.backoffSeconds = common.PoolConsilitationInterval.Seconds()
} else {
// Geometric progression of 1.5
newBackoff := ib.backoffSeconds * 1.5
// Cap the backoff to 20 minutes
ib.backoffSeconds = min(newBackoff, maxBackoffSeconds)
}
}