Properly handle stopped runners

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
Gabriel Adrian Samfira 2023-03-27 15:02:25 +00:00
parent 6b3ea50ca5
commit 243ae75476

View file

@ -179,13 +179,13 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
return nil
}
func instanceInList(instanceName string, instances []params.Instance) bool {
func instanceInList(instanceName string, instances []params.Instance) (params.Instance, bool) {
for _, val := range instances {
if val.Name == instanceName {
return true
return val, true
}
}
return false
return params.Instance{}, false
}
// cleanupOrphanedGithubRunners will forcefully remove any github runners that appear
@ -252,6 +252,7 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner)
var poolInstances []params.Instance
poolInstances, ok = poolInstanceCache[pool.ID]
if !ok {
log.Printf("updating instances cache for pool %s", pool.ID)
poolInstances, err = provider.ListInstances(r.ctx, pool.ID)
if err != nil {
return errors.Wrapf(err, "fetching instances for pool %s", pool.ID)
@ -259,7 +260,8 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner)
poolInstanceCache[pool.ID] = poolInstances
}
if !instanceInList(dbInstance.Name, poolInstances) {
providerInstance, ok := instanceInList(dbInstance.Name, poolInstances)
if !ok {
// The runner instance is no longer on the provider, and it appears offline in github.
// It should be safe to force remove it.
log.Printf("Runner instance for %s is no longer on the provider, removing from github", dbInstance.Name)
@ -286,16 +288,18 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner)
continue
}
if providerCommon.InstanceStatus(dbInstance.Status) == providerCommon.InstanceRunning {
if providerInstance.Status == providerCommon.InstanceRunning {
// instance is running, but github reports runner as offline. Log the event.
// This scenario requires manual intervention.
// Perhaps it just came online and github did not yet change it's status?
log.Printf("instance %s is online but github reports runner as offline", dbInstance.Name)
continue
}
//start the instance
if err := provider.Start(r.ctx, dbInstance.ProviderID); err != nil {
return errors.Wrapf(err, "starting instance %s", dbInstance.ProviderID)
} else {
log.Printf("instance %s was found in stopped state; starting", dbInstance.Name)
//start the instance
if err := provider.Start(r.ctx, dbInstance.ProviderID); err != nil {
return errors.Wrapf(err, "starting instance %s", dbInstance.ProviderID)
}
}
}
return nil