Add provider worker code

Runners now get created and cleaned up in scale sets.

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
Gabriel Adrian Samfira 2025-04-24 23:29:40 +00:00
parent 436fd7746f
commit 004ad1f124
23 changed files with 837 additions and 147 deletions

View file

@ -7,7 +7,6 @@ import (
"fmt"
"html/template"
"log/slog"
"strings"
"github.com/pkg/errors"
@ -57,24 +56,51 @@ func (r *Runner) GetRunnerServiceName(ctx context.Context) (string, error) {
ctx, "failed to get instance params")
return "", runnerErrors.ErrUnauthorized
}
var entity params.GithubEntity
pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get pool",
"pool_id", instance.PoolID)
return "", errors.Wrap(err, "fetching pool")
if instance.PoolID != "" {
pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get pool",
"pool_id", instance.PoolID)
return "", errors.Wrap(err, "fetching pool")
}
entity, err = pool.GithubEntity()
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get pool entity",
"pool_id", instance.PoolID)
return "", errors.Wrap(err, "fetching pool entity")
}
} else if instance.ScaleSetID != 0 {
scaleSet, err := r.store.GetScaleSetByID(r.ctx, instance.ScaleSetID)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get scale set",
"scale_set_id", instance.ScaleSetID)
return "", errors.Wrap(err, "fetching scale set")
}
entity, err = scaleSet.GithubEntity()
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get scale set entity",
"scale_set_id", instance.ScaleSetID)
return "", errors.Wrap(err, "fetching scale set entity")
}
} else {
return "", errors.New("instance not associated with a pool or scale set")
}
tpl := "actions.runner.%s.%s"
var serviceName string
switch pool.PoolType() {
switch entity.EntityType {
case params.GithubEntityTypeEnterprise:
serviceName = fmt.Sprintf(tpl, pool.EnterpriseName, instance.Name)
serviceName = fmt.Sprintf(tpl, entity.Owner, instance.Name)
case params.GithubEntityTypeOrganization:
serviceName = fmt.Sprintf(tpl, pool.OrgName, instance.Name)
serviceName = fmt.Sprintf(tpl, entity.Owner, instance.Name)
case params.GithubEntityTypeRepository:
serviceName = fmt.Sprintf(tpl, strings.ReplaceAll(pool.RepoName, "/", "-"), instance.Name)
serviceName = fmt.Sprintf(tpl, fmt.Sprintf("%s-%s", entity.Owner, entity.Name), instance.Name)
}
return serviceName, nil
}

View file

@ -100,6 +100,7 @@ func NewEntityPoolManager(ctx context.Context, entity params.GithubEntity, insta
repo := &basePoolManager{
ctx: ctx,
consumerID: consumerID,
entity: entity,
ghcli: ghc,
controllerInfo: controllerInfo,
@ -117,6 +118,7 @@ func NewEntityPoolManager(ctx context.Context, entity params.GithubEntity, insta
type basePoolManager struct {
ctx context.Context
consumerID string
entity params.GithubEntity
ghcli common.GithubClient
controllerInfo params.ControllerInfo
@ -420,7 +422,7 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne
continue
}
lockAcquired, err := locking.TryLock(instance.Name)
lockAcquired, err := locking.TryLock(instance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.DebugContext(
r.ctx, "failed to acquire lock for instance",
@ -499,7 +501,7 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
slog.DebugContext(
r.ctx, "attempting to lock instance",
"runner_name", instance.Name)
lockAcquired, err := locking.TryLock(instance.Name)
lockAcquired, err := locking.TryLock(instance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.DebugContext(
r.ctx, "failed to acquire lock for instance",
@ -626,7 +628,7 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner)
poolInstanceCache[dbInstance.PoolID] = poolInstances
}
lockAcquired, err := locking.TryLock(dbInstance.Name)
lockAcquired, err := locking.TryLock(dbInstance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.DebugContext(
r.ctx, "failed to acquire lock for instance",
@ -1064,7 +1066,7 @@ func (r *basePoolManager) scaleDownOnePool(ctx context.Context, pool params.Pool
for _, instanceToDelete := range idleWorkers[:numScaleDown] {
instanceToDelete := instanceToDelete
lockAcquired, err := locking.TryLock(instanceToDelete.Name)
lockAcquired, err := locking.TryLock(instanceToDelete.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to acquire lock for instance",
@ -1217,7 +1219,7 @@ func (r *basePoolManager) retryFailedInstancesForOnePool(ctx context.Context, po
slog.DebugContext(
ctx, "attempting to retry failed instance",
"runner_name", instance.Name)
lockAcquired, err := locking.TryLock(instance.Name)
lockAcquired, err := locking.TryLock(instance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.DebugContext(
ctx, "failed to acquire lock for instance",
@ -1401,7 +1403,7 @@ func (r *basePoolManager) deletePendingInstances() error {
r.ctx, "removing instance from pool",
"runner_name", instance.Name,
"pool_id", instance.PoolID)
lockAcquired, err := locking.TryLock(instance.Name)
lockAcquired, err := locking.TryLock(instance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.InfoContext(
r.ctx, "failed to acquire lock for instance",
@ -1513,7 +1515,7 @@ func (r *basePoolManager) addPendingInstances() error {
r.ctx, "attempting to acquire lock for instance",
"runner_name", instance.Name,
"action", "create_pending")
lockAcquired, err := locking.TryLock(instance.Name)
lockAcquired, err := locking.TryLock(instance.Name, r.consumerID)
if !lockAcquired || err != nil {
slog.DebugContext(
r.ctx, "failed to acquire lock for instance",