garm/runner/metadata.go
Gabriel Adrian Samfira 004ad1f124 Add provider worker code
Runners now get created and cleaned up in scale sets.

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2025-05-03 22:29:40 +00:00

231 lines
6.9 KiB
Go

package runner
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"html/template"
"log/slog"
"github.com/pkg/errors"
"github.com/cloudbase/garm-provider-common/defaults"
runnerErrors "github.com/cloudbase/garm-provider-common/errors"
"github.com/cloudbase/garm/auth"
"github.com/cloudbase/garm/params"
)
var systemdUnitTemplate = `[Unit]
Description=GitHub Actions Runner ({{.ServiceName}})
After=network.target
[Service]
ExecStart=/home/{{.RunAsUser}}/actions-runner/runsvc.sh
User={{.RunAsUser}}
WorkingDirectory=/home/{{.RunAsUser}}/actions-runner
KillMode=process
KillSignal=SIGTERM
TimeoutStopSec=5min
[Install]
WantedBy=multi-user.target
`
func validateInstanceState(ctx context.Context) (params.Instance, error) {
if !auth.InstanceHasJITConfig(ctx) {
return params.Instance{}, fmt.Errorf("instance not configured for JIT: %w", runnerErrors.ErrNotFound)
}
status := auth.InstanceRunnerStatus(ctx)
if status != params.RunnerPending && status != params.RunnerInstalling {
return params.Instance{}, runnerErrors.ErrUnauthorized
}
instance, err := auth.InstanceParams(ctx)
if err != nil {
return params.Instance{}, runnerErrors.ErrUnauthorized
}
return instance, nil
}
func (r *Runner) GetRunnerServiceName(ctx context.Context) (string, error) {
instance, err := validateInstanceState(ctx)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get instance params")
return "", runnerErrors.ErrUnauthorized
}
var entity params.GithubEntity
if instance.PoolID != "" {
pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get pool",
"pool_id", instance.PoolID)
return "", errors.Wrap(err, "fetching pool")
}
entity, err = pool.GithubEntity()
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get pool entity",
"pool_id", instance.PoolID)
return "", errors.Wrap(err, "fetching pool entity")
}
} else if instance.ScaleSetID != 0 {
scaleSet, err := r.store.GetScaleSetByID(r.ctx, instance.ScaleSetID)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get scale set",
"scale_set_id", instance.ScaleSetID)
return "", errors.Wrap(err, "fetching scale set")
}
entity, err = scaleSet.GithubEntity()
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get scale set entity",
"scale_set_id", instance.ScaleSetID)
return "", errors.Wrap(err, "fetching scale set entity")
}
} else {
return "", errors.New("instance not associated with a pool or scale set")
}
tpl := "actions.runner.%s.%s"
var serviceName string
switch entity.EntityType {
case params.GithubEntityTypeEnterprise:
serviceName = fmt.Sprintf(tpl, entity.Owner, instance.Name)
case params.GithubEntityTypeOrganization:
serviceName = fmt.Sprintf(tpl, entity.Owner, instance.Name)
case params.GithubEntityTypeRepository:
serviceName = fmt.Sprintf(tpl, fmt.Sprintf("%s-%s", entity.Owner, entity.Name), instance.Name)
}
return serviceName, nil
}
func (r *Runner) GenerateSystemdUnitFile(ctx context.Context, runAsUser string) ([]byte, error) {
serviceName, err := r.GetRunnerServiceName(ctx)
if err != nil {
return nil, errors.Wrap(err, "fetching runner service name")
}
unitTemplate, err := template.New("").Parse(systemdUnitTemplate)
if err != nil {
return nil, errors.Wrap(err, "parsing template")
}
if runAsUser == "" {
runAsUser = defaults.DefaultUser
}
data := struct {
ServiceName string
RunAsUser string
}{
ServiceName: serviceName,
RunAsUser: runAsUser,
}
var unitFile bytes.Buffer
if err := unitTemplate.Execute(&unitFile, data); err != nil {
return nil, errors.Wrap(err, "executing template")
}
return unitFile.Bytes(), nil
}
func (r *Runner) GetJITConfigFile(ctx context.Context, file string) ([]byte, error) {
instance, err := validateInstanceState(ctx)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get instance params")
return nil, runnerErrors.ErrUnauthorized
}
jitConfig := instance.JitConfiguration
contents, ok := jitConfig[file]
if !ok {
return nil, errors.Wrap(runnerErrors.ErrNotFound, "retrieving file")
}
decoded, err := base64.StdEncoding.DecodeString(contents)
if err != nil {
return nil, errors.Wrap(err, "decoding file contents")
}
return decoded, nil
}
func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string, error) {
// Check if this instance already fetched a registration token or if it was configured using
// the new Just In Time runner feature. If we're still using the old way of configuring a runner,
// we only allow an instance to fetch one token. If the instance fails to bootstrap after a token
// is fetched, we reset the token fetched field when re-queueing the instance.
if auth.InstanceTokenFetched(ctx) || auth.InstanceHasJITConfig(ctx) {
return "", runnerErrors.ErrUnauthorized
}
status := auth.InstanceRunnerStatus(ctx)
if status != params.RunnerPending && status != params.RunnerInstalling {
return "", runnerErrors.ErrUnauthorized
}
instance, err := auth.InstanceParams(ctx)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get instance params")
return "", runnerErrors.ErrUnauthorized
}
poolMgr, err := r.getPoolManagerFromInstance(ctx, instance)
if err != nil {
return "", errors.Wrap(err, "fetching pool manager for instance")
}
token, err := poolMgr.GithubRunnerRegistrationToken()
if err != nil {
return "", errors.Wrap(err, "fetching runner token")
}
tokenFetched := true
updateParams := params.UpdateInstanceParams{
TokenFetched: &tokenFetched,
}
if _, err := r.store.UpdateInstance(r.ctx, instance.Name, updateParams); err != nil {
return "", errors.Wrap(err, "setting token_fetched for instance")
}
if err := r.store.AddInstanceEvent(ctx, instance.Name, params.FetchTokenEvent, params.EventInfo, "runner registration token was retrieved"); err != nil {
return "", errors.Wrap(err, "recording event")
}
return token, nil
}
func (r *Runner) GetRootCertificateBundle(ctx context.Context) (params.CertificateBundle, error) {
instance, err := auth.InstanceParams(ctx)
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get instance params")
return params.CertificateBundle{}, runnerErrors.ErrUnauthorized
}
poolMgr, err := r.getPoolManagerFromInstance(ctx, instance)
if err != nil {
return params.CertificateBundle{}, errors.Wrap(err, "fetching pool manager for instance")
}
bundle, err := poolMgr.RootCABundle()
if err != nil {
slog.With(slog.Any("error", err)).ErrorContext(
ctx, "failed to get root CA bundle",
"instance", instance.Name,
"pool_manager", poolMgr.ID())
// The root CA bundle is invalid. Return an empty bundle to the runner and log the event.
return params.CertificateBundle{
RootCertificates: make(map[string][]byte),
}, nil
}
return bundle, nil
}