// Copyright 2022 Cloudbase Solutions SRL // // Licensed under the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. You may obtain // a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations // under the License. package runner import ( "context" "crypto/hmac" "crypto/sha1" //nolint:golangci-lint,gosec // sha1 is used for github webhooks "crypto/sha256" "encoding/hex" "encoding/json" "fmt" "hash" "log/slog" "net/url" "os" "strings" "sync" "time" "github.com/juju/clock" "github.com/juju/retry" "github.com/pkg/errors" "golang.org/x/sync/errgroup" runnerErrors "github.com/cloudbase/garm-provider-common/errors" commonParams "github.com/cloudbase/garm-provider-common/params" "github.com/cloudbase/garm-provider-common/util" "github.com/cloudbase/garm/auth" "github.com/cloudbase/garm/config" dbCommon "github.com/cloudbase/garm/database/common" "github.com/cloudbase/garm/params" "github.com/cloudbase/garm/runner/common" "github.com/cloudbase/garm/runner/pool" "github.com/cloudbase/garm/runner/providers" "github.com/cloudbase/garm/util/github" "github.com/cloudbase/garm/util/github/scalesets" ) func NewRunner(ctx context.Context, cfg config.Config, db dbCommon.Store) (*Runner, error) { ctrlID, err := db.ControllerInfo() if err != nil { return nil, errors.Wrap(err, "fetching controller info") } providers, err := providers.LoadProvidersFromConfig(ctx, cfg, ctrlID.ControllerID.String()) if err != nil { return nil, errors.Wrap(err, "loading providers") } creds := map[string]config.Github{} for _, ghcreds := range cfg.Github { creds[ghcreds.Name] = ghcreds } poolManagerCtrl := &poolManagerCtrl{ config: cfg, store: db, repositories: map[string]common.PoolManager{}, organizations: map[string]common.PoolManager{}, enterprises: map[string]common.PoolManager{}, } runner := &Runner{ ctx: ctx, config: cfg, store: db, poolManagerCtrl: poolManagerCtrl, providers: providers, } if err := runner.loadReposOrgsAndEnterprises(); err != nil { return nil, errors.Wrap(err, "loading pool managers") } return runner, nil } type poolManagerCtrl struct { mux sync.Mutex config config.Config store dbCommon.Store repositories map[string]common.PoolManager organizations map[string]common.PoolManager enterprises map[string]common.PoolManager } func (p *poolManagerCtrl) CreateRepoPoolManager(ctx context.Context, repo params.Repository, providers map[string]common.Provider, store dbCommon.Store) (common.PoolManager, error) { p.mux.Lock() defer p.mux.Unlock() entity, err := repo.GetEntity() if err != nil { return nil, errors.Wrap(err, "getting entity") } instanceTokenGetter, err := auth.NewInstanceTokenGetter(p.config.JWTAuth.Secret) if err != nil { return nil, errors.Wrap(err, "creating instance token getter") } poolManager, err := pool.NewEntityPoolManager(ctx, entity, instanceTokenGetter, providers, store) if err != nil { return nil, errors.Wrap(err, "creating repo pool manager") } p.repositories[repo.ID] = poolManager return poolManager, nil } func (p *poolManagerCtrl) GetRepoPoolManager(repo params.Repository) (common.PoolManager, error) { if repoPoolMgr, ok := p.repositories[repo.ID]; ok { return repoPoolMgr, nil } return nil, errors.Wrapf(runnerErrors.ErrNotFound, "repository %s/%s pool manager not loaded", repo.Owner, repo.Name) } func (p *poolManagerCtrl) DeleteRepoPoolManager(repo params.Repository) error { p.mux.Lock() defer p.mux.Unlock() poolMgr, ok := p.repositories[repo.ID] if ok { if err := poolMgr.Stop(); err != nil { return errors.Wrap(err, "stopping repo pool manager") } delete(p.repositories, repo.ID) } return nil } func (p *poolManagerCtrl) GetRepoPoolManagers() (map[string]common.PoolManager, error) { return p.repositories, nil } func (p *poolManagerCtrl) CreateOrgPoolManager(ctx context.Context, org params.Organization, providers map[string]common.Provider, store dbCommon.Store) (common.PoolManager, error) { p.mux.Lock() defer p.mux.Unlock() entity, err := org.GetEntity() if err != nil { return nil, errors.Wrap(err, "getting entity") } instanceTokenGetter, err := auth.NewInstanceTokenGetter(p.config.JWTAuth.Secret) if err != nil { return nil, errors.Wrap(err, "creating instance token getter") } poolManager, err := pool.NewEntityPoolManager(ctx, entity, instanceTokenGetter, providers, store) if err != nil { return nil, errors.Wrap(err, "creating org pool manager") } p.organizations[org.ID] = poolManager return poolManager, nil } func (p *poolManagerCtrl) GetOrgPoolManager(org params.Organization) (common.PoolManager, error) { if orgPoolMgr, ok := p.organizations[org.ID]; ok { return orgPoolMgr, nil } return nil, errors.Wrapf(runnerErrors.ErrNotFound, "organization %s pool manager not loaded", org.Name) } func (p *poolManagerCtrl) DeleteOrgPoolManager(org params.Organization) error { p.mux.Lock() defer p.mux.Unlock() poolMgr, ok := p.organizations[org.ID] if ok { if err := poolMgr.Stop(); err != nil { return errors.Wrap(err, "stopping org pool manager") } delete(p.organizations, org.ID) } return nil } func (p *poolManagerCtrl) GetOrgPoolManagers() (map[string]common.PoolManager, error) { return p.organizations, nil } func (p *poolManagerCtrl) CreateEnterprisePoolManager(ctx context.Context, enterprise params.Enterprise, providers map[string]common.Provider, store dbCommon.Store) (common.PoolManager, error) { p.mux.Lock() defer p.mux.Unlock() entity, err := enterprise.GetEntity() if err != nil { return nil, errors.Wrap(err, "getting entity") } instanceTokenGetter, err := auth.NewInstanceTokenGetter(p.config.JWTAuth.Secret) if err != nil { return nil, errors.Wrap(err, "creating instance token getter") } poolManager, err := pool.NewEntityPoolManager(ctx, entity, instanceTokenGetter, providers, store) if err != nil { return nil, errors.Wrap(err, "creating enterprise pool manager") } p.enterprises[enterprise.ID] = poolManager return poolManager, nil } func (p *poolManagerCtrl) GetEnterprisePoolManager(enterprise params.Enterprise) (common.PoolManager, error) { if enterprisePoolMgr, ok := p.enterprises[enterprise.ID]; ok { return enterprisePoolMgr, nil } return nil, errors.Wrapf(runnerErrors.ErrNotFound, "enterprise %s pool manager not loaded", enterprise.Name) } func (p *poolManagerCtrl) DeleteEnterprisePoolManager(enterprise params.Enterprise) error { p.mux.Lock() defer p.mux.Unlock() poolMgr, ok := p.enterprises[enterprise.ID] if ok { if err := poolMgr.Stop(); err != nil { return errors.Wrap(err, "stopping enterprise pool manager") } delete(p.enterprises, enterprise.ID) } return nil } func (p *poolManagerCtrl) GetEnterprisePoolManagers() (map[string]common.PoolManager, error) { return p.enterprises, nil } type Runner struct { mux sync.Mutex config config.Config ctx context.Context store dbCommon.Store poolManagerCtrl PoolManagerController providers map[string]common.Provider } // UpdateController will update the controller settings. func (r *Runner) UpdateController(ctx context.Context, param params.UpdateControllerParams) (params.ControllerInfo, error) { if !auth.IsAdmin(ctx) { return params.ControllerInfo{}, runnerErrors.ErrUnauthorized } if err := param.Validate(); err != nil { return params.ControllerInfo{}, errors.Wrap(err, "validating controller update params") } info, err := r.store.UpdateController(param) if err != nil { return params.ControllerInfo{}, errors.Wrap(err, "updating controller info") } return info, nil } // GetControllerInfo returns the controller id and the hostname. // This data might be used in metrics and logging. func (r *Runner) GetControllerInfo(ctx context.Context) (params.ControllerInfo, error) { if !auth.IsAdmin(ctx) { return params.ControllerInfo{}, runnerErrors.ErrUnauthorized } // It is unlikely that fetching the hostname will encounter an error on a standard // linux (or Windows) system, but if os.Hostname() can fail, we need to at least retry // a few times before giving up. // This retries 10 times within one second. While it has the potential to give us a // one second delay before returning either the hostname or an error, I expect this // to succeed on the first try. // As a side note, Windows requires a reboot for the hostname change to take effect, // so if we'll ever support Windows as a target system, the hostname can be cached. var hostname string err := retry.Call(retry.CallArgs{ Func: func() error { var err error hostname, err = os.Hostname() if err != nil { return errors.Wrap(err, "fetching hostname") } return nil }, Attempts: 10, Delay: 100 * time.Millisecond, Clock: clock.WallClock, }) if err != nil { return params.ControllerInfo{}, errors.Wrap(err, "fetching hostname") } info, err := r.store.ControllerInfo() if err != nil { return params.ControllerInfo{}, errors.Wrap(err, "fetching controller info") } // This is temporary. Right now, GARM is a single-instance deployment. When we add the // ability to scale out, the hostname field will be moved form here to a dedicated node // object. As a single controller will be made up of multiple nodes, we will need to model // that aspect of GARM. info.Hostname = hostname return info, nil } func (r *Runner) ListProviders(ctx context.Context) ([]params.Provider, error) { if !auth.IsAdmin(ctx) { return nil, runnerErrors.ErrUnauthorized } ret := []params.Provider{} for _, val := range r.providers { ret = append(ret, val.AsParams()) } return ret, nil } func (r *Runner) loadReposOrgsAndEnterprises() error { r.mux.Lock() defer r.mux.Unlock() repos, err := r.store.ListRepositories(r.ctx, params.RepositoryFilter{}) if err != nil { return errors.Wrap(err, "fetching repositories") } orgs, err := r.store.ListOrganizations(r.ctx, params.OrganizationFilter{}) if err != nil { return errors.Wrap(err, "fetching organizations") } enterprises, err := r.store.ListEnterprises(r.ctx, params.EnterpriseFilter{}) if err != nil { return errors.Wrap(err, "fetching enterprises") } g, _ := errgroup.WithContext(r.ctx) for _, repo := range repos { repo := repo g.Go(func() error { slog.InfoContext( r.ctx, "creating pool manager for repo", "repo_owner", repo.Owner, "repo_name", repo.Name) _, err := r.poolManagerCtrl.CreateRepoPoolManager(r.ctx, repo, r.providers, r.store) return err }) } for _, org := range orgs { org := org g.Go(func() error { slog.InfoContext(r.ctx, "creating pool manager for organization", "org_name", org.Name) _, err := r.poolManagerCtrl.CreateOrgPoolManager(r.ctx, org, r.providers, r.store) return err }) } for _, enterprise := range enterprises { enterprise := enterprise g.Go(func() error { slog.InfoContext(r.ctx, "creating pool manager for enterprise", "enterprise_name", enterprise.Name) _, err := r.poolManagerCtrl.CreateEnterprisePoolManager(r.ctx, enterprise, r.providers, r.store) return err }) } if err := r.waitForErrorGroupOrTimeout(g); err != nil { return fmt.Errorf("failed to create pool managers: %w", err) } return nil } func (r *Runner) Start() error { r.mux.Lock() defer r.mux.Unlock() repositories, err := r.poolManagerCtrl.GetRepoPoolManagers() if err != nil { return errors.Wrap(err, "fetch repo pool managers") } organizations, err := r.poolManagerCtrl.GetOrgPoolManagers() if err != nil { return errors.Wrap(err, "fetch org pool managers") } enterprises, err := r.poolManagerCtrl.GetEnterprisePoolManagers() if err != nil { return errors.Wrap(err, "fetch enterprise pool managers") } g, _ := errgroup.WithContext(r.ctx) for _, repo := range repositories { repo := repo g.Go(func() error { return repo.Start() }) } for _, org := range organizations { org := org g.Go(func() error { return org.Start() }) } for _, enterprise := range enterprises { enterprise := enterprise g.Go(func() error { return enterprise.Start() }) } if err := r.waitForErrorGroupOrTimeout(g); err != nil { return fmt.Errorf("failed to start pool managers: %w", err) } return nil } func (r *Runner) waitForErrorGroupOrTimeout(g *errgroup.Group) error { if g == nil { return nil } done := make(chan error, 1) go func() { done <- g.Wait() }() timer := time.NewTimer(60 * time.Second) defer timer.Stop() select { case err := <-done: return err case <-timer.C: return fmt.Errorf("timed out waiting for pool manager start") } } func (r *Runner) Stop() error { r.mux.Lock() defer r.mux.Unlock() repos, err := r.poolManagerCtrl.GetRepoPoolManagers() if err != nil { return errors.Wrap(err, "fetch repo pool managers") } orgs, err := r.poolManagerCtrl.GetOrgPoolManagers() if err != nil { return errors.Wrap(err, "fetch org pool managers") } enterprises, err := r.poolManagerCtrl.GetEnterprisePoolManagers() if err != nil { return errors.Wrap(err, "fetch enterprise pool managers") } g, _ := errgroup.WithContext(r.ctx) for _, repo := range repos { poolMgr := repo g.Go(func() error { err := poolMgr.Stop() if err != nil { return fmt.Errorf("failed to stop repo pool manager: %w", err) } return poolMgr.Wait() }) } for _, org := range orgs { poolMgr := org g.Go(func() error { err := poolMgr.Stop() if err != nil { return fmt.Errorf("failed to stop org pool manager: %w", err) } return poolMgr.Wait() }) } for _, enterprise := range enterprises { poolMgr := enterprise g.Go(func() error { err := poolMgr.Stop() if err != nil { return fmt.Errorf("failed to stop enterprise pool manager: %w", err) } return poolMgr.Wait() }) } if err := r.waitForErrorGroupOrTimeout(g); err != nil { return fmt.Errorf("failed to stop pool managers: %w", err) } return nil } func (r *Runner) Wait() error { r.mux.Lock() defer r.mux.Unlock() var wg sync.WaitGroup repos, err := r.poolManagerCtrl.GetRepoPoolManagers() if err != nil { return errors.Wrap(err, "fetch repo pool managers") } orgs, err := r.poolManagerCtrl.GetOrgPoolManagers() if err != nil { return errors.Wrap(err, "fetch org pool managers") } enterprises, err := r.poolManagerCtrl.GetEnterprisePoolManagers() if err != nil { return errors.Wrap(err, "fetch enterprise pool managers") } for poolID, repo := range repos { wg.Add(1) go func(id string, poolMgr common.PoolManager) { defer wg.Done() if err := poolMgr.Wait(); err != nil { slog.With(slog.Any("error", err)).ErrorContext(r.ctx, "timed out waiting for pool manager to exit", "pool_id", id, "pool_mgr_id", poolMgr.ID()) } }(poolID, repo) } for poolID, org := range orgs { wg.Add(1) go func(id string, poolMgr common.PoolManager) { defer wg.Done() if err := poolMgr.Wait(); err != nil { slog.With(slog.Any("error", err)).ErrorContext(r.ctx, "timed out waiting for pool manager to exit", "pool_id", id) } }(poolID, org) } for poolID, enterprise := range enterprises { wg.Add(1) go func(id string, poolMgr common.PoolManager) { defer wg.Done() if err := poolMgr.Wait(); err != nil { slog.With(slog.Any("error", err)).ErrorContext(r.ctx, "timed out waiting for pool manager to exit", "pool_id", id) } }(poolID, enterprise) } wg.Wait() return nil } func (r *Runner) validateHookBody(signature, secret string, body []byte) error { if secret == "" { return runnerErrors.NewMissingSecretError("missing secret to validate webhook signature") } if signature == "" { // A secret was set in our config, but a signature was not received // from Github. Authentication of the body cannot be done. return runnerErrors.NewUnauthorizedError("missing github signature") } sigParts := strings.SplitN(signature, "=", 2) if len(sigParts) != 2 { // We expect the signature from github to be of the format: // hashType=hashValue // ie: sha256=1fc917c7ad66487470e466c0ad40ddd45b9f7730a4b43e1b2542627f0596bbdc return runnerErrors.NewBadRequestError("invalid signature format") } var hashFunc func() hash.Hash switch sigParts[0] { case "sha256": hashFunc = sha256.New case "sha1": hashFunc = sha1.New default: return runnerErrors.NewBadRequestError("unknown signature type") } mac := hmac.New(hashFunc, []byte(secret)) _, err := mac.Write(body) if err != nil { return errors.Wrap(err, "failed to compute sha256") } expectedMAC := hex.EncodeToString(mac.Sum(nil)) if !hmac.Equal([]byte(sigParts[1]), []byte(expectedMAC)) { return runnerErrors.NewUnauthorizedError("signature missmatch") } return nil } func (r *Runner) findEndpointForJob(job params.WorkflowJob, forgeType params.EndpointType) (params.ForgeEndpoint, error) { uri, err := url.ParseRequestURI(job.WorkflowJob.HTMLURL) if err != nil { return params.ForgeEndpoint{}, errors.Wrap(err, "parsing job URL") } baseURI := fmt.Sprintf("%s://%s", uri.Scheme, uri.Host) // Note(gabriel-samfira): Endpoints should be cached. We don't expect to have a large number // of endpoints. In most cases there will be just one (github.com). In cases where there is // a GHES involved, those users will have just one extra endpoint or 2 (if they also have a // test env). But there should be a relatively small number, regardless. So we don't really care // that much about the performance of this function. var endpoints []params.ForgeEndpoint switch forgeType { case params.GithubEndpointType: endpoints, err = r.store.ListGithubEndpoints(r.ctx) case params.GiteaEndpointType: endpoints, err = r.store.ListGiteaEndpoints(r.ctx) default: return params.ForgeEndpoint{}, runnerErrors.NewBadRequestError("unknown forge type %s", forgeType) } if err != nil { return params.ForgeEndpoint{}, errors.Wrap(err, "fetching github endpoints") } for _, ep := range endpoints { slog.DebugContext(r.ctx, "checking endpoint", "base_uri", baseURI, "endpoint", ep.BaseURL) epBaseURI := strings.TrimSuffix(ep.BaseURL, "/") if epBaseURI == baseURI { return ep, nil } } return params.ForgeEndpoint{}, runnerErrors.NewNotFoundError("no endpoint found for job") } func (r *Runner) DispatchWorkflowJob(hookTargetType, signature string, forgeType params.EndpointType, jobData []byte) error { if len(jobData) == 0 { slog.ErrorContext(r.ctx, "missing job data") return runnerErrors.NewBadRequestError("missing job data") } var job params.WorkflowJob if err := json.Unmarshal(jobData, &job); err != nil { slog.ErrorContext(r.ctx, "failed to unmarshal job data", "error", err) return errors.Wrapf(runnerErrors.ErrBadRequest, "invalid job data: %s", err) } endpoint, err := r.findEndpointForJob(job, forgeType) if err != nil { slog.ErrorContext(r.ctx, "failed to find endpoint for job", "error", err) return errors.Wrap(err, "finding endpoint for job") } var poolManager common.PoolManager switch HookTargetType(hookTargetType) { case RepoHook: slog.DebugContext( r.ctx, "got hook for repo", "repo_owner", util.SanitizeLogEntry(job.Repository.Owner.Login), "repo_name", util.SanitizeLogEntry(job.Repository.Name), "endpoint", endpoint.Name) poolManager, err = r.findRepoPoolManager(job.Repository.Owner.Login, job.Repository.Name, endpoint.Name) case OrganizationHook: slog.DebugContext( r.ctx, "got hook for organization", "organization", util.SanitizeLogEntry(job.GetOrgName(forgeType)), "endpoint", endpoint.Name) poolManager, err = r.findOrgPoolManager(job.GetOrgName(forgeType), endpoint.Name) case EnterpriseHook: slog.DebugContext( r.ctx, "got hook for enterprise", "enterprise", util.SanitizeLogEntry(job.Enterprise.Slug), "endpoint", endpoint.Name) poolManager, err = r.findEnterprisePoolManager(job.Enterprise.Slug, endpoint.Name) default: return runnerErrors.NewBadRequestError("cannot handle hook target type %s", hookTargetType) } slog.DebugContext(r.ctx, "found pool manager", "pool_manager", poolManager.ID()) if err != nil { slog.ErrorContext(r.ctx, "failed to find pool manager", "error", err, "hook_target_type", hookTargetType) // We don't have a repository or organization configured that // can handle this workflow job. return errors.Wrap(err, "fetching poolManager") } // We found a pool. Validate the webhook job. If a secret is configured, // we make sure that the source of this workflow job is valid. secret := poolManager.WebhookSecret() if err := r.validateHookBody(signature, secret, jobData); err != nil { slog.ErrorContext(r.ctx, "failed to validate webhook data", "error", err) return errors.Wrap(err, "validating webhook data") } if err := poolManager.HandleWorkflowJob(job); err != nil { slog.ErrorContext(r.ctx, "failed to handle workflow job", "error", err) return errors.Wrap(err, "handling workflow job") } return nil } func (r *Runner) appendTagsToCreatePoolParams(param params.CreatePoolParams) (params.CreatePoolParams, error) { if err := param.Validate(); err != nil { return params.CreatePoolParams{}, fmt.Errorf("failed to validate params (%q): %w", err, runnerErrors.ErrBadRequest) // errors.Wrapf(runnerErrors.ErrBadRequest, "validating params: %s", err) } if !IsSupportedOSType(param.OSType) { return params.CreatePoolParams{}, runnerErrors.NewBadRequestError("invalid OS type %s", param.OSType) } if !IsSupportedArch(param.OSArch) { return params.CreatePoolParams{}, runnerErrors.NewBadRequestError("invalid OS architecture %s", param.OSArch) } _, ok := r.providers[param.ProviderName] if !ok { return params.CreatePoolParams{}, runnerErrors.NewBadRequestError("no such provider %s", param.ProviderName) } return param, nil } func (r *Runner) GetInstance(ctx context.Context, instanceName string) (params.Instance, error) { if !auth.IsAdmin(ctx) { return params.Instance{}, runnerErrors.ErrUnauthorized } instance, err := r.store.GetInstanceByName(ctx, instanceName) if err != nil { return params.Instance{}, errors.Wrap(err, "fetching instance") } return instance, nil } func (r *Runner) ListAllInstances(ctx context.Context) ([]params.Instance, error) { if !auth.IsAdmin(ctx) { return nil, runnerErrors.ErrUnauthorized } instances, err := r.store.ListAllInstances(ctx) if err != nil { return nil, errors.Wrap(err, "fetching instances") } return instances, nil } func (r *Runner) AddInstanceStatusMessage(ctx context.Context, param params.InstanceUpdateMessage) error { instanceName := auth.InstanceName(ctx) if instanceName == "" { return runnerErrors.ErrUnauthorized } if err := r.store.AddInstanceEvent(ctx, instanceName, params.StatusEvent, params.EventInfo, param.Message); err != nil { return errors.Wrap(err, "adding status update") } updateParams := params.UpdateInstanceParams{ RunnerStatus: param.Status, } if param.AgentID != nil { updateParams.AgentID = *param.AgentID } if _, err := r.store.UpdateInstance(r.ctx, instanceName, updateParams); err != nil { return errors.Wrap(err, "updating runner agent ID") } return nil } func (r *Runner) UpdateSystemInfo(ctx context.Context, param params.UpdateSystemInfoParams) error { instanceName := auth.InstanceName(ctx) if instanceName == "" { slog.ErrorContext(ctx, "missing instance name") return runnerErrors.ErrUnauthorized } if param.OSName == "" && param.OSVersion == "" && param.AgentID == nil { // Nothing to update return nil } updateParams := params.UpdateInstanceParams{ OSName: param.OSName, OSVersion: param.OSVersion, } if param.AgentID != nil { updateParams.AgentID = *param.AgentID } if _, err := r.store.UpdateInstance(r.ctx, instanceName, updateParams); err != nil { return errors.Wrap(err, "updating runner system info") } return nil } func (r *Runner) getPoolManagerFromInstance(ctx context.Context, instance params.Instance) (common.PoolManager, error) { pool, err := r.store.GetPoolByID(ctx, instance.PoolID) if err != nil { return nil, errors.Wrap(err, "fetching pool") } var poolMgr common.PoolManager switch { case pool.RepoID != "": repo, err := r.store.GetRepositoryByID(ctx, pool.RepoID) if err != nil { return nil, errors.Wrap(err, "fetching repo") } poolMgr, err = r.findRepoPoolManager(repo.Owner, repo.Name, repo.Endpoint.Name) if err != nil { return nil, errors.Wrapf(err, "fetching pool manager for repo %s", pool.RepoName) } case pool.OrgID != "": org, err := r.store.GetOrganizationByID(ctx, pool.OrgID) if err != nil { return nil, errors.Wrap(err, "fetching org") } poolMgr, err = r.findOrgPoolManager(org.Name, org.Endpoint.Name) if err != nil { return nil, errors.Wrapf(err, "fetching pool manager for org %s", pool.OrgName) } case pool.EnterpriseID != "": enterprise, err := r.store.GetEnterpriseByID(ctx, pool.EnterpriseID) if err != nil { return nil, errors.Wrap(err, "fetching enterprise") } poolMgr, err = r.findEnterprisePoolManager(enterprise.Name, enterprise.Endpoint.Name) if err != nil { return nil, errors.Wrapf(err, "fetching pool manager for enterprise %s", pool.EnterpriseName) } } return poolMgr, nil } // DeleteRunner removes a runner from a pool. If forceDelete is true, GARM will ignore any provider errors // that may occur, and attempt to remove the runner from GitHub and then the database, regardless of provider // errors. func (r *Runner) DeleteRunner(ctx context.Context, instanceName string, forceDelete, bypassGithubUnauthorized bool) error { if !auth.IsAdmin(ctx) { return runnerErrors.ErrUnauthorized } instance, err := r.store.GetInstanceByName(ctx, instanceName) if err != nil { return errors.Wrap(err, "fetching instance") } switch instance.Status { case commonParams.InstanceRunning, commonParams.InstanceError, commonParams.InstancePendingForceDelete, commonParams.InstancePendingDelete: default: validStates := []string{ string(commonParams.InstanceRunning), string(commonParams.InstanceError), string(commonParams.InstancePendingForceDelete), string(commonParams.InstancePendingDelete), } return runnerErrors.NewBadRequestError("runner must be in one of the following states: %q", strings.Join(validStates, ", ")) } ghCli, ssCli, err := r.getGHCliFromInstance(ctx, instance) if err != nil { return errors.Wrap(err, "fetching github client") } if instance.AgentID != 0 { switch { case instance.ScaleSetID != 0: err = ssCli.RemoveRunner(ctx, instance.AgentID) case instance.PoolID != "": err = ghCli.RemoveEntityRunner(ctx, instance.AgentID) default: return errors.New("instance does not have a pool or scale set") } if err != nil { if !errors.Is(err, runnerErrors.ErrNotFound) { if errors.Is(err, runnerErrors.ErrUnauthorized) && instance.PoolID != "" { poolMgr, err := r.getPoolManagerFromInstance(ctx, instance) if err != nil { return errors.Wrap(err, "fetching pool manager for instance") } poolMgr.SetPoolRunningState(false, fmt.Sprintf("failed to remove runner: %q", err)) } if !bypassGithubUnauthorized { return errors.Wrap(err, "removing runner from github") } } } } instanceStatus := commonParams.InstancePendingDelete if forceDelete { instanceStatus = commonParams.InstancePendingForceDelete } slog.InfoContext( r.ctx, "setting instance status", "runner_name", instance.Name, "status", instanceStatus) updateParams := params.UpdateInstanceParams{ Status: instanceStatus, } _, err = r.store.UpdateInstance(r.ctx, instance.Name, updateParams) if err != nil { return errors.Wrap(err, "updating runner state") } return nil } func (r *Runner) getGHCliFromInstance(ctx context.Context, instance params.Instance) (common.GithubClient, *scalesets.ScaleSetClient, error) { // nolint:golangci-lint,godox // TODO(gabriel-samfira): We can probably cache the entity. var entityGetter params.EntityGetter var err error switch { case instance.PoolID != "": entityGetter, err = r.store.GetPoolByID(ctx, instance.PoolID) if err != nil { return nil, nil, errors.Wrap(err, "fetching pool") } case instance.ScaleSetID != 0: entityGetter, err = r.store.GetScaleSetByID(ctx, instance.ScaleSetID) if err != nil { return nil, nil, errors.Wrap(err, "fetching scale set") } default: return nil, nil, errors.New("instance does not have a pool or scale set") } entity, err := entityGetter.GetEntity() if err != nil { return nil, nil, errors.Wrap(err, "fetching entity") } // Fetching the entity from the database will populate all fields, including credentials. entity, err = r.store.GetForgeEntity(ctx, entity.EntityType, entity.ID) if err != nil { return nil, nil, errors.Wrap(err, "fetching entity") } ghCli, err := github.Client(ctx, entity) if err != nil { return nil, nil, errors.Wrap(err, "creating github client") } scaleSetCli, err := scalesets.NewClient(ghCli) if err != nil { return nil, nil, errors.Wrap(err, "creating scaleset client") } return ghCli, scaleSetCli, nil }