From a91f64331e5d83ac6e72f15e34a227bec71a62c0 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Thu, 29 Dec 2022 22:57:10 +0000 Subject: [PATCH] Limit instances to one runner token --- auth/context.go | 14 +++ .../azure/cloudconfig/install_runner.tpl | 19 +++- .../providers.d/azure/garm-external-provider | 10 +- .../openstack/cloudconfig/install_runner.tpl | 19 +++- .../openstack/garm-external-provider | 10 +- database/sql/instances.go | 4 + database/sql/models.go | 1 + database/sql/util.go | 1 + params/params.go | 1 + params/requests.go | 1 + runner/pool/pool.go | 103 ++++++++---------- runner/runner.go | 26 +++-- 12 files changed, 116 insertions(+), 93 deletions(-) diff --git a/auth/context.go b/auth/context.go index 993aea73..ba1bf9cb 100644 --- a/auth/context.go +++ b/auth/context.go @@ -37,6 +37,7 @@ const ( instancePoolTypeKey contextFlags = "scope" instanceEntityKey contextFlags = "entity" instanceRunnerStatus contextFlags = "status" + instanceTokenFetched contextFlags = "tokenFetched" ) func SetInstanceID(ctx context.Context, id string) context.Context { @@ -51,6 +52,18 @@ func InstanceID(ctx context.Context) string { return elem.(string) } +func SetInstanceTokenFetched(ctx context.Context, fetched bool) context.Context { + return context.WithValue(ctx, instanceTokenFetched, fetched) +} + +func InstanceTokenFetched(ctx context.Context) bool { + elem := ctx.Value(instanceTokenFetched) + if elem == nil { + return false + } + return elem.(bool) +} + func SetInstanceRunnerStatus(ctx context.Context, val common.RunnerStatus) context.Context { return context.WithValue(ctx, instanceRunnerStatus, val) } @@ -116,6 +129,7 @@ func PopulateInstanceContext(ctx context.Context, instance params.Instance) cont ctx = SetInstanceName(ctx, instance.Name) ctx = SetInstancePoolID(ctx, instance.PoolID) ctx = SetInstanceRunnerStatus(ctx, instance.RunnerStatus) + ctx = SetInstanceTokenFetched(ctx, instance.TokenFetched) return ctx } diff --git a/contrib/providers.d/azure/cloudconfig/install_runner.tpl b/contrib/providers.d/azure/cloudconfig/install_runner.tpl index f84b5cc3..910d8eac 100644 --- a/contrib/providers.d/azure/cloudconfig/install_runner.tpl +++ b/contrib/providers.d/azure/cloudconfig/install_runner.tpl @@ -3,19 +3,21 @@ set -e set -o pipefail +METADATA_URL="GARM_METADATA_URL" CALLBACK_URL="GARM_CALLBACK_URL" BEARER_TOKEN="GARM_CALLBACK_TOKEN" DOWNLOAD_URL="GH_DOWNLOAD_URL" DOWNLOAD_TOKEN="GH_TEMP_DOWNLOAD_TOKEN" FILENAME="GH_FILENAME" TARGET_URL="GH_TARGET_URL" -RUNNER_TOKEN="GH_RUNNER_TOKEN" RUNNER_NAME="GH_RUNNER_NAME" RUNNER_LABELS="GH_RUNNER_LABELS" TEMP_TOKEN="" -if [ ! -z "$DOWNLOAD_TOKEN" ]; then - TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN" + +if [ -z "$METADATA_URL" ];then + echo "no token is available and METADATA_URL is not set" + exit 1 fi function call() { @@ -40,6 +42,10 @@ function fail() { exit 1 } +if [ ! -z "$DOWNLOAD_TOKEN" ]; then + TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN" +fi + sendStatus "downloading tools from ${DOWNLOAD_URL}" curl --fail -L -H "${TEMP_TOKEN}" -o "/home/runner/${FILENAME}" "${DOWNLOAD_URL}" || fail "failed to download tools" @@ -53,8 +59,11 @@ sendStatus "installing dependencies" cd /home/runner/actions-runner sudo ./bin/installdependencies.sh || fail "failed to install dependencies" +sendStatus "fetching runner registration token" +GITHUB_TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${METADATA_URL}" || fail "failed to get runner registration token") + sendStatus "configuring runner" -sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${RUNNER_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner" +sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${GITHUB_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner" sendStatus "installing runner service" ./svc.sh install runner || fail "failed to install service" @@ -69,4 +78,4 @@ if [ $? -ne 0 ];then fi set -e -success "runner successfully installed" $AGENT_ID +success "runner successfully installed" $AGENT_ID \ No newline at end of file diff --git a/contrib/providers.d/azure/garm-external-provider b/contrib/providers.d/azure/garm-external-provider index b0cf18d8..1e3ebff9 100755 --- a/contrib/providers.d/azure/garm-external-provider +++ b/contrib/providers.d/azure/garm-external-provider @@ -126,14 +126,6 @@ function repoURL() { echo "${REPO}" } -function ghAccessToken() { - TOKEN_URL=$(runnerTokenURL) - BEARER_TOKEN=$(callbackToken) - TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${TOKEN_URL}") - checkValNotNull "${TOKEN}" "repo_url" || return $? - echo "${TOKEN}" -} - function callbackURL() { CB_URL=$(echo "$INPUT" | jq -c -r '."callback-url"') checkValNotNull "${CB_URL}" "callback-url" || return $? @@ -206,7 +198,7 @@ function getCloudConfig() { -e "s|GH_DOWNLOAD_URL|${DW_URL}|g" \ -e "s|GH_FILENAME|${DW_FILENAME}|g" \ -e "s|GH_TARGET_URL|$(repoURL)|g" \ - -e "s|GH_RUNNER_TOKEN|$(ghAccessToken)|g" \ + -e "s|GARM_METADATA_URL|$(runnerTokenURL)|g" \ -e "s|GH_RUNNER_NAME|$(instanceName)|g" \ -e "s|GH_TEMP_DOWNLOAD_TOKEN|${DW_TOKEN}|g" \ -e "s|GH_RUNNER_LABELS|${LABELS}|g" > ${TMP_SCRIPT} diff --git a/contrib/providers.d/openstack/cloudconfig/install_runner.tpl b/contrib/providers.d/openstack/cloudconfig/install_runner.tpl index f84b5cc3..910d8eac 100644 --- a/contrib/providers.d/openstack/cloudconfig/install_runner.tpl +++ b/contrib/providers.d/openstack/cloudconfig/install_runner.tpl @@ -3,19 +3,21 @@ set -e set -o pipefail +METADATA_URL="GARM_METADATA_URL" CALLBACK_URL="GARM_CALLBACK_URL" BEARER_TOKEN="GARM_CALLBACK_TOKEN" DOWNLOAD_URL="GH_DOWNLOAD_URL" DOWNLOAD_TOKEN="GH_TEMP_DOWNLOAD_TOKEN" FILENAME="GH_FILENAME" TARGET_URL="GH_TARGET_URL" -RUNNER_TOKEN="GH_RUNNER_TOKEN" RUNNER_NAME="GH_RUNNER_NAME" RUNNER_LABELS="GH_RUNNER_LABELS" TEMP_TOKEN="" -if [ ! -z "$DOWNLOAD_TOKEN" ]; then - TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN" + +if [ -z "$METADATA_URL" ];then + echo "no token is available and METADATA_URL is not set" + exit 1 fi function call() { @@ -40,6 +42,10 @@ function fail() { exit 1 } +if [ ! -z "$DOWNLOAD_TOKEN" ]; then + TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN" +fi + sendStatus "downloading tools from ${DOWNLOAD_URL}" curl --fail -L -H "${TEMP_TOKEN}" -o "/home/runner/${FILENAME}" "${DOWNLOAD_URL}" || fail "failed to download tools" @@ -53,8 +59,11 @@ sendStatus "installing dependencies" cd /home/runner/actions-runner sudo ./bin/installdependencies.sh || fail "failed to install dependencies" +sendStatus "fetching runner registration token" +GITHUB_TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${METADATA_URL}" || fail "failed to get runner registration token") + sendStatus "configuring runner" -sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${RUNNER_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner" +sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${GITHUB_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner" sendStatus "installing runner service" ./svc.sh install runner || fail "failed to install service" @@ -69,4 +78,4 @@ if [ $? -ne 0 ];then fi set -e -success "runner successfully installed" $AGENT_ID +success "runner successfully installed" $AGENT_ID \ No newline at end of file diff --git a/contrib/providers.d/openstack/garm-external-provider b/contrib/providers.d/openstack/garm-external-provider index 93c0a64b..910fec9d 100755 --- a/contrib/providers.d/openstack/garm-external-provider +++ b/contrib/providers.d/openstack/garm-external-provider @@ -190,14 +190,6 @@ function repoURL() { echo "${REPO}" } -function ghAccessToken() { - TOKEN_URL=$(runnerTokenURL) - BEARER_TOKEN=$(callbackToken) - TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${TOKEN_URL}") - checkValNotNull "${TOKEN}" "repo_url" || return $? - echo "${TOKEN}" -} - function callbackURL() { CB_URL=$(echo "$INPUT" | jq -c -r '."callback-url"') checkValNotNull "${CB_URL}" "callback-url" || return $? @@ -244,7 +236,7 @@ function getCloudConfig() { -e "s|GH_DOWNLOAD_URL|${DW_URL}|g" \ -e "s|GH_FILENAME|${DW_FILENAME}|g" \ -e "s|GH_TARGET_URL|$(repoURL)|g" \ - -e "s|GH_RUNNER_TOKEN|$(ghAccessToken)|g" \ + -e "s|GARM_METADATA_URL|$(runnerTokenURL)|g" \ -e "s|GH_RUNNER_NAME|$(instanceName)|g" \ -e "s|GH_TEMP_DOWNLOAD_TOKEN|${DW_TOKEN}|g" \ -e "s|GH_RUNNER_LABELS|${LABELS}|g" > ${TMP_SCRIPT} diff --git a/database/sql/instances.go b/database/sql/instances.go index 3e32e745..e813b11a 100644 --- a/database/sql/instances.go +++ b/database/sql/instances.go @@ -218,6 +218,10 @@ func (s *sqlDatabase) UpdateInstance(ctx context.Context, instanceID string, par instance.CreateAttempt = param.CreateAttempt } + if param.TokenFetched != nil { + instance.TokenFetched = *param.TokenFetched + } + instance.ProviderFault = param.ProviderFault q := s.conn.Save(&instance) diff --git a/database/sql/models.go b/database/sql/models.go index 66a6dc3d..f47045a1 100644 --- a/database/sql/models.go +++ b/database/sql/models.go @@ -144,6 +144,7 @@ type Instance struct { MetadataURL string ProviderFault []byte `gorm:"type:longblob"` CreateAttempt int + TokenFetched bool PoolID uuid.UUID Pool Pool `gorm:"foreignKey:PoolID"` diff --git a/database/sql/util.go b/database/sql/util.go index a5aa15c2..5a39d5bb 100644 --- a/database/sql/util.go +++ b/database/sql/util.go @@ -45,6 +45,7 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) params.Instance { StatusMessages: []params.StatusMessage{}, CreateAttempt: instance.CreateAttempt, UpdatedAt: instance.UpdatedAt, + TokenFetched: instance.TokenFetched, } if len(instance.ProviderFault) > 0 { diff --git a/params/params.go b/params/params.go index 0e4f067e..decd041f 100644 --- a/params/params.go +++ b/params/params.go @@ -94,6 +94,7 @@ type Instance struct { CallbackURL string `json:"-"` MetadataURL string `json:"-"` CreateAttempt int `json:"-"` + TokenFetched bool `json:"-"` } func (i Instance) GetName() string { diff --git a/params/requests.go b/params/requests.go index cda5c387..a383badd 100644 --- a/params/requests.go +++ b/params/requests.go @@ -173,6 +173,7 @@ type UpdateInstanceParams struct { ProviderFault []byte `json:"provider_fault,omitempty"` AgentID int64 `json:"-"` CreateAttempt int `json:"-"` + TokenFetched *bool `json:"-"` } type UpdateUserParams struct { diff --git a/runner/pool/pool.go b/runner/pool/pool.go index e5fecf67..a6d4f7c4 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -76,19 +76,6 @@ func controllerIDFromLabels(labels []string) string { return "" } -func poolIDFromLabels(labels []string) string { - for _, lbl := range labels { - if strings.HasPrefix(lbl, poolIDLabelprefix) { - return lbl[len(poolIDLabelprefix):] - } - } - return "" -} - -func poolIsOwnedBy(pool params.Pool, ownerID string) bool { - return pool.RepoID == ownerID || pool.OrgID == ownerID || pool.EnterpriseID == ownerID -} - func labelsFromRunner(runner *github.Runner) []string { if runner == nil || runner.Labels == nil { return []string{} @@ -104,18 +91,11 @@ func labelsFromRunner(runner *github.Runner) []string { return labels } -func (r *basePoolManager) isControllerRunner(labels []string) bool { +// isManagedRunner returns true if labels indicate the runner belongs to a pool +// this manager is responsible for. +func (r *basePoolManager) isManagedRunner(labels []string) bool { runnerControllerID := controllerIDFromLabels(labels) - if runnerControllerID != r.controllerID { - return false - } - - poolID := poolIDFromLabels(labels) - if poolID == "" { - return false - } - _, err := r.helper.GetPoolByID(poolID) - return err == nil + return runnerControllerID == r.controllerID } // cleanupOrphanedProviderRunners compares runners in github with local runners and removes @@ -133,7 +113,8 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne runnerNames := map[string]bool{} for _, run := range runners { - if !r.isControllerRunner(labelsFromRunner(run)) { + if !r.isManagedRunner(labelsFromRunner(run)) { + log.Printf("runner %s is not managed by a pool belonging to %s", *run.Name, r.helper.String()) continue } runnerNames[*run.Name] = true @@ -171,7 +152,8 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error { runnerNames := map[string]bool{} for _, run := range runners { - if !r.isControllerRunner(labelsFromRunner(run)) { + if !r.isManagedRunner(labelsFromRunner(run)) { + log.Printf("runner %s is not managed by a pool belonging to %s", *run.Name, r.helper.String()) continue } runnerNames[*run.Name] = true @@ -202,7 +184,8 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error { // first remove the instance from github, and then from our database. func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner) error { for _, runner := range runners { - if !r.isControllerRunner(labelsFromRunner(runner)) { + if !r.isManagedRunner(labelsFromRunner(runner)) { + log.Printf("runner %s is not managed by a pool belonging to %s", *runner.Name, r.helper.String()) continue } @@ -248,9 +231,6 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner) if err != nil { return errors.Wrap(err, "fetching pool") } - if !poolIsOwnedBy(pool, r.ID()) { - continue - } // check if the provider still has the instance. provider, ok := r.providers[pool.ProviderName] @@ -627,26 +607,37 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error } func (r *basePoolManager) getRunnerDetailsFromJob(job params.WorkflowJob) (params.RunnerInfo, error) { - if job.WorkflowJob.RunnerName != "" { - return params.RunnerInfo{ - Name: job.WorkflowJob.RunnerName, - Labels: job.WorkflowJob.Labels, - }, nil + runnerInfo := params.RunnerInfo{ + Name: job.WorkflowJob.RunnerName, + Labels: job.WorkflowJob.Labels, } - // Runner name was not set in WorkflowJob by github. We can still attempt to - // fetch the info we need, using the workflow run ID, from the API. - log.Printf("runner name not found in workflow job, attempting to fetch from API") - runnerInfo, err := r.helper.GetRunnerInfoFromWorkflow(job) - if err != nil { - if errors.Is(err, runnerErrors.ErrUnauthorized) { - failureReason := fmt.Sprintf("failed to fetch runner name from API: %q", err) - r.setPoolRunningState(false, failureReason) - log.Print(failureReason) + var err error + if job.WorkflowJob.RunnerName == "" { + // Runner name was not set in WorkflowJob by github. We can still attempt to + // fetch the info we need, using the workflow run ID, from the API. + log.Printf("runner name not found in workflow job, attempting to fetch from API") + runnerInfo, err = r.helper.GetRunnerInfoFromWorkflow(job) + if err != nil { + if errors.Is(err, runnerErrors.ErrUnauthorized) { + failureReason := fmt.Sprintf("failed to fetch runner name from API: %q", err) + r.setPoolRunningState(false, failureReason) + log.Print(failureReason) + } + return params.RunnerInfo{}, errors.Wrap(err, "fetching runner name from API") } - return params.RunnerInfo{}, errors.Wrap(err, "fetching runner name from API") } + runnerDetails, err := r.store.GetInstanceByName(context.Background(), runnerInfo.Name) + if err != nil { + log.Printf("could not find runner details for %s", runnerInfo.Name) + return params.RunnerInfo{}, errors.Wrap(err, "fetching runner details") + } + + if _, err := r.helper.GetPoolByID(runnerDetails.PoolID); err != nil { + log.Printf("runner %s (pool ID: %s) does not belong to any pool we manage: %s", runnerDetails.Name, runnerDetails.PoolID, err) + return params.RunnerInfo{}, errors.Wrap(err, "fetching pool for instance") + } return runnerInfo, nil } @@ -672,12 +663,8 @@ func (r *basePoolManager) HandleWorkflowJob(job params.WorkflowJob) error { runnerInfo, err := r.getRunnerDetailsFromJob(job) if err != nil { // Unassigned jobs will have an empty runner_name. - // There is nothing to to in this case. - log.Printf("no runner was assigned. Skipping.") - return nil - } - - if !r.isControllerRunner(runnerInfo.Labels) { + // We also need to ignore not found errors, as we may get a webhook regarding + // a workflow that is handled by a runner at a different hierarchy level. return nil } @@ -702,13 +689,17 @@ func (r *basePoolManager) HandleWorkflowJob(job params.WorkflowJob) error { // a runner set. In such cases, we attemt to fetch it from the API. runnerInfo, err := r.getRunnerDetailsFromJob(job) if err != nil { + if errors.Is(err, runnerErrors.ErrNotFound) { + // This is most likely a runner we're not managing. If we define a repo from within an org + // and also define that same org, we will get a hook from github from both the repo and the org + // regarding the same workflow. We look for the runner in the database, and make sure it exists and is + // part of a pool that this manager is responsible for. A not found error here will most likely mean + // that we are not responsible for that runner, and we should ignore it. + return nil + } return errors.Wrap(err, "determining runner name") } - if !r.isControllerRunner(runnerInfo.Labels) { - return nil - } - // update instance workload state. if err := r.setInstanceRunnerStatus(runnerInfo.Name, providerCommon.RunnerActive); err != nil { if errors.Is(err, runnerErrors.ErrNotFound) { @@ -816,8 +807,10 @@ func (r *basePoolManager) retryFailedInstancesForOnePool(pool params.Pool) { // TODO(gabriel-samfira): Incrementing CreateAttempt should be done within a transaction. // It's fairly safe to do here (for now), as there should be no other code path that updates // an instance in this state. + var tokenFetched bool = false updateParams := params.UpdateInstanceParams{ CreateAttempt: inst.CreateAttempt + 1, + TokenFetched: &tokenFetched, Status: providerCommon.InstancePendingCreate, } log.Printf("queueing previously failed instance %s for retry", inst.Name) diff --git a/runner/runner.go b/runner/runner.go index 3c5a7b70..b7db8060 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -731,6 +731,13 @@ func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string return "", runnerErrors.ErrUnauthorized } + // Check if this instance already fetched a registration token. We only allow an instance to + // fetch one token. If the instance fails to bootstrap after a token is fetched, we reset the + // token fetched field when re-queueing the instance. + if auth.InstanceTokenFetched(ctx) { + return "", runnerErrors.ErrUnauthorized + } + status := auth.InstanceRunnerStatus(ctx) if status != providerCommon.RunnerPending && status != providerCommon.RunnerInstalling { return "", runnerErrors.ErrUnauthorized @@ -746,21 +753,20 @@ func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string return "", errors.Wrap(err, "fetching pool manager for instance") } - tokenEvents, err := r.store.ListInstanceEvents(ctx, instance.ID, params.FetchTokenEvent, "") - if err != nil { - return "", errors.Wrap(err, "fetching instance events") - } - - if len(tokenEvents) > 0 { - // Token already retrieved - return "", runnerErrors.ErrUnauthorized - } - token, err := poolMgr.GithubRunnerRegistrationToken() if err != nil { return "", errors.Wrap(err, "fetching runner token") } + tokenFetched := true + updateParams := params.UpdateInstanceParams{ + TokenFetched: &tokenFetched, + } + + if _, err := r.store.UpdateInstance(r.ctx, instance.ID, updateParams); err != nil { + return "", errors.Wrap(err, "setting token_fetched for instance") + } + if err := r.store.AddInstanceEvent(ctx, instance.ID, params.FetchTokenEvent, params.EventInfo, "runner registration token was retrieved"); err != nil { return "", errors.Wrap(err, "recording event") }