Limit instances to one runner token

This commit is contained in:
Gabriel Adrian Samfira 2022-12-29 22:57:10 +00:00
parent 3a92a5be0e
commit a91f64331e
12 changed files with 116 additions and 93 deletions

View file

@ -37,6 +37,7 @@ const (
instancePoolTypeKey contextFlags = "scope"
instanceEntityKey contextFlags = "entity"
instanceRunnerStatus contextFlags = "status"
instanceTokenFetched contextFlags = "tokenFetched"
)
func SetInstanceID(ctx context.Context, id string) context.Context {
@ -51,6 +52,18 @@ func InstanceID(ctx context.Context) string {
return elem.(string)
}
func SetInstanceTokenFetched(ctx context.Context, fetched bool) context.Context {
return context.WithValue(ctx, instanceTokenFetched, fetched)
}
func InstanceTokenFetched(ctx context.Context) bool {
elem := ctx.Value(instanceTokenFetched)
if elem == nil {
return false
}
return elem.(bool)
}
func SetInstanceRunnerStatus(ctx context.Context, val common.RunnerStatus) context.Context {
return context.WithValue(ctx, instanceRunnerStatus, val)
}
@ -116,6 +129,7 @@ func PopulateInstanceContext(ctx context.Context, instance params.Instance) cont
ctx = SetInstanceName(ctx, instance.Name)
ctx = SetInstancePoolID(ctx, instance.PoolID)
ctx = SetInstanceRunnerStatus(ctx, instance.RunnerStatus)
ctx = SetInstanceTokenFetched(ctx, instance.TokenFetched)
return ctx
}

View file

@ -3,19 +3,21 @@
set -e
set -o pipefail
METADATA_URL="GARM_METADATA_URL"
CALLBACK_URL="GARM_CALLBACK_URL"
BEARER_TOKEN="GARM_CALLBACK_TOKEN"
DOWNLOAD_URL="GH_DOWNLOAD_URL"
DOWNLOAD_TOKEN="GH_TEMP_DOWNLOAD_TOKEN"
FILENAME="GH_FILENAME"
TARGET_URL="GH_TARGET_URL"
RUNNER_TOKEN="GH_RUNNER_TOKEN"
RUNNER_NAME="GH_RUNNER_NAME"
RUNNER_LABELS="GH_RUNNER_LABELS"
TEMP_TOKEN=""
if [ ! -z "$DOWNLOAD_TOKEN" ]; then
TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN"
if [ -z "$METADATA_URL" ];then
echo "no token is available and METADATA_URL is not set"
exit 1
fi
function call() {
@ -40,6 +42,10 @@ function fail() {
exit 1
}
if [ ! -z "$DOWNLOAD_TOKEN" ]; then
TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN"
fi
sendStatus "downloading tools from ${DOWNLOAD_URL}"
curl --fail -L -H "${TEMP_TOKEN}" -o "/home/runner/${FILENAME}" "${DOWNLOAD_URL}" || fail "failed to download tools"
@ -53,8 +59,11 @@ sendStatus "installing dependencies"
cd /home/runner/actions-runner
sudo ./bin/installdependencies.sh || fail "failed to install dependencies"
sendStatus "fetching runner registration token"
GITHUB_TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${METADATA_URL}" || fail "failed to get runner registration token")
sendStatus "configuring runner"
sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${RUNNER_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner"
sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${GITHUB_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner"
sendStatus "installing runner service"
./svc.sh install runner || fail "failed to install service"
@ -69,4 +78,4 @@ if [ $? -ne 0 ];then
fi
set -e
success "runner successfully installed" $AGENT_ID
success "runner successfully installed" $AGENT_ID

View file

@ -126,14 +126,6 @@ function repoURL() {
echo "${REPO}"
}
function ghAccessToken() {
TOKEN_URL=$(runnerTokenURL)
BEARER_TOKEN=$(callbackToken)
TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${TOKEN_URL}")
checkValNotNull "${TOKEN}" "repo_url" || return $?
echo "${TOKEN}"
}
function callbackURL() {
CB_URL=$(echo "$INPUT" | jq -c -r '."callback-url"')
checkValNotNull "${CB_URL}" "callback-url" || return $?
@ -206,7 +198,7 @@ function getCloudConfig() {
-e "s|GH_DOWNLOAD_URL|${DW_URL}|g" \
-e "s|GH_FILENAME|${DW_FILENAME}|g" \
-e "s|GH_TARGET_URL|$(repoURL)|g" \
-e "s|GH_RUNNER_TOKEN|$(ghAccessToken)|g" \
-e "s|GARM_METADATA_URL|$(runnerTokenURL)|g" \
-e "s|GH_RUNNER_NAME|$(instanceName)|g" \
-e "s|GH_TEMP_DOWNLOAD_TOKEN|${DW_TOKEN}|g" \
-e "s|GH_RUNNER_LABELS|${LABELS}|g" > ${TMP_SCRIPT}

View file

@ -3,19 +3,21 @@
set -e
set -o pipefail
METADATA_URL="GARM_METADATA_URL"
CALLBACK_URL="GARM_CALLBACK_URL"
BEARER_TOKEN="GARM_CALLBACK_TOKEN"
DOWNLOAD_URL="GH_DOWNLOAD_URL"
DOWNLOAD_TOKEN="GH_TEMP_DOWNLOAD_TOKEN"
FILENAME="GH_FILENAME"
TARGET_URL="GH_TARGET_URL"
RUNNER_TOKEN="GH_RUNNER_TOKEN"
RUNNER_NAME="GH_RUNNER_NAME"
RUNNER_LABELS="GH_RUNNER_LABELS"
TEMP_TOKEN=""
if [ ! -z "$DOWNLOAD_TOKEN" ]; then
TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN"
if [ -z "$METADATA_URL" ];then
echo "no token is available and METADATA_URL is not set"
exit 1
fi
function call() {
@ -40,6 +42,10 @@ function fail() {
exit 1
}
if [ ! -z "$DOWNLOAD_TOKEN" ]; then
TEMP_TOKEN="Authorization: Bearer $DOWNLOAD_TOKEN"
fi
sendStatus "downloading tools from ${DOWNLOAD_URL}"
curl --fail -L -H "${TEMP_TOKEN}" -o "/home/runner/${FILENAME}" "${DOWNLOAD_URL}" || fail "failed to download tools"
@ -53,8 +59,11 @@ sendStatus "installing dependencies"
cd /home/runner/actions-runner
sudo ./bin/installdependencies.sh || fail "failed to install dependencies"
sendStatus "fetching runner registration token"
GITHUB_TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${METADATA_URL}" || fail "failed to get runner registration token")
sendStatus "configuring runner"
sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${RUNNER_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner"
sudo -u runner -- ./config.sh --unattended --url "${TARGET_URL}" --token "${GITHUB_TOKEN}" --name "${RUNNER_NAME}" --labels "${RUNNER_LABELS}" --ephemeral || fail "failed to configure runner"
sendStatus "installing runner service"
./svc.sh install runner || fail "failed to install service"
@ -69,4 +78,4 @@ if [ $? -ne 0 ];then
fi
set -e
success "runner successfully installed" $AGENT_ID
success "runner successfully installed" $AGENT_ID

View file

@ -190,14 +190,6 @@ function repoURL() {
echo "${REPO}"
}
function ghAccessToken() {
TOKEN_URL=$(runnerTokenURL)
BEARER_TOKEN=$(callbackToken)
TOKEN=$(curl --fail -s -X GET -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${TOKEN_URL}")
checkValNotNull "${TOKEN}" "repo_url" || return $?
echo "${TOKEN}"
}
function callbackURL() {
CB_URL=$(echo "$INPUT" | jq -c -r '."callback-url"')
checkValNotNull "${CB_URL}" "callback-url" || return $?
@ -244,7 +236,7 @@ function getCloudConfig() {
-e "s|GH_DOWNLOAD_URL|${DW_URL}|g" \
-e "s|GH_FILENAME|${DW_FILENAME}|g" \
-e "s|GH_TARGET_URL|$(repoURL)|g" \
-e "s|GH_RUNNER_TOKEN|$(ghAccessToken)|g" \
-e "s|GARM_METADATA_URL|$(runnerTokenURL)|g" \
-e "s|GH_RUNNER_NAME|$(instanceName)|g" \
-e "s|GH_TEMP_DOWNLOAD_TOKEN|${DW_TOKEN}|g" \
-e "s|GH_RUNNER_LABELS|${LABELS}|g" > ${TMP_SCRIPT}

View file

@ -218,6 +218,10 @@ func (s *sqlDatabase) UpdateInstance(ctx context.Context, instanceID string, par
instance.CreateAttempt = param.CreateAttempt
}
if param.TokenFetched != nil {
instance.TokenFetched = *param.TokenFetched
}
instance.ProviderFault = param.ProviderFault
q := s.conn.Save(&instance)

View file

@ -144,6 +144,7 @@ type Instance struct {
MetadataURL string
ProviderFault []byte `gorm:"type:longblob"`
CreateAttempt int
TokenFetched bool
PoolID uuid.UUID
Pool Pool `gorm:"foreignKey:PoolID"`

View file

@ -45,6 +45,7 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) params.Instance {
StatusMessages: []params.StatusMessage{},
CreateAttempt: instance.CreateAttempt,
UpdatedAt: instance.UpdatedAt,
TokenFetched: instance.TokenFetched,
}
if len(instance.ProviderFault) > 0 {

View file

@ -94,6 +94,7 @@ type Instance struct {
CallbackURL string `json:"-"`
MetadataURL string `json:"-"`
CreateAttempt int `json:"-"`
TokenFetched bool `json:"-"`
}
func (i Instance) GetName() string {

View file

@ -173,6 +173,7 @@ type UpdateInstanceParams struct {
ProviderFault []byte `json:"provider_fault,omitempty"`
AgentID int64 `json:"-"`
CreateAttempt int `json:"-"`
TokenFetched *bool `json:"-"`
}
type UpdateUserParams struct {

View file

@ -76,19 +76,6 @@ func controllerIDFromLabels(labels []string) string {
return ""
}
func poolIDFromLabels(labels []string) string {
for _, lbl := range labels {
if strings.HasPrefix(lbl, poolIDLabelprefix) {
return lbl[len(poolIDLabelprefix):]
}
}
return ""
}
func poolIsOwnedBy(pool params.Pool, ownerID string) bool {
return pool.RepoID == ownerID || pool.OrgID == ownerID || pool.EnterpriseID == ownerID
}
func labelsFromRunner(runner *github.Runner) []string {
if runner == nil || runner.Labels == nil {
return []string{}
@ -104,18 +91,11 @@ func labelsFromRunner(runner *github.Runner) []string {
return labels
}
func (r *basePoolManager) isControllerRunner(labels []string) bool {
// isManagedRunner returns true if labels indicate the runner belongs to a pool
// this manager is responsible for.
func (r *basePoolManager) isManagedRunner(labels []string) bool {
runnerControllerID := controllerIDFromLabels(labels)
if runnerControllerID != r.controllerID {
return false
}
poolID := poolIDFromLabels(labels)
if poolID == "" {
return false
}
_, err := r.helper.GetPoolByID(poolID)
return err == nil
return runnerControllerID == r.controllerID
}
// cleanupOrphanedProviderRunners compares runners in github with local runners and removes
@ -133,7 +113,8 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne
runnerNames := map[string]bool{}
for _, run := range runners {
if !r.isControllerRunner(labelsFromRunner(run)) {
if !r.isManagedRunner(labelsFromRunner(run)) {
log.Printf("runner %s is not managed by a pool belonging to %s", *run.Name, r.helper.String())
continue
}
runnerNames[*run.Name] = true
@ -171,7 +152,8 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
runnerNames := map[string]bool{}
for _, run := range runners {
if !r.isControllerRunner(labelsFromRunner(run)) {
if !r.isManagedRunner(labelsFromRunner(run)) {
log.Printf("runner %s is not managed by a pool belonging to %s", *run.Name, r.helper.String())
continue
}
runnerNames[*run.Name] = true
@ -202,7 +184,8 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
// first remove the instance from github, and then from our database.
func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner) error {
for _, runner := range runners {
if !r.isControllerRunner(labelsFromRunner(runner)) {
if !r.isManagedRunner(labelsFromRunner(runner)) {
log.Printf("runner %s is not managed by a pool belonging to %s", *runner.Name, r.helper.String())
continue
}
@ -248,9 +231,6 @@ func (r *basePoolManager) cleanupOrphanedGithubRunners(runners []*github.Runner)
if err != nil {
return errors.Wrap(err, "fetching pool")
}
if !poolIsOwnedBy(pool, r.ID()) {
continue
}
// check if the provider still has the instance.
provider, ok := r.providers[pool.ProviderName]
@ -627,26 +607,37 @@ func (r *basePoolManager) addInstanceToProvider(instance params.Instance) error
}
func (r *basePoolManager) getRunnerDetailsFromJob(job params.WorkflowJob) (params.RunnerInfo, error) {
if job.WorkflowJob.RunnerName != "" {
return params.RunnerInfo{
Name: job.WorkflowJob.RunnerName,
Labels: job.WorkflowJob.Labels,
}, nil
runnerInfo := params.RunnerInfo{
Name: job.WorkflowJob.RunnerName,
Labels: job.WorkflowJob.Labels,
}
// Runner name was not set in WorkflowJob by github. We can still attempt to
// fetch the info we need, using the workflow run ID, from the API.
log.Printf("runner name not found in workflow job, attempting to fetch from API")
runnerInfo, err := r.helper.GetRunnerInfoFromWorkflow(job)
if err != nil {
if errors.Is(err, runnerErrors.ErrUnauthorized) {
failureReason := fmt.Sprintf("failed to fetch runner name from API: %q", err)
r.setPoolRunningState(false, failureReason)
log.Print(failureReason)
var err error
if job.WorkflowJob.RunnerName == "" {
// Runner name was not set in WorkflowJob by github. We can still attempt to
// fetch the info we need, using the workflow run ID, from the API.
log.Printf("runner name not found in workflow job, attempting to fetch from API")
runnerInfo, err = r.helper.GetRunnerInfoFromWorkflow(job)
if err != nil {
if errors.Is(err, runnerErrors.ErrUnauthorized) {
failureReason := fmt.Sprintf("failed to fetch runner name from API: %q", err)
r.setPoolRunningState(false, failureReason)
log.Print(failureReason)
}
return params.RunnerInfo{}, errors.Wrap(err, "fetching runner name from API")
}
return params.RunnerInfo{}, errors.Wrap(err, "fetching runner name from API")
}
runnerDetails, err := r.store.GetInstanceByName(context.Background(), runnerInfo.Name)
if err != nil {
log.Printf("could not find runner details for %s", runnerInfo.Name)
return params.RunnerInfo{}, errors.Wrap(err, "fetching runner details")
}
if _, err := r.helper.GetPoolByID(runnerDetails.PoolID); err != nil {
log.Printf("runner %s (pool ID: %s) does not belong to any pool we manage: %s", runnerDetails.Name, runnerDetails.PoolID, err)
return params.RunnerInfo{}, errors.Wrap(err, "fetching pool for instance")
}
return runnerInfo, nil
}
@ -672,12 +663,8 @@ func (r *basePoolManager) HandleWorkflowJob(job params.WorkflowJob) error {
runnerInfo, err := r.getRunnerDetailsFromJob(job)
if err != nil {
// Unassigned jobs will have an empty runner_name.
// There is nothing to to in this case.
log.Printf("no runner was assigned. Skipping.")
return nil
}
if !r.isControllerRunner(runnerInfo.Labels) {
// We also need to ignore not found errors, as we may get a webhook regarding
// a workflow that is handled by a runner at a different hierarchy level.
return nil
}
@ -702,13 +689,17 @@ func (r *basePoolManager) HandleWorkflowJob(job params.WorkflowJob) error {
// a runner set. In such cases, we attemt to fetch it from the API.
runnerInfo, err := r.getRunnerDetailsFromJob(job)
if err != nil {
if errors.Is(err, runnerErrors.ErrNotFound) {
// This is most likely a runner we're not managing. If we define a repo from within an org
// and also define that same org, we will get a hook from github from both the repo and the org
// regarding the same workflow. We look for the runner in the database, and make sure it exists and is
// part of a pool that this manager is responsible for. A not found error here will most likely mean
// that we are not responsible for that runner, and we should ignore it.
return nil
}
return errors.Wrap(err, "determining runner name")
}
if !r.isControllerRunner(runnerInfo.Labels) {
return nil
}
// update instance workload state.
if err := r.setInstanceRunnerStatus(runnerInfo.Name, providerCommon.RunnerActive); err != nil {
if errors.Is(err, runnerErrors.ErrNotFound) {
@ -816,8 +807,10 @@ func (r *basePoolManager) retryFailedInstancesForOnePool(pool params.Pool) {
// TODO(gabriel-samfira): Incrementing CreateAttempt should be done within a transaction.
// It's fairly safe to do here (for now), as there should be no other code path that updates
// an instance in this state.
var tokenFetched bool = false
updateParams := params.UpdateInstanceParams{
CreateAttempt: inst.CreateAttempt + 1,
TokenFetched: &tokenFetched,
Status: providerCommon.InstancePendingCreate,
}
log.Printf("queueing previously failed instance %s for retry", inst.Name)

View file

@ -731,6 +731,13 @@ func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string
return "", runnerErrors.ErrUnauthorized
}
// Check if this instance already fetched a registration token. We only allow an instance to
// fetch one token. If the instance fails to bootstrap after a token is fetched, we reset the
// token fetched field when re-queueing the instance.
if auth.InstanceTokenFetched(ctx) {
return "", runnerErrors.ErrUnauthorized
}
status := auth.InstanceRunnerStatus(ctx)
if status != providerCommon.RunnerPending && status != providerCommon.RunnerInstalling {
return "", runnerErrors.ErrUnauthorized
@ -746,21 +753,20 @@ func (r *Runner) GetInstanceGithubRegistrationToken(ctx context.Context) (string
return "", errors.Wrap(err, "fetching pool manager for instance")
}
tokenEvents, err := r.store.ListInstanceEvents(ctx, instance.ID, params.FetchTokenEvent, "")
if err != nil {
return "", errors.Wrap(err, "fetching instance events")
}
if len(tokenEvents) > 0 {
// Token already retrieved
return "", runnerErrors.ErrUnauthorized
}
token, err := poolMgr.GithubRunnerRegistrationToken()
if err != nil {
return "", errors.Wrap(err, "fetching runner token")
}
tokenFetched := true
updateParams := params.UpdateInstanceParams{
TokenFetched: &tokenFetched,
}
if _, err := r.store.UpdateInstance(r.ctx, instance.ID, updateParams); err != nil {
return "", errors.Wrap(err, "setting token_fetched for instance")
}
if err := r.store.AddInstanceEvent(ctx, instance.ID, params.FetchTokenEvent, params.EventInfo, "runner registration token was retrieved"); err != nil {
return "", errors.Wrap(err, "recording event")
}