Add JIT config as part of instance create
We must create the DB entry for a runner with a JIT config included. Adding it later via an update runs the risk of having the consolidate loop pick up the incomplete instance. Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
parent
8c507a9251
commit
019948acbe
7 changed files with 44 additions and 77 deletions
|
|
@ -82,6 +82,7 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p
|
|||
GitHubRunnerGroup: param.GitHubRunnerGroup,
|
||||
JitConfiguration: secret,
|
||||
AditionalLabels: labels,
|
||||
AgentID: param.AgentID,
|
||||
}
|
||||
q := s.conn.Create(&newInstance)
|
||||
if q.Error != nil {
|
||||
|
|
|
|||
|
|
@ -136,7 +136,8 @@ type CreateInstanceParams struct {
|
|||
// GithubRunnerGroup is the github runner group to which the runner belongs.
|
||||
// The runner group must be created by someone with access to the enterprise.
|
||||
GitHubRunnerGroup string
|
||||
CreateAttempt int `json:"-"`
|
||||
CreateAttempt int `json:"-"`
|
||||
AgentID int64 `json:"-"`
|
||||
AditionalLabels []string
|
||||
JitConfiguration map[string]string
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,15 +104,7 @@ func (r *enterprise) findRunnerGroupByName(ctx context.Context, name string) (*g
|
|||
return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found")
|
||||
}
|
||||
|
||||
func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
if instance.AgentID != 0 {
|
||||
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
if instance.JitConfiguration != nil {
|
||||
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
func (r *enterprise) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
var rg int64 = 1
|
||||
if pool.GitHubRunnerGroup != "" {
|
||||
runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup)
|
||||
|
|
@ -123,7 +115,7 @@ func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance,
|
|||
}
|
||||
|
||||
req := github.GenerateJITConfigRequest{
|
||||
Name: instance.Name,
|
||||
Name: instance,
|
||||
RunnerGroupID: rg,
|
||||
Labels: labels,
|
||||
// TODO(gabriel-samfira): Should we make this configurable?
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ type poolHelper interface {
|
|||
|
||||
GithubCLI() common.GithubClient
|
||||
|
||||
GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (map[string]string, *github.Runner, error)
|
||||
GetJITConfig(ctx context.Context, instanceName string, pool params.Pool, labels []string) (map[string]string, *github.Runner, error)
|
||||
|
||||
FetchDbInstances() ([]params.Instance, error)
|
||||
ListPools() ([]params.Pool, error)
|
||||
|
|
|
|||
|
|
@ -116,15 +116,7 @@ func (r *organization) findRunnerGroupByName(ctx context.Context, name string) (
|
|||
return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found")
|
||||
}
|
||||
|
||||
func (r *organization) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
if instance.AgentID != 0 {
|
||||
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
if instance.JitConfiguration != nil {
|
||||
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
func (r *organization) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
var rg int64 = 1
|
||||
if pool.GitHubRunnerGroup != "" {
|
||||
runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup)
|
||||
|
|
@ -135,7 +127,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc
|
|||
}
|
||||
|
||||
req := github.GenerateJITConfigRequest{
|
||||
Name: instance.Name,
|
||||
Name: instance,
|
||||
RunnerGroupID: rg,
|
||||
Labels: labels,
|
||||
// TODO(gabriel-samfira): Should we make this configurable?
|
||||
|
|
|
|||
|
|
@ -388,11 +388,18 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne
|
|||
continue
|
||||
}
|
||||
|
||||
pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "fetching instance pool info")
|
||||
}
|
||||
|
||||
switch instance.RunnerStatus {
|
||||
case params.RunnerPending, params.RunnerInstalling:
|
||||
// runner is still installing. We give it a chance to finish.
|
||||
r.log("runner %s is still installing, give it a chance to finish", instance.Name)
|
||||
continue
|
||||
if time.Since(instance.UpdatedAt).Minutes() < float64(pool.RunnerTimeout()) {
|
||||
// runner is still installing. We give it a chance to finish.
|
||||
r.log("runner %s is still installing, give it a chance to finish", instance.Name)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if time.Since(instance.UpdatedAt).Minutes() < 5 {
|
||||
|
|
@ -451,20 +458,7 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
|
|||
// * The runner never joined github within the pool timeout
|
||||
// * The runner managed to join github, but the setup process failed later and the runner
|
||||
// never started on the instance.
|
||||
//
|
||||
// There are several steps in the user data that sets up the runner:
|
||||
// * Download and unarchive the runner from github (or used the cached version)
|
||||
// * Configure runner (connects to github). At this point the runner is seen as offline.
|
||||
// * Install the service
|
||||
// * Set SELinux context (if SELinux is enabled)
|
||||
// * Start the service (if successful, the runner will transition to "online")
|
||||
// * Get the runner ID
|
||||
//
|
||||
// If we fail getting the runner ID after it's started, garm will set the runner status to "failed",
|
||||
// even though, technically the runner is online and fully functional. This is why we check here for
|
||||
// both the runner status as reported by GitHub and the runner status as reported by the provider.
|
||||
// If the runner is "offline" and marked as "failed", it should be safe to reap it.
|
||||
if runner, ok := runnersByName[instance.Name]; !ok || (runner.GetStatus() == "offline" && instance.RunnerStatus == params.RunnerFailed) {
|
||||
if runner, ok := runnersByName[instance.Name]; !ok || runner.GetStatus() == "offline" {
|
||||
r.log("reaping timed-out/failed runner %s", instance.Name)
|
||||
if err := r.ForceDeleteRunner(instance); err != nil {
|
||||
r.log("failed to update runner %s status: %s", instance.Name, err)
|
||||
|
|
@ -699,6 +693,12 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
|
|||
}
|
||||
|
||||
name := fmt.Sprintf("%s-%s", pool.GetRunnerPrefix(), util.NewID())
|
||||
labels := r.getLabelsForInstance(pool)
|
||||
// Attempt to create JIT config
|
||||
jitConfig, runner, err := r.helper.GetJITConfig(ctx, name, pool, labels)
|
||||
if err != nil {
|
||||
r.log("failed to get JIT config, falling back to registration token: %s", err)
|
||||
}
|
||||
|
||||
createParams := params.CreateInstanceParams{
|
||||
Name: name,
|
||||
|
|
@ -711,6 +711,11 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
|
|||
CreateAttempt: 1,
|
||||
GitHubRunnerGroup: pool.GitHubRunnerGroup,
|
||||
AditionalLabels: aditionalLabels,
|
||||
JitConfiguration: jitConfig,
|
||||
}
|
||||
|
||||
if runner != nil {
|
||||
createParams.AgentID = runner.GetID()
|
||||
}
|
||||
|
||||
instance, err := r.store.CreateInstance(r.ctx, poolID, createParams)
|
||||
|
|
@ -719,38 +724,22 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
|
|||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil && instance.ID != "" {
|
||||
if err := r.ForceDeleteRunner(instance); err != nil {
|
||||
r.log("failed to cleanup instance: %s", instance.Name)
|
||||
if err != nil {
|
||||
if instance.ID != "" {
|
||||
if err := r.ForceDeleteRunner(instance); err != nil {
|
||||
r.log("failed to cleanup instance: %s", instance.Name)
|
||||
}
|
||||
}
|
||||
|
||||
if runner != nil {
|
||||
_, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID())
|
||||
if err != nil {
|
||||
r.log("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
labels := r.getLabelsForInstance(pool)
|
||||
// Attempt to create JIT config
|
||||
jitConfig, runner, err := r.helper.GetJITConfig(ctx, instance, pool, labels)
|
||||
if err == nil {
|
||||
updateParams := params.UpdateInstanceParams{
|
||||
AgentID: runner.GetID(),
|
||||
// We're using a JIT config. Setting the TokenFetched will disable the registration token
|
||||
// metadata endpoint.
|
||||
TokenFetched: github.Bool(true),
|
||||
JitConfiguration: jitConfig,
|
||||
}
|
||||
instance, err = r.updateInstance(instance.Name, updateParams)
|
||||
if err != nil {
|
||||
// The agent ID is not recorded in the instance, so the deferred ForceDeleteRunner() will not
|
||||
// attempt to clean it up. We need to do it here.
|
||||
_, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID())
|
||||
if err != nil {
|
||||
log.Printf("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr)
|
||||
}
|
||||
return errors.Wrap(err, "updating instance")
|
||||
}
|
||||
} else {
|
||||
r.log("failed to get JIT config, falling back to registration token: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -88,17 +88,9 @@ type repository struct {
|
|||
mux sync.Mutex
|
||||
}
|
||||
|
||||
func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
if instance.AgentID != 0 {
|
||||
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
if instance.JitConfiguration != nil {
|
||||
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
|
||||
}
|
||||
|
||||
func (r *repository) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
|
||||
req := github.GenerateJITConfigRequest{
|
||||
Name: instance.Name,
|
||||
Name: instance,
|
||||
// At the repository level we only have the default runner group.
|
||||
RunnerGroupID: 1,
|
||||
Labels: labels,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue