diff --git a/apiserver/controllers/instances.go b/apiserver/controllers/instances.go index c9ef6533..dd1ad284 100644 --- a/apiserver/controllers/instances.go +++ b/apiserver/controllers/instances.go @@ -18,6 +18,7 @@ import ( "encoding/json" "log" "net/http" + "strconv" gErrors "github.com/cloudbase/garm-provider-common/errors" "github.com/cloudbase/garm/apiserver/params" @@ -121,6 +122,12 @@ func (a *APIController) GetInstanceHandler(w http.ResponseWriter, r *http.Reques // in: path // required: true // +// + name: forceRemove +// description: If true GARM will ignore any provider error when removing the runner and will continue to remove the runner from github and the GARM database. +// type: boolean +// in: query +// required: false +// // Responses: // default: APIErrorResponse func (a *APIController) DeleteInstanceHandler(w http.ResponseWriter, r *http.Request) { @@ -138,7 +145,8 @@ func (a *APIController) DeleteInstanceHandler(w http.ResponseWriter, r *http.Req return } - if err := a.r.ForceDeleteRunner(ctx, instanceName); err != nil { + forceRemove, _ := strconv.ParseBool(r.URL.Query().Get("forceRemove")) + if err := a.r.DeleteRunner(ctx, instanceName, forceRemove); err != nil { log.Printf("removing runner: %s", err) handleError(w, err) return diff --git a/apiserver/swagger.yaml b/apiserver/swagger.yaml index 1d5f33b1..2d269391 100644 --- a/apiserver/swagger.yaml +++ b/apiserver/swagger.yaml @@ -597,6 +597,10 @@ paths: name: instanceName required: true type: string + - description: If true GARM will ignore any provider error when removing the runner and will continue to remove the runner from github and the GARM database. + in: query + name: forceRemove + type: boolean responses: default: description: APIErrorResponse diff --git a/client/instances/delete_instance_parameters.go b/client/instances/delete_instance_parameters.go index 34cb6eee..d4f9695c 100644 --- a/client/instances/delete_instance_parameters.go +++ b/client/instances/delete_instance_parameters.go @@ -14,6 +14,7 @@ import ( "github.com/go-openapi/runtime" cr "github.com/go-openapi/runtime/client" "github.com/go-openapi/strfmt" + "github.com/go-openapi/swag" ) // NewDeleteInstanceParams creates a new DeleteInstanceParams object, @@ -61,6 +62,12 @@ DeleteInstanceParams contains all the parameters to send to the API endpoint */ type DeleteInstanceParams struct { + /* ForceRemove. + + If true GARM will ignore any provider error when removing the runner and will continue to remove the runner from github and the GARM database. + */ + ForceRemove *bool + /* InstanceName. Runner instance name. @@ -120,6 +127,17 @@ func (o *DeleteInstanceParams) SetHTTPClient(client *http.Client) { o.HTTPClient = client } +// WithForceRemove adds the forceRemove to the delete instance params +func (o *DeleteInstanceParams) WithForceRemove(forceRemove *bool) *DeleteInstanceParams { + o.SetForceRemove(forceRemove) + return o +} + +// SetForceRemove adds the forceRemove to the delete instance params +func (o *DeleteInstanceParams) SetForceRemove(forceRemove *bool) { + o.ForceRemove = forceRemove +} + // WithInstanceName adds the instanceName to the delete instance params func (o *DeleteInstanceParams) WithInstanceName(instanceName string) *DeleteInstanceParams { o.SetInstanceName(instanceName) @@ -139,6 +157,23 @@ func (o *DeleteInstanceParams) WriteToRequest(r runtime.ClientRequest, reg strfm } var res []error + if o.ForceRemove != nil { + + // query param forceRemove + var qrForceRemove bool + + if o.ForceRemove != nil { + qrForceRemove = *o.ForceRemove + } + qForceRemove := swag.FormatBool(qrForceRemove) + if qForceRemove != "" { + + if err := r.SetQueryParam("forceRemove", qForceRemove); err != nil { + return err + } + } + } + // path param instanceName if err := r.SetPathParam("instanceName", o.InstanceName); err != nil { return err diff --git a/cmd/garm-cli/cmd/runner.go b/cmd/garm-cli/cmd/runner.go index 0d7971bd..5b7d07c5 100644 --- a/cmd/garm-cli/cmd/runner.go +++ b/cmd/garm-cli/cmd/runner.go @@ -187,12 +187,9 @@ to either cancel the workflow or wait for it to finish. return fmt.Errorf("requires a runner name") } - if !forceRemove { - return fmt.Errorf("use --force-remove-runner=true to remove a runner") - } - deleteInstanceReq := apiClientInstances.NewDeleteInstanceParams() deleteInstanceReq.InstanceName = args[0] + deleteInstanceReq.ForceRemove = &forceRemove if err := apiCli.Instances.DeleteInstance(deleteInstanceReq, authToken); err != nil { return err } @@ -207,7 +204,7 @@ func init() { runnerListCmd.Flags().BoolVarP(&runnerAll, "all", "a", false, "List all runners, regardless of org or repo.") runnerListCmd.MarkFlagsMutuallyExclusive("repo", "org", "enterprise", "all") - runnerDeleteCmd.Flags().BoolVarP(&forceRemove, "force-remove-runner", "f", false, "Confirm you want to delete a runner") + runnerDeleteCmd.Flags().BoolVarP(&forceRemove, "force-remove-runner", "f", false, "Forcefully remove a runner. If set to true, GARM will ignore provider errors when removing the runner.") runnerDeleteCmd.MarkFlagsMutuallyExclusive("force-remove-runner") runnerCmd.AddCommand( diff --git a/database/common/mocks/Store.go b/database/common/mocks/Store.go index da21f1a7..7d9f58fa 100644 --- a/database/common/mocks/Store.go +++ b/database/common/mocks/Store.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -1578,12 +1578,13 @@ func (_m *Store) UpdateUser(ctx context.Context, user string, param params.Updat return r0, r1 } -// NewStore creates a new instance of Store. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewStore(t interface { +type mockConstructorTestingTNewStore interface { mock.TestingT Cleanup(func()) -}) *Store { +} + +// NewStore creates a new instance of Store. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewStore(t mockConstructorTestingTNewStore) *Store { mock := &Store{} mock.Mock.Test(t) diff --git a/go.mod b/go.mod index 367821c6..2b8f49c0 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.20 require ( github.com/BurntSushi/toml v1.2.1 - github.com/cloudbase/garm-provider-common v0.1.0 + github.com/cloudbase/garm-provider-common v0.1.1-0.20231012061429-49001794e700 github.com/go-openapi/errors v0.20.4 github.com/go-openapi/runtime v0.26.0 github.com/go-openapi/strfmt v0.21.7 diff --git a/go.sum b/go.sum index 596df8af..dbd89cb8 100644 --- a/go.sum +++ b/go.sum @@ -22,8 +22,8 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudbase/garm-provider-common v0.1.0 h1:gc2n8nsLjt7G3InAfqZ+75iZjSIUkIx86d6/DFA2+jc= -github.com/cloudbase/garm-provider-common v0.1.0/go.mod h1:igxJRT3OlykERYc6ssdRQXcb+BCaeSfnucg6I0OSoDc= +github.com/cloudbase/garm-provider-common v0.1.1-0.20231012061429-49001794e700 h1:ZCJ1zZ2WI/37ffzpRsu7t5zzShAMThhYsXw7bBNKBR0= +github.com/cloudbase/garm-provider-common v0.1.1-0.20231012061429-49001794e700/go.mod h1:igxJRT3OlykERYc6ssdRQXcb+BCaeSfnucg6I0OSoDc= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/runner/common/mocks/GithubClient.go b/runner/common/mocks/GithubClient.go index b8f450e7..6bfec1d8 100644 --- a/runner/common/mocks/GithubClient.go +++ b/runner/common/mocks/GithubClient.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -730,12 +730,13 @@ func (_m *GithubClient) RemoveRunner(ctx context.Context, owner string, repo str return r0, r1 } -// NewGithubClient creates a new instance of GithubClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewGithubClient(t interface { +type mockConstructorTestingTNewGithubClient interface { mock.TestingT Cleanup(func()) -}) *GithubClient { +} + +// NewGithubClient creates a new instance of GithubClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewGithubClient(t mockConstructorTestingTNewGithubClient) *GithubClient { mock := &GithubClient{} mock.Mock.Test(t) diff --git a/runner/common/mocks/GithubEnterpriseClient.go b/runner/common/mocks/GithubEnterpriseClient.go index 2fc1442a..c53e775b 100644 --- a/runner/common/mocks/GithubEnterpriseClient.go +++ b/runner/common/mocks/GithubEnterpriseClient.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -215,12 +215,13 @@ func (_m *GithubEnterpriseClient) RemoveRunner(ctx context.Context, enterprise s return r0, r1 } -// NewGithubEnterpriseClient creates a new instance of GithubEnterpriseClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewGithubEnterpriseClient(t interface { +type mockConstructorTestingTNewGithubEnterpriseClient interface { mock.TestingT Cleanup(func()) -}) *GithubEnterpriseClient { +} + +// NewGithubEnterpriseClient creates a new instance of GithubEnterpriseClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewGithubEnterpriseClient(t mockConstructorTestingTNewGithubEnterpriseClient) *GithubEnterpriseClient { mock := &GithubEnterpriseClient{} mock.Mock.Test(t) diff --git a/runner/common/mocks/OrganizationHooks.go b/runner/common/mocks/OrganizationHooks.go index 7d0428d0..dffe6a66 100644 --- a/runner/common/mocks/OrganizationHooks.go +++ b/runner/common/mocks/OrganizationHooks.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -171,12 +171,13 @@ func (_m *OrganizationHooks) PingOrgHook(ctx context.Context, org string, id int return r0, r1 } -// NewOrganizationHooks creates a new instance of OrganizationHooks. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewOrganizationHooks(t interface { +type mockConstructorTestingTNewOrganizationHooks interface { mock.TestingT Cleanup(func()) -}) *OrganizationHooks { +} + +// NewOrganizationHooks creates a new instance of OrganizationHooks. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewOrganizationHooks(t mockConstructorTestingTNewOrganizationHooks) *OrganizationHooks { mock := &OrganizationHooks{} mock.Mock.Test(t) diff --git a/runner/common/mocks/PoolManager.go b/runner/common/mocks/PoolManager.go index 949dfc55..9ff34758 100644 --- a/runner/common/mocks/PoolManager.go +++ b/runner/common/mocks/PoolManager.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -14,6 +14,20 @@ type PoolManager struct { mock.Mock } +// DeleteRunner provides a mock function with given fields: runner, forceRemove +func (_m *PoolManager) DeleteRunner(runner params.Instance, forceRemove bool) error { + ret := _m.Called(runner, forceRemove) + + var r0 error + if rf, ok := ret.Get(0).(func(params.Instance, bool) error); ok { + r0 = rf(runner, forceRemove) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // ForceDeleteRunner provides a mock function with given fields: runner func (_m *PoolManager) ForceDeleteRunner(runner params.Instance) error { ret := _m.Called(runner) @@ -250,12 +264,13 @@ func (_m *PoolManager) WebhookSecret() string { return r0 } -// NewPoolManager creates a new instance of PoolManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewPoolManager(t interface { +type mockConstructorTestingTNewPoolManager interface { mock.TestingT Cleanup(func()) -}) *PoolManager { +} + +// NewPoolManager creates a new instance of PoolManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewPoolManager(t mockConstructorTestingTNewPoolManager) *PoolManager { mock := &PoolManager{} mock.Mock.Test(t) diff --git a/runner/common/mocks/Provider.go b/runner/common/mocks/Provider.go index e5157e0f..53ee2c95 100644 --- a/runner/common/mocks/Provider.go +++ b/runner/common/mocks/Provider.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -160,12 +160,13 @@ func (_m *Provider) Stop(ctx context.Context, instance string, force bool) error return r0 } -// NewProvider creates a new instance of Provider. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewProvider(t interface { +type mockConstructorTestingTNewProvider interface { mock.TestingT Cleanup(func()) -}) *Provider { +} + +// NewProvider creates a new instance of Provider. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewProvider(t mockConstructorTestingTNewProvider) *Provider { mock := &Provider{} mock.Mock.Test(t) diff --git a/runner/common/mocks/RepositoryHooks.go b/runner/common/mocks/RepositoryHooks.go index 67ffe3e0..5706d4fc 100644 --- a/runner/common/mocks/RepositoryHooks.go +++ b/runner/common/mocks/RepositoryHooks.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -171,12 +171,13 @@ func (_m *RepositoryHooks) PingRepoHook(ctx context.Context, owner string, repo return r0, r1 } -// NewRepositoryHooks creates a new instance of RepositoryHooks. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewRepositoryHooks(t interface { +type mockConstructorTestingTNewRepositoryHooks interface { mock.TestingT Cleanup(func()) -}) *RepositoryHooks { +} + +// NewRepositoryHooks creates a new instance of RepositoryHooks. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewRepositoryHooks(t mockConstructorTestingTNewRepositoryHooks) *RepositoryHooks { mock := &RepositoryHooks{} mock.Mock.Test(t) diff --git a/runner/common/pool.go b/runner/common/pool.go index b44488ee..caf1da38 100644 --- a/runner/common/pool.go +++ b/runner/common/pool.go @@ -38,22 +38,54 @@ const ( //go:generate mockery --all type PoolManager interface { + // ID returns the ID of the entity (repo, org, enterprise) ID() string + // WebhookSecret returns the unencrypted webhook secret associated with the webhook installed + // in GitHub for GARM. For GARM to receive webhook events for an entity, either the operator or + // GARM will have to create a webhook in GitHub which points to the GARM API server. To authenticate + // the webhook, a webhook secret is used. This function returns that secret. WebhookSecret() string + // GithubRunnerRegistrationToken returns a new registration token for a github runner. This is used + // for GHES installations that have not yet upgraded to a version >= 3.10. Starting with 3.10, we use + // just-in-time runners, which no longer require exposing a runner registration token. GithubRunnerRegistrationToken() (string, error) + // HandleWorkflowJob handles a workflow job meant for a particular entity. When a webhook is fired for + // a repo, org or enterprise, we determine the destination of that webhook, retrieve the pool manager + // for it and call this function with the WorkflowJob as a parameter. HandleWorkflowJob(job params.WorkflowJob) error + // RefreshState allows us to update webhook secrets and configuration for a pool manager. RefreshState(param params.UpdatePoolStateParams) error + // ForceDeleteRunner will attempt to remove a runner from the pool. + // + // Deprecated: FunctionName is deprecated. Use DeleteRunner instead. ForceDeleteRunner(runner params.Instance) error + // DeleteRunner will attempt to remove a runner from the pool. If forceRemove is true, any error + // received from the provider will be ignored and we will procede to remove the runner from the database. + // An error received while attempting to remove from GitHub (other than 404) will still stop the deletion + // process. This can happen if the runner is already processing a job. At which point, you can simply cancel + // the job in github. Doing so will prompt GARM to reap the runner automatically. + DeleteRunner(runner params.Instance, forceRemove bool) error + + // InstallWebhook will create a webhook in github for the entity associated with this pool manager. InstallWebhook(ctx context.Context, param params.InstallWebhookParams) (params.HookInfo, error) + // GetWebhookInfo will return information about the webhook installed in github for the entity associated GetWebhookInfo(ctx context.Context) (params.HookInfo, error) + // UninstallWebhook will remove the webhook installed in github for the entity associated with this pool manager. UninstallWebhook(ctx context.Context) error + // RootCABundle will return a CA bundle that must be installed on all runners in order to properly validate + // x509 certificates used by various systems involved. This CA bundle is defined in the GARM config file and + // can include multiple CA certificates for the GARM api server, GHES server and any provider API endpoint that + // may use internal or self signed certificates. RootCABundle() (params.CertificateBundle, error) - // PoolManager lifecycle functions. Start/stop pool. + // Start will start the pool manager and all associated workers. Start() error + // Stop will stop the pool manager and all associated workers. Stop() error + // Status will return the current status of the pool manager. Status() params.PoolManagerStatus + // Wait will block until the pool manager has stopped. Wait() error } diff --git a/runner/mocks/PoolManagerController.go b/runner/mocks/PoolManagerController.go index d422508a..6806623a 100644 --- a/runner/mocks/PoolManagerController.go +++ b/runner/mocks/PoolManagerController.go @@ -1,4 +1,4 @@ -// Code generated by mockery v0.0.0-dev. DO NOT EDIT. +// Code generated by mockery v2.28.1. DO NOT EDIT. package mocks @@ -373,12 +373,13 @@ func (_m *PoolManagerController) UpdateRepoPoolManager(ctx context.Context, repo return r0, r1 } -// NewPoolManagerController creates a new instance of PoolManagerController. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewPoolManagerController(t interface { +type mockConstructorTestingTNewPoolManagerController interface { mock.TestingT Cleanup(func()) -}) *PoolManagerController { +} + +// NewPoolManagerController creates a new instance of PoolManagerController. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewPoolManagerController(t mockConstructorTestingTNewPoolManagerController) *PoolManagerController { mock := &PoolManagerController{} mock.Mock.Test(t) diff --git a/runner/pool/pool.go b/runner/pool/pool.go index 7f6b07ca..adc46af7 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -381,7 +381,7 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne switch commonParams.InstanceStatus(instance.Status) { case commonParams.InstancePendingCreate, - commonParams.InstancePendingDelete: + commonParams.InstancePendingDelete, commonParams.InstancePendingForceDelete: // this instance is in the process of being created or is awaiting deletion. // Instances in pending_create did not get a chance to register themselves in, // github so we let them be for now. @@ -454,13 +454,14 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error { continue } - // There are 2 cases (currently) where we consider a runner as timed out: + // There are 3 cases (currently) where we consider a runner as timed out: // * The runner never joined github within the pool timeout // * The runner managed to join github, but the setup process failed later and the runner // never started on the instance. + // * A JIT config was created, but the runner never joined github. if runner, ok := runnersByName[instance.Name]; !ok || runner.GetStatus() == "offline" { r.log("reaping timed-out/failed runner %s", instance.Name) - if err := r.ForceDeleteRunner(instance); err != nil { + if err := r.DeleteRunner(instance, false); err != nil { r.log("failed to update runner %s status: %s", instance.Name, err) return errors.Wrap(err, "updating runner") } @@ -726,7 +727,7 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona defer func() { if err != nil { if instance.ID != "" { - if err := r.ForceDeleteRunner(instance); err != nil { + if err := r.DeleteRunner(instance, false); err != nil { r.log("failed to cleanup instance: %s", instance.Name) } } @@ -1030,7 +1031,7 @@ func (r *basePoolManager) scaleDownOnePool(ctx context.Context, pool params.Pool g.Go(func() error { r.log("scaling down idle worker %s from pool %s\n", instanceToDelete.Name, pool.ID) - if err := r.ForceDeleteRunner(instanceToDelete); err != nil { + if err := r.DeleteRunner(instanceToDelete, false); err != nil { return fmt.Errorf("failed to delete instance %s: %w", instanceToDelete.ID, err) } return nil @@ -1272,7 +1273,7 @@ func (r *basePoolManager) deletePendingInstances() error { r.log("removing instances in pending_delete") for _, instance := range instances { - if instance.Status != commonParams.InstancePendingDelete { + if instance.Status != commonParams.InstancePendingDelete && instance.Status != commonParams.InstancePendingForceDelete { // not in pending_delete status. Skip. continue } @@ -1284,6 +1285,7 @@ func (r *basePoolManager) deletePendingInstances() error { continue } + currentStatus := instance.Status // Set the status to deleting before launching the goroutine that removes // the runner from the provider (which can take a long time). if _, err := r.setInstanceStatus(instance.Name, commonParams.InstanceDeleting, nil); err != nil { @@ -1300,9 +1302,9 @@ func (r *basePoolManager) deletePendingInstances() error { defer func(instance params.Instance) { if err != nil { r.log("failed to remove instance %s: %s", instance.Name, err) - // failed to remove from provider. Set the status back to pending_delete, which - // will retry the operation. - if _, err := r.setInstanceStatus(instance.Name, commonParams.InstancePendingDelete, nil); err != nil { + // failed to remove from provider. Set status to previous value, which will retry + // the operation. + if _, err := r.setInstanceStatus(instance.Name, currentStatus, nil); err != nil { r.log("failed to update runner %s status: %s", instance.Name, err) } } @@ -1311,7 +1313,10 @@ func (r *basePoolManager) deletePendingInstances() error { r.log("removing instance %s from provider", instance.Name) err = r.deleteInstanceFromProvider(r.ctx, instance) if err != nil { - return fmt.Errorf("failed to remove instance from provider: %w", err) + if currentStatus != commonParams.InstancePendingForceDelete { + return fmt.Errorf("failed to remove instance from provider: %w", err) + } + log.Printf("failed to remove instance %s from provider (continuing anyway): %s", instance.Name, err) } r.log("removing instance %s from database", instance.Name) if deleteErr := r.store.DeleteInstance(r.ctx, instance.PoolID, instance.Name); deleteErr != nil { @@ -1397,18 +1402,14 @@ func (r *basePoolManager) runnerCleanup() (err error) { return fmt.Errorf("failed to reap timed out runners: %w", err) } - if err := r.cleanupOrphanedRunners(); err != nil { + if err := r.cleanupOrphanedRunners(runners); err != nil { return fmt.Errorf("failed to cleanup orphaned runners: %w", err) } return nil } -func (r *basePoolManager) cleanupOrphanedRunners() error { - runners, err := r.helper.GetGithubRunners() - if err != nil { - return errors.Wrap(err, "fetching github runners") - } +func (r *basePoolManager) cleanupOrphanedRunners(runners []*github.Runner) error { if err := r.cleanupOrphanedProviderRunners(runners); err != nil { return errors.Wrap(err, "cleaning orphaned instances") } @@ -1421,16 +1422,24 @@ func (r *basePoolManager) cleanupOrphanedRunners() error { } func (r *basePoolManager) Start() error { - go r.updateTools() //nolint + initialToolUpdate := make(chan struct{}, 1) + go func() { + r.updateTools() //nolint + initialToolUpdate <- struct{}{} + }() - go r.startLoopForFunction(r.runnerCleanup, common.PoolReapTimeoutInterval, "timeout_reaper", false) - go r.startLoopForFunction(r.scaleDown, common.PoolScaleDownInterval, "scale_down", false) - go r.startLoopForFunction(r.deletePendingInstances, common.PoolConsilitationInterval, "consolidate[delete_pending]", false) - go r.startLoopForFunction(r.addPendingInstances, common.PoolConsilitationInterval, "consolidate[add_pending]", false) - go r.startLoopForFunction(r.ensureMinIdleRunners, common.PoolConsilitationInterval, "consolidate[ensure_min_idle]", false) - go r.startLoopForFunction(r.retryFailedInstances, common.PoolConsilitationInterval, "consolidate[retry_failed]", false) - go r.startLoopForFunction(r.updateTools, common.PoolToolUpdateInterval, "update_tools", true) - go r.startLoopForFunction(r.consumeQueuedJobs, common.PoolConsilitationInterval, "job_queue_consumer", false) + go func() { + <-initialToolUpdate + defer close(initialToolUpdate) + go r.startLoopForFunction(r.runnerCleanup, common.PoolReapTimeoutInterval, "timeout_reaper", false) + go r.startLoopForFunction(r.scaleDown, common.PoolScaleDownInterval, "scale_down", false) + go r.startLoopForFunction(r.deletePendingInstances, common.PoolConsilitationInterval, "consolidate[delete_pending]", false) + go r.startLoopForFunction(r.addPendingInstances, common.PoolConsilitationInterval, "consolidate[add_pending]", false) + go r.startLoopForFunction(r.ensureMinIdleRunners, common.PoolConsilitationInterval, "consolidate[ensure_min_idle]", false) + go r.startLoopForFunction(r.retryFailedInstances, common.PoolConsilitationInterval, "consolidate[retry_failed]", false) + go r.startLoopForFunction(r.updateTools, common.PoolToolUpdateInterval, "update_tools", true) + go r.startLoopForFunction(r.consumeQueuedJobs, common.PoolConsilitationInterval, "job_queue_consumer", false) + }() return nil } @@ -1455,7 +1464,16 @@ func (r *basePoolManager) ID() string { return r.helper.ID() } +// ForceDeleteRunner will delete a runner from a pool. +// +// Deprecated: Use DeleteRunner instead. func (r *basePoolManager) ForceDeleteRunner(runner params.Instance) error { + return r.DeleteRunner(runner, true) +} + +// Delete runner will delete a runner from a pool. If forceRemove is set to true, any error received from +// the IaaS provider will be ignored and deletion will continue. +func (r *basePoolManager) DeleteRunner(runner params.Instance, forceRemove bool) error { if !r.managerIsRunning { return runnerErrors.NewConflictError("pool manager is not running for %s", r.helper.String()) } @@ -1485,9 +1503,14 @@ func (r *basePoolManager) ForceDeleteRunner(runner params.Instance) error { } } } - r.log("setting instance status for: %v", runner.Name) - if _, err := r.setInstanceStatus(runner.Name, commonParams.InstancePendingDelete, nil); err != nil { + instanceStatus := commonParams.InstancePendingDelete + if forceRemove { + instanceStatus = commonParams.InstancePendingForceDelete + } + + r.log("setting instance status for %v to %v", runner.Name, instanceStatus) + if _, err := r.setInstanceStatus(runner.Name, instanceStatus, nil); err != nil { r.log("failed to update runner %s status: %s", runner.Name, err) return errors.Wrap(err, "updating runner") } diff --git a/runner/runner.go b/runner/runner.go index aa4aedbe..aae2baf2 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -898,7 +898,17 @@ func (r *Runner) getPoolManagerFromInstance(ctx context.Context, instance params return poolMgr, nil } +// ForceDeleteRunner will attempt to remove a runner from a pool. +// +// Deprecated: FunctionName is deprecated. Use DeleteRunner instead. func (r *Runner) ForceDeleteRunner(ctx context.Context, instanceName string) error { + return r.DeleteRunner(ctx, instanceName, true) +} + +// DeleteRunner removes a runner from a pool. If forceDelete is true, GARM will ignore any provider errors +// that may occur, and attempt to remove the runner from GitHub and then the database, regardless of provider +// errors. +func (r *Runner) DeleteRunner(ctx context.Context, instanceName string, forceDelete bool) error { if !auth.IsAdmin(ctx) { return runnerErrors.ErrUnauthorized } @@ -909,7 +919,8 @@ func (r *Runner) ForceDeleteRunner(ctx context.Context, instanceName string) err } switch instance.Status { - case commonParams.InstanceRunning, commonParams.InstanceError: + case commonParams.InstanceRunning, commonParams.InstanceError, + commonParams.InstancePendingForceDelete, commonParams.InstancePendingDelete: default: return runnerErrors.NewBadRequestError("runner must be in %q or %q state", commonParams.InstanceRunning, commonParams.InstanceError) } @@ -919,7 +930,7 @@ func (r *Runner) ForceDeleteRunner(ctx context.Context, instanceName string) err return errors.Wrap(err, "fetching pool manager for instance") } - if err := poolMgr.ForceDeleteRunner(instance); err != nil { + if err := poolMgr.DeleteRunner(instance, forceDelete); err != nil { return errors.Wrap(err, "removing runner") } return nil diff --git a/vendor/github.com/cloudbase/garm-provider-common/params/params.go b/vendor/github.com/cloudbase/garm-provider-common/params/params.go index 3bd2e7d4..95a6e6bb 100644 --- a/vendor/github.com/cloudbase/garm-provider-common/params/params.go +++ b/vendor/github.com/cloudbase/garm-provider-common/params/params.go @@ -39,14 +39,15 @@ const ( ) const ( - InstanceRunning InstanceStatus = "running" - InstanceStopped InstanceStatus = "stopped" - InstanceError InstanceStatus = "error" - InstancePendingDelete InstanceStatus = "pending_delete" - InstanceDeleting InstanceStatus = "deleting" - InstancePendingCreate InstanceStatus = "pending_create" - InstanceCreating InstanceStatus = "creating" - InstanceStatusUnknown InstanceStatus = "unknown" + InstanceRunning InstanceStatus = "running" + InstanceStopped InstanceStatus = "stopped" + InstanceError InstanceStatus = "error" + InstancePendingDelete InstanceStatus = "pending_delete" + InstancePendingForceDelete InstanceStatus = "pending_force_delete" + InstanceDeleting InstanceStatus = "deleting" + InstancePendingCreate InstanceStatus = "pending_create" + InstanceCreating InstanceStatus = "creating" + InstanceStatusUnknown InstanceStatus = "unknown" ) const ( diff --git a/vendor/modules.txt b/vendor/modules.txt index 098e3341..e2af7de0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -14,7 +14,7 @@ github.com/cespare/xxhash/v2 # github.com/chzyer/readline v1.5.1 ## explicit; go 1.15 github.com/chzyer/readline -# github.com/cloudbase/garm-provider-common v0.1.0 +# github.com/cloudbase/garm-provider-common v0.1.1-0.20231012061429-49001794e700 ## explicit; go 1.20 github.com/cloudbase/garm-provider-common/cloudconfig github.com/cloudbase/garm-provider-common/defaults