diff --git a/cmd/garm-cli/cmd/controller.go b/cmd/garm-cli/cmd/controller.go index 2141121b..e74df56f 100644 --- a/cmd/garm-cli/cmd/controller.go +++ b/cmd/garm-cli/cmd/controller.go @@ -113,9 +113,13 @@ up the GARM controller URLs as: params.WebhookURL = &webhookURL } - if params.WebhookURL == nil && params.MetadataURL == nil && params.CallbackURL == nil { + if cmd.Flags().Changed("minimum-job-age-backoff") { + params.MinimumJobAgeBackoff = &minimumJobAgeBackoff + } + + if params.WebhookURL == nil && params.MetadataURL == nil && params.CallbackURL == nil && params.MinimumJobAgeBackoff == nil { cmd.Help() - return fmt.Errorf("at least one of metadata-url, callback-url or webhook-url must be provided") + return fmt.Errorf("at least one of minimum-job-age-backoff, metadata-url, callback-url or webhook-url must be provided") } updateUrlsReq := apiClientController.NewUpdateControllerParams() @@ -151,6 +155,7 @@ func renderControllerInfoTable(info params.ControllerInfo) string { t.AppendRow(table.Row{"Callback URL", info.CallbackURL}) t.AppendRow(table.Row{"Webhook Base URL", info.WebhookURL}) t.AppendRow(table.Row{"Controller Webhook URL", info.ControllerWebhookURL}) + t.AppendRow(table.Row{"Minimum Job Age Backoff", info.MinimumJobAgeBackoff}) return t.Render() } @@ -163,6 +168,7 @@ func init() { controllerUpdateCmd.Flags().StringVarP(&metadataURL, "metadata-url", "m", "", "The metadata URL for the controller (ie. https://garm.example.com/api/v1/metadata)") controllerUpdateCmd.Flags().StringVarP(&callbackURL, "callback-url", "c", "", "The callback URL for the controller (ie. https://garm.example.com/api/v1/callbacks)") controllerUpdateCmd.Flags().StringVarP(&webhookURL, "webhook-url", "w", "", "The webhook URL for the controller (ie. https://garm.example.com/webhooks)") + controllerUpdateCmd.Flags().UintVarP(&minimumJobAgeBackoff, "minimum-job-age-backoff", "b", 0, "The minimum job age backoff for the controller") controllerCmd.AddCommand( controllerShowCmd, diff --git a/cmd/garm-cli/cmd/init.go b/cmd/garm-cli/cmd/init.go index 7ed404b8..262b4baa 100644 --- a/cmd/garm-cli/cmd/init.go +++ b/cmd/garm-cli/cmd/init.go @@ -33,9 +33,10 @@ import ( ) var ( - callbackURL string - metadataURL string - webhookURL string + callbackURL string + metadataURL string + webhookURL string + minimumJobAgeBackoff uint ) // initCmd represents the init command diff --git a/database/sql/controller.go b/database/sql/controller.go index c5b900f3..ec6b3b5f 100644 --- a/database/sql/controller.go +++ b/database/sql/controller.go @@ -38,6 +38,7 @@ func dbControllerToCommonController(dbInfo ControllerInfo) (params.ControllerInf WebhookURL: dbInfo.WebhookBaseURL, ControllerWebhookURL: url, CallbackURL: dbInfo.CallbackURL, + MinimumJobAgeBackoff: dbInfo.MinimumJobAgeBackoff, }, nil } @@ -70,7 +71,8 @@ func (s *sqlDatabase) InitController() (params.ControllerInfo, error) { } newInfo := ControllerInfo{ - ControllerID: newID, + ControllerID: newID, + MinimumJobAgeBackoff: 30, } q := s.conn.Save(&newInfo) @@ -115,6 +117,10 @@ func (s *sqlDatabase) UpdateController(info params.UpdateControllerParams) (para dbInfo.WebhookBaseURL = *info.WebhookURL } + if info.MinimumJobAgeBackoff != nil { + dbInfo.MinimumJobAgeBackoff = *info.MinimumJobAgeBackoff + } + q = tx.Save(&dbInfo) if q.Error != nil { return errors.Wrap(q.Error, "saving controller info") diff --git a/database/sql/models.go b/database/sql/models.go index 172170b2..5486dd5b 100644 --- a/database/sql/models.go +++ b/database/sql/models.go @@ -211,6 +211,12 @@ type ControllerInfo struct { CallbackURL string MetadataURL string WebhookBaseURL string + // MinimumJobAgeBackoff is the minimum time that a job must be in the queue + // before GARM will attempt to allocate a runner to service it. This backoff + // is useful if you have idle runners in various pools that could potentially + // pick up the job. GARM would allow this amount of time for runners to react + // before spinning up a new one and potentially having to scale down later. + MinimumJobAgeBackoff uint } type WorkflowJob struct { diff --git a/database/sql/sql.go b/database/sql/sql.go index 937ef676..680b9115 100644 --- a/database/sql/sql.go +++ b/database/sql/sql.go @@ -407,6 +407,12 @@ func (s *sqlDatabase) migrateDB() error { if !s.conn.Migrator().HasTable(&GithubCredentials{}) || !s.conn.Migrator().HasTable(&GithubEndpoint{}) { needsCredentialMigration = true } + + var hasMinAgeField bool + if s.conn.Migrator().HasTable(&ControllerInfo{}) && s.conn.Migrator().HasColumn(&ControllerInfo{}, "minimum_job_age_backoff") { + hasMinAgeField = true + } + s.conn.Exec("PRAGMA foreign_keys = OFF") if err := s.conn.AutoMigrate( &User{}, @@ -427,6 +433,20 @@ func (s *sqlDatabase) migrateDB() error { } s.conn.Exec("PRAGMA foreign_keys = ON") + if !hasMinAgeField { + var controller ControllerInfo + if err := s.conn.First(&controller).Error; err != nil { + if !errors.Is(err, gorm.ErrRecordNotFound) { + return errors.Wrap(err, "updating controller info") + } + } else { + controller.MinimumJobAgeBackoff = 30 + if err := s.conn.Save(&controller).Error; err != nil { + return errors.Wrap(err, "updating controller info") + } + } + } + if err := s.ensureGithubEndpoint(); err != nil { return errors.Wrap(err, "ensuring github endpoint") } diff --git a/params/params.go b/params/params.go index e1ea8327..e7bdf869 100644 --- a/params/params.go +++ b/params/params.go @@ -382,20 +382,6 @@ func (p *Pool) HasRequiredLabels(set []string) bool { // used by swagger client generated code type Pools []Pool -type Internal struct { - ControllerID string `json:"controller_id"` - InstanceCallbackURL string `json:"instance_callback_url"` - InstanceMetadataURL string `json:"instance_metadata_url"` - BaseWebhookURL string `json:"base_webhook_url"` - ControllerWebhookURL string `json:"controller_webhook_url"` - - JWTSecret string `json:"jwt_secret"` - // GithubCredentialsDetails contains all info about the credentials, except the - // token, which is added above. - GithubCredentialsDetails GithubCredentials `json:"gh_creds_details"` - PoolBalancerType PoolBalancerType `json:"pool_balancing_type"` -} - type Repository struct { ID string `json:"id"` Owner string `json:"owner"` @@ -569,12 +555,45 @@ type JWTResponse struct { } type ControllerInfo struct { - ControllerID uuid.UUID `json:"controller_id"` - Hostname string `json:"hostname"` - MetadataURL string `json:"metadata_url"` - CallbackURL string `json:"callback_url"` - WebhookURL string `json:"webhook_url"` - ControllerWebhookURL string `json:"controller_webhook_url"` + // ControllerID is the unique ID of this controller. This ID gets generated + // automatically on controller init. + ControllerID uuid.UUID `json:"controller_id"` + // Hostname is the hostname of the machine that runs this controller. In the + // future, this field will be migrated to a separate table that will keep track + // of each the controller nodes that are part of a cluster. This will happen when + // we implement controller scale-out capability. + Hostname string `json:"hostname"` + // MetadataURL is the public metadata URL of the GARM instance. This URL is used + // by instances to fetch information they need to set themselves up. The URL itself + // may be made available to runners via a reverse proxy or a load balancer. That + // means that the user is responsible for telling GARM what the public URL is, by + // setting this field. + MetadataURL string `json:"metadata_url"` + // CallbackURL is the URL where instances can send updates back to the controller. + // This URL is used by instances to send status updates back to the controller. The + // URL itself may be made available to instances via a reverse proxy or a load balancer. + // That means that the user is responsible for telling GARM what the public URL is, by + // setting this field. + CallbackURL string `json:"callback_url"` + // WebhookURL is the base URL where the controller will receive webhooks from github. + // When webhook management is used, this URL is used as a base to which the controller + // UUID is appended and which will receive the webhooks. + // The URL itself may be made available to instances via a reverse proxy or a load balancer. + // That means that the user is responsible for telling GARM what the public URL is, by + // setting this field. + WebhookURL string `json:"webhook_url"` + // ControllerWebhookURL is the controller specific URL where webhooks will be received. + // This field holds the WebhookURL defined above to which we append the ControllerID. + // Functionally it is the same as WebhookURL, but it allows us to safely manage webhooks + // from GARM without accidentally removing webhooks from other services or GARM controllers. + ControllerWebhookURL string `json:"controller_webhook_url"` + // MinimumJobAgeBackoff is the minimum time in seconds that a job must be in queued state + // before GARM will attempt to allocate a runner for it. When set to a non zero value, + // GARM will ignore the job until the job's age is greater than this value. When using + // the min_idle_runners feature of a pool, this gives enough time for potential idle + // runners to pick up the job before GARM attempts to allocate a new runner, thus avoiding + // the need to potentially scale down runners later. + MinimumJobAgeBackoff uint `json:"minimum_job_age_backoff"` } type GithubCredentials struct { diff --git a/params/requests.go b/params/requests.go index 25d948ee..83694199 100644 --- a/params/requests.go +++ b/params/requests.go @@ -503,9 +503,10 @@ func (u UpdateGithubCredentialsParams) Validate() error { } type UpdateControllerParams struct { - MetadataURL *string `json:"metadata_url,omitempty"` - CallbackURL *string `json:"callback_url,omitempty"` - WebhookURL *string `json:"webhook_url,omitempty"` + MetadataURL *string `json:"metadata_url,omitempty"` + CallbackURL *string `json:"callback_url,omitempty"` + WebhookURL *string `json:"webhook_url,omitempty"` + MinimumJobAgeBackoff *uint `json:"minimum_job_age_backoff,omitempty"` } func (u UpdateControllerParams) Validate() error { diff --git a/runner/pool/pool.go b/runner/pool/pool.go index c08c1e9c..c78fffb1 100644 --- a/runner/pool/pool.go +++ b/runner/pool/pool.go @@ -1673,10 +1673,10 @@ func (r *basePoolManager) consumeQueuedJobs() error { continue } - if time.Since(job.UpdatedAt) < time.Second*30 { + if time.Since(job.UpdatedAt) < time.Second*time.Duration(r.controllerInfo.MinimumJobAgeBackoff) { // give the idle runners a chance to pick up the job. slog.DebugContext( - r.ctx, "job was updated less than 30 seconds ago. Skipping", + r.ctx, "job backoff not reached", "backoff_interval", r.controllerInfo.MinimumJobAgeBackoff, "job_id", job.ID) continue }