Add runner rotate ability to CLI

This change adds a new "generation" field to pools, scale sets and runners. The generation field is inherited by runners from scale sets or pools at the time of creation. The generation field on scale sets and pools is incremented when the pool or scale set is updated in a way that might influence how runners are created (flavor, image, specs, runner groups, etc). Using this new field, we can determine if existing runners have diverged from the settings of the pool/scale set that spawned them. In the CLI we now have a new set of commands available for both pools and scale sets that list runners, with an optional --outdated flag, and a new "rotate" command that removes all idle runners. Optionally, the --outdated flag can be passed to the rotate command to only remove outdated runners. Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2026-02-08 23:48:57 +02:00 · 2026-02-08 23:48:57 +02:00 · 80e042ee88
commit 80e042ee88
parent 61b4b4cadd
27 changed files with 648 additions and 93 deletions
--- a/database/common/mocks/Store.go
+++ b/database/common/mocks/Store.go
@ -4445,9 +4445,9 @@ func (_c *Store_ListOrganizations_Call) RunAndReturn(run func(context.Context, p
 	return _c
 }

-// ListPoolInstances provides a mock function with given fields: ctx, poolID
-func (_m *Store) ListPoolInstances(ctx context.Context, poolID string) ([]params.Instance, error) {
-	ret := _m.Called(ctx, poolID)
+// ListPoolInstances provides a mock function with given fields: ctx, poolID, oudatedOnly
+func (_m *Store) ListPoolInstances(ctx context.Context, poolID string, oudatedOnly bool) ([]params.Instance, error) {
+	ret := _m.Called(ctx, poolID, oudatedOnly)

 	if len(ret) == 0 {
 		panic("no return value specified for ListPoolInstances")
@ -4455,19 +4455,19 @@ func (_m *Store) ListPoolInstances(ctx context.Context, poolID string) ([]params

 	var r0 []params.Instance
 	var r1 error
-	if rf, ok := ret.Get(0).(func(context.Context, string) ([]params.Instance, error)); ok {
-		return rf(ctx, poolID)
+	if rf, ok := ret.Get(0).(func(context.Context, string, bool) ([]params.Instance, error)); ok {
+		return rf(ctx, poolID, oudatedOnly)
 	}
-	if rf, ok := ret.Get(0).(func(context.Context, string) []params.Instance); ok {
-		r0 = rf(ctx, poolID)
+	if rf, ok := ret.Get(0).(func(context.Context, string, bool) []params.Instance); ok {
+		r0 = rf(ctx, poolID, oudatedOnly)
 	} else {
 		if ret.Get(0) != nil {
 			r0 = ret.Get(0).([]params.Instance)
 		}
 	}

-	if rf, ok := ret.Get(1).(func(context.Context, string) error); ok {
-		r1 = rf(ctx, poolID)
+	if rf, ok := ret.Get(1).(func(context.Context, string, bool) error); ok {
+		r1 = rf(ctx, poolID, oudatedOnly)
 	} else {
 		r1 = ret.Error(1)
 	}
@ -4483,13 +4483,14 @@ type Store_ListPoolInstances_Call struct {
 // ListPoolInstances is a helper method to define mock.On call
 //   - ctx context.Context
 //   - poolID string
-func (_e *Store_Expecter) ListPoolInstances(ctx interface{}, poolID interface{}) *Store_ListPoolInstances_Call {
-	return &Store_ListPoolInstances_Call{Call: _e.mock.On("ListPoolInstances", ctx, poolID)}
+//   - oudatedOnly bool
+func (_e *Store_Expecter) ListPoolInstances(ctx interface{}, poolID interface{}, oudatedOnly interface{}) *Store_ListPoolInstances_Call {
+	return &Store_ListPoolInstances_Call{Call: _e.mock.On("ListPoolInstances", ctx, poolID, oudatedOnly)}
 }

-func (_c *Store_ListPoolInstances_Call) Run(run func(ctx context.Context, poolID string)) *Store_ListPoolInstances_Call {
+func (_c *Store_ListPoolInstances_Call) Run(run func(ctx context.Context, poolID string, oudatedOnly bool)) *Store_ListPoolInstances_Call {
 	_c.Call.Run(func(args mock.Arguments) {
-		run(args[0].(context.Context), args[1].(string))
+		run(args[0].(context.Context), args[1].(string), args[2].(bool))
 	})
 	return _c
 }
@ -4499,7 +4500,7 @@ func (_c *Store_ListPoolInstances_Call) Return(_a0 []params.Instance, _a1 error)
 	return _c
 }

-func (_c *Store_ListPoolInstances_Call) RunAndReturn(run func(context.Context, string) ([]params.Instance, error)) *Store_ListPoolInstances_Call {
+func (_c *Store_ListPoolInstances_Call) RunAndReturn(run func(context.Context, string, bool) ([]params.Instance, error)) *Store_ListPoolInstances_Call {
 	_c.Call.Return(run)
 	return _c
 }
@ -4563,9 +4564,9 @@ func (_c *Store_ListRepositories_Call) RunAndReturn(run func(context.Context, pa
 	return _c
 }

-// ListScaleSetInstances provides a mock function with given fields: _a0, scalesetID
-func (_m *Store) ListScaleSetInstances(_a0 context.Context, scalesetID uint) ([]params.Instance, error) {
-	ret := _m.Called(_a0, scalesetID)
+// ListScaleSetInstances provides a mock function with given fields: _a0, scalesetID, outdatedOnly
+func (_m *Store) ListScaleSetInstances(_a0 context.Context, scalesetID uint, outdatedOnly bool) ([]params.Instance, error) {
+	ret := _m.Called(_a0, scalesetID, outdatedOnly)

 	if len(ret) == 0 {
 		panic("no return value specified for ListScaleSetInstances")
@ -4573,19 +4574,19 @@ func (_m *Store) ListScaleSetInstances(_a0 context.Context, scalesetID uint) ([]

 	var r0 []params.Instance
 	var r1 error
-	if rf, ok := ret.Get(0).(func(context.Context, uint) ([]params.Instance, error)); ok {
-		return rf(_a0, scalesetID)
+	if rf, ok := ret.Get(0).(func(context.Context, uint, bool) ([]params.Instance, error)); ok {
+		return rf(_a0, scalesetID, outdatedOnly)
 	}
-	if rf, ok := ret.Get(0).(func(context.Context, uint) []params.Instance); ok {
-		r0 = rf(_a0, scalesetID)
+	if rf, ok := ret.Get(0).(func(context.Context, uint, bool) []params.Instance); ok {
+		r0 = rf(_a0, scalesetID, outdatedOnly)
 	} else {
 		if ret.Get(0) != nil {
 			r0 = ret.Get(0).([]params.Instance)
 		}
 	}

-	if rf, ok := ret.Get(1).(func(context.Context, uint) error); ok {
-		r1 = rf(_a0, scalesetID)
+	if rf, ok := ret.Get(1).(func(context.Context, uint, bool) error); ok {
+		r1 = rf(_a0, scalesetID, outdatedOnly)
 	} else {
 		r1 = ret.Error(1)
 	}
@ -4601,13 +4602,14 @@ type Store_ListScaleSetInstances_Call struct {
 // ListScaleSetInstances is a helper method to define mock.On call
 //   - _a0 context.Context
 //   - scalesetID uint
-func (_e *Store_Expecter) ListScaleSetInstances(_a0 interface{}, scalesetID interface{}) *Store_ListScaleSetInstances_Call {
-	return &Store_ListScaleSetInstances_Call{Call: _e.mock.On("ListScaleSetInstances", _a0, scalesetID)}
+//   - outdatedOnly bool
+func (_e *Store_Expecter) ListScaleSetInstances(_a0 interface{}, scalesetID interface{}, outdatedOnly interface{}) *Store_ListScaleSetInstances_Call {
+	return &Store_ListScaleSetInstances_Call{Call: _e.mock.On("ListScaleSetInstances", _a0, scalesetID, outdatedOnly)}
 }

-func (_c *Store_ListScaleSetInstances_Call) Run(run func(_a0 context.Context, scalesetID uint)) *Store_ListScaleSetInstances_Call {
+func (_c *Store_ListScaleSetInstances_Call) Run(run func(_a0 context.Context, scalesetID uint, outdatedOnly bool)) *Store_ListScaleSetInstances_Call {
 	_c.Call.Run(func(args mock.Arguments) {
-		run(args[0].(context.Context), args[1].(uint))
+		run(args[0].(context.Context), args[1].(uint), args[2].(bool))
 	})
 	return _c
 }
@ -4617,7 +4619,7 @@ func (_c *Store_ListScaleSetInstances_Call) Return(_a0 []params.Instance, _a1 er
 	return _c
 }

-func (_c *Store_ListScaleSetInstances_Call) RunAndReturn(run func(context.Context, uint) ([]params.Instance, error)) *Store_ListScaleSetInstances_Call {
+func (_c *Store_ListScaleSetInstances_Call) RunAndReturn(run func(context.Context, uint, bool) ([]params.Instance, error)) *Store_ListScaleSetInstances_Call {
 	_c.Call.Return(run)
 	return _c
 }
--- a/database/common/store.go
+++ b/database/common/store.go
@ -75,7 +75,7 @@ type PoolStore interface {
 	GetPoolByID(ctx context.Context, poolID string) (params.Pool, error)
 	DeletePoolByID(ctx context.Context, poolID string) error

-	ListPoolInstances(ctx context.Context, poolID string) ([]params.Instance, error)
+	ListPoolInstances(ctx context.Context, poolID string, oudatedOnly bool) ([]params.Instance, error)

 	PoolInstanceCount(ctx context.Context, poolID string) (int64, error)
 	FindPoolsMatchingAllTags(ctx context.Context, entityType params.ForgeEntityType, entityID string, tags []string) ([]params.Pool, error)
@ -152,7 +152,7 @@ type ScaleSetsStore interface {
 }

 type ScaleSetInstanceStore interface {
-	ListScaleSetInstances(_ context.Context, scalesetID uint) ([]params.Instance, error)
+	ListScaleSetInstances(_ context.Context, scalesetID uint, outdatedOnly bool) ([]params.Instance, error)
 	CreateScaleSetInstance(_ context.Context, scaleSetID uint, param params.CreateInstanceParams) (instance params.Instance, err error)
 }

--- a/database/sql/instances.go
+++ b/database/sql/instances.go
@ -90,6 +90,7 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p
 			JitConfiguration:  secret,
 			AditionalLabels:   labels,
 			AgentID:           param.AgentID,
+			Generation:        param.Generation,
 		}
 		q = tx.Create(&newInstance)
 		if q.Error != nil {
@ -511,14 +512,18 @@ func (s *sqlDatabase) listInstancesBatched(queryModifier func(*gorm.DB) *gorm.DB
 	return ret, err
 }

-func (s *sqlDatabase) ListPoolInstances(_ context.Context, poolID string) ([]params.Instance, error) {
+func (s *sqlDatabase) ListPoolInstances(_ context.Context, poolID string, outdatedOnly bool) ([]params.Instance, error) {
 	u, err := uuid.Parse(poolID)
 	if err != nil {
 		return nil, fmt.Errorf("error parsing id: %w", runnerErrors.ErrBadRequest)
 	}

 	ret, err := s.listInstancesBatched(func(query *gorm.DB) *gorm.DB {
-		return query.Where("pool_id = ?", u)
+		q := query.Where("pool_id = ?", u)
+		if outdatedOnly {
+			q = q.Where("instances.generation < (SELECT pools.generation FROM pools WHERE pools.id = instances.pool_id)")
+		}
+		return q
 	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to list pool instances: %w", err)
@ -527,7 +532,7 @@ func (s *sqlDatabase) ListPoolInstances(_ context.Context, poolID string) ([]par
 }

 func (s *sqlDatabase) ListAllInstances(_ context.Context) ([]params.Instance, error) {
-	ret, err := s.listInstancesBatched(nil) // No query modifier for all instances
+	ret, err := s.listInstancesBatched(nil)
 	if err != nil {
 		return nil, fmt.Errorf("failed to list all instances: %w", err)
 	}
--- a/database/sql/instances_test.go
+++ b/database/sql/instances_test.go
@ -668,14 +668,14 @@ func (s *InstancesTestSuite) TestUpdateInstanceDBUpdateAddressErr() {
 }

 func (s *InstancesTestSuite) TestListPoolInstances() {
-	instances, err := s.Store.ListPoolInstances(s.adminCtx, s.Fixtures.Pool.ID)
+	instances, err := s.Store.ListPoolInstances(s.adminCtx, s.Fixtures.Pool.ID, false)

 	s.Require().Nil(err)
 	s.equalInstancesByName(s.Fixtures.Instances, instances)
 }

 func (s *InstancesTestSuite) TestListPoolInstancesInvalidPoolID() {
-	_, err := s.Store.ListPoolInstances(s.adminCtx, "dummy-pool-id")
+	_, err := s.Store.ListPoolInstances(s.adminCtx, "dummy-pool-id", false)

 	s.Require().Equal("error parsing id: invalid request", err.Error())
 }
--- a/database/sql/models.go
+++ b/database/sql/models.go
@ -121,6 +121,14 @@ type Pool struct {
 	GitHubRunnerGroup string
 	EnableShell       bool

+	// Generation holds the numeric generation of the pool. This number is
+	// incremented every time settings of the pool that may influence how
+	// runners are created (flavor, specs, image, etc.) are changed. When a
+	// runner is created, the current generation is copied to the runner as
+	// well. That way, if some settings diverge, we can target those runners
+	// to be recreated.
+	Generation uint64
+
 	RepoID     *uuid.UUID `gorm:"index"`
 	Repository Repository `gorm:"foreignKey:RepoID;"`

@ -178,6 +186,13 @@ type ScaleSet struct {
 	ExtraSpecs  datatypes.JSON
 	EnableShell bool

+	// Generation holds the numeric generation of the scale set. This number is
+	// incremented every time settings of the scale set that may influence how
+	// runners are created (flavor, image, specs, runner group, etc.) are changed.
+	// Instances record the generation they were created with, which lets us tell
+	// when an instance is out of date with the scale set and remove it if needed.
+	Generation uint64
+
 	RepoID     *uuid.UUID `gorm:"index"`
 	Repository Repository `gorm:"foreignKey:RepoID;"`

@ -336,6 +351,12 @@ type Instance struct {
 	GitHubRunnerGroup string
 	AditionalLabels   datatypes.JSON
 	Capabilities      datatypes.JSON
+	// Generation is the generation of the pool or scale set at the time this
+	// instance was created. It tracks divergence between the settings the
+	// instance was created with and the settings currently set on its pool or
+	// scale set. We can then use this field to know if the instance is out of
+	// date, allowing us to remove it if we need to.
+	Generation uint64

 	PoolID *uuid.UUID
 	Pool   Pool `gorm:"foreignKey:PoolID"`
--- a/database/sql/pools_test.go
+++ b/database/sql/pools_test.go
@ -150,7 +150,7 @@ func (s *PoolsTestSuite) TestListAllPools() {

 func (s *PoolsTestSuite) TestListAllPoolsDBFetchErr() {
 	s.Fixtures.SQLMock.
-		ExpectQuery(regexp.QuoteMeta("SELECT `pools`.`id`,`pools`.`created_at`,`pools`.`updated_at`,`pools`.`deleted_at`,`pools`.`provider_name`,`pools`.`runner_prefix`,`pools`.`max_runners`,`pools`.`min_idle_runners`,`pools`.`runner_bootstrap_timeout`,`pools`.`image`,`pools`.`flavor`,`pools`.`os_type`,`pools`.`os_arch`,`pools`.`enabled`,`pools`.`git_hub_runner_group`,`pools`.`enable_shell`,`pools`.`repo_id`,`pools`.`org_id`,`pools`.`enterprise_id`,`pools`.`template_id`,`pools`.`priority` FROM `pools` WHERE `pools`.`deleted_at` IS NULL")).
+		ExpectQuery(regexp.QuoteMeta("SELECT `pools`.`id`,`pools`.`created_at`,`pools`.`updated_at`,`pools`.`deleted_at`,`pools`.`provider_name`,`pools`.`runner_prefix`,`pools`.`max_runners`,`pools`.`min_idle_runners`,`pools`.`runner_bootstrap_timeout`,`pools`.`image`,`pools`.`flavor`,`pools`.`os_type`,`pools`.`os_arch`,`pools`.`enabled`,`pools`.`git_hub_runner_group`,`pools`.`enable_shell`,`pools`.`generation`,`pools`.`repo_id`,`pools`.`org_id`,`pools`.`enterprise_id`,`pools`.`template_id`,`pools`.`priority` FROM `pools` WHERE `pools`.`deleted_at` IS NULL")).
 		WillReturnError(fmt.Errorf("mocked fetching all pools error"))

 	_, err := s.StoreSQLMocked.ListAllPools(s.adminCtx)
--- a/database/sql/scaleset_instances.go
+++ b/database/sql/scaleset_instances.go
@ -65,9 +65,13 @@ func (s *sqlDatabase) CreateScaleSetInstance(_ context.Context, scaleSetID uint,
 	return s.sqlToParamsInstance(newInstance)
 }

-func (s *sqlDatabase) ListScaleSetInstances(_ context.Context, scalesetID uint) ([]params.Instance, error) {
+func (s *sqlDatabase) ListScaleSetInstances(_ context.Context, scalesetID uint, outdatedOnly bool) ([]params.Instance, error) {
 	ret, err := s.listInstancesBatched(func(query *gorm.DB) *gorm.DB {
-		return query.Where("scale_set_fk_id = ?", scalesetID)
+		q := query.Where("scale_set_fk_id = ?", scalesetID)
+		if outdatedOnly {
+			q = q.Where("instances.generation < (SELECT scale_sets.generation FROM scale_sets WHERE scale_sets.id = instances.scale_set_fk_id)")
+		}
+		return q
 	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to list scaleset instances: %w", err)
--- a/database/sql/scalesets.go
+++ b/database/sql/scalesets.go
@ -284,6 +284,7 @@ func (s *sqlDatabase) getEntityScaleSet(tx *gorm.DB, entityType params.ForgeEnti
 }

 func (s *sqlDatabase) updateScaleSet(tx *gorm.DB, scaleSet ScaleSet, param params.UpdateScaleSetParams) (params.ScaleSet, error) {
+	incrementGeneration := false
 	if param.Enabled != nil && scaleSet.Enabled != *param.Enabled {
 		scaleSet.Enabled = *param.Enabled
 	}
@ -306,6 +307,7 @@ func (s *sqlDatabase) updateScaleSet(tx *gorm.DB, scaleSet ScaleSet, param param

 	if param.EnableShell != nil {
 		scaleSet.EnableShell = *param.EnableShell
+		incrementGeneration = true
 	}

 	if param.Name != "" {
@ -314,14 +316,17 @@ func (s *sqlDatabase) updateScaleSet(tx *gorm.DB, scaleSet ScaleSet, param param

 	if param.GitHubRunnerGroup != nil && *param.GitHubRunnerGroup != "" {
 		scaleSet.GitHubRunnerGroup = *param.GitHubRunnerGroup
+		incrementGeneration = true
 	}

 	if param.Flavor != "" {
 		scaleSet.Flavor = param.Flavor
+		incrementGeneration = true
 	}

 	if param.Image != "" {
 		scaleSet.Image = param.Image
+		incrementGeneration = true
 	}

 	if param.Prefix != "" {
@ -338,22 +343,25 @@ func (s *sqlDatabase) updateScaleSet(tx *gorm.DB, scaleSet ScaleSet, param param

 	if param.OSArch != "" {
 		scaleSet.OSArch = param.OSArch
+		incrementGeneration = true
 	}

 	if param.OSType != "" {
 		scaleSet.OSType = param.OSType
+		incrementGeneration = true
 	}

 	if param.ExtraSpecs != nil {
 		scaleSet.ExtraSpecs = datatypes.JSON(param.ExtraSpecs)
+		incrementGeneration = true
 	}

 	if param.RunnerBootstrapTimeout != nil && *param.RunnerBootstrapTimeout > 0 {
 		scaleSet.RunnerBootstrapTimeout = *param.RunnerBootstrapTimeout
 	}

-	if param.GitHubRunnerGroup != nil {
-		scaleSet.GitHubRunnerGroup = *param.GitHubRunnerGroup
+	if incrementGeneration {
+		scaleSet.Generation++
 	}

 	if q := tx.Save(&scaleSet); q.Error != nil {
--- a/database/sql/scalesets_test.go
+++ b/database/sql/scalesets_test.go
@ -356,7 +356,7 @@ func (s *ScaleSetsTestSuite) TestScaleSetOperations() {
 	})

 	s.T().Run("List repo scale set instances", func(_ *testing.T) {
-		instances, err := s.Store.ListScaleSetInstances(s.adminCtx, repoScaleSet.ID)
+		instances, err := s.Store.ListScaleSetInstances(s.adminCtx, repoScaleSet.ID, false)
 		s.Require().NoError(err)
 		s.Require().NotEmpty(instances)
 		s.Require().Len(instances, 1)
--- a/database/sql/util.go
+++ b/database/sql/util.go
@ -75,6 +75,7 @@ func (s *sqlDatabase) sqlToParamsInstance(instance Instance) (params.Instance, e
 		GitHubRunnerGroup: instance.GitHubRunnerGroup,
 		AditionalLabels:   labels,
 		Heartbeat:         instance.Heartbeat,
+		Generation:        instance.Generation,
 	}

 	if len(instance.Capabilities) > 0 {
@ -299,6 +300,7 @@ func (s *sqlDatabase) sqlToCommonPool(pool Pool) (params.Pool, error) {
 		CreatedAt:              pool.CreatedAt,
 		UpdatedAt:              pool.UpdatedAt,
 		EnableShell:            pool.EnableShell,
+		Generation:             pool.Generation,
 	}

 	if pool.TemplateID != nil && *pool.TemplateID != 0 {
@ -376,6 +378,7 @@ func (s *sqlDatabase) sqlToCommonScaleSet(scaleSet ScaleSet) (params.ScaleSet, e
 		LastMessageID:          scaleSet.LastMessageID,
 		DesiredRunnerCount:     scaleSet.DesiredRunnerCount,
 		EnableShell:            scaleSet.EnableShell,
+		Generation:             scaleSet.Generation,
 	}

 	if scaleSet.TemplateID != nil && *scaleSet.TemplateID != 0 {
@ -539,20 +542,24 @@ func (s *sqlDatabase) getOrCreateTag(tx *gorm.DB, tagName string) (Tag, error) {
 }

 func (s *sqlDatabase) updatePool(tx *gorm.DB, pool Pool, param params.UpdatePoolParams) (params.Pool, error) {
+	incrementGeneration := false
 	if param.Enabled != nil && pool.Enabled != *param.Enabled {
 		pool.Enabled = *param.Enabled
 	}

 	if param.Flavor != "" {
 		pool.Flavor = param.Flavor
+		incrementGeneration = true
 	}

 	if param.EnableShell != nil {
 		pool.EnableShell = *param.EnableShell
+		incrementGeneration = true
 	}

 	if param.Image != "" {
 		pool.Image = param.Image
+		incrementGeneration = true
 	}

 	if param.Prefix != "" {
@ -573,14 +580,17 @@ func (s *sqlDatabase) updatePool(tx *gorm.DB, pool Pool, param params.UpdatePool

 	if param.OSArch != "" {
 		pool.OSArch = param.OSArch
+		incrementGeneration = true
 	}

 	if param.OSType != "" {
 		pool.OSType = param.OSType
+		incrementGeneration = true
 	}

 	if param.ExtraSpecs != nil {
 		pool.ExtraSpecs = datatypes.JSON(param.ExtraSpecs)
+		incrementGeneration = true
 	}

 	if param.RunnerBootstrapTimeout != nil && *param.RunnerBootstrapTimeout > 0 {
@ -589,12 +599,17 @@ func (s *sqlDatabase) updatePool(tx *gorm.DB, pool Pool, param params.UpdatePool

 	if param.GitHubRunnerGroup != nil {
 		pool.GitHubRunnerGroup = *param.GitHubRunnerGroup
+		incrementGeneration = true
 	}

 	if param.Priority != nil {
 		pool.Priority = *param.Priority
 	}

+	if incrementGeneration {
+		pool.Generation++
+	}
+
 	if q := tx.Save(&pool); q.Error != nil {
 		return params.Pool{}, fmt.Errorf("error saving database entry: %w", q.Error)
 	}