Force transition instances in inconsistent state

If GARM is killed or restarted while creating a runner, there is a chance
that runners remain in creating or deleting state. We've started checking
state transitions in GARM and allow a transition when the new state makes
sense in normal circumstances. However, when recovering from a crash, we
may be in an inconsistent state from which we need to recover.

This change adds a ForceUpdateInstance() function that ignores state
transition inconsistencies. For now, we only use it when spinning up a
scale set and checking instance states.

This change also fixes a locking issue.

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
Gabriel Adrian Samfira 2026-02-11 20:41:05 +02:00 committed by Gabriel
parent e2d5526c5c
commit c7d34e0f0e
3 changed files with 34 additions and 14 deletions

View file

@ -395,7 +395,15 @@ func (s *sqlDatabase) applyInstanceUpdates(instance *Instance, param params.Upda
return nil
}
// ForceUpdateInstance applies param to the instance named instanceName by
// delegating to updateInstance with the force flag set. With force set,
// updateInstance skips the agent ID, runner status and instance status
// transition validation before applying the update.
func (s *sqlDatabase) ForceUpdateInstance(ctx context.Context, instanceName string, param params.UpdateInstanceParams) (params.Instance, error) {
	// Name the flag so the call site documents what the boolean means.
	const skipTransitionValidation = true
	return s.updateInstance(ctx, instanceName, param, skipTransitionValidation)
}
// UpdateInstance applies param to the instance named instanceName by
// delegating to updateInstance with the force flag cleared. With force
// cleared, updateInstance validates the agent ID, runner status and instance
// status transitions and returns the validation error if any of them fails.
func (s *sqlDatabase) UpdateInstance(ctx context.Context, instanceName string, param params.UpdateInstanceParams) (params.Instance, error) {
	// Name the flag so the call site documents what the boolean means.
	const skipTransitionValidation = false
	return s.updateInstance(ctx, instanceName, param, skipTransitionValidation)
}
func (s *sqlDatabase) updateInstance(ctx context.Context, instanceName string, param params.UpdateInstanceParams, force bool) (params.Instance, error) {
var rowsAffected int64
err := s.conn.Transaction(func(tx *gorm.DB) error {
instance, err := s.getInstance(ctx, tx, instanceName, "Pool", "ScaleSet")
@ -403,15 +411,18 @@ func (s *sqlDatabase) UpdateInstance(ctx context.Context, instanceName string, p
return fmt.Errorf("error updating instance: %w", err)
}
// Validate transitions
if err := s.validateAgentID(instance.AgentID, param.AgentID); err != nil {
return err
}
if err := s.validateRunnerStatusTransition(instance.RunnerStatus, param.RunnerStatus); err != nil {
return err
}
if err := s.validateInstanceStatusTransition(instance.Status, param.Status); err != nil {
return err
if !force {
// Validate transitions
if err := s.validateAgentID(instance.AgentID, param.AgentID); err != nil {
return err
}
if err := s.validateRunnerStatusTransition(instance.RunnerStatus, param.RunnerStatus); err != nil {
return err
}
if err := s.validateInstanceStatusTransition(instance.Status, param.Status); err != nil {
return err
}
}
// Apply updates