// ABOUTME: Resource management for EdgeConnect apply command with deployment execution and rollback // ABOUTME: Handles actual deployment operations, manifest processing, and error recovery with parallel execution package v2 import ( "context" "errors" "fmt" "strings" "time" "edp.buildth.ing/DevFW-CICD/edge-connect-client/v2/internal/config" v2 "edp.buildth.ing/DevFW-CICD/edge-connect-client/v2/sdk/edgeconnect/v2" ) // ResourceManagerInterface defines the interface for resource management type ResourceManagerInterface interface { // ApplyDeployment executes a deployment plan ApplyDeployment(ctx context.Context, plan *DeploymentPlan, config *config.EdgeConnectConfig, manifestContent string) (*ExecutionResult, error) // RollbackDeployment attempts to rollback a failed deployment RollbackDeployment(ctx context.Context, result *ExecutionResult) error // ValidatePrerequisites checks if deployment prerequisites are met ValidatePrerequisites(ctx context.Context, plan *DeploymentPlan) error } // EdgeConnectResourceManager implements resource management for EdgeConnect type EdgeConnectResourceManager struct { client EdgeConnectClientInterface parallelLimit int rollbackOnFail bool logger Logger strategyConfig StrategyConfig } // Logger interface for deployment logging type Logger interface { Printf(format string, v ...interface{}) } // ResourceManagerOptions configures the resource manager behavior type ResourceManagerOptions struct { // ParallelLimit controls how many operations run concurrently ParallelLimit int // RollbackOnFail automatically rolls back on deployment failure RollbackOnFail bool // Logger for deployment operations Logger Logger // Timeout for individual operations OperationTimeout time.Duration // StrategyConfig for deployment strategies StrategyConfig StrategyConfig } // DefaultResourceManagerOptions returns sensible defaults func DefaultResourceManagerOptions() ResourceManagerOptions { return ResourceManagerOptions{ ParallelLimit: 5, // Conservative parallel limit RollbackOnFail: true, OperationTimeout: 2 * time.Minute, StrategyConfig: DefaultStrategyConfig(), } } // NewResourceManager creates a new EdgeConnect resource manager func NewResourceManager(client EdgeConnectClientInterface, opts ...func(*ResourceManagerOptions)) ResourceManagerInterface { options := DefaultResourceManagerOptions() for _, opt := range opts { opt(&options) } return &EdgeConnectResourceManager{ client: client, parallelLimit: options.ParallelLimit, rollbackOnFail: options.RollbackOnFail, logger: options.Logger, strategyConfig: options.StrategyConfig, } } // WithParallelLimit sets the parallel execution limit func WithParallelLimit(limit int) func(*ResourceManagerOptions) { return func(opts *ResourceManagerOptions) { opts.ParallelLimit = limit } } // WithRollbackOnFail enables/disables automatic rollback func WithRollbackOnFail(rollback bool) func(*ResourceManagerOptions) { return func(opts *ResourceManagerOptions) { opts.RollbackOnFail = rollback } } // WithLogger sets a logger for deployment operations func WithLogger(logger Logger) func(*ResourceManagerOptions) { return func(opts *ResourceManagerOptions) { opts.Logger = logger } } // WithStrategyConfig sets the strategy configuration func WithStrategyConfig(config StrategyConfig) func(*ResourceManagerOptions) { return func(opts *ResourceManagerOptions) { opts.StrategyConfig = config } } // ApplyDeployment executes a deployment plan using deployment strategies func (rm *EdgeConnectResourceManager) ApplyDeployment(ctx context.Context, plan *DeploymentPlan, config *config.EdgeConnectConfig, manifestContent string) (*ExecutionResult, error) { rm.logf("Starting deployment: %s", plan.ConfigName) // Step 1: Validate prerequisites if err := rm.ValidatePrerequisites(ctx, plan); err != nil { result := &ExecutionResult{ Plan: plan, CompletedActions: []ActionResult{}, FailedActions: []ActionResult{}, Error: fmt.Errorf("prerequisites validation failed: %w", err), Duration: 0, } return result, err } // Step 2: Determine deployment strategy strategyName := DeploymentStrategy(config.Spec.GetDeploymentStrategy()) rm.logf("Using deployment strategy: %s", strategyName) // Step 3: Create strategy executor strategyConfig := rm.strategyConfig strategyConfig.ParallelOperations = rm.parallelLimit > 1 factory := NewStrategyFactory(rm.client, strategyConfig, rm.logger) strategy, err := factory.CreateStrategy(strategyName) if err != nil { result := &ExecutionResult{ Plan: plan, CompletedActions: []ActionResult{}, FailedActions: []ActionResult{}, Error: fmt.Errorf("failed to create deployment strategy: %w", err), Duration: 0, } return result, err } // Step 4: Validate strategy can handle this deployment if err := strategy.Validate(plan); err != nil { result := &ExecutionResult{ Plan: plan, CompletedActions: []ActionResult{}, FailedActions: []ActionResult{}, Error: fmt.Errorf("strategy validation failed: %w", err), Duration: 0, } return result, err } // Step 5: Execute the deployment strategy rm.logf("Estimated deployment duration: %v", strategy.EstimateDuration(plan)) result, err := strategy.Execute(ctx, plan, config, manifestContent) // Step 6: Handle rollback if needed if err != nil && rm.rollbackOnFail && result != nil { rm.logf("Deployment failed, attempting rollback...") if rollbackErr := rm.RollbackDeployment(ctx, result); rollbackErr != nil { rm.logf("Rollback failed: %v", rollbackErr) } else { result.RollbackPerformed = true result.RollbackSuccess = true } } if result != nil && result.Success { rm.logf("Deployment completed successfully in %v", result.Duration) } return result, err } // ValidatePrerequisites checks if deployment prerequisites are met func (rm *EdgeConnectResourceManager) ValidatePrerequisites(ctx context.Context, plan *DeploymentPlan) error { rm.logf("Validating deployment prerequisites for: %s", plan.ConfigName) // Check if we have any actions to perform if plan.IsEmpty() { return fmt.Errorf("deployment plan is empty - no actions to perform") } // Validate that we have required client capabilities if rm.client == nil { return fmt.Errorf("EdgeConnect client is not configured") } rm.logf("Prerequisites validation passed") return nil } // RollbackDeployment attempts to rollback a failed deployment func (rm *EdgeConnectResourceManager) RollbackDeployment(ctx context.Context, result *ExecutionResult) error { rm.logf("Starting rollback for deployment: %s", result.Plan.ConfigName) rollbackErrors := []error{} // Phase 1: Delete resources that were created in this deployment attempt (in reverse order) rm.logf("Phase 1: Rolling back created resources") for i := len(result.CompletedActions) - 1; i >= 0; i-- { action := result.CompletedActions[i] switch action.Type { case ActionCreate: if err := rm.rollbackCreateAction(ctx, action, result.Plan); err != nil { rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to rollback %s: %w", action.Target, err)) } else { rm.logf("Successfully rolled back: %s", action.Target) } } } // Phase 2: Restore resources that were deleted before the failed deployment // This is critical for RecreateStrategy which deletes everything before recreating if result.DeletedAppBackup != nil || len(result.DeletedInstancesBackup) > 0 { rm.logf("Phase 2: Restoring deleted resources") // Restore app first (must exist before instances can be created) if result.DeletedAppBackup != nil { if err := rm.restoreApp(ctx, result.DeletedAppBackup); err != nil { rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to restore app: %w", err)) rm.logf("Failed to restore app: %v", err) } else { rm.logf("Successfully restored app: %s", result.DeletedAppBackup.App.Key.Name) } } // Restore instances for _, backup := range result.DeletedInstancesBackup { if err := rm.restoreInstance(ctx, &backup); err != nil { rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to restore instance %s: %w", backup.Instance.Key.Name, err)) rm.logf("Failed to restore instance %s: %v", backup.Instance.Key.Name, err) } else { rm.logf("Successfully restored instance: %s", backup.Instance.Key.Name) } } } if len(rollbackErrors) > 0 { return fmt.Errorf("rollback encountered %d errors: %v", len(rollbackErrors), rollbackErrors) } rm.logf("Rollback completed successfully") return nil } // rollbackCreateAction rolls back a CREATE action by deleting the resource func (rm *EdgeConnectResourceManager) rollbackCreateAction(ctx context.Context, action ActionResult, plan *DeploymentPlan) error { if action.Type != ActionCreate { return nil } // Determine if this is an app or instance rollback based on the target name isInstance := false for _, instanceAction := range plan.InstanceActions { if instanceAction.InstanceName == action.Target { isInstance = true break } } if isInstance { return rm.rollbackInstance(ctx, action, plan) } else { return rm.rollbackApp(ctx, action, plan) } } // rollbackApp deletes an application that was created func (rm *EdgeConnectResourceManager) rollbackApp(ctx context.Context, action ActionResult, plan *DeploymentPlan) error { appKey := v2.AppKey{ Organization: plan.AppAction.Desired.Organization, Name: plan.AppAction.Desired.Name, Version: plan.AppAction.Desired.Version, } return rm.client.DeleteApp(ctx, appKey, plan.AppAction.Desired.Region) } // rollbackInstance deletes an instance that was created func (rm *EdgeConnectResourceManager) rollbackInstance(ctx context.Context, action ActionResult, plan *DeploymentPlan) error { // Find the instance action to get the details for _, instanceAction := range plan.InstanceActions { if instanceAction.InstanceName == action.Target { instanceKey := v2.AppInstanceKey{ Organization: plan.AppAction.Desired.Organization, Name: instanceAction.InstanceName, CloudletKey: v2.CloudletKey{ Organization: instanceAction.Target.CloudletOrg, Name: instanceAction.Target.CloudletName, }, } return rm.client.DeleteAppInstance(ctx, instanceKey, instanceAction.Target.Region) } } return fmt.Errorf("instance action not found for rollback: %s", action.Target) } // restoreApp recreates an app that was deleted during deployment func (rm *EdgeConnectResourceManager) restoreApp(ctx context.Context, backup *AppBackup) error { rm.logf("Restoring app: %s/%s version %s", backup.App.Key.Organization, backup.App.Key.Name, backup.App.Key.Version) // Build a clean app input with only creation-safe fields // We must exclude read-only fields like CreatedAt, UpdatedAt, etc. appInput := &v2.NewAppInput{ Region: backup.Region, App: v2.App{ Key: backup.App.Key, Deployment: backup.App.Deployment, ImageType: backup.App.ImageType, ImagePath: backup.App.ImagePath, AllowServerless: backup.App.AllowServerless, DefaultFlavor: backup.App.DefaultFlavor, ServerlessConfig: backup.App.ServerlessConfig, DeploymentManifest: backup.App.DeploymentManifest, DeploymentGenerator: backup.App.DeploymentGenerator, RequiredOutboundConnections: backup.App.RequiredOutboundConnections, // Explicitly omit read-only fields like CreatedAt, UpdatedAt, Fields, etc. }, } if err := rm.client.CreateApp(ctx, appInput); err != nil { return fmt.Errorf("failed to restore app: %w", err) } rm.logf("Successfully restored app: %s", backup.App.Key.Name) return nil } // restoreInstance recreates an instance that was deleted during deployment func (rm *EdgeConnectResourceManager) restoreInstance(ctx context.Context, backup *InstanceBackup) error { rm.logf("Restoring instance: %s on %s:%s", backup.Instance.Key.Name, backup.Instance.Key.CloudletKey.Organization, backup.Instance.Key.CloudletKey.Name) // Build a clean instance input with only creation-safe fields // We must exclude read-only fields like CloudletLoc, CreatedAt, etc. instanceInput := &v2.NewAppInstanceInput{ Region: backup.Region, AppInst: v2.AppInstance{ Key: backup.Instance.Key, AppKey: backup.Instance.AppKey, Flavor: backup.Instance.Flavor, // Explicitly omit read-only fields like CloudletLoc, State, PowerState, CreatedAt, etc. }, } // Retry logic to handle namespace termination race conditions maxRetries := 5 retryDelay := 10 * time.Second var lastErr error for attempt := 0; attempt <= maxRetries; attempt++ { if attempt > 0 { rm.logf("Retrying instance restore %s (attempt %d/%d)", backup.Instance.Key.Name, attempt, maxRetries) select { case <-time.After(retryDelay): case <-ctx.Done(): return ctx.Err() } } err := rm.client.CreateAppInstance(ctx, instanceInput) if err == nil { rm.logf("Successfully restored instance: %s", backup.Instance.Key.Name) return nil } lastErr = err // Check if error is retryable if !rm.isRetryableError(err) { rm.logf("Failed to restore instance %s: %v (non-retryable error, giving up)", backup.Instance.Key.Name, err) return fmt.Errorf("failed to restore instance: %w", err) } if attempt < maxRetries { rm.logf("Failed to restore instance %s: %v (will retry)", backup.Instance.Key.Name, err) } } return fmt.Errorf("failed to restore instance after %d attempts: %w", maxRetries+1, lastErr) } // isRetryableError determines if an error should be retried func (rm *EdgeConnectResourceManager) isRetryableError(err error) bool { if err == nil { return false } errStr := strings.ToLower(err.Error()) // Special case: Kubernetes namespace termination race condition // This is a transient 400 error that should be retried if strings.Contains(errStr, "being terminated") || strings.Contains(errStr, "is being terminated") { return true } // Check if it's an APIError with a status code var apiErr *v2.APIError if errors.As(err, &apiErr) { // Don't retry client errors (4xx) if apiErr.StatusCode >= 400 && apiErr.StatusCode < 500 { return false } // Retry server errors (5xx) if apiErr.StatusCode >= 500 { return true } } // Retry all other errors (network issues, timeouts, etc.) return true } // logf logs a message if a logger is configured func (rm *EdgeConnectResourceManager) logf(format string, v ...interface{}) { if rm.logger != nil { rm.logger.Printf("[ResourceManager] "+format, v...) } }