// edge-connect-client/internal/apply/v2/manager.go

// ABOUTME: Resource management for EdgeConnect apply command with deployment execution and rollback
// ABOUTME: Handles actual deployment operations, manifest processing, and error recovery with parallel execution
package v2

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"time"

	"edp.buildth.ing/DevFW-CICD/edge-connect-client/v2/internal/config"
	v2 "edp.buildth.ing/DevFW-CICD/edge-connect-client/v2/sdk/edgeconnect/v2"
)

// ResourceManagerInterface defines the operations a resource manager must
// provide to apply a deployment plan. NewResourceManager returns an
// *EdgeConnectResourceManager behind this interface.
type ResourceManagerInterface interface {
	// ApplyDeployment executes a deployment plan. It returns the execution
	// result together with the error that caused the deployment to fail, if any.
	ApplyDeployment(ctx context.Context, plan *DeploymentPlan, config *config.EdgeConnectConfig, manifestContent string) (*ExecutionResult, error)

	// RollbackDeployment attempts to rollback a failed deployment described by
	// result and returns an error if any rollback step failed.
	RollbackDeployment(ctx context.Context, result *ExecutionResult) error

	// ValidatePrerequisites checks if deployment prerequisites are met and
	// returns a non-nil error when they are not.
	ValidatePrerequisites(ctx context.Context, plan *DeploymentPlan) error
}

// EdgeConnectResourceManager implements resource management for EdgeConnect.
//
// Fields:
//   - client: API client used to create and delete apps and app instances; it
//     is also handed to the strategy factory in ApplyDeployment.
//   - parallelLimit: configured parallelism; ApplyDeployment only checks
//     whether it is greater than 1 to enable parallel operations.
//   - rollbackOnFail: when true, ApplyDeployment attempts a rollback after a
//     failed strategy execution.
//   - logger: optional sink for progress messages; nil disables logging.
//   - strategyConfig: base strategy configuration copied for every deployment.
type EdgeConnectResourceManager struct {
	client         EdgeConnectClientInterface
	parallelLimit  int
	rollbackOnFail bool
	logger         Logger
	strategyConfig StrategyConfig
}

// Logger is the minimal logging interface used for deployment logging.
// Any type with a Printf method of this shape satisfies it (for example the
// standard library's *log.Logger).
type Logger interface {
	// Printf formats according to format and writes the resulting message.
	Printf(format string, v ...interface{})
}

// ResourceManagerOptions configures the resource manager behavior. Values are
// seeded by DefaultResourceManagerOptions and adjusted through the functional
// options passed to NewResourceManager.
type ResourceManagerOptions struct {
	// ParallelLimit controls how many operations run concurrently.
	// NOTE(review): ApplyDeployment currently only tests whether the value is
	// greater than 1 to switch parallel operations on; the exact number is not
	// forwarded anywhere visible in this file.
	ParallelLimit int

	// RollbackOnFail automatically rolls back on deployment failure.
	RollbackOnFail bool

	// Logger for deployment operations. A nil Logger disables logging.
	Logger Logger

	// OperationTimeout is the timeout for individual operations.
	// NOTE(review): NewResourceManager does not copy this value into the
	// manager, so it has no visible effect in this file.
	OperationTimeout time.Duration

	// StrategyConfig for deployment strategies.
	StrategyConfig StrategyConfig
}

// DefaultResourceManagerOptions builds the baseline option set: a parallel
// limit of 5, rollback on failure enabled, a two minute operation timeout and
// the default strategy configuration. Logger is left nil.
func DefaultResourceManagerOptions() ResourceManagerOptions {
	var defaults ResourceManagerOptions

	defaults.ParallelLimit = 5 // conservative parallel limit
	defaults.RollbackOnFail = true
	defaults.OperationTimeout = 2 * time.Minute
	defaults.StrategyConfig = DefaultStrategyConfig()

	return defaults
}

// NewResourceManager constructs an EdgeConnect resource manager around client.
// It starts from DefaultResourceManagerOptions, applies every functional
// option in the order given, and copies the resulting settings into the
// manager.
func NewResourceManager(client EdgeConnectClientInterface, opts ...func(*ResourceManagerOptions)) ResourceManagerInterface {
	settings := DefaultResourceManagerOptions()
	for i := range opts {
		opts[i](&settings)
	}

	manager := new(EdgeConnectResourceManager)
	manager.client = client
	manager.parallelLimit = settings.ParallelLimit
	manager.rollbackOnFail = settings.RollbackOnFail
	manager.logger = settings.Logger
	manager.strategyConfig = settings.StrategyConfig
	return manager
}

// WithParallelLimit returns an option that overrides
// ResourceManagerOptions.ParallelLimit with limit.
func WithParallelLimit(limit int) func(*ResourceManagerOptions) {
	apply := func(o *ResourceManagerOptions) { o.ParallelLimit = limit }
	return apply
}

// WithRollbackOnFail returns an option that switches automatic rollback on
// deployment failure on (true) or off (false).
func WithRollbackOnFail(rollback bool) func(*ResourceManagerOptions) {
	apply := func(o *ResourceManagerOptions) { o.RollbackOnFail = rollback }
	return apply
}

// WithLogger returns an option that installs logger as the destination for
// deployment log messages.
func WithLogger(logger Logger) func(*ResourceManagerOptions) {
	apply := func(o *ResourceManagerOptions) { o.Logger = logger }
	return apply
}

// WithStrategyConfig returns an option that replaces the strategy
// configuration used by the resource manager.
func WithStrategyConfig(config StrategyConfig) func(*ResourceManagerOptions) {
	apply := func(o *ResourceManagerOptions) { o.StrategyConfig = config }
	return apply
}

// ApplyDeployment executes a deployment plan using deployment strategies.
//
// The steps are:
//  1. validate prerequisites via ValidatePrerequisites,
//  2. read the strategy name from config.Spec.GetDeploymentStrategy,
//  3. build the strategy through a StrategyFactory (parallel operations are
//     enabled when the manager's parallel limit is greater than 1),
//  4. let the strategy validate the plan,
//  5. execute the strategy,
//  6. if execution failed, rollbackOnFail is set and a result was returned,
//     attempt RollbackDeployment on that result.
//
// Failures in steps 1-4 (and nil plan/config arguments) return an
// ExecutionResult with empty action lists whose Error field wraps the cause
// with a stage prefix; the returned error is the unwrapped cause.
//
// When a rollback is attempted, result.RollbackPerformed is set to true
// regardless of the outcome and result.RollbackSuccess is set only when the
// rollback returned no error, so callers can tell a failed rollback apart
// from one that never ran.
func (rm *EdgeConnectResourceManager) ApplyDeployment(ctx context.Context, plan *DeploymentPlan, config *config.EdgeConnectConfig, manifestContent string) (*ExecutionResult, error) {
	// abort builds the return values for failures that happen before any
	// action has been executed.
	abort := func(stage string, cause error) (*ExecutionResult, error) {
		return &ExecutionResult{
			Plan:             plan,
			CompletedActions: []ActionResult{},
			FailedActions:    []ActionResult{},
			Error:            fmt.Errorf("%s: %w", stage, cause),
			Duration:         0,
		}, cause
	}

	// Both arguments are dereferenced below; reject nil instead of panicking.
	if plan == nil {
		return abort("prerequisites validation failed", fmt.Errorf("deployment plan is nil"))
	}
	if config == nil {
		return abort("prerequisites validation failed", fmt.Errorf("deployment configuration is nil"))
	}

	rm.logf("Starting deployment: %s", plan.ConfigName)

	// Step 1: Validate prerequisites
	if err := rm.ValidatePrerequisites(ctx, plan); err != nil {
		return abort("prerequisites validation failed", err)
	}

	// Step 2: Determine deployment strategy
	strategyName := DeploymentStrategy(config.Spec.GetDeploymentStrategy())
	rm.logf("Using deployment strategy: %s", strategyName)

	// Step 3: Create strategy executor (work on a copy of the base config)
	strategyConfig := rm.strategyConfig
	strategyConfig.ParallelOperations = rm.parallelLimit > 1

	factory := NewStrategyFactory(rm.client, strategyConfig, rm.logger)
	strategy, err := factory.CreateStrategy(strategyName)
	if err != nil {
		return abort("failed to create deployment strategy", err)
	}

	// Step 4: Validate strategy can handle this deployment
	if err := strategy.Validate(plan); err != nil {
		return abort("strategy validation failed", err)
	}

	// Step 5: Execute the deployment strategy
	rm.logf("Estimated deployment duration: %v", strategy.EstimateDuration(plan))
	result, err := strategy.Execute(ctx, plan, config, manifestContent)

	// Step 6: Handle rollback if needed
	if err != nil && rm.rollbackOnFail && result != nil {
		rm.logf("Deployment failed, attempting rollback...")
		// Record the attempt even when it fails so that callers can
		// distinguish "rollback failed" from "rollback never ran".
		result.RollbackPerformed = true
		if rollbackErr := rm.RollbackDeployment(ctx, result); rollbackErr != nil {
			rm.logf("Rollback failed: %v", rollbackErr)
		} else {
			result.RollbackSuccess = true
		}
	}

	if result != nil && result.Success {
		rm.logf("Deployment completed successfully in %v", result.Duration)
	}

	return result, err
}

// ValidatePrerequisites checks if deployment prerequisites are met.
//
// It returns an error when plan is nil, when the plan reports itself as empty
// (plan.IsEmpty), or when the manager has no client configured; otherwise it
// returns nil. ctx is currently unused.
func (rm *EdgeConnectResourceManager) ValidatePrerequisites(ctx context.Context, plan *DeploymentPlan) error {
	// A nil plan would otherwise panic in the log call below.
	if plan == nil {
		return fmt.Errorf("deployment plan is nil")
	}

	rm.logf("Validating deployment prerequisites for: %s", plan.ConfigName)

	// Check if we have any actions to perform
	if plan.IsEmpty() {
		return fmt.Errorf("deployment plan is empty - no actions to perform")
	}

	// Validate that we have required client capabilities
	if rm.client == nil {
		return fmt.Errorf("EdgeConnect client is not configured")
	}

	rm.logf("Prerequisites validation passed")
	return nil
}

// RollbackDeployment attempts to rollback a failed deployment.
//
// Phase 1 walks result.CompletedActions in reverse order and undoes every
// ActionCreate by deleting the created resource. Phase 2 recreates resources
// recorded in result.DeletedAppBackup and result.DeletedInstancesBackup, app
// first. Individual failures do not stop the rollback; they are collected and
// reported together in the returned error. A nil result, or a result without
// a plan, is rejected with an error.
func (rm *EdgeConnectResourceManager) RollbackDeployment(ctx context.Context, result *ExecutionResult) error {
	// Both are dereferenced below; fail cleanly instead of panicking.
	if result == nil {
		return fmt.Errorf("cannot rollback: execution result is nil")
	}
	if result.Plan == nil {
		return fmt.Errorf("cannot rollback: execution result has no deployment plan")
	}

	rm.logf("Starting rollback for deployment: %s", result.Plan.ConfigName)

	var rollbackErrors []error

	// Phase 1: Delete resources that were created in this deployment attempt (in reverse order)
	rm.logf("Phase 1: Rolling back created resources")
	for i := len(result.CompletedActions) - 1; i >= 0; i-- {
		action := result.CompletedActions[i]

		// Only CREATE actions are undone here.
		if action.Type != ActionCreate {
			continue
		}

		if err := rm.rollbackCreateAction(ctx, action, result.Plan); err != nil {
			rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to rollback %s: %w", action.Target, err))
		} else {
			rm.logf("Successfully rolled back: %s", action.Target)
		}
	}

	// Phase 2: Restore resources that were deleted before the failed deployment
	// This is critical for RecreateStrategy which deletes everything before recreating
	if result.DeletedAppBackup != nil || len(result.DeletedInstancesBackup) > 0 {
		rm.logf("Phase 2: Restoring deleted resources")

		// Restore app first (must exist before instances can be created)
		if result.DeletedAppBackup != nil {
			if err := rm.restoreApp(ctx, result.DeletedAppBackup); err != nil {
				rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to restore app: %w", err))
				rm.logf("Failed to restore app: %v", err)
			} else {
				rm.logf("Successfully restored app: %s", result.DeletedAppBackup.App.Key.Name)
			}
		}

		// Restore instances. The pointer to the loop variable is only used
		// synchronously within the iteration.
		for _, backup := range result.DeletedInstancesBackup {
			if err := rm.restoreInstance(ctx, &backup); err != nil {
				rollbackErrors = append(rollbackErrors, fmt.Errorf("failed to restore instance %s: %w", backup.Instance.Key.Name, err))
				rm.logf("Failed to restore instance %s: %v", backup.Instance.Key.Name, err)
			} else {
				rm.logf("Successfully restored instance: %s", backup.Instance.Key.Name)
			}
		}
	}

	if len(rollbackErrors) > 0 {
		return fmt.Errorf("rollback encountered %d errors: %v", len(rollbackErrors), rollbackErrors)
	}

	rm.logf("Rollback completed successfully")
	return nil
}

// rollbackCreateAction undoes a CREATE action by deleting the resource it
// created. Actions of any other type are ignored and yield nil.
//
// The action is treated as an instance rollback when its target matches the
// InstanceName of one of the plan's instance actions; otherwise it is treated
// as an app rollback.
func (rm *EdgeConnectResourceManager) rollbackCreateAction(ctx context.Context, action ActionResult, plan *DeploymentPlan) error {
	if action.Type != ActionCreate {
		return nil
	}

	for _, candidate := range plan.InstanceActions {
		if candidate.InstanceName == action.Target {
			return rm.rollbackInstance(ctx, action, plan)
		}
	}

	// No instance action carries this target name, so it must be the app.
	return rm.rollbackApp(ctx, action, plan)
}

// rollbackApp removes an application created during the deployment. The app
// key and region are taken from the plan's desired app state; action is not
// inspected.
func (rm *EdgeConnectResourceManager) rollbackApp(ctx context.Context, action ActionResult, plan *DeploymentPlan) error {
	desired := plan.AppAction.Desired

	return rm.client.DeleteApp(ctx, v2.AppKey{
		Organization: desired.Organization,
		Name:         desired.Name,
		Version:      desired.Version,
	}, desired.Region)
}

// rollbackInstance removes an app instance created during the deployment.
// It looks up the plan's instance action whose InstanceName equals the
// action target, deletes that instance in the action's target region, and
// returns an error if no such instance action exists.
func (rm *EdgeConnectResourceManager) rollbackInstance(ctx context.Context, action ActionResult, plan *DeploymentPlan) error {
	for _, candidate := range plan.InstanceActions {
		if candidate.InstanceName != action.Target {
			continue
		}

		// Matching instance action found: build its key and delete it.
		key := v2.AppInstanceKey{
			Organization: plan.AppAction.Desired.Organization,
			Name:         candidate.InstanceName,
			CloudletKey: v2.CloudletKey{
				Organization: candidate.Target.CloudletOrg,
				Name:         candidate.Target.CloudletName,
			},
		}
		return rm.client.DeleteAppInstance(ctx, key, candidate.Target.Region)
	}

	return fmt.Errorf("instance action not found for rollback: %s", action.Target)
}

// restoreApp re-creates an app that was deleted during deployment, using the
// data captured in backup. Only an explicit subset of the backed-up app's
// fields is copied into the create request; everything else is left at its
// zero value. A create failure is returned wrapped.
func (rm *EdgeConnectResourceManager) restoreApp(ctx context.Context, backup *AppBackup) error {
	key := backup.App.Key
	rm.logf("Restoring app: %s/%s version %s", key.Organization, key.Name, key.Version)

	// Copy only creation-safe fields; read-only fields such as CreatedAt,
	// UpdatedAt or Fields are deliberately left out.
	var app v2.App
	app.Key = key
	app.Deployment = backup.App.Deployment
	app.ImageType = backup.App.ImageType
	app.ImagePath = backup.App.ImagePath
	app.AllowServerless = backup.App.AllowServerless
	app.DefaultFlavor = backup.App.DefaultFlavor
	app.ServerlessConfig = backup.App.ServerlessConfig
	app.DeploymentManifest = backup.App.DeploymentManifest
	app.DeploymentGenerator = backup.App.DeploymentGenerator
	app.RequiredOutboundConnections = backup.App.RequiredOutboundConnections

	request := &v2.NewAppInput{Region: backup.Region, App: app}

	err := rm.client.CreateApp(ctx, request)
	if err != nil {
		return fmt.Errorf("failed to restore app: %w", err)
	}

	rm.logf("Successfully restored app: %s", key.Name)
	return nil
}

// restoreInstance re-creates an app instance that was deleted during
// deployment, using the data captured in backup.
//
// The create call is tried once and then retried up to five more times with
// a ten second pause before each retry, as long as isRetryableError reports
// the failure as retryable. A non-retryable failure is returned wrapped
// immediately. If ctx is done while waiting for a retry, ctx.Err() is
// returned. When all attempts fail, the last error is returned wrapped.
func (rm *EdgeConnectResourceManager) restoreInstance(ctx context.Context, backup *InstanceBackup) error {
	// Retry budget for namespace termination race conditions.
	const (
		maxRetries = 5
		retryDelay = 10 * time.Second
	)

	key := backup.Instance.Key
	rm.logf("Restoring instance: %s on %s:%s", key.Name, key.CloudletKey.Organization, key.CloudletKey.Name)

	// Copy only creation-safe fields; read-only fields such as CloudletLoc,
	// State, PowerState or CreatedAt are deliberately left out.
	request := &v2.NewAppInstanceInput{
		Region: backup.Region,
		AppInst: v2.AppInstance{
			Key:    key,
			AppKey: backup.Instance.AppKey,
			Flavor: backup.Instance.Flavor,
		},
	}

	var lastErr error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if attempt != 0 {
			rm.logf("Retrying instance restore %s (attempt %d/%d)", key.Name, attempt, maxRetries)
			select {
			case <-ctx.Done():
				return ctx.Err()
			case <-time.After(retryDelay):
			}
		}

		lastErr = rm.client.CreateAppInstance(ctx, request)

		switch {
		case lastErr == nil:
			rm.logf("Successfully restored instance: %s", key.Name)
			return nil
		case !rm.isRetryableError(lastErr):
			rm.logf("Failed to restore instance %s: %v (non-retryable error, giving up)", key.Name, lastErr)
			return fmt.Errorf("failed to restore instance: %w", lastErr)
		case attempt < maxRetries:
			rm.logf("Failed to restore instance %s: %v (will retry)", key.Name, lastErr)
		}
	}

	return fmt.Errorf("failed to restore instance after %d attempts: %w", maxRetries+1, lastErr)
}

// isRetryableError determines if an error should be retried.
//
// Rules, in order:
//   - nil is never retryable;
//   - any error whose message contains "being terminated" (case-insensitive)
//     is retryable, even if it is a 4xx API error (Kubernetes namespace
//     termination race condition);
//   - an error that unwraps to *v2.APIError with a 4xx status code is not
//     retryable;
//   - everything else (5xx API errors, network issues, timeouts, ...) is
//     retryable.
func (rm *EdgeConnectResourceManager) isRetryableError(err error) bool {
	if err == nil {
		return false
	}

	// Special case: Kubernetes namespace termination race condition.
	// This is a transient 400 error that should be retried. A single substring
	// check suffices: "is being terminated" contains "being terminated".
	if strings.Contains(strings.ToLower(err.Error()), "being terminated") {
		return true
	}

	// Client errors (4xx) reported by the API are the only non-retryable case.
	var apiErr *v2.APIError
	if errors.As(err, &apiErr) && apiErr.StatusCode >= 400 && apiErr.StatusCode < 500 {
		return false
	}

	// Server errors (5xx) and all other errors (network issues, timeouts, etc.)
	return true
}

// logf forwards a formatted message, prefixed with "[ResourceManager] ", to
// the configured logger. It is a no-op when no logger is set.
func (rm *EdgeConnectResourceManager) logf(format string, v ...interface{}) {
	if rm.logger == nil {
		return
	}
	rm.logger.Printf("[ResourceManager] "+format, v...)
}