garm/apiserver/controllers/instances.go
Gabriel Adrian Samfira d09f12dfd8 Add force delete runner
This branch adds the ability to forcefully remove a runner from GARM.

When the operator wishes to manually remove a runner, the workflow is as
follows:

* Check that the runner exists in GitHub. If it does, attempt to
  remove it. An error here indicates that the runner may be processing
  a job. In this case, we don't continue and the operator gets immediate
  feedback from the API.
* Mark the runner in the database as pending_delete
* Allow the consolidate loop to reap it from the provider and remove it
  from the database.

Removing the instance from the provider is async. If the provider errs out,
GARM will keep trying to remove it in perpetuity until the provider succedes.

In situations where the provider is misconfigured, this will never happen, leaving
the instance in a permanent state of pending_delete.

A provider may fail for various reasons. Either credentials have expired, the
API endpoint has changed, the provider is misconfigured or the operator may just
have removed it from the config before cleaning up the runners. While some cases
are recoverable, some are not. We cannot have a situation in which we cannot clean
resources in garm because of a misconfiguration.

This change adds the pending_force_delete instance status. Instances marked with
this status, will be removed from GARM even if the provider reports an error.

The GARM cli has been modified to give new meaning to the --force-remove-runner
option. This option in the CLI is no longer mandatory. Instead, setting it will mark
the runner with the new pending_force_delete status. Omitting it will mark the runner
with the old status of pending_delete.

Fixes: #160

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
2023-10-12 06:15:36 +00:00

326 lines
8.5 KiB
Go

// Copyright 2022 Cloudbase Solutions SRL
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package controllers
import (
"encoding/json"
"log"
"net/http"
"strconv"
gErrors "github.com/cloudbase/garm-provider-common/errors"
"github.com/cloudbase/garm/apiserver/params"
runnerParams "github.com/cloudbase/garm/params"
"github.com/gorilla/mux"
)
// swagger:route GET /pools/{poolID}/instances instances ListPoolInstances
//
// List runner instances in a pool.
//
// Parameters:
// + name: poolID
// description: Runner pool ID.
// type: string
// in: path
// required: true
//
// Responses:
// 200: Instances
// default: APIErrorResponse
func (a *APIController) ListPoolInstancesHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
poolID, ok := vars["poolID"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No pool ID specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
instances, err := a.r.ListPoolInstances(ctx, poolID)
if err != nil {
log.Printf("listing pool instances: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
// swagger:route GET /instances/{instanceName} instances GetInstance
//
// Get runner instance by name.
//
// Parameters:
// + name: instanceName
// description: Runner instance name.
// type: string
// in: path
// required: true
//
// Responses:
// 200: Instance
// default: APIErrorResponse
func (a *APIController) GetInstanceHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
instanceName, ok := vars["instanceName"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No runner name specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
instance, err := a.r.GetInstance(ctx, instanceName)
if err != nil {
log.Printf("listing instances: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
// swagger:route DELETE /instances/{instanceName} instances DeleteInstance
//
// Delete runner instance by name.
//
// Parameters:
// + name: instanceName
// description: Runner instance name.
// type: string
// in: path
// required: true
//
// + name: forceRemove
// description: If true GARM will ignore any provider error when removing the runner and will continue to remove the runner from github and the GARM database.
// type: boolean
// in: query
// required: false
//
// Responses:
// default: APIErrorResponse
func (a *APIController) DeleteInstanceHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
instanceName, ok := vars["instanceName"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No instance name specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
forceRemove, _ := strconv.ParseBool(r.URL.Query().Get("forceRemove"))
if err := a.r.DeleteRunner(ctx, instanceName, forceRemove); err != nil {
log.Printf("removing runner: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
}
// swagger:route GET /repositories/{repoID}/instances repositories instances ListRepoInstances
//
// List repository instances.
//
// Parameters:
// + name: repoID
// description: Repository ID.
// type: string
// in: path
// required: true
//
// Responses:
// 200: Instances
// default: APIErrorResponse
func (a *APIController) ListRepoInstancesHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
repoID, ok := vars["repoID"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No repo ID specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
instances, err := a.r.ListRepoInstances(ctx, repoID)
if err != nil {
log.Printf("listing pools: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
// swagger:route GET /organizations/{orgID}/instances organizations instances ListOrgInstances
//
// List organization instances.
//
// Parameters:
// + name: orgID
// description: Organization ID.
// type: string
// in: path
// required: true
//
// Responses:
// 200: Instances
// default: APIErrorResponse
func (a *APIController) ListOrgInstancesHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
orgID, ok := vars["orgID"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No org ID specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
instances, err := a.r.ListOrgInstances(ctx, orgID)
if err != nil {
log.Printf("listing instances: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
// swagger:route GET /enterprises/{enterpriseID}/instances enterprises instances ListEnterpriseInstances
//
// List enterprise instances.
//
// Parameters:
// + name: enterpriseID
// description: Enterprise ID.
// type: string
// in: path
// required: true
//
// Responses:
// 200: Instances
// default: APIErrorResponse
func (a *APIController) ListEnterpriseInstancesHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
enterpriseID, ok := vars["enterpriseID"]
if !ok {
w.WriteHeader(http.StatusBadRequest)
if err := json.NewEncoder(w).Encode(params.APIErrorResponse{
Error: "Bad Request",
Details: "No enterprise ID specified",
}); err != nil {
log.Printf("failed to encode response: %q", err)
}
return
}
instances, err := a.r.ListEnterpriseInstances(ctx, enterpriseID)
if err != nil {
log.Printf("listing instances: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
// swagger:route GET /instances instances ListInstances
//
// Get all runners' instances.
//
// Responses:
// 200: Instances
// default: APIErrorResponse
func (a *APIController) ListAllInstancesHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
instances, err := a.r.ListAllInstances(ctx)
if err != nil {
log.Printf("listing instances: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
log.Printf("failed to encode response: %q", err)
}
}
func (a *APIController) InstanceStatusMessageHandler(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
var updateMessage runnerParams.InstanceUpdateMessage
if err := json.NewDecoder(r.Body).Decode(&updateMessage); err != nil {
log.Printf("failed to decode: %s", err)
handleError(w, gErrors.ErrBadRequest)
return
}
if err := a.r.AddInstanceStatusMessage(ctx, updateMessage); err != nil {
log.Printf("error saving status message: %s", err)
handleError(w, err)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
}