This change adds a new "agent mode" to GARM. The agent enables GARM to set up a persistent websocket connection between the GARM server and the runners it spawns. The goal is to make it easier to keep track of state, even without subsequent webhooks from the forge. The agent reports via websockets when the runner is actually online, when it starts a job and when it finishes a job. Additionally, the agent allows us to enable an optional remote shell between the user and any runner that is spun up using agent mode. The remote shell is multiplexed over the same persistent websocket connection the agent sets up with the server (the agent never listens on a port). The web UI has also been updated to support this functionality. Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
115 lines
3.1 KiB
Go
115 lines
3.1 KiB
Go
package runner
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"time"
|
|
|
|
runnerErrors "github.com/cloudbase/garm-provider-common/errors"
|
|
commonParams "github.com/cloudbase/garm-provider-common/params"
|
|
"github.com/cloudbase/garm/auth"
|
|
"github.com/cloudbase/garm/params"
|
|
)
|
|
|
|
func (r *Runner) RecordAgentHeartbeat(ctx context.Context) error {
|
|
instance, err := auth.InstanceParams(ctx)
|
|
if err != nil {
|
|
slog.With(slog.Any("error", err)).ErrorContext(
|
|
ctx, "failed to get instance params")
|
|
return runnerErrors.ErrUnauthorized
|
|
}
|
|
now := time.Now().UTC()
|
|
updateParams := params.UpdateInstanceParams{
|
|
Heartbeat: &now,
|
|
}
|
|
|
|
if _, err := r.store.UpdateInstance(ctx, instance.Name, updateParams); err != nil {
|
|
return fmt.Errorf("failed to record heartbeat: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *Runner) SetInstanceCapabilities(ctx context.Context, caps params.AgentCapabilities) error {
|
|
instance, err := auth.InstanceParams(ctx)
|
|
if err != nil {
|
|
slog.With(slog.Any("error", err)).ErrorContext(
|
|
ctx, "failed to get instance params")
|
|
return runnerErrors.ErrUnauthorized
|
|
}
|
|
|
|
updateParams := params.UpdateInstanceParams{
|
|
Capabilities: &caps,
|
|
}
|
|
|
|
if _, err := r.store.UpdateInstance(ctx, instance.ID, updateParams); err != nil {
|
|
return fmt.Errorf("failed to update capabilities: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *Runner) SetInstanceToPendingDelete(ctx context.Context) error {
|
|
instance, err := auth.InstanceParams(ctx)
|
|
if err != nil {
|
|
slog.With(slog.Any("error", err)).ErrorContext(
|
|
ctx, "failed to get instance params")
|
|
return runnerErrors.ErrUnauthorized
|
|
}
|
|
|
|
updateParams := params.UpdateInstanceParams{
|
|
Status: commonParams.InstancePendingDelete,
|
|
}
|
|
|
|
if _, err := r.store.UpdateInstance(r.ctx, instance.ID, updateParams); err != nil {
|
|
return fmt.Errorf("failed to set instance to pending_delete: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *Runner) GetAgentJWTToken(ctx context.Context, runnerName string) (string, error) {
|
|
var instance params.Instance
|
|
var err error
|
|
if !auth.IsAdmin(ctx) {
|
|
instance, err = auth.InstanceParams(ctx)
|
|
if err != nil {
|
|
return "", runnerErrors.ErrUnauthorized
|
|
}
|
|
|
|
// A runner bootstrap token can get an agent token for itself.
|
|
if instance.Name != runnerName || auth.InstanceIsAgent(ctx) {
|
|
return "", runnerErrors.ErrUnauthorized
|
|
}
|
|
} else {
|
|
instance, err = r.GetInstance(ctx, runnerName)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get runner: %w", err)
|
|
}
|
|
}
|
|
|
|
var entityGetter params.EntityGetter
|
|
switch {
|
|
case instance.PoolID != "":
|
|
entityGetter, err = r.GetPoolByID(ctx, instance.PoolID)
|
|
case instance.ScaleSetID != 0:
|
|
entityGetter, err = r.GetScaleSetByID(ctx, instance.ScaleSetID)
|
|
}
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get entity: %w", err)
|
|
}
|
|
|
|
entity, err := entityGetter.GetEntity()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get entity: %w", err)
|
|
}
|
|
|
|
dbEntity, err := r.store.GetForgeEntity(ctx, entity.EntityType, entity.ID)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get entity from DB: %w", err)
|
|
}
|
|
|
|
agentToken, err := r.tokenGetter.NewAgentJWTToken(instance, dbEntity)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get agent token: %w", err)
|
|
}
|
|
return agentToken, nil
|
|
}
|