Remove caches, retry fetching the hostname

Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
Gabriel Adrian Samfira 2023-01-29 13:16:35 +02:00
parent 77f96d2761
commit 8d17498ab8
No known key found for this signature in database
GPG key ID: 7D073DCC2C074CB5
4 changed files with 49 additions and 67 deletions

View file

@ -20,7 +20,6 @@ import (
"log"
"net/http"
"strings"
"sync"
"garm/apiserver/params"
"garm/auth"
@ -36,10 +35,6 @@ import (
)
func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) {
controllerInfo, err := r.GetControllerInfo(auth.GetAdminContext())
if err != nil {
return nil, errors.Wrap(err, "getting controller info")
}
return &APIController{
r: r,
auth: authenticator,
@ -48,17 +43,14 @@ func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub *
ReadBufferSize: 1024,
WriteBufferSize: 16384,
},
cachedControllerInfo: controllerInfo,
}, nil
}
type APIController struct {
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
cachedControllerInfo runnerParams.ControllerInfo
mux sync.Mutex
r *runner.Runner
auth *auth.Authenticator
hub *wsWriter.Hub
upgrader websocket.Upgrader
}
func handleError(w http.ResponseWriter, err error) {
@ -95,27 +87,13 @@ func handleError(w http.ResponseWriter, err error) {
}
}
// controllerInfo calls into runner.GetControllerInfo(), but instead of erroring out, will
// fall back on a cached version of that info. If successful, the cached version is updated.
func (a *APIController) controllerInfo() runnerParams.ControllerInfo {
// Attempt to fetch controller info. We do this on every call, in case the hostname
// changes while garm is running. The ControllerID will never change, once initialized.
info, err := a.r.GetControllerInfo(auth.GetAdminContext())
if err != nil {
// The call may fail, but we shouldn't lose metrics just because something went
// terribly wrong while fetching the hostname.
log.Printf("failed to get new controller info; falling back on cached version: %s", err)
return a.cachedControllerInfo
}
// Set new controller info and return it.
a.mux.Lock()
defer a.mux.Unlock()
a.cachedControllerInfo = info
return a.cachedControllerInfo
}
func (a *APIController) webhookMetricLabelValues(valid, reason string) []string {
controllerInfo := a.controllerInfo()
controllerInfo, err := a.r.GetControllerInfo(auth.GetAdminContext())
if err != nil {
log.Printf("failed to get controller info: %s", err)
// If labels are empty, no attempt will be made to record the webhook.
return []string{}
}
return []string{
valid, reason,
controllerInfo.Hostname, controllerInfo.ControllerID.String(),

View file

@ -2,7 +2,6 @@ package metrics
import (
"log"
"sync"
"garm/auth"
"garm/params"
@ -86,19 +85,6 @@ type GarmCollector struct {
instanceMetric *prometheus.Desc
runner *runner.Runner
cachedControllerInfo params.ControllerInfo
mux sync.Mutex
}
func (c *GarmCollector) controllerInfo() params.ControllerInfo {
controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext())
if err != nil {
log.Printf("error getting controller info: %s", err)
return c.cachedControllerInfo
}
c.mux.Lock()
defer c.mux.Unlock()
c.cachedControllerInfo = controllerInfo
return c.cachedControllerInfo
}
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
@ -107,7 +93,11 @@ func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
}
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
controllerInfo := c.controllerInfo()
controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext())
if err != nil {
log.Printf("failed to get controller info: %s", err)
return
}
c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
}

View file

@ -41,6 +41,8 @@ import (
providerCommon "garm/runner/providers/common"
"garm/util"
"github.com/juju/clock"
"github.com/juju/retry"
"github.com/pkg/errors"
uuid "github.com/satori/go.uuid"
)
@ -277,8 +279,28 @@ func (r *Runner) GetControllerInfo(ctx context.Context) (params.ControllerInfo,
if !auth.IsAdmin(ctx) {
return params.ControllerInfo{}, runnerErrors.ErrUnauthorized
}
// hostname could change
hostname, err := os.Hostname()
// It is unlikely that fetching the hostname will encounter an error on a standard
// linux (or Windows) system, but if os.Hostname() can fail, we need to at least retry
// a few times before giving up.
// This retries 10 times within one second. While it has the potential to give us a
// one second delay before returning either the hostname or an error, I expect this
// to succeed on the first try.
// As a side note, Windows requires a reboot for the hostname change to take effect,
// so if we ever support Windows as a target system, the hostname can be cached.
var hostname string
err := retry.Call(retry.CallArgs{
Func: func() error {
var err error
hostname, err = os.Hostname()
if err != nil {
return errors.Wrap(err, "fetching hostname")
}
return nil
},
Attempts: 10,
Delay: 100 * time.Millisecond,
Clock: clock.WallClock,
})
if err != nil {
return params.ControllerInfo{}, errors.Wrap(err, "fetching hostname")
}

View file

@ -24,6 +24,7 @@ import (
"encoding/base64"
"fmt"
"io"
"math/big"
"net/http"
"os"
"path"
@ -334,26 +335,17 @@ func SanitizeLogEntry(entry string) string {
return strings.Replace(strings.Replace(entry, "\n", "", -1), "\r", "", -1)
}
func randomCharacter() string {
for i := 0; i < 5; i++ {
character, err := GetRandomString(1)
if err != nil {
continue
}
return character
}
return ""
// toBase62 encodes the given bytes as a base-62 string. The bytes are loaded
// with big.Int.SetBytes, which reads them as a big-endian unsigned integer, so
// leading zero bytes do not contribute to the output. The result is produced by
// big.Int.Text(62), whose digit set is 0-9, a-z, A-Z.
func toBase62(uuid []byte) string {
var i big.Int
i.SetBytes(uuid[:])
return i.Text(62)
}
func NewID() string {
newID, err := shortid.Generate()
if err != nil {
newID = uuid.New().String()
} else {
// remove underscores and hyphens from short ID. The hyphens will remain
// if we are forced to fall back to uuid4.
newID = strings.Replace(newID, "_", randomCharacter(), -1)
newID = strings.Replace(newID, "-", randomCharacter(), -1)
short, err := shortid.Generate()
if err == nil {
return toBase62([]byte(short))
}
return newID
newUUID := uuid.New()
return toBase62(newUUID[:])
}