diff --git a/apiserver/controllers/controllers.go b/apiserver/controllers/controllers.go index b86de12f..ad9e4ba0 100644 --- a/apiserver/controllers/controllers.go +++ b/apiserver/controllers/controllers.go @@ -20,7 +20,6 @@ import ( "log" "net/http" "strings" - "sync" "garm/apiserver/params" "garm/auth" @@ -36,10 +35,6 @@ import ( ) func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) { - controllerInfo, err := r.GetControllerInfo(auth.GetAdminContext()) - if err != nil { - return nil, errors.Wrap(err, "getting controller info") - } return &APIController{ r: r, auth: authenticator, @@ -48,17 +43,14 @@ func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub * ReadBufferSize: 1024, WriteBufferSize: 16384, }, - cachedControllerInfo: controllerInfo, }, nil } type APIController struct { - r *runner.Runner - auth *auth.Authenticator - hub *wsWriter.Hub - upgrader websocket.Upgrader - cachedControllerInfo runnerParams.ControllerInfo - mux sync.Mutex + r *runner.Runner + auth *auth.Authenticator + hub *wsWriter.Hub + upgrader websocket.Upgrader } func handleError(w http.ResponseWriter, err error) { @@ -95,27 +87,13 @@ func handleError(w http.ResponseWriter, err error) { } } -// controllerInfo calls into runner.GetControllerInfo(), but instead of erroring out, will -// fall back on a cached version of that info. If successful, the cached version is updated. -func (a *APIController) controllerInfo() runnerParams.ControllerInfo { - // Atempt to fetch controller info. We do this on every call, in case the hostname - // changes while garm is running. The ControllerID will never change, once initialized. - info, err := a.r.GetControllerInfo(auth.GetAdminContext()) - if err != nil { - // The call may fail, but we shouldn't loose metrics just because something went - // terribly wrong while fetching the hostname. - log.Printf("failed to get new controller info; falling back on cached version: %s", err) - return a.cachedControllerInfo - } - // Set new controller info and return it. - a.mux.Lock() - defer a.mux.Unlock() - a.cachedControllerInfo = info - return a.cachedControllerInfo -} - func (a *APIController) webhookMetricLabelValues(valid, reason string) []string { - controllerInfo := a.controllerInfo() + controllerInfo, err := a.r.GetControllerInfo(auth.GetAdminContext()) + if err != nil { + log.Printf("failed to get controller info: %s", err) + // If labels are empty, not attempt will be made to record webhook. + return []string{} + } return []string{ valid, reason, controllerInfo.Hostname, controllerInfo.ControllerID.String(), diff --git a/metrics/metrics.go b/metrics/metrics.go index a94ba17c..3879d440 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -2,7 +2,6 @@ package metrics import ( "log" - "sync" "garm/auth" "garm/params" @@ -86,19 +85,6 @@ type GarmCollector struct { instanceMetric *prometheus.Desc runner *runner.Runner cachedControllerInfo params.ControllerInfo - mux sync.Mutex -} - -func (c *GarmCollector) controllerInfo() params.ControllerInfo { - controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext()) - if err != nil { - log.Printf("error getting controller info: %s", err) - return c.cachedControllerInfo - } - c.mux.Lock() - defer c.mux.Unlock() - c.cachedControllerInfo = controllerInfo - return c.cachedControllerInfo } func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) { @@ -107,7 +93,11 @@ func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) { } func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) { - controllerInfo := c.controllerInfo() + controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext()) + if err != nil { + log.Printf("failed to get controller info: %s", err) + return + } c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String()) } diff --git a/runner/runner.go b/runner/runner.go index e97882a6..e2d8f157 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -41,6 +41,8 @@ import ( providerCommon "garm/runner/providers/common" "garm/util" + "github.com/juju/clock" + "github.com/juju/retry" "github.com/pkg/errors" uuid "github.com/satori/go.uuid" ) @@ -277,8 +279,28 @@ func (r *Runner) GetControllerInfo(ctx context.Context) (params.ControllerInfo, if !auth.IsAdmin(ctx) { return params.ControllerInfo{}, runnerErrors.ErrUnauthorized } - // hostname could change - hostname, err := os.Hostname() + // It is unlikely that fetching the hostname will encounter an error on a standard + // linux (or Windows) system, but if os.Hostname() can fail, we need to at least retry + // a few times before giving up. + // This retries 10 times within one second. While it has the potential to give us a + // one second delay before returning either the hostname or an error, I expect this + // to succeed on the first try. + // As a side note, Windows requires a reboot for the hostname change to take effect, + // so if we'll ever support Windows as a target system, the hostname can be cached. + var hostname string + err := retry.Call(retry.CallArgs{ + Func: func() error { + var err error + hostname, err = os.Hostname() + if err != nil { + return errors.Wrap(err, "fetching hostname") + } + return nil + }, + Attempts: 10, + Delay: 100 * time.Millisecond, + Clock: clock.WallClock, + }) if err != nil { return params.ControllerInfo{}, errors.Wrap(err, "fetching hostname") } diff --git a/util/util.go b/util/util.go index 57b9f72b..e9916266 100644 --- a/util/util.go +++ b/util/util.go @@ -24,6 +24,7 @@ import ( "encoding/base64" "fmt" "io" + "math/big" "net/http" "os" "path" @@ -334,26 +335,17 @@ func SanitizeLogEntry(entry string) string { return strings.Replace(strings.Replace(entry, "\n", "", -1), "\r", "", -1) } -func randomCharacter() string { - for i := 0; i < 5; i++ { - character, err := GetRandomString(1) - if err != nil { - continue - } - return character - } - return "" +func toBase62(uuid []byte) string { + var i big.Int + i.SetBytes(uuid[:]) + return i.Text(62) } func NewID() string { - newID, err := shortid.Generate() - if err != nil { - newID = uuid.New().String() - } else { - // remove underscores and hyphens from short ID. The hypens will remain - // if we are forced to fall back to uuid4. - newID = strings.Replace(newID, "_", randomCharacter(), -1) - newID = strings.Replace(newID, "-", randomCharacter(), -1) + short, err := shortid.Generate() + if err == nil { + return toBase62([]byte(short)) } - return newID + newUUID := uuid.New() + return toBase62(newUUID[:]) }