Remove caches, retry fetching the hostname
Signed-off-by: Gabriel Adrian Samfira <gsamfira@cloudbasesolutions.com>
This commit is contained in:
parent
77f96d2761
commit
8d17498ab8
4 changed files with 49 additions and 67 deletions
|
|
@ -20,7 +20,6 @@ import (
|
|||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"garm/apiserver/params"
|
||||
"garm/auth"
|
||||
|
|
@ -36,10 +35,6 @@ import (
|
|||
)
|
||||
|
||||
func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub *wsWriter.Hub) (*APIController, error) {
|
||||
controllerInfo, err := r.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "getting controller info")
|
||||
}
|
||||
return &APIController{
|
||||
r: r,
|
||||
auth: authenticator,
|
||||
|
|
@ -48,17 +43,14 @@ func NewAPIController(r *runner.Runner, authenticator *auth.Authenticator, hub *
|
|||
ReadBufferSize: 1024,
|
||||
WriteBufferSize: 16384,
|
||||
},
|
||||
cachedControllerInfo: controllerInfo,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type APIController struct {
|
||||
r *runner.Runner
|
||||
auth *auth.Authenticator
|
||||
hub *wsWriter.Hub
|
||||
upgrader websocket.Upgrader
|
||||
cachedControllerInfo runnerParams.ControllerInfo
|
||||
mux sync.Mutex
|
||||
r *runner.Runner
|
||||
auth *auth.Authenticator
|
||||
hub *wsWriter.Hub
|
||||
upgrader websocket.Upgrader
|
||||
}
|
||||
|
||||
func handleError(w http.ResponseWriter, err error) {
|
||||
|
|
@ -95,27 +87,13 @@ func handleError(w http.ResponseWriter, err error) {
|
|||
}
|
||||
}
|
||||
|
||||
// controllerInfo calls into runner.GetControllerInfo(), but instead of erroring out, will
|
||||
// fall back on a cached version of that info. If successful, the cached version is updated.
|
||||
func (a *APIController) controllerInfo() runnerParams.ControllerInfo {
|
||||
// Atempt to fetch controller info. We do this on every call, in case the hostname
|
||||
// changes while garm is running. The ControllerID will never change, once initialized.
|
||||
info, err := a.r.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
// The call may fail, but we shouldn't loose metrics just because something went
|
||||
// terribly wrong while fetching the hostname.
|
||||
log.Printf("failed to get new controller info; falling back on cached version: %s", err)
|
||||
return a.cachedControllerInfo
|
||||
}
|
||||
// Set new controller info and return it.
|
||||
a.mux.Lock()
|
||||
defer a.mux.Unlock()
|
||||
a.cachedControllerInfo = info
|
||||
return a.cachedControllerInfo
|
||||
}
|
||||
|
||||
func (a *APIController) webhookMetricLabelValues(valid, reason string) []string {
|
||||
controllerInfo := a.controllerInfo()
|
||||
controllerInfo, err := a.r.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
log.Printf("failed to get controller info: %s", err)
|
||||
// If labels are empty, not attempt will be made to record webhook.
|
||||
return []string{}
|
||||
}
|
||||
return []string{
|
||||
valid, reason,
|
||||
controllerInfo.Hostname, controllerInfo.ControllerID.String(),
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package metrics
|
|||
|
||||
import (
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"garm/auth"
|
||||
"garm/params"
|
||||
|
|
@ -86,19 +85,6 @@ type GarmCollector struct {
|
|||
instanceMetric *prometheus.Desc
|
||||
runner *runner.Runner
|
||||
cachedControllerInfo params.ControllerInfo
|
||||
mux sync.Mutex
|
||||
}
|
||||
|
||||
func (c *GarmCollector) controllerInfo() params.ControllerInfo {
|
||||
controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
log.Printf("error getting controller info: %s", err)
|
||||
return c.cachedControllerInfo
|
||||
}
|
||||
c.mux.Lock()
|
||||
defer c.mux.Unlock()
|
||||
c.cachedControllerInfo = controllerInfo
|
||||
return c.cachedControllerInfo
|
||||
}
|
||||
|
||||
func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
|
|
@ -107,7 +93,11 @@ func (c *GarmCollector) Describe(ch chan<- *prometheus.Desc) {
|
|||
}
|
||||
|
||||
func (c *GarmCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
controllerInfo := c.controllerInfo()
|
||||
controllerInfo, err := c.runner.GetControllerInfo(auth.GetAdminContext())
|
||||
if err != nil {
|
||||
log.Printf("failed to get controller info: %s", err)
|
||||
return
|
||||
}
|
||||
c.CollectInstanceMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
c.CollectHealthMetric(ch, controllerInfo.Hostname, controllerInfo.ControllerID.String())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ import (
|
|||
providerCommon "garm/runner/providers/common"
|
||||
"garm/util"
|
||||
|
||||
"github.com/juju/clock"
|
||||
"github.com/juju/retry"
|
||||
"github.com/pkg/errors"
|
||||
uuid "github.com/satori/go.uuid"
|
||||
)
|
||||
|
|
@ -277,8 +279,28 @@ func (r *Runner) GetControllerInfo(ctx context.Context) (params.ControllerInfo,
|
|||
if !auth.IsAdmin(ctx) {
|
||||
return params.ControllerInfo{}, runnerErrors.ErrUnauthorized
|
||||
}
|
||||
// hostname could change
|
||||
hostname, err := os.Hostname()
|
||||
// It is unlikely that fetching the hostname will encounter an error on a standard
|
||||
// linux (or Windows) system, but if os.Hostname() can fail, we need to at least retry
|
||||
// a few times before giving up.
|
||||
// This retries 10 times within one second. While it has the potential to give us a
|
||||
// one second delay before returning either the hostname or an error, I expect this
|
||||
// to succeed on the first try.
|
||||
// As a side note, Windows requires a reboot for the hostname change to take effect,
|
||||
// so if we'll ever support Windows as a target system, the hostname can be cached.
|
||||
var hostname string
|
||||
err := retry.Call(retry.CallArgs{
|
||||
Func: func() error {
|
||||
var err error
|
||||
hostname, err = os.Hostname()
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "fetching hostname")
|
||||
}
|
||||
return nil
|
||||
},
|
||||
Attempts: 10,
|
||||
Delay: 100 * time.Millisecond,
|
||||
Clock: clock.WallClock,
|
||||
})
|
||||
if err != nil {
|
||||
return params.ControllerInfo{}, errors.Wrap(err, "fetching hostname")
|
||||
}
|
||||
|
|
|
|||
28
util/util.go
28
util/util.go
|
|
@ -24,6 +24,7 @@ import (
|
|||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/big"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
|
|
@ -334,26 +335,17 @@ func SanitizeLogEntry(entry string) string {
|
|||
return strings.Replace(strings.Replace(entry, "\n", "", -1), "\r", "", -1)
|
||||
}
|
||||
|
||||
func randomCharacter() string {
|
||||
for i := 0; i < 5; i++ {
|
||||
character, err := GetRandomString(1)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
return character
|
||||
}
|
||||
return ""
|
||||
func toBase62(uuid []byte) string {
|
||||
var i big.Int
|
||||
i.SetBytes(uuid[:])
|
||||
return i.Text(62)
|
||||
}
|
||||
|
||||
func NewID() string {
|
||||
newID, err := shortid.Generate()
|
||||
if err != nil {
|
||||
newID = uuid.New().String()
|
||||
} else {
|
||||
// remove underscores and hyphens from short ID. The hypens will remain
|
||||
// if we are forced to fall back to uuid4.
|
||||
newID = strings.Replace(newID, "_", randomCharacter(), -1)
|
||||
newID = strings.Replace(newID, "-", randomCharacter(), -1)
|
||||
short, err := shortid.Generate()
|
||||
if err == nil {
|
||||
return toBase62([]byte(short))
|
||||
}
|
||||
return newID
|
||||
newUUID := uuid.New()
|
||||
return toBase62(newUUID[:])
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue