2025-05-20 09:40:15 +00:00
// Copyright 2025 Cloudbase Solutions SRL
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
2025-05-07 23:01:22 +00:00
package cache
import (
"context"
"crypto/rand"
"fmt"
"log/slog"
"math/big"
"sync"
"time"
commonParams "github.com/cloudbase/garm-provider-common/params"
"github.com/cloudbase/garm/cache"
2025-05-22 18:43:32 +00:00
"github.com/cloudbase/garm/database/common"
2025-05-07 23:01:22 +00:00
"github.com/cloudbase/garm/params"
garmUtil "github.com/cloudbase/garm/util"
2025-09-26 15:03:19 +00:00
"github.com/cloudbase/garm/util/appdefaults"
2025-05-07 23:01:22 +00:00
"github.com/cloudbase/garm/util/github"
)
2025-09-26 15:03:19 +00:00
var (
	// githubToolsUpdateDeadline is the age, expressed in minutes, after which
	// the tools fetched for a GitHub entity are refreshed again.
	githubToolsUpdateDeadline = 40
	// giteaToolsUpdateDeadline is the age, expressed in minutes, after which
	// the tools fetched for a Gitea entity are refreshed again.
	giteaToolsUpdateDeadline = 180
)
2025-05-22 18:43:32 +00:00
func newToolsUpdater ( ctx context . Context , entity params . ForgeEntity , store common . Store ) * toolsUpdater {
2025-09-26 15:03:19 +00:00
workerID := fmt . Sprintf ( "tools-updater-%s-%s" , entity , entity . Credentials . Endpoint . Name )
ctx = garmUtil . WithSlogContext (
ctx ,
slog . Any ( "worker" , workerID ) )
2025-05-07 23:01:22 +00:00
return & toolsUpdater {
ctx : ctx ,
entity : entity ,
quit : make ( chan struct { } ) ,
2025-05-22 18:43:32 +00:00
store : store ,
2025-05-07 23:01:22 +00:00
}
}
type toolsUpdater struct {
ctx context . Context
2025-05-12 21:47:13 +00:00
entity params . ForgeEntity
2025-05-07 23:01:22 +00:00
tools [ ] commonParams . RunnerApplicationDownload
lastUpdate time . Time
2025-05-22 18:43:32 +00:00
store common . Store
2025-05-07 23:01:22 +00:00
mux sync . Mutex
running bool
quit chan struct { }
reset chan struct { }
}
func ( t * toolsUpdater ) Start ( ) error {
t . mux . Lock ( )
defer t . mux . Unlock ( )
if t . running {
return nil
}
t . running = true
t . quit = make ( chan struct { } )
2025-05-14 21:09:02 +00:00
slog . DebugContext ( t . ctx , "starting tools updater" , "entity" , t . entity . String ( ) , "forge_type" , t . entity . Credentials . ForgeType )
2025-05-14 00:34:54 +00:00
switch t . entity . Credentials . ForgeType {
case params . GithubEndpointType :
go t . loop ( )
case params . GiteaEndpointType :
go t . giteaUpdateLoop ( )
}
2025-05-07 23:01:22 +00:00
return nil
}
// Stop flags the updater as no longer running and closes the quit channel,
// which wakes the refresh goroutine so it can exit. Stopping an updater that
// is not running does nothing. Stop always returns nil.
func (t *toolsUpdater) Stop() error {
	t.mux.Lock()
	defer t.mux.Unlock()

	if t.running {
		t.running = false
		// The running flag guarantees quit is closed at most once per Start.
		close(t.quit)
	}
	return nil
}
func ( t * toolsUpdater ) updateTools ( ) error {
2025-09-26 15:03:19 +00:00
slog . DebugContext ( t . ctx , "updating tools" , "last_update" , t . lastUpdate , "entity" , t . entity . String ( ) , "forge_type" , t . entity . Credentials . ForgeType )
2025-05-07 23:01:22 +00:00
entity , ok := cache . GetEntity ( t . entity . ID )
if ! ok {
return fmt . Errorf ( "getting entity from cache: %s" , t . entity . ID )
}
ghCli , err := github . Client ( t . ctx , entity )
if err != nil {
return fmt . Errorf ( "getting github client: %w" , err )
}
tools , err := garmUtil . FetchTools ( t . ctx , ghCli )
if err != nil {
return fmt . Errorf ( "fetching tools: %w" , err )
}
t . lastUpdate = time . Now ( ) . UTC ( )
t . tools = tools
slog . DebugContext ( t . ctx , "updating tools cache" , "entity" , t . entity . String ( ) )
cache . SetGithubToolsCache ( entity , tools )
return nil
}
func ( t * toolsUpdater ) Reset ( ) {
t . mux . Lock ( )
defer t . mux . Unlock ( )
if ! t . running {
return
}
2025-09-26 15:03:19 +00:00
slog . DebugContext ( t . ctx , "resetting tools worker" , "reset" , fmt . Sprintf ( "%v" , t . reset ) )
2025-05-14 00:34:54 +00:00
2025-05-07 23:01:22 +00:00
if t . reset != nil {
close ( t . reset )
t . reset = nil
}
}
2025-05-14 00:34:54 +00:00
func ( t * toolsUpdater ) sleepWithCancel ( sleepTime time . Duration ) ( canceled bool ) {
2025-07-07 04:54:10 +00:00
if sleepTime == 0 {
return false
}
2025-05-14 00:34:54 +00:00
ticker := time . NewTicker ( sleepTime )
defer ticker . Stop ( )
select {
case <- ticker . C :
return false
case <- t . quit :
case <- t . ctx . Done ( ) :
}
return true
}
// giteaUpdateLoop updates tools for gitea. The act runner can be downloaded
// without a token, unlike the github tools, which for GHES require a token.
func ( t * toolsUpdater ) giteaUpdateLoop ( ) {
defer t . Stop ( )
2025-09-26 15:03:19 +00:00
// add some jitter
timerJitter , err := rand . Int ( rand . Reader , big . NewInt ( 120 ) )
2025-05-22 18:43:32 +00:00
if err != nil {
2025-09-26 15:03:19 +00:00
timerJitter = big . NewInt ( 0 )
2025-05-14 00:34:54 +00:00
}
2025-09-26 15:03:19 +00:00
ticker := time . NewTicker ( 1 * time . Minute + time . Duration ( timerJitter . Int64 ( ) ) * time . Second )
defer ticker . Stop ( )
2025-05-14 00:34:54 +00:00
2025-09-28 10:02:46 +00:00
oldMetadataURL := ""
oldUseInternal := false
2025-09-26 15:03:19 +00:00
reset :
metadataURL := appdefaults . GiteaRunnerReleasesURL
var useInternal bool
ep , ok := cache . GetEndpoint ( t . entity . Credentials . Endpoint . Name )
if ok {
if ep . ToolsMetadataURL != "" {
metadataURL = ep . ToolsMetadataURL
}
if ep . UseInternalToolsMetadata != nil {
useInternal = * ep . UseInternalToolsMetadata
}
}
now := time . Now ( ) . UTC ( )
2025-09-28 10:02:46 +00:00
if now . After ( t . lastUpdate . Add ( time . Duration ( giteaToolsUpdateDeadline ) * time . Minute ) ) || oldMetadataURL != metadataURL || oldUseInternal != useInternal {
2025-09-26 15:03:19 +00:00
tools , err := getTools ( t . ctx , metadataURL , useInternal )
if err != nil {
t . addStatusEvent ( fmt . Sprintf ( "failed to update gitea tools: %q" , err ) , params . EventError )
} else {
if useInternal {
t . addStatusEvent ( "using internal tools metadata" , params . EventInfo )
} else {
t . addStatusEvent ( fmt . Sprintf ( "successfully updated tools using metadata URL %s" , metadataURL ) , params . EventInfo )
}
t . lastUpdate = now
2025-09-28 10:02:46 +00:00
oldMetadataURL = metadataURL
oldUseInternal = useInternal
2025-09-26 15:03:19 +00:00
cache . SetGithubToolsCache ( t . entity , tools )
}
}
2025-05-14 00:34:54 +00:00
for {
2025-09-26 15:03:19 +00:00
t . mux . Lock ( )
if t . reset == nil {
t . reset = make ( chan struct { } )
}
t . mux . Unlock ( )
2025-05-14 00:34:54 +00:00
select {
case <- t . quit :
slog . DebugContext ( t . ctx , "stopping tools updater" )
return
2025-09-26 15:03:19 +00:00
case <- t . reset :
goto reset
2025-05-14 00:34:54 +00:00
case <- t . ctx . Done ( ) :
return
case <- ticker . C :
2025-09-26 15:03:19 +00:00
now := time . Now ( ) . UTC ( )
2025-09-28 10:02:46 +00:00
if ! now . After ( t . lastUpdate . Add ( time . Duration ( giteaToolsUpdateDeadline ) * time . Minute ) ) || oldMetadataURL != metadataURL || oldUseInternal != useInternal {
2025-09-26 15:03:19 +00:00
continue
}
ep , ok := cache . GetEndpoint ( t . entity . Credentials . Endpoint . Name )
if ok {
if ep . ToolsMetadataURL != "" {
metadataURL = ep . ToolsMetadataURL
}
if ep . UseInternalToolsMetadata != nil {
useInternal = * ep . UseInternalToolsMetadata
}
}
tools , err := getTools ( t . ctx , metadataURL , useInternal )
2025-05-14 00:34:54 +00:00
if err != nil {
2025-05-31 20:55:21 +00:00
t . addStatusEvent ( fmt . Sprintf ( "failed to update gitea tools: %q" , err ) , params . EventError )
2025-05-14 00:34:54 +00:00
slog . DebugContext ( t . ctx , "failed to update gitea tools" , "error" , err )
continue
}
2025-09-26 15:03:19 +00:00
if useInternal {
t . addStatusEvent ( "using internal tools metadata" , params . EventInfo )
} else {
t . addStatusEvent ( fmt . Sprintf ( "successfully updated tools using metadata URL %s" , metadataURL ) , params . EventInfo )
}
t . lastUpdate = now
2025-09-28 10:02:46 +00:00
oldMetadataURL = metadataURL
oldUseInternal = useInternal
2025-05-14 00:34:54 +00:00
cache . SetGithubToolsCache ( t . entity , tools )
}
}
}
2025-05-07 23:01:22 +00:00
func ( t * toolsUpdater ) loop ( ) {
defer t . Stop ( )
// add some jitter. When spinning up multiple entities, we add
// jitter to prevent stampeeding herd.
randInt , err := rand . Int ( rand . Reader , big . NewInt ( 3000 ) )
if err != nil {
randInt = big . NewInt ( 0 )
}
2025-05-14 00:34:54 +00:00
t . sleepWithCancel ( time . Duration ( randInt . Int64 ( ) ) * time . Millisecond )
2025-05-07 23:01:22 +00:00
2025-09-26 15:03:19 +00:00
// add some jitter
timerJitter , err := rand . Int ( rand . Reader , big . NewInt ( 120 ) )
if err != nil {
timerJitter = big . NewInt ( 0 )
}
timer := time . NewTicker ( 1 * time . Minute + time . Duration ( timerJitter . Int64 ( ) ) * time . Second )
defer timer . Stop ( )
reset :
2025-05-07 23:01:22 +00:00
now := time . Now ( ) . UTC ( )
2025-09-26 15:03:19 +00:00
if now . After ( t . lastUpdate . Add ( time . Duration ( githubToolsUpdateDeadline ) * time . Minute ) ) {
slog . DebugContext ( t . ctx , "last update after deadline" , "last_update" , t . lastUpdate , "deadline" , t . lastUpdate . Add ( time . Duration ( githubToolsUpdateDeadline ) * time . Minute ) )
2025-05-07 23:01:22 +00:00
if err := t . updateTools ( ) ; err != nil {
2025-05-31 20:55:21 +00:00
slog . ErrorContext ( t . ctx , "updating tools" , "error" , err )
t . addStatusEvent ( fmt . Sprintf ( "failed to update tools: %q" , err ) , params . EventError )
2025-05-07 23:01:22 +00:00
} else {
// Tools are usually valid for 1 hour.
2025-09-26 15:03:19 +00:00
t . lastUpdate = now
2025-05-31 20:55:21 +00:00
t . addStatusEvent ( "successfully updated tools" , params . EventInfo )
2025-05-07 23:01:22 +00:00
}
}
for {
2025-09-26 15:03:19 +00:00
t . mux . Lock ( )
2025-05-07 23:01:22 +00:00
if t . reset == nil {
t . reset = make ( chan struct { } )
}
2025-09-26 15:03:19 +00:00
t . mux . Unlock ( )
2025-05-07 23:01:22 +00:00
select {
case <- t . quit :
slog . DebugContext ( t . ctx , "stopping tools updater" )
return
case <- timer . C :
2025-09-26 15:03:19 +00:00
now := time . Now ( ) . UTC ( )
if ! now . After ( t . lastUpdate . Add ( time . Duration ( githubToolsUpdateDeadline ) * time . Minute ) ) {
continue
}
2025-05-07 23:01:22 +00:00
slog . DebugContext ( t . ctx , "updating tools" )
2025-05-22 18:43:32 +00:00
if err := t . updateTools ( ) ; err != nil {
2025-05-07 23:01:22 +00:00
slog . ErrorContext ( t . ctx , "updating tools" , "error" , err )
2025-05-31 20:55:21 +00:00
t . addStatusEvent ( fmt . Sprintf ( "failed to update tools: %q" , err ) , params . EventError )
2025-05-07 23:01:22 +00:00
} else {
// Tools are usually valid for 1 hour.
2025-05-31 20:55:21 +00:00
t . addStatusEvent ( "successfully updated tools" , params . EventInfo )
2025-05-07 23:01:22 +00:00
}
case <- t . reset :
slog . DebugContext ( t . ctx , "resetting tools updater" )
2025-09-26 15:03:19 +00:00
goto reset
2025-05-07 23:01:22 +00:00
}
}
}
2025-05-31 20:55:21 +00:00
// addStatusEvent records a status event with the given message and level for
// the updater's entity in the store. A failure to record the event is logged
// and otherwise ignored, so callers never have to handle it.
func (t *toolsUpdater) addStatusEvent(msg string, level params.EventLevel) {
	// NOTE(review): the trailing 30 is presumably the number of events kept
	// per entity — confirm against the Store interface.
	if err := t.store.AddEntityEvent(t.ctx, t.entity, params.StatusEvent, level, msg, 30); err != nil {
		// Log through the updater's context, like every other log call in
		// this file, so the attributes attached to it are not lost.
		slog.ErrorContext(t.ctx, "failed to add entity event", "error", err)
	}
}