From c2fa808cd36f553bc4b4f1ce73f46362b6a3b2f7 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Thu, 6 Jul 2023 14:57:28 +0000 Subject: [PATCH 1/9] Update some docs Signed-off-by: Gabriel Adrian Samfira --- doc/database.md | 51 ++++++++++++----------------------- doc/logging.md | 19 ++++++++++--- doc/providers.md | 17 +++++++++--- doc/webhooks_and_callbacks.md | 8 +----- testdata/config.toml | 3 +++ 5 files changed, 51 insertions(+), 47 deletions(-) diff --git a/doc/database.md b/doc/database.md index 59204640..09c9be43 100644 --- a/doc/database.md +++ b/doc/database.md @@ -1,37 +1,20 @@ # Database configuration -Garm currently supports two database backends: +GARM currently supports SQLite3. The current implementation of GARM is single server, so it does not make much sense to enable anything else at the moment. -* SQLite3 -* MySQL - -You can choose either one of these. For most cases, ```SQLite3``` should do, but feel free to go with MySQL if you wish. - - ```toml - [database] - # Turn on/off debugging for database queries. - debug = false - # Database backend to use. Currently supported backends are: - # * sqlite3 - # * mysql - backend = "sqlite3" - # the passphrase option is a temporary measure by which we encrypt the webhook - # secret that gets saved to the database, using AES256. In the future, secrets - # will be saved to something like Barbican or Vault, eliminating the need for - # this. - passphrase = "n<$n&P#L*TWqOh95_bN5J1r4mhxY7R84HZ%pvM#1vxJ<7~q%YVsCwU@Z60;7~Djo" - [database.mysql] - # If MySQL is used, these are the credentials and connection information used - # to connect to the server instance. - # database username - username = "" - # Database password - password = "" - # hostname to connect to - hostname = "" - # database name - database = "" - [database.sqlite3] - # Path on disk to the sqlite3 database file. - db_file = "/home/runner/file.db" - ``` +```toml +[database] + # Turn on/off debugging for database queries. 
+ debug = false + # Database backend to use. Currently supported backends are: + # * sqlite3 + backend = "sqlite3" + # the passphrase option is a temporary measure by which we encrypt the webhook + # secret that gets saved to the database, using AES256. In the future, secrets + # will be saved to something like Barbican or Vault, eliminating the need for + # this. + passphrase = "n<$n&P#L*TWqOh95_bN5J1r4mhxY7R84HZ%pvM#1vxJ<7~q%YVsCwU@Z60;7~Djo" + [database.sqlite3] + # Path on disk to the sqlite3 database file. + db_file = "/home/runner/garm.db" +``` diff --git a/doc/logging.md b/doc/logging.md index 02eb34c5..339b1e58 100644 --- a/doc/logging.md +++ b/doc/logging.md @@ -1,8 +1,8 @@ # Logging -By default, GARM is logging only on standard output. +By default, GARM logs everything to standard output. -If you would like GARM to use a logging file instead, you can use the `log_file` configuration option: +You can optionally log to file by adding the following to your config file: ```toml [default] @@ -12,6 +12,19 @@ log_file = "/tmp/runner-manager.log" ## Rotating log files -If GARM uses a log file, by default it will rotate it when it reaches 500MB or 28 days, whichever comes first. +GARM automatically rotates the log if it reaches 500 MB in size or 28 days, whichever comes first. However, if you want to manually rotate the log file, you can send a `SIGHUP` signal to the GARM process. + +You can add the following to your systemd unit file to enable `reload`: + +```ini +[Service] +ExecReload=/bin/kill -HUP $MAINPID +``` + +Then you can simply: + +```bash +systemctl reload garm +``` \ No newline at end of file diff --git a/doc/providers.md b/doc/providers.md index d1042130..7a724ecc 100644 --- a/doc/providers.md +++ b/doc/providers.md @@ -106,7 +106,7 @@ Image remotes in the ```garm``` config, is a map of strings to remote settings. The external provider is a special kind of provider. It delegates the functionality needed to create the runners to external executables. 
These executables can be either binaries or scripts. As long as they adhere to the needed interface, they can be used to create runners in any target IaaS. This is identical to what ```containerd``` does with ```CNIs```. -There is currently one external provider for [OpenStack](https://www.openstack.org/) available in the [contrib folder of this repository](../contrib/providers.d/openstack). The provider is written in ```bash``` and it is just a sample. A production ready provider would need more error checking and idempotency, but it serves as an example of what can be done. As it stands, it is functional. +There are currently two external providers available in the [contrib folder of this repository](../contrib/providers.d/). The providers are written in ```bash``` and it are just samples. Production ready providers would need more error checking and idempotency, but they serve as an example of what can be done. As it stands, they are functional. The configuration for an external provider is quite simple: @@ -127,13 +127,24 @@ provider_type = "external" provider_executable = "/etc/garm/providers.d/openstack/garm-external-provider" ``` -The external provider has three options: +The external provider has two options: * ```provider_executable``` * ```config_file``` The ```provider_executable``` option is the absolute path to an executable that implements the provider logic. Garm will delegate all provider operations to this executable. This executable can be anything (bash, python, perl, go, etc). See [Writing an external provider](./external_provider.md) for more details. -The ```config_file``` option is a path on disk to an arbitrary file, that is passed to the external executable via the environment variable ```GARM_PROVIDER_CONFIG_FILE```. This file is only relevant to the external provider. Garm itself does not read it. 
In the case of the OpenStack provider, this file contains access information for an OpenStack cloud (what you would typically find in a ```keystonerc``` file) as well as some provider specific options like whether or not to boot from volume and which tenant network to use. You can check out the [sample config file](../contrib/providers.d/openstack/keystonerc) in this repository. +The ```config_file``` option is a path on disk to an arbitrary file, that is passed to the external executable via the environment variable ```GARM_PROVIDER_CONFIG_FILE```. This file is only relevant to the external provider. Garm itself does not read it. In the case of the sample OpenStack provider, this file contains access information for an OpenStack cloud (what you would typically find in a ```keystonerc``` file) as well as some provider specific options like whether or not to boot from volume and which tenant network to use. You can check out the [sample config file](../contrib/providers.d/openstack/keystonerc) in this repository. If you want to implement an external provider, you can use this file for anything you need to pass into the binary when ```garm``` calls it to execute a particular operation. + +### Available external providers + +For non testing purposes, there are two external providers currently available: + +* [OpenStack](https://github.com/cloudbase/garm-provider-openstack) +* [Azure](https://github.com/cloudbase/garm-provider-azure) + +Details on how to install and configure them are available in their respective repositories. + +If you wrote a provider and would like to add it to the above list, feel free to open a PR. diff --git a/doc/webhooks_and_callbacks.md b/doc/webhooks_and_callbacks.md index dfe1b3a1..0ff7555d 100644 --- a/doc/webhooks_and_callbacks.md +++ b/doc/webhooks_and_callbacks.md @@ -1,12 +1,6 @@ # Webhooks -Garm is designed to auto-scale github runners based on a few simple rules: - -* A minimum idle runner count can be set for a pool. 
Garm will attempt to maintain that minimum of idle runners, ready to be used by your workflows. -* A maximum number of runners for a pool. This is a hard limit of runners a pool will create, regardless of minimum idle runners. -* When a runner is scheduled by github, ```garm``` will automatically spin up a new runner to replace it, obeying the maximum hard limit defined. - -To achieve this, ```garm``` relies on [GitHub Webhooks](https://docs.github.com/en/developers/webhooks-and-events/webhooks/about-webhooks). Webhooks allow ```garm``` to react to workflow events from your repository or organization. +Garm is designed to auto-scale github runners. To achieve this, ```garm``` relies on [GitHub Webhooks](https://docs.github.com/en/developers/webhooks-and-events/webhooks/about-webhooks). Webhooks allow ```garm``` to react to workflow events from your repository, organization or enterprise. In your repository or organization, navigate to ```Settings --> Webhooks```. In the ```Payload URL``` field, enter the URL to the ```garm``` webhook endpoint. The ```garm``` API endpoint for webhooks is: diff --git a/testdata/config.toml b/testdata/config.toml index 1182ad4a..0952ef34 100644 --- a/testdata/config.toml +++ b/testdata/config.toml @@ -27,6 +27,9 @@ config_dir = "/etc/garm" # Enable streaming logs via web sockets. Use garm-cli debug-log. enable_log_streamer = false +# Enable the golang debug server. See the documentation in the "doc" folder for more information. +debug_server = false + [metrics] # Toggle metrics. If set to false, the API endpoint for metrics collection will # be disabled. 
From 462e9415a515fe4b9535d69497bb71fc09ad1b40 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Thu, 6 Jul 2023 15:02:44 +0000 Subject: [PATCH 2/9] Add profiling doc Signed-off-by: Gabriel Adrian Samfira --- doc/debugging_and_profiling.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 doc/debugging_and_profiling.md diff --git a/doc/debugging_and_profiling.md b/doc/debugging_and_profiling.md new file mode 100644 index 00000000..064b811b --- /dev/null +++ b/doc/debugging_and_profiling.md @@ -0,0 +1,22 @@ +# Debugging and profiling + +GARM can optionally enable the golang profiling server. You can then use the usual `go tool pprof` command to start profiling. This is useful if you suspect garm may be bottlenecking in any way. To enable the profiling server, add the following section to the garm config: + +```toml +[default] + +debug_server = true +``` + +Then restart garm. You can then use the following command to start profiling: + +```bash +go tool pprof http://127.0.0.1:9997/debug/pprof/profile?seconds=120 +``` + +Important note on profiling when behind a reverse proxy. The above command will hang for a fairly long time. Most reverse proxies will timeout after about 60 seconds. To avoid this, you should only profile on localhost by connecting directly to garm. + +It's also advisable to exclude the debug server URLs from your reverse proxy and only make them available locally. 
+ +Now that the debug server is enabled, here is a blog post on how to profile golang applications: https://blog.golang.org/profiling-go-programs + From 44bfa83fc060328c2bacf527de5ebc8b42b14a3b Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 14:13:54 +0000 Subject: [PATCH 3/9] Remove some options and add docs * Remove the unused ConfigDir option * Add docs for the default section * Move some docs from other files Signed-off-by: Gabriel Adrian Samfira --- README.md | 45 ++++--- cloudconfig/templates.go | 8 ++ config/config.go | 15 --- config/config_test.go | 39 ------ doc/config_api_server.md | 0 ...oks_and_callbacks.md => config_default.md} | 127 ++++++++++++++---- doc/config_jwt_auth.md | 0 doc/config_metrics.md | 0 doc/database.md | 2 +- doc/logging.md | 30 ----- doc/providers.md | 26 +++- doc/the_boring_details.md | 4 + doc/webhooks.md | 27 ++++ testdata/config.toml | 17 --- util/appdefaults/appdefaults.go | 4 - 15 files changed, 185 insertions(+), 159 deletions(-) create mode 100644 doc/config_api_server.md rename doc/{webhooks_and_callbacks.md => config_default.md} (56%) create mode 100644 doc/config_jwt_auth.md create mode 100644 doc/config_metrics.md delete mode 100644 doc/logging.md create mode 100644 doc/the_boring_details.md create mode 100644 doc/webhooks.md diff --git a/README.md b/README.md index 03930a78..397bf16a 100644 --- a/README.md +++ b/README.md @@ -23,16 +23,18 @@ Whether you're running into issues or just want to drop by and say "hi", feel fr You need to have Go installed, then run: ```bash - git clone https://github.com/cloudbase/garm - cd garm - go install ./... + go install github.com/cloudbase/garm/cmd/garm@latest + go install github.com/cloudbase/garm/cmd/garm-cli@latest ``` -You should now have both ```garm``` and ```garm-cli``` in your ```$GOPATH/bin``` folder. +This will install the garm binaries in ```$GOPATH/bin``` folder. Move them somewhere in your ```$PATH``` to make them available system-wide. 
If you have docker/podman installed, you can also build statically linked binaries by running: ```bash + git clone https://github.com/cloudbase/garm + cd garm + git checkout release/v0.1 make build-static ``` @@ -75,12 +77,6 @@ Copy the config template: sudo cp ./testdata/config.toml /etc/garm/ ``` -Copy the external provider (optional): - - ```bash - sudo cp -a ./contrib/providers.d /etc/garm/ - ``` - Copy the systemd service file: ```bash @@ -106,15 +102,26 @@ Customize the config in ```/etc/garm/config.toml```, and start the service: sudo systemctl start garm ``` +## Installing external providers + +External providers are binaries that GARM calls into to create runners in a particular IaaS. There are currently two external providers available: + +* [OpenStack](https://github.com/cloudbase/garm-provider-openstack) +* [Azure](https://github.com/cloudbase/garm-provider-azure) + +Follow the instructions in the README of each provider to install them. + ## Configuration -The ```garm``` configuration is a simple ```toml```. A sample of the config file can be found in [the testdata folder](/testdata/config.toml). +The ```garm``` configuration is a simple ```toml```. The sample config file in [the testdata folder](/testdata/config.toml) is fairly well commented and should be enough to get you started. The configuration file is split into several sections, each of which is documented in its own page. 
The sections are: -There are 3 major sections of the config that require your attention: - -* [Github credentials section](/doc/github_credentials.md) -* [Providers section](/doc/providers.md) -* [The database section](/doc/database.md) +* [The default section](/doc/config_default.md) +* [Metrics](/doc/config_metrics.md) +* [JWT authentication](/doc/config_jwt_auth.md) +* [API server](/doc/config_api_server.md) +* [Github credentials](/doc/github_credentials.md) +* [Providers](/doc/providers.md) +* [Database](/doc/database.md) Once you've configured your database, providers and github credentials, you'll need to configure your [webhooks and the callback_url](/doc/webhooks_and_callbacks.md). @@ -124,12 +131,6 @@ If you would like to use ```garm``` with a different IaaS than the ones already If you like to optimize the startup time of new instance, take a look at the [performance considerations](/doc/performance_considerations.md) page. -## Security considerations - -Garm does not apply any ACLs of any kind to the instances it creates. That task remains in the responsibility of the user. [Here is a guide for creating ACLs in LXD](https://linuxcontainers.org/lxd/docs/master/howto/network_acls/). You can of course use ```iptables``` or ```nftables``` to create any rules you wish. I recommend you create a separate isolated lxd bridge for runners, and secure it using ACLs/iptables/nftables. - -You must make sure that the code that runs as part of the workflows is trusted, and if that cannot be done, you must make sure that any malicious code that will be pulled in by the actions and run as part of a workload, is as contained as possible. There is a nice article about [securing your workflow runs here](https://blog.gitguardian.com/github-actions-security-cheat-sheet/). - ## Write your own provider The providers are interfaces between ```garm``` and a particular IaaS in which we spin up GitHub Runners. These providers can be either **native** or **external**. 
The **native** providers are written in ```Go```, and must implement [the interface defined here](https://github.com/cloudbase/garm/blob/main/runner/common/provider.go#L22-L39). **External** providers can be written in any language, as they are in the form of an external executable that ```garm``` calls into. diff --git a/cloudconfig/templates.go b/cloudconfig/templates.go index e02c9c77..1d5b71c3 100644 --- a/cloudconfig/templates.go +++ b/cloudconfig/templates.go @@ -40,6 +40,10 @@ GITHUB_TOKEN=$(curl --retry 5 --retry-delay 5 --retry-connrefused --fail -s -X G function call() { PAYLOAD="$1" + [[ $CALLBACK_URL =~ ^(.*)/status$ ]] + if [ -z "$BASH_REMATCH" ];then + CALLBACK_URL="${CALLBACK_URL}/status" + fi curl --retry 5 --retry-delay 5 --retry-connrefused --fail -s -X POST -d "${PAYLOAD}" -H 'Accept: application/json' -H "Authorization: Bearer ${BEARER_TOKEN}" "${CALLBACK_URL}" || echo "failed to call home: exit code ($?)" } @@ -350,6 +354,10 @@ $GHRunnerGroup = "{{.GitHubRunnerGroup}}" function Install-Runner() { $CallbackURL="{{.CallbackURL}}" + if (!$CallbackURL.EndsWith("/status")) { + $CallbackURL = "$CallbackURL/status" + } + if ($Token.Length -eq 0) { Throw "missing callback authentication token" } diff --git a/config/config.go b/config/config.go index a0eca1e3..a65c4668 100644 --- a/config/config.go +++ b/config/config.go @@ -47,9 +47,6 @@ func NewConfig(cfgFile string) (*Config, error) { if _, err := toml.DecodeFile(cfgFile, &config); err != nil { return nil, errors.Wrap(err, "decoding toml") } - if config.Default.ConfigDir == "" { - config.Default.ConfigDir = appdefaults.DefaultConfigDir - } if err := config.Validate(); err != nil { return nil, errors.Wrap(err, "validating config") } @@ -108,10 +105,6 @@ func (c *Config) Validate() error { } type Default struct { - // ConfigDir is the folder where the runner may save any aditional files - // or configurations it may need. 
Things like auto-generated SSH keys that - // may be used to access the runner instances. - ConfigDir string `toml:"config_dir,omitempty" json:"config-dir,omitempty"` // CallbackURL is the URL where the instances can send back status reports. CallbackURL string `toml:"callback_url" json:"callback-url"` // MetadataURL is the URL where instances can fetch information they may need @@ -139,14 +132,6 @@ func (d *Default) Validate() error { return errors.Wrap(err, "validating metadata_url") } - if d.ConfigDir == "" { - return fmt.Errorf("config_dir cannot be empty") - } - - if _, err := os.Stat(d.ConfigDir); err != nil { - return errors.Wrap(err, "accessing config dir") - } - return nil } diff --git a/config/config_test.go b/config/config_test.go index 210b0fce..f6558cd8 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -32,7 +32,6 @@ var ( func getDefaultSectionConfig(configDir string) Default { return Default{ - ConfigDir: configDir, CallbackURL: "https://garm.example.com/", MetadataURL: "https://garm.example.com/api/v1/metadata", LogFile: filepath.Join(configDir, "garm.log"), @@ -152,7 +151,6 @@ func TestDefaultSectionConfig(t *testing.T) { cfg: Default{ CallbackURL: "", MetadataURL: cfg.MetadataURL, - ConfigDir: cfg.ConfigDir, }, errString: "missing callback_url", }, @@ -161,25 +159,14 @@ func TestDefaultSectionConfig(t *testing.T) { cfg: Default{ CallbackURL: cfg.CallbackURL, MetadataURL: "", - ConfigDir: cfg.ConfigDir, }, errString: "missing metadata-url", }, - { - name: "ConfigDir cannot be empty", - cfg: Default{ - CallbackURL: cfg.CallbackURL, - MetadataURL: cfg.MetadataURL, - ConfigDir: "", - }, - errString: "config_dir cannot be empty", - }, { name: "config_dir must exist and be accessible", cfg: Default{ CallbackURL: cfg.CallbackURL, MetadataURL: cfg.MetadataURL, - ConfigDir: "/i/do/not/exist", }, errString: "accessing config dir: stat /i/do/not/exist:.*", }, @@ -560,7 +547,6 @@ func TestNewConfig(t *testing.T) { require.Nil(t, err) 
require.NotNil(t, cfg) require.Equal(t, "https://garm.example.com/", cfg.Default.CallbackURL) - require.Equal(t, "./testdata", cfg.Default.ConfigDir) require.Equal(t, "0.0.0.0", cfg.APIServer.Bind) require.Equal(t, 9998, cfg.APIServer.Port) require.Equal(t, false, cfg.APIServer.UseTLS) @@ -574,31 +560,6 @@ func TestNewConfig(t *testing.T) { require.Equal(t, timeToLive("48h"), cfg.JWTAuth.TimeToLive) } -func TestNewConfigEmptyConfigDir(t *testing.T) { - dirPath, err := os.MkdirTemp("", "garm-config-test") - if err != nil { - t.Fatalf("failed to create temporary directory: %s", err) - } - defer os.RemoveAll(dirPath) - appdefaults.DefaultConfigDir = dirPath - - cfg, err := NewConfig("testdata/test-empty-config-dir.toml") - require.Nil(t, err) - require.NotNil(t, cfg) - require.Equal(t, cfg.Default.ConfigDir, dirPath) - require.Equal(t, "https://garm.example.com/", cfg.Default.CallbackURL) - require.Equal(t, "0.0.0.0", cfg.APIServer.Bind) - require.Equal(t, 9998, cfg.APIServer.Port) - require.Equal(t, false, cfg.APIServer.UseTLS) - require.Equal(t, DBBackendType("mysql"), cfg.Database.DbBackend) - require.Equal(t, "test", cfg.Database.MySQL.Username) - require.Equal(t, "test", cfg.Database.MySQL.Password) - require.Equal(t, "127.0.0.1", cfg.Database.MySQL.Hostname) - require.Equal(t, "garm", cfg.Database.MySQL.DatabaseName) - require.Equal(t, "bocyasicgatEtenOubwonIbsudNutDom", cfg.JWTAuth.Secret) - require.Equal(t, timeToLive("48h"), cfg.JWTAuth.TimeToLive) -} - func TestNewConfigInvalidTomlPath(t *testing.T) { cfg, err := NewConfig("this is not a file path") require.Nil(t, cfg) diff --git a/doc/config_api_server.md b/doc/config_api_server.md new file mode 100644 index 00000000..e69de29b diff --git a/doc/webhooks_and_callbacks.md b/doc/config_default.md similarity index 56% rename from doc/webhooks_and_callbacks.md rename to doc/config_default.md index 0ff7555d..0d75e397 100644 --- a/doc/webhooks_and_callbacks.md +++ b/doc/config_default.md @@ -1,30 +1,35 @@ -# 
Webhooks +# The default config section -Garm is designed to auto-scale github runners. To achieve this, ```garm``` relies on [GitHub Webhooks](https://docs.github.com/en/developers/webhooks-and-events/webhooks/about-webhooks). Webhooks allow ```garm``` to react to workflow events from your repository, organization or enterprise. +The `default` config section holds configuration options that don't need a category of their own, but are essential to the operation of the service. In this section we will detail each of the options available in the `default` section. -In your repository or organization, navigate to ```Settings --> Webhooks```. In the ```Payload URL``` field, enter the URL to the ```garm``` webhook endpoint. The ```garm``` API endpoint for webhooks is: +```toml +[default] +# This URL is used by instances to send back status messages as they install +# the github actions runner. Status messages can be seen by querying the +# runner status in garm. +# Note: If you're using a reverse proxy in front of your garm installation, +# this URL needs to point to the address of the reverse proxy. Using TLS is +# highly encouraged. +callback_url = "https://garm.example.com/api/v1/callbacks" - ```txt - POST /webhooks - ``` +# This URL is used by instances to retrieve information they need to set themselves +# up. Access to this URL is granted using the same JWT token used to send back +# status updates. Once the instance transitions to "installed" or "failed" state, +# access to both the status and metadata endpoints is disabled. +# Note: If you're using a reverse proxy in front of your garm installation, +# this URL needs to point to the address of the reverse proxy. Using TLS is +# highly encouraged. +metadata_url = "https://garm.example.com/api/v1/metadata" -If ```garm``` is running on a server under the domain ```garm.example.com```, then that field should be set to ```https://garm.example.com/webhooks```. 
+# Uncomment this line if you'd like to log to a file instead of standard output. +# log_file = "/tmp/runner-manager.log" -In the webhook configuration page under ```Content type``` you will need to select ```application/json```, set the proper webhook URL and, really important, **make sure you configure a webhook secret**. Garm will authenticate the payloads to make sure they are coming from GitHub. +# Enable streaming logs via web sockets. Use garm-cli debug-log. +enable_log_streamer = false -The webhook secret must be secure. Use something like this to generate one: - - ```bash - gabriel@rossak:~$ function generate_secret () { - tr -dc 'a-zA-Z0-9!@#$%^&*()_+?><~\`;' < /dev/urandom | head -c 64; - echo '' - } - - gabriel@rossak:~$ generate_secret - 9Q*nsr*S54g0imK64(!2$Ns6C!~VsH(p)cFj+AMLug%LM!R%FOQ - ``` - -Next, you can choose which events GitHub should send to ```garm``` via webhooks. Click on ```Let me select individual events``` and select ```Workflow jobs``` (should be at the bottom). You can send everything if you want, but any events ```garm``` doesn't care about will simply be ignored. +# Enable the golang debug server. See the documentation in the "doc" folder for more information. +debug_server = false +``` ## The callback_url option @@ -37,13 +42,13 @@ Your runners will call back home with status updates as they install. 
Once they Example of a runner sending status updates: ```bash - garm-cli runner show garm-f5227755-129d-4e2d-b306-377a8f3a5dfe + garm-cli runner show garm-DvxiVAlfHeE7 +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ | FIELD | VALUE | +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ | ID | 1afb407b-e9f7-4d75-a410-fc4a8c2dbe6c | - | Provider ID | garm-f5227755-129d-4e2d-b306-377a8f3a5dfe | - | Name | garm-f5227755-129d-4e2d-b306-377a8f3a5dfe | + | Provider ID | garm-DvxiVAlfHeE7 | + | Name | garm-DvxiVAlfHeE7 | | OS Type | linux | | OS Architecture | amd64 | | OS Name | ubuntu | @@ -86,3 +91,77 @@ This URL needs to be accessible only by the instances ```garm``` sets up. This U ```toml metadata_url = "https://garm.example.com/api/v1/metadata" ``` + +## The debug_server option + +GARM can optionally enable the golang profiling server. You can then use the usual `go tool pprof` command to start profiling. This is useful if you suspect garm may be bottlenecking in any way. To enable the profiling server, add the following section to the garm config: + +```toml +[default] + +debug_server = true +``` + +Then restarg garm. You can then use the following command to start profiling: + +```bash +go tool pprof http://127.0.0.1:9997/debug/pprof/profile?seconds=120 +``` + +Important note on profiling when behind a reverse proxy. The above command will hang for a fairly long time. Most reverse proxies will timeout after about 60 seconds. To avoid this, you should only profile on localhost by connecting directly to garm. + +It's also advisable to exclude the debug server URLs from your reverse proxy and only make them available locally. 
+ +Now that the debug server is enabled, here is a blog post on how to profile golang applications: https://blog.golang.org/profiling-go-programs + + +## The log_file option + +By default, GARM logs everything to standard output. + +You can optionally log to file by adding the following to your config file: + +```toml +[default] +# Use this if you'd like to log to a file instead of standard output. +log_file = "/tmp/runner-manager.log" +``` + +### Rotating log files + +GARM automatically rotates the log if it reaches 500 MB in size or 28 days, whichever comes first. + +However, if you want to manually rotate the log file, you can send a `SIGHUP` signal to the GARM process. + +You can add the following to your systemd unit file to enable `reload`: + +```ini +[Service] +ExecReload=/bin/kill -HUP $MAINPID +``` + +Then you can simply: + +```bash +systemctl reload garm +``` + +## The enable_log_streamer option + +This option allows you to stream garm logs directly to your terminal. Set this option to true, then you can use the following command to stream logs: + +```bash +garm-cli debug-log +``` + +An important note on enabling this option when behind a reverse proxy. The log streamer uses websockets to stream logs to you. You will need to configure your reverse proxy to allow websocket connections. 
If you're using nginx, you will need to add the following to your nginx `server` config: + +```nginx +location /api/v1/ws { + proxy_pass http://garm_backend; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_set_header Host $host; +} +``` \ No newline at end of file diff --git a/doc/config_jwt_auth.md b/doc/config_jwt_auth.md new file mode 100644 index 00000000..e69de29b diff --git a/doc/config_metrics.md b/doc/config_metrics.md new file mode 100644 index 00000000..e69de29b diff --git a/doc/database.md b/doc/database.md index 09c9be43..c3e1edc6 100644 --- a/doc/database.md +++ b/doc/database.md @@ -1,6 +1,6 @@ # Database configuration -GARM currently supports SQLite3. The current implementation of GARM is single server, so it does not make much sense to enable anything else at the moment. +GARM currently supports SQLite3. Support for other stores will be added in the future. ```toml [database] diff --git a/doc/logging.md b/doc/logging.md deleted file mode 100644 index 339b1e58..00000000 --- a/doc/logging.md +++ /dev/null @@ -1,30 +0,0 @@ -# Logging - -By default, GARM logs everything to standard output. - -You can optionally log to file by adding the following to your config file: - -```toml -[default] -# Use this if you'd like to log to a file instead of standard output. -log_file = "/tmp/runner-manager.log" -``` - -## Rotating log files - -GARM automatically rotates the log if it reaches 500 MB in size or 28 days, whichever comes first. - -However, if you want to manually rotate the log file, you can send a `SIGHUP` signal to the GARM process. 
- -You can add the following to your systemd unit file to enable `reload`: - -```ini -[Service] -ExecReload=/bin/kill -HUP $MAINPID -``` - -Then you can simply: - -```bash -systemctl reload garm -``` \ No newline at end of file diff --git a/doc/providers.md b/doc/providers.md index 7a724ecc..2dcb4f90 100644 --- a/doc/providers.md +++ b/doc/providers.md @@ -1,13 +1,17 @@ # Provider configuration -Garm was designed to be extensible. The database layer as well as the providers are defined as interfaces. Currently there are two providers: +GARM was designed to be extensible. Providers can be written either as built-in plugins or as external executables. The built-in plugins are written in Go, and they are compiled into the ```garm``` binary. External providers are executables that implement the needed interface to create/delete/list compute systems that are used by ```garm``` to create runners. -* [LXD](https://linuxcontainers.org/lxd/introduction/) -* External +GARM currently ships with one built-in provider for [LXD](https://linuxcontainers.org/lxd/introduction/) and the external provider interface which allows you to write your own provider in any language you want. -LXD is the simplest cloud-like system you can easily set up on any GNU/Linux machine, which enables you to create both containers and Virtual Machines. The ```external``` provider is a special type of provider, which delegates functionality to external executables. -## The LXD provider +- [LXD provider](#lxd-provider) + - [LXD remotes](#lxd-remotes) + - [LXD Security considerations](#lxd-security-considerations) +- [External provider](#external-provider) + - [Available external providers](#available-external-providers) + +## LXD provider Garm leverages the virtual machines feature of LXD to create the runners. 
Here is a sample config section for an LXD provider: @@ -102,11 +106,19 @@ You can also create your own image remote, where you can host your own custom im Image remotes in the ```garm``` config, is a map of strings to remote settings. The name of the remote is the last bit of string in the section header. For example, the following section ```[provider.lxd.image_remotes.ubuntu_daily]```, defines the image remote named **ubuntu_daily**. Use this name to reference images inside that remote. -## The External provider +You can also use locally uploaded images. Check out the [performance considerations](./performance_considerations.md) page for details on how to customize local images and use them with garm. + +### LXD Security considerations + +Garm does not apply any ACLs of any kind to the instances it creates. That task remains in the responsibility of the user. [Here is a guide for creating ACLs in LXD](https://linuxcontainers.org/lxd/docs/master/howto/network_acls/). You can of course use ```iptables``` or ```nftables``` to create any rules you wish. I recommend you create a separate isolated lxd bridge for runners, and secure it using ACLs/iptables/nftables. + +You must make sure that the code that runs as part of the workflows is trusted, and if that cannot be done, you must make sure that any malicious code that will be pulled in by the actions and run as part of a workload, is as contained as possible. There is a nice article about [securing your workflow runs here](https://blog.gitguardian.com/github-actions-security-cheat-sheet/). + +## External provider The external provider is a special kind of provider. It delegates the functionality needed to create the runners to external executables. These executables can be either binaries or scripts. As long as they adhere to the needed interface, they can be used to create runners in any target IaaS. This is identical to what ```containerd``` does with ```CNIs```. 
-There are currently two external providers available in the [contrib folder of this repository](../contrib/providers.d/). The providers are written in ```bash``` and it are just samples. Production ready providers would need more error checking and idempotency, but they serve as an example of what can be done. As it stands, they are functional. +There are currently two sample external providers available in the [contrib folder of this repository](../contrib/providers.d/). The providers are written in ```bash``` and are meant as examples of how a provider could be written in ```bash```. Production ready providers would need more error checking and idempotency, but they serve as an example of what can be done. As it stands, they are functional. The configuration for an external provider is quite simple: diff --git a/doc/the_boring_details.md b/doc/the_boring_details.md new file mode 100644 index 00000000..12d1bc39 --- /dev/null +++ b/doc/the_boring_details.md @@ -0,0 +1,4 @@ +# GARM implementation details and design decisions + +This document attempts to offer an in-depth look at the implementation details and design decisions that went into the creation of GARM. It is not meant to be a user guide, but rather a technical document for those interested in the inner workings of the application. + diff --git a/doc/webhooks.md b/doc/webhooks.md new file mode 100644 index 00000000..03dd8956 --- /dev/null +++ b/doc/webhooks.md @@ -0,0 +1,27 @@ +# Webhooks + +Garm is designed to auto-scale github runners. To achieve this, ```garm``` relies on [GitHub Webhooks](https://docs.github.com/en/developers/webhooks-and-events/webhooks/about-webhooks). Webhooks allow ```garm``` to react to workflow events from your repository, organization or enterprise. + +In your repository or organization, navigate to ```Settings --> Webhooks```. In the ```Payload URL``` field, enter the URL to the ```garm``` webhook endpoint.
The ```garm``` API endpoint for webhooks is: + + ```txt + POST /webhooks + ``` + +If ```garm``` is running on a server under the domain ```garm.example.com```, then that field should be set to ```https://garm.example.com/webhooks```. + +In the webhook configuration page under ```Content type``` you will need to select ```application/json```, set the proper webhook URL and, really important, **make sure you configure a webhook secret**. Garm will authenticate the payloads to make sure they are coming from GitHub. + +The webhook secret must be secure. Use something like this to generate one: + + ```bash + gabriel@rossak:~$ function generate_secret () { + tr -dc 'a-zA-Z0-9!@#$%^&*()_+?><~\`;' < /dev/urandom | head -c 64; + echo '' + } + + gabriel@rossak:~$ generate_secret + 9Q*nsr*S54g0imK64(!2$Ns6C!~VsH(p)cFj+AMLug%LM!R%FOQ + ``` + +Next, you can choose which events GitHub should send to ```garm``` via webhooks. Click on ```Let me select individual events``` and select ```Workflow jobs``` (should be at the bottom). You can send everything if you want, but any events ```garm``` doesn't care about will simply be ignored. diff --git a/testdata/config.toml b/testdata/config.toml index 0952ef34..5df14a2d 100644 --- a/testdata/config.toml +++ b/testdata/config.toml @@ -17,10 +17,6 @@ callback_url = "https://garm.example.com/api/v1/callbacks/status" # highly encouraged. metadata_url = "https://garm.example.com/api/v1/metadata" -# This folder is defined here for future use. Right now, we create a SSH -# public/private key-pair. -config_dir = "/etc/garm" - # Uncomment this line if you'd like to log to a file instead of standard output. # log_file = "/tmp/runner-manager.log" @@ -81,29 +77,16 @@ time_to_live = "8760h" debug = false # Database backend to use. Currently supported backends are: # * sqlite3 - # * mysql backend = "sqlite3" # the passphrase option is a temporary measure by which we encrypt the webhook # secret that gets saved to the database, using AES256. 
In the future, secrets # will be saved to something like Barbican or Vault, eliminating the need for # this. This setting needs to be 32 characters in size. passphrase = "shreotsinWadquidAitNefayctowUrph" - [database.mysql] - # If MySQL is used, these are the credentials and connection information used - # to connect to the server instance. - # database username - username = "" - # Database password - password = "" - # hostname to connect to - hostname = "" - # database name - database = "" [database.sqlite3] # Path on disk to the sqlite3 database file. db_file = "/etc/garm/garm.db" - # Currently, providers are defined statically in the config. This is due to the fact # that we have not yet added support for storing secrets in something like Barbican # or Vault. This will change in the future. However, for now, it's important to remember diff --git a/util/appdefaults/appdefaults.go b/util/appdefaults/appdefaults.go index 70b779bc..41fa3645 100644 --- a/util/appdefaults/appdefaults.go +++ b/util/appdefaults/appdefaults.go @@ -35,10 +35,6 @@ const ( ) var ( - // DefaultConfigDir is the default path on disk to the config dir. The config - // file will probably be in the same folder, but it is not mandatory. - DefaultConfigDir = "/etc/garm" - // DefaultUserGroups are the groups the default user will be part of. 
DefaultUserGroups = []string{ "sudo", "adm", "cdrom", "dialout", From f1d5a3ce5bdfac919712360fd89d90a873f98e0a Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 14:22:41 +0000 Subject: [PATCH 4/9] Update instance show example Signed-off-by: Gabriel Adrian Samfira --- doc/config_default.md | 44 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/doc/config_default.md b/doc/config_default.md index 0d75e397..cd293a87 100644 --- a/doc/config_default.md +++ b/doc/config_default.md @@ -43,27 +43,29 @@ Example of a runner sending status updates: ```bash garm-cli runner show garm-DvxiVAlfHeE7 - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ - | FIELD | VALUE | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ - | ID | 1afb407b-e9f7-4d75-a410-fc4a8c2dbe6c | - | Provider ID | garm-DvxiVAlfHeE7 | - | Name | garm-DvxiVAlfHeE7 | - | OS Type | linux | - | OS Architecture | amd64 | - | OS Name | ubuntu | - | OS Version | focal | - | Status | running | - | Runner Status | idle | - | Pool ID | 98f438b9-5549-4eaf-9bb7-1781533a455d | - | Status Updates | 2022-05-05T11:32:41: downloading tools from https://github.com/actions/runner/releases/download/v2.290.1/actions-runner-linux-x64-2.290.1.tar.gz | - | | 2022-05-05T11:32:43: extracting runner | - | | 2022-05-05T11:32:47: installing dependencies | - | | 2022-05-05T11:32:55: configuring runner | - | | 2022-05-05T11:32:59: installing runner service | - | | 2022-05-05T11:33:00: starting service | - | | 2022-05-05T11:33:00: runner successfully installed | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------------+ + 
+-----------------+------------------------------------------------------------------------------------+ + | FIELD | VALUE | + +-----------------+------------------------------------------------------------------------------------+ + | ID | 16b96ba2-d406-45b8-ab66-b70be6237b4e | + | Provider ID | garm-DvxiVAlfHeE7 | + | Name | garm-DvxiVAlfHeE7 | + | OS Type | linux | + | OS Architecture | amd64 | + | OS Name | ubuntu | + | OS Version | jammy | + | Status | running | + | Runner Status | idle | + | Pool ID | 8ec34c1f-b053-4a5d-80d6-40afdfb389f9 | + | Addresses | 10.198.117.120 | + | Status Updates | 2023-07-08T06:26:46: runner registration token was retrieved | + | | 2023-07-08T06:26:46: using cached runner found in /opt/cache/actions-runner/latest | + | | 2023-07-08T06:26:50: configuring runner | + | | 2023-07-08T06:26:56: runner successfully configured after 1 attempt(s) | + | | 2023-07-08T06:26:56: installing runner service | + | | 2023-07-08T06:26:56: starting service | + | | 2023-07-08T06:26:57: runner successfully installed | + +-----------------+------------------------------------------------------------------------------------+ + ``` This URL must be set and must be accessible by the instance. If you wish to restrict access to it, a reverse proxy can be configured to accept requests only from networks in which the runners ```garm``` manages will be spun up. This URL doesn't need to be globally accessible, it just needs to be accessible by the instances. 
From bcf321631469d4b3c701f4f3f119a64af56b220a Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 14:33:42 +0000 Subject: [PATCH 5/9] Remove leftover test Signed-off-by: Gabriel Adrian Samfira --- config/config_test.go | 8 -------- doc/config_default.md | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/config/config_test.go b/config/config_test.go index f6558cd8..e7f8489f 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -162,14 +162,6 @@ func TestDefaultSectionConfig(t *testing.T) { }, errString: "missing metadata-url", }, - { - name: "config_dir must exist and be accessible", - cfg: Default{ - CallbackURL: cfg.CallbackURL, - MetadataURL: cfg.MetadataURL, - }, - errString: "accessing config dir: stat /i/do/not/exist:.*", - }, } for _, tc := range tests { diff --git a/doc/config_default.md b/doc/config_default.md index cd293a87..8c2bce29 100644 --- a/doc/config_default.md +++ b/doc/config_default.md @@ -73,7 +73,7 @@ This URL must be set and must be accessible by the instance. If you wish to rest For example, in a scenario where you expose the API endpoint directly, this setting could look like the following: ```toml - callback_url = "https://garm.example.com/api/v1/callbacks/status" + callback_url = "https://garm.example.com/api/v1/callbacks" ``` Authentication is done using a short-lived JWT token, that gets generated for a particular instance that we are spinning up. That JWT token grants access to the instance to only update it's own status and to fetch metadata for itself. No other API endpoints will work with that JWT token. The validity of the token is equal to the pool bootstrap timeout value (default 20 minutes) plus the garm polling interval (5 minutes). 
From 40ff3589c9953b1ba13a473e1787e2121bd967a8 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 14:38:53 +0000 Subject: [PATCH 6/9] Fix typo Signed-off-by: Gabriel Adrian Samfira --- doc/config_default.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/config_default.md b/doc/config_default.md index 8c2bce29..b3ccde3f 100644 --- a/doc/config_default.md +++ b/doc/config_default.md @@ -96,7 +96,7 @@ This URL needs to be accessible only by the instances ```garm``` sets up. This U ## The debug_server option -GARM can optionally enable the golang profiling server. You can then use the usual `go tool pprof` command to start profiling. This is useful if you suspect garm may be bottlenecking in any way. To enable the profiling server, add the following section to the garm config: +GARM can optionally enable the golang profiling server. This is useful if you suspect garm may be bottlenecking in any way. To enable the profiling server, add the following section to the garm config: ```toml [default] @@ -104,7 +104,7 @@ GARM can optionally enable the golang profiling server. You can then use the usu debug_server = true ``` -Then restarg garm. You can then use the following command to start profiling: +And restart garm. 
You can then use the following command to start profiling: ```bash go tool pprof http://127.0.0.1:9997/debug/pprof/profile?seconds=120 From dc27a549e239c827763965092f000398c84bbc68 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 21:30:20 +0000 Subject: [PATCH 7/9] Remove debugging_and_profiling.md This file was merged in config_default.md Signed-off-by: Gabriel Adrian Samfira --- doc/debugging_and_profiling.md | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 doc/debugging_and_profiling.md diff --git a/doc/debugging_and_profiling.md b/doc/debugging_and_profiling.md deleted file mode 100644 index 064b811b..00000000 --- a/doc/debugging_and_profiling.md +++ /dev/null @@ -1,22 +0,0 @@ -# Debugging and profiling - -GARM can optionally enable the golang profiling server. You can then use the usual `go tool pprof` command to start profiling. This is useful if you suspect garm may be bottlenecking in any way. To enable the profiling server, add the following section to the garm config: - -```toml -[default] - -debug_server = true -``` - -Then restarg garm. You can then use the following command to start profiling: - -```bash -go tool pprof http://127.0.0.1:9997/debug/pprof/profile?seconds=120 -``` - -Important note on profiling when behind a reverse proxy. The above command will hang for a fairly long time. Most reverse proxies will timeout after about 60 seconds. To avoid this, you should only profile on localhost by connecting directly to garm. - -It's also advisable to exclude the debug server URLs from your reverse proxy and only make them available locally. 
- -Now that the debug server is enabled, here is a blog post on how to profile golang applications: https://blog.golang.org/profiling-go-programs - From 1682e98cebc3f5336dbfae238d6b30cf8d58818d Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sat, 8 Jul 2023 23:38:14 +0000 Subject: [PATCH 8/9] Add some more docs Signed-off-by: Gabriel Adrian Samfira --- doc/config_api_server.md | 32 ++++++++++++++++++++++++++++++++ doc/config_jwt_auth.md | 19 +++++++++++++++++++ doc/providers.md | 1 - 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/doc/config_api_server.md b/doc/config_api_server.md index e69de29b..9e240680 100644 --- a/doc/config_api_server.md +++ b/doc/config_api_server.md @@ -0,0 +1,32 @@ +# The API server config section + +This section allows you to configure the GARM API server. The API server is responsible for serving all the API endpoints used by the `garm-cli`, the runners that phone home their status and by GitHub when it sends us webhooks. + +The config options are fairly straightforward. + +```toml +[apiserver] + # Bind the API to this IP + bind = "0.0.0.0" + # Bind the API to this port + port = 9997 + # Whether or not to set up TLS for the API endpoint. If this is set to true, + # you must have a valid apiserver.tls section. + use_tls = false + # Set a list of allowed origins + # By default, if this option is omitted or empty, we will check + # only that the origin is the same as the originating server. + # A literal of "*" will allow any origin + cors_origins = ["*"] + [apiserver.tls] + # Path on disk to a x509 certificate bundle. + # NOTE: if your certificate is signed by an intermediary CA, this file + # must contain the entire certificate bundle needed for clients to validate + # the certificate. This usually means concatenating the certificate and the + # CA bundle you received. + certificate = "" + # The path on disk to the corresponding private key for the certificate.
+ key = "" +``` + +The GARM API server has the option to enable TLS, but I suggest you use a reverse proxy and enable TLS termination in that reverse proxy. There is an `nginx` sample in this repository. \ No newline at end of file diff --git a/doc/config_jwt_auth.md b/doc/config_jwt_auth.md index e69de29b..f68005ac 100644 --- a/doc/config_jwt_auth.md +++ b/doc/config_jwt_auth.md @@ -0,0 +1,19 @@ +# The JWT authentication config section + +This section configures the JWT authentication used by the API server. GARM is currently a single user system and that user has the right to do anything and everything GARM is capable of. As a result, the JWT auth we have does not include a refresh token. The token is valid for the duration of the time to live (TTL) set in the config file. Once the token expires, you will need to log in again. + +It is recommended that the secret be a long, randomly generated string. Changing the secret at any time will invalidate all existing tokens. + +```toml +[jwt_auth] +# A JWT token secret used to sign tokens. +# Obviously, this needs to be changed :). +secret = ")9gk_4A6KrXz9D2u`0@MPea*sd6W`%@5MAWpWWJ3P3EqW~qB!!(Vd$FhNc*eU4vG" + +# Time to live for tokens. Both the instances and you will use JWT tokens to +# authenticate against the API. However, this TTL is applied only to tokens you +# get when logging into the API. The tokens issued to the instances we manage, +# have a TTL based on the runner bootstrap timeout set on each pool. The minimum +# TTL for this token is 24h. +time_to_live = "8760h" +``` \ No newline at end of file diff --git a/doc/providers.md b/doc/providers.md index 2dcb4f90..6591b1c7 100644 --- a/doc/providers.md +++ b/doc/providers.md @@ -4,7 +4,6 @@ GARM was designed to be extensible. 
Providers can be written either as built-in GARM currently ships with one built-in provider for [LXD](https://linuxcontainers.org/lxd/introduction/) and the external provider interface which allows you to write your own provider in any language you want. - - [LXD provider](#lxd-provider) - [LXD remotes](#lxd-remotes) - [LXD Security considerations](#lxd-security-considerations) From e5d3cae47a0e82e261e031b9bc9c0390ba246bf3 Mon Sep 17 00:00:00 2001 From: Gabriel Adrian Samfira Date: Sun, 9 Jul 2023 14:29:30 +0000 Subject: [PATCH 9/9] Add some more docs Signed-off-by: Gabriel Adrian Samfira --- README.md | 8 +++--- doc/config_api_server.md | 4 ++- doc/config_jwt_auth.md | 3 +-- doc/config_metrics.md | 55 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 397bf16a..6036495d 100644 --- a/README.md +++ b/README.md @@ -116,12 +116,12 @@ Follow the instructions in the README of each provider to install them. The ```garm``` configuration is a simple ```toml```. The sample config file in [the testdata folder](/testdata/config.toml) is fairly well commented and should be enough to get you started. The configuration file is split into several sections, each of which is documented in its own page. The sections are: * [The default section](/doc/config_default.md) +* [Database](/doc/database.md) +* [Github credentials](/doc/github_credentials.md) +* [Providers](/doc/providers.md) * [Metrics](/doc/config_metrics.md) * [JWT authentication](/doc/config_jwt_auth.md) * [API server](/doc/config_api_server.md) -* [Github credentials](/doc/github_credentials.md) -* [Providers](/doc/providers.md) -* [Database](/doc/database.md) Once you've configured your database, providers and github credentials, you'll need to configure your [webhooks and the callback_url](/doc/webhooks_and_callbacks.md). @@ -129,7 +129,7 @@ At this point, you should be done. 
Have a look at the [running garm document](/d If you would like to use ```garm``` with a different IaaS than the ones already available, have a look at the [writing an external provider](/doc/external_provider.md) page. -If you like to optimize the startup time of new instance, take a look at the [performance considerations](/doc/performance_considerations.md) page. +If you would like to optimize the startup time of new instances, take a look at the [performance considerations](/doc/performance_considerations.md) page. ## Write your own provider diff --git a/doc/config_api_server.md b/doc/config_api_server.md index 9e240680..33f0f4ea 100644 --- a/doc/config_api_server.md +++ b/doc/config_api_server.md @@ -29,4 +29,6 @@ The config options are fairly straight forward. key = "" ``` -The GARM API server has the option to enable TLS, but I suggest you use a reverse proxy and enable TLS termination in that reverse proxy. There is an `nginx` sample in this repository. \ No newline at end of file +The GARM API server has the option to enable TLS, but I suggest you use a reverse proxy and enable TLS termination in that reverse proxy. There is an `nginx` sample in this repository with TLS termination enabled. + +You can of course enable TLS in both garm and the reverse proxy. The choice is yours. \ No newline at end of file diff --git a/doc/config_jwt_auth.md b/doc/config_jwt_auth.md index f68005ac..7f07d311 100644 --- a/doc/config_jwt_auth.md +++ b/doc/config_jwt_auth.md @@ -6,8 +6,7 @@ It is recommended that the secret be a long, randomly generated string. Changing ```toml [jwt_auth] -# A JWT token secret used to sign tokens. -# Obviously, this needs to be changed :). +# A JWT token secret used to sign tokens. Obviously, this needs to be changed :). secret = ")9gk_4A6KrXz9D2u`0@MPea*sd6W`%@5MAWpWWJ3P3EqW~qB!!(Vd$FhNc*eU4vG" # Time to live for tokens.
Both the instances and you will use JWT tokens to diff --git a/doc/config_metrics.md b/doc/config_metrics.md index e69de29b..caa50b1b 100644 --- a/doc/config_metrics.md +++ b/doc/config_metrics.md @@ -0,0 +1,55 @@ +# The metrics section + +This is one of the features in GARM that I really love having. For one thing, it's community contributed and for another, it really adds value to the project. It allows us to create some pretty nice visualizations of what is happening with GARM. + +At the moment there are only three meaningful metrics being collected, besides the default ones that the prometheus golang package enables by default. These are: + +* `garm_health` - This is a gauge that is set to 1 if GARM is healthy and 0 if it is not. This is useful for alerting. +* `garm_runner_status` - This is a gauge value that gives us details about the runners garm spawns +* `garm_webhooks_received` - This is a counter that increments every time GARM receives a webhook from GitHub. + +More metrics will be added in the future. + +## Enabling metrics + +Metrics are disabled by default. To enable them, add the following to your config file: + +```toml +[metrics] +# Toggle metrics. If set to false, the API endpoint for metrics collection will +# be disabled. +enable = true +# Toggle to disable authentication (not recommended) on the metrics endpoint. +# If you do disable authentication, I encourage you to put a reverse proxy in front +# of garm and limit which systems can access that particular endpoint. Ideally, you +# would enable some kind of authentication using the reverse proxy, if the built-in auth +# is not sufficient for your needs. +disable_auth = false +``` + +You can choose to disable authentication if you wish, however it's not terribly difficult to set up, so I generally advise against disabling it. + +## Configuring prometheus + +The following section assumes that your garm instance is running at `garm.example.com` and has TLS enabled. 
+ +First, generate a new JWT token valid only for the metrics endpoint: + +```bash +garm-cli metrics-token create +``` + +Note: The token validity is equal to the TTL you set in the [JWT config section](/doc/config_jwt_auth.md). + +Copy the resulting token, and add it to your prometheus config file. The following is an example of how to add garm as a target in your prometheus config file: + +```yaml +scrape_configs: + - job_name: "garm" + # Connect over https. If you don't have TLS enabled, change this to http. + scheme: https + static_configs: + - targets: ["garm.example.com"] + authorization: + credentials: "superSecretTokenYouGeneratedEarlier" +``` \ No newline at end of file