Compare commits


39 commits

Author SHA1 Message Date
ce8865007c bumped garm to v0.0.4 2025-12-08 11:06:51 +01:00
5b438097bb bumped argo to argo-cd-9.1.5 2025-12-02 15:37:45 +01:00
89f92fdabc bumped garm version 2025-12-02 14:57:37 +01:00
97709eff30 added garm to stacks 2025-12-02 13:56:47 +01:00
44fecf67c2 added oidc env vars for terralist 2025-12-01 15:03:31 +01:00
45da6fc210 added FORGEJO_IMAGE_TAG env var 2025-11-28 11:27:50 +01:00
94c51a4d77 added terralist 2025-11-28 10:51:23 +01:00
115e8f27f6 added coder stack 2025-11-27 16:28:22 +01:00
4d1621b783 chore(alerts): disabled bogus alerts related to kubecontrollermanager and kubescheduler 2025-10-21 08:47:29 +00:00
47c16eeafd feat(vmuser): use secret instead of hardcoded value for authentication 2025-08-18 10:38:08 +02:00
2eab9bd80b feat(sso): configure sso for ArgoCD 2025-08-15 15:10:55 +02:00
699b6cedcb fix(backup): Increased s3 backup volume size to 100GB
Refs: DevFW/infra-deploy#116
2025-08-15 10:56:36 +02:00
c8d5195dc7 feat(sso): introduced grafana OAUTH config 2025-08-15 10:01:04 +02:00
b3f77644e9 feat(sso): using secret references in dex to not put secrets in git 2025-08-14 16:22:11 +02:00
d677b4b0e7 feat(sso): added dex and added template parameters for grafana and dex 2025-08-14 15:55:03 +02:00
67c513d1a5 feat(alerts): 🎉 Add disk consumption high alert rule
Introduce a new alert rule for monitoring high disk consumption in Kubernetes. This enhances observability by providing alerts when disk usage exceeds 60%, helping to maintain storage health in the cluster environment.

Refs: DevFW/infra-deploy#109
2025-08-13 13:38:31 +02:00
3a666e718f feat(edp): changed disk-volume-type from SATA to GPSSD 2025-08-13 10:55:15 +02:00
b3582b9929 fix(backup): Fixed syntax problem related to forgejo s3 backups 2025-08-13 08:00:52 +00:00
3277d6d854 introduced control parameter for cronjob 2025-08-12 16:16:55 +02:00
a92ed86c4d fix(observability): Disabled scraping of kube controller manager and scheduler
They are managed by OTC
2025-08-12 15:06:14 +02:00
fb64314fb2 feat(observability): Introduced alert priority for notifications 2025-08-12 14:20:01 +02:00
975bb6b982 feat(observability): Introduced alert for failed s3 backup jobs 2025-08-12 14:07:38 +02:00
e0f6cc77dd fix(observability): Added missing encryption to grafana volume 2025-08-12 13:37:56 +02:00
dbda3d4ab5 fix(cronjob): fix bug where only packages got backed up 2025-08-11 15:34:38 +02:00
28c23b9f08 chore: set default storage class to csi-disk driver 2025-08-08 15:25:25 +02:00
f19b294b26 chore(OTC): changed obsolete disk type 2025-08-07 11:30:27 +00:00
643176228e Revert "feat(grafana alerts): add notification channel (email) for grafana alerts"
This reverts commit c9d14d451f.
2025-08-05 15:25:42 +02:00
ea6b18b7ea feat(alertmanager): 🎉 Enable managed configuration for alerts
Updates the Alertmanager configuration to use managed settings, enabling streamlined alert handling. Removes outdated configurations and introduces a new email receiver for Grafana alerts.
2025-08-05 15:24:37 +02:00
c9d14d451f feat(grafana alerts): add notification channel (email) for grafana alerts 2025-08-05 15:01:12 +02:00
6af5ce71cd feat(forgejo): updated secret ref for a bucket name 2025-08-01 10:31:04 +02:00
55d9a06dc7 feat(forgejo): backup s3 directly to pvc 2025-08-01 10:31:04 +02:00
491be80842 fix(s3backup): doing a local backup first and then pushing it to remote, which is still on the same OBS store 2025-08-01 10:31:04 +02:00
e7d14a89cd feat(manifest): 🎉 WIP Add CronJob and Secret for S3 backups
Adds a new CronJob for scheduled S3 backups using rclone, along with a corresponding Secret for AWS credentials. This introduces automated backup functionality for the Forgejo server, enhancing data protection and recovery capabilities.
2025-08-01 10:31:04 +02:00
51a55b5ed4 fix(forgejo): Enable email notifications for common things like PRs 2025-07-31 09:31:00 +00:00
30c2ec054b chore(pipeline): Remove use of our three helm mirrors 2025-07-30 13:55:38 +00:00
fb03ded960 chore(pipeline): Remove use of our three helm mirrors 2025-07-30 13:54:53 +00:00
278c832cb4 chore(pipeline): Remove use of our three helm mirrors 2025-07-30 13:54:04 +00:00
a2324a16b7 test(pipeline): Revert of general test of OSC dependencies
helm-chart-4.12.4 will require an update of argocd to version >=3
2025-07-30 12:39:18 +00:00
d79653cc64 test(pipeline): Revert of general test of OSC dependencies
Only v1.1.0-edp-v11.0.3 works currently
2025-07-30 12:38:10 +00:00
26 changed files with 719 additions and 136 deletions

View file

@@ -0,0 +1,24 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: coder-reg
namespace: argocd
labels:
env: dev
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
name: in-cluster
namespace: argocd
source:
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/coder"
repoURL: "https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}"
targetRevision: HEAD
project: default
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true

View file

@@ -0,0 +1,24 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: garm-reg
namespace: argocd
labels:
env: dev
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
name: in-cluster
namespace: argocd
source:
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/garm"
repoURL: "https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}"
targetRevision: HEAD
project: default
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true

View file

@@ -0,0 +1,24 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: terralist-reg
namespace: argocd
labels:
env: dev
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
name: in-cluster
namespace: argocd
source:
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/terralist"
repoURL: "https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}"
targetRevision: HEAD
project: default
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true

View file

@@ -0,0 +1,32 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: coder
namespace: argocd
labels:
env: dev
spec:
project: default
syncPolicy:
automated:
selfHeal: true
syncOptions:
- CreateNamespace=true
retry:
limit: -1
destination:
name: in-cluster
namespace: coder
sources:
- repoURL: https://helm.coder.com/v2
chart: coder
targetRevision: 2.28.3
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/coder/coder/values.yaml
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
ref: values
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/coder/coder/manifests"

View file

@@ -0,0 +1,38 @@
---
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: coder-db
namespace: coder
spec:
instances: 1
primaryUpdateStrategy: unsupervised
resources:
requests:
memory: "1Gi"
cpu: "1"
limits:
memory: "1Gi"
cpu: "1"
managed:
roles:
- name: coder
createdb: true
login: true
passwordSecret:
name: coder-db-user
storage:
size: 10Gi
storageClass: csi-disk
---
apiVersion: postgresql.cnpg.io/v1
kind: Database
metadata:
name: coder
namespace: coder
spec:
cluster:
name: coder-db
name: coder
owner: coder
---

View file

@@ -0,0 +1,61 @@
coder:
# You can specify any environment variables you'd like to pass to Coder
# here. Coder consumes environment variables listed in
# `coder server --help`, and these environment variables are also passed
# to the workspace provisioner (so you can consume them in your Terraform
# templates for auth keys etc.).
#
# Please keep in mind that you should not set `CODER_HTTP_ADDRESS`,
# `CODER_TLS_ENABLE`, `CODER_TLS_CERT_FILE` or `CODER_TLS_KEY_FILE` as
# they are already set by the Helm chart and will cause conflicts.
env:
- name: CODER_ACCESS_URL
value: https://coder.{{{ .Env.DOMAIN_GITEA }}}
- name: CODER_PG_CONNECTION_URL
valueFrom:
secretKeyRef:
# You'll need to create a secret (named coder-db-user here) with your
# Postgres connection URL like:
# postgres://coder:password@postgres:5432/coder?sslmode=disable
name: coder-db-user
key: url
# For production deployments, we recommend configuring your own GitHub
# OAuth2 provider and disabling the default one.
- name: CODER_OAUTH2_GITHUB_DEFAULT_PROVIDER_ENABLE
value: "false"
- name: EDGE_CONNECT_ENDPOINT
valueFrom:
secretKeyRef:
name: edge-credential
key: endpoint
- name: EDGE_CONNECT_USERNAME
valueFrom:
secretKeyRef:
name: edge-credential
key: username
- name: EDGE_CONNECT_PASSWORD
valueFrom:
secretKeyRef:
name: edge-credential
key: password
# (Optional) For production deployments the access URL should be set.
# If you're just trying Coder, access the dashboard via the service IP.
# - name: CODER_ACCESS_URL
# value: "https://coder.example.com"
#tls:
# secretNames:
# - my-tls-secret-name
service:
type: ClusterIP
ingress:
enable: true
className: nginx
host: coder.{{{ .Env.DOMAIN_GITEA }}}
annotations:
cert-manager.io/cluster-issuer: main
tls:
enable: true
secretName: coder-tls-secret
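
The CODER_PG_CONNECTION_URL above and the CNPG managed role earlier in this diff both point at a coder-db-user Secret that is not part of this changeset. A minimal sketch, assuming CNPG's generated coder-db-rw read-write service and placeholder credentials (CNPG expects the kubernetes.io/basic-auth type for passwordSecret; an extra url key alongside the required username/password keys is permitted):

apiVersion: v1
kind: Secret
metadata:
  name: coder-db-user
  namespace: coder
type: kubernetes.io/basic-auth
stringData:
  username: coder
  password: change-me # placeholder; CNPG reconciles the role to this value
  # hypothetical URL assembled from the CNPG cluster (coder-db) and database (coder)
  url: postgres://coder:change-me@coder-db-rw.coder.svc:5432/coder?sslmode=require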

View file

@@ -18,12 +18,12 @@ spec:
name: in-cluster
namespace: argocd
sources:
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/DevFW-CICD/argocd-helm.git
- repoURL: https://github.com/argoproj/argo-helm.git
path: charts/argo-cd
# TODO: RIRE Can be updated when https://github.com/argoproj/argo-cd/issues/20790 is fixed and merged
# As logout make problems, it is suggested to switch from path based routing to an own argocd domain,
# similar to the CNOE amazon reference implementation and in our case, Forgejo
targetRevision: argo-cd-7.8.28-depends
targetRevision: argo-cd-9.1.5
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/core/argocd/values.yaml
@@ -32,4 +32,4 @@ spec:
ref: values
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/core/argocd/manifests"
path: "{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/core/argocd/manifests"

View file

@@ -5,6 +5,16 @@ configs:
params:
server.insecure: true
cm:
oidc.config: |
name: FORGEJO
issuer: https://{{{ .Env.DOMAIN_DEX }}}
clientID: controller-argocd-dex
clientSecret: $dex-argo-client:clientSecret
requestedScopes:
- openid
- profile
- email
- groups
application.resourceTrackingMethod: annotation
timeout.reconciliation: 60s
resource.exclusions: |
@@ -18,10 +28,9 @@ configs:
- CiliumIdentity
clusters:
- "*"
accounts.provider-argocd: apiKey
url: https://{{{ .Env.DOMAIN_ARGOCD }}}
rbac:
policy.csv: 'g, provider-argocd, role:admin'
policy.csv: 'g, DevFW, role:admin'
tls:
certificates:
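
The $dex-argo-client:clientSecret notation above is Argo CD's reference to a key in another Secret in the argocd namespace; Argo CD only resolves it when that Secret carries the app.kubernetes.io/part-of: argocd label. A sketch with a placeholder value:

apiVersion: v1
kind: Secret
metadata:
  name: dex-argo-client
  namespace: argocd
  labels:
    # required so Argo CD resolves $dex-argo-client:clientSecret from argocd-cm
    app.kubernetes.io/part-of: argocd
type: Opaque
stringData:
  clientSecret: change-me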

View file

@@ -0,0 +1,29 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: cloudnative-pg
namespace: argocd
labels:
env: dev
spec:
project: default
syncPolicy:
automated:
selfHeal: true
syncOptions:
- CreateNamespace=true
retry:
limit: -1
destination:
name: in-cluster
namespace: cloudnative-pg
sources:
- repoURL: https://cloudnative-pg.github.io/charts
chart: cloudnative-pg
targetRevision: 0.26.1
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/core/cloudnative-pg/values.yaml
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
ref: values

View file

@@ -0,0 +1,29 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: dex
namespace: argocd
labels:
env: dev
spec:
project: default
syncPolicy:
automated:
selfHeal: true
syncOptions:
- CreateNamespace=true
retry:
limit: -1
destination:
name: in-cluster
namespace: dex
sources:
- repoURL: https://charts.dexidp.io
chart: dex
targetRevision: 0.23.0
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/core/dex/values.yaml
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
ref: values

View file

@@ -0,0 +1,76 @@
ingress:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: main
hosts:
- host: {{{ .Env.DOMAIN_DEX }}}
paths:
- path: /
pathType: Prefix
tls:
- hosts:
- {{{ .Env.DOMAIN_DEX }}}
secretName: dex-cert
envVars:
- name: FORGEJO_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: dex-forgejo-client
key: clientSecret
- name: FORGEJO_CLIENT_ID
valueFrom:
secretKeyRef:
name: dex-forgejo-client
key: clientID
- name: OIDC_DEX_GRAFANA_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: dex-grafana-client
key: clientSecret
- name: OIDC_DEX_ARGO_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: dex-argo-client
key: clientSecret
- name: LOG_LEVEL
value: debug
config:
# Set it to a valid URL
issuer: https://{{{ .Env.DOMAIN_DEX }}}
# See https://dexidp.io/docs/storage/ for more options
storage:
type: memory
oauth2:
skipApprovalScreen: true
alwaysShowLoginScreen: false
connectors:
- type: gitea
id: gitea
name: Forgejo
config:
clientID: "$FORGEJO_CLIENT_ID"
clientSecret: "$FORGEJO_CLIENT_SECRET"
redirectURI: https://{{{ .Env.DOMAIN_DEX }}}/callback
baseURL: https://edp.buildth.ing
# loadAllGroups: true
orgs:
- name: DevFW
enablePasswordDB: false
staticClients:
- id: controller-argocd-dex
name: ArgoCD Client
redirectURIs:
- "https://{{{ .Env.DOMAIN_ARGOCD }}}/auth/callback"
secretEnv: "OIDC_DEX_ARGO_CLIENT_SECRET"
- id: grafana
redirectURIs:
- "https://{{{ .Env.DOMAIN_GRAFANA }}}/login/generic_oauth"
name: "Grafana"
secretEnv: "OIDC_DEX_GRAFANA_CLIENT_SECRET"

View file

@@ -18,15 +18,9 @@ spec:
name: in-cluster
namespace: gitea
sources:
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/DevFW-CICD/forgejo-helm.git
- repoURL: https://code.forgejo.org/forgejo-helm/forgejo-helm.git
path: .
# first check out the desired version (example v9.0.0): https://code.forgejo.org/forgejo-helm/forgejo-helm/src/tag/v9.0.0/Chart.yaml
# (note that the chart version is not the same as the forgejo application version, which is specified in the above Chart.yaml file)
# then use the devops pipeline and select development, forgejo and the desired version (example v9.0.0):
# https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/DevFW-CICD/devops-pipelines/actions?workflow=update-helm-depends.yaml&actor=0&status=0
# finally update the desired version here and include "-depends", it is created by the devops pipeline.
# why do we have an added "-depends" tag? it resolves rate limitings when downloading helm OCI dependencies
targetRevision: v12.0.0-depends
targetRevision: v12.0.0
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/forgejo/forgejo-server/values.yaml

View file

@@ -0,0 +1,91 @@
apiVersion: batch/v1
kind: CronJob
metadata:
name: forgejo-s3-backup
namespace: gitea
spec:
schedule: "0 1 * * *"
concurrencyPolicy: "Forbid"
successfulJobsHistoryLimit: 5
failedJobsHistoryLimit: 5
startingDeadlineSeconds: 600 # 10 minutes
jobTemplate:
spec:
# 1350s per attempt: (60 min until the next backup - 10 min starting deadline - ~300s time-sync buffer) / backoffLimit, so retries still fit in the window (2 x 1350s = 2700s)
activeDeadlineSeconds: 1350
backoffLimit: 2
ttlSecondsAfterFinished: 259200 # 3 days
template:
spec:
containers:
- name: rclone
image: rclone/rclone:1.70
imagePullPolicy: IfNotPresent
env:
- name: SOURCE_BUCKET
valueFrom:
secretKeyRef:
name: forgejo-cloud-credentials
key: bucket-name
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: forgejo-cloud-credentials
key: access-key
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: forgejo-cloud-credentials
key: secret-key
volumeMounts:
- name: rclone-config
mountPath: /config/rclone
readOnly: true
- name: backup-dir
mountPath: /backup
readOnly: false
command:
- /bin/sh
- -c
- |
rclone sync source:/${SOURCE_BUCKET} /backup -v --ignore-checksum
restartPolicy: OnFailure
volumes:
- name: rclone-config
secret:
secretName: forgejo-s3-backup
- name: backup-dir
persistentVolumeClaim:
claimName: s3-backup
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: s3-backup
namespace: gitea
annotations:
everest.io/disk-volume-type: GPSSD
everest.io/crypt-key-id: {{{ .Env.PVC_KMS_KEY_ID }}}
spec:
storageClassName: csi-disk
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
---
apiVersion: v1
kind: Secret
metadata:
name: forgejo-s3-backup
namespace: gitea
type: Opaque
stringData:
rclone.conf: |
[source]
type = s3
provider = HuaweiOBS
env_auth = true
endpoint = obs.eu-de.otc.t-systems.com
region = eu-de
acl = private
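
To smoke-test the backup without waiting for the 01:00 schedule, a one-off Job can be spawned from this CronJob, e.g. kubectl create job --from=cronjob/forgejo-s3-backup s3-backup-manual -n gitea (the job name is arbitrary).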

View file

@@ -1,3 +1,4 @@
# This is only used for deploying older versions of infra-catalogue where the bucket name is not an output of the terragrunt modules
{{{- define "BUCKET_NAME" -}}}
{{{- if (getenv "FORGEJO_BUCKET_NAME") -}}}
{{{ getenv "FORGEJO_BUCKET_NAME" }}}
@@ -8,7 +9,7 @@ edp-forgejo-{{{ getenv "CLUSTER_ENVIRONMENT" }}}
# We use recreate to make sure only one instance with one version is running, because Forgejo might break or data gets inconsistent.
strategy:
type: Recreate
@@ -27,8 +28,10 @@ postgresql-ha:
persistence:
enabled: true
size: 200Gi
storageClass: csi-disk
annotations:
everest.io/crypt-key-id: {{{ .Env.PVC_KMS_KEY_ID }}}
everest.io/disk-volume-type: GPSSD
test:
enabled: false
@@ -146,6 +149,7 @@ gitea:
service:
DISABLE_REGISTRATION: true
ENABLE_NOTIFY_MAIL: true
other:
SHOW_FOOTER_VERSION: false
@@ -173,7 +177,7 @@ service:
nodePort: 32222
externalTrafficPolicy: Cluster
annotations:
kubernetes.io/elb.id: {{{ .Env.LOADBALANCER_ID }}}
kubernetes.io/elb.id: {{{ .Env.LOADBALANCER_ID }}}
image:
pullPolicy: "IfNotPresent"
@@ -181,8 +185,7 @@
#tag: "8.0.3"
# Adds -rootless suffix to image name
# rootless: true
#fullOverride: {{{ getenv "CLIENT_REPO_DOMAIN" }}}/devfw-cicd/edp-forgejo:v1.1.0-edp-v11.0.3
fullOverride: {{{ getenv "CLIENT_REPO_DOMAIN" }}}/devfw-cicd/edp-forgejo:osctest
fullOverride: {{{ getenv "CLIENT_REPO_DOMAIN" }}}/devfw-cicd/edp-forgejo:{{{ .Env.FORGEJO_IMAGE_TAG }}}
forgejo:
runner:

View file

@@ -0,0 +1,29 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: garm
namespace: argocd
labels:
env: dev
spec:
project: default
syncPolicy:
automated:
selfHeal: true
syncOptions:
- CreateNamespace=true
retry:
limit: -1
destination:
name: in-cluster
namespace: garm
sources:
- repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm
path: charts/garm
targetRevision: v0.0.4
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/garm/garm/values.yaml
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
ref: values

View file

@@ -0,0 +1,23 @@
ingress:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: main
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
hosts:
- host: garm.{{{ .Env.DOMAIN_GITEA }}}
paths:
- path: /
pathType: Prefix
tls:
- secretName: garm-net-tls
hosts:
- garm.{{{ .Env.DOMAIN_GITEA }}}
# Credentials and Secrets
credentials:
edgeConnect:
existingSecretName: "edge-credential"
gitea:
url: "https://{{{ .Env.DOMAIN_GITEA }}}" # Required

View file

@@ -1,9 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: simple-user-secret
namespace: observability
type: Opaque
stringData:
username: simple-user
password: simple-password

View file

@@ -6,12 +6,51 @@ metadata:
dashboards: "grafana"
spec:
persistentVolumeClaim:
metadata:
annotations:
everest.io/disk-volume-type: GPSSD
everest.io/crypt-key-id: {{{ .Env.PVC_KMS_KEY_ID }}}
spec:
storageClassName: csi-disk
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
deployment:
spec:
template:
spec:
containers:
- name: grafana
env:
- name: OAUTH_CLIENT_SECRET
valueFrom:
secretKeyRef:
key: clientSecret
name: dex-grafana-client
config:
log.console:
level: debug
server:
root_url: "https://{{{ .Env.DOMAIN_GRAFANA }}}"
auth:
disable_login: "true"
disable_login_form: "true"
auth.generic_oauth:
enabled: "true"
name: Forgejo
allow_sign_up: "true"
use_refresh_token: "true"
client_id: grafana
client_secret: $__env{OAUTH_CLIENT_SECRET}
scopes: openid email profile offline_access groups
auth_url: https://{{{ .Env.DOMAIN_DEX }}}/auth
token_url: https://{{{ .Env.DOMAIN_DEX }}}/token
api_url: https://{{{ .Env.DOMAIN_DEX }}}/userinfo
redirect_uri: https://{{{ .Env.DOMAIN_GRAFANA }}}/login/generic_oauth
role_attribute_path: "contains(groups[*], 'DevFW') && 'GrafanaAdmin' || 'None'"
allow_assign_grafana_admin: "true"
ingress:
metadata:
annotations:
@@ -20,7 +59,7 @@ spec:
spec:
ingressClassName: nginx
rules:
- host: grafana.{{{ .Env.DOMAIN }}}
- host: {{{ .Env.DOMAIN_GRAFANA }}}
http:
paths:
- backend:
@@ -32,5 +71,5 @@ spec:
pathType: Prefix
tls:
- hosts:
- grafana.{{{ .Env.DOMAIN }}}
- {{{ .Env.DOMAIN_GRAFANA }}}
secretName: grafana-net-tls

View file

@@ -11,8 +11,30 @@ spec:
expr: sum by(cluster_environment) (up{pod=~"forgejo-server-.*"}) < 1
for: 30s
labels:
severity: major
severity: critical
job: "{{ $labels.job }}"
annotations:
value: "{{ $value }}"
description: 'forgejo is down in cluster environment {{ $labels.cluster_environment }}'
- name: forgejo-backup
rules:
- alert: forgejo s3 backup job failed
expr: max by(cluster_environment) (kube_job_status_failed{job_name=~"forgejo-s3-backup-.*"}) != 0
for: 30s
labels:
severity: critical
job: "{{ $labels.job }}"
annotations:
value: "{{ $value }}"
description: 'forgejo s3 backup job failed in cluster environment {{ $labels.cluster_environment }}'
- name: disk-consumption-high
rules:
- alert: disk consumption high
expr: 1-(kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.6
for: 30s
labels:
severity: major
job: "{{ $labels.job }}"
annotations:
value: "{{ $value }}"
description: 'disk consumption of pvc {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is high in cluster environment {{ $labels.cluster_environment }}'
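
As a sanity check on the 0.6 threshold: a PVC with 10Gi capacity and 3.5Gi available yields 1 - 3.5/10 = 0.65 > 0.6, so the alert fires once the condition has held for the 30s "for" window.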

View file

@@ -9,7 +9,9 @@ spec:
storageMetadata:
annotations:
everest.io/crypt-key-id: {{{ .Env.PVC_KMS_KEY_ID }}}
everest.io/disk-volume-type: GPSSD
storage:
storageClassName: csi-disk
accessModes:
- ReadWriteOnce
resources:
@@ -21,4 +23,4 @@ spec:
cpu: 500m
limits:
memory: 10Gi
cpu: 2
cpu: 2

View file

@@ -5,11 +5,13 @@ metadata:
namespace: observability
spec:
username: simple-user
password: simple-password
passwordRef:
key: password
name: simple-user-secret
targetRefs:
- static:
url: http://vmsingle-o12y:8429
paths: ["/api/v1/write"]
- static:
url: http://vlogs-victorialogs:9428
paths: ["/insert/elasticsearch/.*"]
paths: ["/insert/elasticsearch/.*"]

View file

@@ -201,13 +201,13 @@ defaultRules:
create: true
rules: {}
kubernetesSystemControllerManager:
create: true
create: false
rules: {}
kubeScheduler:
create: true
create: false
rules: {}
kubernetesSystemScheduler:
create: true
create: false
rules: {}
kubeStateMetrics:
create: true
@@ -289,7 +289,9 @@ vmsingle:
storageMetadata:
annotations:
everest.io/crypt-key-id: {{{ .Env.PVC_KMS_KEY_ID }}}
everest.io/disk-volume-type: GPSSD
storage:
storageClassName: csi-disk
accessModes:
- ReadWriteOnce
resources:
@@ -536,108 +538,30 @@ alertmanager:
# If you're migrating existing config, please make sure that `.Values.alertmanager.config`:
# - with `useManagedConfig: false` has structure described [here](https://prometheus.io/docs/alerting/latest/configuration/).
# - with `useManagedConfig: true` has structure described [here](https://docs.victoriametrics.com/operator/api/#vmalertmanagerconfig).
useManagedConfig: false
useManagedConfig: true
# -- (object) Alertmanager configuration
config:
route:
receiver: "blackhole"
# group_by: ["alertgroup", "job"]
# group_wait: 30s
# group_interval: 5m
# repeat_interval: 12h
# routes:
#
# # Duplicate code_owner routes to teams
# # These will send alerts to team channels but continue
# # processing through the rest of the tree to handled by on-call
# - matchers:
# - code_owner_channel!=""
# - severity=~"info|warning|critical"
# group_by: ["code_owner_channel", "alertgroup", "job"]
# receiver: slack-code-owners
#
# # Standard on-call routes
# - matchers:
# - severity=~"info|warning|critical"
# receiver: slack-monitoring
# continue: true
#
# inhibit_rules:
# - target_matchers:
# - severity=~"warning|info"
# source_matchers:
# - severity=critical
# equal:
# - cluster
# - namespace
# - alertname
# - target_matchers:
# - severity=info
# source_matchers:
# - severity=warning
# equal:
# - cluster
# - namespace
# - alertname
# - target_matchers:
# - severity=info
# source_matchers:
# - alertname=InfoInhibitor
# equal:
# - cluster
# - namespace
routes:
- matchers:
- severity=~"critical|major"
receiver: outlook
receivers:
- name: blackhole
# - name: "slack-monitoring"
# slack_configs:
# - channel: "#channel"
# send_resolved: true
# title: '{{ template "slack.monzo.title" . }}'
# icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}'
# color: '{{ template "slack.monzo.color" . }}'
# text: '{{ template "slack.monzo.text" . }}'
# actions:
# - type: button
# text: "Runbook :green_book:"
# url: "{{ (index .Alerts 0).Annotations.runbook_url }}"
# - type: button
# text: "Query :mag:"
# url: "{{ (index .Alerts 0).GeneratorURL }}"
# - type: button
# text: "Dashboard :grafana:"
# url: "{{ (index .Alerts 0).Annotations.dashboard }}"
# - type: button
# text: "Silence :no_bell:"
# url: '{{ template "__alert_silence_link" . }}'
# - type: button
# text: '{{ template "slack.monzo.link_button_text" . }}'
# url: "{{ .CommonAnnotations.link_url }}"
# - name: slack-code-owners
# slack_configs:
# - channel: "#{{ .CommonLabels.code_owner_channel }}"
# send_resolved: true
# title: '{{ template "slack.monzo.title" . }}'
# icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}'
# color: '{{ template "slack.monzo.color" . }}'
# text: '{{ template "slack.monzo.text" . }}'
# actions:
# - type: button
# text: "Runbook :green_book:"
# url: "{{ (index .Alerts 0).Annotations.runbook }}"
# - type: button
# text: "Query :mag:"
# url: "{{ (index .Alerts 0).GeneratorURL }}"
# - type: button
# text: "Dashboard :grafana:"
# url: "{{ (index .Alerts 0).Annotations.dashboard }}"
# - type: button
# text: "Silence :no_bell:"
# url: '{{ template "__alert_silence_link" . }}'
# - type: button
# text: '{{ template "slack.monzo.link_button_text" . }}'
# url: "{{ .CommonAnnotations.link_url }}"
#
- name: outlook
email_configs:
- smarthost: 'mail.mms-support.de:465'
auth_username: 'ipcei-cis-devfw@mms-support.de'
auth_password:
name: email-user-credentials
key: connection-string
from: '"IPCEI CIS DevFW" <ipcei-cis-devfw@mms-support.de>'
to: 'f9f9953a.mg.telekom.de@de.teams.ms'
headers:
subject: 'Grafana Mail Alerts'
require_tls: false
# -- Better alert templates for [slack source](https://gist.github.com/milesbxf/e2744fc90e9c41b47aa47925f8ff6512)
monzoTemplate:
enabled: true
@@ -880,7 +804,7 @@ grafana:
enabled: false
# all values for grafana helm chart can be specified here
persistence:
enabled: true
enabled: false
type: pvc
storageClassName: "default"
grafana.ini:
@@ -1096,7 +1020,7 @@ kubeApiServer:
# Component scraping the kube controller manager
kubeControllerManager:
# -- Enable kube controller manager metrics scraping
enabled: true
enabled: false
# -- If your kube controller manager is not deployed as a pod, specify IPs it can be found on
endpoints: []
@@ -1229,7 +1153,7 @@ kubeEtcd:
# Component scraping kube scheduler
kubeScheduler:
# -- Enable KubeScheduler metrics scraping
enabled: true
enabled: false
# -- If your kube scheduler is not deployed as a pod, specify IPs it can be found on
endpoints: []
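
With useManagedConfig: true, the outlook receiver's auth_password above is a VMAlertmanagerConfig-style secret reference (name/key) rather than a literal. A sketch of the referenced Secret; the namespace is an assumption (it must live where the managed Alertmanager runs) and the value is a placeholder:

apiVersion: v1
kind: Secret
metadata:
  name: email-user-credentials
  namespace: observability # assumed
type: Opaque
stringData:
  connection-string: change-me # SMTP password for the auth_username above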

View file

@@ -18,9 +18,9 @@ spec:
name: in-cluster
namespace: ingress-nginx
sources:
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/DevFW-CICD/ingress-nginx-helm.git
- repoURL: https://github.com/kubernetes/ingress-nginx.git
path: charts/ingress-nginx
targetRevision: helm-chart-4.12.4-depends
targetRevision: helm-chart-4.12.1
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/otc/ingress-nginx/values.yaml

View file

@@ -0,0 +1,30 @@
# helm upgrade --install --create-namespace --namespace terralist terralist oci://ghcr.io/terralist/helm-charts/terralist -f terralist-values.yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: terralist
namespace: argocd
labels:
env: dev
spec:
project: default
syncPolicy:
automated:
selfHeal: true
syncOptions:
- CreateNamespace=true
retry:
limit: -1
destination:
name: in-cluster
namespace: terralist
sources:
- repoURL: https://github.com/terralist/helm-charts
path: charts/terralist
targetRevision: terralist-0.8.1
helm:
valueFiles:
- $values/{{{ .Env.CLIENT_REPO_ID }}}/{{{ .Env.DOMAIN }}}/stacks/terralist/terralist/values.yaml
- repoURL: https://{{{ .Env.CLIENT_REPO_DOMAIN }}}/{{{ .Env.CLIENT_REPO_ORG_NAME }}}
targetRevision: HEAD
ref: values

View file

@@ -0,0 +1,87 @@
controllers:
main:
strategy: Recreate
containers:
app:
env:
- name: TERRALIST_OAUTH_PROVIDER
value: oidc
- name: TERRALIST_OI_CLIENT_ID
valueFrom:
secretKeyRef:
name: oidc-credentials
key: client-id
- name: TERRALIST_OI_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: oidc-credentials
key: client-secret
- name: TERRALIST_OI_AUTHORIZE_URL
valueFrom:
secretKeyRef:
name: oidc-credentials
key: authorize-url
- name: TERRALIST_OI_TOKEN_URL
valueFrom:
secretKeyRef:
name: oidc-credentials
key: token-url
- name: TERRALIST_OI_USERINFO_URL
valueFrom:
secretKeyRef:
name: oidc-credentials
key: userinfo-url
- name: TERRALIST_OI_SCOPE
valueFrom:
secretKeyRef:
name: oidc-credentials
key: scope
- name: TERRALIST_TOKEN_SIGNING_SECRET
valueFrom:
secretKeyRef:
name: terralist-secret
key: token-signing-secret
- name: TERRALIST_COOKIE_SECRET
valueFrom:
secretKeyRef:
name: terralist-secret
key: cookie-secret
- name: TERRALIST_URL
value: https://terralist.{{{ .Env.DOMAIN_GITEA }}}
- name: TERRALIST_SQLITE_PATH
value: /data/db.sqlite
- name: TERRALIST_LOCAL_STORE
value: /data/modules
- name: TERRALIST_PROVIDERS_ANONYMOUS_READ
value: "true"
ingress:
main:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: main
hosts:
- host: terralist.{{{ .Env.DOMAIN_GITEA }}}
paths:
- path: /
pathType: Prefix
service:
identifier: main
port: http
tls:
- hosts:
- terralist.{{{ .Env.DOMAIN_GITEA }}}
secretName: terralist-tls-secret
persistence:
data:
enabled: true
accessMode: ReadWriteOnce
size: 10Gi
retain: false
storageClass: "csi-disk"
annotations:
everest.io/disk-volume-type: GPSSD
globalMounts:
- path: /data
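
All TERRALIST_OI_* settings above are drawn from an oidc-credentials Secret that this changeset does not add. A sketch wired against the Dex endpoints configured earlier in this diff; the client-id/client-secret would have to match a corresponding Dex staticClient (also not part of this diff), so treat every value as a placeholder:

apiVersion: v1
kind: Secret
metadata:
  name: oidc-credentials
  namespace: terralist
type: Opaque
stringData:
  client-id: terralist
  client-secret: change-me
  authorize-url: https://dex.example.invalid/auth
  token-url: https://dex.example.invalid/token
  userinfo-url: https://dex.example.invalid/userinfo
  scope: openid email profile groups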