Jesse Houldsworth 2025-10-09 06:46:44 -07:00
parent 0bdb6f79a4
commit 00ed61ebb5
85 changed files with 3919 additions and 0 deletions

Copilot-instructions.md Normal file

@@ -0,0 +1,3 @@
# GitHub Copilot Instructions
## :lock: MANDATORY PACKAGE VERIFICATION POLICY
For any open-source package type supported by the JFrog Curation tool, your first action MUST be to use the JFrog MCP Server tool to verify the package's approval status before suggesting it. Only use packages confirmed as 'approved'. If a package is rejected, you must find and suggest an approved alternative. This initial step is a mandatory, non-negotiable rule.
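To make the policy concrete, here is a minimal sketch of the intended check-first flow (an illustration, not part of the committed file); `check_curation_status`, its endpoint, and the response shape are hypothetical stand-ins for the actual JFrog MCP Server tool call:
```
import requests  # assumed available; any HTTP client would do

# Hypothetical endpoint standing in for the JFrog MCP Server curation check.
CURATION_API = "https://example.jfrog.io/api/curation/status"

def check_curation_status(package_type: str, name: str, version: str) -> str:
    """Return 'approved' or 'rejected' for a package (assumed response shape)."""
    resp = requests.get(
        CURATION_API,
        params={"type": package_type, "name": name, "version": version},
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()["status"]

def suggest_package(package_type: str, name: str, version: str) -> str:
    # Verification is the mandatory first step: never suggest before checking.
    if check_curation_status(package_type, name, version) == "approved":
        return f"{name}=={version}"
    # Rejected: the caller must look up an approved alternative instead.
    raise ValueError(f"{name}=={version} is not approved; suggest an approved alternative")
```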

infra/.DS_Store vendored Normal file

Binary file not shown.

@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

@@ -0,0 +1,12 @@
apiVersion: v2
name: feast-feature-server
description: Feast Feature Server in Go or Python
type: application
version: 0.28.0
keywords:
- machine learning
- big data
- mlops
home: https://feast.dev/
sources:
- https://github.com/feast-dev/feast

@@ -0,0 +1,48 @@
# Feast Python / Go Feature Server Helm Charts
Current chart version is `0.28.0`
## Installation
Run the following commands to add the repository:
```
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```
Install the Feast Feature Server on Kubernetes.
A base64-encoded version of the `feature_store.yaml` file is needed. Helm install example:
```
helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 feature_store.yaml)
```
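As a cross-platform alternative to the shell substitution above, a short Python sketch (an illustration, not part of the chart) produces the same base64 value; the file path is assumed to be in the working directory:
```
import base64
from pathlib import Path

# Encode feature_store.yaml so it can be passed as feature_store_yaml_base64.
encoded = base64.b64encode(Path("feature_store.yaml").read_bytes()).decode("ascii")
print(encoded)
```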
## Tutorial
See [here](https://github.com/feast-dev/feast/tree/master/examples/python-helm-demo) for a sample tutorial on testing this helm chart with a demo feature repository and a local Redis instance.
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| feature_store_yaml_base64 | string | `""` | [required] a base64 encoded version of feature_store.yaml |
| fullnameOverride | string | `""` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"feastdev/feature-server"` | Docker image for Feature Server repository |
| image.tag | string | `"0.28.0"` | The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) |
| imagePullSecrets | list | `[]` | |
| livenessProbe.initialDelaySeconds | int | `30` | |
| livenessProbe.periodSeconds | int | `30` | |
| nameOverride | string | `""` | |
| nodeSelector | object | `{}` | |
| podAnnotations | object | `{}` | |
| podSecurityContext | object | `{}` | |
| readinessProbe.initialDelaySeconds | int | `20` | |
| readinessProbe.periodSeconds | int | `10` | |
| replicaCount | int | `1` | |
| resources | object | `{}` | |
| securityContext | object | `{}` | |
| service.port | int | `80` | |
| service.type | string | `"ClusterIP"` | |
| tolerations | list | `[]` | |

@@ -0,0 +1,26 @@
# Feast Python / Go Feature Server Helm Charts
Current chart version is `{{ template "chart.version" . }}`
## Installation
Run the following commands to add the repository:
```
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```
Install the Feast Feature Server on Kubernetes.
A base64-encoded version of the `feature_store.yaml` file is needed. Helm install example:
```
helm install feast-feature-server feast-charts/feast-feature-server --set feature_store_yaml_base64=$(base64 feature_store.yaml)
```
## Tutorial
See [here](https://github.com/feast-dev/feast/tree/master/examples/python-helm-demo) for a sample tutorial on testing this helm chart with a demo feature repository and a local Redis instance.
{{ template "chart.requirementsSection" . }}
{{ template "chart.valuesSection" . }}

@@ -0,0 +1,52 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "feast-feature-server.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "feast-feature-server.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "feast-feature-server.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "feast-feature-server.labels" -}}
helm.sh/chart: {{ include "feast-feature-server.chart" . }}
{{ include "feast-feature-server.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "feast-feature-server.selectorLabels" -}}
app.kubernetes.io/name: {{ include "feast-feature-server.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

@@ -0,0 +1,64 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "feast-feature-server.fullname" . }}
  labels:
    {{- include "feast-feature-server.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "feast-feature-server.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "feast-feature-server.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          env:
            - name: FEATURE_STORE_YAML_BASE64
              value: {{ .Values.feature_store_yaml_base64 }}
          command: ["feast", "serve", "-h", "0.0.0.0"]
          ports:
            - name: http
              containerPort: 6566
              protocol: TCP
          livenessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
          readinessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}

@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "feast-feature-server.name" . }}
  labels:
    {{- include "feast-feature-server.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
  selector:
    {{- include "feast-feature-server.selectorLabels" . | nindent 4 }}

@@ -0,0 +1,62 @@
# Default values for feast.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1

image:
  # image.repository -- Docker image for Feature Server repository
  repository: feastdev/feature-server
  pullPolicy: IfNotPresent
  # image.tag -- The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms)
  tag: 0.28.0

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

# feature_store_yaml_base64 -- [required] a base64 encoded version of feature_store.yaml
feature_store_yaml_base64: ""

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  type: ClusterIP
  port: 80

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

nodeSelector: {}

tolerations: []

affinity: {}

livenessProbe:
  initialDelaySeconds: 30
  periodSeconds: 30

readinessProbe:
  initialDelaySeconds: 20
  periodSeconds: 10

@@ -0,0 +1,22 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

@@ -0,0 +1,11 @@
apiVersion: v1
description: Feature store for machine learning
name: feast
version: 0.28.0
keywords:
- machine learning
- big data
- mlops
home: https://feast.dev/
sources:
- https://github.com/feast-dev/feast

@@ -0,0 +1,82 @@
# Feast Java Helm Charts (alpha)
This repo contains Helm charts for Feast Java components that are being installed on Kubernetes:
* Feast (root chart): The complete Helm chart containing all Feast components and dependencies. Most users will use this chart, but can selectively enable/disable subcharts using the values.yaml file.
* [Feature Server](charts/feature-server): A highly performant JVM-based implementation of the feature server.
* [Transformation Service](charts/transformation-service): Transformation server for calculating on-demand features.
* Redis: (Optional) One of the possible options for an online store used by the Feature Server.
## Chart: Feast
Feature store for machine learning. Current chart version is `0.28.0`.
## Installation
Charts are published to `https://feast-helm-charts.storage.googleapis.com`. Please note that this URL is different from the URL we previously used (`feast-charts`).
Run the following commands to add the repository:
```
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```
Install Feast:
```
helm install feast-release feast-charts/feast
```
## Customize your installation
This Feast chart comes with a [values.yaml](values.yaml) that allows for configuration and customization of all sub-charts.
In order to modify the default configuration of the Feature Server, please use the `application-override.yaml` key in the `values.yaml` file in this chart. A code snippet example:
```
feature-server:
  application-override.yaml:
    enabled: true
    feast:
      active_store: online
      stores:
      - name: online
        type: REDIS
        config:
          host: localhost
          port: 6379
      entityKeySerializationVersion: 2
global:
  registry:
    path: gs://[YOUR GCS BUCKET]/demo-repo/registry.db
    cache_ttl_seconds: 60
  project: feast_java_demo
```
For the default configuration, please see the [Feature Server Configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml).
For more details, please see: https://docs.feast.dev/how-to-guides/running-feast-in-production
## Example
See [here](https://github.com/feast-dev/feast/tree/master/examples/java-demo) for a sample tutorial on testing this helm chart with a demo feature repository and a local Redis instance.
## Requirements
| Repository | Name | Version |
|------------|------|---------|
| https://charts.helm.sh/stable | redis | 10.5.6 |
| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.28.0 |
| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.28.0 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| feature-server.enabled | bool | `true` | |
| global.project | string | `"default"` | Project from feature_store.yaml |
| global.registry | object | `{"cache_ttl_seconds":0,"path":"gs://path/to/registry.db"}` | Information about registry managed by Feast Python SDK (must be in sync with feature_store.yaml) |
| global.registry.cache_ttl_seconds | int | `0` | Registry cache (in memory) will be refreshed on this interval |
| global.registry.path | string | `"gs://path/to/registry.db"` | Path to the registry file managed by Feast Python SDK |
| redis.enabled | bool | `false` | Flag to install Redis |
| redis.usePassword | bool | `false` | Disable redis password |
| transformation-service.enabled | bool | `true` | |

@@ -0,0 +1,66 @@
# Feast Java Helm Charts (alpha)
This repo contains Helm charts for Feast Java components that are being installed on Kubernetes:
* Feast (root chart): The complete Helm chart containing all Feast components and dependencies. Most users will use this chart, but can selectively enable/disable subcharts using the values.yaml file.
* [Feature Server](charts/feature-server): A highly performant JVM-based implementation of the feature server.
* [Transformation Service](charts/transformation-service): Transformation server for calculating on-demand features.
* Redis: (Optional) One of the possible options for an online store used by the Feature Server.
## Chart: Feast
{{ template "chart.description" . }} Current chart version is `{{ template "chart.version" . }}`
## Installation
Charts are published to `https://feast-helm-charts.storage.googleapis.com`. Please note that this URL is different from the URL we previously used (`feast-charts`).
Run the following commands to add the repository:
```
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```
Install Feast:
```
helm install feast-release feast-charts/feast
```
## Customize your installation
This Feast chart comes with a [values.yaml](values.yaml) that allows for configuration and customization of all sub-charts.
In order to modify the default configuration of the Feature Server, please use the `application-override.yaml` key in the `values.yaml` file in this chart. A code snippet example:
```
feature-server:
  application-override.yaml:
    enabled: true
    feast:
      active_store: online
      stores:
      - name: online
        type: REDIS
        config:
          host: localhost
          port: 6379
      entityKeySerializationVersion: 2
global:
  registry:
    path: gs://[YOUR GCS BUCKET]/demo-repo/registry.db
    cache_ttl_seconds: 60
  project: feast_java_demo
```
For the default configuration, please see the [Feature Server Configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml).
For more details, please see: https://docs.feast.dev/how-to-guides/running-feast-in-production
## Example
See [here](https://github.com/feast-dev/feast/tree/master/examples/java-demo) for a sample tutorial on testing this helm chart with a demo feature repository and a local Redis instance.
{{ template "chart.requirementsSection" . }}
{{ template "chart.valuesSection" . }}

@@ -0,0 +1,10 @@
apiVersion: v1
description: "Feast Feature Server: Online feature serving service for Feast"
name: feature-server
version: 0.28.0
appVersion: v0.28.0
keywords:
- machine learning
- big data
- mlops
home: https://github.com/feast-dev/feast

@@ -0,0 +1,67 @@
# feature-server
![Version: 0.28.0](https://img.shields.io/badge/Version-0.28.0-informational?style=flat-square) ![AppVersion: v0.28.0](https://img.shields.io/badge/AppVersion-v0.28.0-informational?style=flat-square)
Feast Feature Server: Online feature serving service for Feast
**Homepage:** <https://github.com/feast-dev/feast>
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| "application-generated.yaml".enabled | bool | `true` | Flag to include Helm generated configuration. Please set `application-override.yaml` to override this configuration. |
| "application-override.yaml" | object | `{"enabled":true}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a ConfigMap. `application-override.yaml` has a higher precedence than `application-secret.yaml` |
| "application-secret.yaml" | object | `{"enabled":false}` | Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a Secret. `application-override.yaml` has a higher precedence than `application-secret.yaml`. It is recommended to either set `application-override.yaml` or `application-secret.yaml` only to simplify config management. |
| "application.yaml".enabled | bool | `true` | Flag to include the default [configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Please set `application-override.yaml` to override this configuration. |
| envOverrides | object | `{}` | Extra environment variables to set |
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository |
| image.tag | string | `"0.28.0"` | Image tag |
| ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress |
| ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth |
| ingress.grpc.class | string | `"nginx"` | Which ingress controller to use |
| ingress.grpc.enabled | bool | `false` | Flag to create an ingress resource for the service |
| ingress.grpc.hosts | list | `[]` | List of hostnames to match when routing requests |
| ingress.grpc.https.enabled | bool | `true` | Flag to enable HTTPS |
| ingress.grpc.https.secretNames | object | `{}` | Map of hostname to TLS secret name |
| ingress.grpc.whitelist | string | `""` | Allowed client IP source ranges |
| ingress.http.annotations | object | `{}` | Extra annotations for the ingress |
| ingress.http.auth.authUrl | string | `"http://auth-server.auth-ns.svc.cluster.local/auth"` | URL to an existing authentication service |
| ingress.http.auth.enabled | bool | `false` | Flag to enable auth |
| ingress.http.class | string | `"nginx"` | Which ingress controller to use |
| ingress.http.enabled | bool | `false` | Flag to create an ingress resource for the service |
| ingress.http.hosts | list | `[]` | List of hostnames to match when routing requests |
| ingress.http.https.enabled | bool | `true` | Flag to enable HTTPS |
| ingress.http.https.secretNames | object | `{}` | Map of hostname to TLS secret name |
| ingress.http.whitelist | string | `""` | Allowed client IP source ranges |
| javaOpts | string | `nil` | [JVM options](https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html). For better performance, it is advised to set the min and max heap: <br> `-Xms2048m -Xmx2048m` |
| livenessProbe.enabled | bool | `true` | Flag to enable the probe |
| livenessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed |
| livenessProbe.initialDelaySeconds | int | `60` | Delay before the probe is initiated |
| livenessProbe.periodSeconds | int | `10` | How often to perform the probe |
| livenessProbe.successThreshold | int | `1` | Min consecutive successes for the probe to be considered successful |
| livenessProbe.timeoutSeconds | int | `5` | When the probe times out |
| logLevel | string | `"WARN"` | Default log level, use either one of `DEBUG`, `INFO`, `WARN` or `ERROR` |
| logType | string | `"Console"` | Log format, either `JSON` or `Console` |
| nodeSelector | object | `{}` | Node labels for pod assignment |
| podAnnotations | object | `{}` | Annotations to be added to Feast Serving pods |
| podLabels | object | `{}` | Labels to be added to Feast Serving pods |
| readinessProbe.enabled | bool | `true` | Flag to enable the probe |
| readinessProbe.failureThreshold | int | `5` | Min consecutive failures for the probe to be considered failed |
| readinessProbe.initialDelaySeconds | int | `15` | Delay before the probe is initiated |
| readinessProbe.periodSeconds | int | `10` | How often to perform the probe |
| readinessProbe.successThreshold | int | `1` | Min consecutive successes for the probe to be considered successful |
| readinessProbe.timeoutSeconds | int | `10` | When the probe times out |
| replicaCount | int | `1` | Number of pods that will be created |
| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) |
| secrets | list | `[]` | List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/<secret name>. |
| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to |
| service.grpc.port | int | `6566` | Service port for GRPC requests |
| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests |
| service.type | string | `"ClusterIP"` | Kubernetes service type |
| transformationService.host | string | `""` | |
| transformationService.port | int | `6566` | |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

@@ -0,0 +1,45 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "feature-server.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "feature-server.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "feature-server.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Common labels
*/}}
{{- define "feature-server.labels" -}}
app.kubernetes.io/name: {{ include "feature-server.name" . }}
helm.sh/chart: {{ include "feature-server.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}

@@ -0,0 +1,68 @@
{{- /*
This takes an array of four values:
- the top context
- the feast component
- the service protocol
- the ingress context
*/ -}}
{{- define "feast.ingress" -}}
{{- $top := (index . 0) -}}
{{- $component := (index . 1) -}}
{{- $protocol := (index . 2) -}}
{{- $ingressValues := (index . 3) -}}
apiVersion: extensions/v1beta1
kind: Ingress
{{ include "feast.ingress.metadata" . }}
spec:
rules:
{{- range $host := $ingressValues.hosts }}
- host: {{ $host }}
http:
paths:
- path: /
backend:
serviceName: {{ include (printf "feast-%s.fullname" $component) $top }}
servicePort: {{ index $top.Values "service" $protocol "port" }}
{{- end }}
{{- if $ingressValues.https.enabled }}
tls:
{{- range $host := $ingressValues.hosts }}
- secretName: {{ index $ingressValues.https.secretNames $host | default (splitList "." $host | rest | join "-" | printf "%s-tls") }}
hosts:
- {{ $host }}
{{- end }}
{{- end -}}
{{- end -}}
{{- define "feast.ingress.metadata" -}}
{{- $commonMetadata := fromYaml (include "common.metadata" (first .)) }}
{{- $overrides := fromYaml (include "feast.ingress.metadata-overrides" .) -}}
{{- toYaml (merge $overrides $commonMetadata) -}}
{{- end -}}
{{- define "feast.ingress.metadata-overrides" -}}
{{- $top := (index . 0) -}}
{{- $component := (index . 1) -}}
{{- $protocol := (index . 2) -}}
{{- $ingressValues := (index . 3) -}}
{{- $commonFullname := include "common.fullname" $top }}
metadata:
name: {{ $commonFullname }}-{{ $component }}-{{ $protocol }}
annotations:
kubernetes.io/ingress.class: {{ $ingressValues.class | quote }}
{{- if (and (eq $ingressValues.class "nginx") $ingressValues.auth.enabled) }}
nginx.ingress.kubernetes.io/auth-url: {{ $ingressValues.auth.authUrl | quote }}
nginx.ingress.kubernetes.io/auth-response-headers: "x-auth-request-email, x-auth-request-user"
nginx.ingress.kubernetes.io/auth-signin: "https://{{ $ingressValues.auth.signinHost | default (splitList "." (index $ingressValues.hosts 0) | rest | join "." | printf "auth.%s")}}/oauth2/start?rd=/r/$host/$request_uri"
{{- end }}
{{- if (and (eq $ingressValues.class "nginx") $ingressValues.whitelist) }}
nginx.ingress.kubernetes.io/whitelist-source-range: {{ $ingressValues.whitelist | quote -}}
{{- end }}
{{- if (and (eq $ingressValues.class "nginx") (eq $protocol "grpc") ) }}
# TODO: Allow choice of GRPC/GRPCS
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
{{- end }}
{{- if $ingressValues.annotations -}}
{{ include "common.annote" $ingressValues.annotations | indent 4 }}
{{- end }}
{{- end -}}

@@ -0,0 +1,50 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ template "feature-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "feature-server.name" . }}
    component: serving
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
data:
  application-generated.yaml: |
    {{- if index .Values "application-generated.yaml" "enabled" }}
    feast:
      registry: {{ .Values.global.registry.path }}
      registryRefreshInterval: {{ .Values.global.registry.cache_ttl_seconds }}
      {{- if .Values.transformationService.host }}
      transformationServiceEndpoint: {{ .Values.transformationService.host}}:{{ .Values.transformationService.port }}
      {{- else }}
      transformationServiceEndpoint: {{ .Release.Name }}-transformation-service:{{ .Values.transformationService.port }}
      {{- end }}
      activeStore: online
      stores:
      - name: online
        type: REDIS
        config:
          host: {{ .Release.Name }}-redis-master
          port: 6379
    grpc:
      server:
        port: {{ .Values.service.grpc.targetPort }}
    {{- end }}
  application-override.yaml: |
    {{- if index .Values "application-override.yaml" "enabled" }}
    {{- if index .Values "application-override.yaml" "feast" }}
    feast: {{- toYaml (index .Values "application-override.yaml" "feast") | nindent 6 }}
      registry: {{ .Values.global.registry.path }}
      registryRefreshInterval: {{ .Values.global.registry.cache_ttl_seconds }}
      project: {{ .Values.global.project }}
    {{- end }}
    {{- if index .Values "application-override.yaml" "rest" }}
    rest: {{- toYaml (index .Values "application-override.yaml" "rest") | nindent 6 }}
    {{- end }}
    {{- if index .Values "application-override.yaml" "grpc" }}
    grpc: {{- toYaml (index .Values "application-override.yaml" "grpc") | nindent 6 }}
    {{- end }}
    {{- end }}

@@ -0,0 +1,143 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "feature-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "feature-server.name" . }}
    component: serving
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ template "feature-server.name" . }}
      component: serving
      release: {{ .Release.Name }}
  template:
    metadata:
      annotations:
        checksum/configmap: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
        {{- if .Values.podAnnotations }}
        {{ toYaml .Values.podAnnotations | nindent 8 }}
        {{- end }}
      labels:
        app: {{ template "feature-server.name" . }}
        component: serving
        release: {{ .Release.Name }}
        {{- if .Values.podLabels }}
        {{ toYaml .Values.podLabels | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
      - name: {{ template "feature-server.fullname" . }}-config
        configMap:
          name: {{ template "feature-server.fullname" . }}
      - name: {{ template "feature-server.fullname" . }}-secret
        secret:
          secretName: {{ template "feature-server.fullname" . }}
      {{- range $secret := .Values.secrets }}
      - name: {{ $secret }}
        secret:
          secretName: {{ $secret }}
      {{- end }}
      containers:
      - name: {{ .Chart.Name }}
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        volumeMounts:
        - name: {{ template "feature-server.fullname" . }}-config
          mountPath: /etc/feast
        - name: {{ template "feature-server.fullname" . }}-secret
          mountPath: /etc/secrets/feast
          readOnly: true
        {{- range $secret := .Values.secrets }}
        - name: {{ $secret }}
          mountPath: "/etc/secrets/{{ $secret }}"
          readOnly: true
        {{- end }}
        env:
        - name: LOG_TYPE
          value: {{ .Values.logType | quote }}
        - name: LOG_LEVEL
          value: {{ .Values.logLevel | quote }}
        {{- if .Values.javaOpts }}
        - name: JAVA_TOOL_OPTIONS
          value: {{ .Values.javaOpts }}
        {{- end }}
        {{- range $key, $value := .Values.envOverrides }}
        - name: {{ printf "%s" $key | replace "." "_" | upper | quote }}
          {{- if eq (kindOf $value) "map" }}
          valueFrom:
            {{- toYaml $value | nindent 12 }}
          {{- else }}
          value: {{ $value | quote }}
          {{- end }}
        {{- end }}
        command:
        - java
        - -jar
        - /opt/feast/feast-serving.jar
        - {{ if index .Values "application.yaml" "enabled" -}}
          classpath:/application.yml
          {{- end }}
          {{- if index .Values "application-generated.yaml" "enabled" -}}
          ,file:/etc/feast/application-generated.yaml
          {{- end }}
          {{- if index .Values "application-secret.yaml" "enabled" -}}
          ,file:/etc/secrets/feast/application-secret.yaml
          {{- end }}
          {{- if index .Values "application-override.yaml" "enabled" -}}
          ,file:/etc/feast/application-override.yaml
          {{- end }}
        ports:
        - name: grpc
          containerPort: {{ .Values.service.grpc.targetPort }}
        {{- if .Values.livenessProbe.enabled }}
        livenessProbe:
          exec:
            command:
            - "grpc-health-probe"
            - "-addr=:{{ .Values.service.grpc.targetPort }}"
            - "-connect-timeout={{ .Values.livenessProbe.timeoutSeconds }}s"
            - "-rpc-timeout={{ .Values.livenessProbe.timeoutSeconds }}s"
          initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
          successThreshold: {{ .Values.livenessProbe.successThreshold }}
          timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
          failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
        {{- end }}
        {{- if .Values.readinessProbe.enabled }}
        readinessProbe:
          exec:
            command:
            - "grpc-health-probe"
            - "-addr=:{{ .Values.service.grpc.targetPort }}"
            - "-connect-timeout={{ .Values.readinessProbe.timeoutSeconds }}s"
            - "-rpc-timeout={{ .Values.readinessProbe.timeoutSeconds }}s"
          initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
          periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
          successThreshold: {{ .Values.readinessProbe.successThreshold }}
          timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
          failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
        {{- end }}
        resources:
          {{- toYaml .Values.resources | nindent 10 }}

@@ -0,0 +1,7 @@
{{- if .Values.ingress.http.enabled -}}
{{ template "feast.ingress" (list . "serving" "http" .Values.ingress.http) }}
{{- end }}
---
{{ if .Values.ingress.grpc.enabled -}}
{{ template "feast.ingress" (list . "serving" "grpc" .Values.ingress.grpc) }}
{{- end }}

@@ -0,0 +1,23 @@
apiVersion: v1
kind: Secret
metadata:
  name: {{ template "feature-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "feature-server.name" . }}
    component: serving
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
type: Opaque
stringData:
  application-secret.yaml: |
    {{- if index .Values "application-secret.yaml" "feast" }}
    feast: {{- toYaml (index .Values "application-secret.yaml" "feast") | nindent 6 }}
    {{- end }}
    {{- if index .Values "application-secret.yaml" "rest" }}
    rest: {{- toYaml (index .Values "application-secret.yaml" "rest") | nindent 6 }}
    {{- end }}
    {{- if index .Values "application-secret.yaml" "grpc" }}
    grpc: {{- toYaml (index .Values "application-secret.yaml" "grpc") | nindent 6 }}
    {{- end }}

@@ -0,0 +1,34 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ template "feature-server.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "feature-server.name" . }}
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  {{- with .Values.service.annotations }}
  annotations:
{{ toYaml . | indent 4 }}
  {{- end }}
spec:
  type: {{ .Values.service.type }}
  {{- if .Values.service.loadBalancerIP }}
  loadBalancerIP: {{ .Values.service.loadBalancerIP }}
  {{- end }}
  {{- if .Values.service.loadBalancerSourceRanges }}
  loadBalancerSourceRanges:
{{ toYaml .Values.service.loadBalancerSourceRanges | indent 2 }}
  {{- end }}
  ports:
  - name: grpc
    port: {{ .Values.service.grpc.port }}
    targetPort: {{ .Values.service.grpc.targetPort }}
    {{- if .Values.service.grpc.nodePort }}
    nodePort: {{ .Values.service.grpc.nodePort }}
    {{- end }}
  selector:
    app: {{ template "feature-server.name" . }}
    component: serving
    release: {{ .Release.Name }}

@@ -0,0 +1,140 @@
# replicaCount -- Number of pods that will be created
replicaCount: 1

image:
  # image.repository -- Docker image for Feature Server repository
  repository: feastdev/feature-server-java
  # image.tag -- Image tag
  tag: 0.28.0
  # image.pullPolicy -- Image pull policy
  pullPolicy: IfNotPresent

transformationService:
  host: ""
  port: 6566

application.yaml:
  # "application.yaml".enabled -- Flag to include the default [configuration](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Please set `application-override.yaml` to override this configuration.
  enabled: true

application-generated.yaml:
  # "application-generated.yaml".enabled -- Flag to include Helm generated configuration. Please set `application-override.yaml` to override this configuration.
  enabled: true

# "application-secret.yaml" -- Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a Secret. `application-override.yaml` has a higher precedence than `application-secret.yaml`. It is recommended to either set `application-override.yaml` or `application-secret.yaml` only to simplify config management.
application-secret.yaml:
  enabled: false

# "application-override.yaml" -- Configuration to override the default [application.yaml](https://github.com/feast-dev/feast/blob/master/java/serving/src/main/resources/application.yml). Will be created as a ConfigMap. `application-override.yaml` has a higher precedence than `application-secret.yaml`
application-override.yaml:
  enabled: true

# javaOpts -- [JVM options](https://docs.oracle.com/cd/E22289_01/html/821-1274/configuring-the-default-jvm-and-java-arguments.html). For better performance, it is advised to set the min and max heap: <br> `-Xms2048m -Xmx2048m`
javaOpts:

# logType -- Log format, either `JSON` or `Console`
logType: Console
# logLevel -- Default log level, use either one of `DEBUG`, `INFO`, `WARN` or `ERROR`
logLevel: WARN

livenessProbe:
  # livenessProbe.enabled -- Flag to enable the probe
  enabled: true
  # livenessProbe.initialDelaySeconds -- Delay before the probe is initiated
  initialDelaySeconds: 60
  # livenessProbe.periodSeconds -- How often to perform the probe
  periodSeconds: 10
  # livenessProbe.timeoutSeconds -- When the probe times out
  timeoutSeconds: 5
  # livenessProbe.successThreshold -- Min consecutive successes for the probe to be considered successful
  successThreshold: 1
  # livenessProbe.failureThreshold -- Min consecutive failures for the probe to be considered failed
  failureThreshold: 5

readinessProbe:
  # readinessProbe.enabled -- Flag to enable the probe
  enabled: true
  # readinessProbe.initialDelaySeconds -- Delay before the probe is initiated
  initialDelaySeconds: 15
  # readinessProbe.periodSeconds -- How often to perform the probe
  periodSeconds: 10
  # readinessProbe.timeoutSeconds -- When the probe times out
  timeoutSeconds: 10
  # readinessProbe.successThreshold -- Min consecutive successes for the probe to be considered successful
  successThreshold: 1
  # readinessProbe.failureThreshold -- Min consecutive failures for the probe to be considered failed
  failureThreshold: 5

service:
  # service.type -- Kubernetes service type
  type: ClusterIP
  grpc:
    # service.grpc.port -- Service port for GRPC requests
    port: 6566
    # service.grpc.targetPort -- Container port serving GRPC requests
    targetPort: 6566
    # service.grpc.nodePort -- Port number that each cluster node will listen to
    nodePort:

ingress:
  grpc:
    # ingress.grpc.enabled -- Flag to create an ingress resource for the service
    enabled: false
    # ingress.grpc.class -- Which ingress controller to use
    class: nginx
    # ingress.grpc.hosts -- List of hostnames to match when routing requests
    hosts: []
    # ingress.grpc.annotations -- Extra annotations for the ingress
    annotations: {}
    https:
      # ingress.grpc.https.enabled -- Flag to enable HTTPS
      enabled: true
      # ingress.grpc.https.secretNames -- Map of hostname to TLS secret name
      secretNames: {}
    # ingress.grpc.whitelist -- Allowed client IP source ranges
    whitelist: ""
    auth:
      # ingress.grpc.auth.enabled -- Flag to enable auth
      enabled: false
  http:
    # ingress.http.enabled -- Flag to create an ingress resource for the service
    enabled: false
    # ingress.http.class -- Which ingress controller to use
    class: nginx
    # ingress.http.hosts -- List of hostnames to match when routing requests
    hosts: []
    # ingress.http.annotations -- Extra annotations for the ingress
    annotations: {}
    https:
      # ingress.http.https.enabled -- Flag to enable HTTPS
      enabled: true
      # ingress.http.https.secretNames -- Map of hostname to TLS secret name
      secretNames: {}
    # ingress.http.whitelist -- Allowed client IP source ranges
    whitelist: ""
    auth:
      # ingress.http.auth.enabled -- Flag to enable auth
      enabled: false
      # ingress.http.auth.authUrl -- URL to an existing authentication service
      authUrl: http://auth-server.auth-ns.svc.cluster.local/auth

# resources -- CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container)
resources: {}

# nodeSelector -- Node labels for pod assignment
nodeSelector: {}

# envOverrides -- Extra environment variables to set
envOverrides: {}

# secrets -- List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/<secret name>.
secrets: []

# podAnnotations -- Annotations to be added to Feast Serving pods
podAnnotations: {}

# podLabels -- Labels to be added to Feast Serving pods
podLabels: {}

Binary file not shown.

@@ -0,0 +1,10 @@
apiVersion: v1
description: "Transformation service: to compute on-demand features"
name: transformation-service
version: 0.28.0
appVersion: v0.28.0
keywords:
- machine learning
- big data
- mlops
home: https://github.com/feast-dev/feast

@@ -0,0 +1,28 @@
# transformation-service
![Version: 0.28.0](https://img.shields.io/badge/Version-0.28.0-informational?style=flat-square) ![AppVersion: v0.28.0](https://img.shields.io/badge/AppVersion-v0.28.0-informational?style=flat-square)
Transformation service: to compute on-demand features
**Homepage:** <https://github.com/feast-dev/feast>
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| envOverrides | object | `{}` | Extra environment variables to set |
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository |
| image.tag | string | `"0.28.0"` | Image tag |
| nodeSelector | object | `{}` | Node labels for pod assignment |
| podLabels | object | `{}` | Labels to be added to Feast Serving pods |
| replicaCount | int | `1` | Number of pods that will be created |
| resources | object | `{}` | CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) |
| secrets | list | `[]` | List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/<secret name>. |
| service.grpc.nodePort | string | `nil` | Port number that each cluster node will listen to |
| service.grpc.port | int | `6566` | Service port for GRPC requests |
| service.grpc.targetPort | int | `6566` | Container port serving GRPC requests |
| service.type | string | `"ClusterIP"` | Kubernetes service type |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

@@ -0,0 +1,5 @@
registry:
  path: {{ .Values.global.registry.path }}
  cache_ttl_seconds: {{ .Values.global.registry.cache_ttl_seconds }}
provider: local
project: {{ .Values.global.project }}

@@ -0,0 +1,45 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "transformation-service.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "transformation-service.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "transformation-service.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Common labels
*/}}
{{- define "transformation-service.labels" -}}
app.kubernetes.io/name: {{ include "transformation-service.name" . }}
helm.sh/chart: {{ include "transformation-service.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}

@@ -0,0 +1,73 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "transformation-service.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "transformation-service.name" . }}
    component: serving
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      app: {{ template "transformation-service.name" . }}
      component: serving
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "transformation-service.name" . }}
        component: serving
        release: {{ .Release.Name }}
        {{- if .Values.podLabels }}
        {{ toYaml .Values.podLabels | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
      {{- range $secret := .Values.secrets }}
      - name: {{ $secret }}
        secret:
          secretName: {{ $secret }}
      {{- end }}
      containers:
      - name: {{ .Chart.Name }}
        image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
        imagePullPolicy: {{ .Values.image.pullPolicy }}
        ports:
        - name: grpc
          containerPort: {{ .Values.service.grpc.targetPort }}
        volumeMounts:
        {{- range $secret := .Values.secrets }}
        - name: {{ $secret }}
          mountPath: "/etc/secrets/{{ $secret }}"
          readOnly: true
        {{- end }}
        env:
        - name: FEATURE_TRANSFORMATION_SERVER_PORT
          value: {{ .Values.service.grpc.targetPort | quote }}
        - name: FEATURE_STORE_YAML_BASE64
          value: {{ tpl (.Files.Get "config/feature_store.yaml") . | b64enc | quote }}
        {{- range $key, $value := .Values.envOverrides }}
        - name: {{ printf "%s" $key | replace "." "_" | upper | quote }}
          {{- if eq (kindOf $value) "map" }}
          valueFrom:
            {{- toYaml $value | nindent 12 }}
          {{- else }}
          value: {{ $value | quote }}
          {{- end }}
        {{- end }}
        resources:
          {{- toYaml .Values.resources | nindent 10 }}

@@ -0,0 +1,27 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ template "transformation-service.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ template "transformation-service.name" . }}
    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
  {{- with .Values.service.annotations }}
  annotations:
{{ toYaml . | indent 4 }}
  {{- end }}
spec:
  type: {{ .Values.service.type }}
  ports:
  - name: grpc
    port: {{ .Values.service.grpc.port }}
    targetPort: {{ .Values.service.grpc.targetPort }}
    {{- if .Values.service.grpc.nodePort }}
    nodePort: {{ .Values.service.grpc.nodePort }}
    {{- end }}
  selector:
    app: {{ template "transformation-service.name" . }}
    component: serving
    release: {{ .Release.Name }}

@@ -0,0 +1,37 @@
# replicaCount -- Number of pods that will be created
replicaCount: 1

image:
  # image.repository -- Docker image for Transformation Server repository
  repository: feastdev/feature-transformation-server
  # image.tag -- Image tag
  tag: 0.28.0
  # image.pullPolicy -- Image pull policy
  pullPolicy: IfNotPresent

service:
  # service.type -- Kubernetes service type
  type: ClusterIP
  grpc:
    # service.grpc.port -- Service port for GRPC requests
    port: 6566
    # service.grpc.targetPort -- Container port serving GRPC requests
    targetPort: 6566
    # service.grpc.nodePort -- Port number that each cluster node will listen to
    nodePort:

# resources -- CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container)
resources: {}

# nodeSelector -- Node labels for pod assignment
nodeSelector: {}

# envOverrides -- Extra environment variables to set
envOverrides: {}

# secrets -- List of Kubernetes secrets to be mounted. These secrets will be mounted on /etc/secrets/<secret name>.
secrets: []

# podLabels -- Labels to be added to Feast Serving pods
podLabels: {}

@@ -0,0 +1,15 @@
dependencies:
  - name: feature-server
    alias: feature-server
    version: 0.28.0
    condition: feature-server.enabled
    repository: https://feast-helm-charts.storage.googleapis.com
  - name: transformation-service
    alias: transformation-service
    version: 0.28.0
    condition: transformation-service.enabled
    repository: https://feast-helm-charts.storage.googleapis.com
  - name: redis
    version: 10.5.6
    repository: https://charts.helm.sh/stable
    condition: redis.enabled

@@ -0,0 +1,23 @@
feature-server:
  enabled: true

transformation-service:
  enabled: true

redis:
  # redis.enabled -- Flag to install Redis
  enabled: false
  # redis.usePassword -- Disable redis password
  usePassword: false

global:
  # global.registry -- Information about registry managed by Feast Python SDK (must be in sync with feature_store.yaml)
  registry:
    # global.registry.path -- Path to the registry file managed by Feast Python SDK
    path: gs://path/to/registry.db
    # global.registry.cache_ttl_seconds -- Registry cache (in memory) will be refreshed on this interval
    cache_ttl_seconds: 0
  # global.project -- Project from feature_store.yaml
  project: default

@@ -0,0 +1,22 @@
import boto3
from tqdm import tqdm


def main() -> None:
    db = boto3.resource("dynamodb")

    # First pass: count the matching tables so tqdm can show accurate progress.
    num_to_delete = 0
    all_tables = db.tables.all()
    for table in all_tables:
        if "integration_test" in table.name:
            num_to_delete += 1

    # Second pass: delete the CI tables, updating the progress bar as we go.
    with tqdm(total=num_to_delete) as progress:
        for table in all_tables:
            if "integration_test" in table.name:
                table.delete()
                progress.update()

    print(f"Deleted {num_to_delete} CI DynamoDB tables")


if __name__ == "__main__":
    main()

@@ -0,0 +1,53 @@
try:
    from jinja2 import Template
except ImportError:
    raise ImportError(
        "Please install Jinja in order for template generation to succeed"
    )

############################
# Find the repo root
############################
from pathlib import Path


def find_repo(path):
    # Find repository root from the path's parents
    for path in Path(path).parents:
        # Check whether "path/.git" exists and is a directory
        git_dir = path / ".git"
        if git_dir.is_dir():
            return path


# Find the repo root where the script is
repo_root = find_repo(__file__)

############################
# Template README.md
############################
roadmap_path = repo_root / "docs" / "roadmap.md"
with open(roadmap_path, "r") as f:
    # skip first lines since it has the title
    roadmap_contents_lines = f.readlines()[2:]

# Join back again
roadmap_contents = "".join(roadmap_contents_lines)

template_path = repo_root / "infra" / "templates" / "README.md.jinja2"
with open(template_path) as f:
    template = Template(f.read())

# Compile template
readme_md = template.render(roadmap_contents=roadmap_contents)

# Add warning to generated file
readme_md = (
    "<!--Do not modify this file. It is auto-generated from a template (infra/templates/README.md.jinja2)-->\n\n"
    + readme_md
)

readme_path = repo_root / "README.md"
with open(readme_path, "w") as f:
    f.write(readme_md)

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
set -e
# This script downloads a previously archived Maven .m2 cache
# from Google Cloud Storage to a local path, for faster builds
usage()
{
echo "usage: prepare_maven_cache.sh
--archive-uri gcs uri to retrieve maven .m2 archive
--output-dir output directory for .m2 directory"
}
while [ "$1" != "" ]; do
case "$1" in
--archive-uri ) ARCHIVE_URI="$2"; shift;;
--output-dir ) OUTPUT_DIR="$2"; shift;;
* ) usage; exit 1
esac
shift
done
if [[ ! ${ARCHIVE_URI} ]]; then usage; exit 1; fi
if [[ ! ${OUTPUT_DIR} ]]; then usage; exit 1; fi
# Install Google Cloud SDK if gsutil command not exists
if [[ ! $(command -v gsutil) ]]; then
CURRENT_DIR=$(dirname "$BASH_SOURCE")
. "${CURRENT_DIR}"/install-google-cloud-sdk.sh
fi
gsutil -q cp ${ARCHIVE_URI} /tmp/.m2.tar
tar xf /tmp/.m2.tar -C ${OUTPUT_DIR}

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -e
readonly HELM_URL=https://storage.googleapis.com/kubernetes-helm
readonly HELM_TARBALL="helm-${HELM_VERSION}-linux-amd64.tar.gz"
readonly STABLE_REPO_URL=https://charts.helm.sh/stable
readonly INCUBATOR_REPO_URL=https://charts.helm.sh/incubator
curl -s "https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz" | tar -C /tmp -xz
sudo mv /tmp/linux-amd64/helm /usr/bin/helm
helm repo add incubator "$INCUBATOR_REPO_URL"

@@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -e
if [ $# -ne 1 ]; then
echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0"
exit 1
fi
bucket=gs://feast-helm-charts
repo_url=https://feast-helm-charts.storage.googleapis.com/
helm plugin install https://github.com/hayorov/helm-gcs.git --version 0.3.18 || true
helm repo add feast-helm-chart-repo $bucket
cd infra/charts
helm package feast
helm package feast-feature-server
helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force
helm gcs push --public feast-feature-server-${1}.tgz feast-helm-chart-repo --force
rm -f ./*.tgz

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Get project root
PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel)
# Should have no "latest" tags
grep -R "tag: latest" "$PROJECT_ROOT_DIR"/infra/charts || true
COUNT=$(grep -R "tag: latest" "$PROJECT_ROOT_DIR"/infra/charts | wc -l)
if [ "$COUNT" -gt 0 ]; then
echo 'Found at least one instance of "latest" in an image tag. Please replace it with the correct release version.';
exit 1
else
echo 'No "latest" tags found, continuing';
fi
# TODO: Enable DockerHub vs GCR checks asap.
## Should have no "gcr" images
#grep -R "gcr.io" "$PROJECT_ROOT_DIR"/infra/charts || true
#COUNT=$(grep -R "gcr.io" "$PROJECT_ROOT_DIR"/infra/charts | wc -l)
#
#if [ "$COUNT" -gt 0 ]; then
# echo 'Found more than one instance of "gcr.io" in charts. Please replace with https://hub.docker.com/r/feastdev feast image.';
# exit 1
#else
# echo 'No "gcr.io" instances found, continuing';
#fi
# Should have no "SNAPSHOT" versions
grep -R "SNAPSHOT" "$PROJECT_ROOT_DIR"/infra/charts || true
COUNT=$(grep -R "SNAPSHOT" "$PROJECT_ROOT_DIR"/infra/charts | wc -l)
if [ "$COUNT" -gt 0 ]; then
echo 'Found at least one instance of "SNAPSHOT" in charts. Please ensure that no SNAPSHOT charts are published.';
exit 1
else
echo 'No "SNAPSHOT" instances found, continuing';
fi

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
set -e
# Number of file locations that need to be bumped in unison when versions increment
UNIQUE_VERSIONS_COUNT=20 # Change in release 0.27.0
if [ $# -ne 1 ]; then
echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0"
exit 1
fi
CHART_ROOT_DIR=$(git rev-parse --show-toplevel)/infra/charts
echo "Finding how many versions have been set to ${1} in the current repository"
CHANGED_VERSIONS_COUNT=$(grep -R --exclude-dir='.*' ${1} ${CHART_ROOT_DIR} | wc -l)
echo "Found ${CHANGED_VERSIONS_COUNT} versions that have been changed"
echo "This repository should contain ${UNIQUE_VERSIONS_COUNT} changed versions"
if [ $UNIQUE_VERSIONS_COUNT -ne "${CHANGED_VERSIONS_COUNT}" ]; then
echo "We expected $UNIQUE_VERSIONS_COUNT to have been updated to the latest version, but only ${CHANGED_VERSIONS_COUNT} have. This number is statically defined based on a simple grep"
echo "Please confirm that all versions in all charts and requirements files have been bumped to the tagged release version. If you have successfully bumped all versions and there is still a mismatch in the expected and actual counts, then rerun the following command"
echo "grep -R 'insert_your_semver_version_here' . | wc -l"
echo "and update the script scripts/validate-helm-chart-versions.sh"
echo
echo For your reference, the following lines were detected as changed
echo
grep -R --exclude-dir='.*' ${1} ${CHART_ROOT_DIR} || true
echo
exit 1
fi
echo "All versions validated. Passing test."

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
set -e
usage()
{
echo "usage: . install-google-cloud-sdk.sh
[--with-key-file local file path to service account json]
NOTE: requires 'dot' before install-google-cloud-sdk.sh
so that the PATH variable is exported successfully to
the calling process, i.e. you don't need to provide
full path to gcloud command after installation
--with-key-file is optional,
if no authentication is required"
}
while [ "$1" != "" ]; do
case "$1" in
--with-key-file ) KEY_FILE="$2"; shift;;
* ) usage; exit 1
esac
shift
done
GOOGLE_CLOUD_SDK_ARCHIVE_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-266.0.0-linux-x86_64.tar.gz
GOOGLE_PROJECT_ID=kf-feast
KUBE_CLUSTER_NAME=primary-test-cluster
KUBE_CLUSTER_ZONE=us-central1-a
curl -s ${GOOGLE_CLOUD_SDK_ARCHIVE_URL} | tar xz -C /
export PATH=/google-cloud-sdk/bin:${PATH}
gcloud -q components install kubectl &> /var/log/kubectl.install.log
if [[ ${KEY_FILE} ]]; then
gcloud -q auth activate-service-account --key-file=${KEY_FILE}
gcloud -q auth configure-docker
gcloud -q config set project ${GOOGLE_PROJECT_ID}
gcloud -q container clusters get-credentials ${KUBE_CLUSTER_NAME} --zone ${KUBE_CLUSTER_ZONE} --project ${GOOGLE_PROJECT_ID}
export GOOGLE_APPLICATION_CREDENTIALS=${KEY_FILE}
fi
# Restore bash option
set +e

infra/scripts/install-helm.sh Executable file

@@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -e
# HELM_VERSION is expected to be set by the calling environment
readonly HELM_URL=https://get.helm.sh
readonly HELM_TARBALL="helm-${HELM_VERSION}-linux-amd64.tar.gz"
readonly STABLE_REPO_URL=https://charts.helm.sh/stable
readonly INCUBATOR_REPO_URL=https://charts.helm.sh/incubator
curl -s "${HELM_URL}/${HELM_TARBALL}" | tar -C /tmp -xz
sudo mv /tmp/linux-amd64/helm /usr/bin/helm
helm init --client-only
helm repo add stable "$STABLE_REPO_URL"
helm repo add incubator "$INCUBATOR_REPO_URL"

View file

@ -0,0 +1,51 @@
#!/usr/bin/env bash
set -e
set -o pipefail
usage()
{
echo "usage: publish-docker-image.sh
--repository the target repository to upload the Docker image, example:
gcr.io/kf-feast/feast-core
--tag the tag for the Docker image, example: 1.0.4
--file path to the Dockerfile
[--google-service-account-file
path to Google Cloud service account JSON key file]
"
}
while [ "$1" != "" ]; do
case "$1" in
--repository ) REPOSITORY="$2"; shift;;
--tag ) TAG="$2"; shift;;
--file ) FILE="$2"; shift;;
--google-service-account-file ) GOOGLE_SERVICE_ACCOUNT_FILE="$2"; shift;;
-h | --help ) usage; exit;;
* ) usage; exit 1
esac
shift
done
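# Example invocation (values are illustrative; repository and tag mirror the usage text above):
#   ./publish-docker-image.sh --repository gcr.io/kf-feast/feast-core --tag 1.0.4 --file path/to/Dockerfile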
if [ -z "$REPOSITORY" ]; then usage; exit 1; fi
if [ -z "$TAG" ]; then usage; exit 1; fi
if [ -z "$FILE" ]; then usage; exit 1; fi
if [ -n "$GOOGLE_SERVICE_ACCOUNT_FILE" ]; then
gcloud -q auth activate-service-account --key-file "$GOOGLE_SERVICE_ACCOUNT_FILE"
gcloud -q auth configure-docker
fi
echo "============================================================"
echo "Building Docker image $REPOSITORY:$TAG"
echo "============================================================"
docker build -t $REPOSITORY:$TAG --build-arg REVISION=$TAG -f $FILE .
echo "============================================================"
echo "Pushing Docker image $REPOSITORY:$TAG"
echo "============================================================"
docker push $REPOSITORY:$TAG

View file

@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -e
set -o pipefail
GPG_KEY_IMPORT_DIR=/etc/gpg
usage()
{
echo "usage: publish-java-sdk.sh
--revision Value for the revision e.g. '0.2.3'
--gpg-key-import-dir Directory containing existing GPG keys to import.
The directory should contain these 2 files:
- public-key
- private-key
The default value is '/etc/gpg'
This script assumes the GPG private key is protected by a passphrase.
The passphrase can be specified in \$HOME/.m2/settings.xml. In the same xml
file, credentials to upload releases to Sonatype must also be provided.
# Example settings: ~/.m2/settings.xml
<settings>
<servers>
<server>
<id>ossrh</id>
<username>SONATYPE_USER</username>
<password>SONATYPE_PASSWORD</password>
</server>
</servers>
<profiles>
<profile>
<id>ossrh</id>
<properties>
<gpg.passphrase>GPG_PASSPHRASE</gpg.passphrase>
</properties>
</profile>
</profiles>
</settings>
"
}
while [ "$1" != "" ]; do
case "$1" in
--revision ) REVISION="$2"; shift;;
--gpg-key-import-dir ) GPG_KEY_IMPORT_DIR="$2"; shift;;
-h | --help ) usage; exit;;
* ) usage; exit 1
esac
shift
done
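# Example invocation (values are illustrative):
#   ./publish-java-sdk.sh --revision 0.2.3 --gpg-key-import-dir /etc/gpg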
if [ -z "$REVISION" ]; then usage; exit 1; fi
echo "============================================================"
echo "Checking Maven and GPG versions"
echo "============================================================"
mvn -f java/pom.xml --version
echo ""
gpg --version
echo "============================================================"
echo "Importing GPG keys"
echo "============================================================"
gpg --import --batch --yes $GPG_KEY_IMPORT_DIR/public-key
gpg --import --batch --yes $GPG_KEY_IMPORT_DIR/private-key
echo "============================================================"
echo "Deploying Java SDK with revision: $REVISION"
echo "============================================================"
mvn -f java/pom.xml --projects .,datatypes,serving-client -Drevision=$REVISION --batch-mode clean deploy

98
infra/scripts/redis-cluster.sh Executable file
View file

@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Settings
# Make sure you run "brew install redis"
# BIN_PATH="/opt/homebrew/bin"
REDIS_CLI=$(which redis-cli)
REDIS_SERVER=$(which redis-server)
CLUSTER_HOST=127.0.0.1
# Creates a cluster at ports 6001-6006 with 3 masters 6001-6003 and 3 slaves 6004-6006
PORT=${2:-6000}
TIMEOUT=2000
NODES=6
REPLICAS=1
PROTECTED_MODE=yes
ADDITIONAL_OPTIONS=""
if [ -f config.sh ]
then
source "config.sh"
fi
# Computed vars
ENDPORT=$((PORT+NODES))
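# Typical workflow (using the defaults above; subcommands are defined below):
#   ./redis-cluster.sh start
#   ./redis-cluster.sh create -f
#   ./redis-cluster.sh stop
#   ./redis-cluster.sh clean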
if [ "$1" == "start" ]
then
while [ $((PORT < ENDPORT)) != "0" ]; do
PORT=$((PORT+1))
echo "Starting $PORT"
$REDIS_SERVER --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS}
done
exit 0
fi
if [ "$1" == "create" ]
then
HOSTS=""
while [ $((PORT < ENDPORT)) != "0" ]; do
PORT=$((PORT+1))
HOSTS="$HOSTS $CLUSTER_HOST:$PORT"
done
OPT_ARG=""
if [ "$2" == "-f" ]; then
OPT_ARG="--cluster-yes"
fi
$REDIS_CLI --cluster create $HOSTS --cluster-replicas $REPLICAS $OPT_ARG
exit 0
fi
if [ "$1" == "stop" ]
then
while [ $((PORT < ENDPORT)) != "0" ]; do
PORT=$((PORT+1))
echo "Stopping $PORT"
$REDIS_CLI -p $PORT shutdown nosave
done
exit 0
fi
if [ "$1" == "watch" ]
then
PORT=$((PORT+1))
while true; do
clear
date
$REDIS_CLI -p $PORT cluster nodes | head -30
sleep 1
done
exit 0
fi
if [ "$1" == "clean" ]
then
echo "Cleaning *.log"
rm -rf *.log
echo "Cleaning appendonly-*"
rm -rf appendonly-*
echo "Cleaning dump-*.rdb"
rm -rf dump-*.rdb
echo "Cleaning nodes-*.conf"
rm -rf nodes-*.conf
exit 0
fi
if [ "$1" == "clean-logs" ]
then
echo "Cleaning *.log"
rm -rf *.log
exit 0
fi
echo "Usage: $0 [start|create|stop|watch|clean|clean-logs|call]"
echo "start [PORT] -- Launch Redis Cluster instances."
echo "create [PORT] [-f] -- Create a cluster using redis-cli --cluster create."
echo "stop [PORT] -- Stop Redis Cluster instances."
echo "watch [PORT] -- Show CLUSTER NODES output (first 30 lines) of first node."
echo "clean -- Remove all instances data, logs, configs."
echo "clean-logs -- Remove just instances logs."

264
infra/scripts/release-patch.sh Executable file
View file

@ -0,0 +1,264 @@
#!/usr/bin/env bash
set -eo pipefail
usage()
{
echo "usage: release-patch.sh
This script is used to release a patch release. It is untested on major/minor releases and for those, some modification may be necessary.
-v, --version version to release, example: 0.10.6
-t, --github-token personal GitHub token
-r, --remote git remote server name for the feast-dev/feast repo (e.g. origin, upstream, etc.)
"
}
while [ "$1" != "" ]; do
case "$1" in
-v | --version ) VERSION="$2"; shift;;
-t | --github-token ) GH_TOKEN="$2"; shift;;
-r | --remote ) REMOTE="$2"; shift;;
-h | --help ) usage; exit;;
* ) usage; exit 1
esac
shift
done
if [ -z "$VERSION" ]; then usage; exit 1; fi
if [ -z "$GH_TOKEN" ]; then usage; exit 1; fi
if [ -z "$REMOTE" ]; then usage; exit 1; fi
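# Example invocation (values are illustrative):
#   ./release-patch.sh --version 0.10.6 --github-token "$GH_PAT" --remote upstream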
regex="([0-9]+)\.([0-9]+)\.([0-9]+)"
if [[ $VERSION =~ $regex ]]
then
MAJOR="${BASH_REMATCH[1]}"
MINOR="${BASH_REMATCH[2]}"
PATCH="${BASH_REMATCH[3]}"
else
usage
exit 1
fi
if ! which gh ; then echo "Please install the GitHub CLI to use this script"; exit 1; fi
echo "This script is mostly idempotent; check git status for temp files before restarting. It will always prompt you before making any non-local change."
# Go to infra/scripts directory
cd $(dirname "$0")
# Login to GitHub CLI
echo $GH_TOKEN | gh auth login --with-token
echo "Step 1: rebase new commits onto release branch"
git fetch $REMOTE
git checkout $REMOTE/master
STARTING_COMMIT=$(git merge-base $REMOTE/master v$MAJOR.$MINOR-branch)
git checkout v$MAJOR.$MINOR-branch
push_rebased_commits()
{
echo "Pushing commits"
git push $REMOTE v$MAJOR.$MINOR-branch
echo "Commits pushed"
}
rebase_from_master()
{
echo "Rebasing commits"
git checkout $REMOTE/master
git rebase --interactive --onto v$MAJOR.$MINOR-branch $STARTING_COMMIT HEAD
git branch -f v$MAJOR.$MINOR-branch HEAD
git checkout v$MAJOR.$MINOR-branch
echo "Commits rebased"
echo "Step 1b: Push commits"
read -p "Commits are not pushed. Continue (y) or skip this sub-step (n)? " choice
case "$choice" in
y|Y ) push_rebased_commits ;;
* ) echo "Skipping this sub-step" ;;
esac ;
}
echo "Step 1a: rebase commits"
if git status | grep -q "is ahead of" ; then
read -p "Your local branch is ahead of its remote counterpart, indicating you may have already rebased. Skip this step (y) or run the rebase starting from commit $STARTING_COMMIT (n)? " choice
case "$choice" in
y|Y ) echo "Skipping this step" ;;
* ) rebase_from_master ;;
esac ;
else
read -p "Will rebase starting from commit $STARTING_COMMIT. Continue (y) or skip this step (n)? " choice
case "$choice" in
y|Y ) rebase_from_master ;;
* ) echo "Skipping this step" ;;
esac ;
fi
CHANGELOG=$(git rev-parse --show-toplevel)/CHANGELOG.md
commit_changelog()
{
echo "Committing CHANGELOG.md"
git add $CHANGELOG
git commit -m "Update CHANGELOG for Feast v$MAJOR.$MINOR.$PATCH"
}
update_changelog()
{
echo "Running changelog generator (will take up to a few minutes)"
echo -e "# Changelog\n" > temp \
&& docker run -it --rm ferrarimarco/github-changelog-generator \
--user feast-dev \
--project feast \
--release-branch master \
--future-release v$MAJOR.$MINOR.$PATCH \
--unreleased-only \
--no-issues \
--bug-labels kind/bug \
--enhancement-labels kind/feature \
--breaking-labels compat/breaking \
-t $GH_TOKEN \
--max-issues 1 -o \
| sed -n '/## \[v'"$MAJOR.$MINOR.$PATCH"'\]/,$p' \
| sed '$d' | sed '$d' | sed '$d' | tr -d '\r' >> temp \
&& sed '1d' $CHANGELOG >> temp && mv temp $CHANGELOG
git diff $CHANGELOG
echo "Check CHANGELOG.md carefully and fix any errors. In particular, make sure the new enhancements/PRs/bugfixes aren't already listed somewhere lower down in the file."
read -p "Once you're done checking, continue to commit the changelog (y) or exit (n)? " choice
case "$choice" in
y|Y ) commit_changelog ;;
* ) exit ;;
esac ;
}
echo "Step 2: Updating CHANGELOG.md"
if grep -q "https://github.com/feast-dev/feast/tree/v$MAJOR.$MINOR.$PATCH" $CHANGELOG ; then
read -p "CHANGELOG.md appears updated. Skip this step (y/n)? " choice
case "$choice" in
y|Y ) echo "Skipping this step" ;;
* ) update_changelog ;;
esac ;
else
update_changelog ;
fi
tag_commit()
{
echo "Tagging commit"
git tag v$MAJOR.$MINOR.$PATCH
echo "Commit tagged"
}
echo "Step 3: Tag commit"
if git tag | grep -q "v$MAJOR.$MINOR.$PATCH" ; then
read -p "The tag already exists. Skip this step (y/n)? " choice
case "$choice" in
y|Y ) echo "Skipping this step" ;;
* ) tag_commit ;;
esac ;
else
tag_commit ;
fi
echo "Step 4: Push commits and tags"
push_commits()
{
echo "Pushing commits"
git push $REMOTE v$MAJOR.$MINOR-branch
echo "Commits pushed"
}
echo "Step 4a: Push commits"
if git status | grep -q "nothing to commit" ; then
echo "The commits appear pushed. Skipping this sub-step"
else
read -p "Commits are not pushed. Continue (y) or skip this sub-step (n)? " choice
case "$choice" in
y|Y ) push_commits ;;
* ) echo "Skipping this sub-step" ;;
esac ;
fi
push_tag()
{
echo "Pushing tag"
git push $REMOTE v$MAJOR.$MINOR.$PATCH
echo "Tag pushed"
}
echo "Step 4b: Push tag"
if git ls-remote --tags $REMOTE | grep -q "v$MAJOR.$MINOR.$PATCH" ; then
read -p "The tag appears pushed. Skip this sub-step (y/n)? " choice
case "$choice" in
y|Y ) echo "Skipping this sub-step" ;;
* ) push_tag ;;
esac ;
else
read -p "The tag is not pushed. Continue (y) or skip this sub-step (n)? " choice
case "$choice" in
y|Y ) push_tag ;;
* ) echo "Skipping this sub-step" ;;
esac ;
fi
read -p "Now wait for the CI to pass. Continue (y) or exit and fix the problem (n)? " choice
case "$choice" in
y|Y ) echo "Moving on to the next step" ;;
* ) exit ;;
esac ;
echo "Step 6: Add changelog to master"
changelog_hash=$(git rev-parse HEAD)
git checkout master
cp_changelog()
{
echo "Cherry-picking"
git cherry-pick $changelog_hash
echo "Cherry-pick done"
}
echo "Step 6a: Cherry-pick changelog to master"
if grep -q "https://github.com/feast-dev/feast/tree/v$MAJOR.$MINOR.$PATCH" $CHANGELOG ; then
read -p "The changelog appears to be cherry-picked onto master. Skip this sub-step (y/n)? " choice
case "$choice" in
y|Y ) echo "Skipping this sub-step" ;;
* ) cp_changelog ;;
esac ;
else
read -p "The changelog does not appear to be cherry-picked onto master. Continue (y) or skip this sub-step (n)? " choice
case "$choice" in
y|Y ) cp_changelog ;;
* ) echo "Skipping this sub-step" ;;
esac ;
fi
push_cp()
{
echo "Pushing cherry-pick"
git push $REMOTE master
echo "Commit pushed"
}
echo "Step 6b: Push changelog to master"
if git status | grep -q "nothing to commit" ; then
echo "The commit appears pushed. Skipping this sub-step"
else
read -p "The commit is not pushed. Continue (y) or skip this sub-step (n)? " choice
case "$choice" in
y|Y ) push_cp ;;
* ) echo "Skipping this sub-step" ;;
esac ;
fi
create_release()
{
echo "Creating GitHub release"
cat $CHANGELOG | sed -n '/## \[v'"$MAJOR.$MINOR.$PATCH"'\]/,/## \[v'"$MAJOR.$MINOR.$(($PATCH-1))"'\]/p' | sed -n '/**Implemented enhancements/,$p' | sed '$d' > temp2 \
&& gh release create v$MAJOR.$MINOR.$PATCH -t "Feast v$MAJOR.$MINOR.$PATCH" --repo feast-dev/feast --notes-file temp2 \
&& rm temp2
echo "GitHub release created"
}
echo "Step 7: Create a GitHub release"
if gh release list --repo feast-dev/feast | grep -q "v$MAJOR.$MINOR.$PATCH" ; then
read -p "GitHub release appears created. Skip this step (y/n)? " choice
case "$choice" in
y|Y ) echo "Skipping this step" ;;
* ) create_release ;;
esac ;
else
read -p "A GitHub release has not been created. Continue (y) or skip this step (n)? " choice
case "$choice" in
y|Y ) create_release ;;
* ) echo "Skipping this step" ;;
esac ;
fi
echo "Step 8: Update the Upgrade Guide manually (docs/advanced/upgrading.md)"

View file

@ -0,0 +1,83 @@
# This script will bump the versions found in files (charts, pom.xml) during the Feast release process.
import pathlib
import sys
USAGE = f"Usage: python {sys.argv[0]} [--help] | current_semver_version new_semver_version]"
VERSIONS_TO_BUMP = 27  # Expected number of version references to bump; keep in sync with files_to_bump.txt
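# Example invocation (script path and versions are illustrative):
#   python infra/scripts/release/bump_file_versions.py 0.27.0 0.28.0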
def main() -> None:
args = sys.argv[1:]
if len(args) != 2:
raise SystemExit(USAGE)
current_version = args[0].strip()
new_version = args[1].strip()
if current_version == new_version:
raise SystemExit(f"Current and new versions are the same: {current_version} == {new_version}")
# Validate that the input arguments are semver versions
if not is_semantic_version(current_version):
raise SystemExit(f"Current version is not a valid semantic version: {current_version}")
if not is_semantic_version(new_version):
raise SystemExit(f"New version is not a valid semantic version: {new_version}")
# Get git repo root directory
repo_root = pathlib.Path(__file__).resolve().parent.parent.parent.parent
path_to_file_list = repo_root.joinpath("infra", "scripts", "release", "files_to_bump.txt")
# Get files to bump versions within
with open(path_to_file_list, "r") as f:
files_to_bump = f.read().splitlines()
# The current version should be 0.18.0 or 0.19.0 or 0.20.0 etc
validate_files_to_bump(current_version, files_to_bump, repo_root)
# Bump the version in the files
updated_count = 0
for file in files_to_bump:
components = file.split(" ")
file_path = components[0]
lines = components[1:]
with open(repo_root.joinpath(file_path), "r") as f:
file_contents = f.readlines()
for line in lines:
file_contents[int(line) - 1] = file_contents[int(line) - 1].replace(current_version, new_version)
with open(repo_root.joinpath(file_path), "w") as f:
f.write(''.join(file_contents))
updated_count += 1
print(f"Updated {updated_count} files with new version {new_version}")
def is_semantic_version(version: str) -> bool:
components = version.split(".")
if len(components) != 3:
return False
for component in components:
if not component.isdigit():
return False
return True
def validate_files_to_bump(current_version, files_to_bump, repo_root):
for file in files_to_bump:
components = file.split(" ")
assert len(components) > 1, f"Entry {file} should have a file name, and a list of line numbers with versions"
file_path = components[0]
lines = components[1:]
with open(repo_root.joinpath(file_path), "r") as f:
file_contents = f.readlines()
for line in lines:
assert current_version in file_contents[int(line) - 1], (
f"File `{file_path}` line `{line}` didn't contain version {current_version}. "
f"Contents: {file_contents[int(line) - 1]}"
)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,14 @@
infra/charts/feast/requirements.yaml 4 9
infra/charts/feast/Chart.yaml 4
infra/charts/feast/charts/transformation-service/Chart.yaml 4 5
infra/charts/feast/charts/transformation-service/README.md 3 16
infra/charts/feast/charts/transformation-service/values.yaml 8
infra/charts/feast/charts/feature-server/Chart.yaml 4 5
infra/charts/feast/charts/feature-server/README.md 3 20
infra/charts/feast/charts/feature-server/values.yaml 8
infra/charts/feast/README.md 11 68 69
infra/charts/feast-feature-server/Chart.yaml 5
infra/charts/feast-feature-server/README.md 3
infra/charts/feast-feature-server/values.yaml 12
java/pom.xml 38
ui/package.json 3

View file

@ -0,0 +1,70 @@
# For some reason patch releases with Semantic Release are tagged as "pre-release" on GitHub. This script
# removes the "pre-release" tag from the release.
import os
import sys
import requests
USAGE = f"Usage: python {sys.argv[0]} [--help] | version_being_released (e.g., v0.19.1)]"
def get_prerelease_status(version_being_released, token):
url = f"https://api.github.com/repos/feast-dev/feast/releases/tags/v{version_being_released}"
headers = {
"Content-Type": "application/json",
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {token}"
}
response = requests.request("GET", url, headers=headers)
response_json = response.json()
return bool(response_json['prerelease']), response_json['id']
def set_prerelease_status(release_id, status, token):
url = f"https://api.github.com/repos/feast-dev/feast/releases/{release_id}"
payload = {"prerelease": status}
headers = {
"Content-Type": "application/json",
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {token}"
}
requests.request("PATCH", url, json=payload, headers=headers)
def main() -> None:
args = sys.argv[1:]
if not args or len(args) != 1:
raise SystemExit(USAGE)
version_being_released = args[0].strip() # should look like 0.19.1 (without the v)
print(f"Disabling prerelease status for {version_being_released}")
token = os.getenv('GITHUB_TOKEN', default=None)
if token is None:
raise OSError("GITHUB_TOKEN environmental variable is not set")
is_prerelease, release_id = get_prerelease_status(version_being_released, token)
if is_prerelease:
set_prerelease_status(release_id, False, token)
else:
print(f"{version_being_released} is not a pre-release, exiting.")
exit(0)
is_prerelease, release_id = get_prerelease_status(version_being_released, token)
if is_prerelease:
import warnings
warnings.warn(f"Failed to unset prerelease status for {version_being_released} release id {release_id}")
else:
print(f"Successfully unset prerelease status for {version_being_released} release id {release_id}")
if __name__ == "__main__":
main()

View file

@ -0,0 +1,88 @@
#!/usr/bin/env bash
# Get Feast project repository root and scripts directory
export PROJECT_ROOT_DIR=$(git rev-parse --show-toplevel)
export SCRIPTS_DIR=${PROJECT_ROOT_DIR}/infra/scripts
install_test_tools() {
apt-get -qq update
apt-get -y install wget netcat kafkacat build-essential
}
print_banner() {
echo "
============================================================
$1
============================================================
"
}
wait_for_docker_image(){
# This script will block until a docker image is ready
[[ -z "$1" ]] && { echo "Please pass the docker image URI as the first parameter" ; exit 1; }
oldopt=$-
set +e
DOCKER_IMAGE=$1
poll_count=0
maximum_poll_count=150
# Wait for the image to become available in the registry
until docker pull "$DOCKER_IMAGE"
do
# Exit when we have tried enough times
if [[ "$poll_count" -gt "$maximum_poll_count" ]]; then
set -$oldopt
exit 1
fi
# Sleep and increment counter on failure
echo "${DOCKER_IMAGE} could not be found";
sleep 5;
((poll_count++))
done
set -$oldopt
}
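# Example (image URI is illustrative):
#   wait_for_docker_image gcr.io/kf-feast/feast-core:v0.9.0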
# Usage: TAG=$(get_tag_release [-ms])
# Parses the last release from git tags.
# Options:
# -m - Use only tags reachable from the current branch (i.e. git tag --merged).
# -s - Use only stable version tags (i.e. no prerelease tags).
get_tag_release() {
local GIT_TAG_CMD="git tag -l"
# Match only Semver tags
# Regular expression should match MAJOR.MINOR.PATCH[-PRERELEASE[.IDENTIFIER]]
# eg. v0.7.1 v0.7.2-alpha v0.7.2-rc.1
local TAG_REGEX='^v[0-9]+\.[0-9]+\.[0-9]+(-([0-9A-Za-z-]+(\.[0-9A-Za-z-]+)*))?$'
local OPTIND opt
while getopts "ms" opt; do
case "${opt}" in
m)
GIT_TAG_CMD="$GIT_TAG_CMD --merged"
;;
s)
# Match only stable version tags.
TAG_REGEX="^v[0-9]+\.[0-9]+\.[0-9]+$"
;;
*)
echo "get_tag_release(): Error: Bad arguments: $@"
return 1
;;
esac
done
shift $((OPTIND-1))
# Retrieve tags from git and filter as per regex.
local FILTERED_TAGS=$(bash -c "$GIT_TAG_CMD" | grep -P "$TAG_REGEX")
# Sort version tags in highest semver version first.
# To make sure that prerelease versions (ie versions with a vMAJOR.MINOR.PATCH-PRERELEASE suffix)
# are sorted after stable versions (ie vMAJOR.MINOR.PATCH), we append '_' to
# each stable version, as '_' sorts after the '-' found in prerelease versions
# alphanumerically, and remove it after sorting.
local SEMVER_SORTED_TAGS=$(echo "$FILTERED_TAGS" | sed -e '/-/!{s/$/_/}' | sort -rV \
| sed -e 's/_$//')
echo $(echo "$SEMVER_SORTED_TAGS" | head -n 1)
}
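# Example (illustrative): pick the latest stable tag reachable from the current branch
#   TAG=$(get_tag_release -ms)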

View file

@ -0,0 +1,15 @@
#!/usr/bin/env bash
export MAVEN_OPTS="-Dmaven.repo.local=/tmp/.m2/repository -DdependencyLocationsEnabled=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=25 -Dmaven.wagon.http.retryHandler.count=3 -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false"
export MAVEN_CACHE="gs://feast-templocation-kf-feast/.m2.2020-11-17.tar"
infra/scripts/download-maven-cache.sh --archive-uri ${MAVEN_CACHE} --output-dir /tmp
apt-get update && apt-get install -y redis-server postgresql libpq-dev
make build-java-no-tests REVISION=develop
python -m pip install --upgrade pip setuptools wheel pip-tools
make install-python
python -m pip install -qr tests/requirements.txt
export FEAST_USAGE="False"
su -p postgres -c "PATH=$PATH HOME=/tmp pytest -v tests/e2e/ --feast-version develop"

View file

@ -0,0 +1,10 @@
#!/usr/bin/env bash
mvn -f java/pom.xml --batch-mode --also-make --projects serving test
TEST_EXIT_CODE=$?
# Default artifact location setting in Prow jobs
LOGS_ARTIFACT_PATH=/logs/artifacts
cp -r serving/target/surefire-reports ${LOGS_ARTIFACT_PATH}/surefire-reports
exit ${TEST_EXIT_CODE}

View file

@ -0,0 +1,30 @@
# This script ensures that we don't accidentally cut the wrong kind of release on master or release branches
if [ "$#" -ne 2 ]
then
echo "Usage: validate-release.sh [major, minor, patch] branch"
echo "Example: validate-release.sh patch master"
exit 1
fi
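# Examples of invocations this script accepts (branch name is illustrative):
#   ./validate-release.sh minor master
#   ./validate-release.sh patch v0.10-branch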
if [ "$1" = "minor" ]; then
if [ "$2" = "master" ]; then
echo "Releasing a minor version on master, looks good!"
exit 0
else
echo "Can't release a minor version from a non-master branch! Please confirm the version you are releasing!!"
exit 1
fi
elif [ "$1" = "patch" ]; then
if [ "$2" = "master" ]; then
echo "Can't release a patch version from master branch! Please confirm the version you are releasing!!"
exit 1
else
echo "Releasing a patch version from a non-master branch, looks good!"
exit 0
fi
else
echo "Not sure what kind of release is happening. Please confirm that you are creating a minor release from master
or a patch from a release branch"
exit 1
fi

View file

@ -0,0 +1,166 @@
<p align="center">
<a href="https://feast.dev/">
<img src="docs/assets/feast_logo.png" width="550">
</a>
</p>
<br />
[![unit-tests](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/unit_tests.yml)
[![integration-tests-and-build](https://github.com/feast-dev/feast/actions/workflows/master_only.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/master_only.yml)
[![java-integration-tests](https://github.com/feast-dev/feast/actions/workflows/java_master_only.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/java_master_only.yml)
[![linter](https://github.com/feast-dev/feast/actions/workflows/linter.yml/badge.svg?branch=master&event=push)](https://github.com/feast-dev/feast/actions/workflows/linter.yml)
[![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://docs.feast.dev/)
[![Python API](https://img.shields.io/readthedocs/feast/master?label=Python%20API)](http://rtd.feast.dev/)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue)](https://github.com/feast-dev/feast/blob/master/LICENSE)
[![GitHub Release](https://img.shields.io/github/v/release/feast-dev/feast.svg?style=flat&sort=semver&color=blue)](https://github.com/feast-dev/feast/releases)
## Overview
Feast (**Fea**ture **St**ore) is an open source feature store for machine learning. Feast is the fastest path to productionizing analytic data for model training and online inference on your existing infrastructure.
Feast allows ML platform teams to:
* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction), and a battle-tested _feature server_ (to serve pre-computed features online).
* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensures that future feature values do not leak to models during training.
* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another.
Please see our [documentation](https://docs.feast.dev/) for more information about the project, or sign up for an [email newsletter](https://feast.dev/).
## 📐 Architecture
![](docs/assets/feast_marchitecture.png)
The above architecture is the minimal Feast deployment. Want to run the full Feast on Snowflake/GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws).
## 🐣 Getting Started
### 1. Install Feast
```commandline
pip install feast
```
### 2. Create a feature repository
```commandline
feast init my_feature_repo
cd my_feature_repo/feature_repo
```
### 3. Register your feature definitions and set up your feature store
```commandline
feast apply
```
### 4. Explore your data in the web UI (experimental)
![Web UI](ui/sample.png)
```commandline
feast ui
```
### 5. Build a training dataset
```python
from feast import FeatureStore
import pandas as pd
from datetime import datetime
entity_df = pd.DataFrame.from_dict({
"driver_id": [1001, 1002, 1003, 1004],
"event_timestamp": [
datetime(2021, 4, 12, 10, 59, 42),
datetime(2021, 4, 12, 8, 12, 10),
datetime(2021, 4, 12, 16, 40, 26),
datetime(2021, 4, 12, 15, 1 , 12)
]
})
store = FeatureStore(repo_path=".")
training_df = store.get_historical_features(
entity_df=entity_df,
features = [
'driver_hourly_stats:conv_rate',
'driver_hourly_stats:acc_rate',
'driver_hourly_stats:avg_daily_trips'
],
).to_df()
print(training_df.head())
# Train model
# model = ml.fit(training_df)
```
```commandline
event_timestamp driver_id conv_rate acc_rate avg_daily_trips
0 2021-04-12 08:12:10+00:00 1002 0.713465 0.597095 531
1 2021-04-12 10:59:42+00:00 1001 0.072752 0.044344 11
2 2021-04-12 15:01:12+00:00 1004 0.658182 0.079150 220
3 2021-04-12 16:40:26+00:00 1003 0.162092 0.309035 959
```
### 6. Load feature values into your online store
```commandline
CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
feast materialize-incremental $CURRENT_TIME
```
```commandline
Materializing feature view driver_hourly_stats from 2021-04-14 to 2021-04-15 done!
```
### 7. Read online features at low latency
```python
from pprint import pprint
from feast import FeatureStore
store = FeatureStore(repo_path=".")
feature_vector = store.get_online_features(
features=[
'driver_hourly_stats:conv_rate',
'driver_hourly_stats:acc_rate',
'driver_hourly_stats:avg_daily_trips'
],
entity_rows=[{"driver_id": 1001}]
).to_dict()
pprint(feature_vector)
# Make prediction
# model.predict(feature_vector)
```
```json
{
"driver_id": [1001],
"driver_hourly_stats__conv_rate": [0.49274],
"driver_hourly_stats__acc_rate": [0.92743],
"driver_hourly_stats__avg_daily_trips": [72]
}
```
## 📦 Functionality and Roadmap
{{ roadmap_contents }}
## 🎓 Important Resources
Please refer to the official [documentation](https://docs.feast.dev/):
* [Quickstart](https://docs.feast.dev/getting-started/quickstart)
* [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview)
* [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws)
* [Change Log](https://github.com/feast-dev/feast/blob/master/CHANGELOG.md)
* [Slack (#Feast)](https://slack.feast.dev/)
## 👋 Contributing
Feast is a community project and is still under active development. Please have a look at our contributing and development guides if you want to contribute to the project:
- [Contribution Process for Feast](https://docs.feast.dev/project/contributing)
- [Development Guide for Feast](https://docs.feast.dev/project/development-guide)
- [Development Guide for the Main Feast Repository](./CONTRIBUTING.md)
## ✨ Contributors
Thanks goes to these incredible people:
<a href="https://github.com/feast-dev/feast/graphs/contributors">
<img src="https://contrib.rocks/image?repo=feast-dev/feast" />
</a>

BIN
infra/terraform/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,28 @@
# Terraform config for feast on AWS
Uses Terraform 0.12.
1. Run `aws emr create-default-roles` once.
2. Create a tfvars file, e.g. `my.tfvars` and set name_prefix:
```
name_prefix = "my-feast"
region = "us-east-1"
```
3. Configure tf state backend, e.g.:
```
terraform {
backend "s3" {
bucket = "my-terraform-state-bucket"
key = "clusters/my-feast-test"
region = "us-west-2"
dynamodb_table = "terraform-state-lock"
encrypt = true
}
}
```
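4. Initialize the working directory so Terraform can set up the backend and download providers (standard Terraform workflow):
```
terraform init
```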
5. Use `terraform apply -var-file="my.tfvars"` to deploy.

117
infra/terraform/aws/eks.tf Normal file
View file

@ -0,0 +1,117 @@
terraform {
required_version = ">= 0.12.0"
}
provider "aws" {
version = ">= 2.28.1"
region = var.region
}
provider "random" {
version = "~> 2.1"
}
provider "local" {
version = "~> 1.2"
}
provider "null" {
version = "~> 2.1"
}
provider "template" {
version = "~> 2.1"
}
data "aws_eks_cluster" "cluster" {
name = module.eks.cluster_id
}
data "aws_eks_cluster_auth" "cluster" {
name = module.eks.cluster_id
}
provider "kubernetes" {
host = data.aws_eks_cluster.cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
token = data.aws_eks_cluster_auth.cluster.token
load_config_file = false
version = "~> 1.11"
}
data "aws_availability_zones" "available" {
}
locals {
cluster_name = "${var.name_prefix}-${random_string.suffix.result}"
}
resource "random_string" "suffix" {
length = 8
special = false
}
resource "aws_security_group" "all_worker_mgmt" {
name_prefix = "${var.name_prefix}-worker"
vpc_id = module.vpc.vpc_id
tags = var.tags
}
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "2.47.0"
name = "${var.name_prefix}-vpc"
cidr = "10.0.0.0/16"
azs = data.aws_availability_zones.available.names
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
enable_nat_gateway = true
single_nat_gateway = true
enable_dns_hostnames = true
public_subnet_tags = {
"kubernetes.io/cluster/${local.cluster_name}" = "shared"
"kubernetes.io/role/elb" = "1"
}
private_subnet_tags = {
"kubernetes.io/cluster/${local.cluster_name}" = "shared"
"kubernetes.io/role/internal-elb" = "1"
}
tags = var.tags
}
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "12.2.0"
cluster_name = local.cluster_name
cluster_version = "1.17"
subnets = module.vpc.private_subnets
tags = var.tags
vpc_id = module.vpc.vpc_id
worker_groups = [
{
name = "worker-group-1"
instance_type = "r3.large"
asg_desired_capacity = 2
},
{
name = "worker-group-2"
instance_type = "r3.large"
asg_desired_capacity = 1
},
]
worker_additional_security_group_ids = [aws_security_group.all_worker_mgmt.id]
map_roles = var.map_roles
map_accounts = var.map_accounts
workers_additional_policies = [aws_iam_policy.worker_policy.id]
}

View file

@ -0,0 +1,72 @@
data "aws_iam_instance_profile" "emr_default_role" {
name = "EMR_EC2_DefaultRole"
}
resource "aws_emr_cluster" "persistent_cluster" {
count = var.use_persistent_emr_cluster ? 1 : 0
name = "${var.name_prefix}-persistent-emr"
keep_job_flow_alive_when_no_steps = true
release_label = "emr-6.0.0"
ec2_attributes {
subnet_id = module.vpc.private_subnets[0]
additional_master_security_groups = aws_security_group.all_worker_mgmt.id
additional_slave_security_groups = aws_security_group.all_worker_mgmt.id
instance_profile = data.aws_iam_instance_profile.emr_default_role.arn
}
applications = ["Hadoop", "Hive", "Spark", "Livy"]
service_role = "EMR_DefaultRole"
bootstrap_action {
path = "s3://aws-bigdata-blog/artifacts/resize_storage/resize_storage.sh"
name = "runif"
args = ["--scaling-factor", "1.5"]
}
master_instance_fleet {
instance_type_configs {
instance_type = "m4.xlarge"
ebs_config {
size = "100"
type = "gp2"
volumes_per_instance = 1
}
}
launch_specifications {
spot_specification {
timeout_action = "SWITCH_TO_ON_DEMAND"
timeout_duration_minutes = 10
allocation_strategy = "capacity-optimized"
}
}
target_spot_capacity = 1
}
core_instance_fleet {
instance_type_configs {
bid_price_as_percentage_of_on_demand_price = 100
ebs_config {
size = "100"
type = "gp2"
volumes_per_instance = 1
}
instance_type = "m4.xlarge"
weighted_capacity = 1
}
launch_specifications {
spot_specification {
timeout_action = "SWITCH_TO_ON_DEMAND"
timeout_duration_minutes = 10
allocation_strategy = "capacity-optimized"
}
}
target_spot_capacity = 2
}
step_concurrency_level = 256
log_uri = "s3://${aws_s3_bucket.feast_bucket.id}/logs/${var.name_prefix}-persistent-emr/"
tags = var.tags
}

112
infra/terraform/aws/helm.tf Normal file
View file

@ -0,0 +1,112 @@
provider "helm" {
kubernetes {
host = data.aws_eks_cluster.cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
token = data.aws_eks_cluster_auth.cluster.token
load_config_file = false
}
}
# Construct feast configs that need to point to RDS and Redis.
#
# The RDS password is stored in a configmap, which is not awesome, but that RDS instance is not routable
# from outside the VPC anyway, so that'll do.
locals {
feast_core_config = {
redis = {
enabled = false
}
postgresql = {
enabled = false
}
kafka = {
enabled = false
}
"feast-core" = {
"application-generated.yaml" = {
enabled = false
}
"application-override.yaml" = {
spring = {
datasource = {
url = "jdbc:postgresql://${module.rds_cluster.endpoint}:5432/${module.rds_cluster.database_name}"
username = "${module.rds_cluster.master_username}"
password = "${random_password.db_password.result}"
}
}
feast = {
stream = {
type = "kafka"
options = {
bootstrapServers = aws_msk_cluster.msk.bootstrap_brokers
topic = "feast"
}
}
}
server = {
port = "8080"
}
}
}
"feast-serving" = {
"application-override.yaml" = {
enabled = true
feast = {
stores = [
{
name = "online"
type = "REDIS"
config = {
host = module.redis.endpoint
port = 6379
ssl = true
}
subscriptions = [
{
name = "*"
project = "*"
version = "*"
}
]
}
]
job_store = {
redis_host = module.redis.endpoint
redis_port = 6379
}
}
}
}
"feast-jupyter" = {
"envOverrides" = {
feast_redis_host = module.redis.endpoint
feast_redis_port = 6379
feast_redis_ssl = true
feast_emr_cluster_id = (length(aws_emr_cluster.persistent_cluster) > 0) ? aws_emr_cluster.persistent_cluster[0].id : null
feast_emr_region = var.region
feast_spark_staging_location = "s3://${aws_s3_bucket.feast_bucket.id}/artifacts/"
feast_emr_log_location = "s3://${aws_s3_bucket.feast_bucket.id}/emr-logs/"
feast_spark_launcher = "emr"
feast_historical_feature_output_location = "s3://${aws_s3_bucket.feast_bucket.id}/out/"
feast_historical_feature_output_format = "parquet"
demo_kafka_brokers = aws_msk_cluster.msk.bootstrap_brokers
demo_data_location = "s3://${aws_s3_bucket.feast_bucket.id}/test-data/"
}
}
}
}
resource "helm_release" "feast" {
name = "feast"
chart = "../../charts/feast"
wait = false
values = [
yamlencode(local.feast_core_config)
]
}

View file

@ -0,0 +1,27 @@
data "aws_iam_policy_document" "worker_policy_document" {
statement {
sid = "1"
actions = [
"s3:*",
"elasticmapreduce:*",
"glue:*",
"cloudwatch:*",
"ecr:*",
"iam:PassRole",
]
resources = [
"*",
]
}
}
resource "aws_iam_policy" "worker_policy" {
name = "${var.name_prefix}_feast_worker_policy"
path = "/"
description = "Worker IAM policy"
policy = data.aws_iam_policy_document.worker_policy_document.json
}

View file

@ -0,0 +1,52 @@
resource "aws_security_group" "broker" {
name_prefix = "${var.name_prefix}-kafka-broker"
vpc_id = module.vpc.vpc_id
ingress {
description = "Allow connections from the worker group"
security_groups = [aws_security_group.all_worker_mgmt.id]
protocol = "tcp"
from_port = 0
to_port = 65535
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = var.tags
}
resource "aws_msk_cluster" "msk" {
cluster_name = "${var.name_prefix}-kafka"
kafka_version = "2.4.1.1"
number_of_broker_nodes = 2
broker_node_group_info {
instance_type = "kafka.t3.small"
ebs_volume_size = 100
client_subnets = [module.vpc.private_subnets[0], module.vpc.private_subnets[1]]
security_groups = [aws_security_group.broker.id]
}
encryption_info {
encryption_in_transit {
client_broker = "TLS_PLAINTEXT"
}
}
logging_info {
broker_logs {
s3 {
enabled = true
bucket = aws_s3_bucket.feast_bucket.id
prefix = "msk-logs"
}
}
}
tags = var.tags
}

View file

@ -0,0 +1,10 @@
module "redis" {
source = "git::https://github.com/cloudposse/terraform-aws-elasticache-redis.git?ref=tags/0.25.0"
subnets = module.vpc.private_subnets
name = "${var.name_prefix}-online"
vpc_id = module.vpc.vpc_id
allowed_security_groups = [aws_security_group.all_worker_mgmt.id]
availability_zones = module.vpc.azs
tags = var.tags
}

View file

@ -0,0 +1,35 @@
resource "random_password" "db_password" {
length = 16
special = true
override_special = "!#()-[]<>"
}
module "rds_cluster" {
source = "git::https://github.com/cloudposse/terraform-aws-rds-cluster.git?ref=tags/0.36.0"
name = "${var.name_prefix}-db"
engine = "aurora-postgresql"
engine_mode = "serverless"
engine_version = "10.7"
cluster_family = "aurora-postgresql10"
cluster_size = 0
admin_user = var.postgres_db_user
admin_password = random_password.db_password.result
db_name = var.postgres_db_name
db_port = 5432
instance_type = "db.t2.small"
vpc_id = module.vpc.vpc_id
security_groups = [aws_security_group.all_worker_mgmt.id]
subnets = module.vpc.private_subnets
scaling_configuration = [
{
auto_pause = true
max_capacity = 16
min_capacity = 2
seconds_until_auto_pause = 300
timeout_action = "ForceApplyCapacityChange"
}
]
tags = var.tags
}

22
infra/terraform/aws/s3.tf Normal file
View file

@ -0,0 +1,22 @@
resource "random_string" "s3_suffix" {
length = 8
lower = true
upper = false
special = false
}
resource "aws_s3_bucket" "feast_bucket" {
# Since bucket names are globally unique, we add a random suffix here.
bucket = "${var.name_prefix}-feast-${random_string.s3_suffix.result}"
acl = "private"
server_side_encryption_configuration {
rule {
apply_server_side_encryption_by_default {
sse_algorithm = "AES256"
}
}
}
tags = var.tags
}

View file

@ -0,0 +1,46 @@
variable "region" {
}
variable "name_prefix" {
}
variable "postgres_db_name" {
default = "feast"
}
variable "postgres_db_user" {
default = "feast"
}
variable "map_accounts" {
description = "Additional AWS account numbers to add to the aws-auth configmap."
type = list(string)
default = [
]
}
variable "map_roles" {
description = "Additional IAM roles to add to the aws-auth configmap."
type = list(object({
rolearn = string
username = string
groups = list(string)
}))
default = [
]
}
variable "use_persistent_emr_cluster" {
description = "Create a persistent EMR cluster."
default = true
}
variable "tags" {
description = "Tags"
type = map(string)
default = {}
}

View file

@ -0,0 +1,36 @@
# Terraform config for Feast on Azure
This serves as a guide on how to deploy Feast on Azure. At the end of this guide, we will have provisioned:
1. AKS cluster
2. Feast services running on AKS
3. Azure Cache (Redis) as online store
4. Spark operator on AKS
5. Kafka running on HDInsight.
# Steps
1. Create a tfvars file, e.g. `my.tfvars`. A sample configuration is as below:
```
name_prefix = "feast09"
resource_group = "Feast" # pre-exisiting resource group
```
2. Configure tf state backend, e.g.:
```
terraform {
backend "azurerm" {
storage_account_name = "<your storage account name>"
container_name = "<your container name>"
key = "<your blob name>"
}
}
```
3. Use `terraform apply -var-file="my.tfvars"` to deploy.
Note: to get the list of Kafka brokers needed for streaming ingestion, use
`curl -sS -u <Kafka gateway username>:<Kafka gateway password> -G https://<Kafka cluster name>.azurehdinsight.net/api/v1/clusters/<Kafka cluster name>/services/KAFKA/components/KAFKA_BROKER | jq -r '["\(.host_components[].HostRoles.host_name):9092"] | join(",")'`
where the Kafka gateway username is `<name_prefix>-kafka-gateway`, the Kafka cluster name is `<name_prefix>-kafka`, and the Kafka gateway password is stored in a Kubernetes secret named `feast-kafka-gateway`.

View file

@ -0,0 +1,15 @@
resource "azurerm_kubernetes_cluster" "main" {
name = "${var.name_prefix}-aks"
location = data.azurerm_resource_group.main.location
resource_group_name = data.azurerm_resource_group.main.name
dns_prefix = var.name_prefix
default_node_pool {
name = var.name_prefix
vm_size = var.aks_machine_type
node_count = var.aks_node_count
vnet_subnet_id = azurerm_subnet.main.id
}
identity {
type = "SystemAssigned"
}
}

View file

@ -0,0 +1,102 @@
locals {
feast_postgres_secret_name = "${var.name_prefix}-postgres-secret"
feast_helm_values = {
redis = {
enabled = false
}
grafana = {
enabled = false
}
kafka = {
enabled = false
}
postgresql = {
existingSecret = local.feast_postgres_secret_name
}
feast-core = {
postgresql = {
existingSecret = local.feast_postgres_secret_name
}
}
feast-serving = {
enabled = true
"application-override.yaml" = {
feast = {
core-host = "${var.name_prefix}-feast-core"
core-grpc-port = 6565
active_store = "online_store"
stores = [
{
name = "online_store"
type = "REDIS"
config = {
host = azurerm_redis_cache.main.hostname
port = azurerm_redis_cache.main.ssl_port
ssl = true
}
}
]
}
}
}
feast-jupyter = {
enabled = true
envOverrides = {
feast_redis_host = azurerm_redis_cache.main.hostname,
feast_redis_port = azurerm_redis_cache.main.ssl_port,
feast_redis_ssl = true
feast_spark_launcher = "k8s"
feast_spark_staging_location = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/artifacts/"
feast_historical_feature_output_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/out/"
feast_historical_feature_output_format : "parquet"
demo_data_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/test-data/"
feast_azure_blob_account_name = azurerm_storage_account.main.name
feast_azure_blob_account_access_key = azurerm_storage_account.main.primary_access_key
}
}
}
}
resource "random_password" "feast-postgres-password" {
length = 16
special = false
}
resource "kubernetes_secret" "feast-postgres-secret" {
metadata {
name = local.feast_postgres_secret_name
}
data = {
postgresql-password = random_password.feast-postgres-password.result
}
}
resource "helm_release" "feast" {
depends_on = [kubernetes_secret.feast-postgres-secret]
name = var.name_prefix
namespace = var.aks_namespace
repository = "https://feast-helm-charts.storage.googleapis.com"
chart = "feast"
values = [
yamlencode(local.feast_helm_values)
]
}
resource "helm_release" "sparkop" {
name = "sparkop"
namespace = "default"
repository = "https://googlecloudplatform.github.io/spark-on-k8s-operator"
chart = "spark-operator"
set {
name = "serviceAccounts.spark.name"
value = "spark"
}
}

View file

@ -0,0 +1,75 @@
resource "azurerm_hdinsight_kafka_cluster" "main" {
name = "${var.name_prefix}-kafka"
location = data.azurerm_resource_group.main.location
resource_group_name = data.azurerm_resource_group.main.name
cluster_version = "4.0"
tier = "Standard"
component_version {
kafka = "2.1"
}
gateway {
enabled = true
username = "${var.name_prefix}-kafka-gateway"
password = random_password.feast-kafka-gateway-password.result
}
storage_account {
is_default = true
storage_account_key = azurerm_storage_account.main.primary_access_key
storage_container_id = azurerm_storage_container.kafka.id
}
roles {
head_node {
vm_size = var.kafka_head_vm_size
username = "${var.name_prefix}-kafka-user"
password = random_password.feast-kafka-role-password.result
subnet_id = azurerm_subnet.kafka.id
virtual_network_id = azurerm_virtual_network.main.id
}
worker_node {
vm_size = var.kafka_worker_vm_size
username = "${var.name_prefix}-kafka-user"
password = random_password.feast-kafka-role-password.result
number_of_disks_per_node = var.kafka_worker_disks_per_node
target_instance_count = var.kafka_worker_target_instance_count
subnet_id = azurerm_subnet.kafka.id
virtual_network_id = azurerm_virtual_network.main.id
}
zookeeper_node {
vm_size = var.kafka_zookeeper_vm_size
username = "${var.name_prefix}-kafka-user"
password = random_password.feast-kafka-role-password.result
subnet_id = azurerm_subnet.kafka.id
virtual_network_id = azurerm_virtual_network.main.id
}
}
}
resource "random_password" "feast-kafka-role-password" {
length = 16
special = false
min_upper = 1
min_lower = 1
min_numeric = 1
}
resource "random_password" "feast-kafka-gateway-password" {
length = 16
special = true
min_upper = 1
min_lower = 1
min_special = 1
min_numeric = 1
}
resource "kubernetes_secret" "feast-kafka-gateway-secret" {
metadata {
name = "feast-kafka-gateway"
}
data = {
kafka-gateway-password = random_password.feast-kafka-gateway-password.result
}
}

View file

@ -0,0 +1,28 @@
provider "azurerm" {
version = "=2.40.0"
features {}
}
provider "helm" {
version = "~> 1.3.2"
kubernetes {
host = azurerm_kubernetes_cluster.main.kube_config.0.host
username = azurerm_kubernetes_cluster.main.kube_config.0.username
password = azurerm_kubernetes_cluster.main.kube_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
load_config_file = false
}
}
provider "kubernetes" {
version = "~> 1.13.3"
host = azurerm_kubernetes_cluster.main.kube_config.0.host
username = azurerm_kubernetes_cluster.main.kube_config.0.username
password = azurerm_kubernetes_cluster.main.kube_config.0.password
client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
load_config_file = false
}

View file

@ -0,0 +1,12 @@
resource "azurerm_redis_cache" "main" {
name = "${var.name_prefix}-redis"
location = data.azurerm_resource_group.main.location
resource_group_name = data.azurerm_resource_group.main.name
capacity = var.redis_capacity
family = "P"
sku_name = "Premium"
redis_configuration {
enable_authentication = false
}
subnet_id = azurerm_subnet.redis.id
}

View file

@ -0,0 +1,27 @@
resource "kubernetes_role" "sparkop-user" {
metadata {
name = "use-spark-operator"
namespace = var.aks_namespace
}
rule {
api_groups = ["sparkoperator.k8s.io"]
resources = ["sparkapplications"]
verbs = ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"]
}
}
resource "kubernetes_role_binding" "sparkop-user" {
metadata {
name = "use-spark-operator"
namespace = var.aks_namespace
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "Role"
name = kubernetes_role.sparkop-user.metadata[0].name
}
subject {
kind = "ServiceAccount"
name = "default"
}
}

View file

@ -0,0 +1,21 @@
resource "azurerm_storage_account" "main" {
name = "${var.name_prefix}storage"
resource_group_name = data.azurerm_resource_group.main.name
location = data.azurerm_resource_group.main.location
account_kind = "StorageV2"
account_tier = "Standard"
account_replication_type = var.storage_account_replication_type
allow_blob_public_access = true
}
resource "azurerm_storage_container" "staging" {
name = "staging"
storage_account_name = azurerm_storage_account.main.name
container_access_type = "blob"
}
resource "azurerm_storage_container" "kafka" {
name = "kafkastorage"
storage_account_name = azurerm_storage_account.main.name
container_access_type = "blob"
}

View file

@ -0,0 +1,57 @@
variable "resource_group" {
type = string
}
variable "name_prefix" {
type = string
}
variable "aks_machine_type" {
type = string
default = "Standard_DS2_v2"
}
variable "aks_node_count" {
type = number
default = 2
}
variable "redis_capacity" {
type = number
default = 2
}
variable "storage_account_replication_type" {
type = string
default = "LRS"
}
variable "aks_namespace" {
type = string
default = "default"
}
variable "kafka_head_vm_size" {
type = string
default = "Standard_DS3_v2"
}
variable "kafka_worker_vm_size" {
type = string
default = "A5"
}
variable "kafka_zookeeper_vm_size" {
type = string
default = "Standard_DS3_v2"
}
variable "kafka_worker_disks_per_node" {
type = number
default = 3
}
variable "kafka_worker_target_instance_count" {
type = number
default = 3
}

View file

@ -0,0 +1,31 @@
data "azurerm_resource_group" "main" {
name = var.resource_group
}
resource "azurerm_virtual_network" "main" {
name = "${var.name_prefix}-vnet"
location = data.azurerm_resource_group.main.location
resource_group_name = data.azurerm_resource_group.main.name
address_space = ["10.1.0.0/16"]
}
resource "azurerm_subnet" "main" {
name = "${var.name_prefix}-aks-subnet"
resource_group_name = data.azurerm_resource_group.main.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = ["10.1.0.0/24"]
}
resource "azurerm_subnet" "redis" {
name = "${var.name_prefix}-redis-subnet"
resource_group_name = data.azurerm_resource_group.main.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = ["10.1.128.0/24"]
}
resource "azurerm_subnet" "kafka" {
name = "${var.name_prefix}-kafka-subnet"
resource_group_name = data.azurerm_resource_group.main.name
virtual_network_name = azurerm_virtual_network.main.name
address_prefixes = ["10.1.64.0/24"]
}

View file

@ -0,0 +1,35 @@
# Terraform config for feast on GCP
This serves as a guide on how to deploy Feast on GCP. At the end of this guide, we will have provisioned:
1. GKE cluster
2. Feast services running on GKE
3. Google Memorystore (Redis) as online store
4. Dataproc cluster
5. Kafka running on GKE, exposed to the Dataproc cluster via internal load balancer.
# Steps
1. Create a tfvars file, e.g. `my.tfvars`. A sample configuration is as below:
```
gcp_project_name = "kf-feast"
name_prefix = "feast-0-8"
region = "asia-east1"
gke_machine_type = "n1-standard-2"
network = "default"
subnetwork = "default"
dataproc_staging_bucket = "kf-feast-dataproc-staging-test"
```
2. Configure tf state backend, e.g.:
```
terraform {
backend "gcs" {
bucket = "<your bucket name>"
prefix = "terraform/feast"
}
}
```
3. Use `terraform apply -var-file="my.tfvars"` to deploy.

View file

@ -0,0 +1,68 @@
resource "google_storage_bucket" "dataproc_staging_bucket" {
name = var.dataproc_staging_bucket
project = var.gcp_project_name
location = var.region
force_destroy = true
}
resource "google_dataproc_autoscaling_policy" "feast_dataproc_cluster_asp" {
policy_id = var.name_prefix
location = var.region
project = var.gcp_project_name
worker_config {
min_instances = var.min_dataproc_worker_count
max_instances = var.max_dataproc_worker_count
}
basic_algorithm {
yarn_config {
graceful_decommission_timeout = "3600s"
scale_down_factor = 0.5
scale_up_factor = 0.5
}
}
}
resource "google_dataproc_cluster" "feast_dataproc_cluster" {
project = var.gcp_project_name
name = var.name_prefix
region = var.region
cluster_config {
staging_bucket = google_storage_bucket.dataproc_staging_bucket.name
master_config {
num_instances = 1
machine_type = var.dataproc_master_instance_type
disk_config {
boot_disk_type = var.dataproc_master_disk_type
boot_disk_size_gb = var.dataproc_master_disk_size
}
}
worker_config {
num_instances = var.min_dataproc_worker_count
machine_type = var.dataproc_worker_instance_type
disk_config {
boot_disk_type = var.dataproc_worker_disk_type
boot_disk_size_gb = var.dataproc_worker_disk_size
}
}
gce_cluster_config {
subnetwork = var.subnetwork
service_account = google_service_account.feast_sa.email
internal_ip_only = true
}
software_config {
image_version = var.dataproc_image_version
}
autoscaling_config {
policy_uri = google_dataproc_autoscaling_policy.feast_dataproc_cluster_asp.name
}
}
}

View file

@ -0,0 +1,121 @@
locals {
feast_postgres_secret_name = "${var.name_prefix}-postgres-secret"
feast_helm_values = {
redis = {
enabled = false
}
kafka = {
enabled = true
externalAccess = {
enabled = true
service = {
types = "LoadBalancer"
port = 9094
loadBalancerIPs = [google_compute_address.kafka_broker.address]
loadBalancerSourceRanges = ["10.0.0.0/8"]
annotations = {
"cloud.google.com/load-balancer-type" = "Internal"
}
}
}
}
grafana = {
enabled = false
}
postgresql = {
existingSecret = local.feast_postgres_secret_name
}
feast-core = {
postgresql = {
existingSecret = local.feast_postgres_secret_name
}
}
feast-serving = {
enabled = true
"application-override.yaml" = {
feast = {
core-host = "${var.name_prefix}-feast-core"
core-grpc-port = 6565
active_store = "online_store"
stores = [
{
name = "online_store"
type = "REDIS"
config = {
host = google_redis_instance.online_store.host
port = 6379
subscriptions = [
{
name = "*"
project = "*"
}
]
}
}
]
}
}
}
feast-jupyter = {
enabled = true
envOverrides = {
feast_redis_host = google_redis_instance.online_store.host,
feast_redis_port = 6379,
feast_spark_launcher = "dataproc"
feast_dataproc_cluster_name = google_dataproc_cluster.feast_dataproc_cluster.name
feast_dataproc_project = var.gcp_project_name
feast_dataproc_region = var.region
feast_spark_staging_location = "gs://${var.dataproc_staging_bucket}/artifacts/"
feast_historical_feature_output_location : "gs://${var.dataproc_staging_bucket}/out/"
feast_historical_feature_output_format : "parquet"
demo_kafka_brokers : "${google_compute_address.kafka_broker.address}:9094"
demo_data_location : "gs://${var.dataproc_staging_bucket}/test-data/"
}
gcpServiceAccount = {
enabled = true
name = var.feast_sa_secret_name
key = "credentials.json"
}
}
}
}
resource "random_password" "feast-postgres-password" {
length = 16
special = false
}
resource "kubernetes_secret" "feast-postgres-secret" {
metadata {
name = local.feast_postgres_secret_name
}
data = {
postgresql-password = random_password.feast-postgres-password.result
}
}
resource "google_compute_address" "kafka_broker" {
project = var.gcp_project_name
region = var.region
subnetwork = var.subnetwork
name = "${var.name_prefix}-kafka"
address_type = "INTERNAL"
}
resource "helm_release" "feast" {
depends_on = [kubernetes_secret.feast-postgres-secret, kubernetes_secret.feast_sa_secret]
name = var.name_prefix
chart = "https://feast-helm-charts.storage.googleapis.com/feast-0.100.4.tgz"
values = [
yamlencode(local.feast_helm_values)
]
}

View file

@ -0,0 +1,19 @@
resource "google_container_cluster" "feast_gke_cluster" {
name = "${var.name_prefix}-cluster"
location = var.region
network = var.network
subnetwork = var.subnetwork
initial_node_count = var.gke_node_count
node_config {
machine_type = var.gke_machine_type
}
ip_allocation_policy {
}
}
data "google_container_cluster" "feast_gke_cluster" {
location = var.region
name = google_container_cluster.feast_gke_cluster.name
}

View file

@ -0,0 +1,37 @@
resource "google_service_account" "feast_sa" {
account_id = var.name_prefix
display_name = var.name_prefix
project = var.gcp_project_name
}
resource "google_service_account_key" "feast_sa" {
service_account_id = google_service_account.feast_sa.name
}
resource "google_project_iam_member" "feast_dataproc_worker" {
project = var.gcp_project_name
role = "roles/dataproc.worker"
member = "serviceAccount:${google_service_account.feast_sa.email}"
}
resource "google_project_iam_member" "feast_dataproc_editor" {
project = var.gcp_project_name
role = "roles/dataproc.editor"
member = "serviceAccount:${google_service_account.feast_sa.email}"
}
resource "google_project_iam_member" "feast_batch_ingestion_storage" {
project = var.gcp_project_name
role = "roles/storage.admin"
member = "serviceAccount:${google_service_account.feast_sa.email}"
}
resource "kubernetes_secret" "feast_sa_secret" {
metadata {
name = var.feast_sa_secret_name
}
data = {
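# private_key is base64-encoded by GCP, and the Kubernetes provider re-encodes
# secret values on write, so decode it here to store the raw JSON key.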
"credentials.json" = base64decode(google_service_account_key.feast_sa.private_key)
}
}

View file

@ -0,0 +1,18 @@
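# Cloud Memorystore (Redis) instance that backs the Feast online store.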
resource "google_redis_instance" "online_store" {
project = var.gcp_project_name
region = var.region
name = "${var.name_prefix}-online-store"
tier = var.redis_tier
memory_size_gb = var.redis_memory_size_gb
authorized_network = data.google_compute_network.redis-network.id
redis_version = "REDIS_5_0"
display_name = "Feast Online Store"
}
data "google_compute_network" "redis-network" {
project = var.gcp_project_name
name = var.network
}

View file

@ -0,0 +1,27 @@
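# The Kubernetes and Helm providers authenticate directly against the GKE
# cluster created by this module, so no local kubeconfig is needed.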
provider "google" {
version = "~> 3.46"
project = var.gcp_project_name
}
data "google_client_config" "gcp_client" {
provider = google
}
provider "kubernetes" {
version = "~> 1.13.3"
host = google_container_cluster.feast_gke_cluster.endpoint
token = data.google_client_config.gcp_client.access_token
cluster_ca_certificate = base64decode(google_container_cluster.feast_gke_cluster.master_auth.0.cluster_ca_certificate)
load_config_file = false
}
provider "helm" {
version = "~> 1.3.2"
kubernetes {
host = google_container_cluster.feast_gke_cluster.endpoint
token = data.google_client_config.gcp_client.access_token
cluster_ca_certificate = base64decode(google_container_cluster.feast_gke_cluster.master_auth.0.cluster_ca_certificate)
load_config_file = false
}
}

View file

@ -0,0 +1,103 @@
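# Module inputs; variables without a default must be supplied by the caller.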
variable "gcp_project_name" {
description = "GCP project name"
}
variable "name_prefix" {
description = "Prefix to be used when naming the different components of Feast"
}
variable "region" {
description = "Region for GKE and Dataproc cluster"
}
variable "gke_machine_type" {
description = "GKE node pool machine type"
default = "n1-standard-4"
}
variable "gke_node_count" {
description = "Number of nodes in the GKE default node pool"
default = 1
}
variable "gke_disk_size_gb" {
description = "Disk size for nodes in the GKE default node pool"
default = 100
}
variable "gke_disk_type" {
description = "Disk type for nodes in the GKE default node pool"
default = "pd-standard"
}
variable "network" {
description = "Network for GKE and Dataproc cluster"
}
variable "subnetwork" {
description = "Subnetwork for GKE and Dataproc cluster"
}
variable "dataproc_staging_bucket" {
description = "GCS bucket for staging temporary files required for dataproc jobs"
}
variable "min_dataproc_worker_count" {
description = "Minimum dataproc worker count"
default = 2
}
variable "max_dataproc_worker_count" {
description = "Maximum dataproc worker count"
default = 4
}
variable "dataproc_master_instance_type" {
description = "Machine type for dataproc cluster master"
default = "n1-standard-2"
}
variable "dataproc_master_disk_type" {
description = "Disk type for dataproc cluster master"
default = "pd-standard"
}
variable "dataproc_master_disk_size" {
description = "Disk size for dataproc cluster master"
default = 100
}
variable "dataproc_worker_instance_type" {
description = "Machine type for dataproc cluster worker"
default = "n1-standard-2"
}
variable "dataproc_worker_disk_type" {
description = "Disk type for dataproc cluster worker"
default = "pd-standard"
}
variable "dataproc_worker_disk_size" {
description = "Disk size for dataproc cluster worker"
default = 100
}
variable "dataproc_image_version" {
description = "Dataproc image version"
default = "1.5-debian10"
}
variable "redis_tier" {
description = "GCP Redis instance tier"
default = "BASIC"
}
variable "redis_memory_size_gb" {
description = "Redis memory size in Gb"
default = 2
}
variable "feast_sa_secret_name" {
description = "Kubernetes secret name for Feast GCP service account"
default = "feast-gcp-service-account"
}
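
For reference, a minimal `terraform.tfvars` covering just the variables without defaults might look like the sketch below; every value is a placeholder assumption, not something taken from this module:

```
gcp_project_name        = "my-gcp-project"          # placeholder project ID
name_prefix             = "feast"                   # prefix for resource names
region                  = "us-central1"             # placeholder region
network                 = "default"                 # existing VPC network
subnetwork              = "default"                 # existing subnetwork
dataproc_staging_bucket = "my-feast-staging-bucket" # assumed to already exist
```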