diff --git a/.gitignore b/.gitignore index c3a2a6c3..aeda6a36 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ +.firewall-releases .rime-releases .tmp-charts -**/charts +**/charts/*.tgz **/Chart.lock diff --git a/Makefile b/Makefile index e929a5d7..5c468c67 100644 --- a/Makefile +++ b/Makefile @@ -1,50 +1,80 @@ +# vim: filetype=make include ../../make_utils/env-var.mk +# General configurations of make options. +MAKEFLAGS += --no-print-directory + # Creates a Helm chart releases: # See: https://helm.sh/docs/topics/chart_repository/ # # To create a new release of the RIME chart: -# 1. Ensure the helm dependency versions are correct in rime/charts -# and/or update with `helm dependency update` -# 2. Run `make VERSION=##.##.## APP_VERSION=v## create_rime_charts_release` +# 1. Run `make VERSION=##.##.## APP_VERSION=v## create_rime_charts_release` +# +# To create a new release of the Firewall chart: +# 1. Run `make create_firewall_charts_release` SHELL = /bin/bash VERSION_FILE := ../../version.txt VERSION ?= $(shell cat ${VERSION_FILE}) APP_VERSION := v$(VERSION) +FW_VERSION_FILE := ../../fw_version.txt +FW_VERSION ?= $(shell cat ${FW_VERSION_FILE}) +FW_APP_VERSION := v$(FW_VERSION) REPO_URL=https://robustintelligence.github.io/helm - -OPERATOR_ROLE_FILE := rime-agent/templates/operator/role.yaml - -.PHONY: clean .tmp-charts/rime .tmp-charts/rime-agent .tmp-charts/rime-extras .tmp-charts/rime-kube-system gen_operator_manifests - -clean: +REPO_FW_URL=https://robustintelligence.github.io/helm/fw + +RIME_AGENT_OPERATOR_ROLE_FILE := $(shell pwd)/rime-agent/templates/operator/role.yaml +RI_FIREWALL_OPERATOR_ROLE_FILE := $(shell pwd)/ri-firewall/templates/operator/role.yaml +# We use absolute paths for the CRD_OUT_DIR paths because `controller-gen` is +# run in the OPERATOR_DIR working directory, which is in `go/`. +# It is easier to reason about an absolute path than a relative path from a +# different project directory. 
+# The OPERATOR_DIR is a relative path from this Makefile because we use it to +# change directory to where the operator code is defined. +RIME_AGENT_CRD_OUT_DIR := $(shell pwd)/rime-agent/crds +RIME_AGENT_OPERATOR_DIR := ../../go/dataplane/operator +RI_FIREWALL_CRD_OUT_DIR := $(shell pwd)/ri-firewall/crds +RI_FIREWALL_OPERATOR_DIR := ../../go/generativefirewall/operator + +MAKEFLAGS += --no-print-directory + +define release_targets + $(patsubst %,.tmp-charts/%,$(shell find $(1) -type f \( -name '*.yaml' -o -name '*.tpl' -o -name '*.md' -o -name '*.txt' \))) +endef + +.SECONDARY: +.SECONDEXPANSION: +.PHONY: clean-firewall clean-rime + +clean-rime: rm -rf .tmp-charts/ rm -rf .rime-releases/ - rm -rf rime-agent/crds - rm -rf $(OPERATOR_ROLE_FILE) -# Rule to copy a file to .tmp-charts/ -.tmp-charts/%: % - mkdir -p $(@D) +clean-firewall: + rm -rf .tmp-charts/ + rm -rf .firewall-releases/ + +.tmp-charts/%.yaml: %.yaml + @mkdir -p $(@D) cp $< $@ -# Rules to create .tmp-charts by copying only the chart files. -.tmp-charts/rime: .tmp-charts/rime/Chart.yaml .tmp-charts/rime/Chart.lock .tmp-charts/rime/values.yaml $(patsubst %, .tmp-charts/%, $(wildcard rime/templates/*.*)) $(patsubst %, .tmp-charts/%, $(wildcard rime/charts/*.tgz)) $(patsubst %, .tmp-charts/%, $(wildcard rime/custom-key-auth/*.*)) - ( \ - cp -rf "rime/templates/." ".tmp-charts/rime/templates/." \ - ) +.tmp-charts/%.tpl: %.tpl + @mkdir -p $(@D) + cp $< $@ -.tmp-charts/rime-agent: .tmp-charts/rime-agent/Chart.yaml .tmp-charts/rime-agent/values.yaml $(patsubst %, .tmp-charts/%, $(wildcard rime-agent/templates/*.*)) $(patsubst %, .tmp-charts/%, $(wildcard rime-agent/templates/operator/*.*)) $(patsubst %, .tmp-charts/%, $(wildcard rime-agent/crds/*.*)) - ( \ - cp -rf "rime-agent/crds" ".tmp-charts/rime-agent/." && \ - cp "$(OPERATOR_ROLE_FILE)" ".tmp-charts/rime-agent/templates/operator/." 
\ - ) +.tmp-charts/%.txt: %.txt + @mkdir -p $(@D) + cp $< $@ -.tmp-charts/rime-extras: .tmp-charts/rime-extras/Chart.yaml .tmp-charts/rime-extras/Chart.lock .tmp-charts/rime-extras/values.yaml $(patsubst %, .tmp-charts/%, $(wildcard rime-extras/charts/*.tgz)) +.tmp-charts/%.md: %.md + @mkdir -p $(@D) + cp $< $@ -.tmp-charts/rime-kube-system: .tmp-charts/rime-kube-system/Chart.yaml .tmp-charts/rime-kube-system/Chart.lock .tmp-charts/rime-kube-system/values.yaml $(patsubst %, .tmp-charts/%, $(wildcard rime-kube-system/charts/*.tgz)) +# Rules to create .tmp-charts by copying only the chart files. +.tmp-charts/%: $$(call release_targets,$$(@F)) + @cd $@ && \ + helm dependency update # Rules to create a release tar-ball for the rime chart in .tmp-charts # for the given VERSION. @@ -57,7 +87,7 @@ clean: popd \ ) -.rime-releases/rime-agent-$(VERSION).tgz: gen_operator_manifests .tmp-charts/rime-agent +.rime-releases/rime-agent-$(VERSION).tgz: .tmp-charts/rime-agent ( \ $(call check_defined, APP_VERSION VERSION, helm chart version) \ mkdir -p .rime-releases && \ @@ -94,27 +124,122 @@ clean: ) # Creates a new rime Helm chart release. -create_rime_charts_release: clean .rime-releases/index.yaml +create_rime_charts_release: clean-rime .rime-releases/index.yaml -### Operator manfiest files for rime-agent helm chart ### -gen_operator_manifests: rime-agent/crds/rimejob-crd.yaml $(OPERATOR_ROLE_FILE) - -rime-agent/crds/rimejob-crd.yaml: rime-agent/crds ../../go/dataplane/operator/api/v1/rimejob.go ../../go/dataplane/operator/api/v1/groupversion_info.go - # TODO: make gen_go_protos a prereq instead - cd ../.. && make gen_go_protos - cd ../../go/dataplane/operator && \ - controller-gen crd paths="./..." 
output:crd:stdout > ../../../deployments/helm/rime-agent/crds/rimejob-crd.yaml +### Operator manifest files for RI Helm charts ### +.PHONY: gen_operator_manifests +gen_operator_manifests: gen_rime_agent_crds gen_ri_firewall_crds $(RIME_AGENT_OPERATOR_ROLE_FILE) $(RI_FIREWALL_OPERATOR_ROLE_FILE) # CRD is generated into a subdirectory called 'crds' so that helm will skip if already installed # as CRDs are cluster scope. # https://helm.sh/docs/chart_best_practices/custom_resource_definitions/ -rime-agent/crds: - mkdir -p $@ - -$(OPERATOR_ROLE_FILE): ../../go/dataplane/operator/controllers/rimejob_controller.go - # TODO: make gen_go_protos a prereq instead +$(RIME_AGENT_CRD_OUT_DIR): + @mkdir -p $@ + +$(RI_FIREWALL_CRD_OUT_DIR): + @mkdir -p $@ + +define generate_crds + @cd ../.. && make gen_go_protos + @cd $(1) && \ + controller-gen crd paths="./..." output:crd:stdout output:crd:dir=$(2) +endef + +.PHONY: gen_rime_agent_crds +gen_rime_agent_crds: $(wildcard ../../go/dataplane/operator/api/v1/*.go) $(RIME_AGENT_CRD_OUT_DIR) + $(call generate_crds,$(RIME_AGENT_OPERATOR_DIR),$(RIME_AGENT_CRD_OUT_DIR)) + +.PHONY: gen_ri_firewall_crds +gen_ri_firewall_crds: $(wildcard ../../go/generativefirewall/operator/api/v1/*.go) $(RI_FIREWALL_CRD_OUT_DIR) + $(call generate_crds,$(RI_FIREWALL_OPERATOR_DIR),$(RI_FIREWALL_CRD_OUT_DIR)) + + +define crd_diff_check_impl + $(eval $@_TMP := $(shell mktemp -d /tmp/crdXXXXXXXXXXXXXXX)) + $(call generate_crds,$(1),$($@_TMP)) + @diff $(2) $($@_TMP) || (echo 'ERROR: CRDs in "$(2)" need to be updated' && rm -rf $($@_TMP) && exit 1) + @rm -rf $($@_TMP) +endef + +.PHONY: crd_diff_check +crd_diff_check: + $(call crd_diff_check_impl,$(RIME_AGENT_OPERATOR_DIR),$(RIME_AGENT_CRD_OUT_DIR)) + $(call crd_diff_check_impl,$(RI_FIREWALL_OPERATOR_DIR),$(RI_FIREWALL_CRD_OUT_DIR)) + +# Arguments: +# 1. Path to the output role YAML file. +# 2. Operator Go source directory. +# 3. The name of the Helm role (to be substituted) +# 4. 
The name of the service account creation flag (to be substituted) +define generate_operator_role cd ../.. && make gen_go_protos - cd ../../go/dataplane/operator && \ - controller-gen rbac:roleName="PLACEHOLDER_ROLE_NAME" paths="./..." output:rbac:stdout | sed 's/PLACEHOLDER_ROLE_NAME/{{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-role/1' > ../../../deployments/helm/$(OPERATOR_ROLE_FILE) - echo '{{- if .Values.rimeAgent.operator.serviceAccount.create -}}' | cat - $(OPERATOR_ROLE_FILE) > temp.yaml && mv temp.yaml $(OPERATOR_ROLE_FILE) && \ - echo '{{- end }}' >> $(OPERATOR_ROLE_FILE) + cd $(2) && \ + controller-gen rbac:roleName="PLACEHOLDER_ROLE_NAME" paths="./..." output:rbac:stdout | sed 's/PLACEHOLDER_ROLE_NAME/$(3)/1' > $(1) + echo '{{- if $(4) -}}' | cat - $(1) > temp.yaml && mv temp.yaml $(1) && \ + echo '{{- end }}' >> $(1) +endef + +$(RIME_AGENT_OPERATOR_ROLE_FILE): ../../go/dataplane/operator/controllers/rimejob_controller.go + $(call generate_operator_role,$(RIME_AGENT_OPERATOR_ROLE_FILE),$(RIME_AGENT_OPERATOR_DIR),{{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-role,.Values.rimeAgent.operator.serviceAccount.create) + +$(RI_FIREWALL_OPERATOR_ROLE_FILE): ../../go/generativefirewall/operator/controllers/firewall_instance_controller.go + $(call generate_operator_role,$(RI_FIREWALL_OPERATOR_ROLE_FILE),$(RI_FIREWALL_OPERATOR_DIR),{{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-role,.Values.riFirewall.operator.serviceAccount.create) + +# Arguments: +# 1. Path to the output role YAML file. +# 2. Operator Go source directory. +# 3. The name of the Helm role (to be substituted) +# 4. 
The name of the service account creation flag (to be substituted) +define operator_role_diff_check_impl + $(eval $@_TMP := $(shell mktemp /tmp/operator-roleXXXXXXXXXXXXXXX)) + $(call generate_operator_role,$($@_TMP),$(2),$(3),$(4)) + @diff $(1) $($@_TMP) || (echo 'ERROR: Operator role "$(1)" needs update' && rm -rf $($@_TMP) && exit 1) + @rm -rf $($@_TMP) +endef + +operator_role_diff_check: + $(call operator_role_diff_check_impl,$(RIME_AGENT_OPERATOR_ROLE_FILE),$(RIME_AGENT_OPERATOR_DIR),{{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-role,.Values.rimeAgent.operator.serviceAccount.create) + $(call operator_role_diff_check_impl,$(RI_FIREWALL_OPERATOR_ROLE_FILE),$(RI_FIREWALL_OPERATOR_DIR),{{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-role,.Values.riFirewall.operator.serviceAccount.create) + +# Rules to create the firewall release tar-balls for the rime-extras and +# rime-kube-system charts for the given FW_VERSION. +.firewall-releases/rime-extras-$(FW_VERSION).tgz: .tmp-charts/rime-extras + ( \ + $(call check_defined, FW_APP_VERSION FW_VERSION, helm chart version) \ + mkdir -p .firewall-releases && \ + pushd .tmp-charts/ && \ + helm package --app-version=$(FW_APP_VERSION) --version=$(FW_VERSION) --destination=../.firewall-releases rime-extras && \ + popd \ + ) + +.firewall-releases/rime-kube-system-$(FW_VERSION).tgz: .tmp-charts/rime-kube-system + ( \ + $(call check_defined, FW_APP_VERSION FW_VERSION, helm chart version) \ + mkdir -p .firewall-releases && \ + pushd .tmp-charts/ && \ + helm package --app-version=$(FW_APP_VERSION) --version=$(FW_VERSION) --destination=../.firewall-releases rime-kube-system && \ + popd \ + ) + +# Rules to create a release tar-ball for the firewall chart in .tmp-charts +# for the given FW_VERSION. 
+.firewall-releases/ri-firewall-$(FW_VERSION).tgz: .tmp-charts/ri-firewall + ( \ + $(call check_defined, FW_APP_VERSION FW_VERSION, helm chart version) \ + mkdir -p .firewall-releases && \ + pushd .tmp-charts/ && \ + helm package --app-version=$(FW_APP_VERSION) --version=$(FW_VERSION) --destination=../.firewall-releases ri-firewall && \ + popd \ + ) + +# Rule to update the release index with metadata about release FW_VERSION. +.firewall-releases/index.yaml: .firewall-releases/ri-firewall-$(FW_VERSION).tgz .firewall-releases/rime-extras-$(FW_VERSION).tgz .firewall-releases/rime-kube-system-$(FW_VERSION).tgz + ( \ + mkdir -p .firewall-releases && \ + pushd .firewall-releases/ && \ + helm repo index --url=$(REPO_FW_URL) . && \ + popd \ + ) + +# Creates a new RI firewall Helm chart release. +create_firewall_charts_release: clean-firewall .firewall-releases/index.yaml diff --git a/README.md b/README.md index dd142807..3d351505 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Robust Intelligence Helm Charts - + + Robust Intelligence Logo @@ -10,91 +11,68 @@ ``` helm repo add robustintelligence https://robustintelligence.github.io/helm --force-update ``` -This repository contains 4 Helm charts: -- `rime` - - Core application services (i.e., the *control plane*) -- `rime-agent` - - Model Testing agent (i.e., the *data plane*) +This repository contains 3 Helm charts: +- `ri-firewall` + - AI Firewall installation +- `rime-kube-system` + - K8s Cluster and ML infrastructure services, such as [External DNS](https://github.com/kubernetes-sigs/external-dns/tree/master/charts/external-dns) and kServe - `rime-extras` (recommended) - - 3rd-party add-ons like Velero backups or DataDog monitoring -- `rime-kube-system` (recommended) - - K8s Cluster infrastructure services, such as [External DNS](https://github.com/kubernetes-sigs/external-dns/tree/master/charts/external-dns) + - 3rd-party add-ons like DataDog monitoring Detailed READMEs for each chart are in the subfolders. 
-# Installation - -**For a standard installation, you need only install the `rime-agent` chart in a K8s namespace, which is auto-configured during the guided installation process.** - -Please refer to Installation in the product documentation for details: -- [Installation](https://docs.rime.dev/en/2.0.0/installation/index.html) - -For **Self-Hosted** deployments, see below. - ---- - # Self-Hosted Installation -For a standalone Robust Intelligence cluster, both the `rime` and `rime-agent` charts are necessary, and it is recommended to install both `rime-extras` and `rime-kube-system` (unless the contained functionalities already exist in your K8s cluster). +For a standalone Robust Intelligence Firewall, both the `ri-firewall` and `rime-kube-system` charts are necessary. ### General Prerequisites -1. A Kubernetes cluster (version 1.23 or greater) +1. A Kubernetes cluster (version 1.24 or greater) - (AWS EKS) enable [IAM roles for service accounts (IRSA)](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-enable-IAM.html) - - (GCP GKE) enable [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) - - (Azure AKS) enable [Workload Identity](https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview) (recommended) 2. A dedicated K8s namespace for Robust Intelligence -3. [Helm](https://helm.sh/) (version 3) -4. [kubectl](https://kubernetes.io/docs/reference/kubectl/kubectl/) -5. A read token for the Robust Intelligence artifact repository as a [K8s secret](https://kubernetes.io/docs/concepts/configuration/secret/#docker-config-secrets) (will be provided by your Solutions Architect) - -### Recommended K8s Cluster Configuration -The core charts (`rime` and `rime-agent`) can be deployed to a single namespace. -Additionally, we recommend the following: -1. 
A dedicated node group (with autoscaling) for the `rime-agent` workloads - - Label: `dedicated=model-testing`, Taint: `dedicated=model-testing:NoSchedule` -2. An expandable and encrypted [StorageClass](https://kubernetes.io/docs/concepts/storage/storage-classes/) for services in the Robust Intelligence namespace - -## `rime-kube-system` (Recommended) +3. Access to the kube-system namespace in this K8s cluster +4. [Helm](https://helm.sh/) (version 3) +5. [kubectl](https://kubernetes.io/docs/reference/kubectl/kubectl/) +6. A read token for the Robust Intelligence artifact repository as a [K8s secret](https://kubernetes.io/docs/concepts/configuration/secret/#docker-config-secrets) (will be provided by your Solutions Architect) +7. Ensure that the following URLs have been whitelisted for your K8s cluster. + - Robust Intelligence Private Dockerhub Repositories: https://hub.docker.com/repository/docker/robustintelligencehq/ + - Robust Intelligence Private Github Repository for YARA signatures: https://github.com/RobustIntelligence/rime-yara + - Robust Intelligence Private Huggingface Model Hub: https://huggingface.co/robustintelligence/ + +## `rime-kube-system` NOTE: Resources for the `rime-kube-system` pertain to infrastructure services like the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider) or [External DNS](https://github.com/kubernetes-sigs/external-dns/tree/v0.12.0/charts/external-dns); therefore, they are deployed in the `kube-system` namespace.

Prerequisites

1. Permissions to create resources in the `kube-system` namespace -2. [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider) prerequisites (recommended) -3. [External DNS](https://github.com/kubernetes-sigs/external-dns/tree/v0.12.0/charts/external-dns) prerequisites (recommended) -4. [AWS Load Balancer Controller](https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.4.2) prerequisites (recommended, AWS-only) -5. [Metrics Server](https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1) prerequisites (recommended, necessary for autoscaling) - -#### GCP (GKE) -NOTE: The [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider) and [Metrics Server](https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1) come configured by default with GKE clusters, so no additional configuration is necessary. - -#### Azure (AKS) -NOTE: The [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider) and [Metrics Server](https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1) come configured by default with AKS clusters, so no additional configuration is necessary. +2. [kserve](https://github.com/kserve/kserve) prerequisites +3. [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider) prerequisites (recommended) +4. [External DNS](https://github.com/kubernetes-sigs/external-dns/tree/v0.12.0/charts/external-dns) prerequisites (recommended) +5. [AWS Load Balancer Controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.6/) prerequisites (recommended, AWS-only) +6. [Metrics Server](https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1) prerequisites (recommended, necessary for autoscaling)
### Configuring Parameters -For a detailed overview of this chart's values, see the `rime-kube-system` README [here](). Your Solutions Architect will assist with configuring parameters during deployment. - -Note that if deploying [cert-manager](https://github.com/cert-manager/cert-manager/tree/v1.10.0) for internal TLS (recommended), CRDs will be created. These CRDS must be created *before* deploying any other Robust Intelligence charts. +For a detailed overview of this chart's values, see the `rime-kube-system` README [here](./rime-kube-system). Your Solutions Architect will assist with configuring parameters during deployment. ### Installing the Chart ``` # When ready to deploy, remove --dry-run helm upgrade -i rime-kube-system robustintelligence/rime-kube-system \ - --version $RI_VERSION \ + --version $RI_FIREWALL_VERSION \ --values $RIME_KUBE_SYSTEM_VALUES_FILE \ --namespace kube-system \ --debug \ --dry-run ``` + #### Uninstalling the Chart ``` helm uninstall rime-kube-system -n kube-system ``` -## `rime` +## `ri-firewall`

Prerequisites

@@ -106,40 +84,26 @@ helm uninstall rime-kube-system -n kube-system #### AWS (EKS) 1. A domain for your service managed by [Route53](https://aws.amazon.com/route53/) - A TLS certificate in ACM -2. **Managed Images** prerequisites (add-on feature) - - An Elastic Container Registry (ECR) - - IAM permissions for Image Builder role - - IAM permissions for Repo Manager role - -#### GCP (GKE) -1. A domain for your service managed by [Cloud DNS](https://cloud.google.com/dns/) - - A TLS certificate as a [K8s secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets) - -#### Azure (AKS) -1. A domain for your service - - A TLS certificate as a [K8s secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets)
### Configuring Parameters -For a detailed overview of this chart's values, see the `rime` README [here](). Your Solutions Architect will assist with configuring parameters during deployment. +For a detailed overview of this chart's values, see the `ri-firewall` README [here](./ri-firewall). Your Solutions Architect will assist with configuring parameters during deployment. Some of the main sections to configure include: -1. `rime.secrets`: application secrets for product license, admin one-time credentials, etc. - - (use `rime.secrets.existingSecretName` to specify these values through a K8s secret) -2. `rime.datasetManagerServer`: settings for the **Managed Blob Storage** feature (AWS-only) - - If enabling this feature, set `rime.datasetManagerServer.enabled: true` and specify the Managed Blob Storage IAM role -3. `rime.imageRegistryServer`: settings for the **Managed Images** feature (AWS-only) - - If enabling this feature, set `rime.imageRegistryServer.enabled: true` and specify the Image Builder and Repo Manager IAM roles -4. `ingress-nginx`: settings for the [Ingress-NGINX Controller](https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx) +1. `riFirewall.secrets`: application secrets for auth0-based authentication and RI-provided keys. + - Your Solutions Architect will guide you through the creation process of the K8s secret for `rime.secrets.existingIntegrationSecretsName` + - If enabling auth0, parameters and secrets can be provided via the `riFirewall.secrets.auth0` fields or via a pre-created K8s secret with name specified in `rime.secrets.existingAuthSecretsName` +2. `riFirewall.yaraServer.gitRepoToken`: read-only access to an RI-managed github repository of YARA signatures (recommended) + - Your Solutions Architect will provide a token. +3. 
`ingress-nginx`: settings for the [Ingress-NGINX Controller](https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx) - Specify `ingress-nginx.controller.service.annotations` for your load balancing configuration -5. `tls`: whether to enable internal TLS for specific services ### Installing the Chart ``` # When ready to deploy, remove --dry-run -helm upgrade -i rime robustintelligence/rime \ - --version $RI_VERSION \ +helm upgrade -i ri-firewall robustintelligence/ri-firewall \ + --version $RI_FIREWALL_VERSION \ --values $RIME_VALUES_FILE \ --namespace $RI_NAMESPACE \ --debug \ @@ -148,51 +112,7 @@ helm upgrade -i rime robustintelligence/rime \ #### Uninstalling the Chart ``` -helm uninstall rime -n $RI_NAMESPACE -``` - -## `rime-agent` -
-

Prerequisites

- -#### General -1. A blob storage entity -2. An authorization policy allowing read access to ^ - -#### AWS (EKS) -1. A blob storage entity ([S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)) -2. An authorization policy allowing read access to ^ ([IAM role](https://docs.aws.amazon.com/eks/latest/userguide/associate-service-account-role.html)) - -#### GCP (GKE) -1. A blob storage entity ([Cloud Storage bucket](https://cloud.google.com/storage/docs/buckets)) -2. An authorization policy allowing read access to ^ ([Service Account](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity)) - -#### Azure (AKS) -1. A [Storage Account](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview) -2. A blob storage entity ([Blob Storage Container](https://learn.microsoft.com/en-us/azure/storage/blobs/blob-containers-portal)) -3. An authorization policy allowing read access to ^ ([Managed Identity](https://learn.microsoft.com/en-us/azure/aks/learn/tutorial-kubernetes-workload-identity#create-a-managed-identity-and-grant-permissions-to-access-the-secret)) - -
- -### Configuring Parameters -For a detailed overview of this chart's values, see the `rime-agent` README [here](). Your Solutions Architect will assist with configuring parameters during deployment. - -Generally, the only setup needed for the `rime-agent` is to identify the authorization for the `rime-agent-model-tester` ServiceAccount under `rimeAgent.modelTestJob.serviceAccount`. - -### Installing the Chart -``` -# When ready to deploy, remove --dry-run -helm upgrade -i rime-agent robustintelligence/rime-agent \ - --version $RI_VERSION \ - --values $RIME_AGENT_VALUES_FILE \ - --namespace $RI_NAMESPACE \ - --debug \ - --dry-run -``` - -#### Uninstalling the Chart -``` -helm uninstall rime-agent -n $RI_NAMESPACE +helm uninstall ri-firewall -n $RI_NAMESPACE ``` ## `rime-extras` (Recommended) @@ -203,13 +123,10 @@ It's recommended to deploy the `rime-extras` chart in a separate namespace (e.g. 1. [DataDog](https://github.com/DataDog/helm-charts/tree/datadog-2.20.3/charts/datadog) prerequisites - A DataDog API key (will be provided by your Solutions Architect) -2. [Velero](https://github.com/vmware-tanzu/helm-charts/tree/velero-2.23.6/charts/velero) prerequisites - - Follow the [setup instructions](https://velero.io/docs/v1.6/supported-providers/) for your provider - ### Configuring Parameters -For a detailed overview of this chart's values, see the `rime-extras` README [here](). Your Solutions Architect will assist with configuring parameters during deployment. +For a detailed overview of this chart's values, see the `rime-extras` README [here](./rime-extras). Your Solutions Architect will assist with configuring parameters during deployment. For DataDog, you may wish to configure the log masking logic specified in `datadog.datadog.env`. 
@@ -235,7 +152,7 @@ helm uninstall rime-extras -n $RIME_EXTRAS_NAMESPACE ## License -Copyright © 2023 Robust Intelligence +Copyright © 2024 Robust Intelligence Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/helm_dev_lib.sh b/helm_dev_lib.sh index 4379dc07..9347d551 100755 --- a/helm_dev_lib.sh +++ b/helm_dev_lib.sh @@ -62,9 +62,6 @@ sub_controlplane_uninstall() { # AGENT agent_install_or_upgrade() { echo "---------Installing Agent-------------" - echo "Generating operator manifests" - make gen_operator_manifests - local cmd="$1" local namespace="$2" go_to_namespace $namespace diff --git a/ri-firewall/.helmignore b/ri-firewall/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/ri-firewall/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ri-firewall/Chart.yaml b/ri-firewall/Chart.yaml new file mode 100644 index 00000000..6cae1755 --- /dev/null +++ b/ri-firewall/Chart.yaml @@ -0,0 +1,33 @@ +apiVersion: v2 +name: ri-firewall +description: A Helm chart for the Robust Intelligence Firewall. + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. 
+type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +## Note: we manually set the version when pushing to separate Robust Intelligence helm repo. +version: 0.1.4 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.18.0" + +# Compatible versions of kubernetes. +kubeVersion: ">=1.20.0-0" + +dependencies: + - name: "ingress-nginx" + version: "4.2.0" + repository: "https://kubernetes.github.io/ingress-nginx" diff --git a/ri-firewall/crds/fw.rbst.io_firewallinstances.yaml b/ri-firewall/crds/fw.rbst.io_firewallinstances.yaml new file mode 100644 index 00000000..9a56595c --- /dev/null +++ b/ri-firewall/crds/fw.rbst.io_firewallinstances.yaml @@ -0,0 +1,94 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: firewallinstances.fw.rbst.io +spec: + group: fw.rbst.io + names: + kind: FirewallInstance + listKind: FirewallInstanceList + plural: firewallinstances + singular: firewallinstance + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: FirewallInstance is the Schema for the firewallinstance API. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: FirewallInstanceSpec defines the desired state of FirewallInstance. + K8s is a declarative API; the operator will reconcile the state on the + cluster step-by-step until the actual state reaches the desired state + specified here. The desired state of the FirewallInstance is a firewall + deployment with the specified rule configuration. + properties: + description: + description: Description is a human-readable description of the FirewallInstance + to help users know what the purpose of it is. + type: string + ruleConfig: + description: RuleConfig is serialized JSON of the FirewallRuleConfig + Protobuf. Users control this field to determine the behavior of + the firewall rules. + type: string + type: object + status: + description: FirewallInstanceStatus defines the observed state of FirewallInstance. + properties: + conditions: + description: Represents the observations of a FirewallInstance current + state. + items: + description: FirewallInstanceCondition describes a single condition + for the FirewallInstance status. This follows the common conventions + for representing status in K8s. https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. + format: date-time + type: string + lastUpdateTime: + description: The last time this condition was updated. 
+ format: date-time + type: string + message: + description: A human-readable message indicating details about + the transition. + type: string + reason: + description: The reason for the condition's last transition. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of the FirewallInstance condition. + type: string + required: + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/ri-firewall/templates/_helpers.tpl b/ri-firewall/templates/_helpers.tpl new file mode 100644 index 00000000..2c44fb1a --- /dev/null +++ b/ri-firewall/templates/_helpers.tpl @@ -0,0 +1,148 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ri-firewall.name" -}} +{{- default .Chart.Name .Values.riFirewall.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to +this (by the DNS naming spec). If release name contains chart name it will +be used as a full name. +*/}} +{{- define "ri-firewall.fullname" -}} +{{- if .Values.riFirewall.fullNameOverride }} +{{- .Values.riFirewall.fullNameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.riFirewall.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ri-firewall.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ri-firewall.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ri-firewall.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "ri-firewall.labels" -}} +helm.sh/chart: {{ include "ri-firewall.chart" . }} +{{ include "ri-firewall.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/part-of: {{ template "ri-firewall.name" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- if .Values.riFirewall.commonLabels}} +{{ toYaml .Values.riFirewall.commonLabels }} +{{- end }} +{{- end -}} + +{{/* +Common annotations added to all resources. +*/}} +{{- define "ri-firewall.annotations" -}} +helm.sh/chart: {{ include "ri-firewall.chart" . }} +{{ include "ri-firewall.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/part-of: {{ template "ri-firewall.name" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/owned-by: "ri" +{{- if .Values.riFirewall.commonAnnotations}} +{{ toYaml .Values.riFirewall.commonAnnotations }} +{{- end }} +{{- end -}} + +{{/* +Return the name of the secret containing generated secrets used by the Firewall. +*/}} +{{- define "ri-firewall.generatedSecretsName" -}} +{{- printf "%s-generated-secrets" (include "ri-firewall.fullname" .) }} +{{- end }} + +{{/* +Return the name of the existing secrets used by the Firewall. +*/}} +{{- define "ri-firewall.existingSecretsName" -}} +{{- printf "%s-existing-secrets" (include "ri-firewall.fullname" .) }} +{{- end }} + +{{- define "ri-firewall.serverArgs" -}} +common: + connections: + addresses: + metrics: + enabled: {{ .Values.riFirewall.monitoring.enabled }} + port: {{ .Values.riFirewall.monitoring.port }} +{{- end }} + +{{/* +Return the appropriate apiVersion for Horizontal Pod Autoscaler. 
+*/}} +{{- define "ri-firewall.hpa.apiVersion" -}} +{{- if $.Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" }} +{{- print "autoscaling/v2" }} +{{- else }} +{{- print "autoscaling/v2beta2" }} +{{- end }} +{{- end }} + +{{/* +Return the service account name used by the firewall instance manager server. +*/}} +{{- define "ri-firewall.instanceManagerServer.serviceAccountName" -}} +{{- if .Values.riFirewall.instanceManagerServer.serviceAccount.create -}} + {{ default (printf "%s-%s" (include "ri-firewall.fullname" .) .Values.riFirewall.instanceManagerServer.name) .Values.riFirewall.instanceManagerServer.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- else -}} + {{ default "default" .Values.riFirewall.instanceManagerServer.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Return the service account name used by the firewall instance operator. +*/}} +{{- define "ri-firewall.operator.serviceAccountName" -}} +{{- if .Values.riFirewall.operator.serviceAccount.create -}} + {{ default (printf "%s-%s" (include "ri-firewall.fullname" .) .Values.riFirewall.operator.name) .Values.riFirewall.operator.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- else -}} + {{ default "default" .Values.riFirewall.operator.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Return the name of the config map storing system configuration for the firewall, +such as the azure openAI base URL. +For now, we are doing a one deployment <> one firewall scheme. +This is the deployment-wide configuration for the firewall. +The firewall server consumes this configmap. +*/}} +{{- define "ri-firewall.firewallSystemConfigMapName" -}} + {{ include "ri-firewall.fullname" . }}-system-firewall-config +{{- end -}} + +{{/* +Name for the ConfigMap containing the model name to address +mapping. +*/}} +{{- define "ri-firewall.modelConnectionConfigMapName" -}} +{{- printf "%s-model-connection-map-conf" (include "ri-firewall.fullname" .) 
}} +{{- end -}} diff --git a/ri-firewall/templates/auth-ingress.yaml b/ri-firewall/templates/auth-ingress.yaml new file mode 100644 index 00000000..149e8ee1 --- /dev/null +++ b/ri-firewall/templates/auth-ingress.yaml @@ -0,0 +1,32 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "ri-firewall.fullname" . }}-auth-ingress + labels: + {{- with .Values.riFirewall.ingress.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.riFirewall.ingress.ingressClassName }} + rules: + - http: + paths: + - path: /v1/auth + pathType: Prefix + backend: + service: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }} + port: + number: {{ .Values.riFirewall.authServer.port }} + - path: /v1-beta/version + pathType: Prefix + backend: + service: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }} + port: + number: {{ .Values.riFirewall.instanceManagerServer.restPort }} diff --git a/ri-firewall/templates/auth-server/configmap.yaml b/ri-firewall/templates/auth-server/configmap.yaml new file mode 100644 index 00000000..2d0dc159 --- /dev/null +++ b/ri-firewall/templates/auth-server/configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}-conf +data: + server.config: | +{{ include "ri-firewall.serverArgs" . 
| indent 4 }} + authServer: + auth0Enabled: {{ .Values.riFirewall.secrets.auth0Enabled}} diff --git a/ri-firewall/templates/auth-server/deployment.yaml b/ri-firewall/templates/auth-server/deployment.yaml new file mode 100644 index 00000000..f29f1157 --- /dev/null +++ b/ri-firewall/templates/auth-server/deployment.yaml @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }} + labels: + app: {{ .Values.riFirewall.authServer.name }} + {{- with .Values.riFirewall.authServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.authServer.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ .Values.riFirewall.authServer.name }} + {{- if not .Values.riFirewall.authServer.hpa.enabled }} + replicas: {{ .Values.riFirewall.authServer.deployment.replicaCount }} + {{- end }} + template: + metadata: + labels: + app: {{ .Values.riFirewall.authServer.name }} + {{- with .Values.riFirewall.authServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + {{- with .Values.riFirewall.authServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: {{ .Values.riFirewall.authServer.name }} + image: "{{ .Values.riFirewall.images.backendImage.registry}}/{{ .Values.riFirewall.images.backendImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.backendImage.pullPolicy }} + ports: + - name: auth + containerPort: {{ .Values.riFirewall.authServer.port }} + protocol: TCP + env: + - name: FIREWALL_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "ri-firewall.generatedSecretsName" . }} + key: apiKey + - name: FIREWALL_AUTH0_DOMAIN + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingAuthSecretsName | default (include "ri-firewall.existingSecretsName" .) }} + key: auth0Domain + - name: FIREWALL_AUTH0_CLIENT_ID + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingAuthSecretsName | default (include "ri-firewall.existingSecretsName" .) }} + key: auth0ClientID + - name: FIREWALL_AUTH0_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingAuthSecretsName | default (include "ri-firewall.existingSecretsName" .) }} + key: auth0ClientSecret + - name: FIREWALL_AUTH0_CALLBACK_URL + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingAuthSecretsName | default (include "ri-firewall.existingSecretsName" .) }} + key: auth0CallbackURL + - name: FIREWALL_TOKEN_SIGNING_KEY + valueFrom: + secretKeyRef: + name: {{ include "ri-firewall.generatedSecretsName" . 
}} + key: tokenSigningKey + - name: FIREWALL_USER_TOKEN_LIFETIME_HOURS + value: {{ .Values.riFirewall.authServer.tokenLifetimeHours.userTokenLifetime | quote }} + - name: FIREWALL_SYSTEM_TOKEN_LIFETIME_HOURS + value: {{ .Values.riFirewall.authServer.tokenLifetimeHours.systemTokenLifetime | quote }} + - name: RI_SERVICE_NAME + value: {{ .Values.riFirewall.authServer.name }} + resources: + {{- toYaml .Values.riFirewall.authServer.deployment.resources | nindent 12 }} + command: + - "/ri/firewall" + args: + - "start-auth-server" + - "--server-config-path=/config/server.config" + - "--port={{ .Values.riFirewall.authServer.port }}" + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}-config + mountPath: "/config" + readOnly: true + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}-conf + items: + - key: "server.config" + path: "server.config" diff --git a/ri-firewall/templates/auth-server/hpa.yaml b/ri-firewall/templates/auth-server/hpa.yaml new file mode 100644 index 00000000..967b1423 --- /dev/null +++ b/ri-firewall/templates/auth-server/hpa.yaml @@ -0,0 +1,21 @@ +{{- if .Values.riFirewall.authServer.hpa.enabled }} +apiVersion: {{ include "ri-firewall.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }} + labels: + app: {{ .Values.riFirewall.authServer.name }} + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.authServer.hpa.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "ri-firewall.fullname" . 
}}-{{ .Values.riFirewall.authServer.name }} + minReplicas: {{ .Values.riFirewall.authServer.hpa.minReplicas }} + maxReplicas: {{ .Values.riFirewall.authServer.hpa.maxReplicas }} + metrics: +{{ toYaml .Values.riFirewall.authServer.hpa.metrics | indent 4 }} +{{- end }} diff --git a/ri-firewall/templates/auth-server/service.yaml b/ri-firewall/templates/auth-server/service.yaml new file mode 100644 index 00000000..3435be1e --- /dev/null +++ b/ri-firewall/templates/auth-server/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }} + labels: + {{- with .Values.riFirewall.authServer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.authServer.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.riFirewall.authServer.service.type }} + ports: + - port: {{ .Values.riFirewall.authServer.port }} + targetPort: {{ .Values.riFirewall.authServer.port }} + protocol: TCP + name: auth + selector: + app: {{ .Values.riFirewall.authServer.name }} diff --git a/ri-firewall/templates/firewall-instance-resource-quota.yaml b/ri-firewall/templates/firewall-instance-resource-quota.yaml new file mode 100644 index 00000000..f291b756 --- /dev/null +++ b/ri-firewall/templates/firewall-instance-resource-quota.yaml @@ -0,0 +1,9 @@ +{{- if .Values.riFirewall.firewallInstanceResourceQuota.enabled }} +apiVersion: v1 +kind: ResourceQuota +metadata: + name: {{ include "ri-firewall.fullname" . 
}}-firewall-instance-quota +spec: + hard: + count/firewallinstances.fw.rbst.io: {{ .Values.riFirewall.firewallInstanceResourceQuota.maxObjectCount }} +{{- end }} diff --git a/ri-firewall/templates/generated-secrets.yaml b/ri-firewall/templates/generated-secrets.yaml new file mode 100644 index 00000000..3642f607 --- /dev/null +++ b/ri-firewall/templates/generated-secrets.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "ri-firewall.generatedSecretsName" . }} +type: Opaque +data: + # for auth-related secrets, always read from pre-existing secret to avoid interrupting in-flight workloads + {{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "ri-firewall.generatedSecretsName" .) ) | default dict }} + {{- $secretData := (get $secret "data") | default dict }} + apiKey: {{ (get $secretData "apiKey") | default (randAlphaNum 32 | b64enc) }} + tokenSigningKey: {{ (get $secretData "tokenSigningKey") | default (randAlphaNum 32 | b64enc) }} diff --git a/ri-firewall/templates/ingress.yaml b/ri-firewall/templates/ingress.yaml new file mode 100644 index 00000000..beb98c3b --- /dev/null +++ b/ri-firewall/templates/ingress.yaml @@ -0,0 +1,27 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "ri-firewall.fullname" . }}-ingress + labels: + {{- with .Values.riFirewall.ingress.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + nginx.ingress.kubernetes.io/auth-url: http://{{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.riFirewall.authServer.port}}/v1/auth/validate + nginx.ingress.kubernetes.io/auth-method: POST + {{- with .Values.riFirewall.ingress.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.riFirewall.ingress.ingressClassName }} + rules: + - http: + paths: + - path: "/v1-beta/firewall-instance" + pathType: Prefix + backend: + service: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }} + port: + number: {{ .Values.riFirewall.instanceManagerServer.restPort }} diff --git a/ri-firewall/templates/instance-manager-server/configmap.yaml b/ri-firewall/templates/instance-manager-server/configmap.yaml new file mode 100644 index 00000000..c30def42 --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }}-conf +data: + server.config: | +{{ include "ri-firewall.serverArgs" . | indent 4 }} diff --git a/ri-firewall/templates/instance-manager-server/deployment.yaml b/ri-firewall/templates/instance-manager-server/deployment.yaml new file mode 100644 index 00000000..c46f6ea9 --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }} + labels: + app: {{ .Values.riFirewall.instanceManagerServer.name }} + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.instanceManagerServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.instanceManagerServer.deployment.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ .Values.riFirewall.instanceManagerServer.name }} + {{- if not .Values.riFirewall.instanceManagerServer.hpa.enabled }} + replicas: {{ .Values.riFirewall.instanceManagerServer.deployment.replicaCount }} + {{- end }} + template: + metadata: + labels: + app: {{ .Values.riFirewall.instanceManagerServer.name }} + {{- with .Values.riFirewall.instanceManagerServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + {{- with .Values.riFirewall.instanceManagerServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . }} + containers: + - name: {{ .Values.riFirewall.instanceManagerServer.name }} + image: "{{ .Values.riFirewall.images.backendImage.registry}}/{{ .Values.riFirewall.images.backendImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.backendImage.pullPolicy }} + env: + - name: RI_SERVICE_NAME + value: {{ .Values.riFirewall.instanceManagerServer.name }} + ports: + - name: mgr + containerPort: {{ .Values.riFirewall.instanceManagerServer.port }} + protocol: TCP + - name: mgr-rest + containerPort: {{ .Values.riFirewall.instanceManagerServer.restPort }} + protocol: TCP + resources: + {{- toYaml .Values.riFirewall.instanceManagerServer.deployment.resources | nindent 12 }} + command: + - "/ri/firewall" + args: + - "start-firewall-instance-manager" + - "--server-config-path=/config/server.config" + - "--port={{ .Values.riFirewall.instanceManagerServer.port }}" + - "--rest-port={{ .Values.riFirewall.instanceManagerServer.restPort }}" + volumeMounts: + - name: {{ include "ri-firewall.fullname" . 
}}-{{ .Values.riFirewall.instanceManagerServer.name }}-config + mountPath: "/config" + readOnly: true + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }}-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }}-conf + items: + - key: "server.config" + path: "server.config" diff --git a/ri-firewall/templates/instance-manager-server/hpa.yaml b/ri-firewall/templates/instance-manager-server/hpa.yaml new file mode 100644 index 00000000..5342d855 --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/hpa.yaml @@ -0,0 +1,21 @@ +{{- if .Values.riFirewall.instanceManagerServer.hpa.enabled }} +apiVersion: {{ include "ri-firewall.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }} + labels: + app: {{ .Values.riFirewall.instanceManagerServer.name }} + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.instanceManagerServer.hpa.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "ri-firewall.fullname" . 
}}-{{ .Values.riFirewall.instanceManagerServer.name }} + minReplicas: {{ .Values.riFirewall.instanceManagerServer.hpa.minReplicas }} + maxReplicas: {{ .Values.riFirewall.instanceManagerServer.hpa.maxReplicas }} + metrics: +{{ toYaml .Values.riFirewall.instanceManagerServer.hpa.metrics | indent 4 }} +{{- end }} diff --git a/ri-firewall/templates/instance-manager-server/role-binding.yaml b/ri-firewall/templates/instance-manager-server/role-binding.yaml new file mode 100644 index 00000000..8a77335d --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/role-binding.yaml @@ -0,0 +1,15 @@ +{{- if .Values.riFirewall.instanceManagerServer.serviceAccount.create }} +# Binds Instance Manager server service account to a role providing write access to +# k8s resources. +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . }}-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . }} +subjects: + - kind: ServiceAccount + name: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . }} +{{- end }} diff --git a/ri-firewall/templates/instance-manager-server/role.yaml b/ri-firewall/templates/instance-manager-server/role.yaml new file mode 100644 index 00000000..a412e8ff --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/role.yaml @@ -0,0 +1,12 @@ +{{- if .Values.riFirewall.instanceManagerServer.serviceAccount.create }} +# A Role granting access to write operations on k8s resources for the +# configuration server. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . 
}} +rules: + - apiGroups: ["fw.rbst.io"] + resources: [firewallinstances] + verbs: ["*"] +{{- end }} diff --git a/ri-firewall/templates/instance-manager-server/service-account.yaml b/ri-firewall/templates/instance-manager-server/service-account.yaml new file mode 100644 index 00000000..c1fe1b87 --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/service-account.yaml @@ -0,0 +1,7 @@ +{{- if .Values.riFirewall.instanceManagerServer.serviceAccount.create}} +# Service account for the firewall instance manager server. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "ri-firewall.instanceManagerServer.serviceAccountName" . }} +{{- end }} diff --git a/ri-firewall/templates/instance-manager-server/service.yaml b/ri-firewall/templates/instance-manager-server/service.yaml new file mode 100644 index 00000000..9c5259f6 --- /dev/null +++ b/ri-firewall/templates/instance-manager-server/service.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.instanceManagerServer.name }} + labels: + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.instanceManagerServer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.instanceManagerServer.service.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + type: {{ .Values.riFirewall.instanceManagerServer.service.type }} + ports: + - port: {{ .Values.riFirewall.instanceManagerServer.port }} + targetPort: {{ .Values.riFirewall.instanceManagerServer.port }} + protocol: TCP + name: mgr + - port: {{ .Values.riFirewall.instanceManagerServer.restPort }} + targetPort: {{ .Values.riFirewall.instanceManagerServer.restPort }} + protocol: TCP + name: mgr-rest + selector: + app: {{ .Values.riFirewall.instanceManagerServer.name }} diff --git a/ri-firewall/templates/model-servers/_gen/configmap.yaml b/ri-firewall/templates/model-servers/_gen/configmap.yaml new file mode 100644 index 00000000..6fa4df1d --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/configmap.yaml @@ -0,0 +1,39 @@ +# Autogenerated by a script. DO NOT EDIT. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.modelConnectionConfigMapName" . }} +data: + model-connection-map.yaml: | + models: + prompt-injection: + {{- if .Values.riFirewall.modelServers.promptInjection.remoteModelServer.enabled }} + address: {{ .Values.riFirewall.modelServers.promptInjection.remoteModelServer.address }} + {{- else }} + address: "http://prompt-injection-predictor:80" + {{- end }} + task: "text-classification" + + factual-inconsistency: + {{- if .Values.riFirewall.modelServers.factualInconsistency.remoteModelServer.enabled }} + address: {{ .Values.riFirewall.modelServers.factualInconsistency.remoteModelServer.address }} + {{- else }} + address: "http://factual-inconsistency-predictor:80" + {{- end }} + task: "text-classification" + + language-detection: + {{- if .Values.riFirewall.modelServers.languageDetection.remoteModelServer.enabled }} + address: {{ .Values.riFirewall.modelServers.languageDetection.remoteModelServer.address }} + {{- else }} + address: "http://language-detection-predictor:80" + {{- end }} + task: "language-detection" + + text-embedding: + {{- if 
.Values.riFirewall.modelServers.textEmbedding.remoteModelServer.enabled }} + address: {{ .Values.riFirewall.modelServers.textEmbedding.remoteModelServer.address }} + {{- else }} + address: "http://text-embedding-predictor:80" + {{- end }} + task: "text-embedding" diff --git a/ri-firewall/templates/model-servers/_gen/factual-inconsistency/inference-service.yaml b/ri-firewall/templates/model-servers/_gen/factual-inconsistency/inference-service.yaml new file mode 100644 index 00000000..c45e1609 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/factual-inconsistency/inference-service.yaml @@ -0,0 +1,60 @@ +{{- if not .Values.riFirewall.modelServers.factualInconsistency.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: factual-inconsistency + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} +spec: + predictor: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + minReplicas: {{ .Values.riFirewall.modelServers.factualInconsistency.minReplicas }} + maxReplicas: {{ .Values.riFirewall.modelServers.factualInconsistency.maxReplicas }} + containers: + - name: factual-inconsistency + image: "{{ .Values.riFirewall.images.modelServerImage.registry}}/{{ .Values.riFirewall.images.modelServerImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.modelServerImage.pullPolicy }} + env: + - name: HUGGINGFACE_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: huggingfaceAPIKey + ports: + - name: rest + protocol: TCP + containerPort: 8080 + command: + - "run_server" + args: + - "--model-config-path={{ .Values.riFirewall.modelServers.modelSettingsPath }}/model-settings.json" + - "--cache-dir=/model_cache" + resources: + requests: + memory: {{ .Values.riFirewall.modelServers.factualInconsistency.resources.requests.memory }} + cpu: {{ .Values.riFirewall.modelServers.factualInconsistency.resources.requests.cpu }} + limits: + memory: {{ .Values.riFirewall.modelServers.factualInconsistency.resources.limits.memory }} + cpu: {{ .Values.riFirewall.modelServers.factualInconsistency.resources.limits.cpu }} + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-factual-inconsistency-config + mountPath: {{ .Values.riFirewall.modelServers.modelSettingsPath }} + readOnly: true + - name: {{ include "ri-firewall.fullname" . }}-factual-inconsistency-model-cache + mountPath: "/model_cache" + readOnly: false + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-factual-inconsistency-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-factual-inconsistency-conf + items: + - key: "model-settings.json" + path: "model-settings.json" + - name: {{ include "ri-firewall.fullname" . 
}}-factual-inconsistency-model-cache + emptyDir: { } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/factual-inconsistency/settings-configmap.yaml b/ri-firewall/templates/model-servers/_gen/factual-inconsistency/settings-configmap.yaml new file mode 100644 index 00000000..d875c2f5 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/factual-inconsistency/settings-configmap.yaml @@ -0,0 +1,16 @@ +{{- if not .Values.riFirewall.modelServers.factualInconsistency.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-factual-inconsistency-conf +data: + model-settings.json: | + { + "name": "factual-inconsistency", + "task": "text-classification", + "tokenizer_kwargs": {"max_length": 512, "padding": "max_length", "truncation": true}, + "huggingface_uri": "robustintelligence/factual-inconsistency-distilbart-rifi-250k", + "huggingface_commit_hash": "9747272ef915f108c50a19fef26a435176364a0a" + } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/language-detection/inference-service.yaml b/ri-firewall/templates/model-servers/_gen/language-detection/inference-service.yaml new file mode 100644 index 00000000..ef2007c0 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/language-detection/inference-service.yaml @@ -0,0 +1,60 @@ +{{- if not .Values.riFirewall.modelServers.languageDetection.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: language-detection + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} +spec: + predictor: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + minReplicas: {{ .Values.riFirewall.modelServers.languageDetection.minReplicas }} + maxReplicas: {{ .Values.riFirewall.modelServers.languageDetection.maxReplicas }} + containers: + - name: language-detection + image: "{{ .Values.riFirewall.images.modelServerImage.registry}}/{{ .Values.riFirewall.images.modelServerImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.modelServerImage.pullPolicy }} + env: + - name: HUGGINGFACE_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: huggingfaceAPIKey + ports: + - name: rest + protocol: TCP + containerPort: 8080 + command: + - "run_server" + args: + - "--model-config-path={{ .Values.riFirewall.modelServers.modelSettingsPath }}/model-settings.json" + - "--cache-dir=/model_cache" + resources: + requests: + memory: {{ .Values.riFirewall.modelServers.languageDetection.resources.requests.memory }} + cpu: {{ .Values.riFirewall.modelServers.languageDetection.resources.requests.cpu }} + limits: + memory: {{ .Values.riFirewall.modelServers.languageDetection.resources.limits.memory }} + cpu: {{ .Values.riFirewall.modelServers.languageDetection.resources.limits.cpu }} + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-language-detection-config + mountPath: {{ .Values.riFirewall.modelServers.modelSettingsPath }} + readOnly: true + - name: {{ include "ri-firewall.fullname" . }}-language-detection-model-cache + mountPath: "/model_cache" + readOnly: false + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-language-detection-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-language-detection-conf + items: + - key: "model-settings.json" + path: "model-settings.json" + - name: {{ include "ri-firewall.fullname" . 
}}-language-detection-model-cache + emptyDir: { } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/language-detection/settings-configmap.yaml b/ri-firewall/templates/model-servers/_gen/language-detection/settings-configmap.yaml new file mode 100644 index 00000000..20f09530 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/language-detection/settings-configmap.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.riFirewall.modelServers.languageDetection.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-language-detection-conf +data: + model-settings.json: | + { + "name": "language-detection", + "task": "language-detection", + "huggingface_uri": "robustintelligence/language-detection-fasttext", + "huggingface_commit_hash": "33bbf0555e815fef78b7b981ec553d8229ccf5e1" + } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/prompt-injection/inference-service.yaml b/ri-firewall/templates/model-servers/_gen/prompt-injection/inference-service.yaml new file mode 100644 index 00000000..5bd996f2 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/prompt-injection/inference-service.yaml @@ -0,0 +1,60 @@ +{{- if not .Values.riFirewall.modelServers.promptInjection.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: prompt-injection + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} +spec: + predictor: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + minReplicas: {{ .Values.riFirewall.modelServers.promptInjection.minReplicas }} + maxReplicas: {{ .Values.riFirewall.modelServers.promptInjection.maxReplicas }} + containers: + - name: prompt-injection + image: "{{ .Values.riFirewall.images.modelServerImage.registry}}/{{ .Values.riFirewall.images.modelServerImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.modelServerImage.pullPolicy }} + env: + - name: HUGGINGFACE_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: huggingfaceAPIKey + ports: + - name: rest + protocol: TCP + containerPort: 8080 + command: + - "run_server" + args: + - "--model-config-path={{ .Values.riFirewall.modelServers.modelSettingsPath }}/model-settings.json" + - "--cache-dir=/model_cache" + resources: + requests: + memory: {{ .Values.riFirewall.modelServers.promptInjection.resources.requests.memory }} + cpu: {{ .Values.riFirewall.modelServers.promptInjection.resources.requests.cpu }} + limits: + memory: {{ .Values.riFirewall.modelServers.promptInjection.resources.limits.memory }} + cpu: {{ .Values.riFirewall.modelServers.promptInjection.resources.limits.cpu }} + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-prompt-injection-config + mountPath: {{ .Values.riFirewall.modelServers.modelSettingsPath }} + readOnly: true + - name: {{ include "ri-firewall.fullname" . }}-prompt-injection-model-cache + mountPath: "/model_cache" + readOnly: false + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-prompt-injection-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-prompt-injection-conf + items: + - key: "model-settings.json" + path: "model-settings.json" + - name: {{ include "ri-firewall.fullname" . 
}}-prompt-injection-model-cache + emptyDir: { } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/prompt-injection/settings-configmap.yaml b/ri-firewall/templates/model-servers/_gen/prompt-injection/settings-configmap.yaml new file mode 100644 index 00000000..aa92f4dc --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/prompt-injection/settings-configmap.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.riFirewall.modelServers.promptInjection.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-prompt-injection-conf +data: + model-settings.json: | + { + "name": "prompt-injection", + "task": "text-classification", + "huggingface_uri": "robustintelligence/prompt-injection-electra-all-2.6-data", + "huggingface_commit_hash": "efc296c3046d10ef67223e541fea29c0a34a0e50" + } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/text-embedding/inference-service.yaml b/ri-firewall/templates/model-servers/_gen/text-embedding/inference-service.yaml new file mode 100644 index 00000000..1efc2465 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/text-embedding/inference-service.yaml @@ -0,0 +1,60 @@ +{{- if not .Values.riFirewall.modelServers.textEmbedding.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: text-embedding + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} +spec: + predictor: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + minReplicas: {{ .Values.riFirewall.modelServers.textEmbedding.minReplicas }} + maxReplicas: {{ .Values.riFirewall.modelServers.textEmbedding.maxReplicas }} + containers: + - name: text-embedding + image: "{{ .Values.riFirewall.images.modelServerImage.registry}}/{{ .Values.riFirewall.images.modelServerImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.modelServerImage.pullPolicy }} + env: + - name: HUGGINGFACE_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: huggingfaceAPIKey + ports: + - name: rest + protocol: TCP + containerPort: 8080 + command: + - "run_server" + args: + - "--model-config-path={{ .Values.riFirewall.modelServers.modelSettingsPath }}/model-settings.json" + - "--cache-dir=/model_cache" + resources: + requests: + memory: {{ .Values.riFirewall.modelServers.textEmbedding.resources.requests.memory }} + cpu: {{ .Values.riFirewall.modelServers.textEmbedding.resources.requests.cpu }} + limits: + memory: {{ .Values.riFirewall.modelServers.textEmbedding.resources.limits.memory }} + cpu: {{ .Values.riFirewall.modelServers.textEmbedding.resources.limits.cpu }} + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-text-embedding-config + mountPath: {{ .Values.riFirewall.modelServers.modelSettingsPath }} + readOnly: true + - name: {{ include "ri-firewall.fullname" . }}-text-embedding-model-cache + mountPath: "/model_cache" + readOnly: false + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-text-embedding-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-text-embedding-conf + items: + - key: "model-settings.json" + path: "model-settings.json" + - name: {{ include "ri-firewall.fullname" . 
}}-text-embedding-model-cache + emptyDir: { } +{{- end }} diff --git a/ri-firewall/templates/model-servers/_gen/text-embedding/settings-configmap.yaml b/ri-firewall/templates/model-servers/_gen/text-embedding/settings-configmap.yaml new file mode 100644 index 00000000..d6640d18 --- /dev/null +++ b/ri-firewall/templates/model-servers/_gen/text-embedding/settings-configmap.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.riFirewall.modelServers.textEmbedding.remoteModelServer.enabled }} +# Autogenerated by a script. DO NOT EDIT. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-text-embedding-conf +data: + model-settings.json: | + { + "name": "text-embedding", + "task": "text-embedding", + "huggingface_uri": "robustintelligence/roberta-text-embed", + "huggingface_commit_hash": "0f75c234a454ffc42a05757a4f28e41eb5e50679" + } +{{- end }} diff --git a/ri-firewall/templates/operator/configmap.yaml b/ri-firewall/templates/operator/configmap.yaml new file mode 100644 index 00000000..46615083 --- /dev/null +++ b/ri-firewall/templates/operator/configmap.yaml @@ -0,0 +1,199 @@ +# Config map that defines templates for Firewall instances. +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-conf +data: + firewall-instance-configmap.config: | + apiVersion: v1 + kind: ConfigMap + metadata: + # Controller will set the name based on the firewall instance name. + name: PLACEHOLDER + firewall-instance-service.config: | + apiVersion: v1 + kind: Service + metadata: + # Controller will set the name based on the firewall instance name. + name: PLACEHOLDER + labels: + {{- include "ri-firewall.labels" . | nindent 8 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.service.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . 
| nindent 8 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.service.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + type: {{ .Values.riFirewall.operator.firewallInstanceTemplate.service.type }} + ports: + - port: {{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }} + targetPort: {{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }} + protocol: TCP + name: fw-proxy + selector: + # Controller needs to fill in this value so the service matches pods + # for the firewall instance. + app: PLACEHOLDER + firewall-instance-deployment.config: | + apiVersion: apps/v1 + kind: Deployment + metadata: + # Controller will set the name based on the firewall instance name. + name: PLACEHOLDER + labels: + {{- include "ri-firewall.labels" . | nindent 8 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + selector: + matchLabels: + # Each Deployment needs its own matchlabels so the firewall instance + # replica sets are independent. + app: PLACEHOLDER + replicas: {{ .Values.riFirewall.operator.firewallInstanceTemplate.deployment.replicaCount }} + template: + metadata: + labels: + # Each Deployment needs its own matchlabels so the firewall instance + # replica sets are independent. + app: PLACEHOLDER + {{- include "ri-firewall.labels" . | nindent 12 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.deployment.labels }} + {{- toYaml . | nindent 12 }} + {{- end }} + annotations: + checksum/system-config: {{ include (print $.Template.BasePath "/system-firewall-config.yaml") . | sha256sum }} + checksum/model-map-config: {{ include (print $.Template.BasePath "/model-servers/_gen/configmap.yaml") . 
| sha256sum }} + {{- include "ri-firewall.annotations" . | nindent 12 }} + {{- with .Values.riFirewall.operator.firewallInstanceTemplate.deployment.annotations }} + {{- toYaml . | nindent 12 }} + {{- end }} + spec: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 12 }} + {{- end }} + containers: + # gRPC-gateway is a sidecar container that proxies traffic from REST to gRPC. + - name: grpc-gateway-proxy + image: "{{ .Values.riFirewall.images.backendImage.registry}}/{{ .Values.riFirewall.images.backendImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.backendImage.pullPolicy }} + ports: + - name: fw-proxy + containerPort: {{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }} + protocol: TCP + resources: + {{- toYaml .Values.riFirewall.operator.firewallInstanceTemplate.deployment.proxyResources | nindent 16 }} + command: + - "/ri/firewall" + args: + - "start-firewall-gateway-proxy" + - "--proxy-addr=:{{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }}" + # Use a local port of the other container. + # Containers can use the local network to communicate with each other. + - "--backend-addr=:{{ .Values.riFirewall.operator.firewallInstanceTemplate.firewallServerLocalPort }}" + - name: firewall-server + image: "{{ .Values.riFirewall.images.firewallServerImage.registry}}/{{ .Values.riFirewall.images.firewallServerImage.name }}" + imagePullPolicy: "{{ .Values.riFirewall.images.firewallServerImage.pullPolicy }}" + env: + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: openaiAPIKey + - name: RI_SERVICE_NAME + value: firewall-server + # Purely optional key for local firewall deployments to allow access to remote + # model servers. 
+ - name: REMOTE_FIREWALL_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.riFirewall.secrets.existingIntegrationSecretsName }} + key: remoteFirewallAPIKey + optional: true + resources: + {{- toYaml .Values.riFirewall.operator.firewallInstanceTemplate.deployment.serverResources | nindent 16 }} + command: + - "ri-firewall" + args: + - "--port={{ .Values.riFirewall.operator.firewallInstanceTemplate.firewallServerLocalPort }}" + # This relies on the controller mounting the rule config with this + # mount directory and name. + - "--user-config-path=/config/ruleConfig" + - "--model-connection-config-path=/model-connection-config/model-connection-map.yaml" + - "--yaml-system-config-path=/system-config/system-conf.yaml" + volumeMounts: + - name: {{ include "ri-firewall.modelConnectionConfigMapName" . }} + mountPath: "/model-connection-config" + readOnly: true + - name: system-config + mountPath: "/system-config" + readOnly: true + startupProbe: + httpGet: + path: /healthz + port: {{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }} + httpHeaders: + # There is no auth needed for this check, but the server + # expects this keys to be present in the request headers. + - name: X-Firewall-Api-Key + value: test + failureThreshold: 20 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /healthz + port: {{ .Values.riFirewall.operator.firewallInstanceTemplate.proxyPort }} + httpHeaders: + # There is no auth needed for this check, but the server + # expects this keys to be present in the request headers. + - name: X-Firewall-Api-Key + value: test + periodSeconds: 5 + # The user config will be set by the controller. + # The system config and the model connection config should be the same + # across all firewall instances. + volumes: + - name: system-config + configMap: + name: {{ include "ri-firewall.firewallSystemConfigMapName" . 
}} + optional: true + items: + - key: "systemConf" + path: "system-conf.yaml" + - name: {{ include "ri-firewall.modelConnectionConfigMapName" . }} + configMap: + name: {{ include "ri-firewall.modelConnectionConfigMapName" . }} + items: + - key: "model-connection-map.yaml" + path: "model-connection-map.yaml" + firewall-instance-ingress.config: | + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + name: {{ include "ri-firewall.fullname" . }}-firewall-instance-ingress + namespace: '{{.Release.Namespace}}' + labels: + {{- include "ri-firewall.labels" . | nindent 8 }} + {{- with .Values.riFirewall.ingress.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + nginx.ingress.kubernetes.io/auth-url: http://{{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.authServer.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.riFirewall.authServer.port}}/v1/auth/validate + nginx.ingress.kubernetes.io/auth-method: POST + {{- with .Values.riFirewall.ingress.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + ingressClassName: {{ .Values.riFirewall.ingress.ingressClassName }} + # Controller will add rules when creating the ingress. diff --git a/ri-firewall/templates/operator/deployment.yaml b/ri-firewall/templates/operator/deployment.yaml new file mode 100644 index 00000000..654e8273 --- /dev/null +++ b/ri-firewall/templates/operator/deployment.yaml @@ -0,0 +1,85 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.operator.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + checksum/config: {{ include (print $.Template.BasePath "/operator/configmap.yaml") . | sha256sum }} + labels: + {{- include "ri-firewall.labels" . 
| nindent 4 }} + {{- with .Values.riFirewall.operator.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }} + replicas: 1 + template: + metadata: + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + {{- with .Values.riFirewall.operator.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }} + {{- include "ri-firewall.labels" . | nindent 8 }} + {{- with .Values.riFirewall.operator.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "ri-firewall.operator.serviceAccountName" . }} + containers: + - name: controller-manager + image: "{{ .Values.riFirewall.images.backendImage.registry}}/{{ .Values.riFirewall.images.backendImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.backendImage.pullPolicy }} + resources: + {{- toYaml .Values.riFirewall.operator.deployment.resources | nindent 12 }} + command: + - "/ri/firewall" + args: + - "start-operator" + - "--app-name={{ include "ri-firewall.fullname" . }}" + - "--configmap-template-path=/config/firewall-instance-configmap.config" + - "--deployment-template-path=/config/firewall-instance-deployment.config" + - "--ingress-template-path=/config/firewall-instance-ingress.config" + - "--service-template-path=/config/firewall-instance-service.config" + - "--template-checksum={{ include (print $.Template.BasePath "/operator/configmap.yaml") . 
| sha256sum }}" + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-config + mountPath: "/config" + readOnly: true + terminationGracePeriodSeconds: 10 + volumes: + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-conf + items: + - key: "firewall-instance-configmap.config" + path: "firewall-instance-configmap.config" + - key: "firewall-instance-deployment.config" + path: "firewall-instance-deployment.config" + - key: "firewall-instance-service.config" + path: "firewall-instance-service.config" + - key: "firewall-instance-ingress.config" + path: "firewall-instance-ingress.config" diff --git a/ri-firewall/templates/operator/role-binding.yaml b/ri-firewall/templates/operator/role-binding.yaml new file mode 100644 index 00000000..c213b469 --- /dev/null +++ b/ri-firewall/templates/operator/role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "ri-firewall.fullname" . }}-operator-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-role +subjects: +- kind: ServiceAccount + name: {{ include "ri-firewall.operator.serviceAccountName" . 
}} + namespace: {{ .Release.Namespace }} diff --git a/ri-firewall/templates/operator/role.yaml b/ri-firewall/templates/operator/role.yaml new file mode 100644 index 00000000..13e92f40 --- /dev/null +++ b/ri-firewall/templates/operator/role.yaml @@ -0,0 +1,83 @@ +{{- if .Values.riFirewall.operator.serviceAccount.create -}} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.operator.name }}-role + namespace: '{{.Release.Namespace}}' +rules: +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - services + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - fw.rbst.io + resources: + - firewallinstances + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - fw.rbst.io + resources: + - firewallinstances/finalizers + verbs: + - update +- apiGroups: + - fw.rbst.io + resources: + - firewallinstances/status + verbs: + - get + - patch + - update +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +{{- end }} diff --git a/ri-firewall/templates/operator/service-account.yaml b/ri-firewall/templates/operator/service-account.yaml new file mode 100644 index 00000000..e138e2a9 --- /dev/null +++ b/ri-firewall/templates/operator/service-account.yaml @@ -0,0 +1,16 @@ +{{- if .Values.riFirewall.operator.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "ri-firewall.operator.serviceAccountName" . }} + labels: + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.operator.serviceAccount.labels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.operator.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/ri-firewall/templates/secrets.yaml b/ri-firewall/templates/secrets.yaml new file mode 100644 index 00000000..109867b9 --- /dev/null +++ b/ri-firewall/templates/secrets.yaml @@ -0,0 +1,14 @@ +{{- if (not .Values.riFirewall.secrets.existingAuthSecretsName) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "ri-firewall.existingSecretsName" . }} +type: Opaque +data: + {{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "ri-firewall.existingSecretsName" .) ) | default dict }} + {{- $secretData := (get $secret "data") | default dict }} + auth0ClientSecret: {{ (get $secretData "auth0ClientSecret") | default (.Values.riFirewall.secrets.auth0.clientSecret | b64enc | quote) }} + auth0ClientID: {{ (get $secretData "auth0ClientID") | default (.Values.riFirewall.secrets.auth0.clientID | b64enc | quote) }} + auth0Domain: {{ (get $secretData "auth0Domain") | default (.Values.riFirewall.secrets.auth0.domain | b64enc | quote) }} + auth0CallbackURL: {{ (get $secretData "auth0CallbackURL") | default (.Values.riFirewall.secrets.auth0.callback | b64enc | quote) }} +{{- end }} diff --git a/ri-firewall/templates/system-firewall-config.yaml b/ri-firewall/templates/system-firewall-config.yaml new file mode 100644 index 00000000..25df1a94 --- /dev/null +++ b/ri-firewall/templates/system-firewall-config.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.firewallSystemConfigMapName" . 
}} +data: + systemConf: | + log_user_data: {{ .Values.riFirewall.firewallSystemConfig.logUserData }} + max_request_tokens: {{ .Values.riFirewall.firewallSystemConfig.maxRequestTokens }} + yara_config: + enable_yara: {{ .Values.riFirewall.firewallSystemConfig.enableYara }} + yara_server_address: "{{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}:{{ .Values.riFirewall.yaraServer.port }}" + azure_openai_model_provider: + api_base_url: {{ .Values.riFirewall.firewallSystemConfig.azureOpenaiModelProvider.apiBaseURL }} + api_version: {{ .Values.riFirewall.firewallSystemConfig.azureOpenaiModelProvider.apiVersion }} + chat_model_deployment_name: {{ .Values.riFirewall.firewallSystemConfig.azureOpenaiModelProvider.chatModelDeploymentName }} + # We do not use embeddings model for firewall, so leave it empty. + embeddings_model_deployment_name: "" diff --git a/ri-firewall/templates/yara-server/configmap.yaml b/ri-firewall/templates/yara-server/configmap.yaml new file mode 100644 index 00000000..2a3fd0c7 --- /dev/null +++ b/ri-firewall/templates/yara-server/configmap.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}-conf +data: + server.config: | +{{ include "ri-firewall.serverArgs" . 
| indent 4 }} + yaraServer: + autoUpdateEnabled: {{ .Values.riFirewall.yaraServer.autoUpdateEnabled }} + yaraRuleRepoRef: {{ .Values.riFirewall.yaraServer.ruleRepoRef }} + gitRepoToken: {{ .Values.riFirewall.yaraServer.gitRepoToken }} + mountDir: {{ .Values.riFirewall.yaraServer.yaraPatternMountDir }} + updateFrequency: {{ .Values.riFirewall.yaraServer.updateFrequency }} diff --git a/ri-firewall/templates/yara-server/deployment.yaml b/ri-firewall/templates/yara-server/deployment.yaml new file mode 100644 index 00000000..6dfc4f1c --- /dev/null +++ b/ri-firewall/templates/yara-server/deployment.yaml @@ -0,0 +1,92 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }} + labels: + app: {{ .Values.riFirewall.yaraServer.name }} + {{- with .Values.riFirewall.yaraServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.yaraServer.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ .Values.riFirewall.yaraServer.name }} + {{- if not .Values.riFirewall.yaraServer.hpa.enabled }} + replicas: {{ .Values.riFirewall.yaraServer.deployment.replicaCount }} # yara-server's own replica count; omitted when the HPA is enabled + {{- end }} + template: + metadata: + labels: + app: {{ .Values.riFirewall.yaraServer.name }} + {{- with .Values.riFirewall.yaraServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 8 }} + {{- with .Values.riFirewall.yaraServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.riFirewall.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .
| nindent 8 }} + {{- end }} + containers: + - name: {{ .Values.riFirewall.yaraServer.name }} + image: "{{ .Values.riFirewall.images.backendImage.registry}}/{{ .Values.riFirewall.images.backendImage.name }}" + imagePullPolicy: {{ .Values.riFirewall.images.backendImage.pullPolicy }} + ports: + - name: yara + containerPort: {{ .Values.riFirewall.yaraServer.port }} + protocol: TCP + livenessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.riFirewall.yaraServer.port }}" ] + periodSeconds: 5 + readinessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.riFirewall.yaraServer.port }}" ] + periodSeconds: 5 + startupProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.riFirewall.yaraServer.port }}" ] + failureThreshold: 60 + periodSeconds: 5 + env: + - name: GITHUB_API_KEY + value: "{{ .Values.riFirewall.yaraServer.gitRepoToken }}" + resources: + {{- toYaml .Values.riFirewall.yaraServer.deployment.resources | nindent 12 }} + command: + - "/ri/firewall" + args: + - "start-yara-server" + - "--server-config-path=/config/server.config" + - "--port={{ .Values.riFirewall.yaraServer.port }}" + - "--mount-dir={{ .Values.riFirewall.yaraServer.yaraPatternMountDir }}" + - "--auto-update-enabled={{ .Values.riFirewall.yaraServer.autoUpdateEnabled }}" + - "--yara-patterns-git-ref={{ .Values.riFirewall.yaraServer.ruleRepoRef }}" + {{ if ne .Values.riFirewall.yaraServer.yaraPatternUpdateFrequency "" }} + - "--update-frequency={{ .Values.riFirewall.yaraServer.yaraPatternUpdateFrequency }}" + {{ end }} + volumeMounts: + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}-config + mountPath: "/config" + readOnly: true + - name: {{ include "ri-firewall.fullname" . 
}}-{{ .Values.riFirewall.yaraServer.name }}-ephemeral-storage + mountPath: {{ .Values.riFirewall.yaraServer.yaraPatternMountDir }} + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}-config + configMap: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}-conf + items: + - key: "server.config" + path: "server.config" + - name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }}-ephemeral-storage + emptyDir: {} diff --git a/ri-firewall/templates/yara-server/hpa.yaml b/ri-firewall/templates/yara-server/hpa.yaml new file mode 100644 index 00000000..3cda2404 --- /dev/null +++ b/ri-firewall/templates/yara-server/hpa.yaml @@ -0,0 +1,21 @@ +{{- if .Values.riFirewall.yaraServer.hpa.enabled }} +apiVersion: {{ include "ri-firewall.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }} + labels: + app: {{ .Values.riFirewall.yaraServer.name }} + {{- include "ri-firewall.labels" . | nindent 4 }} + {{- with .Values.riFirewall.yaraServer.hpa.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "ri-firewall.fullname" . }}-{{ .Values.riFirewall.yaraServer.name }} + minReplicas: {{ .Values.riFirewall.yaraServer.hpa.minReplicas }} + maxReplicas: {{ .Values.riFirewall.yaraServer.hpa.maxReplicas }} + metrics: +{{ toYaml .Values.riFirewall.yaraServer.hpa.metrics | indent 4 }} +{{- end }} diff --git a/ri-firewall/templates/yara-server/service.yaml b/ri-firewall/templates/yara-server/service.yaml new file mode 100644 index 00000000..048ac5ad --- /dev/null +++ b/ri-firewall/templates/yara-server/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ri-firewall.fullname" . 
}}-{{ .Values.riFirewall.yaraServer.name }} + labels: + {{- with .Values.riFirewall.yaraServer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "ri-firewall.annotations" . | nindent 4 }} + {{- with .Values.riFirewall.yaraServer.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.riFirewall.yaraServer.service.type }} + ports: + - port: {{ .Values.riFirewall.yaraServer.port }} + targetPort: {{ .Values.riFirewall.yaraServer.port }} + protocol: TCP + name: yara + selector: + app: {{ .Values.riFirewall.yaraServer.name }} diff --git a/ri-firewall/values.yaml b/ri-firewall/values.yaml new file mode 100644 index 00000000..a2b5878a --- /dev/null +++ b/ri-firewall/values.yaml @@ -0,0 +1,385 @@ +riFirewall: + # @ignored -- Override for the chart name. If used, this will be appended to the + # release name to form the fully qualified app name + # (e.g., `ri-${nameOverride}` instead of just `ri`) + nameOverride: "" + # Common annotations added to all K8s resources + commonAnnotations: {} + # Common labels added to all K8s resources + commonLabels: {} + + # -- Values for the internal RI K8s secret used by the Firewall. + # @default -- (see individual values in `values.yaml`) + secrets: + # If existingIntegrationSecretsName is set, the secret will not be created. Must have openaiAPIKey, + # and huggingfaceAPIKey keys set. + existingIntegrationSecretsName: "" + # If existingAuthSecretsName is set, the secret will not be created. Must have domain, + # clientID, clientSecret and callback keys set. + existingAuthSecretsName: "" + # auth0Enabled (bool): Whether to enable Auth0 for the Firewall.
+ auth0Enabled: false + auth0: + # domain (str): Auth0 domain (optional) + domain: "" + # clientID (str): Auth0 client ID (optional) + clientID: "" + # clientSecret (str): Auth0 client secret (optional) + clientSecret: "" + # callback (str): Auth0 callback URL (optional) + callback: "" + + # -- firewallSystemConfig is system configuration for the RI Firewall. + firewallSystemConfig: + # maxRequestTokens (int): The maximum number of tokens that Firewall accepts from a single API request. + maxRequestTokens: 4096 + # logUserData (bool): Whether to collect raw firewall requests in the logs. + # Be careful with this setting! It opens us up to compliance / contract issues. + logUserData: true + # enableYara (bool): Whether or not to use the YARA rules in firewall rule evaluation. + enableYara: true + # azureOpenaiModelProvider: This specifies how to connect to Azure OpenAI models for internal rule evaluation. + azureOpenaiModelProvider: + # apiBaseURL (str): The URL where the firewall can access Azure models. + apiBaseURL: "" + # apiVersion (str): API version of Azure OpenAI to use. + apiVersion: "" + # chatModelDeploymentName (str): Name of the chat model deployment to use. + # This can be found in the Azure OpenAI console. + # We prefer GPT3.5-Turbo for the firewall. + chatModelDeploymentName: "" + + # -- firewallInstanceResourceQuota is configuration for a resource quota to + # limit the number of FirewallInstances a user can create in this deployment. + firewallInstanceResourceQuota: + # enabled (bool): Enable the creation of the resource quota. + enabled: true + # maxObjectCount (int): The maximum allowed number of FirewallInstances in the namespace. + maxObjectCount: 5 + + # -- Image specification for the RI Firewall. 
+ # @default -- (see individual values in `values.yaml`) + images: + imagePullSecrets: + - name: rimecreds + backendImage: + registry: "docker.io" + name: "robustintelligencehq/firewall-backend:latest" + pullPolicy: "Always" + firewallServerImage: + registry: "docker.io" + name: "robustintelligencehq/ri-firewall:latest" + pullPolicy: "Always" + modelServerImage: + registry: "docker.io" + name: "robustintelligencehq/firewall-model-server:latest" + pullPolicy: "Always" + + # -- `authServer` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + authServer: + name: "auth-server" + port: 15021 + # Service for authServer + service: + type: ClusterIP + annotations: {} + labels: {} + # HPA for the authServer. If disabled, will use `replicaCount` for the deployment. + hpa: + annotations: {} + labels: {} + enabled: true + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 60 + # Deployment for authServer + deployment: + annotations: {} + labels: {} + replicaCount: 1 + resources: + limits: + memory: 90Mi + requests: + cpu: 100m + memory: 90Mi + tokenLifetimeHours: + userTokenLifetime: 10 # 10 hours + systemTokenLifetime: 720 # 30 days + + # -- `operator` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + # The operator is responsible for reconciling FirewallInstance CRs. + # It creates individual firewall deployments and makes them available over + # the network. + operator: + name: "operator" + # Service Account for operator to manipulate k8s objects. 
+ serviceAccount: + create: true + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + labels: {} + deployment: + annotations: {} + labels: {} + resources: + limits: + memory: 300Mi + requests: + cpu: 10m + memory: 100Mi + + firewallInstanceTemplate: + proxyPort: 8081 + # Internal local port for communication between containers in the same deployment. + firewallServerLocalPort: 50052 + # Service for the firewall-instances + service: + type: ClusterIP + annotations: {} + labels: {} + # Deployment for the firewall server. + deployment: + annotations: {} + labels: {} + # Replica count for the firewall-server: only use if HPA disabled. + replicaCount: 1 + # Resources for the firewall server container. + serverResources: + limits: + memory: 3500Mi + requests: + cpu: 1000m + memory: 3500Mi + # Resources for the gRPC-gateway proxy container. + proxyResources: + limits: + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi + + # -- `yaraServer` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + yaraServer: + name: "yara-server" + port: 5023 + # Service for yaraServer + service: + type: ClusterIP + annotations: {} + labels: {} + # HPA for the yaraServer. If disabled, will use `replicaCount` for the deployment. + hpa: + annotations: {} + labels: {} + enabled: true + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 60 + # Deployment for yaraServer + deployment: + annotations: {} + labels: {} + replicaCount: 1 + resources: + limits: + memory: 1000Mi + ephemeral-storage: "4Gi" + requests: + cpu: 1000m + memory: 100Mi + ephemeral-storage: "2Gi" + # autoUpdateEnabled (bool): Whether to allow yara server to periodically update its rules via a pull mechanism. + autoUpdateEnabled: false + # ruleRepoRef (str): The git repo to pull yara rules from. 
+ ruleRepoRef: "" + # gitRepoToken (str): The git repo token to use for pulling yara rules. + gitRepoToken: "" + yaraPatternMountDir: "/yara" + # yaraPatternUpdateFrequency (str): The cron frequency at which yara server should update its rules. + yaraPatternUpdateFrequency: "" + + # -- `instanceManagerServer` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + instanceManagerServer: + name: "instance-manager-server" + port: 5024 + restPort: 15024 + # Service Account for instanceManagerServer to manipulate k8s firewall instances. + serviceAccount: + create: true + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + labels: {} + # Service for instanceManagerServer + service: + type: ClusterIP + annotations: {} + labels: {} + # HPA for the instanceManagerServer. If disabled, will use `replicaCount` for the deployment. + hpa: + annotations: {} + labels: {} + enabled: true + minReplicas: 1 + maxReplicas: 3 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 60 + # Deployment for instanceManagerServer + deployment: + annotations: {} + labels: {} + replicaCount: 1 + resources: + limits: + memory: 90Mi + requests: + cpu: 100m + memory: 90Mi + + # -- `ingress` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + ingress: + annotations: {} + labels: {} + tls: [] + ingressClassName: nginx + + # -- `monitoring` (Prometheus metrics/Datadog) K8s-level configurations + # @default -- (see individual values in `values.yaml`) + monitoring: + # -- Whether to enable Prometheus metrics for all services on the Firewall + enabled: true + # -- Port to expose Prometheus metrics on + port: 8080 + + # -- `modelServers` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + modelServers: + modelSettingsPath: /model_server + + # The following section of model server config values between the 
comments + # is autogenerated by the autogen script. The start and end delimiter + # comments should not be changed and are used to identify the section that + # should be autogenerated. + ## START MODEL SERVER VALUES + promptInjection: + maxReplicas: 3 + minReplicas: 1 + # Use an existing model server outside the cluster instead of spinning up a + # model server in this Helm deployment. + # Only enable this for local Minikube. + remoteModelServer: + enabled: false + address: "" + resources: + requests: + memory: "2500Mi" + cpu: "1000m" + limits: + memory: "2500Mi" + cpu: "5000m" + factualInconsistency: + maxReplicas: 3 + minReplicas: 1 + # Use an existing model server outside the cluster instead of spinning up a + # model server in this Helm deployment. + # Only enable this for local Minikube. + remoteModelServer: + enabled: false + address: "" + resources: + requests: + memory: "3500Mi" + cpu: "1000m" + limits: + memory: "3500Mi" + cpu: "5000m" + languageDetection: + maxReplicas: 1 + minReplicas: 1 + # Use an existing model server outside the cluster instead of spinning up a + # model server in this Helm deployment. + # Only enable this for local Minikube. + remoteModelServer: + enabled: false + address: "" + resources: + requests: + memory: "2000Mi" + cpu: "1000m" + limits: + memory: "2000Mi" + cpu: "5000m" + textEmbedding: + maxReplicas: 3 + minReplicas: 1 + # Use an existing model server outside the cluster instead of spinning up a + # model server in this Helm deployment. + # Only enable this for local Minikube. + remoteModelServer: + enabled: false + address: "" + resources: + requests: + memory: "2500Mi" + cpu: "1000m" + limits: + memory: "2500Mi" + cpu: "5000m" + ## END MODEL SERVER VALUES + + +# -- Ingress-nginx controller sub-chart. See https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx for all parameters. 
+# @default -- (see individual values in `values.yaml`) +ingress-nginx: + imagePullSecrets: + - name: rimecreds + controller: + image: + registry: "docker.io" + image: "robustintelligencehq/ingress-nginx-controller" + tag: "v1.3.0" + digest: "sha256:067673df26a65ec5c2d5b30f25db869bad4d7d391fc81882250134577e581ef0" + scope: + enabled: true + # -- K8s namespace for the ingress + namespace: "" + ingressClassResource: + enabled: true + default: false + admissionWebhooks: + enabled: false + service: + targetPorts: + http: http + https: http + # -- For full list of annotations, see + # https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/service/annotations/ + annotations: {} + enableHttp: false + config: + force-ssl-redirect: "false" + ssl-redirect: "false" diff --git a/rime-agent/Chart.yaml b/rime-agent/Chart.yaml index 110c569d..e257ce29 100644 --- a/rime-agent/Chart.yaml +++ b/rime-agent/Chart.yaml @@ -16,7 +16,7 @@ type: application # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) ## Note: we manually set the version when pushing to separate Robust Intelligence helm repo. -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to diff --git a/rime-agent/README.md b/rime-agent/README.md index 07d59901..913c9e3a 100644 --- a/rime-agent/README.md +++ b/rime-agent/README.md @@ -1,6 +1,6 @@ # rime-agent -![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.18.0](https://img.shields.io/badge/AppVersion-0.18.0-informational?style=flat-square) +![Version: 0.1.1](https://img.shields.io/badge/Version-0.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.18.0](https://img.shields.io/badge/AppVersion-0.18.0-informational?style=flat-square) A Helm chart for the Robust Intelligence Platform Agent, part of the Data Plane. @@ -12,85 +12,63 @@ Kubernetes: `>=1.20.0-0` | Key | Type | Default | Description | |-----|------|---------|-------------| -| rimeAgent.apiKey | string | `nil` | the API key the agent will use to communicate with the RI Platform. 
| -| rimeAgent.commonAnnotations | object | `{}` | | -| rimeAgent.commonLabels | object | `{}` | | -| rimeAgent.connections.agentManagerAddress | string | `"rime-agent-manager-server:15000"` | | -| rimeAgent.connections.dataCollectorRestAddress | string | `"rime-data-collector-server:15015"` | | -| rimeAgent.connections.datasetManagerRestAddress | string | `"rime-dataset-manager-server:15009"` | | -| rimeAgent.connections.firewallServerRestAddress | string | `"rime-firewall-server:15002"` | | -| rimeAgent.connections.platformAddress | string | `nil` | | -| rimeAgent.connections.uploadServerAddress | string | `"rime-upload-server:5000"` | | -| rimeAgent.connections.uploadServerRestAddress | string | `"rime-upload-server:15001"` | | -| rimeAgent.dockerCredentialsPayload | string | `nil` | pre-configured json encoded string of k8s docker config secret | -| rimeAgent.fullNameOverride | string | `nil` | | -| rimeAgent.id | string | `nil` | unique ID for this Agent. Can be left blank if this is a internal agent. | -| rimeAgent.images.agentImage.name | string | `"robustintelligencehq/rime-agent:latest"` | the name and tag of the rime agent image. | -| rimeAgent.images.agentImage.pullPolicy | string | `"Always"` | see https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy | -| rimeAgent.images.agentImage.registry | string | `"docker.io"` | the registry of the agent image. | -| rimeAgent.images.imagePullSecrets | list | `[]` | use existing image pull secrets in your k8s cluster, overriding rimeAgent.dockerCredentials # Note that the credentials should provide access to both the Agent image and model testing images. | -| rimeAgent.images.modelTestJobImage.name | string | `"robustintelligencehq/rime-testing-engine-dev:latest"` | | -| rimeAgent.images.modelTestJobImage.pullPolicy | string | `"Always"` | image pull policy for model test jobs. 
| -| rimeAgent.images.modelTestJobImage.registry | string | `"docker.io"` | the registry of the default model test job image. | -| rimeAgent.isInternal | bool | `false` | | -| rimeAgent.launcher.deployment.affinity | object | `{}` | | -| rimeAgent.launcher.deployment.annotations | object | `{}` | | -| rimeAgent.launcher.deployment.extraEnv | list | `[]` | | -| rimeAgent.launcher.deployment.extraVolumeMounts | list | `[]` | | -| rimeAgent.launcher.deployment.extraVolumes | list | `[]` | | -| rimeAgent.launcher.deployment.labels | object | `{}` | | -| rimeAgent.launcher.deployment.nodeSelector | object | `{}` | | -| rimeAgent.launcher.deployment.resources.limits.cpu | string | `"500m"` | | -| rimeAgent.launcher.deployment.resources.limits.memory | string | `"500Mi"` | | -| rimeAgent.launcher.deployment.resources.requests.cpu | string | `"100m"` | | -| rimeAgent.launcher.deployment.resources.requests.memory | string | `"100Mi"` | | -| rimeAgent.launcher.deployment.securityContext | object | `{}` | | -| rimeAgent.launcher.deployment.tolerations | list | `[]` | | -| rimeAgent.launcher.name | string | `"launcher"` | | -| rimeAgent.launcher.serviceAccount.annotations | object | `{}` | | -| rimeAgent.launcher.serviceAccount.create | bool | `true` | | -| rimeAgent.launcher.serviceAccount.labels | object | `{}` | | -| rimeAgent.launcher.serviceAccount.name | string | `nil` | | -| rimeAgent.nameOverride | string | `nil` | | -| rimeAgent.operator.deployment.affinity | object | `{}` | | -| rimeAgent.operator.deployment.annotations | object | `{}` | | -| rimeAgent.operator.deployment.extraEnv | list | `[]` | | -| rimeAgent.operator.deployment.extraVolumeMounts | list | `[]` | | -| rimeAgent.operator.deployment.extraVolumes | list | `[]` | | -| rimeAgent.operator.deployment.labels | object | `{}` | | -| rimeAgent.operator.deployment.nodeSelector | object | `{}` | | -| rimeAgent.operator.deployment.resources.limits.cpu | string | `"500m"` | | -| 
rimeAgent.operator.deployment.resources.limits.memory | string | `"128Mi"` | | -| rimeAgent.operator.deployment.resources.requests.cpu | string | `"500m"` | | -| rimeAgent.operator.deployment.resources.requests.memory | string | `"128Mi"` | | -| rimeAgent.operator.deployment.securityContext | object | `{}` | | -| rimeAgent.operator.deployment.tolerations | list | `[]` | | -| rimeAgent.operator.logArchival.enabled | bool | `false` | | -| rimeAgent.operator.modelTestJob.activeDeadlineSeconds | int | `259200` | active deadline of job in seconds. Default to 72 hours. | -| rimeAgent.operator.modelTestJob.affinity | object | `{}` | affinity for model test jobs. | -| rimeAgent.operator.modelTestJob.annotations | object | `{}` | | -| rimeAgent.operator.modelTestJob.backoffLimit | int | `0` | | -| rimeAgent.operator.modelTestJob.extraEnv | list | `[]` | | -| rimeAgent.operator.modelTestJob.extraVolumeMounts | list | `[]` | | -| rimeAgent.operator.modelTestJob.extraVolumes | list | `[]` | | -| rimeAgent.operator.modelTestJob.labels | object | `{}` | | -| rimeAgent.operator.modelTestJob.name | string | `"model-testing-job"` | | -| rimeAgent.operator.modelTestJob.nodeSelector | object | `{}` | node selector for model test jobs. | -| rimeAgent.operator.modelTestJob.resources | object | `{"limits":{"cpu":"3000m","memory":"8000Mi"},"requests":{"cpu":"3000m","memory":"8000Mi"}}` | resource request and limits for model test jobs. | -| rimeAgent.operator.modelTestJob.securityContext | object | `{}` | | -| rimeAgent.operator.modelTestJob.serviceAccount.annotations | object | `{}` | if create is true, annotations to add to the service account. # Since data is stored in a cloud storage (e.g. S3, GCS), add an annotation to allow read access here. 
# EKS IAM setup for S3: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html # GKE IAM setup for GCS: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity | -| rimeAgent.operator.modelTestJob.serviceAccount.create | bool | `true` | Specifies whether a ServiceAccount should be created. | -| rimeAgent.operator.modelTestJob.serviceAccount.name | string | `nil` | Specify a preexisting ServiceAccount to use if create is false. | -| rimeAgent.operator.modelTestJob.tolerations | list | `[]` | tolerations for model test jobs. | -| rimeAgent.operator.modelTestJob.ttlSecondsAfterFinished | int | `172800` | TTL for jobs after finished in seconds. Default to 48 hours. | -| rimeAgent.operator.name | string | `"operator"` | | -| rimeAgent.operator.serviceAccount.annotations | object | `{}` | | -| rimeAgent.operator.serviceAccount.create | bool | `true` | | -| rimeAgent.operator.serviceAccount.labels | object | `{}` | | -| rimeAgent.operator.serviceAccount.name | string | `""` | | -| rimeAgent.verbose | bool | `true` | | -| tls.crossplaneEnabled | bool | `false` | | -| tls.enableCertManager | bool | `false` | | +| rimeAgent.apiKey | string | `nil` | The API key the agent will use to communicate with the RI Platform. Required for external agents. | +| rimeAgent.connections | object | (see individual values in `values.yaml`) | Service addresses for the agent. | +| rimeAgent.dockerCredentialsPayload | string | `nil` | Pre-configured json encoded string of K8s docker config secret Providing `rimeAgent.dockerCredentialsPayload` will override any provided inputs in rimeAgent.dockerCredentials | +| rimeAgent.existingSecretName | string | `""` | Name of an existing K8s secret containing the API key. If existingSecretName is set, the secret will not be created. Must have api-key set. 
| +| rimeAgent.fileServer.config.endpoint | string | `"s3.amazonaws.com"` | | +| rimeAgent.fileServer.config.storageBucketName | string | `""` | The bucket name of the S3 bucket used as the blob storage. | +| rimeAgent.fileServer.config.type | string | `"s3"` | | +| rimeAgent.fileServer.deployment.affinity | object | `{}` | | +| rimeAgent.fileServer.deployment.annotations | object | `{}` | | +| rimeAgent.fileServer.deployment.extraEnv | list | `[]` | | +| rimeAgent.fileServer.deployment.extraVolumeMounts | list | `[]` | | +| rimeAgent.fileServer.deployment.extraVolumes | list | `[]` | | +| rimeAgent.fileServer.deployment.labels | object | `{}` | | +| rimeAgent.fileServer.deployment.nodeSelector | object | `{}` | | +| rimeAgent.fileServer.deployment.replicaCount | int | `1` | | +| rimeAgent.fileServer.deployment.resources.limits.memory | string | `"90Mi"` | | +| rimeAgent.fileServer.deployment.resources.requests.cpu | string | `"100m"` | | +| rimeAgent.fileServer.deployment.resources.requests.memory | string | `"90Mi"` | | +| rimeAgent.fileServer.deployment.securityContext | object | `{}` | | +| rimeAgent.fileServer.deployment.tolerations | list | `[]` | | +| rimeAgent.fileServer.enabled | bool | `false` | | +| rimeAgent.fileServer.hpa.annotations | object | `{}` | | +| rimeAgent.fileServer.hpa.enabled | bool | `true` | | +| rimeAgent.fileServer.hpa.labels | object | `{}` | | +| rimeAgent.fileServer.hpa.maxReplicas | int | `10` | | +| rimeAgent.fileServer.hpa.metrics[0].resource.name | string | `"cpu"` | | +| rimeAgent.fileServer.hpa.metrics[0].resource.target.averageUtilization | int | `60` | | +| rimeAgent.fileServer.hpa.metrics[0].resource.target.type | string | `"Utilization"` | | +| rimeAgent.fileServer.hpa.metrics[0].type | string | `"Resource"` | | +| rimeAgent.fileServer.hpa.minReplicas | int | `1` | | +| rimeAgent.fileServer.name | string | `"file-server"` | | +| rimeAgent.fileServer.port | int | `5022` | | +| rimeAgent.fileServer.service.annotations | 
object | `{}` | | +| rimeAgent.fileServer.service.labels | object | `{}` | | +| rimeAgent.fileServer.service.type | string | `"ClusterIP"` | | +| rimeAgent.fileServer.serviceAccount | object | `{"annotations":{"eks.amazonaws.com/role-arn":""},"create":true,"labels":{},"name":""}` | Account used by services that need access to blob storage. | +| rimeAgent.fileServer.serviceAccount.annotations."eks.amazonaws.com/role-arn" | string | `""` | Specify ARN of IRSA-enabled Blob Storage IAM role here | +| rimeAgent.id | string | `nil` | unique ID for this Agent. Can be left blank if this is a internal agent. This id is provided by the RI Platform Control Plane when creating external agents. For internal agents, this field is not used. | +| rimeAgent.images | object | (see individual values in `values.yaml`) | Image specification for the Agent. | +| rimeAgent.isInternal | bool | `false` | Whether this agent is running within the same K8s cluster as the control plane. | +| rimeAgent.launcher | object | (see individual values in `values.yaml`) | `launcher` K8s-level configurations | +| rimeAgent.monitoring | object | (see individual values in `values.yaml`) | `monitoring` (Datadog) K8s-level configurations | +| rimeAgent.monitoring.datadogEnabled | bool | `true` | Whether to enable Datadog autodiscovery tags for all services on the RIME agent | +| rimeAgent.monitoring.enabled | bool | `true` | Whether to enable Prometheus metrics for all services on the RIME agent | +| rimeAgent.monitoring.port | int | `8080` | Port to expose Prometheus metrics on | +| rimeAgent.operator | object | (see individual values in `values.yaml`) | `operator` K8s-level configurations | +| rimeAgent.operator.crossPlaneRPCJob | object | (see individual values in `values.yaml`) | `cross-plane-job` K8s-level configurations | +| rimeAgent.operator.logArchival | object | (see individual values in `values.yaml`) | Configuration for RIME Job Log Archival (persistence of job logs for debugging). 
| +| rimeAgent.operator.modelTestJob | object | (see individual values in `values.yaml`) | `model-testing-job` K8s-level configurations | +| rimeAgent.registerAgent | object | (see individual values in `values.yaml`) | `registerAgent` K8s-level configurations | +| rimeAgent.rimeCrossPlaneServer | object | (see individual values in `values.yaml`) | `rime-cross-plane-server` K8s-level configurations | +| tls | object | (see individual values in `values.yaml`) | Mutual TLS configuration for internal agent. | +| tls.certificateSpec | object | `{"issuerRef":{"group":"cert-manager.io","kind":"Issuer","name":""},"subject":{"organizations":["RobustIntelligence"]}}` | `spec` for Certificate object (https://cert-manager.io/docs/usage/certificate/). | +| tls.certificateSpec.issuerRef | object | `{"group":"cert-manager.io","kind":"Issuer","name":""}` | See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec Attributes listed below are the minimum required `issuerRef` property. | +| tls.certificateSpec.issuerRef.name | string | `""` | Will default to `rime-{{ .Release.Namespace }}-ca-issuer`. | +| tls.certificateSpec.subject | object | `{"organizations":["RobustIntelligence"]}` | See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec Attributed listed below are the minimum required for the `subject` property. 
| +| tls.enableCertManager | bool | `false` | Whether to enable the cert-manager service for issuing and managing TLS certificates within the cluster | ---------------------------------------------- Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) diff --git a/rime-agent/crds/rbst.io_crossplanerpcjobs.yaml b/rime-agent/crds/rbst.io_crossplanerpcjobs.yaml new file mode 100644 index 00000000..d29ec722 --- /dev/null +++ b/rime-agent/crds/rbst.io_crossplanerpcjobs.yaml @@ -0,0 +1,83 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: crossplanerpcjobs.rbst.io +spec: + group: rbst.io + names: + kind: CrossPlaneRPCJob + listKind: CrossPlaneRPCJobList + plural: crossplanerpcjobs + singular: crossplanerpcjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.jobStatus + name: JobStatus + type: string + name: v1 + schema: + openAPIV3Schema: + description: "CrossPlaneRPCJob is the Schema for the crossplanerpcjobs API + \n Enable object interface implementation generation for this type. Enable + /status subresource for this type. Add a columns to \"kubectl get\" output + for the CRD." + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: CrossPlaneRPCJobSpec defines the desired state of CrossPlaneRPCJob. + properties: + cancellationRequested: + description: Flag to indicate that the CrossPlaneJob should be cancelled. + type: boolean + crossPlaneRPCSpec: + description: CrossPlaneRPCSpec defines the parameters needed for a + cross plane rpc. + properties: + crossPlaneTaskConfig: + description: CrossPlaneTaskConfig is the CrossPlaneRequest proto + for a CrossPlaneTask that has been marshalled. + format: byte + type: string + required: + - crossPlaneTaskConfig + type: object + crossServiceKeyRef: + description: Reference to the cross service key. Only required when + running on an Internal agent. + properties: + keyPath: + type: string + secretName: + type: string + type: object + required: + - crossPlaneRPCSpec + type: object + status: + description: CrossPlaneRPCJobStatus defines the observed state of a CrossPlaneRPCJob. + properties: + jobStatus: + description: JobStatus string that is a string mapping of the JobStatus + enum used by the StateDB. + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/rime-agent/crds/rimejob-crd.yaml b/rime-agent/crds/rbst.io_rimejobs.yaml similarity index 96% rename from rime-agent/crds/rimejob-crd.yaml rename to rime-agent/crds/rbst.io_rimejobs.yaml index 8265cfc7..5c404fc2 100644 --- a/rime-agent/crds/rimejob-crd.yaml +++ b/rime-agent/crds/rbst.io_rimejobs.yaml @@ -3,8 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.11.3 - creationTimestamp: null + controller-gen.kubebuilder.io/version: v0.13.0 name: rimejobs.rbst.io spec: group: rbst.io @@ -48,7 +47,7 @@ spec: description: Flag to indicate that the RimeJob should be cancelled. 
type: boolean kubernetesSpec: - description: Optioinal kubernetes resource settings. Values already + description: Optional kubernetes resource settings. Values already have defaults set by the underlying resource templates used as the base for generating the resource spec. properties: @@ -97,7 +96,9 @@ spec: fileScanSpec: description: Optional settings for file scan test configuration. properties: - workspaceID: + modelID: + type: string + projectID: type: string type: object rimeKeys: diff --git a/rime-agent/templates/_helpers.tpl b/rime-agent/templates/_helpers.tpl index 8f55cc0a..dbfe44aa 100644 --- a/rime-agent/templates/_helpers.tpl +++ b/rime-agent/templates/_helpers.tpl @@ -49,7 +49,10 @@ helm.sh/chart: {{ include "rime-agent.chart" . }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- if .Values.rimeAgent.commonLabels}} +{{ toYaml .Values.rimeAgent.commonLabels }} {{- end }} +{{- end -}} {{/* Common annotations added to all resources. @@ -62,11 +65,26 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/part-of: {{ template "rime-agent.name" . }} app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/owned-by: "ri" {{- if .Values.rimeAgent.commonAnnotations}} {{ toYaml .Values.rimeAgent.commonAnnotations }} {{- end }} {{- end -}} +{{/* +Monitoring annotations to add to pods. +*/}} +{{- define "rime-agent.monitoringAnnotations" -}} +{{- if .monitoring.enabled}} +prometheus.io/scrape: "true" +prometheus.io/path: "/metrics" +prometheus.io/port: "{{ .monitoring.port }}" +{{- end }} +{{- if .monitoring.datadogEnabled }} +tags.datadoghq.com/service: {{ .name }} +{{- end }} +{{- end -}} + {{/* Common flags passed to all the Agent servers. Be careful when modifying these values! 
*/}} @@ -77,7 +95,13 @@ common: caPath: "/var/tmp/tls/common/ca.crt" certPath: "/var/tmp/tls/common/tls.crt" keyPath: "/var/tmp/tls/common/tls.key" + grpcTLSEnabled: true {{- end }} + logging: + verbose: {{ .Values.rimeAgent.verbose }} + metrics: + enabled: {{ .Values.rimeAgent.monitoring.enabled }} + port: {{ .Values.rimeAgent.monitoring.port }} connections: addresses: {{- if .Values.rimeAgent.connections.agentManagerAddress}} @@ -87,10 +111,36 @@ common: uploadServerAddr: {{ .Values.rimeAgent.connections.uploadServerAddress }} {{- end }} dataplane: + agentID: {{ .Values.rimeAgent.id }} isInternal: {{ .Values.rimeAgent.isInternal }} platformAddress: {{ .Values.rimeAgent.connections.platformAddress }} + + connections: + addresses: + crossPlaneServerAddr: "{{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}:{{ .Values.rimeAgent.rimeCrossPlaneServer.port }}" + {{- if .Values.rimeAgent.fileServer.enabled }} + dataPlaneFileServerAddr: "{{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}:{{ .Values.rimeAgent.fileServer.port }}" + {{- end }} {{- end }} +{{/* +Name of the issuer to be used for cert-manager Certificates for RIME services. +*/}} +{{- define "tls.certificateIssuerName" -}} +{{- default (printf "rime-%s-ca-issuer" .Release.Namespace) .Values.tls.certificateSpec.issuerRef.name }} +{{- end }} + +{{/* +Return the service account name used by the register agent hook. +*/}} +{{- define "rime-agent.registerAgent.serviceAccountName" -}} +{{- if .Values.rimeAgent.registerAgent.serviceAccount.create -}} + {{ default (printf "%s-%s" (include "rime-agent.fullname" .) .Values.rimeAgent.registerAgent.name) .Values.rimeAgent.registerAgent.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- else -}} + {{ default "default" .Values.rimeAgent.registerAgent.serviceAccount.name }} +{{- end -}} +{{- end -}} + {{/* Return the service account name used by the operator controller manager. 
*/}} @@ -102,6 +152,17 @@ Return the service account name used by the operator controller manager. {{- end -}} {{- end -}} +{{/* +Return the service account name used by the rimeServer. +*/}} +{{- define "rime-agent.rimeServer.serviceAccountName" -}} +{{- if .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.create -}} + {{ default (printf "%s-%s" (include "rime-agent.fullname" .) .Values.rimeAgent.rimeCrossPlaneServer.name) .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- else -}} + {{ default "default" .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.name }} +{{- end -}} +{{- end -}} + {{/* Return the service account name used by the model testing jobs, which has access to read S3 buckets. @@ -118,7 +179,7 @@ access to read S3 buckets. Return the name used for secrets containing credentials used by agent services. */}} {{- define "rime-agent.secretName" -}} -{{ include "rime-agent.fullname" . }}-secret +{{- default (printf "%s-secret" (include "rime-agent.fullname" .)) .Values.rimeAgent.existingSecretName }} {{- end -}} {{/* @@ -154,3 +215,32 @@ Return the service account name used by the operator controller manager. {{ default "default" .Values.rimeAgent.operator.serviceAccount.name }} {{- end -}} {{- end -}} + +{{/* +Return the service account name used by the file server. When fileServer.serviceAccount.create is true, this is fileServer.serviceAccount.name, falling back to "<fullname>-<fileServer.name>", with the result truncated to 63 characters and any trailing "-" removed; otherwise it is fileServer.serviceAccount.name, falling back to "default". +*/}} +{{- define "rime-agent.fileServer.serviceAccountName" -}} +{{- if .Values.rimeAgent.fileServer.serviceAccount.create -}} + {{ default (printf "%s-%s" (include "rime-agent.fullname" .) .Values.rimeAgent.fileServer.name) .Values.rimeAgent.fileServer.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- else -}} + {{ default "default" .Values.rimeAgent.fileServer.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Return the name of the secret containing generated secrets for the Internal Agent. 
+*/}} +{{- define "rime-agent.generatedSecretsName" -}} +rime-generated-secrets +{{- end }} + +{{/* +Return the appropriate apiVersion for Horizontal Pod Autoscaler. +*/}} +{{- define "rime-agent.hpa.apiVersion" -}} +{{- if $.Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" }} +{{- print "autoscaling/v2" }} +{{- else }} +{{- print "autoscaling/v2beta2" }} +{{- end }} +{{- end }} diff --git a/rime-agent/templates/file-server/certificate.yaml b/rime-agent/templates/file-server/certificate.yaml new file mode 100644 index 00000000..2651346d --- /dev/null +++ b/rime-agent/templates/file-server/certificate.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.tls.crossplaneEnabled .Values.rimeAgent.fileServer.enabled }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-certificate +spec: + secretName: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-tls + duration: 4320h # 180d + renewBefore: 744h # 31d +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} + isCA: false + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + rotationPolicy: Always + usages: + - server auth + - client auth + dnsNames: + - {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }} + - localhost + issuerRef: + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} +{{- end }} diff --git a/rime-agent/templates/file-server/configmap.yaml b/rime-agent/templates/file-server/configmap.yaml new file mode 100644 index 00000000..a80da52f --- /dev/null +++ b/rime-agent/templates/file-server/configmap.yaml @@ -0,0 +1,10 @@ +{{- if .Values.rimeAgent.fileServer.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "rime-agent.fullname" . 
}}-{{ .Values.rimeAgent.fileServer.name }}-conf +data: + server.config: | +{{ include "rime-agent.serverArgs" . | indent 4 }} + fileStorage: {} +{{- end }} diff --git a/rime-agent/templates/file-server/deployment.yaml b/rime-agent/templates/file-server/deployment.yaml new file mode 100644 index 00000000..c7d72216 --- /dev/null +++ b/rime-agent/templates/file-server/deployment.yaml @@ -0,0 +1,125 @@ +{{- if .Values.rimeAgent.fileServer.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }} + labels: + app: {{ .Values.rimeAgent.fileServer.name }} + {{- with .Values.rimeAgent.fileServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.fileServer.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ .Values.rimeAgent.fileServer.name }} + {{- if not .Values.rimeAgent.fileServer.hpa.enabled }} + replicas: {{ .Values.rimeAgent.fileServer.deployment.replicaCount }} + {{- end }} + template: + metadata: + labels: + app: {{ .Values.rimeAgent.fileServer.name }} + {{- with .Values.rimeAgent.fileServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 8 }} + {{- with .Values.rimeAgent.fileServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + checksum/config: {{ include (print $.Template.BasePath "/file-server/configmap.yaml") . | sha256sum }} + spec: + {{- include "rime-agent.imagePullSecretsYaml" . | nindent 6 }} + {{- with .Values.rimeAgent.fileServer.deployment.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "rime-agent.fileServer.serviceAccountName" . 
}} + containers: + - name: {{ .Values.rimeAgent.fileServer.name }} + image: "{{ .Values.rimeAgent.images.agentImage.registry}}/{{ .Values.rimeAgent.images.agentImage.name }}" + imagePullPolicy: {{ .Values.rimeAgent.images.agentImage.pullPolicy }} + ports: + - name: fs + containerPort: {{ .Values.rimeAgent.fileServer.port }} + protocol: TCP + - name: fs-rest + containerPort: {{ .Values.rimeAgent.fileServer.restPort }} + protocol: TCP + - name: fs-debug + containerPort: 6060 + protocol: TCP + resources: + {{- toYaml .Values.rimeAgent.fileServer.deployment.resources | nindent 12 }} + env: + - name: RIME_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.secretName" .}} + key: api-key + optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" .}} + key: crossServiceKey + optional: true + {{- with.Values.rimeAgent.fileServer.deployment.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rimeAgent.fileServer.name }} + command: + - "/rime/fileServer" + args: + - "--server-config-path=/config/server.config" + - "--file-server-port={{ .Values.rimeAgent.fileServer.port }}" + - "--file-server-rest-port={{ .Values.rimeAgent.fileServer.restPort }}" + - "--blob-store-bucket-name={{ .Values.rimeAgent.fileServer.config.storageBucketName }}" + - "--blob-store-bucket-endpoint={{ .Values.rimeAgent.fileServer.config.endpoint }}" + - "--blob-store-service-type={{ .Values.rimeAgent.fileServer.config.type }}" + volumeMounts: + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-tls + mountPath: /var/tmp/tls/common + readOnly: true + {{- end }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-config + mountPath: "/config" + readOnly: true + {{- with .Values.rimeAgent.fileServer.deployment.extraVolumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumes: + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-tls + secret: + secretName: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-tls + {{- end }} + {{- with .Values.rimeAgent.fileServer.deployment.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-config + configMap: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }}-conf + items: + - key: "server.config" + path: "server.config" + {{- with .Values.rimeAgent.fileServer.deployment.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rimeAgent.fileServer.deployment.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rimeAgent.fileServer.deployment.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/rime-agent/templates/file-server/hpa.yaml b/rime-agent/templates/file-server/hpa.yaml new file mode 100644 index 00000000..990d9d46 --- /dev/null +++ b/rime-agent/templates/file-server/hpa.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.rimeAgent.fileServer.hpa.enabled .Values.rimeAgent.fileServer.enabled }} +apiVersion: {{ include "rime-agent.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }} + labels: + app: {{ .Values.rimeAgent.fileServer.name }} + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.fileServer.hpa.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rime-agent.fullname" . 
}}-{{ .Values.rimeAgent.fileServer.name }} + minReplicas: {{ .Values.rimeAgent.fileServer.hpa.minReplicas }} + maxReplicas: {{ .Values.rimeAgent.fileServer.hpa.maxReplicas }} + metrics: +{{ toYaml .Values.rimeAgent.fileServer.hpa.metrics | indent 4 }} +{{- end }} diff --git a/rime-agent/templates/file-server/service-account.yaml b/rime-agent/templates/file-server/service-account.yaml new file mode 100644 index 00000000..896e52c9 --- /dev/null +++ b/rime-agent/templates/file-server/service-account.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.rimeAgent.fileServer.enabled .Values.rimeAgent.fileServer.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rime-agent.fileServer.serviceAccountName" . }} + labels: + {{- with .Values.rimeAgent.fileServer.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.fileServer.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/rime-agent/templates/file-server/service.yaml b/rime-agent/templates/file-server/service.yaml new file mode 100644 index 00000000..fc34ebb4 --- /dev/null +++ b/rime-agent/templates/file-server/service.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rimeAgent.fileServer.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.fileServer.name }} + labels: + {{- with .Values.rimeAgent.fileServer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.fileServer.service.annotations }} + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + type: {{ .Values.rimeAgent.fileServer.service.type }} + ports: + - port: {{ .Values.rimeAgent.fileServer.port }} + targetPort: {{ .Values.rimeAgent.fileServer.port }} + protocol: TCP + name: fs + - port: {{ .Values.rimeAgent.fileServer.restPort }} + targetPort: {{ .Values.rimeAgent.fileServer.restPort }} + protocol: TCP + name: fs-rest + selector: + app: {{ .Values.rimeAgent.fileServer.name }} +{{- end }} diff --git a/rime-agent/templates/generated-secrets.yaml b/rime-agent/templates/generated-secrets.yaml new file mode 100644 index 00000000..0516517a --- /dev/null +++ b/rime-agent/templates/generated-secrets.yaml @@ -0,0 +1,15 @@ +{{- if not .Values.rimeAgent.isInternal }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "rime-agent.generatedSecretsName" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "0" +type: Opaque +data: + # for auth-related secrets, always read from pre-existing secret to avoid interrupting in-flight workloads + {{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "rime-agent.generatedSecretsName" .) ) | default dict }} + {{- $secretData := (get $secret "data") | default dict }} + crossServiceKey: {{ (get $secretData "crossServiceKey") | default (randAlphaNum 32 | b64enc) }} +{{- end }} diff --git a/rime-agent/templates/init-jobs/register-agent-configmap.yaml b/rime-agent/templates/init-jobs/register-agent-configmap.yaml new file mode 100644 index 00000000..098091d6 --- /dev/null +++ b/rime-agent/templates/init-jobs/register-agent-configmap.yaml @@ -0,0 +1,21 @@ +{{- if not .Values.rimeAgent.isInternal }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.registerAgent.name }}-conf + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.serviceAccount.labels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + helm.sh/hook: pre-install,pre-upgrade + "helm.sh/hook-weight": "1" +data: + server.config: | +{{ include "rime-agent.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime-agent/templates/init-jobs/register-agent-job.yaml b/rime-agent/templates/init-jobs/register-agent-job.yaml new file mode 100644 index 00000000..9d32b7f3 --- /dev/null +++ b/rime-agent/templates/init-jobs/register-agent-job.yaml @@ -0,0 +1,88 @@ +{{- if not .Values.rimeAgent.isInternal }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.registerAgent.name }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + # Hooks are sorted in ascending order. + "helm.sh/hook-weight": "5" + "helm.sh/hook-delete-policy": before-hook-creation + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + template: + metadata: + labels: + {{- include "rime-agent.labels" . | nindent 8 }} + {{- with .Values.rimeAgent.registerAgent.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 8 }} + {{- with .Values.rimeAgent.registerAgent.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- include "rime-agent.imagePullSecretsYaml" . | nindent 6 }} + {{- with .Values.rimeAgent.registerAgent.deployment.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "rime-agent.registerAgent.serviceAccountName" . 
}} + containers: + - name: {{ .Values.rimeAgent.registerAgent.name }} + image: "{{ .Values.rimeAgent.images.agentImage.registry }}/{{ .Values.rimeAgent.images.agentImage.name }}" + imagePullPolicy: {{ .Values.rimeAgent.images.agentImage.pullPolicy }} + resources: + {{- toYaml .Values.rimeAgent.registerAgent.deployment.resources | nindent 12 }} + env: + - name: RIME_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.secretName" .}} + key: api-key + optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" .}} + key: crossServiceKey + optional: true + {{- with.Values.rimeAgent.registerAgent.deployment.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + command: + - "/rime/registerAgent" + args: + {{- if .Values.rimeAgent.id }} + - "--id={{ .Values.rimeAgent.id }}" + {{- end }} + - "--server-config-path=/config/server.config" + volumeMounts: + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.registerAgent.name }}-config + mountPath: "/config" + readOnly: true + {{- with .Values.rimeAgent.registerAgent.deployment.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + restartPolicy: OnFailure + volumes: + {{- with .Values.rimeAgent.registerAgent.deployment.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.registerAgent.name }}-config + configMap: + name: {{ include "rime-agent.fullname" . 
}}-{{ .Values.rimeAgent.registerAgent.name }}-conf + items: + - key: "server.config" + path: "server.config" + backoffLimit: {{ .Values.rimeAgent.registerAgent.backoffLimit }} +{{- end }} diff --git a/rime-agent/templates/init-jobs/register-agent-role-binding.yaml b/rime-agent/templates/init-jobs/register-agent-role-binding.yaml new file mode 100644 index 00000000..8486432c --- /dev/null +++ b/rime-agent/templates/init-jobs/register-agent-role-binding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.rimeAgent.registerAgent.serviceAccount.create (not .Values.rimeAgent.isInternal) }} +# Binds the register-agent service account to the register-agent Role of the same name. +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "rime-agent.registerAgent.serviceAccountName" . }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + helm.sh/hook: pre-install,pre-upgrade + "helm.sh/hook-weight": "4" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "rime-agent.registerAgent.serviceAccountName" . }} +subjects: + - kind: ServiceAccount + name: {{ include "rime-agent.registerAgent.serviceAccountName" . }} +{{- end }} diff --git a/rime-agent/templates/init-jobs/register-agent-role.yaml b/rime-agent/templates/init-jobs/register-agent-role.yaml new file mode 100644 index 00000000..4aa64c5e --- /dev/null +++ b/rime-agent/templates/init-jobs/register-agent-role.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rimeAgent.registerAgent.serviceAccount.create (not .Values.rimeAgent.isInternal) }} +# A Role granting access to Secrets. Used by the register-agent hook job. +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "rime-agent.registerAgent.serviceAccountName" . }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + annotations: + {{- include "rime-agent.annotations" . 
| nindent 4 }} + helm.sh/hook: pre-install,pre-upgrade + "helm.sh/hook-weight": "3" +rules: + - apiGroups: [""] + resources: [secrets] + verbs: ["*"] +{{- end }} diff --git a/rime-agent/templates/init-jobs/register-agent-service-account.yaml b/rime-agent/templates/init-jobs/register-agent-service-account.yaml new file mode 100644 index 00000000..6c00e6a4 --- /dev/null +++ b/rime-agent/templates/init-jobs/register-agent-service-account.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.rimeAgent.registerAgent.serviceAccount.create (not .Values.rimeAgent.isInternal) }} +# Service account for the register agent init job. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rime-agent.registerAgent.serviceAccountName" . }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.registerAgent.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + helm.sh/hook: pre-install,pre-upgrade + "helm.sh/hook-weight": "2" +{{- end }} diff --git a/rime-agent/templates/launcher/certificate.yaml b/rime-agent/templates/launcher/certificate.yaml index b503f877..7f1ca14d 100644 --- a/rime-agent/templates/launcher/certificate.yaml +++ b/rime-agent/templates/launcher/certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.launcher.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime-agent.fullname" . 
}}-{{ .Values.rimeAgent.launcher.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime-agent/templates/launcher/deployment.yaml b/rime-agent/templates/launcher/deployment.yaml index fd2dd53a..e9627e56 100644 --- a/rime-agent/templates/launcher/deployment.yaml +++ b/rime-agent/templates/launcher/deployment.yaml @@ -32,6 +32,7 @@ spec: {{- with .Values.rimeAgent.launcher.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime-agent.monitoringAnnotations" (dict "monitoring" .Values.rimeAgent.monitoring "name" .Values.rimeAgent.launcher.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/launcher/configmap.yaml") . | sha256sum }} spec: {{- include "rime-agent.imagePullSecretsYaml" . | nindent 6 }} @@ -53,21 +54,21 @@ spec: name: {{ include "rime-agent.secretName" .}} key: api-key optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" .}} + key: crossServiceKey + optional: true {{- with.Values.rimeAgent.launcher.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rimeAgent.launcher.name }} command: - "/rime/launcher" args: - {{- if .Values.tls.crossplaneEnabled }} - - "--auth-mode=tls" - {{- end }} - {{- if .Values.rimeAgent.id }} - - "--id={{ .Values.rimeAgent.id }}" - {{- end }} - {{- if .Values.rimeAgent.verbose }} - - "--verbose" - {{- end }} + - "--backend-id-int={{ .Values.rimeAgent.launcher.backendID }}" - "--server-config-path=/config/server.config" volumeMounts: {{- if .Values.tls.crossplaneEnabled }} diff --git a/rime-agent/templates/launcher/role.yaml b/rime-agent/templates/launcher/role.yaml index 5e11aed3..811548e6 100644 --- a/rime-agent/templates/launcher/role.yaml +++ b/rime-agent/templates/launcher/role.yaml @@ -6,6 +6,6 @@ metadata: name: {{ include "rime-agent.launcher.serviceAccountName" . }} rules: - apiGroups: ["rbst.io"] - resources: [rimejobs] + resources: [rimejobs, crossplanerpcjobs] verbs: [get, create, update, patch, delete, list] {{- end }} diff --git a/rime-agent/templates/operator/certificate.yaml b/rime-agent/templates/operator/certificate.yaml index 20363fd1..5bd9b305 100644 --- a/rime-agent/templates/operator/certificate.yaml +++ b/rime-agent/templates/operator/certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . 
}} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime-agent/templates/operator/configmap.yaml b/rime-agent/templates/operator/configmap.yaml index 1d508f45..d08a4f1e 100644 --- a/rime-agent/templates/operator/configmap.yaml +++ b/rime-agent/templates/operator/configmap.yaml @@ -12,7 +12,7 @@ data: metadata: name: job-conf-placeholder immutable: true - job.config: | + rime_job.config: | apiVersion: batch/v1 kind: Job metadata: @@ -32,6 +32,7 @@ data: {{- with .Values.rimeAgent.operator.modelTestJob.annotations }} {{- toYaml . | nindent 12 }} {{- end }} + {{- include "rime-agent.monitoringAnnotations" (dict "monitoring" .Values.rimeAgent.monitoring "name" .Values.rimeAgent.operator.modelTestJob.name ) | nindent 12 }} spec: {{- include "rime-agent.imagePullSecretsYaml" . | nindent 10 }} {{- with .Values.rimeAgent.operator.modelTestJob.securityContext }} @@ -55,7 +56,19 @@ data: valueFrom: secretKeyRef: name: {{ include "rime-agent.secretName" . }} - key: api_key + key: api-key + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" . }} + key: crossServiceKey + optional: true + {{- if .Values.rimeAgent.id }} + - name: AGENT_ID + value: {{ .Values.rimeAgent.id | quote }} + {{- end }} + - name: BACKEND_ID + value: "{{ .Values.rimeAgent.operator.modelTestJob.backendID }}" - name: RIME_DISABLE_TLS value: {{ if .Values.rimeAgent.connections.platformAddress }} "false" {{ else }} "true" {{ end }} {{- with .Values.rimeAgent.operator.modelTestJob.extraEnv }} @@ -96,3 +109,91 @@ data: {{- toYaml . 
| nindent 12 }} {{- end }} backoffLimit: {{ .Values.rimeAgent.operator.modelTestJob.backoffLimit }} + + crossplane_job.config: | + apiVersion: batch/v1 + kind: Job + metadata: + name: job-placeholder + spec: + activeDeadlineSeconds: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.activeDeadlineSeconds }} + ttlSecondsAfterFinished: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.ttlSecondsAfterFinished }} + template: + metadata: + labels: + {{- include "rime-agent.labels" . | nindent 12 }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.labels }} + {{- toYaml . | nindent 12 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 12 }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.annotations }} + {{- toYaml . | nindent 12 }} + {{- end }} + spec: + {{- include "rime-agent.imagePullSecretsYaml" . | nindent 10 }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + containers: + - name: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }} + image: "{{ .Values.rimeAgent.images.agentImage.registry }}/{{ .Values.rimeAgent.images.agentImage.name }}" + env: + - name: RIME_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.secretName" . }} + key: api-key + optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" . }} + key: crossServiceKey + optional: true + - name: BACKEND_ID + value: {{ .Values.rimeAgent.rimeCrossPlaneServer.backendID | quote }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.extraEnv }} + {{- toYaml . 
| nindent 14 }} + {{- end }} + imagePullPolicy: {{ .Values.rimeAgent.images.agentImage.pullPolicy }} + volumeMounts: + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-{{ .Release.Namespace }}-tls + mountPath: /var/tmp/tls/common + readOnly: true + {{- end }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-common-config + mountPath: "/config" + readOnly: true + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.extraVolumeMounts }} + {{- toYaml . | nindent 14 }} + {{- end }} + resources: + {{- toYaml .Values.rimeAgent.operator.crossPlaneRPCJob.resources | nindent 14 }} + restartPolicy: Never + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.tolerations }} + tolerations: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-common-config + configMap: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-common-conf + items: + - key: "server.config" + path: "server.config" + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-{{ .Release.Namespace }}-tls + secret: + secretName: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-{{ .Release.Namespace }}-tls + {{- end }} + {{- with .Values.rimeAgent.operator.crossPlaneRPCJob.extraVolumes }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + backoffLimit: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.backoffLimit }} diff --git a/rime-agent/templates/operator/cross-plane-job-certificate.yaml b/rime-agent/templates/operator/cross-plane-job-certificate.yaml new file mode 100644 index 00000000..023653dd --- /dev/null +++ b/rime-agent/templates/operator/cross-plane-job-certificate.yaml @@ -0,0 +1,29 @@ +{{- if .Values.tls.crossplaneEnabled }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-{{ .Release.Namespace }}-certificate +spec: + secretName: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-{{ .Release.Namespace }}-tls + duration: 4320h # 180d + renewBefore: 744h # 31d +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} + isCA: false + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + rotationPolicy: Always + usages: + - server auth + - client auth + dnsNames: + - cross-plane-job + - localhost + issuerRef: + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} +{{- end }} diff --git a/rime-agent/templates/operator/crossplanerpcjob-common-configmap.yaml b/rime-agent/templates/operator/crossplanerpcjob-common-configmap.yaml new file mode 100644 index 00000000..d69f813c --- /dev/null +++ b/rime-agent/templates/operator/crossplanerpcjob-common-configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.crossPlaneRPCJob.name }}-common-conf +data: + server.config: | +{{ include "rime-agent.serverArgs" . 
| indent 4 }} diff --git a/rime-agent/templates/operator/deployment.yaml b/rime-agent/templates/operator/deployment.yaml index 27198d4b..bbb8351d 100644 --- a/rime-agent/templates/operator/deployment.yaml +++ b/rime-agent/templates/operator/deployment.yaml @@ -33,6 +33,7 @@ spec: {{- with .Values.rimeAgent.operator.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime-agent.monitoringAnnotations" (dict "monitoring" .Values.rimeAgent.monitoring "name" .Values.rimeAgent.operator.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/operator/configmap.yaml") . | sha256sum }} spec: {{- include "rime-agent.imagePullSecretsYaml" . | nindent 6 }} @@ -53,6 +54,14 @@ spec: name: {{ include "rime-agent.secretName" .}} key: api-key optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" . }} + key: crossServiceKey + optional: true + - name: RI_SERVICE_NAME + value: {{ .Values.rimeAgent.operator.name }} image: "{{ .Values.rimeAgent.images.agentImage.registry }}/{{ .Values.rimeAgent.images.agentImage.name }}" imagePullPolicy: {{ .Values.rimeAgent.images.agentImage.pullPolicy }} command: @@ -60,12 +69,10 @@ spec: args: - "--leader-elect" - "--base-configmap-template-file=/config/job_configmap.config" - - "--base-job-template-file=/config/job.config" + - "--base-rime-job-template-file=/config/rime_job.config" - "--app-name={{ include "rime-agent.fullname" . 
}}" - {{- if .Values.tls.crossplaneEnabled }} - - "--auth-mode=tls" - {{- end }} - - "--server-config-path=/server-config/server.config" + - "--server-config-path=/config/server.config" + - "--base-crossplane-job-template-file=/config/crossplane_job.config" {{- if .Values.rimeAgent.operator.logArchival.enabled }} - "--blob-store-bucket-name={{ .Values.rimeAgent.operator.logArchival.storageBucketName }}" - "--blob-store-bucket-endpoint={{ .Values.rimeAgent.operator.logArchival.endpoint }}" @@ -94,11 +101,8 @@ spec: mountPath: /var/tmp/tls/common readOnly: true {{- end }} - - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-config - mountPath: "/server-config" - readOnly: true {{- with .Values.rimeAgent.operator.deployment.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} + {{- toYaml . | nindent 8 }} {{- end }} terminationGracePeriodSeconds: 10 volumes: @@ -118,8 +122,10 @@ spec: path: "server.config" - key: "job_configmap.config" path: "job_configmap.config" - - key: "job.config" - path: "job.config" + - key: "rime_job.config" + path: "rime_job.config" + - key: "crossplane_job.config" + path: "crossplane_job.config" {{- with .Values.rimeAgent.operator.deployment.tolerations }} tolerations: {{- toYaml . | nindent 8 }} diff --git a/rime-agent/templates/operator/model-testing-job-certificate.yaml b/rime-agent/templates/operator/model-testing-job-certificate.yaml index 146beac1..7c00961a 100644 --- a/rime-agent/templates/operator/model-testing-job-certificate.yaml +++ b/rime-agent/templates/operator/model-testing-job-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ .Values.rimeAgent.operator.modelTestJob.name }}-{{ .Release.Namespace }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . 
| nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - model-testing-job - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime-agent/templates/operator/role.yaml b/rime-agent/templates/operator/role.yaml index 3af55df1..fc6cad7a 100644 --- a/rime-agent/templates/operator/role.yaml +++ b/rime-agent/templates/operator/role.yaml @@ -3,7 +3,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - creationTimestamp: null name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.operator.name }}-role namespace: '{{.Release.Namespace}}' rules: @@ -31,6 +30,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - events + verbs: + - list - apiGroups: - "" resources: @@ -46,6 +51,32 @@ rules: verbs: - get - list +- apiGroups: + - rbst.io + resources: + - crossplanerpcjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - rbst.io + resources: + - crossplanerpcjobs/finalizers + verbs: + - update +- apiGroups: + - rbst.io + resources: + - crossplanerpcjobs/status + verbs: + - get + - patch + - update - apiGroups: - rbst.io resources: diff --git a/rime-agent/templates/rime-cross-plane-server/certificate.yaml b/rime-agent/templates/rime-cross-plane-server/certificate.yaml new file mode 100644 index 00000000..b1689218 --- /dev/null +++ b/rime-agent/templates/rime-cross-plane-server/certificate.yaml @@ -0,0 +1,29 @@ +{{- if .Values.tls.crossplaneEnabled }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}-certificate +spec: + secretName: {{ include "rime-agent.fullname" . 
}}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}-tls + duration: 4320h # 180d + renewBefore: 744h # 31d +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} + isCA: false + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + rotationPolicy: Always + usages: + - server auth + - client auth + dnsNames: + - {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + - localhost + issuerRef: + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} +{{- end }} diff --git a/rime-agent/templates/rime-cross-plane-server/deployment.yaml b/rime-agent/templates/rime-cross-plane-server/deployment.yaml new file mode 100644 index 00000000..089e7a9b --- /dev/null +++ b/rime-agent/templates/rime-cross-plane-server/deployment.yaml @@ -0,0 +1,115 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + labels: + app: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if not .Values.rimeAgent.rimeCrossPlaneServer.hpa.enabled }} + replicas: {{ .Values.rimeAgent.rimeCrossPlaneServer.deployment.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + {{- include "rime-agent.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + app: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + {{- include "rime-agent.labels" . 
| nindent 8 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 8 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- include "rime-agent.monitoringAnnotations" (dict "monitoring" .Values.rimeAgent.monitoring "name" .Values.rimeAgent.rimeCrossPlaneServer.name ) | nindent 8 }} + spec: + {{- include "rime-agent.imagePullSecretsYaml" . | nindent 6 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "rime-agent.rimeServer.serviceAccountName" . }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.rimeAgent.images.modelTestJobImage.registry }}/{{ .Values.rimeAgent.images.modelTestJobImage.name }}" + imagePullPolicy: {{ .Values.rimeAgent.images.modelTestJobImage.pullPolicy }} + resources: + {{- toYaml .Values.rimeAgent.rimeCrossPlaneServer.deployment.resources | nindent 12 }} + env: + - name: RIME_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.secretName" .}} + key: api-key + optional: true + - name: RIME_CROSS_SERVICE_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime-agent.generatedSecretsName" . }} + key: crossServiceKey + optional: true + {{- if .Values.rimeAgent.id }} + - name: AGENT_ID + value: {{ .Values.rimeAgent.id }} + {{- end }} + - name: BACKEND_ID + value: {{ .Values.rimeAgent.operator.crossPlaneRPCJob.backendID | quote }} + {{- with.Values.rimeAgent.rimeCrossPlaneServer.deployment.extraEnv }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + ports: + - name: rime + containerPort: {{ .Values.rimeAgent.rimeCrossPlaneServer.port }} + protocol: TCP + command: + - "rime-cross-plane-server" + args: + - "--port={{ .Values.rimeAgent.rimeCrossPlaneServer.port }}" + - "--api-key=$(RIME_API_KEY)" + - "--upload-endpoint={{ .Values.rimeAgent.connections.platformAddress | default .Values.rimeAgent.connections.uploadServerRestAddress }}" + - "--data-collector-endpoint={{ .Values.rimeAgent.connections.platformAddress | default .Values.rimeAgent.connections.dataCollectorRestAddress }}" + volumeMounts: + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}-tls + mountPath: /var/tmp/tls/common + readOnly: true + {{- end }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + {{- if .Values.tls.crossplaneEnabled }} + - name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}-tls + secret: + secretName: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }}-tls + {{- end }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.deployment.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/rime-agent/templates/rime-cross-plane-server/hpa.yaml b/rime-agent/templates/rime-cross-plane-server/hpa.yaml new file mode 100644 index 00000000..b0fbfb10 --- /dev/null +++ b/rime-agent/templates/rime-cross-plane-server/hpa.yaml @@ -0,0 +1,21 @@ +{{- if .Values.rimeAgent.rimeCrossPlaneServer.hpa.enabled }} +apiVersion: {{ include "rime-agent.hpa.apiVersion" . }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + labels: + app: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.hpa.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + minReplicas: {{ .Values.rimeAgent.rimeCrossPlaneServer.hpa.minReplicas }} + maxReplicas: {{ .Values.rimeAgent.rimeCrossPlaneServer.hpa.maxReplicas }} + metrics: +{{ toYaml .Values.rimeAgent.rimeCrossPlaneServer.hpa.metrics | indent 4 }} +{{- end }} diff --git a/rime-agent/templates/rime-cross-plane-server/service-account.yaml b/rime-agent/templates/rime-cross-plane-server/service-account.yaml new file mode 100644 index 00000000..bd400327 --- /dev/null +++ b/rime-agent/templates/rime-cross-plane-server/service-account.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rime-agent.rimeServer.serviceAccountName" . }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . 
| nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/rime-agent/templates/rime-cross-plane-server/service.yaml b/rime-agent/templates/rime-cross-plane-server/service.yaml new file mode 100644 index 00000000..0a5b2561 --- /dev/null +++ b/rime-agent/templates/rime-cross-plane-server/service.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rime-agent.fullname" . }}-{{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + labels: + {{- include "rime-agent.labels" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime-agent.annotations" . | nindent 4 }} + {{- with .Values.rimeAgent.rimeCrossPlaneServer.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.rimeAgent.rimeCrossPlaneServer.service.type }} + ports: + - port: {{ .Values.rimeAgent.rimeCrossPlaneServer.port }} + targetPort: {{ .Values.rimeAgent.rimeCrossPlaneServer.port }} + protocol: TCP + name: rime + selector: + app: {{ .Values.rimeAgent.rimeCrossPlaneServer.name }} + {{- include "rime-agent.selectorLabels" . | nindent 4 }} diff --git a/rime-agent/templates/secret-docker.yaml b/rime-agent/templates/secret-docker.yaml index 515c9080..a24f9961 100644 --- a/rime-agent/templates/secret-docker.yaml +++ b/rime-agent/templates/secret-docker.yaml @@ -3,6 +3,9 @@ apiVersion: v1 kind: Secret metadata: name: {{ include "rime-agent.dockerSecretName" . 
}} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-2" type: kubernetes.io/dockerconfigjson data: .dockerconfigjson: {{ .Values.rimeAgent.dockerCredentialsPayload | quote}} diff --git a/rime-agent/templates/secret.yaml b/rime-agent/templates/secret.yaml index 3d6a381a..bd1f29a3 100644 --- a/rime-agent/templates/secret.yaml +++ b/rime-agent/templates/secret.yaml @@ -1,7 +1,12 @@ +{{- if (not .Values.rimeAgent.existingSecretName ) }} apiVersion: v1 kind: Secret metadata: name: {{ include "rime-agent.secretName" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "-1" type: Opaque data: - api_key: {{ default "" .Values.rimeAgent.apiKey | b64enc | quote}} + api-key: {{ default "" .Values.rimeAgent.apiKey | b64enc | quote}} +{{- end }} diff --git a/rime-agent/values.yaml b/rime-agent/values.yaml index 22e25bfe..622fb2ef 100644 --- a/rime-agent/values.yaml +++ b/rime-agent/values.yaml @@ -1,83 +1,212 @@ rimeAgent: + # @ignored verbose: true - nameOverride: - fullNameOverride: + # @ignored -- Override for the chart name. If used, this will be appended to the + # release name to form the fully qualified app name + # (e.g., `rime-${nameOverride}` instead of just `rime`) + nameOverride: "" + # @ignored -- Override for the fully qualified app name used across services. + # If specified, `nameOverride` will be ignored. + fullnameOverride: "" + # @ignored -- Common annotations added to all K8s resources commonAnnotations: {} + # @ignored -- Common labels added to all K8s resources commonLabels: {} + # -- Whether this agent is running within the same K8s cluster as the control plane. isInternal: false + # -- unique ID for this Agent. Can be left blank if this is an internal agent. + # This id is provided by the RI Platform Control Plane when creating external agents. + # For internal agents, this field is not used. + id: + + # -- The API key the agent will use to communicate with the RI Platform.
+ # Required for external agents. + apiKey: + # -- Name of an existing K8s secret containing the API key. + # If existingSecretName is set, the secret will not be created. Must have api-key set. + existingSecretName: "" + + # -- Pre-configured json encoded string of K8s docker config secret + # Providing `rimeAgent.dockerCredentialsPayload` will override any provided inputs in rimeAgent.dockerCredentials + dockerCredentialsPayload: # '{"auths":{"https://index.docker.io/v2/":{"auth":"cm9...hZA="}}}' + + # -- Image specification for the Agent. + # @default -- (see individual values in `values.yaml`) images: - # rimeAgent.images.imagePullSecrets -- use existing image pull secrets in your k8s cluster, overriding rimeAgent.dockerCredentials - ## Note that the credentials should provide access to both the Agent image and model testing images. + # Specify this value to use existing image pull secrets in your K8s cluster (overriding rimeAgent.dockerCredentials). + # Note that the credentials should provide access to both the Agent image and Model Testing images. imagePullSecrets: [] + # Image specification for the agent's operational services. agentImage: - # rimeAgent.images.agentImage.registry -- the registry of the agent image. + # The registry of the agent image. registry: "docker.io" - # rimeAgent.images.agentImage.name -- the name and tag of the rime agent image. + # The name and tag of the rime agent image. name: "robustintelligencehq/rime-agent:latest" - # rimeAgent.images.agentImage.pullPolicy -- see https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy - pullPolicy: Always + # See https://kubernetes.io/docs/concepts/containers/images/#image-pull-policy + pullPolicy: "Always" + # Image specification for the ephemeral model testing jobs. modelTestJobImage: - # rimeAgent.images.modelTestJobImage.registry -- the registry of the default model test job image. + # The registry of the default model test job image. 
registry: "docker.io" - # rimeAgent.images.modelTestJobImage.name -- image name for running model test jobs. + # Image name for running model test jobs. name: "robustintelligencehq/rime-testing-engine-dev:latest" - # rimeAgent.images.modelTestJobImage.pullPolicy -- image pull policy for model test jobs. + # Image pull policy for model test jobs. pullPolicy: "Always" - # rimeAgent.id -- unique ID for this Agent. Can be left blank if this is a internal agent. + # -- Service addresses for the agent. + # @default -- (see individual values in `values.yaml`) + connections: + # The address of the RI Platform this agent should communicate with. + # Required for external agents. + platformAddress: # your-company.rbst.io + # Internal address of the Control Plane's grpc upload server. + # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + uploadServerAddress: rime-upload-server:5000 + # Internal address of the Control Plane's grpc agent management service. + # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + agentManagerAddress: rime-agent-manager-server:15000 + # Internal address of the Control Plane's upload server. + # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + uploadServerRestAddress: rime-upload-server:15001 + # Internal address of the Control Plane's firewall server. + # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + firewallServerRestAddress: rime-firewall-server:15002 + # Internal address of the Control Plane's data collector server. + # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + dataCollectorRestAddress: rime-data-collector-server:15015 + # Internal address of the Control Plane's dataset manager server. 
+ # (This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane) + datasetManagerRestAddress: rime-dataset-manager-server:15009 - ## This id is provided by the RI Platform Control Plane when creating external agents. - ## For internal agents, this field is not used, and the internal agent will register with the - ## Control Plane during startup and get the ID to use at that point. - id: + # -- `monitoring` (Datadog) K8s-level configurations + # @default -- (see individual values in `values.yaml`) + monitoring: + # -- Whether to enable Prometheus metrics for all services on the RIME agent + enabled: true + # -- Port to expose Prometheus metrics on + port: 8080 + # -- Whether to enable Datadog autodiscovery tags for all services on the RIME agent + datadogEnabled: true - # rimeAgent.apiKey -- the API key the agent will use to communicate with the RI Platform. + # -- `registerAgent` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + registerAgent: + name: "register-agent" + serviceAccount: + create: true + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + labels: {} + backoffLimit: 2 + deployment: + annotations: {} + labels: {} + resources: + limits: + memory: "100Mi" + requests: + memory: "100Mi" + cpu: "100m" - ## If not set, the agent must be installed in the same cluster as the RI Platform. - apiKey: + # SecurityContext to add to the deployment. Default is just set to not run as root. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core + securityContext: {} - # rimeAgent.dockerCredentialsPayload -- pre-configured json encoded string of k8s docker config secret + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. 
+ # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core + extraEnv: [] - ## providing rimeAgent.dockerCredentialsPayload will override any provided inputs in rimeAgent.dockerCredentials - dockerCredentialsPayload: # '{"auths":{"https://index.docker.io/v2/":{"auth":"cm9...hZA="}}}' + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core + extraVolumes: [] - connections: - # rimeAgent.platformAddress -- the address of the RI Platform this agent should communicate with. + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core + extraVolumeMounts: [] - ## If not set, the agent must be installed in the same cluster as the RI Platform and you will need to specify all the internal addresses below. - platformAddress: # your-company.rbst.io - # rimeAgent.uploadServerAddress -- internal address of the Control Plane's grpc upload server. + # -- `launcher` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + launcher: + name: "launcher" + backendID: 14 + serviceAccount: + create: true + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + labels: {} + deployment: + annotations: {} + labels: {} + resources: + limits: + memory: "500Mi" + cpu: "500m" + requests: + memory: "100Mi" + cpu: "100m" - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - uploadServerAddress: rime-upload-server:5000 - # rimeAgent.agentManagerAddress -- internal address of the Control Plane's grpc agent management service. + # SecurityContext to add to the deployment. 
Default is just set to not run as root. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core + securityContext: {} - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - agentManagerAddress: rime-agent-manager-server:15000 - # rimeAgent.uploadServerRestAddress -- internal address of the Control Plane's upload server. + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core + extraEnv: [] - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - uploadServerRestAddress: rime-upload-server:15001 - # rimeAgent.firewallServerAddress -- internal address of the Control Plane's firewall server. + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core + extraVolumes: [] - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - firewallServerRestAddress: rime-firewall-server:15002 - # rimeAgent.dataCollectorRestAddress -- internal address of the Control Plane's data collector server. + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. 
+ # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core + extraVolumeMounts: [] - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - dataCollectorRestAddress: rime-data-collector-server:15015 - # rimeAgent.datasetManagerRestAddress -- internal address of the Control Plane's data collector server. + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + nodeSelector: {} - ## This value will be ignored for external agents that use rimeAgent.platformAddress to connect to the control plane - datasetManagerRestAddress: rime-dataset-manager-server:15009 + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + tolerations: [] - launcher: - name: "launcher" + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + + # -- `rime-cross-plane-server` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + rimeCrossPlaneServer: + name: "rime-cross-plane-server" + backendID: 13 + port: 50051 + service: + type: ClusterIP + annotations: {} + labels: {} + hpa: + enabled: true + annotations: {} + labels: {} + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 60 serviceAccount: - # Specifies whether a ServiceAccount should be created create: true - # The name of the ServiceAccount to use. # If not set and create is true, a name is generated using the fullname template name: annotations: {} @@ -85,51 +214,54 @@ rimeAgent: deployment: annotations: {} labels: {} + # Replica count for the cross-plane-server. Only used if HPA is disabled. 
+ replicaCount: 3 resources: + limits: + memory: "10000Mi" requests: - memory: "100Mi" + memory: "10000Mi" cpu: "100m" - limits: - memory: "500Mi" - cpu: "500m" # SecurityContext to add to the deployment. Default is just set to not run as root. # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core securityContext: {} - ## Extra env variables to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core - # + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core extraEnv: [] - ## Extra volumes to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-coreim gonn - # + + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core extraVolumes: [] - ## Extra volume mounts to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core - # + + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. 
+ # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core extraVolumeMounts: [] + ## Node labels for pod assignment ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - # nodeSelector: {} + ## Tolerations for pod assignment ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - ## tolerations: [] + ## Affinity for pod assignment (evaluated as template) ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - ## affinity: {} + # -- `operator` K8s-level configurations + # @default -- (see individual values in `values.yaml`) operator: name: "operator" serviceAccount: create: true - name: "" + # If not set and create is true, a name is generated using the fullname template + name: annotations: {} labels: {} deployment: @@ -138,70 +270,54 @@ rimeAgent: resources: limits: cpu: 500m - memory: 128Mi + memory: 1000Mi requests: cpu: 500m - memory: 128Mi + memory: 300Mi # SecurityContext to add to the deployment. Default is just set to not run as root. # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core securityContext: {} - ## Extra env variables to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core - # + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core extraEnv: [] - ## Extra volumes to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-coreim gonn - # + + # Extra volumes to add to the deployment. 
Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core extraVolumes: [] - ## Extra volume mounts to add to the deployment. Make sure these don't - ## conflict with the ones already defined in the deployment. - ## ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core - # + + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core extraVolumeMounts: [] + ## Node labels for pod assignment ## ref: https://kubernetes.io/docs/user-guide/node-selection/ - # nodeSelector: {} + ## Tolerations for pod assignment ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ - ## tolerations: [] + ## Affinity for pod assignment (evaluated as template) ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity - ## affinity: {} - ## Configurations related to the model testing jobs launched by this agent. + + # -- `model-testing-job` K8s-level configurations + # @default -- (see individual values in `values.yaml`) modelTestJob: name: "model-testing-job" + backendID: 20 annotations: {} labels: {} - # rimeAgent.operator.modelTestJob.image -- default image specification for the image used by model test jobs. - # rimeAgent.operator.modelTestJob.backoffLimit -- backoff limit for mode test jobs. backoffLimit: 0 - # rimeAgent.operator.modelTestJob.activeDeadlineSeconds -- active deadline of job in seconds. - # Default to 72 hours. + # Active deadline of job in seconds (default to 72 hours). activeDeadlineSeconds: 259200 - # rimeAgent.operator.modelTestJob.ttlSecondsAfterFinished -- TTL for jobs after finished in seconds. - # Default to 48 hours. 
+ # TTL for jobs after finished in seconds (default to 48 hours). ttlSecondsAfterFinished: 172800 - ## Configure rbac ServiceAccount for the jobs launched by this agent. For example, granting S3 read permissions. - serviceAccount: - # rimeAgent.operator.modelTestJob.serviceAccount.create -- Specifies whether a ServiceAccount should be created. - create: true - # rimeAgent.operator.modelTestJob.serviceAccount.name -- Specify a preexisting ServiceAccount to use if create is false. - name: - # rimeAgent.operator.modelTestJob.serviceAccount.annotations -- if create is true, annotations to add to the service account. - ## Since data is stored in a cloud storage (e.g. S3, GCS), add an annotation to allow read access here. - ## EKS IAM setup for S3: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html - ## GKE IAM setup for GCS: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity - annotations: {} - # eks.amazonaws.com/role-arn: - # iam.gke.io/gcp-service-account: - # rimeAgent.operator.modelTestJob.resources -- resource request and limits for model test jobs. resources: limits: cpu: 3000m @@ -210,6 +326,26 @@ rimeAgent: cpu: 3000m memory: 8000Mi + # Configure rbac ServiceAccount for the jobs launched by this agent. For example, granting S3 read permissions. + serviceAccount: + # Specifies whether a ServiceAccount should be created. + create: true + # Specify a preexisting ServiceAccount to use if create is false. + name: + # If create is true, annotations to add to the service account. + # Since data is stored in a cloud storage (e.g. S3, GCS), add an annotation to allow read access here. 
+ # Azure IAM setup: https://azure.github.io/azure-workload-identity/docs/ + annotations: {} + # EKS IAM setup for S3: https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html + # eks.amazonaws.com/role-arn: + + # GKE IAM setup for GCS: https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity + # iam.gke.io/gcp-service-account: + + # Azure IAM setup: https://azure.github.io/azure-workload-identity/docs/ + # azure.workload.identity/client-id: + # azure.workload.identity/tenant-id: + # SecurityContext to add to the deployment. Default is just set to not run as root. # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core securityContext: {} @@ -229,16 +365,178 @@ rimeAgent: # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core extraVolumeMounts: [] - # rimeAgent.operator.modelTestJob.nodeSelector -- node selector for model test jobs. + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ nodeSelector: {} - # rimeAgent.operator.modelTestJob.tolerations -- tolerations for model test jobs. + + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ tolerations: [] - # rimeAgent.operator.modelTestJob.affinity -- affinity for model test jobs. + + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity affinity: {} + # -- `cross-plane-job` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + crossPlaneRPCJob: + name: "cross-plane-job" + backendID: 21 + annotations: {} + labels: {} + backoffLimit: 0 + # Active deadline of job in seconds (default to 5 minutes). + activeDeadlineSeconds: 300 + # TTL for jobs after finished in seconds (default to 5 minutes). 
+ ttlSecondsAfterFinished: 300 + resources: + limits: + memory: 300Mi + requests: + cpu: 100m + memory: 60Mi + # Configure rbac ServiceAccount for the jobs launched by this agent. For example, granting S3 read permissions. + + # SecurityContext to add to the deployment. Default is just set to not run as root. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core + securityContext: {} + + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core + extraEnv: [] + + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core + extraVolumes: [] + + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core + extraVolumeMounts: [] + + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + nodeSelector: {} + + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + tolerations: [] + + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + + # -- Configuration for RIME Job Log Archival (persistence of job logs for debugging). + # @default -- (see individual values in `values.yaml`) logArchival: enabled: false + # The S3 destination for log archives. + storageBucketName: + endpoint: "s3.amazonaws.com" + type: "s3" + serviceAccount: + # Account used by services that need access to blob storage. 
+ # This account requires privileges to read and write to S3 bucket used for blob storage. + annotations: { + eks.amazonaws.com/role-arn: + } + fileServer: + enabled: false + name: "file-server" + port: 5022 + config: + # -- The bucket name of the S3 bucket used as the blob storage. + storageBucketName: "" + endpoint: "s3.amazonaws.com" + type: "s3" + # -- Account used by services that need access to blob storage. + serviceAccount: + create: true + name: "" + annotations: { + # -- Specify ARN of IRSA-enabled Blob Storage IAM role here + eks.amazonaws.com/role-arn: "" + } + labels: {} + service: + type: ClusterIP + annotations: {} + labels: {} + hpa: + annotations: {} + labels: {} + enabled: true + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 60 + deployment: + annotations: {} + labels: {} + replicaCount: 1 + resources: + limits: + memory: 90Mi + requests: + cpu: 100m + memory: 90Mi + + # SecurityContext to add to the deployment. Default is just set to not run as root. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core + securityContext: {} + + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core + extraEnv: [] + + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core + extraVolumes: [] + + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. 
+ # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core + extraVolumeMounts: [] + + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + nodeSelector: {} + + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + tolerations: [] + + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + +# -- Mutual TLS configuration for internal agent. +# @default -- (see individual values in `values.yaml`) tls: crossplaneEnabled: false + # -- Whether to enable the cert-manager service for issuing and managing TLS certificates within the cluster enableCertManager: false + # -- `spec` for Certificate object (https://cert-manager.io/docs/usage/certificate/). + certificateSpec: + # -- See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Attributes listed below are the minimum required for the `subject` property. + subject: + organizations: + - RobustIntelligence + # -- See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Attributes listed below are the minimum required for the `issuerRef` property. + issuerRef: + # -- Will default to `rime-{{ .Release.Namespace }}-ca-issuer`. 
+ name: "" + kind: Issuer + group: cert-manager.io diff --git a/rime-extras/Chart.yaml b/rime-extras/Chart.yaml index 32dcbf85..ef50a5f9 100644 --- a/rime-extras/Chart.yaml +++ b/rime-extras/Chart.yaml @@ -15,10 +15,30 @@ appVersion: "v9" dependencies: - name: "datadog" - version: "2.20.3" + version: "3.32.8" repository: "https://helm.datadoghq.com" condition: rimeExtras.datadog - name: "velero" version: "2.23.6" repository: "https://vmware-tanzu.github.io/helm-charts" condition: rimeExtras.velero + - name: "prometheus-node-exporter" + version: "4.22.0" + repository: "https://prometheus-community.github.io/helm-charts" + condition: rimeExtras.prometheusNodeExporter + - name: "prometheus" + version: "25.6.0" + repository: "https://prometheus-community.github.io/helm-charts" + condition: rimeExtras.prometheusServer + - name: "prometheus-cloudwatch-exporter" + version: "0.25.2" + repository: "https://prometheus-community.github.io/helm-charts" + condition: rimeExtras.prometheusCloudwatchExporter + - name: "humio-helm-charts" + version: "0.9.5" + repository: "https://humio.github.io/humio-helm-charts" + condition: rimeExtras.humioFluentBit + - name: "fluent-bit" + version: "0.42.0" + repository: "https://fluent.github.io/helm-charts" + condition: rimeExtras.riObservabilityFluentBit diff --git a/rime-extras/README.md b/rime-extras/README.md index e61eb732..cd44f732 100644 --- a/rime-extras/README.md +++ b/rime-extras/README.md @@ -8,7 +8,12 @@ A Helm chart for RIME's extra resources | Repository | Name | Version | |------------|------|---------| -| https://helm.datadoghq.com | datadog | 2.20.3 | +| https://fluent.github.io/helm-charts | fluent-bit | 0.42.0 | +| https://helm.datadoghq.com | datadog | 3.32.8 | +| https://humio.github.io/humio-helm-charts | humio-helm-charts | 0.9.5 | +| https://prometheus-community.github.io/helm-charts | prometheus | 25.6.0 | +| https://prometheus-community.github.io/helm-charts | prometheus-cloudwatch-exporter | 0.25.2 | +| 
https://prometheus-community.github.io/helm-charts | prometheus-node-exporter | 4.22.0 | | https://vmware-tanzu.github.io/helm-charts | velero | 2.23.6 | ## Values @@ -19,7 +24,168 @@ A Helm chart for RIME's extra resources | datadog.datadog.apiKey | string | `""` | API key for DataDog services. Will be provided by your RI Solutions Architect. | | datadog.datadog.env[0] | object | `{"name":"DD_LOGS_CONFIG_PROCESSING_RULES","value":"[{\n \"type\": \"mask_sequences\",\n \"name\": \"mask_ip\",\n \"replace_placeholder\": \"[masked_ip]\",\n \"pattern\" : \"(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\"\n },\n {\n \"type\": \"mask_sequences\",\n \"name\": \"mask_email\",\n \"replace_placeholder\": \"[masked_email]\",\n \"pattern\" : \"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,4}\"\n }]"}` | Log masking to prevent transmission of sensitive info NOTE: regex in the log rules require an extra escape for any escape character used, e.g. \\\b for \b in normal regex | | datadog.datadog.tags | list | `["user:${datadog_user_tag}","rime-version:${datadog_rime_version_tag}"]` | List of static tags to attach to every metric, event and service check collected by this Agent. 
Learn more about tagging: https://docs.datadoghq.com/tagging/ | +| fluent-bit.config.filters | string | `"[FILTER]\n Name kubernetes\n Match kube.*\n Merge_Log On\n Keep_Log Off\n K8S-Logging.Parser On\n K8S-Logging.Exclude On\n Annotations Off\n Labels Off\n Buffer_Size 10MB\n\n[FILTER]\n Name modify\n Match *\n Condition Key_does_not_exist attr.error.code\n Rename attr.error attr.error_str\n\n[FILTER]\n Name modify\n Match *\n Condition Key_exists message\n Rename message msg\n\n[FILTER]\n Name nest\n Match kube.*\n Operation lift\n Nested_under kubernetes\n\n[FILTER]\n Name modify\n Match kube.*\n Remove attr\n Rename log msg\n"` | | +| fluent-bit.config.inputs | string | `"[INPUT]\n Name tail\n Path /var/log/containers/*.log\n multiline.parser docker, cri\n Tag kube.*\n Mem_Buf_Limit 10MB\n Skip_Long_Lines On\n"` | | +| fluent-bit.config.outputs | string | `"[OUTPUT]\n Name opensearch\n Match kube.*\n Host search-ri-opensearch-vqycu6e5fafj4zojubom4zzl4y.us-west-2.es.amazonaws.com\n Port 443\n Index ri_logs.%Y.%m.%d\n AWS_Auth On\n AWS_Region us-west-2\n AWS_Role_ARN arn:aws:iam::746181457053:role/fluentbit_role\n Suppress_Type_Name On\n TLS On\n Trace_Error On\n"` | | +| fluent-bit.config.service | string | `"[SERVICE]\n Daemon Off\n Flush {{ .Values.flush }}\n Log_Level {{ .Values.logLevel }}\n Parsers_File /fluent-bit/etc/parsers.conf\n Parsers_File /fluent-bit/etc/conf/custom_parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port {{ .Values.metricsPort }}\n Health_Check On\n"` | | +| fluent-bit.image.pullPolicy | string | `"IfNotPresent"` | | +| fluent-bit.image.repository | string | `"docker.io/robustintelligencehq/fluent-bit"` | | +| fluent-bit.image.tag | string | `"2.2.2"` | | +| fluent-bit.imagePullSecrets[0].name | string | `"rimecreds"` | | +| fluent-bit.nameOverride | string | `"ri-observability-fluent-bit"` | | +| fluent-bit.tolerations[0].effect | string | `"NoSchedule"` | | +| fluent-bit.tolerations[0].operator | string | `"Exists"` | | 
+| humio-helm-charts | object | (see individual values in `values`.yaml) | For full reference, see https://github.com/humio/humio-helm-charts/tree/release-0.9.5/charts/humio-fluentbit | +| observabilityProxyServer.containerPort | int | `8000` | | +| observabilityProxyServer.image.name | string | `"robustintelligencehq/observability-proxy-server:v0.1"` | | +| observabilityProxyServer.image.pullPolicy | string | `"IfNotPresent"` | | +| observabilityProxyServer.image.registry | string | `"docker.io"` | | +| observabilityProxyServer.port | int | `8000` | | +| observabilityProxyServer.remoteWriteSecretName | string | `"remote-write-api-key"` | | +| observabilityProxyServer.remoteWriteURL | string | `"https://4dj9f20xee.execute-api.us-west-2.amazonaws.com/production/remote_write"` | | +| prometheus-cloudwatch-exporter.config | string | `"region: \"us-west-2\"\nperiod_seconds: 60\ndelay_seconds: 900\nmetrics:\n\n- aws_metric_name: CPUUtilization\n aws_namespace: AWS/EC2\n aws_statistics:\n - Average\n aws_dimensions:\n - InstanceId\n\n# The number of unhealthy hosts\n- aws_metric_name: UnHealthyHostCount\n aws_namespace: AWS/ELB\n aws_statistics:\n - Minimum\n aws_dimensions:\n - LoadBalancerName\n - AvailabilityZone\n\n# The total number of bytes processed by the load balancer, including TCP/IP headers.\n# This count includes traffic to and from targets, minus health check traffic.\n- aws_metric_name: ProcessedBytes\n aws_namespace: AWS/NetworkELB\n aws_statistics:\n - Sum\n aws_dimensions:\n - LoadBalancer\n - AvailabilityZone\n\n# The total number of concurrent flows (or connections) from clients to targets.\n- aws_metric_name: ActiveFlowCount\n aws_namespace: AWS/NetworkELB\n aws_statistics:\n - Average\n aws_dimensions:\n - LoadBalancer\n - AvailabilityZone\n\n# The number of new ICMP messages rejected by the inbound rules of the load balancer security groups.\n- aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_ICMP\n aws_namespace: AWS/NetworkELB\n 
aws_statistics:\n - Sum\n aws_dimensions:\n - LoadBalancer\n - AvailabilityZone\n\n# The number of new TCP messages rejected by the inbound rules of the load balancer security groups.\n- aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_TCP\n aws_namespace: AWS/NetworkELB\n aws_statistics:\n - Sum\n aws_dimensions:\n - LoadBalancer\n - AvailabilityZone\n\n# The number of new UDP messages rejected by the inbound rules of the load balancer security groups.\n- aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_UDP\n aws_namespace: AWS/NetworkELB\n aws_statistics:\n - Sum\n aws_dimensions:\n - LoadBalancer\n - AvailabilityZone"` | | +| prometheus-cloudwatch-exporter.image.pullPolicy | string | `"IfNotPresent"` | | +| prometheus-cloudwatch-exporter.image.repository | string | `"docker.io/robustintelligencehq/cloudwatch-exporter"` | | +| prometheus-cloudwatch-exporter.image.tag | string | `"v0.15.5"` | | +| prometheus-cloudwatch-exporter.pod.annotations."app.kubernetes.io/owned-by" | string | `"ri"` | | +| prometheus-cloudwatch-exporter.pod.annotations."prometheus.io/path" | string | `"/metrics"` | | +| prometheus-cloudwatch-exporter.pod.annotations."prometheus.io/port" | string | `"9106"` | | +| prometheus-cloudwatch-exporter.pod.annotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus-node-exporter | object | (see individual values in `values`.yaml) | For full reference, see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter | +| prometheus.alertmanager.enabled | bool | `false` | | +| prometheus.configmapReload.prometheus.enabled | bool | `true` | | +| prometheus.configmapReload.prometheus.image.pullPolicy | string | `"IfNotPresent"` | | +| prometheus.configmapReload.prometheus.image.repository | string | `"docker.io/robustintelligencehq/prometheus-config-reloader"` | | +| prometheus.configmapReload.prometheus.image.tag | string | `"v0.70.0"` | | +| prometheus.kube-state-metrics.enabled | bool | `true` 
| | +| prometheus.kube-state-metrics.image.registry | string | `"docker.io"` | | +| prometheus.kube-state-metrics.image.repository | string | `"robustintelligencehq/kube-state-metrics"` | | +| prometheus.kube-state-metrics.image.tag | string | `"v2.10.1"` | | +| prometheus.kube-state-metrics.podAnnotations."app.kubernetes.io/owned-by" | string | `"ri"` | | +| prometheus.kube-state-metrics.podAnnotations."prometheus.io/path" | string | `"/metrics"` | | +| prometheus.kube-state-metrics.podAnnotations."prometheus.io/port" | string | `"8080"` | | +| prometheus.kube-state-metrics.podAnnotations."prometheus.io/scrape" | string | `"true"` | | +| prometheus.prometheus-node-exporter.enabled | bool | `false` | | +| prometheus.prometheus-pushgateway.enabled | bool | `false` | | +| prometheus.server.defaultFlagsOverride[0] | string | `"--web.enable-lifecycle"` | | +| prometheus.server.defaultFlagsOverride[1] | string | `"--config.file=/etc/config/prometheus.yml"` | | +| prometheus.server.global.scrape_interval | string | `"30s"` | | +| prometheus.server.image.pullPolicy | string | `"IfNotPresent"` | | +| prometheus.server.image.repository | string | `"docker.io/robustintelligencehq/prometheus"` | | +| prometheus.server.image.tag | string | `"v2.48.0"` | | +| prometheus.server.persistentVolume.enabled | bool | `false` | | +| prometheus.server.remoteWrite[0].url | string | `"http://observability-proxy-server:8000/remote_write"` | | +| prometheus.serverFiles."alerting_rules.yml" | object | `{}` | | +| prometheus.serverFiles."prometheus.yml".rule_files[0] | string | `"/etc/config/recording_rules.yml"` | | +| prometheus.serverFiles."prometheus.yml".rule_files[1] | string | `"/etc/config/alerting_rules.yml"` | | +| prometheus.serverFiles."prometheus.yml".rule_files[2] | string | `"/etc/config/rules"` | | +| prometheus.serverFiles."prometheus.yml".rule_files[3] | string | `"/etc/config/alerts"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[0].job_name | string | 
`"prometheus"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[0].static_configs[0].targets[0] | string | `"localhost:9090"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].bearer_token_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].job_name | string | `"kubernetes-apiservers"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].kubernetes_sd_configs[0].role | string | `"endpoints"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].relabel_configs[0].action | string | `"keep"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].relabel_configs[0].regex | string | `"default;kubernetes;https"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].relabel_configs[0].source_labels[0] | string | `"__meta_kubernetes_namespace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].relabel_configs[0].source_labels[1] | string | `"__meta_kubernetes_service_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].relabel_configs[0].source_labels[2] | string | `"__meta_kubernetes_endpoint_port_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].scheme | string | `"https"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].tls_config.ca_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[1].tls_config.insecure_skip_verify | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].bearer_token_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].job_name | string | `"kubernetes-nodes"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].kubernetes_sd_configs[0].role | string | `"node"` | | +| 
prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[0].action | string | `"labelmap"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[0].regex | string | `"__meta_kubernetes_node_label_(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[1].replacement | string | `"kubernetes.default.svc:443"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[1].target_label | string | `"__address__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[2].regex | string | `"(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[2].replacement | string | `"/api/v1/nodes/$1/proxy/metrics"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[2].source_labels[0] | string | `"__meta_kubernetes_node_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].relabel_configs[2].target_label | string | `"__metrics_path__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].scheme | string | `"https"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].tls_config.ca_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[2].tls_config.insecure_skip_verify | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].bearer_token_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/token"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].job_name | string | `"kubernetes-nodes-cadvisor"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].kubernetes_sd_configs[0].role | string | `"node"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[0].action | string | `"labelmap"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[0].regex | string | 
`"__meta_kubernetes_node_label_(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[1].replacement | string | `"kubernetes.default.svc:443"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[1].target_label | string | `"__address__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[2].regex | string | `"(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[2].replacement | string | `"/api/v1/nodes/$1/proxy/metrics/cadvisor"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[2].source_labels[0] | string | `"__meta_kubernetes_node_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].relabel_configs[2].target_label | string | `"__metrics_path__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].scheme | string | `"https"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].tls_config.ca_file | string | `"/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[3].tls_config.insecure_skip_verify | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].honor_labels | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].job_name | string | `"kubernetes-pods"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].kubernetes_sd_configs[0].role | string | `"pod"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[0].action | string | `"keep"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[0].regex | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[0].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_scrape"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[10].action | string | 
`"drop"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[10].regex | string | `"Pending|Succeeded|Failed|Completed"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[10].source_labels[0] | string | `"__meta_kubernetes_pod_phase"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[11].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[11].source_labels[0] | string | `"__meta_kubernetes_pod_node_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[11].target_label | string | `"node"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[1].action | string | `"keep"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[1].regex | string | `"ri"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[1].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_app_kubernetes_io_owned_by"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[2].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[2].regex | string | `"(https?)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[2].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_scheme"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[2].target_label | string | `"__scheme__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[3].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[3].regex | string | `"(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[3].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_path"` | | +| 
prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[3].target_label | string | `"__metrics_path__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].regex | string | `"(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].replacement | string | `"[$2]:$1"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_port"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].source_labels[1] | string | `"__meta_kubernetes_pod_ip"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[4].target_label | string | `"__address__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].regex | string | `"(\\d+);((([0-9]+?)(\\.|$)){4})"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].replacement | string | `"$2:$1"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_port"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].source_labels[1] | string | `"__meta_kubernetes_pod_ip"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[5].target_label | string | `"__address__"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[6].action | string | `"labelmap"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[6].regex | string | 
`"__meta_kubernetes_pod_annotation_prometheus_io_param_(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[6].replacement | string | `"__param_$1"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[7].action | string | `"labelmap"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[7].regex | string | `"__meta_kubernetes_pod_label_(.+)"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[8].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[8].source_labels[0] | string | `"__meta_kubernetes_namespace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[8].target_label | string | `"namespace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[9].action | string | `"replace"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[9].source_labels[0] | string | `"__meta_kubernetes_pod_name"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[4].relabel_configs[9].target_label | string | `"pod"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].honor_labels | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].job_name | string | `"aws-node"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].kubernetes_sd_configs[0].role | string | `"pod"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[0].action | string | `"keep"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[0].regex | string | `"aws-vpc-cni"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[0].source_labels[0] | string | `"__meta_kubernetes_pod_label_app_kubernetes_io_instance"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[1].action | string | 
`"keep"` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[1].regex | bool | `true` | | +| prometheus.serverFiles."prometheus.yml".scrape_configs[5].relabel_configs[1].source_labels[0] | string | `"__meta_kubernetes_pod_annotation_prometheus_io_scrape"` | | +| prometheus.serverFiles."recording_rules.yml" | object | `{}` | | +| prometheus.serverFiles.alerts | object | `{}` | | +| prometheus.serverFiles.rules | object | `{}` | | | rimeExtras.datadog | bool | `false` | Whether to install the DataDog Helm charts for observability | +| rimeExtras.humioFluentBit | bool | `false` | Whether to install the Humio FluentBit Helm charts for sending Firewall validation logs to Humio(Crowdstrike) for SIEM. | +| rimeExtras.observabilityProxyServer | bool | `false` | Whether to install the Observability Proxy Server Helm charts for observability | +| rimeExtras.prometheusCloudwatchExporter | bool | `false` | Whether to install the Prometheus CloudWatch Exporter Helm charts for observability | +| rimeExtras.prometheusNodeExporter | bool | `false` | Whether to install the Prometheus Node Exporter Helm charts for observability | +| rimeExtras.prometheusServer | bool | `false` | Whether to install the Prometheus Server Helm charts for observability | +| rimeExtras.riObservabilityFluentBit | bool | `false` | Whether to install the RI Observability FluentBit Helm chart for internal observability and monitoring | | rimeExtras.velero | bool | `false` | Whether to install the Velero Helm charts for disaster recovery | | velero | object | (see individual values in `values`.yaml) | For full reference, see https://github.com/vmware-tanzu/helm-charts/tree/velero-2.23.6/charts/velero | | velero.configuration.backupStorageLocation.bucket | string | `""` | Bucket is the name of the bucket to store backups in. Required. 
| @@ -27,8 +193,8 @@ A Helm chart for RIME's extra resources | velero.configuration.volumeSnapshotLocation.config.region | string | `""` | AWS region for the EKS cluster | | velero.configuration.volumeSnapshotLocation.name | string | `"mongodb-snapshots"` | Name of the volume snapshot location where snapshots are being taken. Required. | | velero.initContainers | list | `[{"image":"docker.io/robustintelligencehq/velero-plugin-for-aws:v1.2.1","imagePullPolicy":"IfNotPresent","name":"velero-plugin-for-aws","volumeMounts":[{"mountPath":"/target","name":"plugins"}]}]` | Init containers to add to the Velero deployment's pod spec. At least one plugin provider image is required. For other cloud providers, see https://velero.io/docs/v1.6/supported-providers/ | -| velero.schedules.mongodb-backup.schedule | string | `"0 2 * * *"` | Default: daily at 2:00am | -| velero.schedules.mongodb-backup.template.includedNamespaces | list | `[""]` | At minimum, should include the RIME namespace(s) (all namespaces recommended) | +| velero.schedules.mongodb-backup.schedule | string | `"0 */4 * * *"` | Default: every four hours starting at 12:00 AM | +| velero.schedules.mongodb-backup.template.includedNamespaces | list | `["*"]` | At minimum, should include the RIME namespace(s) (all namespaces recommended) | | velero.schedules.mongodb-backup.template.ttl | string | `"336h"` | Backup horizon. 
Default is 336h (i.e., 2 weeks) | | velero.serviceAccount.annotations | object | `{"eks.amazonaws.com/role-arn":""}` | For AWS: Specify ARN of IRSA-enabled Velero Backups IAM role here | diff --git a/rime-extras/templates/observability-proxy-server/deployment.yaml b/rime-extras/templates/observability-proxy-server/deployment.yaml new file mode 100644 index 00000000..74d04ac8 --- /dev/null +++ b/rime-extras/templates/observability-proxy-server/deployment.yaml @@ -0,0 +1,50 @@ +{{- if .Values.rimeExtras.observabilityProxyServer }} +apiVersion: v1 +kind: Service +metadata: + name: observability-proxy-server +spec: + selector: + app: observability-proxy-server + ports: + - protocol: "TCP" + port: {{ .Values.observabilityProxyServer.port }} + targetPort: {{ .Values.observabilityProxyServer.containerPort }} + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: observability-proxy-server +spec: + selector: + matchLabels: + app: observability-proxy-server + replicas: 1 + template: + metadata: + labels: + app: observability-proxy-server + spec: + imagePullSecrets: + - name: rimecreds + containers: + - name: observability-proxy-server + image: "{{ .Values.observabilityProxyServer.image.registry}}/{{ .Values.observabilityProxyServer.image.name }}" + imagePullPolicy: {{ .Values.observabilityProxyServer.image.pullPolicy }} + ports: + - containerPort: {{ .Values.observabilityProxyServer.containerPort }} + + env: + {{- if .Values.observabilityProxyServer.remoteWriteSecretName }} + - name: REMOTE_WRITE_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.observabilityProxyServer.remoteWriteSecretName }} + key: REMOTE_WRITE_API_KEY + {{- end }} + {{- if .Values.observabilityProxyServer.remoteWriteURL }} + - name: REMOTE_WRITE_URL + value: {{ .Values.observabilityProxyServer.remoteWriteURL }} + {{- end }} +{{- end }} diff --git a/rime-extras/values.yaml b/rime-extras/values.yaml index 36561f6e..e2d9c321 100644 --- a/rime-extras/values.yaml +++ 
b/rime-extras/values.yaml @@ -3,6 +3,19 @@ rimeExtras: datadog: false # -- Whether to install the Velero Helm charts for disaster recovery velero: false + # -- Whether to install the Prometheus Node Exporter Helm charts for observability + prometheusNodeExporter: false + # -- Whether to install the Prometheus Server Helm charts for observability + prometheusServer: false + # -- Whether to install the Observability Proxy Server Helm charts for observability + observabilityProxyServer: false + # -- Whether to install the Prometheus CloudWatch Exporter Helm charts for observability + prometheusCloudwatchExporter: false + # -- Whether to install the Humio FluentBit Helm charts for sending Firewall + # validation logs to Humio(Crowdstrike) for SIEM. + humioFluentBit: false + # -- Whether to install the RI Observability FluentBit Helm chart for internal observability and monitoring + riObservabilityFluentBit: false # -- For full reference, see https://github.com/DataDog/helm-charts/tree/datadog-2.20.3/charts/datadog # @default -- (see individual values in `values`.yaml) @@ -19,10 +32,18 @@ datadog: # Restricts prometheus metrics scraping to only our custom containers. 
additionalConfigs: - + configurations: + - send_distribution_buckets: true + - histogram_buckets_as_distributions: true autodiscovery: kubernetes_container_names: - - rime - - rime-agent + - "aws-node" + kubernetes_annotations: + include: + prometheus.io/scrape: "true" + app.kubernetes.io/owned-by: "ri" + exclude: + prometheus.io/scrape: "false" # Ignore auto-configuration for `kubernetes_state` (because we disable kube-state-metrics) and `redisdb` # See https://docs.datadoghq.com/agent/guide/auto_conf/ ignoreAutoConfig: @@ -77,13 +98,13 @@ datadog: repository: "robustintelligencehq/datadog-agent" pullSecrets: - name: rimecreds - tag: 7.32.3 + tag: 7.46.0 clusterAgent: image: repository: "robustintelligencehq/datadog-cluster-agent" pullSecrets: - name: rimecreds - tag: 1.14.0 + tag: 7.46.0 # -- For full reference, see https://github.com/vmware-tanzu/helm-charts/tree/velero-2.23.6/charts/velero # @default -- (see individual values in `values`.yaml) @@ -135,8 +156,8 @@ velero: schedules: mongodb-backup: disabled: false - # -- Default: daily at 2:00am - schedule: "0 2 * * *" + # -- Default: every four hours starting at 12:00 AM + schedule: "0 */4 * * *" useOwnerReferencesInBackup: false template: # -- Backup horizon. 
Default is 336h (i.e., 2 weeks) @@ -146,4 +167,391 @@ velero: - pv # -- At minimum, should include the RIME namespace(s) (all namespaces recommended) includedNamespaces: - - "" + - "*" + +# -- For full reference, see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter +# @default -- (see individual values in `values`.yaml) +prometheus-node-exporter: + image: + registry: "docker.io" + repository: "robustintelligencehq/node-exporter" + tag: v1.6.0 + pullPolicy: IfNotPresent + extraArgs: + - --collector.disable-defaults + - --collector.cpu + - --collector.meminfo + - --collector.filefd + - --collector.loadavg + - --collector.netstat + - --collector.stat + - --collector.vmstat + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + prometheus.io/path: "/metrics" + app.kubernetes.io/owned-by: "ri" + +prometheus: + server: + image: + repository: "docker.io/robustintelligencehq/prometheus" + tag: v2.48.0 + pullPolicy: IfNotPresent + remoteWrite: + - url: "http://observability-proxy-server:8000/remote_write" + persistentVolume: + enabled: false + global: + scrape_interval: 30s + defaultFlagsOverride: ["--web.enable-lifecycle", "--config.file=/etc/config/prometheus.yml"] + serverFiles: + alerting_rules.yml: {} + alerts: {} + recording_rules.yml: {} + rules: {} + prometheus.yml: + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml + - /etc/config/rules + - /etc/config/alerts + # The vast majority of the below scrape configs are defaults, + # but we need to copy them here because Helm will overwrite the + # entire config if we add additional jobs here + scrape_configs: + # Scrapes Prometheus itself + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + # Scrapes metrics about the k8s control plane (via k8s API) + - job_name: 'kubernetes-apiservers' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: 
/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + # Scrapes metrics about nodes in the k8s cluster (via k8s API) + - job_name: 'kubernetes-nodes' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/$1/proxy/metrics + # Scrapes metrics about nodes related to cAdvisor (E.g., CPU, memory) + - job_name: 'kubernetes-nodes-cadvisor' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor + # Scrapes pods directly. This is where our custom metrics are exposed. 
+ - job_name: 'kubernetes-pods' + honor_labels: true + kubernetes_sd_configs: + - role: pod + relabel_configs: + # Only keep pods with prometheus.io/scrape: "true" + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + # Only keep pods with app.kubernetes.io/owned-by: "ri" + - source_labels: [__meta_kubernetes_pod_annotation_app_kubernetes_io_owned_by] + action: keep + regex: ri + # Gets the metric endpoint's scheme from the annotation prometheus.io/scheme + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + action: replace + regex: (https?) + target_label: __scheme__ + # Gets the metric endpoint's path from the annotation prometheus.io/path + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + # For IPv6, gets the metric endpoint's ip/port from the annotation prometheus.io/port + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + action: replace + regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) + replacement: '[$2]:$1' + target_label: __address__ + # For IPv4, gets the metric endpoint's ip/port from the annotation prometheus.io/port + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + action: replace + regex: (\d+);((([0-9]+?)(\.|$)){4}) + replacement: $2:$1 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod + - source_labels: [__meta_kubernetes_pod_phase] + regex: Pending|Succeeded|Failed|Completed + action: drop + - source_labels: [__meta_kubernetes_pod_node_name] + action: 
replace + target_label: node + # Scrapes AWS VPC CNI + - job_name: aws-node + honor_labels: true + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + action: keep + regex: aws-vpc-cni + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + configmapReload: + prometheus: + enabled: true + image: + repository: "docker.io/robustintelligencehq/prometheus-config-reloader" + tag: v0.70.0 + pullPolicy: IfNotPresent + kube-state-metrics: + enabled: true + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + app.kubernetes.io/owned-by: "ri" + image: + registry: "docker.io" + repository: "robustintelligencehq/kube-state-metrics" + tag: "v2.10.1" + prometheus-node-exporter: + enabled: false + prometheus-pushgateway: + enabled: false + alertmanager: + enabled: false + +observabilityProxyServer: + image: + registry: docker.io + name: robustintelligencehq/observability-proxy-server:v0.1 + pullPolicy: IfNotPresent + port: 8000 + containerPort: 8000 + # The same remote_write URL is used for all Prometheus servers + remoteWriteURL: https://4dj9f20xee.execute-api.us-west-2.amazonaws.com/production/remote_write + remoteWriteSecretName: remote-write-api-key + +prometheus-cloudwatch-exporter: + image: + repository: "docker.io/robustintelligencehq/cloudwatch-exporter" + tag: "v0.15.5" + pullPolicy: IfNotPresent + pod: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9106" + prometheus.io/path: "/metrics" + app.kubernetes.io/owned-by: "ri" + config: |- + region: "us-west-2" + period_seconds: 60 + delay_seconds: 900 + metrics: + + - aws_metric_name: CPUUtilization + aws_namespace: AWS/EC2 + aws_statistics: + - Average + aws_dimensions: + - InstanceId + + # The number of unhealthy hosts + - aws_metric_name: UnHealthyHostCount + aws_namespace: AWS/ELB + aws_statistics: + - Minimum + 
aws_dimensions: + - LoadBalancerName + - AvailabilityZone + + # The total number of bytes processed by the load balancer, including TCP/IP headers. + # This count includes traffic to and from targets, minus health check traffic. + - aws_metric_name: ProcessedBytes + aws_namespace: AWS/NetworkELB + aws_statistics: + - Sum + aws_dimensions: + - LoadBalancer + - AvailabilityZone + + # The total number of concurrent flows (or connections) from clients to targets. + - aws_metric_name: ActiveFlowCount + aws_namespace: AWS/NetworkELB + aws_statistics: + - Average + aws_dimensions: + - LoadBalancer + - AvailabilityZone + + # The number of new ICMP messages rejected by the inbound rules of the load balancer security groups. + - aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_ICMP + aws_namespace: AWS/NetworkELB + aws_statistics: + - Sum + aws_dimensions: + - LoadBalancer + - AvailabilityZone + + # The number of new TCP messages rejected by the inbound rules of the load balancer security groups. + - aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_TCP + aws_namespace: AWS/NetworkELB + aws_statistics: + - Sum + aws_dimensions: + - LoadBalancer + - AvailabilityZone + + # The number of new UDP messages rejected by the inbound rules of the load balancer security groups. 
+ - aws_metric_name: SecurityGroupBlockedFlowCount_Inbound_UDP + aws_namespace: AWS/NetworkELB + aws_statistics: + - Sum + aws_dimensions: + - LoadBalancer + - AvailabilityZone + +# -- For full reference, see https://github.com/humio/humio-helm-charts/tree/release-0.9.5/charts/humio-fluentbit +# @default -- (see individual values in `values`.yaml) +humio-helm-charts: + humio-fluentbit: + image: "docker.io/robustintelligencehq/fluent-bit:2.0.3" + imagePullPolicy: IfNotPresent + imagePullSecrets: + - name: rimecreds + enabled: false + humioHostname: "" + token: "" + es: + tls: true + +# Configuration for RI's internal Fluent Bit that is used for internal logging and monitoring +fluent-bit: + nameOverride: ri-observability-fluent-bit + image: + repository: "docker.io/robustintelligencehq/fluent-bit" + tag: "2.2.2" + pullPolicy: IfNotPresent + tolerations: + - operator: Exists + effect: NoSchedule + imagePullSecrets: + - name: rimecreds + ## https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/configuration-file + config: + service: | + [SERVICE] + Daemon Off + Flush {{ .Values.flush }} + Log_Level {{ .Values.logLevel }} + Parsers_File /fluent-bit/etc/parsers.conf + Parsers_File /fluent-bit/etc/conf/custom_parsers.conf + HTTP_Server On + HTTP_Listen 0.0.0.0 + HTTP_Port {{ .Values.metricsPort }} + Health_Check On + + ## https://docs.fluentbit.io/manual/pipeline/inputs + inputs: | + [INPUT] + Name tail + Path /var/log/containers/*.log + multiline.parser docker, cri + Tag kube.* + Mem_Buf_Limit 10MB + Skip_Long_Lines On + + ## https://docs.fluentbit.io/manual/pipeline/filters + filters: | + [FILTER] + Name kubernetes + Match kube.* + Merge_Log On + Keep_Log Off + K8S-Logging.Parser On + K8S-Logging.Exclude On + Annotations Off + Labels Off + Buffer_Size 10MB + + [FILTER] + Name modify + Match * + Condition Key_does_not_exist attr.error.code + Rename attr.error attr.error_str + + [FILTER] + Name modify + Match * + Condition Key_exists message 
+ Rename message msg + + [FILTER] + Name nest + Match kube.* + Operation lift + Nested_under kubernetes + + [FILTER] + Name modify + Match kube.* + Remove attr + Rename log msg + + ## https://docs.fluentbit.io/manual/pipeline/outputs + outputs: | + [OUTPUT] + Name opensearch + Match kube.* + Host search-ri-opensearch-vqycu6e5fafj4zojubom4zzl4y.us-west-2.es.amazonaws.com + Port 443 + Index ri_logs.%Y.%m.%d + AWS_Auth On + AWS_Region us-west-2 + AWS_Role_ARN arn:aws:iam::746181457053:role/fluentbit_role + Suppress_Type_Name On + TLS On + Trace_Error On diff --git a/rime-kube-system/Chart.yaml b/rime-kube-system/Chart.yaml index 136502b4..b0f2c1c1 100644 --- a/rime-kube-system/Chart.yaml +++ b/rime-kube-system/Chart.yaml @@ -7,14 +7,15 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.1 +version: 0.1.4 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: "v9" -# Compatible versions of kubernetes. -kubeVersion: ">=1.20.0-0" +# Kubernetes upstream has end-of-lifed Kubernetes 1.24 as of July 28th, 2023. 
+# So updating the supported version +kubeVersion: ">=1.24.0-0" dependencies: - name: "cluster-autoscaler" @@ -26,7 +27,7 @@ dependencies: repository: "https://kubernetes-sigs.github.io/external-dns/" condition: rimeSystem.externalDns - name: "aws-load-balancer-controller" - version: "1.4.2" + version: "1.5.5" repository: "https://aws.github.io/eks-charts" condition: rimeSystem.awsLoadBalancerController - name: "metrics-server" @@ -37,3 +38,15 @@ dependencies: version: v1.10.0 repository: https://charts.jetstack.io condition: cert-manager.enabled + - name: "ingress-nginx" + version: "4.7.1" + repository: "https://kubernetes.github.io/ingress-nginx" + condition: rimeSystem.ingressNginx + - name: aws-load-balancer-crds-2-6-1 + version: v0.0.1 + repository: "file://./charts/aws-load-balancer-controller-crds-2-6-1" + condition: rimeSystem.awsLoadBalancerController + - name: kserve + version: "v0.11.1" + repository: "https://robustintelligence.github.io/helm" + condition: rimeSystem.kserve diff --git a/rime-kube-system/README.md b/rime-kube-system/README.md index f95de5dc..72ca6316 100644 --- a/rime-kube-system/README.md +++ b/rime-kube-system/README.md @@ -1,29 +1,34 @@ # rime-kube-system -![Version: 0.1.1](https://img.shields.io/badge/Version-0.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v9](https://img.shields.io/badge/AppVersion-v9-informational?style=flat-square) +![Version: 0.1.4](https://img.shields.io/badge/Version-0.1.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v9](https://img.shields.io/badge/AppVersion-v9-informational?style=flat-square) A Helm chart for RIME's kubesystem resources ## Requirements -Kubernetes: `>=1.20.0-0` +Kubernetes: `>=1.24.0-0` | Repository | Name | Version | |------------|------|---------| -| https://aws.github.io/eks-charts | 
aws-load-balancer-controller | 1.4.2 | +| file://./charts/aws-load-balancer-controller-crds-2-6-1 | aws-load-balancer-crds-2-6-1 | v0.0.1 | +| https://aws.github.io/eks-charts | aws-load-balancer-controller | 1.5.5 | | https://charts.jetstack.io | cert-manager | v1.10.0 | | https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.10.1 | | https://kubernetes-sigs.github.io/metrics-server/ | metrics-server | 3.8.2 | | https://kubernetes.github.io/autoscaler | cluster-autoscaler | 9.28.0 | +| https://kubernetes.github.io/ingress-nginx | ingress-nginx | 4.7.1 | +| https://robustintelligence.github.io/helm | kserve | v0.11.1 | ## Values | Key | Type | Default | Description | |-----|------|---------|-------------| | cert-manager | object | (see individual values in `values`.yaml) | For full reference, see https://github.com/cert-manager/cert-manager/tree/v1.10.0 | -| rimeSystem.awsLoadBalancerController | bool | `false` | Whether to install the AWS Load Balancer Controller Helm chart. For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.4.2 | +| rimeSystem.awsLoadBalancerController | bool | `false` | Whether to install the AWS Load Balancer Controller Helm chart. For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.5.4 | | rimeSystem.clusterAutoscaler | bool | `false` | Whether to install the Kubernetes Autoscaler Helm chart. For full reference, see https://github.com/kubernetes/autoscaler/tree/cluster-autoscaler-1.21.0/cluster-autoscaler/cloudprovider | | rimeSystem.externalDns | bool | `false` | Whether to install the ExternalDNS Helm chart. For full reference, see https://github.com/kubernetes-sigs/external-dns/tree/v0.12.0/charts/external-dns | +| rimeSystem.ingressNginx | bool | `false` | Whether to install the ingress-nginx Helm chart. 
For full reference, see https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx | +| rimeSystem.kserve | bool | `false` | Whether to install the Kserve Helm chart. For full reference, see https://github.com/kserve/kserve | | rimeSystem.metricsServer | bool | `false` | Whether to install the Kubernetes Metrics Server Helm chart. For full reference, see https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1 | ---------------------------------------------- diff --git a/rime-kube-system/charts/.helmignore b/rime-kube-system/charts/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/rime-kube-system/charts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/Chart.yaml b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/Chart.yaml new file mode 100644 index 00000000..e39a7d4a --- /dev/null +++ b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +name: aws-load-balancer-crds-2-6-1 +description: A Helm chart with CRDs for AWS Load Balancer 2.6.1 with RI Platform's kube-system resources +type: application +# chart version +version: 0.0.1 + +# This is the version of the aws-load-balancer that these CRDs are packaged for +appVersion: "2.6.1" + +# Technically, these CRDs only require Kubernetes v1.16 or above. +# However, Kubernetes upstream has end-of-lifed Kubernetes 1.24 as of July 28th, 2023. +# Compatible versions of kubernetes. 
+kubeVersion: ">=1.24.0-0" diff --git a/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/crds/crds.yaml b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/crds/crds.yaml new file mode 100644 index 00000000..78c22666 --- /dev/null +++ b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/crds/crds.yaml @@ -0,0 +1,590 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.11.1 + creationTimestamp: null + name: ingressclassparams.elbv2.k8s.aws +spec: + group: elbv2.k8s.aws + names: + kind: IngressClassParams + listKind: IngressClassParamsList + plural: ingressclassparams + singular: ingressclassparams + scope: Cluster + versions: + - additionalPrinterColumns: + - description: The Ingress Group name + jsonPath: .spec.group.name + name: GROUP-NAME + type: string + - description: The AWS Load Balancer scheme + jsonPath: .spec.scheme + name: SCHEME + type: string + - description: The AWS Load Balancer ipAddressType + jsonPath: .spec.ipAddressType + name: IP-ADDRESS-TYPE + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: IngressClassParams is the Schema for the IngressClassParams API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: IngressClassParamsSpec defines the desired state of IngressClassParams + properties: + group: + description: Group defines the IngressGroup for all Ingresses that + belong to IngressClass with this IngressClassParams. + properties: + name: + description: Name is the name of IngressGroup. + type: string + required: + - name + type: object + inboundCIDRs: + description: InboundCIDRs specifies the CIDRs that are allowed to + access the Ingresses that belong to IngressClass with this IngressClassParams. + items: + type: string + type: array + ipAddressType: + description: IPAddressType defines the ip address type for all Ingresses + that belong to IngressClass with this IngressClassParams. + enum: + - ipv4 + - dualstack + type: string + loadBalancerAttributes: + description: LoadBalancerAttributes define the custom attributes to + LoadBalancers for all Ingress that that belong to IngressClass with + this IngressClassParams. + items: + description: Attributes defines custom attributes on resources. + properties: + key: + description: The key of the attribute. + type: string + value: + description: The value of the attribute. + type: string + required: + - key + - value + type: object + type: array + namespaceSelector: + description: NamespaceSelector restrict the namespaces of Ingresses + that are allowed to specify the IngressClass with this IngressClassParams. + * if absent or present but empty, it selects all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. 
+ type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + scheme: + description: Scheme defines the scheme for all Ingresses that belong + to IngressClass with this IngressClassParams. + enum: + - internal + - internet-facing + type: string + sslPolicy: + description: SSLPolicy specifies the SSL Policy for all Ingresses + that belong to IngressClass with this IngressClassParams. + type: string + subnets: + description: Subnets defines the subnets for all Ingresses that belong + to IngressClass with this IngressClassParams. + properties: + ids: + description: IDs specify the resource IDs of subnets. Exactly + one of this or `tags` must be specified. + items: + description: SubnetID specifies a subnet ID. + pattern: subnet-[0-9a-f]+ + type: string + minItems: 1 + type: array + tags: + additionalProperties: + items: + type: string + type: array + description: Tags specifies subnets in the load balancer's VPC + where each tag specified in the map key contains one of the + values in the corresponding value list. Exactly one of this + or `ids` must be specified. 
+ type: object + type: object + tags: + description: Tags defines list of Tags on AWS resources provisioned + for Ingresses that belong to IngressClass with this IngressClassParams. + items: + description: Tag defines a AWS Tag on resources. + properties: + key: + description: The key of the tag. + type: string + value: + description: The value of the tag. + type: string + required: + - key + - value + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.11.1 + creationTimestamp: null + name: targetgroupbindings.elbv2.k8s.aws +spec: + group: elbv2.k8s.aws + names: + kind: TargetGroupBinding + listKind: TargetGroupBindingList + plural: targetgroupbindings + singular: targetgroupbinding + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: The Kubernetes Service's name + jsonPath: .spec.serviceRef.name + name: SERVICE-NAME + type: string + - description: The Kubernetes Service's port + jsonPath: .spec.serviceRef.port + name: SERVICE-PORT + type: string + - description: The AWS TargetGroup's TargetType + jsonPath: .spec.targetType + name: TARGET-TYPE + type: string + - description: The AWS TargetGroup's Amazon Resource Name + jsonPath: .spec.targetGroupARN + name: ARN + priority: 1 + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: TargetGroupBinding is the Schema for the TargetGroupBinding API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: TargetGroupBindingSpec defines the desired state of TargetGroupBinding + properties: + networking: + description: networking provides the networking setup for ELBV2 LoadBalancer + to access targets in TargetGroup. + properties: + ingress: + description: List of ingress rules to allow ELBV2 LoadBalancer + to access targets in TargetGroup. + items: + properties: + from: + description: List of peers which should be able to access + the targets in TargetGroup. At least one NetworkingPeer + should be specified. + items: + description: NetworkingPeer defines the source/destination + peer for networking rules. + properties: + ipBlock: + description: IPBlock defines an IPBlock peer. If specified, + none of the other fields can be set. + properties: + cidr: + description: CIDR is the network CIDR. Both IPV4 + or IPV6 CIDR are accepted. + type: string + required: + - cidr + type: object + securityGroup: + description: SecurityGroup defines a SecurityGroup + peer. If specified, none of the other fields can + be set. + properties: + groupID: + description: GroupID is the EC2 SecurityGroupID. + type: string + required: + - groupID + type: object + type: object + type: array + ports: + description: List of ports which should be made accessible + on the targets in TargetGroup. If ports is empty or unspecified, + it defaults to all ports with TCP. + items: + properties: + port: + anyOf: + - type: integer + - type: string + description: The port which traffic must match. 
When + NodePort endpoints(instance TargetType) is used, + this must be a numerical port. When Port endpoints(ip + TargetType) is used, this can be either numerical + or named port on pods. if port is unspecified, it + defaults to all ports. + x-kubernetes-int-or-string: true + protocol: + description: The protocol which traffic must match. + If protocol is unspecified, it defaults to TCP. + enum: + - TCP + - UDP + type: string + type: object + type: array + required: + - from + - ports + type: object + type: array + type: object + serviceRef: + description: serviceRef is a reference to a Kubernetes Service and + ServicePort. + properties: + name: + description: Name is the name of the Service. + type: string + port: + anyOf: + - type: integer + - type: string + description: Port is the port of the ServicePort. + x-kubernetes-int-or-string: true + required: + - name + - port + type: object + targetGroupARN: + description: targetGroupARN is the Amazon Resource Name (ARN) for + the TargetGroup. + type: string + targetType: + description: targetType is the TargetType of TargetGroup. If unspecified, + it will be automatically inferred. + enum: + - instance + - ip + type: string + required: + - serviceRef + - targetGroupARN + type: object + status: + description: TargetGroupBindingStatus defines the observed state of TargetGroupBinding + properties: + observedGeneration: + description: The generation observed by the TargetGroupBinding controller. 
+ format: int64 + type: integer + type: object + type: object + served: true + storage: false + subresources: + status: {} + - additionalPrinterColumns: + - description: The Kubernetes Service's name + jsonPath: .spec.serviceRef.name + name: SERVICE-NAME + type: string + - description: The Kubernetes Service's port + jsonPath: .spec.serviceRef.port + name: SERVICE-PORT + type: string + - description: The AWS TargetGroup's TargetType + jsonPath: .spec.targetType + name: TARGET-TYPE + type: string + - description: The AWS TargetGroup's Amazon Resource Name + jsonPath: .spec.targetGroupARN + name: ARN + priority: 1 + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: TargetGroupBinding is the Schema for the TargetGroupBinding API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: TargetGroupBindingSpec defines the desired state of TargetGroupBinding + properties: + ipAddressType: + description: ipAddressType specifies whether the target group is of + type IPv4 or IPv6. If unspecified, it will be automatically inferred. + enum: + - ipv4 + - ipv6 + type: string + networking: + description: networking defines the networking rules to allow ELBV2 + LoadBalancer to access targets in TargetGroup. 
+ properties: + ingress: + description: List of ingress rules to allow ELBV2 LoadBalancer + to access targets in TargetGroup. + items: + description: NetworkingIngressRule defines a particular set + of traffic that is allowed to access TargetGroup's targets. + properties: + from: + description: List of peers which should be able to access + the targets in TargetGroup. At least one NetworkingPeer + should be specified. + items: + description: NetworkingPeer defines the source/destination + peer for networking rules. + properties: + ipBlock: + description: IPBlock defines an IPBlock peer. If specified, + none of the other fields can be set. + properties: + cidr: + description: CIDR is the network CIDR. Both IPV4 + or IPV6 CIDR are accepted. + type: string + required: + - cidr + type: object + securityGroup: + description: SecurityGroup defines a SecurityGroup + peer. If specified, none of the other fields can + be set. + properties: + groupID: + description: GroupID is the EC2 SecurityGroupID. + type: string + required: + - groupID + type: object + type: object + type: array + ports: + description: List of ports which should be made accessible + on the targets in TargetGroup. If ports is empty or unspecified, + it defaults to all ports with TCP. + items: + description: NetworkingPort defines the port and protocol + for networking rules. + properties: + port: + anyOf: + - type: integer + - type: string + description: The port which traffic must match. When + NodePort endpoints(instance TargetType) is used, + this must be a numerical port. When Port endpoints(ip + TargetType) is used, this can be either numerical + or named port on pods. if port is unspecified, it + defaults to all ports. + x-kubernetes-int-or-string: true + protocol: + description: The protocol which traffic must match. + If protocol is unspecified, it defaults to TCP. 
+ enum: + - TCP + - UDP + type: string + type: object + type: array + required: + - from + - ports + type: object + type: array + type: object + nodeSelector: + description: node selector for instance type target groups to only + register certain nodes + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + serviceRef: + description: serviceRef is a reference to a Kubernetes Service and + ServicePort. + properties: + name: + description: Name is the name of the Service. + type: string + port: + anyOf: + - type: integer + - type: string + description: Port is the port of the ServicePort. 
+ x-kubernetes-int-or-string: true + required: + - name + - port + type: object + targetGroupARN: + description: targetGroupARN is the Amazon Resource Name (ARN) for + the TargetGroup. + minLength: 1 + type: string + targetType: + description: targetType is the TargetType of TargetGroup. If unspecified, + it will be automatically inferred. + enum: + - instance + - ip + type: string + required: + - serviceRef + - targetGroupARN + type: object + status: + description: TargetGroupBindingStatus defines the observed state of TargetGroupBinding + properties: + observedGeneration: + description: The generation observed by the TargetGroupBinding controller. + format: int64 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/values.yaml b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/values.yaml new file mode 100644 index 00000000..94456aba --- /dev/null +++ b/rime-kube-system/charts/aws-load-balancer-controller-crds-2-6-1/values.yaml @@ -0,0 +1 @@ +# intentionally left blank diff --git a/rime-kube-system/values.yaml b/rime-kube-system/values.yaml index c7fd4be1..e31d2e3b 100644 --- a/rime-kube-system/values.yaml +++ b/rime-kube-system/values.yaml @@ -6,12 +6,17 @@ rimeSystem: # For full reference, see https://github.com/kubernetes-sigs/external-dns/tree/v0.12.0/charts/external-dns externalDns: false # -- Whether to install the AWS Load Balancer Controller Helm chart. - # For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.4.2 + # For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.5.4 awsLoadBalancerController: false # -- Whether to install the Kubernetes Metrics Server Helm chart. 
# For full reference, see https://github.com/kubernetes-sigs/metrics-server/tree/v0.6.1 metricsServer: false - + # -- Whether to install the ingress-nginx Helm chart. + # For full reference, see https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx + ingressNginx: false + # -- Whether to install the Kserve Helm chart. + # For full reference, see https://github.com/kserve/kserve + kserve: false # -- For full reference, see https://github.com/cert-manager/cert-manager/tree/v1.10.0 # @default -- (see individual values in `values`.yaml) cert-manager: @@ -40,13 +45,27 @@ cert-manager: repository: robustintelligencehq/cert-manager-ctl tag: "v1.10.0" +# # -- For full reference, see https://github.com/kserve/kserve +# # @default -- (see individual values in `values`.yaml) +# kserve: +# kserve: +# # Do not use the Serverless mode; it requires Istio to be installed. +# # RawDeployment is enough for serving models internally with long-running +# # deployments. +# controller: +# deploymentMode: "RawDeployment" +# # We do not use ModelMesh; it is an advanced feature for serving multiple +# # models from a single container. +# modelmesh: +# enabled: false + # TODO CLD-1275 Incorporate these standard templates into an AWS example -# # -- For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.4.2 +# # -- For full reference, see https://github.com/kubernetes-sigs/aws-load-balancer-controller/tree/v2.6.1 # # @default -- (see individual values in `values`.yaml) # aws-load-balancer-controller: # image: # repository: "docker.io/robustintelligencehq/aws-alb-ingress-controller" -# tag: "v2.4.2" +# tag: "v2.6.1" # # -- EKS cluster name # clusterName: "" # serviceAccount: @@ -104,3 +123,51 @@ cert-manager: # tag: "v0.6.1" # imagePullSecrets: # - rimecreds + +# -- Ingress-nginx controller sub-chart. See https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx for all parameters.
+# @default -- (see individual values in `values`.yaml) +# ingress-nginx: +# enabled: true +# imagePullSecrets: +# - name: rimecreds +# controller: +# image: +# registry: "docker.io" +# image: "robustintelligencehq/ingress-nginx-controller" +# tag: "v1.8.1" +# digest: "sha256:bd54c330f73b17d0bf19f3ec3832b285d43a4c9fa5fe15f5a7accd3de706b438" +# scope: +# enabled: true +# # -- K8s namespace for the ingress +# namespace: "" +# ingressClassResource: +# name: ri +# controllerValue: k8s.io/ri +# admissionWebhooks: +# enabled: false +# service: +# targetPorts: +# http: http +# https: http +# # -- For full list of annotations, see +# # https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/service/annotations/ +# annotations: +# # -- Specifies the ARN of one or more certificates managed by the AWS Certificate Manager. +# service.beta.kubernetes.io/aws-load-balancer-ssl-cert: "" +# # -- NLB specification: either "internal" or "internet-facing" +# service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" +# # # -- Uncomment if using an IP allowlist +# # # (Set of CIDR routes to add to the allowlist for all ingresses. +# # # If not specified, all IP addresses are allowed.) +# # service.beta.kubernetes.io/aws-load-balancer-target-group-attributes: preserve_client_ip.enabled=true +# # Specifies the frontend ports with TLS listeners. 
+# service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https" +# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600" +# service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp" +# # Load balancer type: either "external" or "nlb-ip" +# service.beta.kubernetes.io/aws-load-balancer-type: "external" +# # Needed for routing to pod IP +# service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip" +# config: +# force-ssl-redirect: "false" +# ssl-redirect: "false" diff --git a/rime/Chart.yaml b/rime/Chart.yaml index 4b82b090..0748d9e2 100644 --- a/rime/Chart.yaml +++ b/rime/Chart.yaml @@ -7,7 +7,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 2.0.0 +version: 2.0.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
@@ -22,8 +22,10 @@ dependencies: repository: "https://charts.bitnami.com/bitnami" condition: mongodb.enabled - name: "ingress-nginx" - version: "4.2.0" + version: "4.7.1" repository: "https://kubernetes.github.io/ingress-nginx" + condition: ingress-nginx.enabled - name: "vault" version: "0.21.0" repository: "https://helm.releases.hashicorp.com" + condition: vault.global.enabled diff --git a/rime/README.md b/rime/README.md index d5dd5290..f4b85fd4 100644 --- a/rime/README.md +++ b/rime/README.md @@ -1,6 +1,6 @@ # rime -![Version: 2.0.0](https://img.shields.io/badge/Version-2.0.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v2.0.0](https://img.shields.io/badge/AppVersion-v2.0.0-informational?style=flat-square) +![Version: 2.0.1](https://img.shields.io/badge/Version-2.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v2.0.0](https://img.shields.io/badge/AppVersion-v2.0.0-informational?style=flat-square) A Helm chart for RIME's hosted services @@ -12,14 +12,14 @@ Kubernetes: `>=1.20.0-0` |------------|------|---------| | https://charts.bitnami.com/bitnami | mongodb | 12.1.27 | | https://helm.releases.hashicorp.com | vault | 0.21.0 | -| https://kubernetes.github.io/ingress-nginx | ingress-nginx | 4.2.0 | +| https://kubernetes.github.io/ingress-nginx | ingress-nginx | 4.7.1 | ## Values | Key | Type | Default | Description | |-----|------|---------|-------------| | external.mongo | object | `{"databaseName":"","enabled":false,"replicaSetName":"","secretName":"","url":"","urlPrefix":""}` | Whether to use an external MongoDB instance | -| external.vault | object | `{"enabled":false,"kvVersion":"","namespace":"","roleName":"","secretName":"","url":""}` | Whether to use an external Vault instance | +| external.vault | object | 
`{"enabled":false,"kvVersion":"","mountPath":"","namespace":"","roleName":"","secretName":"","url":""}` | Whether to use an external Vault instance | | ingress-nginx | object | (see individual values in `values`.yaml) | Ingress-nginx controller sub-chart. See https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx for all parameters. | | ingress-nginx.controller.scope.namespace | string | `""` | K8s namespace for the ingress | | ingress-nginx.controller.service.annotations | object | `{"service.beta.kubernetes.io/aws-load-balancer-backend-protocol":"tcp","service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout":"3600","service.beta.kubernetes.io/aws-load-balancer-nlb-target-type":"ip","service.beta.kubernetes.io/aws-load-balancer-scheme":"internet-facing","service.beta.kubernetes.io/aws-load-balancer-ssl-cert":"","service.beta.kubernetes.io/aws-load-balancer-ssl-ports":"https","service.beta.kubernetes.io/aws-load-balancer-type":"external"}` | For full list of annotations, see https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.2/guide/service/annotations/ | @@ -30,16 +30,26 @@ Kubernetes: `>=1.20.0-0` | mongodb.persistence.storageClass | string | `"expandable-storage"` | Name of the StorageClass for MongoDB. Should be of the form "mongo-storage-$NAMESPACE" | | rime | object | (see individual values in `values`.yaml) | Global variables used by all RIME services. | | rime.agentManagerServer | object | (see individual values in `values.yaml`) | `agentManagerServer` K8s-level configurations | +| rime.agentManagerServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. 
| | rime.authServer | object | (see individual values in `values.yaml`) | `authServer` K8s-level configurations | +| rime.authServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | +| rime.cacheServer | object | (see individual values in `values.yaml`) | `cacheServer` K8s-level configurations | +| rime.cacheServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.dataCollectorServer | object | (see individual values in `values.yaml`) | `dataCollectorServer` K8s-level configurations | +| rime.dataCollectorServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.datasetManagerServer | object | (see individual values in `values.yaml`) | `datasetManagerServer` K8s-level configurations | | rime.datasetManagerServer.config.storageBucketName | string | `""` | The bucket name of the S3 bucket used as the blob storage. | +| rime.datasetManagerServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.datasetManagerServer.serviceAccount | object | `{"annotations":{"eks.amazonaws.com/role-arn":""},"create":true,"labels":{},"name":""}` | Account used by services that need access to blob storage. 
| -| rime.datasetManagerServer.serviceAccount.annotations."eks.amazonaws.com/role-arn" | string | `""` | Specify ARN of IRSA-enabled Load Balancer Controller IAM role here | +| rime.datasetManagerServer.serviceAccount.annotations."eks.amazonaws.com/role-arn" | string | `""` | Specify ARN of IRSA-enabled Blob Storage IAM role here | | rime.domain | string | `""` | Base domain of the RIME web app, which will consist of `rime.${domain}` | | rime.dropDuplicates | object | (see individual values in `values.yaml`) | `dropDuplicates` K8s-level configurations | | rime.featureFlagServer | object | (see individual values in `values.yaml`) | `featureFlagServer` K8s-level configurations | +| rime.featureFlagServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | +| rime.featureFlagServer.serviceAccount | object | `{"annotations":{"eks.amazonaws.com/role-arn":""},"labels":{},"name":""}` | Account used by services that need access to the S3 license. | +| rime.featureFlagServer.serviceAccount.annotations."eks.amazonaws.com/role-arn" | string | `""` | Specify ARN of IRSA-enabled Blob Storage IAM role here | | rime.firewallServer | object | (see individual values in `values.yaml`) | `firewallServer` K8s-level configurations | +| rime.firewallServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml.
| | rime.firewallServer.scheduledCTCron | object | `{"annotations":{},"enabled":true,"labels":{},"name":"scheduled-ct-cron","schedule":"*/20 * * * *"}` | Configuration for scheduled Continuous Testing | | rime.frontendServer | object | (see individual values in `values.yaml`) | `frontendServer` K8s-level configurations | | rime.imageRegistryServer | object | (see individual values in `values.yaml`) | `imageRegistryServer` K8s-level configurations | @@ -53,16 +63,30 @@ Kubernetes: `>=1.20.0-0` | rime.initIndexes | object | (see individual values in `values.yaml`) | `initIndexes` K8s-level configurations | | rime.initMongoTLS | object | (see individual values in `values.yaml`) | `initMongoTLS` K8s-level configurations | | rime.initVault | object | (see individual values in `values.yaml`) | `initVault` K8s-level configurations | +| rime.initVault.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | +| rime.internalWorkerJobs | object | `{"existingConfigSecretName":""}` | Settings shared across internal worker jobs (e.g., `drop-duplicates`, `init-indexes`) | +| rime.internalWorkerJobs.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.modelTestingServer | object | (see individual values in `values.yaml`) | `modelTestingServer` K8s-level configurations | -| rime.monitoring | object | (see individual values in `values.yaml`) | `monitoring` (Prometheus metrics) K8s-level configurations | +| rime.modelTestingServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. 
NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | +| rime.monitoring | object | (see individual values in `values.yaml`) | `monitoring` (Prometheus metrics/Datadog) K8s-level configurations | +| rime.monitoring.datadogEnabled | bool | `true` | Whether to enable Datadog autodiscovery tags for all services on the RIME cluster | | rime.monitoring.enabled | bool | `true` | Whether to enable Prometheus metrics for all services on the RIME cluster | +| rime.monitoring.port | int | `8080` | Port to expose Prometheus metrics on | | rime.notificationsWorker | object | (see individual values in `values.yaml`) | `notificationsWorker` K8s-level configurations | +| rime.notificationsWorker.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.notificationsWorker.notificationsDigestCron | object | `{"annotations":{},"enabled":true,"labels":{},"name":"notifications-digest-cron","schedule":"*/20 * * * *"}` | Configuration for scheduled push notifications | | rime.rolloutRestart | object | (see individual values in `values.yaml`) | `rolloutRestart` K8s-level configurations | +| rime.scheduledSTCron | object | `{"annotations":{},"enabled":true,"labels":{},"name":"scheduled-st-cron","schedule":"*/20 * * * *"}` | Configuration for Scheduled Stress Testing | | rime.secrets | object | (see individual values in `values`.yaml) | Values for the internal RIME K8 secret | | rime.uploadServer | object | (see individual values in `values.yaml`) | `uploadServer` K8s-level configurations | +| rime.uploadServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. 
NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | rime.webServer | object | (see individual values in `values.yaml`) | `webServer` K8s-level configurations | +| rime.webServer.existingConfigSecretName | string | `""` | Use if MANUALLY creating a Secret to replace the default internal service configuration. NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. | | tls.autorotateEnabled | bool | `false` | Whether to automatically rotate TLS certificates for services (`enableCertManager` must be true to enable) | +| tls.certificateSpec | object | `{"issuerRef":{"group":"cert-manager.io","kind":"Issuer","name":""},"subject":{"organizations":["RobustIntelligence"]}}` | `spec` for Certificate object (https://cert-manager.io/docs/usage/certificate/). | +| tls.certificateSpec.issuerRef | object | `{"group":"cert-manager.io","kind":"Issuer","name":""}` | See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec Attributes listed below are the minimum required for the `issuerRef` property. | +| tls.certificateSpec.issuerRef.name | string | `""` | Will default to `rime-{{ .Release.Namespace }}-ca-issuer`. | +| tls.certificateSpec.subject | object | `{"organizations":["RobustIntelligence"]}` | See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec Attributes listed below are the minimum required for the `subject` property. 
| | tls.crossplaneEnabled | bool | `false` | Whether to enable mutual TLS for cross-plane (control plane to data plane) communications (`enableCertManager` must be true to enable) | | tls.enableCertManager | bool | `false` | Whether to enable the cert-manager service for issuing and managing TLS certificates within the cluster | | tls.grpcEnabled | bool | `false` | Whether to enable mutual TLS for REST communications (`enableCertManager` must be true to enable) TODO explain why this is here (I thought cluster internal was gRPC) | diff --git a/rime/templates/_helpers.tpl b/rime/templates/_helpers.tpl index 3144ffcf..102832ca 100644 --- a/rime/templates/_helpers.tpl +++ b/rime/templates/_helpers.tpl @@ -65,11 +65,34 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/part-of: {{ template "rime.name" . }} app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/owned-by: "ri" {{- if .Values.rime.commonAnnotations}} {{ toYaml .Values.rime.commonAnnotations }} {{- end }} {{- end -}} +{{/* +Monitoring annotations to add to pods. Expects a dict with `monitoring` and `name` keys: emits Prometheus scrape annotations when `.monitoring.enabled` is set and Datadog annotations when `.monitoring.datadogEnabled` is set. The Datadog `instances` payload is JSON, so it must not contain trailing commas. +*/}} +{{- define "rime.monitoringAnnotations" -}} +{{- if .monitoring.enabled}} +prometheus.io/scrape: "true" +prometheus.io/path: "/metrics" +prometheus.io/port: "{{ .monitoring.port }}" +{{- end }} +{{- if .monitoring.datadogEnabled }} +tags.datadoghq.com/service: "{{ .name }}" +ad.datadoghq.com/{{ .name }}.instances: | + [ + { + "collect_counters_with_distributions": true, + "send_distribution_counts_as_monotonic": true, + "metrics": ["*"] + } + ] +{{- end }} +{{- end -}} + {{/* Common flags passed to all our servers. Be careful when modifying these values!
*/}} @@ -85,6 +108,7 @@ common: mongoTLSEnabled: {{ .Values.tls.mongoEnabled }} restTLSEnabled: {{ .Values.tls.restEnabled }} vaultTLSDisabled: {{ .Values.tls.vaultDisabled }} + grpcTLSEnabled: {{ .Values.tls.grpcEnabled }} mongo: databaseName: {{ default "rime-store" .Values.external.mongo.databaseName }} urlPrefix: {{ default "mongodb+srv://" .Values.external.mongo.urlPrefix }} @@ -114,7 +138,11 @@ common: {{- else }} url: "{{ include "rime.fullname" . }}-vault-0.{{ include "rime.fullname" . }}-vault-internal:8200" {{- end }} + {{- if .Values.external.vault.mountPath }} + mountPath: {{ .Values.external.vault.mountPath }} + {{- else }} mountPath: "secret/" + {{- end }} {{- if .Values.external.vault.roleName }} roleName: {{ .Values.external.vault.roleName }} {{- end }} @@ -136,11 +164,11 @@ common: externalVaultCaPath: "/var/tmp/tls/external/vault/ca.crt" externalVaultCertPath: "/var/tmp/tls/external/vault/tls.crt" externalVaultKeyPath: "/var/tmp/tls/external/vault/tls.key" + logging: + verbose: {{ .Values.rime.verbose }} metrics: - enabled: "{{ .Values.rime.monitoring.enabled }}" - port: "{{ .Values.rime.monitoring.port }}" - configuration: - verbose: "{{ .Values.rime.verbose }}" + enabled: {{ .Values.rime.monitoring.enabled }} + port: {{ .Values.rime.monitoring.port }} connections: addresses: agentManagerServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.agentManagerServer.name }}:{{ .Values.rime.agentManagerServer.port }}" @@ -149,12 +177,108 @@ common: firewallServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.firewallServer.name }}:{{ .Values.rime.firewallServer.port }}" grpcWebServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }}:{{ .Values.rime.webServer.grpcPort }}" uploadServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }}:{{ .Values.rime.uploadServer.port }}" + cacheServerAddr: "{{ include "rime.fullname" . 
}}-{{ .Values.rime.cacheServer.name }}:{{ .Values.rime.cacheServer.port }}" {{- if .Values.rime.imageRegistryServer.enabled }} imageRegistryServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }}:{{ .Values.rime.imageRegistryServer.port }}" {{- end }} modelTestingServerAddr: "{{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }}:{{ .Values.rime.modelTestingServer.port }}" + crossServiceKeyRef: + secretName: {{ include "rime.generatedSecretsName" . }} + key: crossServiceKey {{- end }} +{{/* +Unique additions to the agentManagerServer's ConfigMap. +*/}} +{{- define "rime.agentManagerServer.serverArgs" -}} +agentManager: {} +{{- end -}} + +{{/* +Unique additions to the authServer's ConfigMap. +*/}} +{{- define "rime.authServer.serverArgs" -}} +auth: {} +{{- end -}} + +{{/* +Unique additions to the cacheServer's ConfigMap. +*/}} +{{- define "rime.cacheServer.serverArgs" -}} +cache: {} +{{- end -}} + +{{/* +Unique additions to the dataCollectorServer's ConfigMap. +*/}} +{{- define "rime.dataCollectorServer.serverArgs" -}} +dataCollector: {} +{{- end -}} + +{{/* +Unique additions to the datasetManagerServer's ConfigMap. +*/}} +{{- define "rime.datasetManagerServer.serverArgs" -}} +datasetManager: {} +{{- end -}} + +{{/* +Unique additions to the featureFlagServer's ConfigMap. +*/}} +{{- define "rime.featureFlagServer.serverArgs" -}} +featureFlag: {} +{{- end -}} + +{{/* +Unique additions to the firewallServer's ConfigMap. +*/}} +{{- define "rime.firewallServer.serverArgs" -}} +firewall: {} +{{- end -}} + +{{/* +Due to their large size, "rime.imageRegistryServer.*" ConfigMaps are defined directly in the main ConfigMap template. +*/}} + +{{/* +Unique additions to initVault's ConfigMap. +*/}} +{{- define "rime.initVault.serverArgs" -}} +initVault: {} +{{- end -}} + +{{/* +Unique additions to the modelTestingServer's ConfigMap. 
+*/}} +{{- define "rime.modelTestingServer.serverArgs" -}} +modelTests: + managedImages: + allow_external_custom_images: true + spark: + allowSparkJobs: {{ contains "rime-testing-engine-spark" .Values.rime.images.modelTestingImage.name }} +{{- end -}} + +{{/* +Unique additions to the notificationsWorker's ConfigMap. +*/}} +{{- define "rime.notificationsWorker.serverArgs" -}} +notificationsWorker: {} +{{- end -}} + +{{/* +Unique additions to the uploadServer's ConfigMap. +*/}} +{{- define "rime.uploadServer.serverArgs" -}} +upload: {} +{{- end -}} + +{{/* +Unique additions to the webServer's ConfigMap. +*/}} +{{- define "rime.webServer.serverArgs" -}} +webServer: {} +{{- end -}} + {{/* Return the service account name used by the services that need blob storage. The service account has read and write access to the S3 bucket used for the blob storage. @@ -169,6 +293,16 @@ the user to specify the service account name. {{- end -}} {{- end -}} +{{/* +Return the service account name used by the feature flag server to fetch +license files from s3, if enabled. +*/}} +{{- define "rime.featureFlagServer.serviceAccountName" -}} +{{- if .Values.rime.featureFlagServer.fetchLicenseFromS3 -}} + {{ default (printf "%s-%s" (include "rime.fullname" .) .Values.rime.featureFlagServer.name) .Values.rime.featureFlagServer.serviceAccount.name | trunc 63 | trimSuffix "-" }} +{{- end -}} +{{- end -}} + {{/* Return the service account name used by the image registry server to access the image registry used to store docker images. @@ -277,7 +411,14 @@ provides a name, we expect the secret to already exist. Return the name of the secret containing generated secrets used by RIME services */}} {{- define "rime.generatedSecretsName" -}} -{{- printf "%s-generated-secrets" (include "rime.fullname" .) }} +rime-generated-secrets +{{- end }} + +{{/* +Name of the issuer to be used for cert-manager Certificates for RIME services. 
+*/}} +{{- define "tls.certificateIssuerName" -}} +{{- default (printf "rime-%s-ca-issuer" .Release.Namespace) .Values.tls.certificateSpec.issuerRef.name }} {{- end }} {{/* @@ -294,6 +435,20 @@ Common environment variables used in all RIME services. secretKeyRef: name: {{ include "rime.generatedSecretsName" . }} key: crossServiceKey +- name: RIME_CUSTOMER_NAME + value: {{ .Values.rime.customerName }} +{{- end }} + +{{/* +Environment variables used in the feature flag services. +*/}} +{{- define "rime.featureFlagEnv" -}} +{{- if .Values.rime.featureFlagServer.fetchLicenseFromS3 }} +- name: RIME_S3_HOSTED_LICENSE_BUCKET + value: {{ .Values.rime.featureFlagServer.config.storageBucketName }} +- name: RIME_S3_LICENSE_BUCKET_REGION + value: {{ .Values.rime.featureFlagServer.config.storageBucketRegion}} +{{- end }} {{- end }} {{/* diff --git a/rime/templates/agent-manager-server/configmap.yaml b/rime/templates/agent-manager-server/configmap.yaml index 1fee8dc5..78f1a2af 100644 --- a/rime/templates/agent-manager-server/configmap.yaml +++ b/rime/templates/agent-manager-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.agentManagerServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - agentManager: {} +{{ include "rime.agentManagerServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/agent-manager-server/deployment.yaml b/rime/templates/agent-manager-server/deployment.yaml index 64a65b28..fbb8f2a4 100644 --- a/rime/templates/agent-manager-server/deployment.yaml +++ b/rime/templates/agent-manager-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.agentManagerServer.deployment.annotations }} {{- toYaml . 
| nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.agentManagerServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/agent-manager-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -57,9 +58,12 @@ spec: name: rimecreds {{- end }} key: .dockerconfigjson + optional: true {{- with .Values.rime.agentManagerServer.deployment.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.agentManagerServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -109,18 +113,11 @@ spec: - "/rime/rime" args: - "start-agent-manager-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--agent-manager-port={{ .Values.rime.agentManagerServer.port }}" - "--agent-manager-rest-port={{ .Values.rime.agentManagerServer.restPort }}" - "--docker-creds-json=$(DOCKER_CREDS_JSON)" - "--platform-address-domain={{ .Values.rime.domain }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.agentManagerServer.name }}-config mountPath: "/config" @@ -137,8 +134,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.agentManagerServer.name }}-config + {{- if ne .Values.rime.agentManagerServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.agentManagerServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.agentManagerServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/auth-ingress.yaml b/rime/templates/auth-ingress.yaml index 3bf1d7c2..4c6af841 100644 --- a/rime/templates/auth-ingress.yaml +++ b/rime/templates/auth-ingress.yaml @@ -8,7 +8,6 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} annotations: - kubernetes.io/ingress.class: "nginx" {{- if .Values.tls.restEnabled }} nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" nginx.ingress.kubernetes.io/proxy-ssl-secret: "{{.Release.Namespace}}/{{ include "rime.fullname" . }}-auth-ingress-tls" @@ -17,11 +16,19 @@ metadata: nginx.ingress.kubernetes.io/proxy-connect-timeout: "360" nginx.ingress.kubernetes.io/proxy-send-timeout: "360" nginx.ingress.kubernetes.io/proxy-read-timeout: "360" + nginx.ingress.kubernetes.io/configuration-snippet: | + add_header X-Frame-Options "sameorigin" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always; + add_header Cross-Origin-Embedder-Policy "credentialless" always; + add_header Cross-Origin-Opener-Policy "same-origin" always; + add_header Cross-Origin-Resource-Policy "same-site" always; {{- include "rime.annotations" . | nindent 4 }} {{- with .Values.rime.ingress.annotations }} {{- toYaml . | nindent 4 }} {{- end }} spec: + ingressClassName: {{ .Values.rime.ingress.ingressClassName }} {{- with .Values.rime.ingress.tls }} tls: {{- toYaml . | nindent 4 }} @@ -50,3 +57,6 @@ spec: name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.frontendServer.name }} port: number: {{ .Values.rime.frontendServer.port }} + {{- if .Values.rime.ingress.host }} + host: {{ .Values.rime.ingress.host }} + {{- end }} diff --git a/rime/templates/auth-server/configmap.yaml b/rime/templates/auth-server/configmap.yaml index 1f0e9d67..74295f37 100644 --- a/rime/templates/auth-server/configmap.yaml +++ b/rime/templates/auth-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.authServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - auth: {} +{{ include "rime.authServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/auth-server/deployment.yaml b/rime/templates/auth-server/deployment.yaml index 39101c0f..87fea28d 100644 --- a/rime/templates/auth-server/deployment.yaml +++ b/rime/templates/auth-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.authServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.authServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/auth-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -56,6 +57,8 @@ spec: {{- with .Values.rime.authServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.authServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -102,11 +105,10 @@ spec: - "/rime/rime" args: - "start-auth-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--auth-port={{ .Values.rime.authServer.port }}" + - "--user-apikey-lifetime-hours={{ .Values.rime.authServer.apiKeyLifetimeHours.userApiKeyLifetime }}" + - "--agent-apikey-lifetime-hours={{ .Values.rime.authServer.apiKeyLifetimeHours.agentApiKeyLifetime }}" volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }}-config mountPath: "/config" @@ -123,8 +125,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }}-config + {{- if ne .Values.rime.authServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.authServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/cache-server/configmap.yaml b/rime/templates/cache-server/configmap.yaml new file mode 100644 index 00000000..b55ffacb --- /dev/null +++ b/rime/templates/cache-server/configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.rime.cacheServer.enabled }} +{{- if eq .Values.rime.cacheServer.existingConfigSecretName "" }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-conf +data: + server.config: | +{{ include "rime.serverArgs" . | indent 4 }} +{{ include "rime.cacheServer.serverArgs" . 
| indent 4 }} +{{- end }} +{{- end }} diff --git a/rime/templates/cache-server/deployment.yaml b/rime/templates/cache-server/deployment.yaml new file mode 100644 index 00000000..7a44d937 --- /dev/null +++ b/rime/templates/cache-server/deployment.yaml @@ -0,0 +1,149 @@ +{{- if .Values.rime.cacheServer.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }} + labels: + app: {{ .Values.rime.cacheServer.name }} + {{- include "rime.labels" . | nindent 4 }} + {{- with .Values.rime.cacheServer.deployment.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} + {{- with .Values.rime.cacheServer.deployment.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: {{ .Values.rime.cacheServer.name }} + {{- include "rime.selectorLabels" . | nindent 6 }} + replicas: {{ .Values.rime.cacheServer.deployment.replicaCount }} + template: + metadata: + labels: + app: {{ .Values.rime.cacheServer.name }} + {{- include "rime.labels" . | nindent 8 }} + {{- with .Values.rime.cacheServer.deployment.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 8 }} + {{- with .Values.rime.cacheServer.deployment.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.cacheServer.name ) | nindent 8 }} + checksum/config: {{ include (print $.Template.BasePath "/cache-server/configmap.yaml") . | sha256sum }} + spec: + {{- with .Values.rime.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.cacheServer.deployment.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Values.rime.cacheServer.name }} + env: + {{- include "rime.commonEnv" . 
| nindent 12 }} + {{- with .Values.rime.cacheServer.deployment.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.cacheServer.name }} + image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" + imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} + ports: + - name: cs + containerPort: {{ .Values.rime.cacheServer.port }} + protocol: TCP + - name: cs-debug + containerPort: 6060 + protocol: TCP + {{ if .Values.tls.grpcEnabled }} + livenessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-tls", "-tls-no-verify", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + periodSeconds: 5 + timeoutSeconds: 5 + readinessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-tls", "-tls-no-verify", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + periodSeconds: 5 + timeoutSeconds: 5 + startupProbe: + exec: + command: [ "/bin/grpc_health_probe", "-tls", "-tls-no-verify", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + failureThreshold: 60 + periodSeconds: 5 + {{ else }} + livenessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + periodSeconds: 5 + readinessProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + periodSeconds: 5 + startupProbe: + exec: + command: [ "/bin/grpc_health_probe", "-addr=:{{ .Values.rime.cacheServer.port }}" ] + failureThreshold: 60 + periodSeconds: 5 + {{ end }} + resources: + {{- toYaml .Values.rime.cacheServer.deployment.resources | nindent 12 }} + command: + - "/rime/rime" + args: + - "start-cache-server" + - "--cache-port={{ .Values.rime.cacheServer.port }}" + - "--server-config-path=/config/server.config" + volumeMounts: + - name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-config + mountPath: "/config" + readOnly: true + {{- if .Values.tls.enableCertManager }} + - name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.cacheServer.name }}-tls + mountPath: /var/tmp/tls/common + readOnly: true + {{- end }} + {{- with .Values.rime.cacheServer.deployment.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + # Volumes are defined at the Pod level, then mounted into containers within that Pod + - name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-config + {{- if ne .Values.rime.cacheServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.cacheServer.existingConfigSecretName }} + {{- else }} + configMap: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-conf + {{- end }} + items: + - key: "server.config" + path: "server.config" + {{- if .Values.tls.enableCertManager }} + - name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-tls + secret: + secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-tls + {{- end }} + {{- with .Values.rime.cacheServer.deployment.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.cacheServer.deployment.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.cacheServer.deployment.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.cacheServer.deployment.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/rime/templates/cache-server/service.yaml b/rime/templates/cache-server/service.yaml new file mode 100644 index 00000000..af79906b --- /dev/null +++ b/rime/templates/cache-server/service.yaml @@ -0,0 +1,26 @@ +{{- if .Values.rime.cacheServer.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }} + labels: + {{- include "rime.labels" . | nindent 4 }} + {{- with .Values.rime.cacheServer.service.labels }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} + {{- with .Values.rime.cacheServer.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.rime.cacheServer.service.type }} + ports: + - port: {{ .Values.rime.cacheServer.port }} + targetPort: {{ .Values.rime.cacheServer.port }} + protocol: TCP + name: cs + selector: + app: {{ .Values.rime.cacheServer.name }} + {{- include "rime.labels" . | nindent 4 }} +{{- end }} diff --git a/rime/templates/certificates/agent-manager-certificate.yaml b/rime/templates/certificates/agent-manager-certificate.yaml index c40b8d27..be26b0e5 100644 --- a/rime/templates/certificates/agent-manager-certificate.yaml +++ b/rime/templates/certificates/agent-manager-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.agentManagerServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.agentManagerServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/auth-ingress-certificate.yaml b/rime/templates/certificates/auth-ingress-certificate.yaml index 07b1d11e..7f241fc9 100644 --- a/rime/templates/certificates/auth-ingress-certificate.yaml +++ b/rime/templates/certificates/auth-ingress-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . 
}}-auth-ingress-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-auth-ingress-server - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/auth-server-certificate.yaml b/rime/templates/certificates/auth-server-certificate.yaml index 683c01fb..ff6b8665 100644 --- a/rime/templates/certificates/auth-server-certificate.yaml +++ b/rime/templates/certificates/auth-server-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . 
}} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/cache-certificate.yaml b/rime/templates/certificates/cache-certificate.yaml new file mode 100644 index 00000000..05283ff6 --- /dev/null +++ b/rime/templates/certificates/cache-certificate.yaml @@ -0,0 +1,29 @@ +{{- if .Values.tls.enableCertManager }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-certificate +spec: + secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }}-tls + duration: 4320h # 180d + renewBefore: 744h # 31d +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} + isCA: false + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + rotationPolicy: Always + usages: + - server auth + - client auth + dnsNames: + - {{ include "rime.fullname" . }}-{{ .Values.rime.cacheServer.name }} + - localhost + issuerRef: + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} +{{- end }} diff --git a/rime/templates/certificates/data-collector-certificate.yaml b/rime/templates/certificates/data-collector-certificate.yaml index 0a8ec1a9..487317ee 100644 --- a/rime/templates/certificates/data-collector-certificate.yaml +++ b/rime/templates/certificates/data-collector-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.dataCollectorServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: - {{ include "rime.fullname" . 
}}-{{ .Values.rime.dataCollectorServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/dataset-manager-certificate.yaml b/rime/templates/certificates/dataset-manager-certificate.yaml index 996bd8a9..76416a51 100644 --- a/rime/templates/certificates/dataset-manager-certificate.yaml +++ b/rime/templates/certificates/dataset-manager-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/drop-duplicates-certificate.yaml b/rime/templates/certificates/drop-duplicates-certificate.yaml index 7e5e12fe..5f885f8e 100644 --- a/rime/templates/certificates/drop-duplicates-certificate.yaml +++ b/rime/templates/certificates/drop-duplicates-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.dropDuplicates.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . 
| nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.dropDuplicates.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/feature-flag-certificate.yaml b/rime/templates/certificates/feature-flag-certificate.yaml index a54ed820..6bcf4237 100644 --- a/rime/templates/certificates/feature-flag-certificate.yaml +++ b/rime/templates/certificates/feature-flag-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/firewall-server-certificate.yaml b/rime/templates/certificates/firewall-server-certificate.yaml index 2d8208cf..6c7dcc8b 100644 --- a/rime/templates/certificates/firewall-server-certificate.yaml +++ b/rime/templates/certificates/firewall-server-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . 
}}-{{ .Values.rime.firewallServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.firewallServer.name }}.{{ .Release.Namespace }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/frontend-server-certificate.yaml b/rime/templates/certificates/frontend-server-certificate.yaml index e7fe5529..b16c3bb4 100644 --- a/rime/templates/certificates/frontend-server-certificate.yaml +++ b/rime/templates/certificates/frontend-server-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.frontendServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.frontendServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . 
}} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/image-builder-job-certificate.yaml b/rime/templates/certificates/image-builder-job-certificate.yaml index bd20b1eb..c8c8a303 100644 --- a/rime/templates/certificates/image-builder-job-certificate.yaml +++ b/rime/templates/certificates/image-builder-job-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls duration: 2160h # 90d renewBefore: 360h # 15d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -22,7 +22,7 @@ spec: - image-builder-job - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/image-registry-certificate.yaml b/rime/templates/certificates/image-registry-certificate.yaml index 0971ae12..dc57f136 100644 --- a/rime/templates/certificates/image-registry-certificate.yaml +++ b/rime/templates/certificates/image-registry-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . 
}}-{{ .Values.rime.imageRegistryServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/ingress-certificate.yaml b/rime/templates/certificates/ingress-certificate.yaml index cec13475..6c31e56e 100644 --- a/rime/templates/certificates/ingress-certificate.yaml +++ b/rime/templates/certificates/ingress-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-ingress-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-ingress-server - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/init-cluster-metadata-certificate.yaml b/rime/templates/certificates/init-cluster-metadata-certificate.yaml index 8f22b8a3..130489cb 100644 --- a/rime/templates/certificates/init-cluster-metadata-certificate.yaml +++ b/rime/templates/certificates/init-cluster-metadata-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.initClusterMetadata.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . 
| nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.initClusterMetadata.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/init-indexes-certificate.yaml b/rime/templates/certificates/init-indexes-certificate.yaml index 1fa2378f..03db68bb 100644 --- a/rime/templates/certificates/init-indexes-certificate.yaml +++ b/rime/templates/certificates/init-indexes-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.initIndexes.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.initIndexes.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/init-vault-certificate.yaml b/rime/templates/certificates/init-vault-certificate.yaml index 5085c6a7..16f548d2 100644 --- a/rime/templates/certificates/init-vault-certificate.yaml +++ b/rime/templates/certificates/init-vault-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . 
}}-{{ .Values.rime.initVault.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.initVault.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/model-testing-certificate.yaml b/rime/templates/certificates/model-testing-certificate.yaml index 042b26cd..495f462c 100644 --- a/rime/templates/certificates/model-testing-certificate.yaml +++ b/rime/templates/certificates/model-testing-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . 
}} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/mongo-certificate.yaml b/rime/templates/certificates/mongo-certificate.yaml index c582677b..93dfa747 100644 --- a/rime/templates/certificates/mongo-certificate.yaml +++ b/rime/templates/certificates/mongo-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-mongo-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-mongodb-0.{{ include "rime.fullname" . }}-mongodb-headless.{{ .Release.Namespace }}.svc.cluster.local - 127.0.0.1 issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/notifications-worker-certificate.yaml b/rime/templates/certificates/notifications-worker-certificate.yaml index f459a7e4..94fd2e02 100644 --- a/rime/templates/certificates/notifications-worker-certificate.yaml +++ b/rime/templates/certificates/notifications-worker-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.notificationsWorker.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . 
}}-{{ .Values.rime.notificationsWorker.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/scheduled-ct-cron-certificate.yaml b/rime/templates/certificates/scheduled-ct-cron-certificate.yaml index 58b1b778..4409322b 100644 --- a/rime/templates/certificates/scheduled-ct-cron-certificate.yaml +++ b/rime/templates/certificates/scheduled-ct-cron-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-scheduled-ct-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -23,7 +23,7 @@ spec: - {{ include "rime.fullname" . }}-scheduled-ct-worker - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/scheduled-st-cron-certificate.yaml b/rime/templates/certificates/scheduled-st-cron-certificate.yaml new file mode 100644 index 00000000..2109ac09 --- /dev/null +++ b/rime/templates/certificates/scheduled-st-cron-certificate.yaml @@ -0,0 +1,29 @@ +{{- if .Values.tls.enableCertManager }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ include "rime.fullname" . }}-scheduled-st-certificate +spec: + secretName: {{ include "rime.fullname" . }}-scheduled-st-tls + duration: 4320h # 180d + renewBefore: 744h # 31d +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . 
| nindent 4 }} +{{- end}} + isCA: false + privateKey: + algorithm: RSA + encoding: PKCS1 + size: 2048 + rotationPolicy: Always + usages: + - server auth + - client auth + dnsNames: + - {{ include "rime.fullname" . }}-scheduled-st-worker + - localhost + issuerRef: + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} +{{- end }} diff --git a/rime/templates/certificates/upload-server-certificate.yaml b/rime/templates/certificates/upload-server-certificate.yaml index aecd47c0..6aa61274 100644 --- a/rime/templates/certificates/upload-server-certificate.yaml +++ b/rime/templates/certificates/upload-server-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/vault-certificate.yaml b/rime/templates/certificates/vault-certificate.yaml index a5720ffb..9614cdb1 100644 --- a/rime/templates/certificates/vault-certificate.yaml +++ b/rime/templates/certificates/vault-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-vault-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . 
| nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: ipAddresses: - 127.0.0.1 issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/certificates/web-server-certificate.yaml b/rime/templates/certificates/web-server-certificate.yaml index 3d76c224..b28ec9bd 100644 --- a/rime/templates/certificates/web-server-certificate.yaml +++ b/rime/templates/certificates/web-server-certificate.yaml @@ -7,9 +7,9 @@ spec: secretName: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }}-tls duration: 4320h # 180d renewBefore: 744h # 31d - subject: - organizations: - - RobustIntelligence +{{- with .Values.tls.certificateSpec.subject }} + subject: {{ toYaml . | nindent 4 }} +{{- end}} isCA: false privateKey: algorithm: RSA @@ -24,7 +24,7 @@ spec: - {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} - localhost issuerRef: - name: rime-{{ .Release.Namespace }}-ca-issuer - kind: Issuer - group: cert-manager.io + name: {{ include "tls.certificateIssuerName" . }} + kind: {{ .Values.tls.certificateSpec.issuerRef.kind }} + group: {{ .Values.tls.certificateSpec.issuerRef.group }} {{- end }} diff --git a/rime/templates/data-collector-server/configmap.yaml b/rime/templates/data-collector-server/configmap.yaml index e74c5ef5..63535870 100644 --- a/rime/templates/data-collector-server/configmap.yaml +++ b/rime/templates/data-collector-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.dataCollectorServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - dataCollector: {} +{{ include "rime.dataCollectorServer.serverArgs" . 
| indent 4 }} +{{- end }} diff --git a/rime/templates/data-collector-server/deployment.yaml b/rime/templates/data-collector-server/deployment.yaml index 4a3cd4c9..3fd38581 100644 --- a/rime/templates/data-collector-server/deployment.yaml +++ b/rime/templates/data-collector-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.dataCollectorServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.dataCollectorServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/data-collector-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -51,6 +52,8 @@ spec: {{- with .Values.rime.dataCollectorServer.deployment.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.dataCollectorServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -100,16 +103,9 @@ spec: - "/rime/rime" args: - "start-data-collector-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--data-collector-port={{ .Values.rime.dataCollectorServer.port }}" - "--data-collector-rest-port={{ .Values.rime.dataCollectorServer.restPort }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.dataCollectorServer.name }}-config mountPath: "/config" @@ -126,8 +122,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.dataCollectorServer.name }}-config + {{- if ne .Values.rime.dataCollectorServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.dataCollectorServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.dataCollectorServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/dataset-manager-server/cluster-role-binding.yaml b/rime/templates/dataset-manager-server/cluster-role-binding.yaml new file mode 100644 index 00000000..c3dd5ce1 --- /dev/null +++ b/rime/templates/dataset-manager-server/cluster-role-binding.yaml @@ -0,0 +1,18 @@ +{{- if .Values.rime.datasetManagerServer.serviceAccount.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} + labels: + {{- include "rime.labels" . | nindent 4 }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} +roleRef: + kind: ClusterRole + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/rime/templates/dataset-manager-server/cluster-role.yaml b/rime/templates/dataset-manager-server/cluster-role.yaml new file mode 100644 index 00000000..e911f35a --- /dev/null +++ b/rime/templates/dataset-manager-server/cluster-role.yaml @@ -0,0 +1,10 @@ +{{- if .Values.rime.datasetManagerServer.serviceAccount.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "rime.datasetManagerServer.serviceAccountName" . 
}} +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["*"] +{{- end }} diff --git a/rime/templates/dataset-manager-server/configmap.yaml b/rime/templates/dataset-manager-server/configmap.yaml index 039f88db..6d91d650 100644 --- a/rime/templates/dataset-manager-server/configmap.yaml +++ b/rime/templates/dataset-manager-server/configmap.yaml @@ -1,4 +1,5 @@ {{- if .Values.rime.datasetManagerServer.enabled }} +{{- if eq .Values.rime.datasetManagerServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -6,5 +7,6 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - datasetManager: {} +{{ include "rime.datasetManagerServer.serverArgs" . | indent 4 }} +{{- end }} {{- end }} diff --git a/rime/templates/dataset-manager-server/deployment.yaml b/rime/templates/dataset-manager-server/deployment.yaml index 17cc9c27..b9072647 100644 --- a/rime/templates/dataset-manager-server/deployment.yaml +++ b/rime/templates/dataset-manager-server/deployment.yaml @@ -35,8 +35,10 @@ spec: {{- with .Values.rime.datasetManagerServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.datasetManagerServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/dataset-manager-server/configmap.yaml") . | sha256sum }} spec: + priorityClassName: {{ .Values.rime.datasetManagerServer.deployment.priorityClassName }} {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} @@ -53,6 +55,8 @@ spec: {{- with .Values.rime.datasetManagerServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.datasetManagerServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -102,19 +106,12 @@ spec: - "/rime/rime" args: - "start-dataset-manager-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--dataset-manager-port={{ .Values.rime.datasetManagerServer.port }}" - "--dataset-manager-rest-port={{ .Values.rime.datasetManagerServer.restPort }}" - "--blob-store-bucket-name={{ .Values.rime.datasetManagerServer.config.storageBucketName }}" - "--blob-store-bucket-endpoint={{ .Values.rime.datasetManagerServer.config.endpoint }}" - "--blob-store-service-type={{ .Values.rime.datasetManagerServer.config.type }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }}-config mountPath: "/config" @@ -131,8 +128,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }}-config + {{- if ne .Values.rime.datasetManagerServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.datasetManagerServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.datasetManagerServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/dataset-manager-server/role-binding.yaml b/rime/templates/dataset-manager-server/role-binding.yaml new file mode 100644 index 00000000..eb6bdec2 --- /dev/null +++ b/rime/templates/dataset-manager-server/role-binding.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rime.datasetManagerServer.serviceAccount.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} + labels: + {{- include "rime.labels" . | nindent 4 }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} +subjects: + - kind: ServiceAccount + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} +{{- end }} diff --git a/rime/templates/dataset-manager-server/role.yaml b/rime/templates/dataset-manager-server/role.yaml new file mode 100644 index 00000000..ebe2dcd6 --- /dev/null +++ b/rime/templates/dataset-manager-server/role.yaml @@ -0,0 +1,17 @@ +{{- if .Values.rime.datasetManagerServer.serviceAccount.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "rime.datasetManagerServer.serviceAccountName" . }} + labels: + {{- include "rime.labels" . | nindent 4 }} + annotations: + {{- include "rime.annotations" . 
| nindent 4 }} +rules: + - apiGroups: [""] + resources: ["configmaps", "persistentvolumeclaims"] + verbs: ["*"] + - apiGroups: ["apps"] + resources: ["statefulsets", "statefulsets/scale"] + verbs: ["*"] +{{- end }} diff --git a/rime/templates/feature-flag-server/configmap.yaml b/rime/templates/feature-flag-server/configmap.yaml index 47875065..09ad0c2d 100644 --- a/rime/templates/feature-flag-server/configmap.yaml +++ b/rime/templates/feature-flag-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.featureFlagServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - featureFlag: {} +{{ include "rime.featureFlagServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/feature-flag-server/deployment.yaml b/rime/templates/feature-flag-server/deployment.yaml index 1b42e777..c8aca9b9 100644 --- a/rime/templates/feature-flag-server/deployment.yaml +++ b/rime/templates/feature-flag-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.featureFlagServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.featureFlagServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/feature-flag-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -44,13 +45,19 @@ spec: securityContext: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.rime.featureFlagServer.fetchLicenseFromS3 }} + serviceAccountName: {{ include "rime.featureFlagServer.serviceAccountName" . }} + {{- end }} containers: - name: {{ .Values.rime.featureFlagServer.name }} env: {{- include "rime.commonEnv" . | nindent 12 }} + {{- include "rime.featureFlagEnv" . | nindent 12 }} {{- with .Values.rime.featureFlagServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.featureFlagServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -100,16 +107,9 @@ spec: - "/rime/rime" args: - "start-feature-flag-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--feature-flag-port={{ .Values.rime.featureFlagServer.port }}" - "--feature-flag-rest-port={{ .Values.rime.featureFlagServer.restPort }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }}-config mountPath: "/config" @@ -126,8 +126,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }}-config + {{- if ne .Values.rime.featureFlagServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.featureFlagServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/feature-flag-server/service-account.yaml b/rime/templates/feature-flag-server/service-account.yaml new file mode 100644 index 00000000..c8cd17b8 --- /dev/null +++ b/rime/templates/feature-flag-server/service-account.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rime.featureFlagServer.fetchLicenseFromS3 -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "rime.featureFlagServer.serviceAccountName" . }} + labels: + {{- include "rime.labels" . 
| nindent 4 }} + {{- with .Values.rime.featureFlagServer.serviceAccount.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} + {{- with .Values.rime.featureFlagServer.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/rime/templates/firewall-server/configmap.yaml b/rime/templates/firewall-server/configmap.yaml index a1f51ccf..80089de4 100644 --- a/rime/templates/firewall-server/configmap.yaml +++ b/rime/templates/firewall-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.firewallServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - firewall: {} +{{ include "rime.firewallServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/firewall-server/deployment.yaml b/rime/templates/firewall-server/deployment.yaml index 48c86316..5b25966d 100644 --- a/rime/templates/firewall-server/deployment.yaml +++ b/rime/templates/firewall-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.firewallServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.firewallServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/firewall-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -52,6 +53,8 @@ spec: {{- with .Values.rime.firewallServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.firewallServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -101,16 +104,9 @@ spec: - "/rime/rime" args: - "start-firewall-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--firewall-port={{ .Values.rime.firewallServer.port }}" - "--firewall-rest-port={{ .Values.rime.firewallServer.restPort }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.firewallServer.name }}-config mountPath: "/config" @@ -127,8 +123,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.firewallServer.name }}-config + {{- if ne .Values.rime.firewallServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.firewallServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.firewallServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/firewall-server/scheduled-ct-cron-job.yaml b/rime/templates/firewall-server/scheduled-ct-cron-job.yaml index adf9ff0c..ed1fed34 100644 --- a/rime/templates/firewall-server/scheduled-ct-cron-job.yaml +++ b/rime/templates/firewall-server/scheduled-ct-cron-job.yaml @@ -31,6 +31,7 @@ spec: {{- with .Values.rime.firewallServer.scheduledCTCron.labels }} {{- toYaml . 
| nindent 12 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.firewallServer.scheduledCTCron.name ) | nindent 12 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: @@ -45,9 +46,6 @@ spec: command: - "/rime/scheduled_ct_worker" - "--server-config-path=/config/worker.config" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--timeout=4m" {{- if .Values.rime.verbose }} - "-verbose" @@ -65,8 +63,13 @@ spec: restartPolicy: Never volumes: - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-worker-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/frontend-server/deployment.yaml b/rime/templates/frontend-server/deployment.yaml index 86041314..1bfaf61e 100644 --- a/rime/templates/frontend-server/deployment.yaml +++ b/rime/templates/frontend-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.frontendServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.frontendServer.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: @@ -62,6 +63,8 @@ spec: {{- with .Values.rime.frontendServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.frontendServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.frontendImage.name }}" imagePullPolicy: {{ .Values.rime.images.frontendImage.pullPolicy }} ports: @@ -75,7 +78,8 @@ spec: {{- if .Values.tls.restEnabled }} scheme: HTTPS {{ end }} - periodSeconds: 5 + periodSeconds: 30 + timeoutSeconds: 15 readinessProbe: httpGet: path: / @@ -83,7 +87,8 @@ spec: {{- if .Values.tls.restEnabled }} scheme: HTTPS {{ end }} - periodSeconds: 5 + periodSeconds: 30 + timeoutSeconds: 15 startupProbe: httpGet: path: / @@ -92,7 +97,8 @@ spec: scheme: HTTPS {{ end }} failureThreshold: 60 - periodSeconds: 5 + periodSeconds: 30 + timeoutSeconds: 15 resources: {{- toYaml .Values.rime.frontendServer.deployment.resources | nindent 12 }} {{- if .Values.tls.enableCertManager }} diff --git a/rime/templates/generated-secrets.yaml b/rime/templates/generated-secrets.yaml index 65c82065..8d44dd0b 100644 --- a/rime/templates/generated-secrets.yaml +++ b/rime/templates/generated-secrets.yaml @@ -9,3 +9,4 @@ data: {{- $secretData := (get $secret "data") | default dict }} jwtSecret: {{ (get $secretData "jwtSecret") | default (randAlphaNum 32 | b64enc) }} crossServiceKey: {{ (get $secretData "crossServiceKey") | default (randAlphaNum 32 | b64enc) }} + openaiApiKey: {{ (get $secretData "openaiApiKey") | default (.Values.rime.secrets.openai.ApiKey | b64enc | quote) }} diff --git a/rime/templates/image-registry-server/_configmap.tpl b/rime/templates/image-registry-server/_configmap.tpl new file mode 100644 index 00000000..237106c7 --- /dev/null +++ b/rime/templates/image-registry-server/_configmap.tpl @@ -0,0 +1,166 @@ +{{/* +Unique additions to the imageRegistryServer's server.config ConfigMap. 
+*/}} +{{- define "rime.imageRegistryServer.serverArgs" -}} +{{- toYaml .Values.rime.imageRegistryServer.config }} +{{- end -}} +{{/* +Unique additions to the imageRegistryServer's image_builder_job_configmap.config ConfigMap. +*/}} +{{- define "rime.imageRegistryServer.imageBuilderJobConfigMap" -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: job-conf-placeholder +immutable: true +{{- end -}} +{{/* +Unique additions to the imageRegistryServer's image_builder_job.config ConfigMap. +*/}} +{{- define "rime.imageRegistryServer.imageBuilderJob" -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: job-placeholder +spec: + # Terminate job after at most 1 hours; see + # https://kubernetes.io/docs/concepts/workloads/controllers/job/#job-termination-and-cleanup + activeDeadlineSeconds: 3600 + # TTL job 48 hours after finished; see + # https://kubernetes.io/docs/concepts/workloads/controllers/job/#ttl-mechanism-for-finished-jobs + ttlSecondsAfterFinished: 172800 + template: + metadata: + labels: + {{- include "rime.labels" . | nindent 8 }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.labels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 8 }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.annotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.imageRegistryServer.imageRegistryJob.name ) | nindent 8 }} + spec: + {{- with .Values.rime.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + # TODO(Figure out service account naming scheme) + serviceAccountName: {{ include "rime.imageRegistryServer.imageRegistryJob.serviceAccountName" . 
}} + initContainers: + # This init container is designed to wait until the source image + # required by the main container can be pulled thereby ensuring + # a strict temporal ordering of dependent builder jobs. + - name: {{ .Chart.Name }}-src-waiter + # The source image name must be filled in by the job creator. + image: "" + imagePullPolicy: {{ .Values.rime.images.imageBuilderImage.pullPolicy }} + command: ['sh', '-c', 'true'] + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.rime.images.imageBuilderImage.registry }}/{{ .Values.rime.images.imageBuilderImage.name }}" + imagePullPolicy: {{ .Values.rime.images.imageBuilderImage.pullPolicy }} + {{- if .Values.rime.imageRegistryServer.imageRegistryJob.privilegedOverride }} + securityContext: + privileged: true + {{- end}} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraEnv }} + env: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.resources }} + resources: + {{- toYaml . | nindent 10 }} + {{- end }} + # This command depends on setting environmental variables: + # * SOURCE - source repo uri (without the version tag). + # * SOURCE_AUTH_MODE - auth mode for using the source image. + # * IMAGE_NAME - the name used by RIME to identify the new image. + # * DESTINATION - destination repo uri (without the version tag). + # * DESTINATION_AUTH_MODE - auth mode for writing the destination image. + # * VERSION - version for the source and destination image. + # * AUTH_TOKEN - the auth token for internal APIs. + # to configure the target image being built. + command: + - "/builder/build_image.sh" + args: + - "--source=$(SOURCE):$(VERSION)" + - "--source_auth_mode=$(SOURCE_AUTH_MODE)" + # Currently only docker auth files can be added and we add this + # flag whether or not it is required for the source image. 
+ {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} + - "--source_auth_file=/auth/.docker/config.json" + {{- end }} + - "--workingdir=/build-config/" + - "--destination=$(DESTINATION):$(VERSION)" + - "--destination_auth_mode=$(DESTINATION_AUTH_MODE)" + # Currently only docker auth files can be added and we add this + # flag whether or not it is required for the destination image. + {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} + - "--destination_auth_file=/auth/.docker/config.json" + {{- end }} + - "--image_name=$(IMAGE_NAME)" + - "--ca_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/ca.crt" + - "--cert_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/tls.crt" + - "--key_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/tls.key" + - "--image_registry_addr={{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }}:{{ .Values.rime.imageRegistryServer.restPort }}" + - "--enable_cert_manager={{ .Values.tls.enableCertManager }}" + - "--auth_token=$(AUTH_TOKEN)" + volumeMounts: + # This mounts the docker credentials used for pulling the base image from Docker. + # This is only mounted if a dockerSecretName is provided to the image registry module. + {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} + - name: docker-config + mountPath: "/auth/.docker" + {{- end }} + {{- if .Values.tls.enableCertManager }} + - name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls + mountPath: /var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls + readOnly: true + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraVolumeMounts }} + {{- toYaml . 
| nindent 10 }} + {{- end }} + restartPolicy: Never + volumes: + # This provides the volume containing the Docker secrets but is only + # constructed if a dockerSecretName is provided to the image registry module. + {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} + - name: docker-config + projected: + sources: + - secret: + name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} + items: + - key: .dockerconfigjson + path: config.json + {{- end }} + {{- if .Values.tls.enableCertManager }} + - name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls + secret: + secretName: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + # Mount an additional volume containing the Dockerfile. + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.rime.imageRegistryServer.imageRegistryJob.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + backoffLimit: {{ .Values.rime.imageRegistryServer.imageRegistryJob.backoffLimit }} +{{- end -}} diff --git a/rime/templates/image-registry-server/configmap.yaml b/rime/templates/image-registry-server/configmap.yaml index 3cfeeb86..dabfc8fd 100644 --- a/rime/templates/image-registry-server/configmap.yaml +++ b/rime/templates/image-registry-server/configmap.yaml @@ -6,154 +6,9 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - {{- toYaml .Values.rime.imageRegistryServer.config | nindent 4 }} +{{ include "rime.imageRegistryServer.serverArgs" . 
| indent 4 }} image_builder_job_configmap.config: | - apiVersion: v1 - kind: ConfigMap - metadata: - name: job-conf-placeholder - immutable: true +{{ include "rime.imageRegistryServer.imageBuilderJobConfigMap" . | indent 4 }} image_builder_job.config: | - apiVersion: batch/v1 - kind: Job - metadata: - name: job-placeholder - spec: - # Terminate job after at most 1 hours; see - # https://kubernetes.io/docs/concepts/workloads/controllers/job/#job-termination-and-cleanup - activeDeadlineSeconds: 3600 - # TTL job 48 hours after finished; see - # https://kubernetes.io/docs/concepts/workloads/controllers/job/#ttl-mechanism-for-finished-jobs - ttlSecondsAfterFinished: 172800 - template: - metadata: - labels: - {{- include "rime.labels" . | nindent 12 }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.labels }} - {{- toYaml . | nindent 12 }} - {{- end }} - annotations: - {{- include "rime.annotations" . | nindent 12 }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.annotations }} - {{- toYaml . | nindent 12 }} - {{- end }} - spec: - {{- with .Values.rime.images.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.securityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} - # TODO(Figure out service account naming scheme) - serviceAccountName: {{ include "rime.imageRegistryServer.imageRegistryJob.serviceAccountName" . }} - initContainers: - # This init container is designed to wait until the source image - # required by the main container can be pulled thereby ensuring - # a strict temporal ordering of dependent builder jobs. - - name: {{ .Chart.Name }}-src-waiter - # The source image name must be filled in by the job creator. 
- image: "" - imagePullPolicy: {{ .Values.rime.images.imageBuilderImage.pullPolicy }} - command: ['sh', '-c', 'true'] - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.rime.images.imageBuilderImage.registry }}/{{ .Values.rime.images.imageBuilderImage.name }}" - imagePullPolicy: {{ .Values.rime.images.imageBuilderImage.pullPolicy }} - {{- if .Values.rime.imageRegistryServer.imageRegistryJob.privilegedOverride }} - securityContext: - privileged: true - {{- end}} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraEnv }} - env: - {{- toYaml . | nindent 14 }} - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.resources }} - resources: - {{- toYaml . | nindent 14 }} - {{- end }} - # This command depends on setting environmental variables: - # * SOURCE - source repo uri (without the version tag). - # * SOURCE_AUTH_MODE - auth mode for using the source image. - # * IMAGE_NAME - the name used by RIME to identify the new image. - # * DESTINATION - destination repo uri (without the version tag). - # * DESTINATION_AUTH_MODE - auth mode for writing the destination image. - # * VERSION - version for the source and destination image. - # to configure the target image being built. - command: - - "/builder/build_image.sh" - args: - - "--source=$(SOURCE):$(VERSION)" - - "--source_auth_mode=$(SOURCE_AUTH_MODE)" - # Currently only docker auth files can be added and we add this - # flag whether or not it is required for the source image. - {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} - - "--source_auth_file=/auth/.docker/config.json" - {{- end }} - - "--workingdir=/build-config/" - - "--destination=$(DESTINATION):$(VERSION)" - - "--destination_auth_mode=$(DESTINATION_AUTH_MODE)" - # Currently only docker auth files can be added and we add this - # flag whether or not it is required for the destination image. 
- {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} - - "--destination_auth_file=/auth/.docker/config.json" - {{- end }} - - "--image_name=$(IMAGE_NAME)" - - "--ca_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/ca.crt" - - "--cert_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/tls.crt" - - "--key_path=/var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls/tls.key" - - "--image_registry_addr={{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }}:{{ .Values.rime.imageRegistryServer.restPort }}" - - "--enable_cert_manager={{ .Values.tls.enableCertManager }}" - volumeMounts: - # This mounts the docker credentials used for pulling the base image from Docker. - # This is only mounted if a dockerSecretName is provided to the image registry module. - {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} - - name: docker-config - mountPath: "/auth/.docker" - {{- end }} - {{- if .Values.tls.enableCertManager }} - - name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls - mountPath: /var/tmp/{{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-tls - readOnly: true - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraVolumeMounts }} - {{- toYaml . | nindent 14 }} - {{- end }} - restartPolicy: Never - volumes: - # This provides the volume containing the Docker secrets but is only - # constructed if a dockerSecretName is provided to the image registry module. 
- {{- if .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} - - name: docker-config - projected: - sources: - - secret: - name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.dockerSecretName }} - items: - - key: .dockerconfigjson - path: config.json - {{- end }} - {{- if .Values.tls.enableCertManager }} - - name: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls - secret: - secretName: {{ .Values.rime.imageRegistryServer.imageRegistryJob.name }}-{{ .Release.Namespace }}-tls - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.extraVolumes }} - {{- toYaml . | nindent 12 }} - {{- end }} - # Mount an additional volume containing the Dockerfile. - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.tolerations }} - tolerations: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.affinity }} - affinity: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.rime.imageRegistryServer.imageRegistryJob.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 12 }} - {{- end }} - backoffLimit: {{ .Values.rime.imageRegistryServer.imageRegistryJob.backoffLimit }} +{{ include "rime.imageRegistryServer.imageBuilderJob" . | indent 4 }} {{- end }} diff --git a/rime/templates/image-registry-server/deployment.yaml b/rime/templates/image-registry-server/deployment.yaml index 2f58faba..b0d8d418 100644 --- a/rime/templates/image-registry-server/deployment.yaml +++ b/rime/templates/image-registry-server/deployment.yaml @@ -35,6 +35,7 @@ spec: {{- with .Values.rime.imageRegistryServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.uploadServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/image-registry-server/configmap.yaml") . 
| sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -53,6 +54,8 @@ spec: {{- with .Values.rime.imageRegistryServer.deployment.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.imageRegistryServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -107,21 +110,11 @@ spec: - "/rime/rime" args: - "start-image-registry-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--image-builder-job-configmap-file=/config/image_builder_job_configmap.config" - "--image-builder-job-file=/config/image_builder_job.config" - "--image-registry-port={{ .Values.rime.imageRegistryServer.port }}" - "--image-registry-rest-port={{ .Values.rime.imageRegistryServer.restPort }}" - {{- if .Values.rime.verbose }} - - "--verbose" - {{- end }} - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }}-config mountPath: "/config" diff --git a/rime/templates/ingress.yaml b/rime/templates/ingress.yaml index 84148967..2e8bfd8f 100644 --- a/rime/templates/ingress.yaml +++ b/rime/templates/ingress.yaml @@ -8,14 +8,13 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} annotations: - kubernetes.io/ingress.class: "nginx" {{- if .Values.tls.restEnabled }} nginx.ingress.kubernetes.io/auth-url: https://{{ include "rime.fullname" . }}-{{ .Values.rime.authServer.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.rime.authServer.port}}/v1/auth/validate {{- else }} nginx.ingress.kubernetes.io/auth-url: http://{{ include "rime.fullname" . 
}}-{{ .Values.rime.authServer.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.rime.authServer.port}}/v1/auth/validate {{- end }} nginx.ingress.kubernetes.io/auth-method: POST - nginx.ingress.kubernetes.io/auth-response-headers: X-Rime-User, X-Rime-Api-Key, X-Rime-Api-Key-Name, X-Rime-Workspace + nginx.ingress.kubernetes.io/auth-response-headers: X-Rime-User, X-Rime-Api-Key, X-Rime-Api-Key-Name, X-Rime-Workspace, X-Rime-Agent, X-Rime-External-Agent-Register {{- if .Values.tls.restEnabled }} nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" nginx.ingress.kubernetes.io/proxy-ssl-secret: "{{.Release.Namespace}}/{{ include "rime.fullname" . }}-ingress-tls" @@ -24,6 +23,13 @@ metadata: nginx.ingress.kubernetes.io/proxy-connect-timeout: "360" nginx.ingress.kubernetes.io/proxy-send-timeout: "360" nginx.ingress.kubernetes.io/proxy-read-timeout: "360" + nginx.ingress.kubernetes.io/configuration-snippet: | + add_header X-Frame-Options "sameorigin" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always; + add_header Cross-Origin-Embedder-Policy "credentialless" always; + add_header Cross-Origin-Opener-Policy "same-origin" always; + add_header Cross-Origin-Resource-Policy "same-site" always; {{- with .Values.rime.ingress.annotations }} {{- toYaml . | nindent 4 }} {{- end }} @@ -32,16 +38,10 @@ spec: tls: {{- toYaml . | nindent 4 }} {{- end }} + ingressClassName: {{ .Values.rime.ingress.ingressClassName }} rules: - http: paths: - - path: /internal/images - pathType: Prefix - backend: - service: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }} - port: - number: {{ .Values.rime.imageRegistryServer.restPort }} - path: /internal/secrets pathType: Prefix backend: @@ -84,6 +84,7 @@ spec: name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} + {{- if .Values.rime.datasetManagerServer.enabled }} - path: /v1-beta/datasets pathType: Prefix backend: @@ -105,6 +106,14 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }} port: number: {{ .Values.rime.datasetManagerServer.restPort }} + - path: /v1-beta/customer-managed-key + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.datasetManagerServer.name }} + port: + number: {{ .Values.rime.datasetManagerServer.restPort }} + {{- end }} - path: /v1/feature-results pathType: Prefix backend: @@ -119,18 +128,18 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} - - path: /v1/feature-flags + - path: /v1/category-results pathType: Prefix backend: service: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }} + name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: - number: {{ .Values.rime.featureFlagServer.restPort }} - - path: /internal/feature-flags + number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1/feature-flags pathType: Prefix backend: service: - name: {{ include "rime.fullname" . }}-feature-flag-server + name: {{ include "rime.fullname" . }}-{{ .Values.rime.featureFlagServer.name }} port: number: {{ .Values.rime.featureFlagServer.restPort }} - path: /v1/agents @@ -210,7 +219,14 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }} port: number: {{ .Values.rime.modelTestingServer.restPort }} - - path: /v1/logs + - path: /v1-beta/file-scan-results + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.modelTestingServer.name }} + port: + number: {{ .Values.rime.modelTestingServer.restPort }} + - path: /v1-beta/logs pathType: Prefix backend: service: @@ -224,6 +240,14 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} + {{- if .Values.rime.imageRegistryServer.enabled }} + - path: /internal/images + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }} + port: + number: {{ .Values.rime.imageRegistryServer.restPort }} - path: /v1/images pathType: Prefix backend: @@ -231,7 +255,8 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.imageRegistryServer.name }} port: number: {{ .Values.rime.imageRegistryServer.restPort }} - - path: /v1-beta/jobs + {{- end }} + - path: /v1/jobs pathType: Prefix backend: service: @@ -252,6 +277,13 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1-beta/projects + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} + port: + number: {{ .Values.rime.webServer.grpcRestPort }} - path: /v1/stress-tests pathType: Prefix backend: @@ -280,6 +312,13 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1-beta/test-runs + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} + port: + number: {{ .Values.rime.webServer.grpcRestPort }} - path: /v1/workspace pathType: Prefix backend: @@ -287,6 +326,13 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1-beta/schedules + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.webServer.name }} + port: + number: {{ .Values.rime.webServer.grpcRestPort }} - path: /v1-beta/integrations pathType: Prefix backend: @@ -294,7 +340,21 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} port: number: {{ .Values.rime.webServer.grpcRestPort }} - - path: /internal/config-validator + - path: /v1/validation + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} + port: + number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1/config + pathType: Prefix + backend: + service: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }} + port: + number: {{ .Values.rime.webServer.grpcRestPort }} + - path: /v1-beta/config-validator pathType: Prefix backend: service: @@ -322,3 +382,6 @@ spec: name: {{ include "rime.fullname" . }}-{{ .Values.rime.frontendServer.name }} port: number: {{ .Values.rime.frontendServer.port }} + {{- if .Values.rime.ingress.host }} + host: {{ .Values.rime.ingress.host }} + {{- end }} diff --git a/rime/templates/init-jobs/drop-duplicates-job.yaml b/rime/templates/init-jobs/drop-duplicates-job.yaml index 4d123a23..0169daca 100644 --- a/rime/templates/init-jobs/drop-duplicates-job.yaml +++ b/rime/templates/init-jobs/drop-duplicates-job.yaml @@ -33,6 +33,7 @@ spec: {{- with .Values.rime.dropDuplicates.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.dropDuplicates.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: @@ -72,8 +73,13 @@ spec: restartPolicy: OnFailure volumes: - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . 
}}-worker-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/init-jobs/init-cluster-metadata-job.yaml b/rime/templates/init-jobs/init-cluster-metadata-job.yaml index 1209c390..da80144c 100644 --- a/rime/templates/init-jobs/init-cluster-metadata-job.yaml +++ b/rime/templates/init-jobs/init-cluster-metadata-job.yaml @@ -14,7 +14,7 @@ metadata: # NOTE: post-upgrade relies on the database-setup script being idempotent. "helm.sh/hook": post-install,post-upgrade # Hooks are sorted in ascending order -> this should run after the index creation job. - "helm.sh/hook-weight": "4" + "helm.sh/hook-weight": "5" "helm.sh/hook-delete-policy": before-hook-creation {{- include "rime.annotations" . | nindent 4 }} {{- with .Values.rime.initClusterMetadata.annotations }} @@ -33,11 +33,15 @@ spec: {{- with .Values.rime.initClusterMetadata.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.initClusterMetadata.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.rime.featureFlagServer.fetchLicenseFromS3 }} + serviceAccountName: {{ include "rime.featureFlagServer.serviceAccountName" . }} + {{- end }} {{- with .Values.rime.initClusterMetadata.securityContext }} securityContext: {{- toYaml . | nindent 8 }} @@ -46,6 +50,7 @@ spec: - name: {{ .Values.rime.initClusterMetadata.name }} env: {{- include "rime.commonEnv" . | nindent 12 }} + {{- include "rime.featureFlagEnv" . | nindent 12 }} {{- include "rime.webAppHostEnv" . 
| nindent 12 }} - name: SMTP_EMAIL valueFrom: @@ -107,9 +112,6 @@ spec: - "/rime/dbworker" args: - "init-cluster-metadata" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/worker.config" - "-overwrite-feature-flags={{ .Values.rime.initClusterMetadata.overwriteLicense }}" {{if .Values.rime.imageRegistryServer.enabled }} @@ -136,8 +138,13 @@ spec: restartPolicy: OnFailure volumes: - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-worker-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/init-jobs/init-indexes-job.yaml b/rime/templates/init-jobs/init-indexes-job.yaml index ab638389..6150e73e 100644 --- a/rime/templates/init-jobs/init-indexes-job.yaml +++ b/rime/templates/init-jobs/init-indexes-job.yaml @@ -33,6 +33,7 @@ spec: {{- with .Values.rime.initIndexes.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.initIndexes.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: @@ -72,8 +73,13 @@ spec: restartPolicy: OnFailure volumes: - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . 
}}-worker-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/init-jobs/init-mongo-tls-job.yaml b/rime/templates/init-jobs/init-mongo-tls-job.yaml index ef3151c1..0e9e1c5a 100644 --- a/rime/templates/init-jobs/init-mongo-tls-job.yaml +++ b/rime/templates/init-jobs/init-mongo-tls-job.yaml @@ -33,6 +33,7 @@ spec: {{- with .Values.rime.initMongoTLS.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.uploadServer.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: diff --git a/rime/templates/init-jobs/init-vault-configmap.yaml b/rime/templates/init-jobs/init-vault-configmap.yaml index 53f2c896..1e61cb15 100644 --- a/rime/templates/init-jobs/init-vault-configmap.yaml +++ b/rime/templates/init-jobs/init-vault-configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.initVault.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: worker.config: | {{ include "rime.serverArgs" . | indent 4 }} - initVault: {} +{{ include "rime.initVault.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/init-jobs/init-vault-job.yaml b/rime/templates/init-jobs/init-vault-job.yaml index fa2f61f5..e9e33a78 100644 --- a/rime/templates/init-jobs/init-vault-job.yaml +++ b/rime/templates/init-jobs/init-vault-job.yaml @@ -14,7 +14,7 @@ metadata: # NOTE: post-upgrade relies on the database-setup script being idempotent. "helm.sh/hook": post-install,post-upgrade # Hooks are sorted in ascending order. - "helm.sh/hook-weight": "6" + "helm.sh/hook-weight": "4" "helm.sh/hook-delete-policy": before-hook-creation {{- include "rime.annotations" . | nindent 4 }} {{- with .Values.rime.initVault.annotations }} @@ -33,6 +33,7 @@ spec: {{- with .Values.rime.initVault.annotations }} {{- toYaml . 
| nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.uploadServer.name ) | nindent 8 }} spec: {{- with .Values.rime.images.imagePullSecrets }} imagePullSecrets: @@ -69,8 +70,13 @@ spec: {{- if .Values.tls.enableCertManager }} volumes: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.initVault.name }}-config + {{- if ne .Values.rime.initVault.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.initVault.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.initVault.name }}-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/model-testing-server/configmap.yaml b/rime/templates/model-testing-server/configmap.yaml index db66264d..87021f8f 100644 --- a/rime/templates/model-testing-server/configmap.yaml +++ b/rime/templates/model-testing-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.modelTestingServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,11 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - modelTests: - managedImages: - allow_external_custom_images: true - spark: - allowSparkJobs: {{ contains "rime-testing-engine-spark" .Values.rime.images.modelTestingImage.name }} - crossServiceKeyRef: - secretName: {{ include "rime.generatedSecretsName" . }} - key: crossServiceKey +{{ include "rime.modelTestingServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/model-testing-server/deployment.yaml b/rime/templates/model-testing-server/deployment.yaml index 98d5d9a6..ff9aa0f1 100644 --- a/rime/templates/model-testing-server/deployment.yaml +++ b/rime/templates/model-testing-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.modelTestingServer.deployment.annotations }} {{- toYaml . 
| nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.uploadServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/model-testing-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -53,6 +54,8 @@ spec: {{- with .Values.rime.modelTestingServer.deployment.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.modelTestingServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -108,19 +111,9 @@ spec: - "/rime/rime" args: - "start-model-testing-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--model-testing-port={{ .Values.rime.modelTestingServer.port }}" - "--model-testing-rest-port={{ .Values.rime.modelTestingServer.restPort }}" - {{- if .Values.rime.verbose }} - - "--verbose" - {{- end }} - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }}-config mountPath: "/config" @@ -137,8 +130,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.modelTestingServer.name }}-config + {{- if ne .Values.rime.modelTestingServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.modelTestingServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.modelTestingServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/notifications-worker/configmap.yaml b/rime/templates/notifications-worker/configmap.yaml index 9481ea06..e6e0f115 100644 --- a/rime/templates/notifications-worker/configmap.yaml +++ b/rime/templates/notifications-worker/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.notificationsWorker.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - notificationsWorker: {} +{{ include "rime.notificationsWorker.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/notifications-worker/deployment.yaml b/rime/templates/notifications-worker/deployment.yaml index e6734fa5..5512fbad 100644 --- a/rime/templates/notifications-worker/deployment.yaml +++ b/rime/templates/notifications-worker/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.notificationsWorker.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.notificationsWorker.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/notifications-worker/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -51,6 +52,8 @@ spec: {{- with .Values.rime.notificationsWorker.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.notificationsWorker.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} resources: @@ -60,10 +63,6 @@ spec: args: - "start-notifications-worker" - "--server-config-path=/config/server.config" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.notificationsWorker.name }}-config mountPath: "/config" @@ -80,8 +79,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.notificationsWorker.name }}-config + {{- if ne .Values.rime.notificationsWorker.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.notificationsWorker.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.notificationsWorker.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/notifications-worker/notifications-digest-cron-job.yaml b/rime/templates/notifications-worker/notifications-digest-cron-job.yaml index 30390813..c7862bac 100644 --- a/rime/templates/notifications-worker/notifications-digest-cron-job.yaml +++ b/rime/templates/notifications-worker/notifications-digest-cron-job.yaml @@ -44,9 +44,6 @@ spec: imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} command: - "/rime/digest_notif_worker" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/worker.config" volumeMounts: - name: {{ include "rime.fullname" . }}-worker-config @@ -60,8 +57,13 @@ spec: {{- include "rime.externalTLSSecretVolumeMounts" . 
| nindent 16 }} volumes: - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-worker-conf + {{- end }} items: - key: "worker.config" path: "worker.config" diff --git a/rime/templates/restart/deployment-restart-cron-job.yaml b/rime/templates/restart/deployment-restart-cron-job.yaml index b8544316..c2657708 100644 --- a/rime/templates/restart/deployment-restart-cron-job.yaml +++ b/rime/templates/restart/deployment-restart-cron-job.yaml @@ -5,7 +5,7 @@ metadata: name: {{ include "rime.fullname" . }}-{{ .Values.rime.rolloutRestart.name }} spec: concurrencyPolicy: Forbid - schedule: '0 7 1-7 * 0' # the first Sunday of each month, at 7:00 am + schedule: '0 7 * * 0' # the first Sunday of each month, at 7:00 am jobTemplate: spec: backoffLimit: 2 @@ -22,8 +22,10 @@ spec: - name: rollout-restart image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: "{{ .Values.rime.images.backendImage.pullPolicy }}" - command: - - "/rime/rollout-restart" - - "-timeout=8m" - - "-debug" + command: [ "/bin/sh","-c" ] + args: [ "if [ $(date +\\%d) -le 07 ]; \ + then /rime/rollout-restart --mongo-secret-name={{ include "rime.fullname" . }}-mongo-tls \ + --ca-secret-name={{ include "rime.fullname" . }}-root-ca \ + --mongo-bitnami-secret-name={{ include "rime.fullname" . 
}}-mongo-bitnami-tls \ + -timeout=8m -debug; fi" ] {{- end }} diff --git a/rime/templates/restart/deployment-restart-role.yaml b/rime/templates/restart/deployment-restart-role.yaml index 82006d93..a615b178 100644 --- a/rime/templates/restart/deployment-restart-role.yaml +++ b/rime/templates/restart/deployment-restart-role.yaml @@ -11,4 +11,7 @@ rules: - apiGroups: ["apps"] resources: ["deployments", "statefulsets", "statefulsets/scale"] verbs: ["*"] + - apiGroups: [ "" ] + resources: [ secrets ] + verbs: [ "*" ] {{- end }} diff --git a/rime/templates/scheduled-st-cron-job.yaml b/rime/templates/scheduled-st-cron-job.yaml new file mode 100644 index 00000000..58e4ea30 --- /dev/null +++ b/rime/templates/scheduled-st-cron-job.yaml @@ -0,0 +1,85 @@ +{{- if .Values.rime.scheduledSTCron.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "rime.fullname" . }}-{{ .Values.rime.scheduledSTCron.name }} + labels: + app: {{ .Values.rime.scheduledSTCron.name }} + {{- include "rime.labels" . | nindent 4 }} + {{- with .Values.rime.scheduledSTCron.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 4 }} + {{- with .Values.rime.scheduledSTCron.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + schedule: "{{ .Values.rime.scheduledSTCron.schedule }}" + concurrencyPolicy: Forbid + jobTemplate: + spec: + template: + metadata: + labels: + app: {{ .Values.rime.scheduledSTCron.name }} + {{- include "rime.labels" . | nindent 12 }} + {{- with .Values.rime.scheduledSTCron.labels }} + {{- toYaml . | nindent 12 }} + {{- end }} + annotations: + {{- include "rime.annotations" . | nindent 12 }} + {{- with .Values.rime.scheduledSTCron.labels }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.scheduledSTCron.name ) | nindent 12 }} + spec: + {{- with .Values.rime.images.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 12 }} + {{- end }} + containers: + - name: "scheduled-st-worker" + env: + {{- include "rime.commonEnv" . | nindent 16 }} + image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" + imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} + command: + - "/rime/scheduled_st_worker" + - "--server-config-path=/config/worker.config" + - "--timeout=4m" + {{- if .Values.rime.verbose }} + - "-verbose" + {{- end }} + volumeMounts: + - name: {{ include "rime.fullname" . }}-worker-config + mountPath: "/config" + readOnly: true + {{- if .Values.tls.enableCertManager }} + - name: {{ include "rime.fullname" . }}-scheduled-st-tls + mountPath: "/var/tmp/tls/common" + readOnly: true + {{- end }} + {{- include "rime.externalTLSSecretVolumeMounts" . | nindent 16 }} + restartPolicy: Never + volumes: + - name: {{ include "rime.fullname" . }}-worker-config + {{- if ne .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.internalWorkerJobs.existingConfigSecretName }} + {{- else }} + configMap: + name: {{ include "rime.fullname" . }}-worker-conf + {{- end }} + items: + - key: "worker.config" + path: "worker.config" + {{- if .Values.tls.enableCertManager }} + - name: {{ include "rime.fullname" . }}-scheduled-st-tls + secret: + secretName: {{ include "rime.fullname" . }}-scheduled-st-tls + {{- end }} + {{- include "rime.externalTLSSecretVolumes" . 
| nindent 12 }} + backoffLimit: 0 + activeDeadlineSeconds: 300 + {{- end }} diff --git a/rime/templates/upload-server/configmap.yaml b/rime/templates/upload-server/configmap.yaml index 710ef89d..2e609db1 100644 --- a/rime/templates/upload-server/configmap.yaml +++ b/rime/templates/upload-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.uploadServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - upload: {} +{{ include "rime.uploadServer.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/templates/upload-server/deployment.yaml b/rime/templates/upload-server/deployment.yaml index 2f21ca90..6c0b4503 100644 --- a/rime/templates/upload-server/deployment.yaml +++ b/rime/templates/upload-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.uploadServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.uploadServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/upload-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -53,6 +54,8 @@ spec: {{- with .Values.rime.uploadServer.deployment.extraEnv }} {{- toYaml . 
| nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.uploadServer.name }} image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -107,15 +110,8 @@ spec: - "/rime/rime" args: - "start-upload-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--upload-port={{ .Values.rime.uploadServer.port }}" - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }}-config mountPath: "/config" @@ -132,8 +128,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }}-config + {{- if ne .Values.rime.uploadServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.uploadServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.uploadServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/web-server/configmap.yaml b/rime/templates/web-server/configmap.yaml index b162a4f8..bb0581af 100644 --- a/rime/templates/web-server/configmap.yaml +++ b/rime/templates/web-server/configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.webServer.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,4 +6,5 @@ metadata: data: server.config: | {{ include "rime.serverArgs" . | indent 4 }} - webServer: {} +{{ include "rime.webServer.serverArgs" . 
| indent 4 }} +{{- end }} diff --git a/rime/templates/web-server/deployment.yaml b/rime/templates/web-server/deployment.yaml index d822e89f..78891cf0 100644 --- a/rime/templates/web-server/deployment.yaml +++ b/rime/templates/web-server/deployment.yaml @@ -34,6 +34,7 @@ spec: {{- with .Values.rime.webServer.deployment.annotations }} {{- toYaml . | nindent 8 }} {{- end }} + {{- include "rime.monitoringAnnotations" (dict "monitoring" .Values.rime.monitoring "name" .Values.rime.webServer.name ) | nindent 8 }} checksum/config: {{ include (print $.Template.BasePath "/web-server/configmap.yaml") . | sha256sum }} spec: {{- with .Values.rime.images.imagePullSecrets }} @@ -53,6 +54,14 @@ spec: {{- with .Values.rime.webServer.deployment.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} + - name: RI_SERVICE_NAME + value: {{ .Values.rime.webServer.name }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ include "rime.commonSecretName" . }} + key: openaiApiKey + optional: true image: "{{ .Values.rime.images.backendImage.registry}}/{{ .Values.rime.images.backendImage.name }}" imagePullPolicy: {{ .Values.rime.images.backendImage.pullPolicy }} ports: @@ -105,19 +114,9 @@ spec: - "/rime/rime" args: - "start-grpc-web-server" - {{- if .Values.tls.grpcEnabled }} - - "--auth-mode=tls" - {{ end }} - "--server-config-path=/config/server.config" - "--grpc-web-port={{ .Values.rime.webServer.grpcPort }}" - "--native-rest-web-port={{ .Values.rime.webServer.port }}" - {{- if .Values.rime.verbose }} - - "--verbose" - {{- end }} - {{- if .Values.rime.monitoring.enabled }} - - "--metrics-exposition-enabled=true" - - "--metrics-exposition-port={{ .Values.rime.monitoring.port }}" - {{- end }} volumeMounts: - name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }}-config mountPath: "/config" @@ -134,8 +133,13 @@ spec: volumes: # Volumes are defined at the Pod level, then mounted into containers within that Pod - name: {{ include "rime.fullname" . 
}}-{{ .Values.rime.webServer.name }}-config + {{- if ne .Values.rime.webServer.existingConfigSecretName "" }} + secret: + secretName: {{ .Values.rime.webServer.existingConfigSecretName }} + {{- else }} configMap: name: {{ include "rime.fullname" . }}-{{ .Values.rime.webServer.name }}-conf + {{- end }} items: - key: "server.config" path: "server.config" diff --git a/rime/templates/worker-configmap.yaml b/rime/templates/worker-configmap.yaml index 69940533..bdddffa1 100644 --- a/rime/templates/worker-configmap.yaml +++ b/rime/templates/worker-configmap.yaml @@ -1,3 +1,4 @@ +{{- if eq .Values.rime.internalWorkerJobs.existingConfigSecretName "" }} apiVersion: v1 kind: ConfigMap metadata: @@ -5,3 +6,4 @@ metadata: data: worker.config: | {{ include "rime.serverArgs" . | indent 4 }} +{{- end }} diff --git a/rime/values.yaml b/rime/values.yaml index b3c8efe8..57e8b79a 100644 --- a/rime/values.yaml +++ b/rime/values.yaml @@ -39,6 +39,8 @@ rime: clientID: "" # issuerURL (str): Issuer URL for OIDC application (optional) issuerURL: "" + openai: + ApiKey: "" # -- Parameters for Robust Intelligence Docker images (update accordingly if using a private registry) images: @@ -72,6 +74,12 @@ rime: port: 5012 grpcPort: 5017 grpcRestPort: 15017 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" + apiKeyLifetimeHours: + userApiKeyLifetime: 6480 # nine months + agentApiKeyLifetime: 6480 # nine months # Service for authServer service: type: ClusterIP @@ -98,7 +106,6 @@ rime: replicaCount: 1 resources: limits: - cpu: 2000m memory: 2Gi requests: cpu: 1200m @@ -141,6 +148,17 @@ rime: name: "feature-flag-server" port: 5013 restPort: 15013 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. 
+ # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" + # -- Account used by services that need access to the S3 license. + serviceAccount: + name: "" + annotations: { + # -- Specify ARN of IRSA-enabled Blob Storage IAM role here + eks.amazonaws.com/role-arn: "" + } + labels: {} # Service for feature-flag-server. service: type: ClusterIP @@ -167,11 +185,69 @@ rime: replicaCount: 3 resources: limits: + memory: 500Mi + requests: cpu: 100m memory: 90Mi + + # SecurityContext to add to the deployment. Default is just set to not run as root. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core + securityContext: {} + + # Extra env variables to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#envvar-v1-core + extraEnv: [] + + # Extra volumes to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. + # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumedevice-v1-core + extraVolumes: [] + + # Extra volume mounts to add to the deployment. Make sure these don't + # conflict with the ones already defined in the deployment. 
+ # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#volumemount-v1-core + + extraVolumeMounts: [] + + ## Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + nodeSelector: {} + + ## Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + tolerations: [] + + ## Affinity for pod assignment (evaluated as template) + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + + # -- `cacheServer` K8s-level configurations + # @default -- (see individual values in `values.yaml`) + cacheServer: + enabled: true + name: "cache-server" + port: 5018 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" + # Service for cache-server. + service: + type: ClusterIP + annotations: {} + labels: {} + deployment: + annotations: {} + labels: {} + # Replica count for the cache-server. The cache server is a singleton so this should always be 1. + replicaCount: 1 + resources: + limits: + cpu: 100m + memory: 1000Mi requests: cpu: 100m - memory: 90Mi + memory: 500Mi # SecurityContext to add to the deployment. Default is just set to not run as root. # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core @@ -211,6 +287,9 @@ rime: name: "agent-manager-server" port: 5016 restPort: 15016 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # Service for agent-manager-server. 
service: type: ClusterIP @@ -237,8 +316,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -280,6 +358,9 @@ rime: name: "data-collector-server" port: 5015 restPort: 15015 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # Service for data-collector-server. service: type: ClusterIP @@ -306,8 +387,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -349,6 +429,9 @@ rime: name: "firewall-server" port: 5002 restPort: 15002 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # Service for firewall-server. service: type: ClusterIP @@ -376,8 +459,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -422,6 +504,15 @@ rime: annotations: {} labels: {} + # -- Configuration for Scheduled Stress Testing + scheduledSTCron: + name: "scheduled-st-cron" + enabled: true + # By default, run the CRON job every 20 minutes. + schedule: "*/20 * * * *" + annotations: {} + labels: {} + # -- `datasetManagerServer` K8s-level configurations # @default -- (see individual values in `values.yaml`) datasetManagerServer: @@ -429,6 +520,9 @@ rime: enabled: false port: 5009 restPort: 15009 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" config: # -- The bucket name of the S3 bucket used as the blob storage. 
storageBucketName: "" @@ -439,7 +533,7 @@ rime: create: true name: "" annotations: { - # -- Specify ARN of IRSA-enabled Load Balancer Controller IAM role here + # -- Specify ARN of IRSA-enabled Blob Storage IAM role here eks.amazonaws.com/role-arn: "" } labels: {} @@ -464,14 +558,14 @@ rime: type: Utilization averageUtilization: 60 deployment: + priorityClassName: "system-node-critical" annotations: {} labels: {} # Replica count for the dataset-manager-server. Only used if HPA is disabled. replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -514,6 +608,9 @@ rime: port: 5555 grpcPort: 5011 grpcRestPort: 15011 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # Service for web-server service: type: ClusterIP @@ -547,7 +644,6 @@ rime: replicaCount: 1 resources: limits: - cpu: 500m memory: 1000Mi requests: cpu: 300m @@ -590,6 +686,9 @@ rime: name: "upload-server" port: 5000 restPort: 15001 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # Service for upload-server. service: type: ClusterIP @@ -623,7 +722,6 @@ rime: replicaCount: 3 resources: limits: - cpu: 500m memory: 1000Mi requests: cpu: 300m @@ -664,6 +762,9 @@ rime: # @default -- (see individual values in `values.yaml`) notificationsWorker: name: "notifications-worker" + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. 
+ existingConfigSecretName: "" # -- Configuration for scheduled push notifications notificationsDigestCron: name: "notifications-digest-cron" @@ -694,8 +795,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -737,6 +837,9 @@ rime: name: "model-testing-server" port: 5003 restPort: 15003 + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" service: type: ClusterIP annotations: {} @@ -770,8 +873,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -829,15 +931,15 @@ rime: # baseImage: # name: "" # registryType: dockerhub - # # -- Uncomment this section if using the Managed Images feature on AWS (Elastic Container Registry) - # ecr: - # registryId: "" - # repositoryPrefix: - # # -- Uncomment this section if using the Managed Images feature on AWS (Google Artifact Repository) - # gar: - # location: "" - # project: "" - # repository: "" + # # -- Uncomment this section if using the Managed Images feature on AWS (Elastic Container Registry) + # ecr: + # registryID: "" + # repositoryPrefix: + # # # -- Uncomment this section if using the Managed Images feature on GCP (Google Artifact Registry) + # # gar: + # # location: "" + # # project: "" + # # repository: "" # ServiceAccount used by the image registry server. This service account # must have access to the image registry backend that you have configured. 
@@ -873,8 +975,7 @@ rime: replicaCount: 3 resources: limits: - cpu: 100m - memory: 90Mi + memory: 500Mi requests: cpu: 100m memory: 90Mi @@ -914,7 +1015,8 @@ rime: name: "image-builder-job" privilegedOverride: false dockerSecretName: rimecreds - + annotations: {} + labels: {} # # -- Uncomment this section if using a dedicated model testing node group # tolerations: # - key: "dedicated" @@ -936,7 +1038,6 @@ rime: labels: {} resources: limits: - cpu: 1500m memory: 16Gi requests: cpu: 1500m @@ -978,7 +1079,7 @@ rime: # @default -- (see individual values in `values.yaml`) frontendServer: name: "frontend-server" - port: 80 + port: 8000 # Service for frontend-server. service: type: ClusterIP @@ -1005,11 +1106,10 @@ rime: replicaCount: 3 resources: limits: - cpu: 1000m - memory: 2.5Gi + memory: 4Gi requests: - cpu: 800m - memory: 2Gi + cpu: 2500m + memory: 3Gi # SecurityContext to add to the deployment. Default is just set to not run as root. # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#securitycontext-v1-core @@ -1047,6 +1147,9 @@ rime: initVault: name: "init-vault" enabled: true + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. + existingConfigSecretName: "" # If a pod fails in debug mode, it will sleep for a time so the pod can be inspected. debug: true backoffLimit: 2 @@ -1143,35 +1246,62 @@ rime: annotations: {} labels: {} + # -- Settings shared across internal worker jobs (e.g., `drop-duplicates`, `init-indexes`) + internalWorkerJobs: + # -- Use if MANUALLY creating a Secret to replace the default internal service configuration. + # NOTE: This Secret's schema MUST match that of the corresponding default defined in /templates/**/configmap.yaml. 
+ existingConfigSecretName: "" + # -- `ingress` K8s-level configurations # @default -- (see individual values in `values.yaml`) ingress: + ingressClassName: ri + host: "" annotations: { # # Uncomment if using externalDns (from rime-kube-system) and fill out the hostname # # accordingly (e.g., "rime.${domain}") # external-dns.alpha.kubernetes.io/hostname: "" + nginx.ingress.kubernetes.io/affinity: "cookie", + nginx.ingress.kubernetes.io/affinity-mode: "persistent", + nginx.ingress.kubernetes.io/session-cookie-expires: "172800", + nginx.ingress.kubernetes.io/session-cookie-max-age: "172800", + nginx.ingress.kubernetes.io/session-cookie-path: "/", + nginx.ingress.kubernetes.io/session-cookie-httponly: "true", + nginx.ingress.kubernetes.io/session-cookie-secure: "true", } labels: {} tls: [] - # -- `monitoring` (Prometheus metrics) K8s-level configurations + auth-ingress: + annotations: { + nginx.ingress.kubernetes.io/affinity: "cookie", + nginx.ingress.kubernetes.io/affinity-mode: "persistent", + nginx.ingress.kubernetes.io/session-cookie-expires: "172800", + nginx.ingress.kubernetes.io/session-cookie-max-age: "172800", + nginx.ingress.kubernetes.io/session-cookie-path: "/", + nginx.ingress.kubernetes.io/session-cookie-httponly: "true", + nginx.ingress.kubernetes.io/session-cookie-secure: "true", + } + + # -- `monitoring` (Prometheus metrics/Datadog) K8s-level configurations # @default -- (see individual values in `values.yaml`) monitoring: - port: 8080 - # -- Whether to enable Prometheus metrics for all services on the RIME cluster enabled: true + # -- Port to expose Prometheus metrics on + port: 8080 + # -- Whether to enable Datadog autodiscovery tags for all services on the RIME cluster + datadogEnabled: true # -- MongoDB sub-chart. See https://artifacthub.io/packages/helm/bitnami/mongodb for all parameters. 
# @default -- (see individual values in `values`.yaml) mongodb: enabled: true + priorityClassName: "system-node-critical" image: registry: "docker.io" repository: "robustintelligencehq/mongodb" tag: "5.0.9-debian-11-r7" - pullSecrets: - - rimecreds architecture: "replicaset" replicaCount: 3 arbiter: @@ -1187,7 +1317,6 @@ mongodb: cpu: "1500m" limits: memory: "8Gi" - cpu: "2000m" persistence: enabled: true @@ -1200,16 +1329,8 @@ mongodb: cluster-autoscaler.kubernetes.io/safe-to-evict: "false" ad.datadoghq.com/mongodb.check_names: '["mongo"]' ad.datadoghq.com/mongodb.init_configs: '[{}]' - ad.datadoghq.com/mongodb.instances: | - [ - { - "hosts": ["%%host%%:%%port%%"], - "database": "rime-store", - "additional_metrics": ["collection"], - "collections": ["project", "docstore-notif-setting", "docstore-firewall", "UserV0"] - } - ] ad.datadoghq.com/mongodb.logs: '[{"source":"mongodb","service":"mongodb"}]' + # # -- Uncomment if enabling mutual TLS for MongoDB (see `tls.mongoEnabled`) # tls: # image: @@ -1228,20 +1349,22 @@ mongodb: # -- Ingress-nginx controller sub-chart. See https://artifacthub.io/packages/helm/ingress-nginx/ingress-nginx for all parameters. 
# @default -- (see individual values in `values`.yaml) ingress-nginx: + enabled: true imagePullSecrets: - name: rimecreds controller: image: registry: "docker.io" image: "robustintelligencehq/ingress-nginx-controller" - tag: "v1.3.0" - digest: "sha256:067673df26a65ec5c2d5b30f25db869bad4d7d391fc81882250134577e581ef0" + tag: "v1.8.1" + digest: "sha256:bd54c330f73b17d0bf19f3ec3832b285d43a4c9fa5fe15f5a7accd3de706b438" scope: enabled: true # -- K8s namespace for the ingress namespace: "" ingressClassResource: - enabled: false + name: ri + controllerValue: k8s.io/ri admissionWebhooks: enabled: false service: @@ -1290,12 +1413,11 @@ vault: # These Resource Limits are in line with node requirements in the # Vault Reference Architecture for a Small Cluster resources: + limits: + memory: 500Mi requests: memory: 64Mi cpu: 250m - limits: - memory: 128Mi - cpu: 500m # For HA configuration and because we need to manually init the vault, # we need to define custom readiness/liveness Probe settings readinessProbe: @@ -1368,6 +1490,8 @@ external: namespace: "" ## -- The vault kv version kvVersion: "" + ## -- The vault mount path + mountPath: "" # -- Whether to use an external MongoDB instance mongo: enabled: false @@ -1391,6 +1515,20 @@ external: tls: # -- Whether to enable the cert-manager service for issuing and managing TLS certificates within the cluster enableCertManager: false + # -- `spec` for Certificate object (https://cert-manager.io/docs/usage/certificate/). + certificateSpec: + # -- See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Attributes listed below are the minimum required for the `subject` property. + subject: + organizations: + - RobustIntelligence + # -- See https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Attributes listed below are the minimum required for the `issuerRef` property. + issuerRef: + # -- Will default to `rime-{{ .Release.Namespace }}-ca-issuer`. 
+ name: "" + kind: Issuer + group: cert-manager.io # -- Whether to automatically rotate TLS certificates for services # (`enableCertManager` must be true to enable) autorotateEnabled: false