diff --git a/.gitignore b/.gitignore index e648d2f2..85a9afbb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ *.dll *.so *.dylib -bin/* +bin Dockerfile.cross # Test binary, build with `go test -c` diff --git a/charts/.gitignore b/charts/.gitignore deleted file mode 100644 index a2286348..00000000 --- a/charts/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -# Helm dependency charts -*/charts/ -*.tgz - -# Helm lock file -*/Chart.lock diff --git a/charts/QUICKSTART.md b/charts/QUICKSTART.md deleted file mode 100644 index c1f226e7..00000000 --- a/charts/QUICKSTART.md +++ /dev/null @@ -1,287 +0,0 @@ -# AlphaTrion Helm Chart - Quick Start Guide - -This guide will help you quickly deploy AlphaTrion on Kubernetes using Helm. - -## Prerequisites - -- Kubernetes cluster (1.19+) -- Helm 3.0+ -- kubectl configured to access your cluster -- **PostgreSQL database** (external, version 12 or later) -- **ClickHouse database** (optional, for tracing - see [Quick ClickHouse Setup](#optional-clickhouse-setup)) - -## Quick Install (Local Development) - -### 1. Build Docker Images - -```bash -# Build backend image -docker build -t alphatrion-server:latest . - -# Build dashboard image -docker build -t alphatrion-dashboard:latest ./dashboard -``` - -### 2. Load Images to Kubernetes (if using minikube/kind) - -```bash -# For minikube -minikube image load alphatrion-server:latest -minikube image load alphatrion-dashboard:latest - -# For kind -kind load docker-image alphatrion-server:latest -kind load docker-image alphatrion-dashboard:latest -``` - -### 3. Setup PostgreSQL Database - -You need an external PostgreSQL database. 
For local development, you can quickly spin one up: - -```bash -# Using Docker -docker run -d --name alphatrion-postgres \ - -e POSTGRES_DB=alphatrion \ - -e POSTGRES_USER=alphatrion \ - -e POSTGRES_PASSWORD=alphatr1on \ - -p 5432:5432 \ - postgres:16-alpine - -# Or using Kubernetes -kubectl run postgres --image=postgres:16-alpine \ - --env="POSTGRES_DB=alphatrion" \ - --env="POSTGRES_USER=alphatrion" \ - --env="POSTGRES_PASSWORD=alphatr1on" \ - --port=5432 - -kubectl expose pod postgres --port=5432 --target-port=5432 -``` - -### 4. Install Helm Chart - -```bash -# Install with PostgreSQL connection -helm install alphatrion ./charts/alphatrion \ - -f ./charts/alphatrion/values-dev.yaml \ - --set postgresql.host=postgres \ - --set postgresql.password=alphatr1on -``` - -**Note:** If using Docker PostgreSQL from your host, use `host.docker.internal` or your host IP instead of `postgres`. - -### 5. Wait for Pods to Start - -```bash -kubectl get pods -l app.kubernetes.io/name=alphatrion -w -``` - -### 6. Initialize AlphaTrion - -```bash -kubectl exec -it deploy/alphatrion-server -- alphatrion init \ - --username admin \ - --email admin@example.com -``` - -Save the `USER_ID` and `TEAM_ID` from the output. - -### 7. Access the Dashboard - -```bash -kubectl port-forward svc/alphatrion-dashboard 8080:80 -``` - -Visit http://localhost:8080 in your browser. - -## Optional: ClickHouse Setup - -If you want to enable tracing support with ClickHouse, you have two options: - -### Option A: Single Node (Development/Testing) - -```bash -# 1. Create gp3 storage class (AWS only) -kubectl apply -f ./charts/clickhouse/storageclass-gp3.yaml - -# 2. Deploy single-node ClickHouse -kubectl apply -f ./charts/clickhouse/clickhouse-statefulset.yaml - -# 3. 
Verify -kubectl get pods -n alphatrion -l app=clickhouse -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT version()" -``` - -### Option B: High Availability (Production) - -For production workloads with automatic failover and data replication: - -```bash -# 1. Create gp3 storage class (AWS only) -kubectl apply -f ./charts/clickhouse/storageclass-gp3.yaml - -# 2. Deploy HA cluster (3 replicas + 3 keeper nodes) -./charts/clickhouse/deploy-ha.sh - -# Or manually: -kubectl apply -f ./charts/clickhouse/clickhouse-ha.yaml -``` - -See [HA Setup Guide](./clickhouse/HA-SETUP.md) for detailed instructions and migration guide. - -### Connect AlphaTrion to ClickHouse - -```bash -helm upgrade alphatrion ./charts/alphatrion \ - -f ./charts/alphatrion/values-with-clickhouse.yaml -``` - -For more details, see the [ClickHouse deployment guide](./clickhouse/README.md). - -## Common Operations - -### View Logs - -```bash -# Backend logs -kubectl logs -l app.kubernetes.io/component=server -f - -# Dashboard logs -kubectl logs -l app.kubernetes.io/component=dashboard -f - -# Migration logs -kubectl logs job/alphatrion-migration -``` - -### Access Backend API - -```bash -kubectl port-forward svc/alphatrion-server 8000:8000 -curl http://localhost:8000/health -``` - -### Access PostgreSQL - -```bash -# If using Kubernetes PostgreSQL pod -kubectl port-forward pod/postgres 5432:5432 -psql -h localhost -U alphatrion -d alphatrion -# Password: alphatr1on (dev default) - -# If using Docker PostgreSQL -psql -h localhost -U alphatrion -d alphatrion -# Password: alphatr1on (dev default) -``` - -### Upgrade Release - -```bash -helm upgrade alphatrion ./charts/alphatrion -f ./charts/alphatrion/values-dev.yaml -``` - -### Uninstall - -```bash -helm uninstall alphatrion - -# Optionally delete PostgreSQL (if you created it for dev) -kubectl delete pod postgres -kubectl delete svc postgres - -# Or if using Docker -docker stop alphatrion-postgres -docker rm 
alphatrion-postgres -``` - -## Production Deployment - -For production, use `values-prod.yaml` and customize it: - -1. Update image repositories to your registry -2. Configure your production PostgreSQL connection -3. Enable ClickHouse for tracing -4. Enable Docker Registry for artifacts -5. Configure Ingress with TLS -6. Set up proper secrets management - -```bash -# Copy and customize production values -cp charts/alphatrion/values-prod.yaml my-prod-values.yaml -# Edit my-prod-values.yaml with your PostgreSQL host and other settings - -# Create secret for PostgreSQL password -kubectl create secret generic alphatrion-postgres-credentials \ - --from-literal=password=your-secure-password - -# Install -helm install alphatrion ./charts/alphatrion -f my-prod-values.yaml -``` - -## Troubleshooting - -### Pods Not Starting - -```bash -# Check pod status -kubectl get pods -l app.kubernetes.io/name=alphatrion - -# Describe pod for events -kubectl describe pod - -# Check logs -kubectl logs -``` - -### Database Connection Issues - -```bash -# Check if PostgreSQL is ready -kubectl get pods -l app.kubernetes.io/name=postgresql - -# Check migration job logs -kubectl logs job/alphatrion-migration - -# Test database connection -kubectl exec -it deploy/alphatrion-server -- sh -c 'python -c "import os; print(os.getenv(\"ALPHATRION_METADATA_DB_URL\"))"' -``` - -### Migration Failures - -```bash -# View migration logs -kubectl logs job/alphatrion-migration - -# If you need to re-run migrations -kubectl delete job alphatrion-migration -helm upgrade alphatrion ./charts/alphatrion -``` - -## Using AlphaTrion in Your Application - -After initialization, use AlphaTrion in your Python code: - -```python -import alphatrion as alpha - -# Initialize with your USER_ID from the init command -alpha.init(user_id='') - -# Create an experiment -experiment = alpha.create_experiment( - name="my-experiment", - description="My first experiment" -) - -# Track your GenAI application -# ... 
your code here ... -``` - -## Next Steps - -- Read the full [README.md](alphatrion/README.md) for detailed configuration options -- Check the [values.yaml](alphatrion/values.yaml) for all available settings -- Review [values-dev.yaml](alphatrion/values-dev.yaml) and [values-prod.yaml](alphatrion/values-prod.yaml) for environment-specific examples - -## Support - -- Documentation: https://github.com/InftyAI/alphatrion -- Issues: https://github.com/InftyAI/alphatrion/issues diff --git a/charts/alphatrion/.helmignore b/charts/alphatrion/.helmignore deleted file mode 100644 index 0e8a0eb3..00000000 --- a/charts/alphatrion/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/charts/alphatrion/Chart.yaml b/charts/alphatrion/Chart.yaml deleted file mode 100644 index f3b816a1..00000000 --- a/charts/alphatrion/Chart.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v2 -name: alphatrion -description: A Helm chart for deploying AlphaTrion - An open-source framework for GenAI applications -type: application -version: 0.1.0 -appVersion: "0.1.1" -home: https://github.com/InftyAI/alphatrion -maintainers: - - name: InftyAI - url: https://github.com/InftyAI diff --git a/charts/alphatrion/README.md b/charts/alphatrion/README.md deleted file mode 100644 index b22c8772..00000000 --- a/charts/alphatrion/README.md +++ /dev/null @@ -1,273 +0,0 @@ -# AlphaTrion Helm Chart - -This Helm chart deploys AlphaTrion, an open-source framework for GenAI applications, on Kubernetes. 
- -## Prerequisites - -- Kubernetes 1.19+ -- Helm 3.0+ -- **External PostgreSQL database** (version 12 or later recommended) - -## Installing the Chart - -Install the chart with your PostgreSQL configuration: - -```bash -helm install alphatrion ./charts/alphatrion \ - --set server.image.repository=alphatrion-server \ - --set server.image.tag=latest \ - --set dashboard.image.repository=alphatrion-dashboard \ - --set dashboard.image.tag=latest \ - --set postgresql.host=your-postgres-host \ - --set postgresql.password=your-postgres-password -``` - -## Building Docker Images - -Before installing the Helm chart, you need to build and push the Docker images: - -### Server Image - -```bash -docker build -t alphatrion-server:latest . -# If using a remote registry: -# docker tag alphatrion-server:latest your-registry/alphatrion-server:latest -# docker push your-registry/alphatrion-server:latest -``` - -### Dashboard Image - -```bash -docker build -t alphatrion-dashboard:latest ./dashboard -# If using a remote registry: -# docker tag alphatrion-dashboard:latest your-registry/alphatrion-dashboard:latest -# docker push your-registry/alphatrion-dashboard:latest -``` - -## Configuration - -The following table lists the configurable parameters of the AlphaTrion chart and their default values. 
- -### ServerConfiguration - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `server.image.repository` | Serverimage repository | `alphatrion-server` | -| `server.image.tag` | Serverimage tag | `latest` | -| `server.image.pullPolicy` | Serverimage pull policy | `IfNotPresent` | -| `server.replicaCount` | Number of backend replicas | `2` | -| `server.resources.requests.cpu` | ServerCPU request | `500m` | -| `server.resources.requests.memory` | Servermemory request | `512Mi` | -| `server.resources.limits.cpu` | ServerCPU limit | `1000m` | -| `server.resources.limits.memory` | Servermemory limit | `1Gi` | -| `server.service.type` | Serverservice type | `ClusterIP` | -| `server.service.port` | Serverservice port | `8000` | -| `server.env.logLevel` | Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) | `INFO` | -| `server.env.autoCleanup` | Auto cleanup old records | `true` | -| `server.env.enableTracing` | Enable tracing with ClickHouse | `false` | -| `server.env.enableArtifactStorage` | Enable artifact storage | `false` | - -### Dashboard Configuration - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `dashboard.image.repository` | Dashboard image repository | `alphatrion-dashboard` | -| `dashboard.image.tag` | Dashboard image tag | `latest` | -| `dashboard.image.pullPolicy` | Dashboard image pull policy | `IfNotPresent` | -| `dashboard.replicaCount` | Number of dashboard replicas | `2` | -| `dashboard.resources.requests.cpu` | Dashboard CPU request | `100m` | -| `dashboard.resources.requests.memory` | Dashboard memory request | `128Mi` | -| `dashboard.resources.limits.cpu` | Dashboard CPU limit | `500m` | -| `dashboard.resources.limits.memory` | Dashboard memory limit | `256Mi` | -| `dashboard.service.type` | Dashboard service type | `ClusterIP` | -| `dashboard.service.port` | Dashboard service port | `80` | - -### PostgreSQL Configuration (Required) - -AlphaTrion requires an external PostgreSQL 
database. Configure the connection details below: - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `postgresql.enabled` | Enable PostgreSQL connection | `true` | -| `postgresql.host` | PostgreSQL host (required) | `""` | -| `postgresql.port` | PostgreSQL port | `5432` | -| `postgresql.database` | PostgreSQL database name | `alphatrion` | -| `postgresql.username` | PostgreSQL username | `alphatrion` | -| `postgresql.password` | PostgreSQL password | `""` | -| `postgresql.existingSecret` | Existing secret for PostgreSQL password (recommended) | `""` | -| `postgresql.initTables` | Automatically initialize database tables | `true` | - -### Ingress Configuration - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `ingress.enabled` | Enable ingress | `false` | -| `ingress.className` | Ingress class name | `nginx` | -| `ingress.annotations` | Ingress annotations | `{}` | -| `ingress.hosts[0].host` | Hostname | `alphatrion.local` | -| `ingress.tls` | TLS configuration | `[]` | - -### ClickHouse Configuration (Optional) - -ClickHouse is used for storing OpenTelemetry traces. The connection and migrations are **independently configurable**. 
- -| Parameter | Description | Default | -|-----------|-------------|---------| -| `clickhouse.enabled` | Enable ClickHouse integration in server | `false` | -| `clickhouse.host` | ClickHouse host | `""` | -| `clickhouse.port` | ClickHouse port | `8123` | -| `clickhouse.database` | ClickHouse database | `alphatrion_traces` | -| `clickhouse.username` | ClickHouse username | `alphatrion` | -| `clickhouse.password` | ClickHouse password | `""` | -| `clickhouse.existingSecret` | Use existing secret for password | `""` | -| `clickhouse.clusterName` | ClickHouse cluster name for HA setup | `""` | -| `clickhouse.migrations.enabled` | Run schema migrations automatically | `false` | -| `clickhouse.migrations.hookWeight` | Helm hook weight for migration job | `"5"` | - -**Note:** See [CLICKHOUSE_SETUP.md](./CLICKHOUSE_SETUP.md) for detailed setup guide. - -### Docker Registry Configuration (Optional) - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `registry.enabled` | Enable Docker Registry integration | `false` | -| `registry.url` | Registry URL | `""` | -| `registry.insecure` | Use HTTP instead of HTTPS | `false` | -| `registry.username` | Registry username | `""` | -| `registry.password` | Registry password | `""` | - -## PostgreSQL Database Setup - -AlphaTrion requires an external PostgreSQL database (version 12 or later). 
You must configure the database connection during installation: - -### Using Password (Development) - -```bash -helm install alphatrion ./charts/alphatrion \ - --set postgresql.host=my-postgres.example.com \ - --set postgresql.password=mypassword -``` - -### Using Existing Secret (Production - Recommended) - -First, create a secret with your PostgreSQL password: - -```bash -kubectl create secret generic alphatrion-postgres-credentials \ - --from-literal=password=your-secure-password -``` - -Then install with the secret reference: - -```bash -helm install alphatrion ./charts/alphatrion \ - --set postgresql.host=my-postgres.example.com \ - --set postgresql.existingSecret=alphatrion-postgres-credentials -``` - -## Enabling Ingress - -To expose AlphaTrion via Ingress: - -```bash -helm install alphatrion ./charts/alphatrion \ - --set ingress.enabled=true \ - --set ingress.hosts[0].host=alphatrion.example.com \ - --set ingress.tls[0].secretName=alphatrion-tls \ - --set ingress.tls[0].hosts[0]=alphatrion.example.com -``` - -## Initializing AlphaTrion - -After installation, initialize AlphaTrion with a user and team: - -```bash -kubectl exec -it deploy/alphatrion-server -- alphatrion init \ - --username admin \ - --email admin@example.com -``` - -This will output a `USER_ID` and `TEAM_ID` that you can use in your GenAI applications. - -## Accessing the Dashboard - -### Via Port-Forward (Default) - -```bash -kubectl port-forward svc/alphatrion-dashboard 8080:80 -``` - -Then visit http://localhost:8080 - -### Via Ingress - -If you enabled ingress, visit the configured hostname (e.g., https://alphatrion.example.com) - -## Upgrading - -To upgrade the release: - -```bash -helm upgrade alphatrion ./charts/alphatrion \ - -f custom-values.yaml -``` - -## Uninstalling - -To uninstall/delete the release: - -```bash -helm uninstall alphatrion -``` - -This removes all the Kubernetes components associated with the chart and deletes the release. 
- -**Note:** Your external PostgreSQL database is not affected by the uninstall. You'll need to manually clean up the database if desired. - -## Troubleshooting - -### Check Pod Status - -```bash -kubectl get pods -l app.kubernetes.io/name=alphatrion -``` - -### View ServerLogs - -```bash -kubectl logs -l app.kubernetes.io/component=server -f -``` - -### View Dashboard Logs - -```bash -kubectl logs -l app.kubernetes.io/component=dashboard -f -``` - -### View Migration Logs - -```bash -kubectl logs job/alphatrion-migration -``` - -### Check Database Connection - -```bash -# Replace YOUR_POSTGRES_HOST and YOUR_PASSWORD with your actual values -kubectl run postgresql-client --rm --tty -i --restart='Never' \ - --image postgres:16-alpine \ - --env="PGPASSWORD=YOUR_PASSWORD" \ - -- psql --host YOUR_POSTGRES_HOST -U alphatrion -d alphatrion -``` - -## Advanced Configuration - -For advanced configuration options, see the `values.yaml` file or run: - -```bash -helm show values ./charts/alphatrion -``` - -## License - -This chart is licensed under the Apache License 2.0. diff --git a/charts/alphatrion/templates/NOTES.txt b/charts/alphatrion/templates/NOTES.txt deleted file mode 100644 index 35c5f945..00000000 --- a/charts/alphatrion/templates/NOTES.txt +++ /dev/null @@ -1,101 +0,0 @@ - - █████ ██ ██████ ██ ██ █████ ████████ ██████ ██ ██████ ███ ██ - ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ - ███████ ██ ██████ ███████ ███████ ██ ██████ ██ ██ ██ ██ ██ ██ - ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ - ██ ██ ███████ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██████ ██ ████ - -Thank you for installing {{ .Chart.Name }}! 
- -Release name: {{ .Release.Name }} -Namespace: {{ .Release.Namespace }} -Version: {{ .Chart.AppVersion }} - -📋 DEPLOYMENT STATUS -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -To check the status of your deployment: - - kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }} - -🌐 ACCESSING THE APPLICATION -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -{{- if .Values.ingress.enabled }} -{{- range $host := .Values.ingress.hosts }} -The dashboard is accessible at: - {{- if $.Values.ingress.tls }} - https://{{ $host.host }} - {{- else }} - http://{{ $host.host }} - {{- end }} -{{- end }} -{{- else }} - -The dashboard is not exposed externally. To access it: - -1. Port-forward to the dashboard service: - - kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "alphatrion.dashboard.fullname" . }} 8080:{{ .Values.dashboard.service.port }} - - Then visit: http://localhost:8080 - -2. Or expose via ingress by updating values: - - helm upgrade {{ .Release.Name }} alphatrion \ - --set ingress.enabled=true \ - --set ingress.hosts[0].host=alphatrion.example.com -{{- end }} - -🗄️ DATABASE CONNECTION -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Connected to external PostgreSQL: - Host: {{ .Values.postgresql.host }} - Port: {{ .Values.postgresql.port }} - Database: {{ .Values.postgresql.database }} - Username: {{ .Values.postgresql.username }} - -🚀 NEXT STEPS -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. Initialize AlphaTrion with a user and team: - - kubectl exec -n {{ .Release.Namespace }} -it deploy/{{ include "alphatrion.server.fullname" . }} \ - -- alphatrion init --username admin --email admin@example.com - - This will output a USER_ID and TEAM_ID. - -2. 
Use these IDs in your GenAI applications: - - import alphatrion as alpha - alpha.init(user_id='') - -📊 MONITORING -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -View server logs: - kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/component=server -f - -View dashboard logs: - kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/component=dashboard -f - -View migration logs: - kubectl logs -n {{ .Release.Namespace }} job/{{ include "alphatrion.fullname" . }}-migration - -🔧 CONFIGURATION -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -To customize your installation: - helm upgrade {{ .Release.Name }} alphatrion -f custom-values.yaml - -For all available options: - helm show values alphatrion - -📚 DOCUMENTATION -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Documentation: https://github.com/InftyAI/alphatrion -Report issues: https://github.com/InftyAI/alphatrion/issues - -Happy experimenting! 🎉 diff --git a/charts/alphatrion/templates/_helpers.tpl b/charts/alphatrion/templates/_helpers.tpl deleted file mode 100644 index b61908f1..00000000 --- a/charts/alphatrion/templates/_helpers.tpl +++ /dev/null @@ -1,188 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "alphatrion.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -*/}} -{{- define "alphatrion.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. 
-*/}} -{{- define "alphatrion.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "alphatrion.labels" -}} -helm.sh/chart: {{ include "alphatrion.chart" . }} -{{ include "alphatrion.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "alphatrion.selectorLabels" -}} -app.kubernetes.io/name: {{ include "alphatrion.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Server specific labels -*/}} -{{- define "alphatrion.server.labels" -}} -{{ include "alphatrion.labels" . }} -app.kubernetes.io/component: server -{{- end }} - -{{/* -Server selector labels -*/}} -{{- define "alphatrion.server.selectorLabels" -}} -{{ include "alphatrion.selectorLabels" . }} -app.kubernetes.io/component: server -{{- end }} - -{{/* -Server fullname -*/}} -{{- define "alphatrion.server.fullname" -}} -{{- printf "%s-server" (include "alphatrion.fullname" .) }} -{{- end }} - -{{/* -Dashboard specific labels -*/}} -{{- define "alphatrion.dashboard.labels" -}} -{{ include "alphatrion.labels" . }} -app.kubernetes.io/component: dashboard -{{- end }} - -{{/* -Dashboard selector labels -*/}} -{{- define "alphatrion.dashboard.selectorLabels" -}} -{{ include "alphatrion.selectorLabels" . }} -app.kubernetes.io/component: dashboard -{{- end }} - -{{/* -Dashboard fullname -*/}} -{{- define "alphatrion.dashboard.fullname" -}} -{{- printf "%s-dashboard" (include "alphatrion.fullname" .) }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "alphatrion.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "alphatrion.fullname" .) 
.Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* -PostgreSQL host -*/}} -{{- define "alphatrion.postgresql.host" -}} -{{- .Values.postgresql.host }} -{{- end }} - -{{/* -PostgreSQL port -*/}} -{{- define "alphatrion.postgresql.port" -}} -{{- .Values.postgresql.port }} -{{- end }} - -{{/* -PostgreSQL database -*/}} -{{- define "alphatrion.postgresql.database" -}} -{{- .Values.postgresql.database }} -{{- end }} - -{{/* -PostgreSQL username -*/}} -{{- define "alphatrion.postgresql.username" -}} -{{- .Values.postgresql.username }} -{{- end }} - -{{/* -PostgreSQL password secret name -*/}} -{{- define "alphatrion.postgresql.secretName" -}} -{{- if .Values.postgresql.existingSecret }} -{{- .Values.postgresql.existingSecret }} -{{- else }} -{{- include "alphatrion.server.fullname" . }} -{{- end }} -{{- end }} - -{{/* -PostgreSQL password secret key -*/}} -{{- define "alphatrion.postgresql.secretKey" -}} -{{- .Values.postgresql.existingSecretKey | default "postgres-password" }} -{{- end }} - -{{/* -ClickHouse host -*/}} -{{- define "alphatrion.clickhouse.host" -}} -{{- .Values.clickhouse.host }} -{{- end }} - -{{/* -ClickHouse secret name -*/}} -{{- define "alphatrion.clickhouse.secretName" -}} -{{- if .Values.clickhouse.existingSecret }} -{{- .Values.clickhouse.existingSecret }} -{{- else }} -{{- include "alphatrion.server.fullname" . }} -{{- end }} -{{- end }} - -{{/* -ClickHouse password secret key -*/}} -{{- define "alphatrion.clickhouse.secretKey" -}} -{{- .Values.clickhouse.existingSecretKey | default "clickhouse-password" }} -{{- end }} - -{{/* -Docker Registry secret name -*/}} -{{- define "alphatrion.registry.secretName" -}} -{{- if .Values.registry.existingSecret }} -{{- .Values.registry.existingSecret }} -{{- else }} -{{- printf "%s-registry" (include "alphatrion.fullname" .) 
}} -{{- end }} -{{- end }} diff --git a/charts/alphatrion/templates/clickhouse-migration-job.yaml b/charts/alphatrion/templates/clickhouse-migration-job.yaml deleted file mode 100644 index 5db36e97..00000000 --- a/charts/alphatrion/templates/clickhouse-migration-job.yaml +++ /dev/null @@ -1,62 +0,0 @@ -{{- if .Values.clickhouse.migrations.enabled }} -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ include "alphatrion.fullname" . }}-clickhouse-migration - labels: - {{- include "alphatrion.labels" . | nindent 4 }} - app.kubernetes.io/component: clickhouse-migration - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": {{ .Values.clickhouse.migrations.hookWeight | quote }} - "helm.sh/hook-delete-policy": before-hook-creation -spec: - backoffLimit: 3 - template: - metadata: - labels: - {{- include "alphatrion.selectorLabels" . | nindent 8 }} - app.kubernetes.io/component: clickhouse-migration - spec: - restartPolicy: Never - containers: - - name: clickhouse-migration - image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.server.image.pullPolicy }} - command: - - sh - - -c - - | - set -e - echo "==========================================" - echo "ClickHouse Schema Migrations" - echo "==========================================" - echo "" - echo "Database: {{ .Values.clickhouse.database }}" - echo "" - - # Run migrations using the Python CLI - echo "Running migrations..." - python -m migrations.clickhouse.cli migrate - - echo "" - echo "==========================================" - echo "✓ Migration completed successfully!" 
- echo "==========================================" - env: - - name: ALPHATRION_CLICKHOUSE_URL - value: "{{ .Values.clickhouse.host }}:{{ .Values.clickhouse.port }}" - - name: ALPHATRION_CLICKHOUSE_DATABASE - value: "{{ .Values.clickhouse.database }}" - - name: ALPHATRION_CLICKHOUSE_USERNAME - value: "{{ .Values.clickhouse.username }}" - - name: ALPHATRION_CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "alphatrion.clickhouse.secretName" . }} - key: {{ include "alphatrion.clickhouse.secretKey" . }} - {{- if .Values.clickhouse.clusterName }} - - name: ALPHATRION_CLICKHOUSE_CLUSTER_NAME - value: "{{ .Values.clickhouse.clusterName }}" - {{- end }} -{{- end }} diff --git a/charts/alphatrion/templates/dashboard-deployment.yaml b/charts/alphatrion/templates/dashboard-deployment.yaml deleted file mode 100644 index 1eb2f587..00000000 --- a/charts/alphatrion/templates/dashboard-deployment.yaml +++ /dev/null @@ -1,99 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "alphatrion.dashboard.fullname" . }} - labels: - {{- include "alphatrion.dashboard.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.dashboard.replicaCount }} - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - {{- include "alphatrion.dashboard.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "alphatrion.dashboard.selectorLabels" . | nindent 8 }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if .Values.serviceAccount.create }} - serviceAccountName: {{ include "alphatrion.serviceAccountName" . 
}} - {{- end }} - securityContext: - {{- if .Values.dashboard.podSecurityContext }} - {{- toYaml .Values.dashboard.podSecurityContext | nindent 8 }} - {{- else }} - {{- toYaml .Values.podSecurityContext | nindent 8 }} - {{- end }} - containers: - - name: dashboard - securityContext: - {{- if .Values.dashboard.securityContext }} - {{- toYaml .Values.dashboard.securityContext | nindent 12 }} - {{- else }} - {{- toYaml .Values.securityContext | nindent 12 }} - {{- end }} - image: "{{ .Values.dashboard.image.repository }}:{{ .Values.dashboard.image.tag }}" - imagePullPolicy: {{ .Values.dashboard.image.pullPolicy }} - - ports: - - name: http - containerPort: 8080 - protocol: TCP - - {{- if .Values.dashboard.env }} - env: - {{- if .Values.dashboard.env.apiUrl }} - - name: VITE_API_URL - value: {{ .Values.dashboard.env.apiUrl | quote }} - {{- end }} - {{- end }} - - {{- if .Values.dashboard.livenessProbe.enabled }} - livenessProbe: - httpGet: - path: / - port: http - initialDelaySeconds: {{ .Values.dashboard.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.dashboard.livenessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.dashboard.livenessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.dashboard.livenessProbe.failureThreshold }} - {{- end }} - - {{- if .Values.dashboard.readinessProbe.enabled }} - readinessProbe: - httpGet: - path: / - port: http - initialDelaySeconds: {{ .Values.dashboard.readinessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.dashboard.readinessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.dashboard.readinessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.dashboard.readinessProbe.failureThreshold }} - {{- end }} - - resources: - {{- toYaml .Values.dashboard.resources | nindent 12 }} - - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/charts/alphatrion/templates/dashboard-service.yaml b/charts/alphatrion/templates/dashboard-service.yaml deleted file mode 100644 index cc78d977..00000000 --- a/charts/alphatrion/templates/dashboard-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "alphatrion.dashboard.fullname" . }} - labels: - {{- include "alphatrion.dashboard.labels" . | nindent 4 }} -spec: - type: {{ .Values.dashboard.service.type }} - ports: - - port: {{ .Values.dashboard.service.port }} - targetPort: http - protocol: TCP - name: http - selector: - {{- include "alphatrion.dashboard.selectorLabels" . | nindent 4 }} diff --git a/charts/alphatrion/templates/ingress.yaml b/charts/alphatrion/templates/ingress.yaml deleted file mode 100644 index 15d79310..00000000 --- a/charts/alphatrion/templates/ingress.yaml +++ /dev/null @@ -1,56 +0,0 @@ -{{- if .Values.ingress.enabled -}} -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: {{ include "alphatrion.fullname" . }} - labels: - {{- include "alphatrion.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - {{- if .Values.ingress.className }} - ingressClassName: {{ .Values.ingress.className }} - {{- end }} - {{- if .Values.ingress.tls }} - tls: - {{- range .Values.ingress.tls }} - - hosts: - {{- range .hosts }} - - {{ . 
| quote }} - {{- end }} - secretName: {{ .secretName }} - {{- end }} - {{- end }} - rules: - {{- range .Values.ingress.hosts }} - - host: {{ .host | quote }} - http: - paths: - # API routes go to backend server - - path: /api - pathType: Prefix - backend: - service: - name: {{ include "alphatrion.server.fullname" $ }} - port: - number: {{ $.Values.server.service.port }} - # GraphQL routes go to backend server - - path: /graphql - pathType: Prefix - backend: - service: - name: {{ include "alphatrion.server.fullname" $ }} - port: - number: {{ $.Values.server.service.port }} - # Everything else (/, /static/*, SPA routes) goes to dashboard (static files) - - path: / - pathType: Prefix - backend: - service: - name: {{ include "alphatrion.dashboard.fullname" $ }} - port: - number: {{ $.Values.dashboard.service.port }} - {{- end }} -{{- end }} diff --git a/charts/alphatrion/templates/migration-job.yaml b/charts/alphatrion/templates/migration-job.yaml deleted file mode 100644 index cd8880da..00000000 --- a/charts/alphatrion/templates/migration-job.yaml +++ /dev/null @@ -1,48 +0,0 @@ -{{- if .Values.migration.enabled }} -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ include "alphatrion.fullname" . }}-migration - labels: - {{- include "alphatrion.labels" . | nindent 4 }} - app.kubernetes.io/component: migration - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "0" - "helm.sh/hook-delete-policy": before-hook-creation -spec: - ttlSecondsAfterFinished: {{ .Values.migration.ttlSecondsAfterFinished }} - template: - metadata: - labels: - {{- include "alphatrion.selectorLabels" . | nindent 8 }} - app.kubernetes.io/component: migration - spec: - restartPolicy: Never - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . 
| nindent 8 }} - {{- end }} - containers: - - name: alembic-upgrade - image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag }}" - imagePullPolicy: {{ .Values.server.image.pullPolicy }} - command: - - sh - - -c - - | - # Construct database URL with password from secret - export ALPHATRION_METADATA_DB_URL="postgresql+psycopg2://{{ include "alphatrion.postgresql.username" . }}:${POSTGRES_PASSWORD}@{{ include "alphatrion.postgresql.host" . }}:{{ include "alphatrion.postgresql.port" . }}/{{ include "alphatrion.postgresql.database" . }}" - echo "Running database migrations..." - alembic upgrade head - echo "Migrations completed successfully!" - env: - # Database password from secret - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "alphatrion.postgresql.secretName" . }} - key: {{ include "alphatrion.postgresql.secretKey" . }} - resources: - {{- toYaml .Values.migration.resources | nindent 12 }} -{{- end }} diff --git a/charts/alphatrion/templates/server-configmap.yaml b/charts/alphatrion/templates/server-configmap.yaml deleted file mode 100644 index 3a9ae897..00000000 --- a/charts/alphatrion/templates/server-configmap.yaml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "alphatrion.server.fullname" . }} - labels: - {{- include "alphatrion.server.labels" . 
| nindent 4 }} -data: - # Logging configuration - ALPHATRION_LOG_LEVEL: {{ .Values.server.env.logLevel | quote }} - - # Root path for API - {{- if .Values.server.env.rootPath }} - ALPHATRION_ROOT_PATH: {{ .Values.server.env.rootPath | quote }} - {{- end }} - - # Database configuration - ALPHATRION_METADATA_INIT_TABLES: {{ .Values.postgresql.initTables | quote }} - - # Tracing configuration - ALPHATRION_ENABLE_TRACING: {{ .Values.server.env.enableTracing | quote }} - {{- if .Values.clickhouse.enabled }} - ALPHATRION_CLICKHOUSE_URL: {{ printf "http://%s:%d" .Values.clickhouse.host (int .Values.clickhouse.port) | quote }} - ALPHATRION_CLICKHOUSE_DATABASE: {{ .Values.clickhouse.database | quote }} - ALPHATRION_CLICKHOUSE_USERNAME: {{ .Values.clickhouse.username | quote }} - ALPHATRION_CLICKHOUSE_ENABLE_BATCH: {{ .Values.clickhouse.enableBatch | quote }} - {{- end }} - - # Artifact storage configuration - ALPHATRION_ENABLE_ARTIFACT_STORAGE: {{ .Values.server.env.enableArtifactStorage | quote }} - {{- if .Values.registry.enabled }} - ALPHATRION_ARTIFACT_REGISTRY_URL: {{ .Values.registry.url | quote }} - ALPHATRION_ARTIFACT_INSECURE: {{ .Values.registry.insecure | quote }} - {{- end }} - - # Authentication configuration - ALPHATRION_ENABLE_AUTH: {{ .Values.server.env.enableAuth | quote }} diff --git a/charts/alphatrion/templates/server-deployment.yaml b/charts/alphatrion/templates/server-deployment.yaml deleted file mode 100644 index 87229c2d..00000000 --- a/charts/alphatrion/templates/server-deployment.yaml +++ /dev/null @@ -1,113 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "alphatrion.server.fullname" . }} - labels: - {{- include "alphatrion.server.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.server.replicaCount }} - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - {{- include "alphatrion.server.selectorLabels" . 
| nindent 6 }} - template: - metadata: - annotations: - checksum/config: {{ include (print $.Template.BasePath "/server-configmap.yaml") . | sha256sum }} - checksum/secret: {{ include (print $.Template.BasePath "/server-secret.yaml") . | sha256sum }} - {{- with .Values.podAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "alphatrion.server.selectorLabels" . | nindent 8 }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if .Values.serviceAccount.create }} - serviceAccountName: {{ include "alphatrion.serviceAccountName" . }} - {{- end }} - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: server - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag }}" - imagePullPolicy: {{ .Values.server.image.pullPolicy }} - command: - - sh - - -c - - | - # Construct database URL with password from secret - export ALPHATRION_METADATA_DB_URL="postgresql+psycopg2://{{ include "alphatrion.postgresql.username" . }}:${POSTGRES_PASSWORD}@{{ include "alphatrion.postgresql.host" . }}:{{ include "alphatrion.postgresql.port" . }}/{{ include "alphatrion.postgresql.database" . }}" - # Start the server - exec alphatrion server --host 0.0.0.0 --port 8000 - ports: - - name: http - containerPort: 8000 - protocol: TCP - env: - # Database password from secret - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "alphatrion.postgresql.secretName" . }} - key: {{ include "alphatrion.postgresql.secretKey" . }} - - # ClickHouse password (if enabled) - {{- if .Values.clickhouse.enabled }} - - name: ALPHATRION_CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "alphatrion.clickhouse.secretName" . }} - key: {{ include "alphatrion.clickhouse.secretKey" . 
}} - {{- end }} - - envFrom: - # Non-sensitive environment variables - - configMapRef: - name: {{ include "alphatrion.server.fullname" . }} - - {{- if .Values.server.livenessProbe.enabled }} - livenessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: {{ .Values.server.livenessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.server.livenessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.server.livenessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.server.livenessProbe.failureThreshold }} - {{- end }} - - {{- if .Values.server.readinessProbe.enabled }} - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: {{ .Values.server.readinessProbe.initialDelaySeconds }} - periodSeconds: {{ .Values.server.readinessProbe.periodSeconds }} - timeoutSeconds: {{ .Values.server.readinessProbe.timeoutSeconds }} - failureThreshold: {{ .Values.server.readinessProbe.failureThreshold }} - {{- end }} - - resources: - {{- toYaml .Values.server.resources | nindent 12 }} - - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/charts/alphatrion/templates/server-secret.yaml b/charts/alphatrion/templates/server-secret.yaml deleted file mode 100644 index 6cd73604..00000000 --- a/charts/alphatrion/templates/server-secret.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "alphatrion.server.fullname" . }} - labels: - {{- include "alphatrion.server.labels" . 
| nindent 4 }} - annotations: - "helm.sh/hook": pre-install,pre-upgrade - "helm.sh/hook-weight": "-5" - "helm.sh/resource-policy": keep -type: Opaque -stringData: - {{- if not .Values.postgresql.existingSecret }} - # PostgreSQL password - postgres-password: {{ .Values.postgresql.password | quote }} - {{- end }} - {{- if and .Values.clickhouse.enabled (not .Values.clickhouse.existingSecret) }} - # ClickHouse password - clickhouse-password: {{ .Values.clickhouse.password | quote }} - {{- end }} - {{- if and .Values.registry.enabled (not .Values.registry.existingSecret) }} - # Docker Registry credentials - registry-username: {{ .Values.registry.username | quote }} - registry-password: {{ .Values.registry.password | quote }} - {{- end }} diff --git a/charts/alphatrion/templates/server-service.yaml b/charts/alphatrion/templates/server-service.yaml deleted file mode 100644 index b9cc81b9..00000000 --- a/charts/alphatrion/templates/server-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "alphatrion.server.fullname" . }} - labels: - {{- include "alphatrion.server.labels" . | nindent 4 }} -spec: - type: {{ .Values.server.service.type }} - ports: - - port: {{ .Values.server.service.port }} - targetPort: http - protocol: TCP - name: http - selector: - {{- include "alphatrion.server.selectorLabels" . | nindent 4 }} diff --git a/charts/alphatrion/values.yaml b/charts/alphatrion/values.yaml deleted file mode 100644 index 81122a5f..00000000 --- a/charts/alphatrion/values.yaml +++ /dev/null @@ -1,213 +0,0 @@ -# Default values for alphatrion -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
- -# Server service configuration -server: - image: - repository: alphatrion-server - tag: latest - pullPolicy: IfNotPresent - - replicaCount: 2 - - resources: - requests: - cpu: 500m - memory: 512Mi - limits: - cpu: 1000m - memory: 1Gi - - service: - type: ClusterIP - port: 8000 - - env: - # Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL - logLevel: INFO - # Automatically cleanup old records - autoCleanup: true - # Enable tracing with ClickHouse - enableTracing: false - # Enable artifact storage with Docker Registry - enableArtifactStorage: false - # Root path for API (useful if behind a reverse proxy) - rootPath: "" - # Enable JWT authentication (true) or use direct headers (false) - enableAuth: false - - # Health check configuration - livenessProbe: - enabled: true - initialDelaySeconds: 30 - periodSeconds: 10 - timeoutSeconds: 5 - failureThreshold: 3 - - readinessProbe: - enabled: true - initialDelaySeconds: 10 - periodSeconds: 5 - timeoutSeconds: 3 - failureThreshold: 3 - -# Dashboard service configuration -dashboard: - image: - repository: alphatrion-dashboard - tag: latest - pullPolicy: IfNotPresent - - replicaCount: 2 - - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 256Mi - - service: - type: ClusterIP - port: 80 - - env: - # Backend API URL - set this to the server service URL - # For internal cluster communication: http://alphatrion-server:8000 - # For external access: https://api.example.com - apiUrl: "" - - # Health check configuration - livenessProbe: - enabled: true - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 3 - failureThreshold: 3 - - readinessProbe: - enabled: true - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 2 - failureThreshold: 3 - -# PostgreSQL configuration (external database required) -postgresql: - enabled: true - host: "" - port: 5432 - database: alphatrion - username: alphatrion - password: "" - # Use existing secret for password - existingSecret: "" - # Key name 
in the existing secret (default: postgres-password) - existingSecretKey: "postgres-password" - # Automatically initialize database tables on first run - initTables: false - -# ClickHouse configuration (optional - for tracing) -clickhouse: - # Enable ClickHouse integration (used by server for connecting to ClickHouse) - enabled: false - host: "clickhouse.alphatrion.svc.cluster.local" - port: 8123 - database: alphatrion_traces - username: alphatrion - password: "alphatrion" - # Use existing secret for password - existingSecret: "" - # Key name in the existing secret (default: clickhouse-password) - existingSecretKey: "clickhouse-password" - # Enable batch operations for better performance - enableBatch: true - # Cluster name for HA ClickHouse (enables ReplicatedMergeTree with ON CLUSTER) - # Leave empty for single-node MergeTree setup - clusterName: "" - - # Migration job configuration (independent from clickhouse.enabled) - migrations: - # Enable automatic schema migrations on helm install/upgrade - enabled: false - # Hook weight (runs after PostgreSQL migrations which are at "0") - hookWeight: "5" - -# Docker Registry configuration (optional - for artifact storage) -registry: - enabled: false - url: "" - # Set to true if using HTTP instead of HTTPS - insecure: false - # Authentication (if required) - username: "" - password: "" - # Use existing secret for credentials (keys should be 'username' and 'password') - existingSecret: "" - -# Ingress configuration -ingress: - enabled: false - className: nginx - annotations: {} - # cert-manager.io/cluster-issuer: letsencrypt-prod - # nginx.ingress.kubernetes.io/ssl-redirect: "true" - hosts: - - host: alphatrion.local - paths: - - path: / - pathType: Prefix - tls: [] - # - secretName: alphatrion-tls - # hosts: - # - alphatrion.example.com - -# Image pull secrets (if using private registry) -imagePullSecrets: [] - -# Service account configuration -serviceAccount: - create: false - annotations: {} - name: "" - -# Pod 
annotations -podAnnotations: {} - -# Pod security context -podSecurityContext: - runAsNonRoot: true - runAsUser: 1000 - fsGroup: 1000 - -# Security context -securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - -# Node selector -nodeSelector: {} - -# Tolerations -tolerations: [] - -# Affinity -affinity: {} - -# Migration job configuration -migration: - enabled: false - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - # Time to live for completed migration jobs (in seconds) - ttlSecondsAfterFinished: 300 diff --git a/charts/clickhouse/HA-SETUP.md b/charts/clickhouse/HA-SETUP.md deleted file mode 100644 index 17c6030d..00000000 --- a/charts/clickhouse/HA-SETUP.md +++ /dev/null @@ -1,439 +0,0 @@ -# High Availability ClickHouse Setup - -This guide shows how to deploy a highly available ClickHouse cluster with 3 replicas and ClickHouse Keeper for coordination. - -## Architecture - -**High Availability Setup:** -- **3 ClickHouse replicas** - Data is replicated across all 3 nodes -- **3 ClickHouse Keeper nodes** - Consensus-based coordination (replaces ZooKeeper) -- **Pod Anti-Affinity** - Spreads pods across different nodes and availability zones -- **Persistent Storage** - Each pod has its own 100Gi gp3 volume -- **Auto-recovery** - Failed pods automatically restart and rejoin the cluster - -## Prerequisites - -1. **AWS EKS Cluster** with at least 3 worker nodes (preferably across different AZs) -2. **gp3 Storage Class** configured -3. **Sufficient resources** - Each ClickHouse pod needs 500m CPU / 2Gi RAM (request) -4. **kubectl** configured to access your cluster - -## Deployment - -### 1. Create gp3 Storage Class (if not exists) - -```bash -kubectl apply -f ./charts/clickhouse/storageclass-gp3.yaml -``` - -### 2. 
Deploy HA ClickHouse Cluster - -```bash -kubectl apply -f ./charts/clickhouse/clickhouse-ha.yaml -``` - -This will create: -- 3 ClickHouse Keeper pods (for coordination) -- 3 ClickHouse server pods (for data storage and processing) -- ConfigMap with cluster configuration -- Services for internal and external access - -### 3. Wait for Deployment - -```bash -# Watch pods come up (this may take 2-3 minutes) -kubectl get pods -n alphatrion -l app=clickhouse -w -kubectl get pods -n alphatrion -l app=clickhouse-keeper -w -``` - -Expected output: -``` -clickhouse-keeper-0 1/1 Running -clickhouse-keeper-1 1/1 Running -clickhouse-keeper-2 1/1 Running -clickhouse-0 1/1 Running -clickhouse-1 1/1 Running -clickhouse-2 1/1 Running -``` - -### 4. Verify Cluster - -```bash -# Check cluster status -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT * FROM system.clusters WHERE cluster='alphatrion_cluster'" - -# Check replication status -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT * FROM system.replicas" - -# Verify all nodes see each other -for i in 0 1 2; do - echo "=== clickhouse-$i ===" - kubectl exec -n alphatrion clickhouse-$i -- clickhouse-client --query "SELECT hostname(), version()" -done -``` - -## Creating Replicated Tables - -When using the HA setup, create tables with the `ReplicatedMergeTree` engine: - -```sql --- Connect to any ClickHouse node -kubectl exec -it -n alphatrion clickhouse-0 -- clickhouse-client - --- Create a replicated table -CREATE TABLE alphatrion_traces.traces ON CLUSTER alphatrion_cluster -( - trace_id String, - span_id String, - timestamp DateTime, - duration_ms UInt32, - service_name String -) -ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/traces', '{replica}') -PARTITION BY toYYYYMM(timestamp) -ORDER BY (service_name, timestamp); - --- Verify table exists on all replicas -SELECT hostname(), database, name, engine FROM clusterAllReplicas('alphatrion_cluster', system.tables) 
-WHERE database = 'alphatrion_traces'; -``` - -## Testing High Availability - -### Test 1: Node Failure - -```bash -# Delete one ClickHouse pod -kubectl delete pod -n alphatrion clickhouse-1 - -# Watch it automatically restart -kubectl get pods -n alphatrion -l app=clickhouse -w - -# Verify data is still accessible -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT count() FROM system.clusters WHERE cluster='alphatrion_cluster'" -``` - -### Test 2: Write Replication - -```bash -# Write data to clickhouse-0 -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "INSERT INTO alphatrion_traces.traces VALUES ('trace1', 'span1', now(), 100, 'test-service')" - -# Read from clickhouse-1 (should see the data) -kubectl exec -n alphatrion clickhouse-1 -- clickhouse-client --query "SELECT * FROM alphatrion_traces.traces" - -# Read from clickhouse-2 (should also see the data) -kubectl exec -n alphatrion clickhouse-2 -- clickhouse-client --query "SELECT * FROM alphatrion_traces.traces" -``` - -### Test 3: Keeper Failover - -```bash -# Delete one Keeper pod -kubectl delete pod -n alphatrion clickhouse-keeper-1 - -# Cluster should continue working (quorum is 2/3) -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT 1" -``` - -## Monitoring - -### Check Cluster Health - -```bash -# Cluster info -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query " -SELECT - cluster, - shard_num, - replica_num, - host_name, - port, - is_local, - errors_count -FROM system.clusters -WHERE cluster = 'alphatrion_cluster'" - -# Replication queue -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query " -SELECT - database, - table, - is_currently_executing, - num_tries, - last_exception -FROM system.replication_queue" - -# Keeper status -for i in 0 1 2; do - echo "=== keeper-$i ===" - kubectl exec -n alphatrion clickhouse-keeper-$i -- sh -c 'echo mntr | nc localhost 2181' -done -``` - -### Resource Usage - 
-```bash -# Check CPU and memory -kubectl top pods -n alphatrion -l app=clickhouse -kubectl top pods -n alphatrion -l app=clickhouse-keeper - -# Check storage -kubectl get pvc -n alphatrion -``` - -## Scaling - -### Scale ClickHouse Replicas - -To add more replicas (within the same shard): - -1. Update the StatefulSet: -```bash -kubectl scale statefulset clickhouse -n alphatrion --replicas=5 -``` - -2. Update the cluster configuration in ConfigMap to include new replicas: -```yaml -<replica> - <host>clickhouse-3.clickhouse.alphatrion.svc.cluster.local</host> - <port>9000</port> -</replica> -<replica> - <host>clickhouse-4.clickhouse.alphatrion.svc.cluster.local</host> - <port>9000</port> -</replica> -``` - -3. Restart pods to pick up new config: -```bash -kubectl rollout restart statefulset clickhouse -n alphatrion -``` - -### Add Shards (Horizontal Partitioning) - -For true horizontal scaling with data partitioning, you need to add shards. This requires: - -1. Deploying additional StatefulSets for each shard -2. Updating the `remote_servers` configuration with multiple `<shard>` sections -3. Using Distributed tables to query across shards - -See ClickHouse documentation for multi-shard setups. 
- -## Backup and Restore - -### Backup - -```bash -# Create backup on all replicas -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="BACKUP DATABASE alphatrion_traces TO Disk('backups', 'backup-$(date +%Y%m%d)')" - -# Or use volume snapshots (recommended for large datasets) -# Create snapshots of all PVCs -for i in 0 1 2; do - kubectl exec -n alphatrion clickhouse-$i -- sync - # Then create AWS EBS snapshot of corresponding volume -done -``` - -### Restore - -```bash -# Stop writes (optional) -kubectl scale statefulset clickhouse -n alphatrion --replicas=0 - -# Restore from backup -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="RESTORE DATABASE alphatrion_traces FROM Disk('backups', 'backup-20240101')" - -# Or restore from volume snapshot -# Restore PVCs from EBS snapshots, then restart pods -``` - -## Migration from Single Node - -If you already have a single-node ClickHouse deployment: - -### Option 1: Backup and Restore (Recommended) - -```bash -# 1. Backup existing data -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="BACKUP DATABASE alphatrion_traces TO Disk('backups', 'migration-backup')" - -# 2. Export backup to local -kubectl exec -n alphatrion clickhouse-0 -- tar -czf /tmp/backup.tar.gz /var/lib/clickhouse/backups -kubectl cp alphatrion/clickhouse-0:/tmp/backup.tar.gz ./clickhouse-backup.tar.gz - -# 3. Delete single-node deployment -kubectl delete -f ./charts/clickhouse/clickhouse-statefulset.yaml -kubectl delete pvc -n alphatrion clickhouse-data-clickhouse-0 - -# 4. Deploy HA cluster -kubectl apply -f ./charts/clickhouse/clickhouse-ha.yaml - -# 5. Wait for cluster to be ready -kubectl wait --for=condition=ready pod -n alphatrion -l app=clickhouse --timeout=300s - -# 6. 
Restore backup -kubectl cp ./clickhouse-backup.tar.gz alphatrion/clickhouse-0:/tmp/backup.tar.gz -kubectl exec -n alphatrion clickhouse-0 -- tar -xzf /tmp/backup.tar.gz -C / -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="RESTORE DATABASE alphatrion_traces FROM Disk('backups', 'migration-backup')" -``` - -### Option 2: Direct Data Migration (Zero Downtime) - -```bash -# 1. Deploy HA cluster alongside existing single node (use different namespace temporarily) -kubectl create namespace alphatrion-ha -# Edit clickhouse-ha.yaml to use alphatrion-ha namespace -kubectl apply -f ./charts/clickhouse/clickhouse-ha.yaml - -# 2. Use clickhouse-copier or remote() function to copy data -kubectl exec -n alphatrion-ha clickhouse-0 -- clickhouse-client --query=" -INSERT INTO alphatrion_traces.traces -SELECT * FROM remote('clickhouse-0.clickhouse.alphatrion.svc.cluster.local:9000', 'alphatrion_traces', 'traces', 'alphatrion', 'alphatrion')" - -# 3. Switch AlphaTrion connection to new cluster -# Update service discovery or change host in values.yaml - -# 4. 
Delete old single-node deployment -kubectl delete -f ./charts/clickhouse/clickhouse-statefulset.yaml -``` - -## Troubleshooting - -### Pods Stuck in Pending - -```bash -# Check PVC status -kubectl get pvc -n alphatrion - -# Check node resources -kubectl describe nodes | grep -A 5 "Allocated resources" - -# Check pod events -kubectl describe pod -n alphatrion clickhouse-0 -``` - -### Replication Lag - -```bash -# Check replication queue -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query " -SELECT - database, - table, - is_currently_executing, - num_tries, - last_exception -FROM system.replication_queue -WHERE last_exception != ''" - -# Force sync -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SYSTEM SYNC REPLICA alphatrion_traces.traces" -``` - -### Keeper Connection Issues - -```bash -# Check Keeper logs -kubectl logs -n alphatrion clickhouse-keeper-0 - -# Test Keeper connectivity -kubectl exec -n alphatrion clickhouse-0 -- sh -c 'echo ruok | nc clickhouse-keeper-0.clickhouse-keeper.alphatrion.svc.cluster.local 2181' - -# Check Keeper quorum -for i in 0 1 2; do - echo "=== keeper-$i ===" - kubectl exec -n alphatrion clickhouse-keeper-$i -- sh -c 'echo stat | nc localhost 2181' -done -``` - -### Split Brain - -If you have quorum issues: - -```bash -# Check Keeper leader -for i in 0 1 2; do - echo "=== keeper-$i ===" - kubectl exec -n alphatrion clickhouse-keeper-$i -- sh -c 'echo srvr | nc localhost 2181 | grep Mode' -done - -# Should see: 1 leader, 2 followers -``` - -## Performance Tuning - -### For High Throughput - -Edit ConfigMap and increase: -```xml -200 -2048 -32 -``` - -### For Large Datasets - -```bash -# Increase storage -kubectl patch pvc clickhouse-data-clickhouse-0 -n alphatrion -p '{"spec":{"resources":{"requests":{"storage":"500Gi"}}}}' - -# Adjust gp3 throughput -kubectl edit storageclass gp3 -# Add: parameters.throughput: "500" -``` - -### Resource Allocation - -```bash -# Update StatefulSet resources 
-kubectl edit statefulset clickhouse -n alphatrion - -# Increase to: -resources: - requests: - cpu: 2000m - memory: 8Gi - limits: - cpu: 4000m - memory: 16Gi -``` - -## Security Considerations - -1. **Change default password** - Update CLICKHOUSE_PASSWORD in the StatefulSet -2. **Use Kubernetes Secrets** - Store credentials in secrets instead of env vars -3. **Enable TLS** - Configure TLS for ClickHouse connections -4. **Network Policies** - Restrict traffic between pods -5. **RBAC** - Use ClickHouse users/roles for access control - -Example with Secret: -```bash -kubectl create secret generic clickhouse-credentials \ - --namespace alphatrion \ - --from-literal=password='your-secure-password' - -# Update StatefulSet to use secret -env: -- name: CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: clickhouse-credentials - key: password -``` - -## Cost Optimization - -1. **Right-size resources** - Monitor actual usage and adjust requests/limits -2. **Use gp3 over gp2** - Better performance at lower cost -3. **Enable compression** - ClickHouse has excellent compression (already enabled by default) -4. **Data retention policies** - Implement TTL to automatically delete old data -5. 
**Spot instances** - Use spot instances for ClickHouse nodes in non-production - -## Further Reading - -- [ClickHouse Replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication) -- [ClickHouse Keeper](https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper) -- [ClickHouse Cluster](https://clickhouse.com/docs/en/architecture/cluster-deployment) -- [High Availability Best Practices](https://clickhouse.com/docs/en/guides/sre/ha) diff --git a/charts/clickhouse/README.md b/charts/clickhouse/README.md deleted file mode 100644 index 8c7905ac..00000000 --- a/charts/clickhouse/README.md +++ /dev/null @@ -1,331 +0,0 @@ -# ClickHouse Deployment for AlphaTrion - -This directory contains configuration for deploying ClickHouse separately from AlphaTrion using Kubernetes StatefulSets with the official ClickHouse image. - -**Two deployment options:** - -| Feature | Single Node | High Availability | -|---------|------------|-------------------| -| **Replicas** | 1 ClickHouse pod | 3 ClickHouse pods + 3 Keeper pods | -| **Data Redundancy** | ❌ No | ✅ Yes (3x replicated) | -| **Automatic Failover** | ❌ No | ✅ Yes | -| **Resource Usage** | 500m CPU / 2Gi RAM | 2.1 CPU / 8.5Gi RAM total | -| **Storage** | 100Gi | 300Gi total (100Gi × 3) | -| **Use Case** | Development, Testing | Production | -| **Setup Complexity** | Simple | Moderate | -| **File** | `clickhouse-statefulset.yaml` | `clickhouse-ha.yaml` | - -For HA setup, see **[High Availability Setup Guide](HA-SETUP.md)**. - ---- - -## Single Node Deployment (Quick Start) - -### Prerequisites - -1. **AWS EKS Cluster** with the EBS CSI driver installed -2. **gp3 Storage Class** configured -3. **kubectl** configured to access your cluster - -## Installation - -### 1. Create gp3 Storage Class - -```bash -kubectl apply -f ./charts/clickhouse/storageclass-gp3.yaml -``` - -### 2. Create Namespace (if not exists) - -```bash -kubectl create namespace alphatrion -``` - -### 3. 
Deploy ClickHouse - -```bash -kubectl apply -f ./charts/clickhouse/clickhouse-statefulset.yaml -``` - -### 4. Verify Deployment - -```bash -# Check pod -kubectl get pods -n alphatrion -l app=clickhouse - -# Check PVC -kubectl get pvc -n alphatrion - -# Check service -kubectl get svc -n alphatrion clickhouse - -# Test ClickHouse -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query "SELECT version()" -``` - -## Configuration - -The deployment is configured via `clickhouse-statefulset.yaml`: - -### Storage - -- **Storage Class**: gp3 -- **Size**: 100Gi -- **Access Mode**: ReadWriteOnce - -To change storage size, edit the StatefulSet: - -```yaml -volumeClaimTemplates: -- metadata: - name: clickhouse-data - spec: - storageClassName: gp3 - resources: - requests: - storage: 200Gi # Change size here -``` - -For custom gp3 IOPS/throughput, update `storageclass-gp3.yaml`: - -```yaml -parameters: - type: gp3 - iops: "3000" - throughput: "250" -``` - -### Authentication - -Default credentials (defined in clickhouse-statefulset.yaml): -- **Username**: alphatrion -- **Password**: alphatrion (⚠️ Change in production!) - -To change credentials, update the env vars in the StatefulSet: - -```yaml -env: -- name: CLICKHOUSE_USER - value: "your-username" -- name: CLICKHOUSE_PASSWORD - value: "your-secure-password" -``` - -Or use a Kubernetes Secret: - -```yaml -env: -- name: CLICKHOUSE_PASSWORD - valueFrom: - secretKeyRef: - name: clickhouse-credentials - key: password -``` - -### Resources - -Current resource limits: - -```yaml -resources: - requests: - cpu: 500m - memory: 2Gi - limits: - cpu: 2000m - memory: 4Gi -``` - -Adjust based on your workload requirements. 
- -## Connecting AlphaTrion - -After deploying ClickHouse, update your AlphaTrion `values.yaml`: - -```yaml -clickhouse: - enabled: true - host: "clickhouse.alphatrion.svc.cluster.local" - port: 8123 - database: alphatrion_traces - username: alphatrion - password: "alphatrion" # or use existingSecret - initTables: false - -server: - env: - enableTracing: true -``` - -Or use the provided values file: - -```bash -helm upgrade alphatrion ./charts/alphatrion \ - -f ./charts/alphatrion/values-with-clickhouse.yaml -``` - -## Accessing ClickHouse - -### Port Forward - -```bash -kubectl port-forward -n alphatrion clickhouse-0 8123:8123 -``` - -Then access via HTTP: -```bash -curl http://localhost:8123 -``` - -### Using clickhouse-client - -```bash -kubectl exec -it -n alphatrion clickhouse-0 -- clickhouse-client -``` - -Example queries: - -```sql --- List databases -SHOW DATABASES; - --- Use alphatrion_traces database -USE alphatrion_traces; - --- List tables -SHOW TABLES; -``` - -## Scaling - -To scale ClickHouse (single node only in this simple setup): - -```bash -kubectl scale statefulset clickhouse -n alphatrion --replicas=1 -``` - -Note: For multi-node clustering with replication, you'll need to configure ZooKeeper/ClickHouse Keeper and update the ClickHouse configuration files. - -## Upgrading - -To upgrade to a newer ClickHouse version, edit the StatefulSet and change the image tag: - -```yaml -containers: -- name: clickhouse - image: clickhouse/clickhouse-server:24.8-alpine # Update version -``` - -Then apply the changes: - -```bash -kubectl apply -f ./charts/clickhouse/clickhouse-statefulset.yaml -``` - -## Uninstallation - -```bash -# Delete the StatefulSet and Service -kubectl delete -f ./charts/clickhouse/clickhouse-statefulset.yaml - -# Delete PVC if needed (⚠️ This deletes all data!) 
-kubectl delete pvc -n alphatrion clickhouse-data-clickhouse-0 -``` - -## Backup and Restore - -### Backup - -```bash -# Create a backup -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="BACKUP DATABASE alphatrion_traces TO Disk('backups', 'backup-$(date +%Y%m%d)')" - -# Or use kubectl cp to copy data -kubectl cp alphatrion/clickhouse-0:/var/lib/clickhouse ./clickhouse-backup -``` - -### Restore - -```bash -kubectl exec -n alphatrion clickhouse-0 -- clickhouse-client --query="RESTORE DATABASE alphatrion_traces FROM Disk('backups', 'backup-20240101')" -``` - -## Troubleshooting - -### Check ClickHouse logs - -```bash -kubectl logs -n alphatrion clickhouse-0 -f -``` - -### Pod not starting - -```bash -# Check pod events -kubectl describe pod -n alphatrion clickhouse-0 - -# Check PVC events -kubectl describe pvc -n alphatrion clickhouse-data-clickhouse-0 -``` - -### Storage issues - -```bash -# Check PVC status -kubectl get pvc -n alphatrion - -# Check storage class -kubectl get storageclass gp3 - -# Check if gp3 storage class exists -kubectl describe storageclass gp3 -``` - -If gp3 storage class doesn't exist, create it: - -```bash -kubectl apply -f ./charts/clickhouse/storageclass-gp3.yaml -``` - -### Connection issues - -```bash -# Test connectivity from another pod -kubectl run -it --rm test --image=curlimages/curl --restart=Never -- \ - curl http://clickhouse.alphatrion.svc.cluster.local:8123 - -# Check service endpoints -kubectl get endpoints -n alphatrion clickhouse -``` - -## Performance Tuning - -1. **Increase gp3 throughput** in `storageclass-gp3.yaml`: - ```yaml - parameters: - throughput: "500" # Up to 1000 MiB/s - ``` - -2. **Increase resources** in `clickhouse-statefulset.yaml`: - ```yaml - resources: - requests: - cpu: 2000m - memory: 8Gi - limits: - cpu: 4000m - memory: 16Gi - ``` - -3. 
**Use node selectors** for specific instance types: - ```yaml - nodeSelector: - node.kubernetes.io/instance-type: m5.2xlarge - ``` - -## Cost Optimization - -- gp3 provides better performance at lower cost than gp2 -- Right-size storage based on actual usage -- Consider using spot instances for non-production -- Monitor and adjust resource requests/limits based on actual usage diff --git a/charts/clickhouse/clickhouse-ha.yaml b/charts/clickhouse/clickhouse-ha.yaml deleted file mode 100644 index dbd7cc12..00000000 --- a/charts/clickhouse/clickhouse-ha.yaml +++ /dev/null @@ -1,366 +0,0 @@ ---- -# ConfigMap for ClickHouse cluster configuration -apiVersion: v1 -kind: ConfigMap -metadata: - name: clickhouse-config - namespace: alphatrion -data: - config.xml: | - - - information - true - - AlphaTrion ClickHouse Cluster - - - - - clickhouse-keeper-0.clickhouse-keeper.alphatrion.svc.cluster.local - 2181 - - - clickhouse-keeper-1.clickhouse-keeper.alphatrion.svc.cluster.local - 2181 - - - clickhouse-keeper-2.clickhouse-keeper.alphatrion.svc.cluster.local - 2181 - - - - - - - - alphatrion_cluster - - - - - - - true - - clickhouse-0.clickhouse.alphatrion.svc.cluster.local - 9000 - - - clickhouse-1.clickhouse.alphatrion.svc.cluster.local - 9000 - - - clickhouse-2.clickhouse.alphatrion.svc.cluster.local - 9000 - - - - - - - 100 - 1024 - 0.8 - 16 - 16 - - ---- -# ClickHouse Keeper (ZooKeeper replacement) Service -apiVersion: v1 -kind: Service -metadata: - name: clickhouse-keeper - namespace: alphatrion -spec: - ports: - - port: 2181 - name: client - targetPort: 2181 - - port: 9181 - name: raft - targetPort: 9181 - selector: - app: clickhouse-keeper - clusterIP: None - ---- -# ClickHouse Keeper StatefulSet -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: clickhouse-keeper - namespace: alphatrion -spec: - serviceName: clickhouse-keeper - replicas: 3 - selector: - matchLabels: - app: clickhouse-keeper - template: - metadata: - labels: - app: clickhouse-keeper - spec: - 
affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchLabels: - app: clickhouse-keeper - topologyKey: kubernetes.io/hostname - containers: - - name: clickhouse-keeper - image: clickhouse/clickhouse-keeper:24.3-alpine - ports: - - containerPort: 2181 - name: client - - containerPort: 9181 - name: raft - env: - - name: KEEPER_ID - valueFrom: - fieldRef: - fieldPath: metadata.name - volumeMounts: - - name: keeper-data - mountPath: /var/lib/clickhouse-keeper - - name: keeper-config - mountPath: /etc/clickhouse-keeper - command: - - sh - - -c - - | - # Extract keeper ID from pod name (clickhouse-keeper-0 -> 0) - KEEPER_ID=$(echo $KEEPER_ID | sed 's/clickhouse-keeper-//') - KEEPER_ID=$((KEEPER_ID + 1)) - - # Create keeper config - cat > /etc/clickhouse-keeper/keeper_config.xml < - :: - - 2181 - ${KEEPER_ID} - /var/lib/clickhouse-keeper/coordination/log - /var/lib/clickhouse-keeper/coordination/snapshots - - - 10000 - 30000 - warning - - - - - 1 - clickhouse-keeper-0.clickhouse-keeper.alphatrion.svc.cluster.local - 9181 - - - 2 - clickhouse-keeper-1.clickhouse-keeper.alphatrion.svc.cluster.local - 9181 - - - 3 - clickhouse-keeper-2.clickhouse-keeper.alphatrion.svc.cluster.local - 9181 - - - - - EOF - - # Start keeper - exec /usr/bin/clickhouse-keeper --config-file=/etc/clickhouse-keeper/keeper_config.xml - resources: - requests: - cpu: 200m - memory: 512Mi - limits: - cpu: 1000m - memory: 1Gi - livenessProbe: - tcpSocket: - port: 9181 - initialDelaySeconds: 30 - periodSeconds: 10 - failureThreshold: 10 - readinessProbe: - tcpSocket: - port: 9181 - initialDelaySeconds: 10 - periodSeconds: 5 - failureThreshold: 10 - volumes: - - name: keeper-config - emptyDir: {} - volumeClaimTemplates: - - metadata: - name: keeper-data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: gp3 - resources: - requests: - storage: 10Gi - ---- -# ClickHouse Service (ClusterIP for internal 
communication) -apiVersion: v1 -kind: Service -metadata: - name: clickhouse - namespace: alphatrion -spec: - ports: - - port: 8123 - targetPort: 8123 - name: http - - port: 9000 - targetPort: 9000 - name: tcp - - port: 9009 - targetPort: 9009 - name: interserver - selector: - app: clickhouse - clusterIP: None - ---- -# ClickHouse Service (LoadBalancer for external access - optional) -apiVersion: v1 -kind: Service -metadata: - name: clickhouse-external - namespace: alphatrion -spec: - type: ClusterIP - ports: - - port: 8123 - targetPort: 8123 - name: http - - port: 9000 - targetPort: 9000 - name: tcp - selector: - app: clickhouse - ---- -# ClickHouse StatefulSet with 3 replicas -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: clickhouse - namespace: alphatrion -spec: - serviceName: clickhouse - replicas: 3 - selector: - matchLabels: - app: clickhouse - template: - metadata: - labels: - app: clickhouse - spec: - affinity: - podAntiAffinity: - # Prefer to schedule pods on different nodes - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchLabels: - app: clickhouse - topologyKey: kubernetes.io/hostname - # Try to schedule pods in different availability zones - - weight: 50 - podAffinityTerm: - labelSelector: - matchLabels: - app: clickhouse - topologyKey: topology.kubernetes.io/zone - initContainers: - # Wait for ClickHouse Keeper to be ready - - name: wait-for-keeper - image: alpine:3.19 - command: - - sh - - -c - - | - apk add --no-cache netcat-openbsd - until nc -z clickhouse-keeper-0.clickhouse-keeper.alphatrion.svc.cluster.local 2181; do - echo "Waiting for ClickHouse Keeper..." 
- sleep 2 - done - containers: - - name: clickhouse - image: clickhouse/clickhouse-server:24.3-alpine - ports: - - containerPort: 8123 - name: http - - containerPort: 9000 - name: tcp - - containerPort: 9009 - name: interserver - env: - - name: CLICKHOUSE_DB - value: "alphatrion_traces" - - name: CLICKHOUSE_USER - value: "alphatrion" - - name: CLICKHOUSE_PASSWORD - value: "alphatrion" - - name: CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT - value: "1" - # Set shard and replica for macros - - name: CLICKHOUSE_SHARD - value: "01" - - name: CLICKHOUSE_REPLICA - valueFrom: - fieldRef: - fieldPath: metadata.name - volumeMounts: - - name: clickhouse-data - mountPath: /var/lib/clickhouse - - name: clickhouse-config - mountPath: /etc/clickhouse-server/config.d/cluster.xml - subPath: config.xml - resources: - requests: - cpu: 500m - memory: 2Gi - limits: - cpu: 2000m - memory: 4Gi - livenessProbe: - httpGet: - path: /ping - port: 8123 - initialDelaySeconds: 60 - periodSeconds: 10 - timeoutSeconds: 5 - readinessProbe: - httpGet: - path: /ping - port: 8123 - initialDelaySeconds: 30 - periodSeconds: 5 - timeoutSeconds: 3 - volumes: - - name: clickhouse-config - configMap: - name: clickhouse-config - volumeClaimTemplates: - - metadata: - name: clickhouse-data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: gp3 - resources: - requests: - storage: 100Gi diff --git a/charts/clickhouse/clickhouse-statefulset.yaml b/charts/clickhouse/clickhouse-statefulset.yaml deleted file mode 100644 index 52efacca..00000000 --- a/charts/clickhouse/clickhouse-statefulset.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: clickhouse - namespace: alphatrion -spec: - ports: - - port: 8123 - targetPort: 8123 - name: http - - port: 9000 - targetPort: 9000 - name: tcp - selector: - app: clickhouse - clusterIP: None ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: clickhouse - namespace: alphatrion -spec: - serviceName: clickhouse - replicas: 1 - 
selector: - matchLabels: - app: clickhouse - template: - metadata: - labels: - app: clickhouse - spec: - containers: - - name: clickhouse - image: clickhouse/clickhouse-server:24.3-alpine - ports: - - containerPort: 8123 - name: http - - containerPort: 9000 - name: tcp - env: - - name: CLICKHOUSE_DB - value: "alphatrion_traces" - - name: CLICKHOUSE_USER - value: "alphatrion" - - name: CLICKHOUSE_PASSWORD - value: "alphatrion" - - name: CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT - value: "1" - volumeMounts: - - name: clickhouse-data - mountPath: /var/lib/clickhouse - resources: - requests: - cpu: 500m - memory: 2Gi - limits: - cpu: 2000m - memory: 4Gi - volumeClaimTemplates: - - metadata: - name: clickhouse-data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: gp3 - resources: - requests: - storage: 100Gi diff --git a/charts/clickhouse/storageclass-gp3.yaml b/charts/clickhouse/storageclass-gp3.yaml deleted file mode 100644 index 7f58cdce..00000000 --- a/charts/clickhouse/storageclass-gp3.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: gp3 -provisioner: ebs.csi.aws.com -parameters: - type: gp3 - # Optional: customize IOPS and throughput - # iops: "3000" - # throughput: "125" - csi.storage.k8s.io/fstype: ext4 - encrypted: "true" -volumeBindingMode: WaitForFirstConsumer -allowVolumeExpansion: true -reclaimPolicy: Retain