Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 40 additions & 29 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"time"

"github.com/spf13/pflag"
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"

intController "github.com/splunk/splunk-operator/internal/controller"
Expand Down Expand Up @@ -89,24 +90,44 @@ func main() {
// TLS certificate configuration for metrics
var metricsCertPath, metricsCertName, metricsCertKey string

flag.StringVar(&logEncoder, "log-encoder", "json", "log encoding ('json' or 'console')")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
pflag.StringVar(&logEncoder, "log-encoder", "json", "log encoding ('json' or 'console')")
pflag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
pflag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&pprofActive, "pprof", true, "Enable pprof endpoint")
flag.IntVar(&logLevel, "log-level", int(zapcore.InfoLevel), "set log level")
flag.IntVar(&leaseDurationSecond, "lease-duration", leaseDurationSecond, "manager lease duration in seconds")
flag.IntVar(&renewDeadlineSecond, "renew-duration", renewDeadlineSecond, "manager renew duration in seconds")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metrics endpoint binds to. "+
pflag.BoolVar(&pprofActive, "pprof", true, "Enable pprof endpoint")
pflag.IntVar(&logLevel, "log-level", int(zapcore.InfoLevel), "set log level")
pflag.IntVar(&leaseDurationSecond, "lease-duration", leaseDurationSecond, "manager lease duration in seconds")
pflag.IntVar(&renewDeadlineSecond, "renew-duration", renewDeadlineSecond, "manager renew duration in seconds")
pflag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metrics endpoint binds to. "+
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
flag.BoolVar(&secureMetrics, "metrics-secure", false,
pflag.BoolVar(&secureMetrics, "metrics-secure", false,
"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")

// TLS certificate flags for metrics server
flag.StringVar(&metricsCertPath, "metrics-cert-path", "", "The directory that contains the metrics server certificate.")
flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
pflag.StringVar(&metricsCertPath, "metrics-cert-path", "", "The directory that contains the metrics server certificate.")
pflag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
pflag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")

config.DefaultMutableFeatureGate.AddFlag(pflag.CommandLine)

opts := zap.Options{
Development: true,
TimeEncoder: zapcore.RFC3339NanoTimeEncoder,
}
opts.BindFlags(flag.CommandLine)
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
pflag.Parse()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

if allGates := config.DefaultMutableFeatureGate.GetAll(); len(allGates) > 0 {
effectiveStates := make(map[string]bool, len(allGates))
for gate := range allGates {
effectiveStates[string(gate)] = config.DefaultMutableFeatureGate.Enabled(gate)
}
setupLog.Info("Feature gates initialized", "gates", effectiveStates)
}

// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
// More info:
Expand Down Expand Up @@ -147,16 +168,6 @@ func main() {
renewDeadline = time.Duration(renewDeadlineSecond) * time.Second
}

opts := zap.Options{
Development: true,
TimeEncoder: zapcore.RFC3339NanoTimeEncoder,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()

// Logging setup
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

// Configure metrics certificate watcher if metrics certs are provided
var metricsCertWatcher *certwatcher.CertWatcher
if len(metricsCertPath) > 0 {
Expand Down Expand Up @@ -280,10 +291,11 @@ func main() {
os.Exit(1)
}

// Setup centralized validation webhook server (opt-in via ENABLE_VALIDATION_WEBHOOK env var, defaults to false)
enableWebhooks := os.Getenv("ENABLE_VALIDATION_WEBHOOK")
if enableWebhooks == "true" {
// Parse optional timeout configurations from environment
if _, ok := os.LookupEnv("ENABLE_VALIDATION_WEBHOOK"); ok {
setupLog.Info("DEPRECATED: ENABLE_VALIDATION_WEBHOOK env var is deprecated and will be removed in a future release; use --feature-gates=ValidationWebhook=true instead")
}

if config.DefaultMutableFeatureGate.Enabled(config.ValidationWebhook) {
readTimeout := 10 * time.Second
if val := os.Getenv("WEBHOOK_READ_TIMEOUT"); val != "" {
if d, err := time.ParseDuration(val); err == nil {
Expand All @@ -306,16 +318,15 @@ func main() {
Client: mgr.GetClient(),
})

// Add webhook server as a runnable to the manager
if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
return webhookServer.Start(ctx)
})); err != nil {
setupLog.Error(err, "unable to add webhook server to manager")
os.Exit(1)
}
setupLog.Info("Validation webhook enabled via ENABLE_VALIDATION_WEBHOOK=true")
setupLog.Info("Validation webhook enabled")
} else {
setupLog.Info("Validation webhook disabled (set ENABLE_VALIDATION_WEBHOOK=true to enable)")
setupLog.Info("Validation webhook disabled (set --feature-gates=ValidationWebhook=true to enable)")
}
//+kubebuilder:scaffold:builder

Expand Down
9 changes: 5 additions & 4 deletions config/default-with-webhook/kustomization-cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adds namespace to all resources.
# Cluster-scoped deployment WITH webhook enabled (opt-in)
# Requires cert-manager to be installed in the cluster
namespace: splunk-operator
namespace: splunk-operator

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
Expand Down Expand Up @@ -115,7 +115,7 @@ patches:
patch: |-
- op: add
path: /spec/template/spec/containers/0/env
value:
value:
- name: WATCH_NAMESPACE
value: WATCH_NAMESPACE_VALUE
- name: RELATED_IMAGE_SPLUNK_ENTERPRISE
Expand All @@ -124,12 +124,13 @@ patches:
value: splunk-operator
- name: SPLUNK_GENERAL_TERMS
value: SPLUNK_GENERAL_TERMS_VALUE
- name: ENABLE_VALIDATION_WEBHOOK
value: "true"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- op: add
path: /spec/template/spec/containers/0/args/-
value: --feature-gates=ValidationWebhook=true
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
# More info: https://book.kubebuilder.io/reference/metrics
- path: manager_metrics_patch.yaml
Expand Down
9 changes: 5 additions & 4 deletions config/default-with-webhook/kustomization-namespace.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adds namespace to all resources.
# Namespace-scoped deployment WITH webhook enabled (opt-in)
# Requires cert-manager to be installed in the cluster
namespace: splunk-operator
namespace: splunk-operator

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
Expand Down Expand Up @@ -115,7 +115,7 @@ patches:
patch: |-
- op: add
path: /spec/template/spec/containers/0/env
value:
value:
- name: WATCH_NAMESPACE
valueFrom:
fieldRef:
Expand All @@ -126,12 +126,13 @@ patches:
value: splunk-operator
- name: SPLUNK_GENERAL_TERMS
value: SPLUNK_GENERAL_TERMS_VALUE
- name: ENABLE_VALIDATION_WEBHOOK
value: "true"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- op: add
path: /spec/template/spec/containers/0/args/-
value: --feature-gates=ValidationWebhook=true
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
# More info: https://book.kubebuilder.io/reference/metrics
- path: manager_metrics_patch.yaml
Expand Down
9 changes: 5 additions & 4 deletions config/default-with-webhook/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adds namespace to all resources.
# Cluster-scoped deployment WITH webhook enabled (opt-in)
# Requires cert-manager to be installed in the cluster
namespace: splunk-operator
namespace: splunk-operator

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
Expand Down Expand Up @@ -115,7 +115,7 @@ patches:
patch: |-
- op: add
path: /spec/template/spec/containers/0/env
value:
value:
- name: WATCH_NAMESPACE
value: WATCH_NAMESPACE_VALUE
- name: RELATED_IMAGE_SPLUNK_ENTERPRISE
Expand All @@ -124,12 +124,13 @@ patches:
value: splunk-operator
- name: SPLUNK_GENERAL_TERMS
value: SPLUNK_GENERAL_TERMS_VALUE
- name: ENABLE_VALIDATION_WEBHOOK
value: "true"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- op: add
path: /spec/template/spec/containers/0/args/-
value: --feature-gates=ValidationWebhook=true
# [METRICS] The following patch will enable the metrics endpoint using HTTPS and the port :8443.
# More info: https://book.kubebuilder.io/reference/metrics
- path: manager_metrics_patch.yaml
Expand Down
110 changes: 110 additions & 0 deletions docs/FeatureGates.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Feature Gates

The Splunk Operator uses the Kubernetes [FeatureGate](https://pkg.go.dev/k8s.io/component-base/featuregate) pattern to control the rollout of new functionality. Feature gates allow new code to be merged to the main branch without being activated in production, giving teams a safe, per-environment opt-in mechanism.

## Usage

Enable or disable feature gates at operator startup:

```bash
/manager --feature-gates=ValidationWebhook=true
```

## Maturity Lifecycle

| Stage | Default | Can Override | Next Step |
|-----------|---------|-------------|-----------------------------------------|
| **Alpha** | off | Yes | Promote to Beta after validation |
| **Beta** | on | Yes | Promote to GA after sustained stability |
| **GA** | on | No | Remove gate in a future release |

## Current Feature Gates

| Gate | Default | Stage | Since | Description |
|-----------------------|---------|-------|---------|----------------------------------------------------------|
| `ValidationWebhook` | `false` | Alpha | v3.2.0 | Centralized validation webhook server for CR admission |

## Adding a New Feature Gate

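Follow these two steps. A gate that introduces a new CRD and controller also needs the additional steps shown in the example below.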

### 1. Register the gate in `pkg/config/featuregates.go`

Add a constant and an entry in `defaultFeatureGates`:

```go
const (
MyNewFeature featuregate.Feature = "MyNewFeature"
)

var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
// existing gates …
MyNewFeature: {Default: false, PreRelease: featuregate.Alpha},
}
```

### 2. Guard the code path

Check the gate wherever the feature-specific logic runs:

```go
if config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
// feature-specific logic
}
```

This can guard anything — a reconciler code path, a helper function, a webhook handler, an HTTP endpoint, etc.

### Example: Gating a New Controller (CRD)

When the feature gate introduces an entirely new CRD and controller, there are additional steps beyond the basic gate check. All three steps below are **mandatory** for any new CRD behind a feature gate.

#### a. Gate controller registration in `cmd/main.go`

Wrap the `SetupWithManager` call so the controller only starts when the gate is on:

```go
if config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
if err = (&controller.MyNewReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MyNew")
os.Exit(1)
}
}
```

#### b. Add a validating webhook for the gated CRD group

A validating webhook **must** reject CR creation when the gate is off. Without this, users can create resources that no controller will reconcile, leading to silent failures:

```go
// ValidateCreate rejects creation of the gated resource while the
// MyNewFeature gate is disabled; when the gate is enabled it admits the
// object without warnings or further checks.
func (v *MyNewValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
// Fail fast with an actionable message so the user learns which flag to set,
// instead of creating a resource that no controller will reconcile.
if !config.DefaultMutableFeatureGate.Enabled(config.MyNewFeature) {
return nil, fmt.Errorf(
"the MyNewFeature feature is not enabled; "+
"set --feature-gates=MyNewFeature=true to activate")
}
return nil, nil
}
```

#### c. Label the CRD manifests

Every gated CRD **must** carry maturity annotations and labels in `config/crd/bases/`. These signal to operators and tooling which gate controls the CRD and its current stability level:

```yaml
metadata:
annotations:
splunk.com/feature-gate: MyNewFeature
splunk.com/feature-stage: Alpha
labels:
splunk.com/feature-stage: alpha
```

## Promoting a Gate

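- **Alpha → Beta**: Change `Default: false` to `Default: true` and `PreRelease: featuregate.Alpha` to `PreRelease: featuregate.Beta` in `featuregates.go`; update the CRD `splunk.com/feature-stage` annotation to `Beta` and the label to `beta`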
- **Beta → GA**: Set `PreRelease: featuregate.GA` and `LockToDefault: true` in the `FeatureSpec`; update the CRD `splunk.com/feature-stage` annotation to `GA` and the label to `ga`
- **GA → Removed**: Delete the constant and `FeatureSpec` entry; remove the `if` guard in `cmd/main.go`; remove the CRD annotations/labels and the validating webhook
Loading
Loading