diff --git a/CLAUDE.md b/CLAUDE.md index 7991c105fc..6a0d507ff5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -61,8 +61,9 @@ make test-e2e-setup # Setup E2E test environment make test-e2e-cleanup # Cleanup after E2E tests # Test variations -TEST_VIRT=true make test-e2e # Run virtualization tests -TEST_UPGRADE=true make test-e2e # Run upgrade tests +TEST_VIRT=true make test-e2e # Run virtualization tests (community HCO, KubeVirt 1.8+) +TEST_VIRT_GA=true make test-e2e # Run virtualization tests (OpenShift Virtualization from redhat-operators) +TEST_UPGRADE=true make test-e2e # Run upgrade tests TEST_CLI=true make test-e2e # Run CLI-based tests # Run focused tests diff --git a/Makefile b/Makefile index 65adac4a82..9e6921df17 100644 --- a/Makefile +++ b/Makefile @@ -924,11 +924,16 @@ test-e2e-setup: login-required build-must-gather VELERO_INSTANCE_NAME ?= velero-test ARTIFACT_DIR ?= /tmp +# virt HCO_UPSTREAM ?= false +TEST_VIRT_GA ?= false TEST_VIRT ?= false +HCO_INDEX_TAG ?= 1.18.0 +# hcp TEST_HCP ?= false TEST_HCP_EXTERNAL ?= false HCP_EXTERNAL_ARGS ?= "" +# other TEST_CLI ?= false SKIP_MUST_GATHER ?= false TEST_UPGRADE ?= false @@ -938,6 +943,8 @@ $(SED) -r "s/[&]* [!] $(CLUSTER_TYPE)|[!] $(CLUSTER_TYPE) [&]*//")) || $(CLUSTER #TEST_FILTER := $(shell echo '! aws && ! gcp && ! azure' | $(SED) -r "s/[&]* [!] $(CLUSTER_TYPE)|[!] $(CLUSTER_TYPE) [&]*//") ifeq ($(TEST_VIRT),true) TEST_FILTER += && (virt) +else ifeq ($(TEST_VIRT_GA),true) + TEST_FILTER += && (virt) else TEST_FILTER += && (! 
virt) endif @@ -985,6 +992,8 @@ test-e2e: test-e2e-setup install-ginkgo ## Run E2E tests against OADP operator i -artifact_dir=$(ARTIFACT_DIR) \ -kvm_emulation=$(KVM_EMULATION) \ -hco_upstream=$(HCO_UPSTREAM) \ + -hco_community=$(TEST_VIRT) \ + -hco_index_tag=$(HCO_INDEX_TAG) \ -skipMustGather=$(SKIP_MUST_GATHER) \ $(HCP_EXTERNAL_ARGS) \ || EXIT_CODE=$$?; \ diff --git a/build/ci-Dockerfile b/build/ci-Dockerfile index c82986382b..2a620c0ada 100644 --- a/build/ci-Dockerfile +++ b/build/ci-Dockerfile @@ -14,12 +14,16 @@ RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/s chmod +x kubectl && \ mv kubectl /usr/local/bin/ -# Install Node.js and Claude CLI -# Using NodeSource setup script for RHEL-based images -RUN curl -fsSL https://rpm.nodesource.com/setup_20.x | bash - && \ - dnf install -y nodejs && \ - npm install -g @anthropic-ai/claude-code && \ - dnf clean all +# Install virtctl for KubeVirt VM operations in E2E tests +RUN export KV_VERSION=$(curl -s https://storage.googleapis.com/kubevirt-prow/release/kubevirt/kubevirt/stable.txt) && \ + curl -L -o virtctl "https://github.com/kubevirt/kubevirt/releases/download/${KV_VERSION}/virtctl-${KV_VERSION}-linux-${TARGETARCH}" && \ + chmod +x virtctl && \ + mv virtctl /usr/local/bin/ + +# Install Claude CLI (native binary, no Node.js dependency) +RUN curl -fsSL https://claude.ai/install.sh | bash && \ + ln -sf ~/.local/bin/claude /usr/local/bin/claude && \ + claude --version # Clone openshift/velero source code for failure analysis # Uses oadp-dev branch to match OADP operator development diff --git a/docs/design/specs/2026-04-21-kubevirt-datamover-e2e-test-design.md b/docs/design/specs/2026-04-21-kubevirt-datamover-e2e-test-design.md new file mode 100644 index 0000000000..b4327cf115 --- /dev/null +++ b/docs/design/specs/2026-04-21-kubevirt-datamover-e2e-test-design.md @@ -0,0 +1,276 @@ +# Kubevirt Datamover E2E Test Design + +## Summary + +Add a basic E2E test to 
`virt_backup_restore_suite_test.go` that validates the kubevirt-datamover backup path: enabling CBT on a CirrOS VM via HCO configuration and VM labels, deploying OADP with the `kubevirt-datamover` plugin, triggering a Velero backup with `SnapshotMoveData=true`, and verifying that a `VirtualMachineBackupTracker` is created (proving the kubevirt-datamover controller processed the backup). + +## Approach + +Extend the existing HCO/OLM-based virt test infrastructure. No new KubeVirt install paths. + +## Prerequisites + +- HCO version with KubeVirt >= v1.7 (CBT support, released Nov 2025) +- kubevirt-datamover-controller deployed (handled by OADP operator when `kubevirt-datamover` plugin is enabled) +- kubevirt-datamover-plugin image available (Velero init container, configured via DPA `DefaultPluginKubeVirtDataMover`) + +--- + +## Existing KubeVirt Installation Flow (No Changes) + +The existing virt test suite installs KubeVirt via the HyperConverged Cluster Operator (HCO) through OLM. This flow is in `tests/e2e/lib/virt_helpers.go` and is driven by the `BeforeAll` in `virt_backup_restore_suite_test.go`. The test does NOT install raw upstream KubeVirt; it always uses HCO. + +### Step-by-step existing flow + +1. **`GetVirtOperator(client, clientset, dynamicClient, useUpstreamHco)`** (line 65) + - Selects namespace and OLM package based on `upstream` flag: + - OpenShift (default): namespace `openshift-cnv`, PackageManifest `kubevirt-hyperconverged`, catalog `redhat-operators` + - Upstream (`HCO_UPSTREAM=true`): namespace `kubevirt-hyperconverged`, PackageManifest `community-kubevirt-hyperconverged`, catalog `community-operators` + - Reads the `stable` channel from the PackageManifest to get the current CSV name and version + +2. 
**`EnsureVirtInstallation()`** (line 732) — only runs if HCO is not already present + - `EnsureNamespace(v.Namespace)` — creates `openshift-cnv` or `kubevirt-hyperconverged` + - `ensureOperatorGroup()` — creates OperatorGroup (upstream uses empty `TargetNamespaces`) + - `ensureSubscription()` — creates OLM Subscription pointing to the catalog/channel/CSV + - `ensureCsv(5min)` — waits for the ClusterServiceVersion to be ready + - `ensureHco(5min)` — creates the `HyperConverged` CR and waits for health + +3. **`installHco()`** (line 339) — creates the HyperConverged CR with **empty spec**: + ```yaml + apiVersion: hco.kubevirt.io/v1beta1 + kind: HyperConverged + metadata: + name: kubevirt-hyperconverged + namespace: + spec: {} + ``` + HCO then creates and manages the KubeVirt CR, CDI, and other operands. + +4. **Optional: KVM emulation** (`EnsureEmulation()`, line 686) + - Only when `kvmEmulation=true` (cloud clusters without nested virt) + - Patches the HCO CR's **annotation** `kubevirt.kubevirt.io/jsonpatch` with: + ```json + [{"op": "add", "path": "/spec/configuration/developerConfiguration", "value": {"useEmulation": true}}] + ``` + - HCO applies this JSON patch to the KubeVirt CR it manages + +5. **CirrOS boot image setup** + - Downloads latest CirrOS image URL + - Creates DataVolume `cirros` in `openshift-virtualization-os-images` namespace + - Creates DataSource from the PVC + +6. **DPA plugin configuration** + - Appends `DefaultPluginKubeVirt` to `dpaCR.VeleroDefaultPlugins` + +7. **Storage classes and RBAC** + - Creates `test-sc-immediate` and `test-sc-wffc` StorageClasses + - Installs `cirros-rbac.yaml` + +### Key point: HCO annotations propagate to KubeVirt CR + +HCO manages the KubeVirt CR. Direct edits to the KubeVirt CR are overwritten by HCO. To inject KubeVirt-level configuration that HCO doesn't directly expose, the pattern is to use the `kubevirt.kubevirt.io/jsonpatch` annotation on the HCO CR. This is already used for KVM emulation (step 4 above). 
+ +--- + +## CBT Enablement: Two Separate Configurations Required + +Enabling ChangedBlockTracking on a VM requires two distinct cluster-level configurations plus a per-VM label. + +**Note:** An older setup procedure used three `kubevirt.kubevirt.io/jsonpatch` operations to inject `IncrementalBackup` and `UtilityVolumes` feature gates plus the label selector. That is **no longer necessary** — HCO now exposes `incrementalBackup` as a first-class feature gate, and enabling it automatically enables `UtilityVolumes`. Only the label selector still requires the jsonpatch annotation. + +### Configuration 1: HCO Feature Gate (`incrementalBackup`) + +The HCO CR has a feature gate `spec.featureGates.incrementalBackup` (default: `false`). Setting this to `true`: +- Enables the `IncrementalBackup` feature gate in the KubeVirt CR +- Automatically enables the `UtilityVolumes` feature gate (required for backup output storage) +- This is a Tech Preview feature (Alpha graduation) + +**What to set on HCO:** +```yaml +apiVersion: hco.kubevirt.io/v1beta1 +kind: HyperConverged +metadata: + name: kubevirt-hyperconverged +spec: + featureGates: + incrementalBackup: true +``` + +This is a direct field on the HCO spec, so it can be set via a standard merge patch on the HCO resource (no annotation needed). + +### Configuration 2: KubeVirt Label Selector (`changedBlockTrackingLabelSelectors`) + +The KubeVirt CR has a configuration field `spec.configuration.changedBlockTrackingLabelSelectors` that tells KubeVirt which VMs should have CBT enabled, using label selectors. This field is on the **KubeVirt CR**, not directly exposed by HCO. 
+ +**What to set on KubeVirt CR:** +```yaml +apiVersion: kubevirt.io/v1 +kind: KubeVirt +spec: + configuration: + changedBlockTrackingLabelSelectors: + virtualMachineLabelSelector: + matchLabels: + changedBlockTracking: "true" +``` + +Since HCO manages the KubeVirt CR and overwrites direct edits, this must be injected via the `kubevirt.kubevirt.io/jsonpatch` annotation on the HCO CR (same mechanism as KVM emulation): + +```json +[{"op": "add", "path": "/spec/configuration/changedBlockTrackingLabelSelectors", "value": {"virtualMachineLabelSelector": {"matchLabels": {"changedBlockTracking": "true"}}}}] +``` + +**Important:** The `kubevirt.kubevirt.io/jsonpatch` annotation is a single annotation holding a JSON array of patch operations. If KVM emulation is also enabled, both patches must be combined into one annotation value: +```json +[ + {"op": "add", "path": "/spec/configuration/developerConfiguration", "value": {"useEmulation": true}}, + {"op": "add", "path": "/spec/configuration/changedBlockTrackingLabelSelectors", "value": {"virtualMachineLabelSelector": {"matchLabels": {"changedBlockTracking": "true"}}}} +] +``` + +### Configuration 3: VM Label + +The VM itself must carry the matching label. This is baked into the VM manifest: +```yaml +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + labels: + changedBlockTracking: "true" +``` + +### VM Restart Required + +Even with the label present from VM creation, a restart cycle is required for KubeVirt to: +1. Create a backend storage PVC +2. Create a qcow2 overlay on top of the raw disk +3. Update the VM's domain XML + +After restart, the VM's `status.ChangedBlockTracking.State` transitions to `Enabled`. + +### Full CBT activation sequence in the test + +``` +1. EnsureVirtInstallation() — existing flow, installs HCO with empty spec +2. EnableCBTFeatureGate() — patch HCO: spec.featureGates.incrementalBackup = true +3. 
EnableCBTLabelSelector() — patch HCO annotation: jsonpatch to set changedBlockTrackingLabelSelectors on KubeVirt CR +4. Deploy CirrOS VM with label — template has changedBlockTracking: "true" +5. Wait for VM Running +6. Restart VM (stop + start) — required for qcow2 overlay creation +7. Wait for VM Running again +8. Wait for status.ChangedBlockTracking.State == "Enabled" +9. Proceed with backup +``` + +Steps 2 and 3 are idempotent and can be placed in `BeforeAll` so they run once for the entire suite. + +--- + +## Changes + +### 1. New CirrOS VM template with CBT label + +**File:** `tests/e2e/sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml` + +Based on existing `cirros-test.yaml`, with the addition of: +- `metadata.labels.changedBlockTracking: "true"` on the VirtualMachine +- Same CirrOS boot image, same storage class, same resource requests + +### 2. New VirtOperator methods in `tests/e2e/lib/virt_helpers.go` + +#### `EnableCBTFeatureGate() error` + +Patches the HCO CR to set `spec.featureGates.incrementalBackup: true`. Uses the dynamic client to get the HCO, set the nested field, and update. Follows the same retry-on-conflict pattern as `EnsureEmulation`. + +#### `EnableCBTLabelSelector() error` + +Patches the HCO CR's `kubevirt.kubevirt.io/jsonpatch` annotation to inject `changedBlockTrackingLabelSelectors` into the KubeVirt CR. Must handle the case where: +- The annotation doesn't exist yet (create it with just the CBT patch) +- The annotation already has patches (e.g. emulation) — parse the existing JSON array, append the CBT patch if not already present, write back + +#### `StartVm(namespace, name string) error` + +REST call to the KubeVirt subresource API: +``` +PUT /apis/subresources.kubevirt.io/v1/namespaces/{namespace}/virtualmachines/{name}/start +``` +Mirrors the existing `StopVm` method. 
+ +#### `RestartVmAndWaitRunning(namespace, name string, timeout time.Duration) error` + +Stops the VM, waits for Stopped status, starts it, and waits for Running status. + +#### `WaitForCBTEnabled(namespace, name string, timeout time.Duration) error` + +Polls the VM's `status.changedBlockTracking.state` via the dynamic client until it equals `"Enabled"` or times out. + +### 3. DPA configuration in `virt_backup_restore_suite_test.go` + +In `BeforeAll`, add `DefaultPluginKubeVirtDataMover` to `dpaCR.VeleroDefaultPlugins` alongside the existing `DefaultPluginKubeVirt`. This causes the OADP operator to: +- Add the kubevirt-datamover-plugin as a Velero init container +- Deploy the kubevirt-datamover-controller Deployment + +### 4. New test entry in `virt_backup_restore_suite_test.go` + +A new `ginkgo.Entry` in the existing `DescribeTable` with label `"virt"`: + +**"no-application kubevirt-datamover backup, CirrOS VM with CBT"** + +Uses a modified `runVmBackupAndRestore` flow or a dedicated run function that: + +1. Creates DPA (via `prepareBackupAndRestore`) +2. Creates namespace, installs the CBT CirrOS VM template +3. Waits for VM Running +4. Calls `v.EnableCBTFeatureGate()` and `v.EnableCBTLabelSelector()` (idempotent, can also be in BeforeAll) +5. Restarts the VM (`v.RestartVmAndWaitRunning`) +6. Waits for `status.ChangedBlockTracking.State == Enabled` (`v.WaitForCBTEnabled`) +7. Triggers Velero backup (via existing `runBackup` with `CSIDataMover` type for `SnapshotMoveData=true`) +8. **Post-backup verification**: Checks that a `VirtualMachineBackupTracker` CR (`backup.kubevirt.io/v1alpha1`) was created in the VM's namespace. This is the definitive signal that the kubevirt-datamover-controller received and started processing the DataUpload — it creates the VMBT during the Accepted phase before creating the VMB for the actual qcow2 backup. +9. Deletes VM and namespace +10. Runs restore +11. 
**Post-restore verification**: VM comes back running (restore path is best-effort since the kubevirt-datamover-controller doesn't implement DataDownload reconciliation yet) + +### 5. Verification helper + +**`verifyVMBackupTrackerExists(dynamicClient, vmNamespace string)`** + +Uses the dynamic client to list `VirtualMachineBackupTracker` resources (`backup.kubevirt.io/v1alpha1`) in the VM namespace and asserts at least one exists. This proves the full chain worked: BIA plugin created a DataUpload with `dataMover: kubevirt`, and the kubevirt-datamover-controller reconciled it and created the VMBT. + +## Files Changed + +| File | Type | Description | +|------|------|-------------| +| `tests/e2e/sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml` | New | CirrOS VM template with `changedBlockTracking: "true"` label | +| `tests/e2e/lib/virt_helpers.go` | Modified | Add `EnableCBTFeatureGate`, `EnableCBTLabelSelector`, `StartVm`, `RestartVmAndWaitRunning`, `WaitForCBTEnabled` | +| `tests/e2e/virt_backup_restore_suite_test.go` | Modified | Add `DefaultPluginKubeVirtDataMover` to plugins, add CBT test entry, add VMBT verification | + +## Test Labels and Execution + +The test entry uses the `"virt"` label (same as existing VM tests), gated by `TEST_VIRT=true`. If the HCO version doesn't support the `incrementalBackup` feature gate or CBT, the enablement steps will fail with a clear error. + +## Volume Policy: `skip` Action Type + +The kubevirt-datamover-plugin uses Velero's volume policy mechanism to determine which PVCs it should handle. Specifically, PVCs that have the `skip` action type in the volume policy are eligible for the kubevirt datamover path. The `skip` action prevents Velero from performing CSI snapshots on these PVCs, allowing the kubevirt-datamover-plugin's `BackupItemActionV2` to create a `DataUpload` CR with `DataMover: "kubevirt"` instead. 
> **Future**: Once upstream Velero merges the `custom` action type ([velero-io/velero#9678](https://github.com/velero-io/velero/pull/9678)), the kubevirt-datamover-plugin will be updated to check for `custom` with kubevirt-specific parameters (see [kubevirt-datamover-plugin#4](https://github.com/migtools/kubevirt-datamover-plugin/issues/4)).
+
+The E2E test creates a volume policy ConfigMap in the velero namespace with the `skip` action type:
+
+```yaml
+version: v1
+volumePolicies:
+  - conditions:
+      pvcLabels:
+        changedBlockTracking: "true"
+    action:
+      type: skip
+```
+
+This ConfigMap is referenced via `Spec.ResourcePolicy` on the Backup CR. When Velero evaluates volume policies for PVCs with the `changedBlockTracking: "true"` label, it matches the `skip` action and returns `shouldSnapshot=false`, which the kubevirt-datamover-plugin interprets as eligibility for the kubevirt datamover path.
+
+The helpers `EnsureKubevirtVolumePolicy` and `CreateBackupWithVolumePolicy` in `tests/e2e/lib/backup.go` manage this lifecycle. NOTE(review): the `kubevirtVolumePolicyData` constant implemented in `tests/e2e/lib/backup.go` currently writes an empty `conditions: {}` matcher, which skips every PVC in the backup scope rather than only `changedBlockTracking`-labeled ones — align the helper with the policy shown above (or update this section to match the implementation) before merging.
+ +## Out of Scope + +- Restore via kubevirt-datamover (DataDownload controller not implemented) +- Raw upstream KubeVirt daily build installation diff --git a/docs/developer/testing/TESTING.md b/docs/developer/testing/TESTING.md index 7baa171aa6..076096de03 100644 --- a/docs/developer/testing/TESTING.md +++ b/docs/developer/testing/TESTING.md @@ -21,7 +21,9 @@ To get started, you need to provide the following **required** environment varia | `BSL_REGION` | The region of backupLocations | `us-east-1` | false | | `OADP_TEST_NAMESPACE` | The namespace where OADP operator is installed | `openshift-adp` | false | | `OPENSHIFT_CI` | Disable colored output from tests suite run | `true` | false | -| `TEST_VIRT` | Exclusively run Virtual Machine backup/restore testing | `false` | false | +| `TEST_VIRT` | Exclusively run VM backup/restore testing using community HCO from custom CatalogSource (mutually exclusive with TEST_VIRT_GA) | `false` | false | +| `TEST_VIRT_GA` | Exclusively run Virtual Machine backup/restore testing (OpenShift Virtualization from redhat-operators) | `false` | false | +| `HCO_INDEX_TAG` | HCO index image tag for the community CatalogSource (used with TEST_VIRT) | `1.18.0` | false | | `TEST_HCP` | Exclusively run Hypershift backup/restore testing | `false` | false | | `TEST_UPGRADE` | Exclusively run upgrade tests. 
Need to first run `make catalog-test-upgrade`, if testing non production operator | `false` | false | | `TEST_CLI` | Exclusively run CLI-based backup/restore testing | `false` | false | diff --git a/go.mod b/go.mod index 8796b2f0a4..cc85f61875 100644 --- a/go.mod +++ b/go.mod @@ -43,6 +43,7 @@ require ( golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 google.golang.org/api v0.256.0 k8s.io/klog/v2 v2.130.1 + sigs.k8s.io/yaml v1.4.0 ) require ( @@ -199,7 +200,6 @@ require ( sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) replace github.com/vmware-tanzu/velero => github.com/openshift/velero v0.10.2-0.20260413161955-ea34d4d90057 diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go index 5ca5c40946..8fe999fff2 100644 --- a/tests/e2e/e2e_suite_test.go +++ b/tests/e2e/e2e_suite_test.go @@ -46,6 +46,8 @@ var ( kvmEmulation bool useUpstreamHco bool + useCommunityHco bool + hcoIndexTag string skipMustGather bool hcBackupRestoreMode string hcName string @@ -65,6 +67,8 @@ func init() { flag.Int64Var(&flakeAttempts, "flakeAttempts", 3, "Customize the number of flake retries (3)") flag.BoolVar(&kvmEmulation, "kvm_emulation", true, "Enable or disable KVM emulation for virtualization testing") flag.BoolVar(&useUpstreamHco, "hco_upstream", false, "Force use of upstream virtualization operator") + flag.BoolVar(&useCommunityHco, "hco_community", false, "Install community HCO from custom CatalogSource (mutually exclusive with -hco_upstream)") + flag.StringVar(&hcoIndexTag, "hco_index_tag", "1.17.1", "HCO index image tag for community CatalogSource (used with -hco_community)") flag.BoolVar(&skipMustGather, "skipMustGather", false, "avoid errors with local execution and cluster architecture") flag.StringVar(&hcBackupRestoreMode, "hc_backup_restore_mode", string(HCModeCreate), "Type of HC test to run") 
flag.StringVar(&hcName, "hc_name", "", "Name of the HostedCluster to use for HCP tests") @@ -119,6 +123,16 @@ func init() { log.Println("Error parsing HCO_UPSTREAM, it will be disabled by default: ", err) } } + if envValue := os.Getenv("TEST_VIRT"); envValue != "" { + if parsedValue, err := strconv.ParseBool(envValue); err == nil { + useCommunityHco = parsedValue + } else { + log.Println("Error parsing TEST_VIRT, it will be disabled by default: ", err) + } + } + if os.Getenv("HCO_INDEX_TAG") != "" { + hcoIndexTag = os.Getenv("HCO_INDEX_TAG") + } if envValue := os.Getenv("SKIP_MUST_GATHER"); envValue != "" { if parsedValue, err := strconv.ParseBool(envValue); err == nil { skipMustGather = parsedValue @@ -143,6 +157,15 @@ func init() { func TestOADPE2E(t *testing.T) { flag.Parse() + if os.Getenv("OPENSHIFT_CI") != "true" { + log.Println("OPENSHIFT_CI is not set to true, skipping must-gather") + skipMustGather = true + } + + if useUpstreamHco && useCommunityHco { + t.Fatal("Cannot use both -hco_upstream and -hco_community at the same time") + } + var err error kubeConfig = config.GetConfigOrDie() diff --git a/tests/e2e/lib/backup.go b/tests/e2e/lib/backup.go index d1f6221405..c76b149379 100755 --- a/tests/e2e/lib/backup.go +++ b/tests/e2e/lib/backup.go @@ -12,10 +12,15 @@ import ( "github.com/vmware-tanzu/velero/pkg/cmd/util/downloadrequest" "github.com/vmware-tanzu/velero/pkg/cmd/util/output" "github.com/vmware-tanzu/velero/pkg/label" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" ) func CreateBackupForNamespaces(ocClient client.Client, veleroNamespace, backupName string, namespaces []string, defaultVolumesToFsBackup bool, snapshotMoveData bool) error { @@ -50,6 +55,83 @@ func 
CreateCustomBackupForNamespaces(ocClient client.Client, veleroNamespace, ba return ocClient.Create(context.Background(), &backup) } +const kubevirtVolumePolicyName = "kubevirt-volume-policy" + +const kubevirtVolumePolicyData = `version: v1 +volumePolicies: + - conditions: {} + action: + type: skip +` + +// EnsureKubevirtVolumePolicy creates (or updates) the volume policy ConfigMap +// that tells Velero to skip CSI snapshots for CBT-labeled PVCs, allowing the +// kubevirt-datamover-plugin BackupItemActionV2 to handle them instead. +func EnsureKubevirtVolumePolicy(ocClient client.Client, namespace string) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kubevirtVolumePolicyName, + Namespace: namespace, + }, + Data: map[string]string{ + "policy.yaml": kubevirtVolumePolicyData, + }, + } + err := ocClient.Create(context.Background(), cm) + if err != nil { + if apierrors.IsAlreadyExists(err) { + log.Printf("Volume policy ConfigMap %s already exists, updating", kubevirtVolumePolicyName) + existing := &corev1.ConfigMap{} + if getErr := ocClient.Get(context.Background(), client.ObjectKeyFromObject(cm), existing); getErr != nil { + return fmt.Errorf("failed to get existing volume policy ConfigMap: %w", getErr) + } + existing.Data = cm.Data + return ocClient.Update(context.Background(), existing) + } + return fmt.Errorf("failed to create volume policy ConfigMap: %w", err) + } + log.Printf("Created kubevirt volume policy ConfigMap %s/%s", namespace, kubevirtVolumePolicyName) + return nil +} + +// DeleteKubevirtVolumePolicy removes the volume policy ConfigMap. 
+func DeleteKubevirtVolumePolicy(ocClient client.Client, namespace string) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: kubevirtVolumePolicyName, + Namespace: namespace, + }, + } + err := ocClient.Delete(context.Background(), cm) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to delete volume policy ConfigMap: %w", err) + } + return nil +} + +// CreateBackupWithVolumePolicy creates a backup that references the kubevirt +// volume policy ConfigMap via Spec.ResourcePolicy. +func CreateBackupWithVolumePolicy(ocClient client.Client, veleroNamespace, backupName string, namespaces []string, snapshotMoveData bool) error { + backup := velero.Backup{ + ObjectMeta: metav1.ObjectMeta{ + Name: backupName, + Namespace: veleroNamespace, + }, + Spec: velero.BackupSpec{ + IncludedNamespaces: namespaces, + DefaultVolumesToFsBackup: boolPtr(false), + SnapshotMoveData: &snapshotMoveData, + ResourcePolicy: &corev1.TypedLocalObjectReference{ + Kind: "ConfigMap", + Name: kubevirtVolumePolicyName, + }, + }, + } + return ocClient.Create(context.Background(), &backup) +} + +func boolPtr(b bool) *bool { return &b } + func GetBackup(c client.Client, namespace string, name string) (*velero.Backup, error) { backup := velero.Backup{} err := c.Get(context.Background(), client.ObjectKey{ @@ -98,6 +180,62 @@ func IsBackupDone(ocClient client.Client, veleroNamespace, name string) wait.Con } } +var kubevirtDMBackupGvr = schema.GroupVersionResource{ + Group: "backup.kubevirt.io", + Resource: "virtualmachinebackups", + Version: "v1alpha1", +} + +var kubevirtDMBackupTrackerGvr = schema.GroupVersionResource{ + Group: "backup.kubevirt.io", + Resource: "virtualmachinebackuptrackers", + Version: "v1alpha1", +} + +// logKubevirtDMResources lists VirtualMachineBackup and VirtualMachineBackupTracker +// CRs across all namespaces and logs their full YAML for debugging. 
+func logKubevirtDMResources(dynClient dynamic.Interface) { + for _, gvr := range []schema.GroupVersionResource{kubevirtDMBackupGvr, kubevirtDMBackupTrackerGvr} { + list, err := dynClient.Resource(gvr).Namespace("").List(context.Background(), metav1.ListOptions{}) + if err != nil { + log.Printf("unable to list %s: %v", gvr.Resource, err) + continue + } + if len(list.Items) == 0 { + log.Printf("no %s resources found", gvr.Resource) + continue + } + for i := range list.Items { + item := &list.Items[i] + y, err := yaml.Marshal(item.Object) + if err != nil { + log.Printf("failed to marshal %s/%s to YAML: %v", item.GetNamespace(), item.GetName(), err) + continue + } + log.Printf("--- %s %s/%s ---\n%s", gvr.Resource, item.GetNamespace(), item.GetName(), string(y)) + } + } +} + +// IsKubevirtDMBackupDone polls the Velero backup status and, on each iteration, +// logs the YAML of any VirtualMachineBackup or VirtualMachineBackupTracker CRs +// that the kubevirt-datamover-controller has created. +func IsKubevirtDMBackupDone(ocClient client.Client, dynClient dynamic.Interface, veleroNamespace, name string) wait.ConditionFunc { + return func() (bool, error) { + backup, err := GetBackup(ocClient, veleroNamespace, name) + if err != nil { + return false, err + } + if len(backup.Status.Phase) > 0 { + log.Printf("backup phase: %s", backup.Status.Phase) + } + + logKubevirtDMResources(dynClient) + + return !IsBackupPhaseNotDone(string(backup.Status.Phase)), nil + } +} + func IsBackupCompletedSuccessfully(c *kubernetes.Clientset, ocClient client.Client, namespace string, name string) (bool, error) { backup, err := GetBackup(ocClient, namespace, name) if err != nil { diff --git a/tests/e2e/lib/virt_helpers.go b/tests/e2e/lib/virt_helpers.go index 911bb1af85..d532ccc16a 100644 --- a/tests/e2e/lib/virt_helpers.go +++ b/tests/e2e/lib/virt_helpers.go @@ -2,6 +2,7 @@ package lib import ( "context" + "encoding/json" "errors" "fmt" "log" @@ -24,10 +25,52 @@ import ( const ( 
emulationAnnotation = "kubevirt.kubevirt.io/jsonpatch" - useEmulation = `[{"op": "add", "path": "/spec/configuration/developerConfiguration", "value": {"useEmulation": true}}]` + emulationPatchPath = "/spec/configuration/developerConfiguration" stopVmPath = "/apis/subresources.kubevirt.io/v1/namespaces/%s/virtualmachines/%s/stop" + startVmPath = "/apis/subresources.kubevirt.io/v1/namespaces/%s/virtualmachines/%s/start" + + cbtJsonPatchPath = "/spec/configuration/changedBlockTrackingLabelSelectors" ) +var emulationPatch = map[string]interface{}{ + "op": "add", + "path": emulationPatchPath, + "value": map[string]interface{}{"useEmulation": true}, +} + +func parseJsonPatchAnnotation(raw string) ([]interface{}, error) { + if raw == "" { + return nil, nil + } + var patches []interface{} + if err := json.Unmarshal([]byte(raw), &patches); err != nil { + return nil, fmt.Errorf("failed to parse jsonpatch annotation: %w", err) + } + return patches, nil +} + +func patchArrayContainsPath(patches []interface{}, targetPath string) bool { + for _, p := range patches { + m, ok := p.(map[string]interface{}) + if ok && m["path"] == targetPath { + return true + } + } + return false +} + +func setPatchInArray(patches []interface{}, patch map[string]interface{}) []interface{} { + targetPath := patch["path"] + for i, p := range patches { + m, ok := p.(map[string]interface{}) + if ok && m["path"] == targetPath { + patches[i] = patch + return patches + } + } + return append(patches, patch) +} + var packageManifestsGvr = schema.GroupVersionResource{ Group: "packages.operators.coreos.com", Resource: "packagemanifests", @@ -52,41 +95,227 @@ var csvGvr = schema.GroupVersionResource{ Version: "v1alpha1", } +var virtualMachineInstanceGvr = schema.GroupVersionResource{ + Group: "kubevirt.io", + Resource: "virtualmachineinstances", + Version: "v1", +} + +var virtualMachineBackupTrackerGvr = schema.GroupVersionResource{ + Group: "backup.kubevirt.io", + Resource: "virtualmachinebackuptrackers", + 
Version: "v1alpha1", +} + +var kubevirtCrGvr = schema.GroupVersionResource{ + Group: "kubevirt.io", + Resource: "kubevirts", + Version: "v1", +} + +var catalogSourceGvr = schema.GroupVersionResource{ + Group: "operators.coreos.com", + Resource: "catalogsources", + Version: "v1alpha1", +} + +const ( + communityHcoCatalogName = "kubevirt-community-catalog" + communityHcoIndexImage = "quay.io/kubevirt/hyperconverged-cluster-index" +) + type VirtOperator struct { - Client client.Client - Clientset *kubernetes.Clientset - Dynamic dynamic.Interface - Namespace string - Csv string - Version *version.Version - Upstream bool + Client client.Client + Clientset *kubernetes.Clientset + Dynamic dynamic.Interface + Namespace string + Csv string + Version *version.Version + Upstream bool + CommunityIndex string // HCO index image tag (e.g. "1.17.1"); empty means no custom catalog +} + +// communityChannelFromTag derives the OLM subscription channel name from an HCO +// index tag, e.g. "1.18.0" → "stable-v1.18", "1.17.1" → "stable-v1.17". +func communityChannelFromTag(indexTag string) string { + parts := strings.SplitN(indexTag, ".", 3) + if len(parts) >= 2 { + return "stable-v" + parts[0] + "." + parts[1] + } + return "stable-v" + indexTag +} + +// EnsureCommunityHcoCatalog creates a CatalogSource in openshift-marketplace +// pointing to the community HCO index image with the given tag. It then waits +// for the corresponding PackageManifest to become available, which indicates +// the catalog's grpc pod is serving content. 
+func EnsureCommunityHcoCatalog(dynamicClient dynamic.Interface, indexTag string, timeout time.Duration) error { + catalogSource := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "operators.coreos.com/v1alpha1", + "kind": "CatalogSource", + "metadata": map[string]interface{}{ + "name": communityHcoCatalogName, + "namespace": "openshift-marketplace", + }, + "spec": map[string]interface{}{ + "sourceType": "grpc", + "image": communityHcoIndexImage + ":" + indexTag, + "displayName": "KubeVirt Community HCO", + "publisher": "KubeVirt", + }, + }, + } + + existing, err := dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Get(context.Background(), communityHcoCatalogName, metav1.GetOptions{}) + if err == nil { + existingImage, _, _ := unstructured.NestedString(existing.UnstructuredContent(), "spec", "image") + expectedImage := communityHcoIndexImage + ":" + indexTag + if existingImage != expectedImage { + log.Printf("CatalogSource %s exists with stale image %s, updating to %s", communityHcoCatalogName, existingImage, expectedImage) + if err := unstructured.SetNestedField(existing.UnstructuredContent(), expectedImage, "spec", "image"); err != nil { + return fmt.Errorf("failed to set CatalogSource image: %w", err) + } + _, err = dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Update(context.Background(), existing, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update CatalogSource %s: %w", communityHcoCatalogName, err) + } + } else { + log.Printf("CatalogSource %s already exists with correct image %s", communityHcoCatalogName, existingImage) + } + } else { + log.Printf("Creating CatalogSource %s with image %s:%s", communityHcoCatalogName, communityHcoIndexImage, indexTag) + _, err = dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Create(context.Background(), catalogSource, metav1.CreateOptions{}) + if err != nil { + return 
fmt.Errorf("failed to create CatalogSource %s: %w", communityHcoCatalogName, err) + } + } + + // Wait for the packagemanifest to include a channel from the community catalog. + // The community-kubevirt-hyperconverged manifest may already exist from the + // community-operators catalog (with only "stable","1.10.7","1.11.0"), so we + // must wait until the new catalog's channels (e.g. "stable-v1.17") appear. + log.Printf("Waiting for community-kubevirt-hyperconverged PackageManifest to appear") + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + manifest, getErr := dynamicClient.Resource(packageManifestsGvr).Namespace("default").Get(context.Background(), "community-kubevirt-hyperconverged", metav1.GetOptions{}) + if getErr != nil { + log.Printf("PackageManifest not yet available: %v", getErr) + return false, nil + } + channels, _, _ := unstructured.NestedSlice(manifest.UnstructuredContent(), "status", "channels") + for _, ch := range channels { + chMap, ok := ch.(map[string]interface{}) + if !ok { + continue + } + name, _, _ := unstructured.NestedString(chMap, "name") + if strings.HasPrefix(name, "stable-v") { + log.Printf("PackageManifest has community channel: %s", name) + return true, nil + } + } + log.Printf("PackageManifest exists but community stable-v* channel not yet populated, retrying...") + return false, nil + }) + if err != nil { + return fmt.Errorf("timed out waiting for PackageManifest from CatalogSource %s: %w", communityHcoCatalogName, err) + } + log.Printf("CatalogSource %s is ready", communityHcoCatalogName) + return nil +} + +// RemoveCommunityHcoCatalog removes the custom community HCO CatalogSource. 
+func RemoveCommunityHcoCatalog(dynamicClient dynamic.Interface, timeout time.Duration) error { + _, err := dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Get(context.Background(), communityHcoCatalogName, metav1.GetOptions{}) + if err != nil { + log.Printf("CatalogSource %s already removed, no action required", communityHcoCatalogName) + return nil + } + + log.Printf("Deleting CatalogSource %s", communityHcoCatalogName) + err = dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Delete(context.Background(), communityHcoCatalogName, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("failed to delete CatalogSource %s: %w", communityHcoCatalogName, err) + } + + err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + _, getErr := dynamicClient.Resource(catalogSourceGvr).Namespace("openshift-marketplace").Get(context.Background(), communityHcoCatalogName, metav1.GetOptions{}) + return getErr != nil, nil + }) + if err != nil { + return fmt.Errorf("timed out waiting to delete CatalogSource %s: %w", communityHcoCatalogName, err) + } + log.Printf("CatalogSource %s removed", communityHcoCatalogName) + return nil } -// GetVirtOperator fills out a new VirtOperator -func GetVirtOperator(c client.Client, clientset *kubernetes.Clientset, dynamicClient dynamic.Interface, upstream bool) (*VirtOperator, error) { +// GetVirtOperator fills out a new VirtOperator. Set communityIndexTag to a +// non-empty string (e.g. "1.17.1") to use a custom CatalogSource for the +// community HCO operator. The CatalogSource must already exist before calling +// this function (see EnsureCommunityHcoCatalog). 
+func GetVirtOperator(c client.Client, clientset *kubernetes.Clientset, dynamicClient dynamic.Interface, upstream bool, communityIndexTag string) (*VirtOperator, error) { namespace := "openshift-cnv" manifest := "kubevirt-hyperconverged" - if upstream { + channel := "stable" + if communityIndexTag != "" { + namespace = "kubevirt-hyperconverged" + manifest = "community-kubevirt-hyperconverged" + channel = communityChannelFromTag(communityIndexTag) + } else if upstream { namespace = "kubevirt-hyperconverged" manifest = "community-kubevirt-hyperconverged" } - csv, operatorVersion, err := getCsvFromPackageManifest(dynamicClient, manifest) - if err != nil { - log.Printf("Failed to get CSV from package manifest") - return nil, err + v := &VirtOperator{ + Client: c, + Clientset: clientset, + Dynamic: dynamicClient, + Namespace: namespace, + Upstream: upstream || communityIndexTag != "", + CommunityIndex: communityIndexTag, } - v := &VirtOperator{ - Client: c, - Clientset: clientset, - Dynamic: dynamicClient, - Namespace: namespace, - Csv: csv, - Version: operatorVersion, - Upstream: upstream, + // If virt is already installed, read the CSV directly from the existing + // subscription instead of hitting the PackageManifest (which can be + // inconsistent across OLM PackageServer replicas). + if v.IsVirtInstalled() { + log.Printf("Virt already installed, reading CSV from existing subscription") + sub, subErr := v.getOperatorSubscription() + if subErr == nil && sub.Status.InstalledCSV != "" { + log.Printf("Found installed CSV: %s", sub.Status.InstalledCSV) + v.Csv = sub.Status.InstalledCSV + // Parse version from CSV name, e.g. 
"kubevirt-hyperconverged-operator.v1.17.1" -> "1.17.1" + if parts := strings.SplitN(v.Csv, ".v", 2); len(parts) == 2 { + if operatorVersion, parseErr := version.ParseGeneric(parts[1]); parseErr == nil { + v.Version = operatorVersion + } + } + return v, nil + } + log.Printf("Could not read CSV from subscription (%v), falling back to PackageManifest", subErr) + } + + // Virt not yet installed (or subscription unreadable): look up CSV from + // the PackageManifest. Retry to tolerate OLM PackageServer replica skew. + var csv string + var operatorVersion *version.Version + err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + var getErr error + csv, operatorVersion, getErr = getCsvFromPackageManifest(dynamicClient, manifest, channel) + if getErr != nil { + log.Printf("PackageManifest lookup failed, retrying: %v", getErr) + return false, nil + } + return true, nil + }) + if err != nil { + log.Printf("Failed to get CSV from package manifest after retries") + return nil, fmt.Errorf("failed to get CSV from package manifest for channel %s: %w", channel, err) } + v.Csv = csv + v.Version = operatorVersion + return v, nil } @@ -113,14 +342,14 @@ func (v *VirtOperator) makeOperatorGroup() *operatorsv1.OperatorGroup { } } -// getCsvFromPackageManifest returns the current CSV from the first channel +// getCsvFromPackageManifest returns the current CSV from the specified channel // in the given PackageManifest name. Uses the dynamic client because adding // the real PackageManifest API from OLM was actually more work than this. -// Takes the name of the package manifest, and returns the currentCSV string, -// like: kubevirt-hyperconverged-operator.v4.12.8 +// Takes the name of the package manifest and the channel name, and returns +// the currentCSV string, like: kubevirt-hyperconverged-operator.v4.12.8 // Also returns just the version (e.g. 
4.12.8 from above) as a comparable // Version type, so it is easy to check against the current cluster version. -func getCsvFromPackageManifest(dynamicClient dynamic.Interface, name string) (string, *version.Version, error) { +func getCsvFromPackageManifest(dynamicClient dynamic.Interface, name string, channel string) (string, *version.Version, error) { log.Println("Getting packagemanifest...") unstructuredManifest, err := dynamicClient.Resource(packageManifestsGvr).Namespace("default").Get(context.Background(), name, metav1.GetOptions{}) if err != nil { @@ -142,8 +371,8 @@ func getCsvFromPackageManifest(dynamicClient dynamic.Interface, name string) (st } var stableChannel map[string]interface{} - for _, channel := range channels { - currentChannel, ok := channel.(map[string]interface{}) + for _, ch := range channels { + currentChannel, ok := ch.(map[string]interface{}) if !ok { continue } @@ -152,13 +381,13 @@ func getCsvFromPackageManifest(dynamicClient dynamic.Interface, name string) (st continue } log.Printf("Found channel: %s", channelName) - if channelName == "stable" { + if channelName == channel { stableChannel = currentChannel } } if len(stableChannel) == 0 { - return "", nil, errors.New("failed to get stable channel from " + name + " packagemanifest") + return "", nil, errors.New("failed to get channel " + channel + " from " + name + " packagemanifest") } csv, ok, err := unstructured.NestedString(stableChannel, "currentCSV") @@ -248,7 +477,9 @@ func (v *VirtOperator) checkHco() bool { return health == "healthy" } -// Check if KVM emulation is enabled. +// Check if KVM emulation is enabled by looking for the emulation patch inside +// the jsonpatch annotation array. This handles annotations that contain +// additional patches (e.g. CBT label selectors). 
func (v *VirtOperator) checkEmulation() bool { hco, err := v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Get(context.Background(), "kubevirt-hyperconverged", metav1.GetOptions{}) if err != nil { @@ -258,20 +489,23 @@ func (v *VirtOperator) checkEmulation() bool { return false } - // Look for JSON patcher annotation that enables emulation. - patcher, ok, err := unstructured.NestedString(hco.UnstructuredContent(), "metadata", "annotations", emulationAnnotation) + raw, ok, err := unstructured.NestedString(hco.UnstructuredContent(), "metadata", "annotations", emulationAnnotation) if err != nil { log.Printf("Failed to get KVM emulation annotation from HCO: %v", err) return false } - if !ok { + if !ok || raw == "" { log.Printf("No KVM emulation annotation (%s) listed on HCO!", emulationAnnotation) + return false } - if strings.Compare(patcher, useEmulation) == 0 { - return true + + patches, err := parseJsonPatchAnnotation(raw) + if err != nil { + log.Printf("Failed to parse KVM emulation annotation: %v", err) + return false } - return false + return patchArrayContainsPath(patches, emulationPatchPath) } // Creates the target namespace, likely openshift-cnv or kubevirt-hyperconverged, @@ -308,7 +542,16 @@ func (v *VirtOperator) installSubscription() error { StartingCSV: v.Csv, InstallPlanApproval: operatorsv1alpha1.ApprovalAutomatic, } - if v.Upstream { + if v.CommunityIndex != "" { + spec = &operatorsv1alpha1.SubscriptionSpec{ + CatalogSource: communityHcoCatalogName, + CatalogSourceNamespace: "openshift-marketplace", + Package: "community-kubevirt-hyperconverged", + Channel: communityChannelFromTag(v.CommunityIndex), + StartingCSV: v.Csv, + InstallPlanApproval: operatorsv1alpha1.ApprovalAutomatic, + } + } else if v.Upstream { spec = &operatorsv1alpha1.SubscriptionSpec{ CatalogSource: "community-operators", CatalogSourceNamespace: "openshift-marketplace", @@ -373,18 +616,32 @@ func (v *VirtOperator) configureEmulation() error { if !ok { annotations = 
make(map[string]interface{}) } - annotations[emulationAnnotation] = useEmulation - if err := unstructured.SetNestedMap(hco.UnstructuredContent(), annotations, "metadata", "annotations"); err != nil { - return err + var patches []interface{} + if existing, isSet := annotations[emulationAnnotation]; isSet { + existingStr, isStr := existing.(string) + if isStr && existingStr != "" { + patches, err = parseJsonPatchAnnotation(existingStr) + if err != nil { + return err + } + } } - _, err = v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Update(context.Background(), hco, metav1.UpdateOptions{}) + patches = setPatchInArray(patches, emulationPatch) + + patchBytes, err := json.Marshal(patches) if err != nil { + return fmt.Errorf("failed to marshal jsonpatch: %w", err) + } + annotations[emulationAnnotation] = string(patchBytes) + + if err := unstructured.SetNestedMap(hco.UnstructuredContent(), annotations, "metadata", "annotations"); err != nil { return err } - return nil + _, err = v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Update(context.Background(), hco, metav1.UpdateOptions{}) + return err } // Creates target namespace if needed, and waits for it to exist @@ -640,6 +897,31 @@ func (v *VirtOperator) GetVmStatus(namespace, name string) (string, error) { return GetVmStatus(v.Dynamic, namespace, name) } +func (v *VirtOperator) WaitForVMReady(namespace, name string, timeout time.Duration) error { + log.Printf("Waiting for VMI %s/%s Ready condition", namespace, name) + return wait.PollUntilContextTimeout(context.Background(), 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + vmi, err := v.Dynamic.Resource(virtualMachineInstanceGvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return false, nil + } + conditions, found, err := unstructured.NestedSlice(vmi.UnstructuredContent(), "status", "conditions") + if err != nil || !found { + return false, nil + } + for _, c := range conditions { + 
cond, ok := c.(map[string]interface{}) + if !ok { + continue + } + if cond["type"] == "Ready" && cond["status"] == "True" { + log.Printf("VMI %s/%s is Ready", namespace, name) + return true, nil + } + } + return false, nil + }) +} + // StopVm stops a VM with a REST call to "stop". This is needed because a // poweroff from inside the VM results in KubeVirt restarting it. // From the KubeVirt API reference: @@ -800,7 +1082,15 @@ func (v *VirtOperator) EnsureVirtRemoval() error { if err := v.ensureNamespaceRemoved(v.Namespace, 3*time.Minute); err != nil { return err } - log.Printf("Deleting namespace %s", v.Namespace) + log.Printf("Deleted namespace %s", v.Namespace) + + if v.CommunityIndex != "" { + log.Printf("Removing community HCO CatalogSource") + if err := RemoveCommunityHcoCatalog(v.Dynamic, 1*time.Minute); err != nil { + return err + } + log.Printf("Removed CatalogSource") + } return nil } @@ -810,3 +1100,262 @@ func (v *VirtOperator) RemoveVm(namespace, name string, timeout time.Duration) e log.Printf("Removing virtual machine %s/%s", namespace, name) return v.ensureVmRemoval(namespace, name, timeout) } + +// StartVm starts a VM with a REST call to "start". +func (v *VirtOperator) StartVm(namespace, name string) error { + path := fmt.Sprintf(startVmPath, namespace, name) + return v.Clientset.RESTClient().Put().AbsPath(path).Do(context.Background()).Error() +} + +// RestartVmAndWaitRunning stops a VM, waits for it to stop, starts it, and +// waits for it to be running again. 
+func (v *VirtOperator) RestartVmAndWaitRunning(namespace, name string, timeout time.Duration) error { + log.Printf("Restarting VM %s/%s", namespace, name) + + if err := v.StopVm(namespace, name); err != nil { + return fmt.Errorf("failed to stop VM %s/%s: %w", namespace, name, err) + } + + halfTimeout := timeout / 2 + err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, halfTimeout, true, func(ctx context.Context) (bool, error) { + status, err := v.GetVmStatus(namespace, name) + if err != nil { + return false, nil + } + return status == "Stopped", nil + }) + if err != nil { + return fmt.Errorf("timed out waiting for VM %s/%s to stop: %w", namespace, name, err) + } + log.Printf("VM %s/%s stopped, starting again", namespace, name) + + if err := v.StartVm(namespace, name); err != nil { + return fmt.Errorf("failed to start VM %s/%s: %w", namespace, name, err) + } + + err = wait.PollUntilContextTimeout(context.Background(), 10*time.Second, halfTimeout, true, func(ctx context.Context) (bool, error) { + status, err := v.GetVmStatus(namespace, name) + if err != nil { + return false, nil + } + return status == "Running", nil + }) + if err != nil { + return fmt.Errorf("timed out waiting for VM %s/%s to start: %w", namespace, name, err) + } + log.Printf("VM %s/%s restarted successfully", namespace, name) + + return nil +} + +// RequireVEP25Support is a pre-flight check that fails immediately if the +// installed HCO version is older than 1.18 or if the backup.kubevirt.io CRDs +// (VirtualMachineBackup, VirtualMachineBackupTracker) do not exist. +// Call this after EnsureVirtInstallation to gate the test suite early. 
+func (v *VirtOperator) RequireVEP25Support() error { + if v.Version == nil { + return fmt.Errorf("VirtOperator has no version — cannot verify VEP-25 support") + } + minVersion, err := version.ParseSemantic("1.18.0") + if err != nil { + return fmt.Errorf("failed to parse minimum version: %w", err) + } + if !v.Version.AtLeast(minVersion) { + return fmt.Errorf("HCO version %s is too old for VEP-25 (IncrementalBackup); need >= 1.18.0 — upgrade the community HCO or set HCO_INDEX_TAG=1.18.0", v.Version) + } + log.Printf("HCO version %s satisfies VEP-25 minimum (>= 1.18.0)", v.Version) + + crdGvr := schema.GroupVersionResource{Group: "apiextensions.k8s.io", Version: "v1", Resource: "customresourcedefinitions"} + for _, crd := range []string{"virtualmachinebackups.backup.kubevirt.io", "virtualmachinebackuptrackers.backup.kubevirt.io"} { + _, err := v.Dynamic.Resource(crdGvr).Get(context.Background(), crd, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("required CRD %s not found — VEP-25 is not available on this cluster: %w", crd, err) + } + log.Printf("VEP-25 CRD present: %s", crd) + } + return nil +} + +// EnableCBTFeatureGate patches the HyperConverged CR to set +// spec.featureGates.incrementalBackup = true, then waits for the KubeVirt CR +// to reflect "IncrementalBackup" in its featureGates and for the +// backup.kubevirt.io CRDs to appear (requires KubeVirt >= 1.8 / HCO >= 1.18). 
+func (v *VirtOperator) EnableCBTFeatureGate(timeout time.Duration) error { + log.Printf("Enabling incrementalBackup feature gate on HCO") + + err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + hco, err := v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Get(ctx, "kubevirt-hyperconverged", metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("failed to get HCO: %w", err) + } + + current, _, _ := unstructured.NestedBool(hco.UnstructuredContent(), "spec", "featureGates", "incrementalBackup") + log.Printf("HCO spec.featureGates.incrementalBackup current value: %v — setting to true", current) + + if err := unstructured.SetNestedField(hco.UnstructuredContent(), true, "spec", "featureGates", "incrementalBackup"); err != nil { + return false, fmt.Errorf("failed to set incrementalBackup feature gate: %w", err) + } + + _, err = v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Update(ctx, hco, metav1.UpdateOptions{}) + if err != nil { + if apierrors.IsConflict(err) { + log.Printf("HCO modification conflict setting incrementalBackup, retrying...") + return false, nil + } + return false, err + } + return true, nil + }) + if err != nil { + return fmt.Errorf("failed to enable CBT feature gate: %w", err) + } + log.Printf("incrementalBackup feature gate set on HCO, waiting for propagation to KubeVirt CR") + + // Wait for "IncrementalBackup" to appear in the KubeVirt CR featureGates. 
+ err = wait.PollUntilContextTimeout(context.Background(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + kvList, err := v.Dynamic.Resource(kubevirtCrGvr).Namespace(v.Namespace).List(ctx, metav1.ListOptions{}) + if err != nil || len(kvList.Items) == 0 { + log.Printf("KubeVirt CR not yet available: %v", err) + return false, nil + } + kv := &kvList.Items[0] + gates, _, _ := unstructured.NestedStringSlice(kv.UnstructuredContent(), "spec", "configuration", "developerConfiguration", "featureGates") + for _, g := range gates { + if g == "IncrementalBackup" { + log.Printf("IncrementalBackup present in KubeVirt CR featureGates") + return true, nil + } + } + log.Printf("IncrementalBackup not yet in KubeVirt CR featureGates %v, retrying...", gates) + return false, nil + }) + if err != nil { + return fmt.Errorf("timed out waiting for IncrementalBackup to propagate to KubeVirt CR: %w", err) + } + + // Verify the backup.kubevirt.io CRDs exist (VirtualMachineBackup, VirtualMachineBackupTracker). + for _, crd := range []string{"virtualmachinebackups.backup.kubevirt.io", "virtualmachinebackuptrackers.backup.kubevirt.io"} { + crdGvr := schema.GroupVersionResource{Group: "apiextensions.k8s.io", Version: "v1", Resource: "customresourcedefinitions"} + _, err := v.Dynamic.Resource(crdGvr).Get(context.Background(), crd, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("backup CRD %s not found after enabling IncrementalBackup: %w", crd, err) + } + log.Printf("CRD %s exists", crd) + } + + log.Printf("incrementalBackup feature gate fully enabled and verified") + return nil +} + +// EnableCBTLabelSelector patches the HCO's kubevirt.kubevirt.io/jsonpatch +// annotation to inject changedBlockTrackingLabelSelectors into the KubeVirt CR. +// If the annotation already contains patches (e.g. emulation), the CBT patch +// is merged into the existing array. 
+func (v *VirtOperator) EnableCBTLabelSelector(timeout time.Duration) error { + log.Printf("Enabling CBT label selector via HCO jsonpatch annotation") + + cbtPatch := map[string]interface{}{ + "op": "add", + "path": cbtJsonPatchPath, + "value": map[string]interface{}{ + "virtualMachineLabelSelector": map[string]interface{}{ + "matchLabels": map[string]interface{}{ + "changedBlockTracking": "true", + }, + }, + }, + } + + err := wait.PollUntilContextTimeout(context.Background(), 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + hco, err := v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Get(ctx, "kubevirt-hyperconverged", metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("failed to get HCO: %w", err) + } + + annotations, _, _ := unstructured.NestedMap(hco.UnstructuredContent(), "metadata", "annotations") + if annotations == nil { + annotations = make(map[string]interface{}) + } + + var patches []interface{} + if existing, ok := annotations[emulationAnnotation]; ok { + existingStr, isStr := existing.(string) + if isStr && existingStr != "" { + patches, err = parseJsonPatchAnnotation(existingStr) + if err != nil { + return false, err + } + if patchArrayContainsPath(patches, cbtJsonPatchPath) { + log.Printf("CBT label selector patch already present in annotation") + return true, nil + } + } + } + + patches = setPatchInArray(patches, cbtPatch) + patchBytes, err := json.Marshal(patches) + if err != nil { + return false, fmt.Errorf("failed to marshal jsonpatch: %w", err) + } + annotations[emulationAnnotation] = string(patchBytes) + + if err := unstructured.SetNestedMap(hco.UnstructuredContent(), annotations, "metadata", "annotations"); err != nil { + return false, err + } + + _, err = v.Dynamic.Resource(hyperConvergedGvr).Namespace(v.Namespace).Update(ctx, hco, metav1.UpdateOptions{}) + if err != nil { + if apierrors.IsConflict(err) { + log.Printf("HCO modification conflict setting CBT label selector, retrying...") + 
return false, nil + } + return false, err + } + return true, nil + }) + if err != nil { + return fmt.Errorf("failed to enable CBT label selector: %w", err) + } + + log.Printf("CBT label selector enabled via HCO jsonpatch annotation") + return nil +} + +// WaitForCBTEnabled polls the VM's status.changedBlockTracking.state until it +// equals "Enabled" or the timeout is reached. +func (v *VirtOperator) WaitForCBTEnabled(namespace, name string, timeout time.Duration) error { + log.Printf("Waiting for CBT to be enabled on VM %s/%s", namespace, name) + + err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + vm, err := v.Dynamic.Resource(virtualMachineGvr).Namespace(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + log.Printf("Error getting VM %s/%s: %v", namespace, name, err) + return false, nil + } + + state, ok, err := unstructured.NestedString(vm.UnstructuredContent(), "status", "changedBlockTracking", "state") + if err != nil || !ok { + log.Printf("CBT state not yet available on VM %s/%s", namespace, name) + return false, nil + } + + log.Printf("VM %s/%s CBT state: %s", namespace, name, state) + return state == "Enabled", nil + }) + if err != nil { + return fmt.Errorf("timed out waiting for CBT to be enabled on VM %s/%s: %w", namespace, name, err) + } + + log.Printf("CBT is enabled on VM %s/%s", namespace, name) + return nil +} + +// CheckVMBackupTrackerExists checks if any VirtualMachineBackupTracker resources +// exist in the given namespace. Returns true if at least one VMBT is found. 
+func (v *VirtOperator) CheckVMBackupTrackerExists(namespace string) (bool, error) { + list, err := v.Dynamic.Resource(virtualMachineBackupTrackerGvr).Namespace(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return false, fmt.Errorf("failed to list VirtualMachineBackupTrackers in %s: %w", namespace, err) + } + return len(list.Items) > 0, nil +} diff --git a/tests/e2e/lib/virt_storage_helpers.go b/tests/e2e/lib/virt_storage_helpers.go index 11d6b004a5..9dc2b18f5e 100644 --- a/tests/e2e/lib/virt_storage_helpers.go +++ b/tests/e2e/lib/virt_storage_helpers.go @@ -8,6 +8,7 @@ import ( "strings" "time" + corev1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -46,6 +47,11 @@ func (v *VirtOperator) CheckDataVolumeExists(namespace, name string) bool { return unstructuredDataVolume != nil } +func (v *VirtOperator) CheckDataSourceExists(namespace, name string) bool { + _, err := v.Dynamic.Resource(dataSourceGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) + return err == nil +} + // Check the Status.Phase field of the given DataVolume, and make sure it is // marked "Succeeded". func (v *VirtOperator) checkDataVolumeReady(namespace, name string) bool { @@ -212,6 +218,71 @@ func (v *VirtOperator) CreateTargetDataSourceFromPvc(sourceNamespace, destinatio return nil } +// CreateTargetDataSourceFromSnapshot creates a DataSource in destinationNamespace pointing to a snapshot source. 
+func (v *VirtOperator) CreateTargetDataSourceFromSnapshot(sourceNamespace, destinationNamespace, sourceSnapshotName, destinationDataSourceName string) error { + unstructuredDataSource := unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cdi.kubevirt.io/v1beta1", + "kind": "DataSource", + "metadata": map[string]interface{}{ + "name": destinationDataSourceName, + "namespace": destinationNamespace, + }, + "spec": map[string]interface{}{ + "source": map[string]interface{}{ + "snapshot": map[string]interface{}{ + "name": sourceSnapshotName, + "namespace": sourceNamespace, + }, + }, + }, + }, + } + + _, err := v.Dynamic.Resource(dataSourceGVR).Namespace(destinationNamespace).Create(context.Background(), &unstructuredDataSource, metav1.CreateOptions{}) + if err != nil { + if apierrors.IsAlreadyExists(err) { + return nil + } + if strings.Contains(err.Error(), "already exists") { + return nil + } + log.Printf("Error creating DataSource from snapshot: %v", err) + return err + } + + return nil +} + +// Find the given DataSource, and return the snapshot it points to (namespace, name) +func (v *VirtOperator) GetDataSourceSnapshot(ns, name string) (string, string, error) { + unstructuredDataSource, err := v.Dynamic.Resource(dataSourceGVR).Namespace(ns).Get(context.Background(), name, metav1.GetOptions{}) + if err != nil { + log.Printf("Error getting DataSource %s: %v", name, err) + return "", "", err + } + + snapshotName, ok, err := unstructured.NestedString(unstructuredDataSource.UnstructuredContent(), "status", "source", "snapshot", "name") + if err != nil { + log.Printf("Error getting snapshot from DataSource: %v", err) + return "", "", err + } + if !ok { + return "", "", errors.New("failed to get snapshot from " + name + " DataSource") + } + + snapshotNamespace, ok, err := unstructured.NestedString(unstructuredDataSource.UnstructuredContent(), "status", "source", "snapshot", "namespace") + if err != nil { + log.Printf("Error getting snapshot 
namespace from DataSource: %v", err) + return "", "", err + } + if !ok { + return "", "", errors.New("failed to get snapshot namespace from " + name + " DataSource") + } + + return snapshotNamespace, snapshotName, nil +} + // Find the given DataSource, and return the PVC it points to func (v *VirtOperator) GetDataSourcePvc(ns, name string) (string, string, error) { unstructuredDataSource, err := v.Dynamic.Resource(dataSourceGVR).Namespace(ns).Get(context.Background(), name, metav1.GetOptions{}) @@ -261,20 +332,63 @@ func (v *VirtOperator) GetDefaultStorageClass() (*storagev1.StorageClass, error) return nil, errors.New("no default storage class found") } +// getWorkerNodeZones returns the distinct topology zones of all worker nodes. +// This is used to constrain Immediate-mode storage classes so that EBS PVCs +// are never provisioned in AZs that have no schedulable worker nodes. +func (v *VirtOperator) getWorkerNodeZones() ([]string, error) { + nodes, err := v.Clientset.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{ + LabelSelector: "node-role.kubernetes.io/worker", + }) + if err != nil { + return nil, err + } + zoneSet := make(map[string]bool) + for _, node := range nodes.Items { + if zone, ok := node.Labels["topology.kubernetes.io/zone"]; ok { + zoneSet[zone] = true + } + } + zones := make([]string, 0, len(zoneSet)) + for zone := range zoneSet { + zones = append(zones, zone) + } + return zones, nil +} + // Check the VolumeBindingMode of the default storage class, and make an // Immediate-mode copy if it is set to WaitForFirstConsumer. +// allowedTopologies is set to the worker-node AZs so that EBS PVCs are not +// provisioned in zones with no schedulable workers (which would deadlock +// DataMover restore pods). 
func (v *VirtOperator) CreateImmediateModeStorageClass(name string) error { defaultStorageClass, err := v.GetDefaultStorageClass() if err != nil { return err } - immediateStorageClass := defaultStorageClass + immediateStorageClass := defaultStorageClass.DeepCopy() immediateStorageClass.VolumeBindingMode = ptr.To[storagev1.VolumeBindingMode](storagev1.VolumeBindingImmediate) immediateStorageClass.Name = name immediateStorageClass.ResourceVersion = "" immediateStorageClass.Annotations["storageclass.kubernetes.io/is-default-class"] = "false" + workerZones, err := v.getWorkerNodeZones() + if err != nil { + log.Printf("Warning: could not determine worker node zones for allowedTopologies: %v", err) + } else if len(workerZones) > 0 { + log.Printf("Restricting %s to worker zones: %v", name, workerZones) + immediateStorageClass.AllowedTopologies = []corev1.TopologySelectorTerm{ + { + MatchLabelExpressions: []corev1.TopologySelectorLabelRequirement{ + { + Key: "topology.kubernetes.io/zone", + Values: workerZones, + }, + }, + }, + } + } + _, err = v.Clientset.StorageV1().StorageClasses().Create(context.Background(), immediateStorageClass, metav1.CreateOptions{}) if apierrors.IsAlreadyExists(err) { return nil @@ -297,6 +411,9 @@ func (v *VirtOperator) CreateWaitForFirstConsumerStorageClass(name string) error wffcStorageClass.Annotations["storageclass.kubernetes.io/is-default-class"] = "false" _, err = v.Clientset.StorageV1().StorageClasses().Create(context.Background(), wffcStorageClass, metav1.CreateOptions{}) + if apierrors.IsAlreadyExists(err) { + return nil + } return err } diff --git a/tests/e2e/sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml b/tests/e2e/sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml new file mode 100644 index 0000000000..60e57dee6b --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml @@ -0,0 +1,55 @@ +apiVersion: v1 +kind: List +items: + - apiVersion: 
kubevirt.io/v1 + kind: VirtualMachine + metadata: + labels: + app: cirros-test + changedBlockTracking: "true" + name: cirros-test + namespace: cirros-test + spec: + dataVolumeTemplates: + - metadata: + annotations: + cdi.kubevirt.io/storage.deleteAfterCompletion: "false" + name: cirros-test-disk + spec: + pvc: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 150Mi + volumeMode: Block + source: + registry: + pullMethod: node + url: docker://quay.io/kubevirt/cirros-container-disk-demo + running: true + template: + metadata: + name: cirros-test + spec: + domain: + devices: + disks: + - disk: + bus: virtio + name: volume0 + interfaces: + - masquerade: {} + name: default + rng: {} + resources: + requests: + memory: 256M + networks: + - name: default + pod: {} + terminationGracePeriodSeconds: 0 + volumes: + - dataVolume: + name: cirros-test-disk + name: volume0 diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/README.md b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/README.md new file mode 100644 index 0000000000..8dcb571b9a --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/README.md @@ -0,0 +1,205 @@ +# Kubevirt Datamover Manual Test + +This directory contains the manifests needed to manually run the kubevirt-datamover +CBT backup flow that the E2E test `"Kubevirt datamover backup with CBT"` automates. + +## Prerequisites + +- OpenShift cluster with OpenShift Virtualization (HCO >= 1.18) installed (KubeVirt >= v1.8.2) + - HCO 1.18+ and the `backup.kubevirt.io` CRDs are required for VEP-25 (IncrementalBackup / CBT) support + - KubeVirt >= v1.8.2 is required for the QEMU backup-abort fix (KubeVirt PR #16426) +- OADP operator installed +- A working BackupStorageLocation (S3 bucket with credentials) + +## Step 1: Configure HyperConverged Operator for CBT + +Two separate configurations are required on the HCO CR. + +### 1a. 
Enable the incrementalBackup feature gate + +Patch the HCO CR directly -- this is a first-class field: + +```bash +oc patch hyperconverged kubevirt-hyperconverged -n openshift-cnv --type merge -p ' +spec: + featureGates: + incrementalBackup: true +' +``` +**note:** this may be outdated +This enables both the `IncrementalBackup` and `UtilityVolumes` feature gates on +the KubeVirt CR automatically. + +### 1b. Enable the CBT label selector via jsonpatch annotation + +The `changedBlockTrackingLabelSelectors` field lives on the KubeVirt CR, which is +managed by HCO. To inject it without HCO overwriting it, use the jsonpatch +annotation on the HCO CR: + +```bash +oc annotate hyperconverged kubevirt-hyperconverged -n openshift-cnv --overwrite \ + kubevirt.kubevirt.io/jsonpatch='[{"op":"add","path":"/spec/configuration/changedBlockTrackingLabelSelectors","value":{"virtualMachineLabelSelector":{"matchLabels":{"changedBlockTracking":"true"}}}}]' +``` + +**If KVM emulation is also enabled**, combine both patches into one annotation: + +```bash +oc annotate hyperconverged kubevirt-hyperconverged -n openshift-cnv --overwrite \ + kubevirt.kubevirt.io/jsonpatch='[{"op":"add","path":"/spec/configuration/developerConfiguration","value":{"useEmulation":true}},{"op":"add","path":"/spec/configuration/changedBlockTrackingLabelSelectors","value":{"virtualMachineLabelSelector":{"matchLabels":{"changedBlockTracking":"true"}}}}]' +``` + +### Verify CBT is configured + +```bash +oc get kubevirt kubevirt-hyperconverged -n openshift-cnv -o jsonpath='{.spec.configuration.changedBlockTrackingLabelSelectors}' +``` + +Expected output: +```json +{"virtualMachineLabelSelector":{"matchLabels":{"changedBlockTracking":"true"}}} +``` + +## Step 2: Configure the DPA + +The DPA must include both the `kubevirt` and `kubevirt-datamover` default plugins. 
+The `kubevirt-datamover` plugin causes the OADP operator to deploy: +- The kubevirt-datamover-plugin as a Velero init container +- The kubevirt-datamover-controller Deployment + +Example DPA spec (adjust BSL/credentials for your environment): + +```yaml +apiVersion: oadp.openshift.io/v1alpha1 +kind: DataProtectionApplication +metadata: + name: velero-test + namespace: openshift-adp +spec: + configuration: + velero: + defaultPlugins: + - openshift + - csi + - aws + - kubevirt + - kubevirt-datamover + nodeAgent: + enable: true + uploaderType: kopia + backupLocations: + - velero: + provider: aws + default: true + objectStorage: + bucket: + prefix: velero + config: + region: + credential: + name: cloud-credentials + key: cloud +``` + +### Verify the datamover controller is running + +```bash +oc get deployment -n openshift-adp | grep datamover +oc get pods -n openshift-adp | grep datamover +``` + +## Step 3: Deploy the CirrOS VM with CBT label + +```bash +oc apply -f cirros-vm-cbt.yaml +``` + +Wait for the VM to be Running: + +```bash +oc get vm -n cirros-test cirros-test -w +``` + +## Step 4: Verify CBT is enabled on the VM + +With KubeVirt >= v1.8.2 and the feature gate + label selector configured in Step 1, +CBT is activated when the VM first boots — no manual restart cycle is required. + +If you are on an older KubeVirt version or CBT does not appear enabled after boot, +you can trigger activation with a stop/start cycle: + +```bash +virtctl stop cirros-test -n cirros-test +# Wait for Stopped... +oc wait vm cirros-test -n cirros-test --for=jsonpath='{.status.printableStatus}'=Stopped --timeout=5m + +virtctl start cirros-test -n cirros-test +# Wait for Running... 
+oc wait vm cirros-test -n cirros-test --for=jsonpath='{.status.printableStatus}'=Running --timeout=5m +``` + +Check that CBT is active: + +```bash +oc get vm cirros-test -n cirros-test -o jsonpath='{.status.changedBlockTracking.state}' +``` + +Expected output: `Enabled` + +## Step 5: Create the volume policy ConfigMap + +This ConfigMap tells Velero to skip CSI snapshots for PVCs, allowing the +kubevirt-datamover-plugin BackupItemActionV2 to handle them instead. + +```bash +oc apply -f volume-policy.yaml +``` + +## Step 6: Create the backup + +```bash +oc apply -f backup-cirros.yaml +``` + +### Monitor the backup + +```bash +oc get backup kubevirt-dm-backup-1 -n openshift-adp -w +``` + +### Check for kubevirt-datamover CRs + +These CRs are created by the kubevirt-datamover-controller when it processes +the DataUpload. Their presence confirms the datamover path is active. + +```bash +oc get virtualmachinebackuptrackers -A -o yaml +oc get virtualmachinebackups -A -o yaml +``` + +### Verify backup completed + +```bash +oc get backup kubevirt-dm-backup-1 -n openshift-adp -o jsonpath='{.status.phase}' +``` + +Expected output: `Completed` + +## Additional VM manifests + +This directory also contains VM manifests for other guest OS options that use the +same CBT + datamover flow. They are not yet exercised by automated CI but can be +used for manual testing following the same steps above. 
+ +| File | VM name | Namespace | Notes | +|------|---------|-----------|-------| +| `fedora-todolist-cbt.yaml` | `fedora-todolist` | `mysql-persistent` | Fedora VM running a todolist/mariadb workload | +| `centos-stream10-cbt.yaml` | `centos-stream10-todolist` | `mysql-persistent` | CentOS Stream 10 VM running a todolist/mariadb workload | +| `backup-fedora.yaml` | — | `mysql-persistent` | Velero Backup CR for the Fedora/CentOS VMs; adjust `storageLocation` to match your DPA's BSL name | + +## Cleanup + +```bash +oc delete backup kubevirt-dm-backup-1 -n openshift-adp +oc delete configmap kubevirt-volume-policy -n openshift-adp +oc delete namespace cirros-test +``` diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-cirros.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-cirros.yaml new file mode 100644 index 0000000000..341e0ff5bd --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-cirros.yaml @@ -0,0 +1,13 @@ +apiVersion: velero.io/v1 +kind: Backup +metadata: + name: kubevirt-dm-backup-1 + namespace: openshift-adp +spec: + includedNamespaces: + - cirros-test + defaultVolumesToFsBackup: false + snapshotMoveData: true + resourcePolicy: + kind: ConfigMap + name: kubevirt-volume-policy diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-fedora.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-fedora.yaml new file mode 100644 index 0000000000..d13c39e0ec --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/backup-fedora.yaml @@ -0,0 +1,13 @@ +apiVersion: velero.io/v1 +kind: Backup +metadata: + name: kubevirt-dm-backup-fedora1 + namespace: openshift-adp +spec: + includedNamespaces: + - mysql-persistent + defaultVolumesToFsBackup: false + snapshotMoveData: true + resourcePolicy: + kind: ConfigMap + name: kubevirt-volume-policy diff --git 
a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/centos-stream10-cbt.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/centos-stream10-cbt.yaml new file mode 100644 index 0000000000..446bfcbad7 --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/centos-stream10-cbt.yaml @@ -0,0 +1,192 @@ +apiVersion: v1 +kind: List +items: + - kind: Namespace + apiVersion: v1 + metadata: + name: mysql-persistent + labels: + app: mysql + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: mysql-persistent-sa + namespace: mysql-persistent + labels: + component: mysql-persistent + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: mysql + namespace: mysql-persistent + labels: + app: mysql + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - kind: SecurityContextConstraints + apiVersion: security.openshift.io/v1 + metadata: + name: mysql-persistent-scc + allowPrivilegeEscalation: true + allowPrivilegedContainer: true + runAsUser: + type: RunAsAny + seLinuxContext: + type: RunAsAny + fsGroup: + type: RunAsAny + supplementalGroups: + type: RunAsAny + volumes: + - '*' + users: + - system:admin + - system:serviceaccount:mysql-persistent:mysql-persistent-sa + - apiVersion: v1 + kind: Service + metadata: + annotations: + template.openshift.io/expose-uri: mariadb://{.spec.clusterIP}:{.spec.ports[?(.name=="mysql")].port} + name: mysql + namespace: mysql-persistent + labels: + app: mysql + service: mysql + spec: + ports: + - protocol: TCP + name: mysql + port: 3306 + selector: + app: mysql + - apiVersion: v1 + kind: Service + metadata: + name: todolist + namespace: mysql-persistent + labels: + app: todolist + service: todolist + e2e-app: "true" + spec: + ports: + - name: web + port: 8000 + targetPort: 8000 + selector: + app: todolist + service: todolist + - apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: todolist-route + namespace: mysql-persistent + spec: + path: 
"/" + to: + kind: Service + name: todolist + - apiVersion: kubevirt.io/v1 + kind: VirtualMachine + metadata: + name: centos-stream10-todolist + namespace: mysql-persistent + labels: + changedBlockTracking: "true" + spec: + dataVolumeTemplates: + - metadata: + annotations: + cdi.kubevirt.io/storage.deleteAfterCompletion: "false" + name: centos-stream10-todolist-disk + spec: + pvc: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 30Gi + volumeMode: Block + source: + registry: + pullMethod: node + url: docker://quay.io/containerdisks/centos-stream:10 + running: true + template: + metadata: + annotations: + vm.kubevirt.io/flavor: tiny + vm.kubevirt.io/os: centos-stream10 + vm.kubevirt.io/workload: server + spec: + architecture: amd64 + domain: + devices: + disks: + - disk: + bus: virtio + name: rootdisk + - disk: + bus: virtio + name: cloudinitdisk + interfaces: + - macAddress: '02:73:43:00:00:08' + masquerade: {} + model: virtio + name: default + networkInterfaceMultiqueue: true + rng: {} + features: + acpi: {} + firmware: + bootloader: + bios: {} + machine: + type: pc-q35-rhel9.2.0 + memory: + guest: 2Gi + resources: {} + networks: + - name: default + pod: {} + terminationGracePeriodSeconds: 180 + volumes: + - dataVolume: + name: centos-stream10-todolist-disk + name: rootdisk + - cloudInitConfigDrive: + userData: |- + #cloud-config + user: cloud-user + password: dog8code + chpasswd: { expire: False } + packages: + - mariadb-server + - policycoreutils-python-utils + - unzip + - wget + runcmd: + - systemctl stop firewalld + - systemctl disable firewalld + - systemctl start mariadb + - systemctl enable mariadb + - mysql -uroot -e "CREATE DATABASE todolist; USE todolist; CREATE USER 'test'@'localhost' IDENTIFIED BY 'test';" + - mysql -uroot -e "grant all privileges on todolist.* to test@'localhost' identified by 'test'; FLUSH PRIVILEGES;" + - pushd /home/cloud-user/ + - wget 
https://github.com/weshayutin/todolist-mariadb-go/releases/download/testing3/todolist-linux-amd64.zip + - unzip todolist-linux-amd64.zip + - chown -R cloud-user:cloud-user /home/cloud-user + - semanage fcontext --add --type bin_t '/home/cloud-user/todolist-linux-amd64' + - restorecon -Fv /home/cloud-user/todolist-linux-amd64 + - cp systemd/todolist-mariadb.service /etc/systemd/system/ + - popd + - systemctl daemon-reload + - systemctl start todolist-mariadb.service + - systemctl enable todolist-mariadb.service + - systemctl status todolist-mariadb.service + - systemctl disable cloud-init + name: cloudinitdisk diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cirros-vm-cbt.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cirros-vm-cbt.yaml new file mode 100644 index 0000000000..7b557c8e3c --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cirros-vm-cbt.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: cirros-test +--- +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + labels: + app: cirros-test + changedBlockTracking: "true" + name: cirros-test + namespace: cirros-test +spec: + dataVolumeTemplates: + - metadata: + annotations: + cdi.kubevirt.io/storage.deleteAfterCompletion: "false" + name: cirros-test-disk + spec: + pvc: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 150Mi + volumeMode: Block + source: + registry: + pullMethod: node + url: docker://quay.io/kubevirt/cirros-container-disk-demo + running: true + template: + metadata: + name: cirros-test + spec: + domain: + devices: + disks: + - disk: + bus: virtio + name: volume0 + interfaces: + - masquerade: {} + name: default + rng: {} + resources: + requests: + memory: 256M + networks: + - name: default + pod: {} + terminationGracePeriodSeconds: 0 + volumes: + - dataVolume: + name: cirros-test-disk + name: volume0 diff --git 
a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cleanup.sh b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cleanup.sh new file mode 100755 index 0000000000..bf09619bbb --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/cleanup.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +# cleanup.sh - Clean up kubevirt-datamover resources left behind after a backup run. +# +# Removes: +# - Datamover uploader pods (kubevirt-dm-du-* in OADP namespace) +# - Datamover staging PVCs (kubevirt-dm-pvc-du-* in OADP namespace) +# - VMB source PVCs (kubevirt-backup-du-* in APP namespace) +# - Orphaned / Released Retain PVs (backing any of the above) +# - VirtualMachineBackup (vmb-* in APP namespace) +# - VirtualMachineBackupTracker (vmbt-* in APP namespace) +# - DataUpload objects (du-kubevirt-dm-* in OADP namespace) +# +# Usage: +# ./cleanup.sh [APP_NAMESPACE] [OADP_NAMESPACE] +# +# Defaults: +# APP_NAMESPACE - auto-detected from VirtualMachineBackup resources +# OADP_NAMESPACE - openshift-adp + +set -euo pipefail + +APP_NS="${1:-}" +OADP_NS="${2:-openshift-adp}" + +# ── colours ────────────────────────────────────────────────────────────────── +RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'; CYAN='\033[0;36m'; NC='\033[0m' +info() { echo -e "${CYAN}[INFO]${NC} $*"; } +ok() { echo -e "${GREEN}[OK]${NC} $*"; } +warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +deleted() { echo -e "${GREEN}[DEL]${NC} $*"; } + +# ── helpers ─────────────────────────────────────────────────────────────────── +delete_if_exists() { + local kind="$1" name="$2" ns_flag="${3:-}" + if oc get "$kind" "$name" $ns_flag &>/dev/null; then + oc delete "$kind" "$name" $ns_flag --ignore-not-found + deleted "$kind/$name" + fi +} + +wait_for_deletion() { + local kind="$1" name="$2" ns_flag="${3:-}" timeout="${4:-60}" + local elapsed=0 + while oc get "$kind" "$name" $ns_flag &>/dev/null; do + if (( elapsed >= timeout )); then + warn "Timed out waiting for $kind/$name 
to be deleted" + return 1 + fi + sleep 2; (( elapsed += 2 )) + done +} + +# ── auto-detect app namespace ───────────────────────────────────────────────── +if [[ -z "$APP_NS" ]]; then + info "No APP_NAMESPACE specified, auto-detecting from VirtualMachineBackup resources..." + APP_NS=$(oc get virtualmachinebackup -A --no-headers 2>/dev/null \ + | awk '{print $1}' | sort -u | head -1 || true) + if [[ -z "$APP_NS" ]]; then + # Fall back to VirtualMachineBackupTracker + APP_NS=$(oc get virtualmachinebackuptracker -A --no-headers 2>/dev/null \ + | awk '{print $1}' | sort -u | head -1 || true) + fi + if [[ -z "$APP_NS" ]]; then + warn "Could not auto-detect application namespace. Set APP_NAMESPACE as first argument." + warn "Skipping VirtualMachineBackup/Tracker cleanup." + else + info "Auto-detected application namespace: ${APP_NS}" + fi +fi + +echo "" +info "=== kubevirt-datamover cleanup ===" +info " OADP namespace : ${OADP_NS}" +info " App namespace : ${APP_NS:-}" +echo "" + +# ── 1. Datamover uploader pods ──────────────────────────────────────────────── +info "--- Datamover uploader pods (${OADP_NS}) ---" +pods=$(oc get pods -n "${OADP_NS}" --no-headers 2>/dev/null \ + | awk '/^kubevirt-dm-du-/{print $1}' || true) +if [[ -z "$pods" ]]; then + ok "No datamover uploader pods found." +else + for pod in $pods; do + oc delete pod "$pod" -n "${OADP_NS}" --ignore-not-found + deleted "pod/$pod" + done +fi + +# ── 2. DataUpload objects ───────────────────────────────────────────────────── +info "--- DataUpload objects (${OADP_NS}) ---" +dus=$(oc get dataupload -n "${OADP_NS}" --no-headers 2>/dev/null \ + | awk '/^du-kubevirt-dm-/{print $1}' || true) +if [[ -z "$dus" ]]; then + ok "No kubevirt-dm DataUpload objects found." +else + for du in $dus; do + oc delete dataupload "$du" -n "${OADP_NS}" --ignore-not-found + deleted "dataupload/$du" + done +fi + +# ── 3. 
VirtualMachineBackup + VirtualMachineBackupTracker ──────────────────── +if [[ -n "$APP_NS" ]]; then + info "--- VirtualMachineBackup (${APP_NS}) ---" + vmbs=$(oc get virtualmachinebackup -n "${APP_NS}" --no-headers 2>/dev/null \ + | awk '{print $1}' || true) + if [[ -z "$vmbs" ]]; then + ok "No VirtualMachineBackup objects found." + else + for vmb in $vmbs; do + # Remove finalizer first - VMBs can get stuck with one + oc patch virtualmachinebackup "$vmb" -n "${APP_NS}" \ + --type=merge -p '{"metadata":{"finalizers":[]}}' &>/dev/null || true + oc delete virtualmachinebackup "$vmb" -n "${APP_NS}" --ignore-not-found + deleted "virtualmachinebackup/$vmb" + done + fi + + info "--- VirtualMachineBackupTracker (${APP_NS}) ---" + vmbts=$(oc get virtualmachinebackuptracker -n "${APP_NS}" --no-headers 2>/dev/null \ + | awk '{print $1}' || true) + if [[ -z "$vmbts" ]]; then + ok "No VirtualMachineBackupTracker objects found." + else + for vmbt in $vmbts; do + oc patch virtualmachinebackuptracker "$vmbt" -n "${APP_NS}" \ + --type=merge -p '{"metadata":{"finalizers":[]}}' &>/dev/null || true + oc delete virtualmachinebackuptracker "$vmbt" -n "${APP_NS}" --ignore-not-found + deleted "virtualmachinebackuptracker/$vmbt" + done + fi + + info "--- VMB source PVCs (kubevirt-backup-du-* in ${APP_NS}) ---" + vmbpvcs=$(oc get pvc -n "${APP_NS}" --no-headers 2>/dev/null \ + | awk '/^kubevirt-backup-du-/{print $1}' || true) + if [[ -z "$vmbpvcs" ]]; then + ok "No VMB source PVCs found." + else + for pvc in $vmbpvcs; do + oc delete pvc "$pvc" -n "${APP_NS}" --ignore-not-found + deleted "pvc/$pvc (${APP_NS})" + done + fi +fi + +# ── 4. Staging PVCs in OADP namespace ──────────────────────────────────────── +info "--- Datamover staging PVCs (kubevirt-dm-pvc-du-* in ${OADP_NS}) ---" +staging_pvcs=$(oc get pvc -n "${OADP_NS}" --no-headers 2>/dev/null \ + | awk '/^kubevirt-dm-pvc-du-/{print $1}' || true) +if [[ -z "$staging_pvcs" ]]; then + ok "No staging PVCs found." 
+else + # Collect backing PV names before deleting PVCs + declare -a pvs_to_delete=() + for pvc in $staging_pvcs; do + pv=$(oc get pvc "$pvc" -n "${OADP_NS}" \ + -o jsonpath='{.spec.volumeName}' 2>/dev/null || true) + [[ -n "$pv" ]] && pvs_to_delete+=("$pv") + oc delete pvc "$pvc" -n "${OADP_NS}" --ignore-not-found + deleted "pvc/$pvc (${OADP_NS})" + done + + # Wait briefly for PVCs to clear before touching PVs + sleep 3 + + # ── 5. Backing PVs (Retain policy → won't auto-delete) ─────────────────── + info "--- Backing PVs (Retain policy) ---" + for pv in "${pvs_to_delete[@]+"${pvs_to_delete[@]}"}"; do + policy=$(oc get pv "$pv" -o jsonpath='{.spec.persistentVolumeReclaimPolicy}' 2>/dev/null || true) + if [[ "$policy" == "Retain" ]]; then + oc delete pv "$pv" --ignore-not-found + deleted "pv/$pv (Retain)" + else + ok "pv/$pv has ${policy} policy — will self-delete." + fi + done +fi + +# ── 6. Any remaining Released/orphaned Retain PVs for kubevirt-dm ──────────── +info "--- Orphaned Released Retain PVs (kubevirt-dm-pvc-du-*) ---" +orphan_pvs=$(oc get pv --no-headers 2>/dev/null \ + | awk '/Retain.*Released.*kubevirt-dm-pvc-du-/{print $1}' || true) +if [[ -z "$orphan_pvs" ]]; then + ok "No orphaned Released PVs found." 
+else + for pv in $orphan_pvs; do + oc delete pv "$pv" --ignore-not-found + deleted "pv/$pv (orphaned Released)" + done +fi + +echo "" +ok "=== Cleanup complete ===" +echo "" +info "Remaining kubevirt-dm resources:" +echo " Pods: $(oc get pods -n "${OADP_NS}" --no-headers 2>/dev/null | grep -c kubevirt-dm || echo 0)" +echo " PVCs: $(oc get pvc -n "${OADP_NS}" --no-headers 2>/dev/null | grep -c kubevirt-dm || echo 0)" +echo " PVs: $(oc get pv --no-headers 2>/dev/null | grep -c kubevirt-dm || echo 0)" +if [[ -n "$APP_NS" ]]; then +echo " VMB: $(oc get virtualmachinebackup -n "${APP_NS}" --no-headers 2>/dev/null | wc -l | tr -d ' ')" +echo " VMBT: $(oc get virtualmachinebackuptracker -n "${APP_NS}" --no-headers 2>/dev/null | wc -l | tr -d ' ')" +fi diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/fedora-todolist-cbt.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/fedora-todolist-cbt.yaml new file mode 100644 index 0000000000..22d4384f2d --- /dev/null +++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/fedora-todolist-cbt.yaml @@ -0,0 +1,193 @@ +apiVersion: v1 +kind: List +items: + - kind: Namespace + apiVersion: v1 + metadata: + name: mysql-persistent + labels: + app: mysql + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: mysql-persistent-sa + namespace: mysql-persistent + labels: + component: mysql-persistent + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: mysql + namespace: mysql-persistent + labels: + app: mysql + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + - kind: SecurityContextConstraints + apiVersion: security.openshift.io/v1 + metadata: + name: mysql-persistent-scc + allowPrivilegeEscalation: true + allowPrivilegedContainer: true + runAsUser: + type: RunAsAny + seLinuxContext: + type: RunAsAny + fsGroup: + type: RunAsAny + supplementalGroups: + type: RunAsAny + volumes: + - '*' + users: + - system:admin + - 
system:serviceaccount:mysql-persistent:mysql-persistent-sa + - apiVersion: v1 + kind: Service + metadata: + annotations: + template.openshift.io/expose-uri: mariadb://{.spec.clusterIP}:{.spec.ports[?(.name=="mysql")].port} + name: mysql + namespace: mysql-persistent + labels: + app: mysql + service: mysql + spec: + ports: + - protocol: TCP + name: mysql + port: 3306 + selector: + app: mysql + - apiVersion: v1 + kind: Service + metadata: + name: todolist + namespace: mysql-persistent + labels: + app: todolist + service: todolist + e2e-app: "true" + spec: + ports: + - name: web + port: 8000 + targetPort: 8000 + selector: + app: todolist + service: todolist + - apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: todolist-route + namespace: mysql-persistent + spec: + path: "/" + to: + kind: Service + name: todolist + - apiVersion: kubevirt.io/v1 + kind: VirtualMachine + metadata: + name: fedora-todolist + namespace: mysql-persistent + labels: + changedBlockTracking: "true" + spec: + dataVolumeTemplates: + - apiVersion: cdi.kubevirt.io/v1beta1 + kind: DataVolume + metadata: + name: fedora-todolist-disk + spec: + sourceRef: + kind: DataSource + name: fedora + namespace: openshift-virtualization-os-images + storage: + resources: + requests: + storage: 30Gi + running: true + template: + metadata: + annotations: + vm.kubevirt.io/flavor: tiny + vm.kubevirt.io/os: fedora + vm.kubevirt.io/workload: server + spec: + architecture: amd64 + domain: + devices: + disks: + - disk: + bus: virtio + name: rootdisk + - disk: + bus: virtio + name: cloudinitdisk + interfaces: + - macAddress: '02:73:43:00:00:07' + masquerade: {} + model: virtio + name: default + networkInterfaceMultiqueue: true + rng: {} + features: + acpi: {} + smm: + enabled: true + firmware: + bootloader: + efi: {} + machine: + type: pc-q35-rhel9.2.0 + memory: + guest: 2Gi + resources: {} + networks: + - name: default + pod: {} + terminationGracePeriodSeconds: 180 + volumes: + - dataVolume: + name: 
fedora-todolist-disk
+            name: rootdisk
+          - cloudInitConfigDrive:
+              userData: |-
+                #cloud-config
+                user: test
+                password: dog8code
+                chpasswd: { expire: False }
+                packages:
+                  - mariadb-server
+                  - policycoreutils-python-utils
+                  - unzip
+                  - wget
+                runcmd:
+                  - systemctl stop firewalld
+                  - systemctl disable firewalld
+                  - systemctl start mariadb
+                  - systemctl enable mariadb
+                  - mysql -uroot -e "CREATE DATABASE todolist; USE todolist; CREATE USER 'test'@'localhost' IDENTIFIED BY 'test';"
+                  - mysql -uroot -e "grant all privileges on todolist.* to test@'localhost' identified by 'test'; FLUSH PRIVILEGES;"
+                  - pushd /home/test/
+                  - wget https://github.com/weshayutin/todolist-mariadb-go/releases/download/testing3/todolist-linux-amd64.zip
+                  - unzip todolist-linux-amd64.zip
+                  - chown -R test:test /home/test
+                  - semanage fcontext --add --type bin_t '/home/test/todolist-linux-amd64'
+                  - restorecon -Fv /home/test/todolist-linux-amd64
+                  - cp systemd/todolist-mariadb.service /etc/systemd/system/
+                  - popd
+                  - systemctl daemon-reload
+                  - systemctl start todolist-mariadb.service
+                  - systemctl enable todolist-mariadb.service
+                  - systemctl status todolist-mariadb.service
+                  - systemctl disable cloud-init
+            name: cloudinitdisk
diff --git a/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/volume-policy.yaml b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/volume-policy.yaml
new file mode 100644
index 0000000000..dd7f63af92
--- /dev/null
+++ b/tests/e2e/sample-applications/virtual-machines/kubevirt-dm/volume-policy.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kubevirt-volume-policy
+  namespace: openshift-adp
+data:
+  policy.yaml: |
+    version: v1
+    volumePolicies:
+    - conditions: {}
+      action:
+        type: skip
diff --git a/tests/e2e/templates/default_settings.json b/tests/e2e/templates/default_settings.json
index a3596b2634..cccd4aa470 100644
--- a/tests/e2e/templates/default_settings.json
+++ 
b/tests/e2e/templates/default_settings.json
@@ -7,6 +7,7 @@
       "aws",
       "kubevirt",
-      "hypershift"
+      "hypershift",
+      "kubevirt-datamover"
     ]
   }
 },
diff --git a/tests/e2e/virt_backup_restore_suite_test.go b/tests/e2e/virt_backup_restore_suite_test.go
index d3cc605ae2..cdcabf6c5c 100644
--- a/tests/e2e/virt_backup_restore_suite_test.go
+++ b/tests/e2e/virt_backup_restore_suite_test.go
@@ -86,7 +86,15 @@ func runVmBackupAndRestore(brCase VmBackupRestoreCase, updateLastBRcase func(brC
 	// Create DPA
 	backupName, restoreName := prepareBackupAndRestore(brCase.BackupRestoreCase, func() {})
 
-	err := lib.CreateNamespace(v.Clientset, brCase.Namespace)
+	// Ensure a clean namespace before each spec. The previous spec's restore
+	// leaves the namespace populated with a VM/PVC that may use a stale storage
+	// class. Deleting it first guarantees the template creates a fresh DV.
+	_ = v.RemoveVm(brCase.Namespace, brCase.Name, 2*time.Minute)
+	err := lib.DeleteNamespace(v.Clientset, brCase.Namespace)
+	gomega.Expect(err).To(gomega.BeNil())
+	gomega.Eventually(lib.IsNamespaceDeleted(kubernetesClientForSuiteRun, brCase.Namespace), time.Minute*2, time.Second*5).Should(gomega.BeTrue())
+
+	err = lib.CreateNamespace(v.Clientset, brCase.Namespace)
 	gomega.Expect(err).To(gomega.BeNil())
 
 	err = lib.InstallApplication(v.Client, brCase.Template)
@@ -104,9 +112,11 @@ func runVmBackupAndRestore(brCase VmBackupRestoreCase, updateLastBRcase func(brC
 	})
 	gomega.Expect(err).ToNot(gomega.HaveOccurred())
 
-	// TODO: find a better way to check for clout-init completion
-	if brCase.InitDelay > 0*time.Second {
-		log.Printf("Sleeping to wait for cloud-init to be ready...")
+	err = v.WaitForVMReady(brCase.Namespace, brCase.Name, 5*time.Minute)
+	gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
+	if brCase.InitDelay > 0 {
+		log.Printf("Waiting %v for VM %s/%s to finish booting (cloud-init, etc.)", brCase.InitDelay, brCase.Namespace, brCase.Name)
 		time.Sleep(brCase.InitDelay)
 	}
 
@@ -150,6 +160,87 @@ func runVmBackupAndRestore(brCase 
VmBackupRestoreCase, updateLastBRcase func(brC gomega.Expect(err).To(gomega.BeNil()) } +func runCBTVmBackup(brCase VmBackupRestoreCase, updateLastBRcase func(brCase VmBackupRestoreCase), v *lib.VirtOperator) { + updateLastBRcase(brCase) + + backupName, _ := prepareBackupAndRestore(brCase.BackupRestoreCase, func() {}) + + gomega.Eventually(lib.IsNamespaceDeleted(kubernetesClientForSuiteRun, brCase.Namespace), time.Minute*2, time.Second*5).Should(gomega.BeTrue()) + err := lib.CreateNamespace(v.Clientset, brCase.Namespace) + gomega.Expect(err).To(gomega.BeNil()) + + err = lib.InstallApplication(v.Client, brCase.Template) + if err != nil { + fmt.Printf("Failed to install VM template %s: %v", brCase.Template, err) + } + gomega.Expect(err).To(gomega.BeNil()) + + log.Printf("Waiting for VM %s/%s to reach Running status", brCase.Namespace, brCase.Name) + err = wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 15*time.Minute, true, func(ctx context.Context) (bool, error) { + status, err := v.GetVmStatus(brCase.Namespace, brCase.Name) + if err != nil { + log.Printf("VM %s/%s not yet available: %v", brCase.Namespace, brCase.Name, err) + return false, nil + } + return status == "Running", nil + }) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + err = v.WaitForVMReady(brCase.Namespace, brCase.Name, 5*time.Minute) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + if brCase.InitDelay > 0 { + log.Printf("Waiting %v for VM %s/%s to finish booting (cloud-init, etc.)", brCase.InitDelay, brCase.Namespace, brCase.Name) + time.Sleep(brCase.InitDelay) + } + + log.Printf("VM %s/%s is fully booted, CBT enabled, proceeding with backup", brCase.Namespace, brCase.Name) + + log.Printf("Creating kubevirt volume policy ConfigMap for custom action routing") + err = lib.EnsureKubevirtVolumePolicy(dpaCR.Client, namespace) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + log.Printf("Creating backup %s with kubevirt volume policy for case %s", backupName, brCase.Name) 
+ err = lib.CreateBackupWithVolumePolicy(dpaCR.Client, namespace, backupName, []string{brCase.Namespace}, true) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Verify that the kubevirt-datamover controller creates a VirtualMachineBackupTracker + // in the VM namespace during the backup, confirming VEP-25 incremental backup is active. + vmbtSeen := false + vmbtCheckDone := make(chan struct{}) + go func() { + defer close(vmbtCheckDone) + for i := 0; i < 60; i++ { + found, checkErr := v.CheckVMBackupTrackerExists(brCase.Namespace) + if checkErr == nil && found { + log.Printf("VirtualMachineBackupTracker observed in %s — VEP-25 incremental backup confirmed", brCase.Namespace) + vmbtSeen = true + return + } + time.Sleep(5 * time.Second) + } + log.Printf("VirtualMachineBackupTracker was not observed in %s during backup window", brCase.Namespace) + }() + + gomega.Eventually(lib.IsKubevirtDMBackupDone(dpaCR.Client, dynamicClientForSuiteRun, namespace, backupName), brCase.BackupTimeout, time.Second*10).Should(gomega.BeTrue()) + <-vmbtCheckDone + describeBackup := lib.DescribeBackup(dpaCR.Client, namespace, backupName) + ginkgo.GinkgoWriter.Println(describeBackup) + + succeeded, err := lib.IsBackupCompletedSuccessfully(kubernetesClientForSuiteRun, dpaCR.Client, namespace, backupName) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Expect(succeeded).To(gomega.Equal(true)) + log.Printf("Backup for case %s succeeded", brCase.Name) + + gomega.Expect(vmbtSeen).To(gomega.BeTrue(), "expected VirtualMachineBackupTracker to be observed during backup (VEP-25 incremental backup)") + + err = v.RemoveVm(brCase.Namespace, brCase.Name, 5*time.Minute) + gomega.Expect(err).To(gomega.BeNil()) + err = lib.DeleteNamespace(v.Clientset, brCase.Namespace) + gomega.Expect(err).To(gomega.BeNil()) + gomega.Eventually(lib.IsNamespaceDeleted(kubernetesClientForSuiteRun, brCase.Namespace), time.Minute*5, time.Second*5).Should(gomega.BeTrue()) +} + var _ = ginkgo.Describe("VM backup 
and restore tests", ginkgo.Ordered, func() { var v *lib.VirtOperator var err error @@ -165,7 +256,15 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { } var _ = ginkgo.BeforeAll(func() { - v, err = lib.GetVirtOperator(runTimeClientForSuiteRun, kubernetesClientForSuiteRun, dynamicClientForSuiteRun, useUpstreamHco) + indexTag := "" + if useCommunityHco { + indexTag = hcoIndexTag + log.Printf("Creating community HCO CatalogSource with index tag %s", hcoIndexTag) + err = lib.EnsureCommunityHcoCatalog(dynamicClientForSuiteRun, hcoIndexTag, 2*time.Minute) + gomega.Expect(err).To(gomega.BeNil()) + } + + v, err = lib.GetVirtOperator(runTimeClientForSuiteRun, kubernetesClientForSuiteRun, dynamicClientForSuiteRun, useUpstreamHco, indexTag) gomega.Expect(err).To(gomega.BeNil()) gomega.Expect(v).ToNot(gomega.BeNil()) @@ -175,6 +274,12 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { wasInstalledFromTest = true } + // Pre-flight: require HCO >= 1.18 and backup.kubevirt.io CRDs for VEP-25. 
+ if useCommunityHco { + err = v.RequireVEP25Support() + gomega.Expect(err).To(gomega.BeNil(), "VEP-25 pre-flight check failed — HCO 1.18+ and backup.kubevirt.io CRDs are required") + } + if kvmEmulation { err = v.EnsureEmulation(20 * time.Second) gomega.Expect(err).To(gomega.BeNil()) @@ -182,6 +287,12 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { log.Println("Avoiding setting KVM emulation, by command line request") } + log.Printf("Creating test storage classes test-sc-immediate and test-sc-wffc") + err = v.CreateImmediateModeStorageClass("test-sc-immediate") + gomega.Expect(err).To(gomega.BeNil()) + err = v.CreateWaitForFirstConsumerStorageClass("test-sc-wffc") + gomega.Expect(err).To(gomega.BeNil()) + url, err := getLatestCirrosImageURL() gomega.Expect(err).To(gomega.BeNil()) err = v.EnsureNamespace(bootImageNamespace, 1*time.Minute) @@ -189,29 +300,47 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { if !v.CheckDataVolumeExists(bootImageNamespace, "cirros") { err = v.EnsureDataVolumeFromUrl(bootImageNamespace, "cirros", url, "150Mi", 5*time.Minute) gomega.Expect(err).To(gomega.BeNil()) + cirrosDownloadedFromTest = true + } + // Always ensure the DataSource exists, even if the DataVolume was + // left over from a previous test run where the DataSource was not created. 
+ if !v.CheckDataSourceExists(bootImageNamespace, "cirros") { err = v.CreateDataSourceFromPvc(bootImageNamespace, "cirros") gomega.Expect(err).To(gomega.BeNil()) - cirrosDownloadedFromTest = true } dpaCR.VeleroDefaultPlugins = append(dpaCR.VeleroDefaultPlugins, v1alpha1.DefaultPluginKubeVirt) + dpaCR.VeleroDefaultPlugins = append(dpaCR.VeleroDefaultPlugins, v1alpha1.DefaultPluginKubeVirtDataMover) - err = v.CreateImmediateModeStorageClass("test-sc-immediate") - gomega.Expect(err).To(gomega.BeNil()) - err = v.CreateWaitForFirstConsumerStorageClass("test-sc-wffc") - gomega.Expect(err).To(gomega.BeNil()) err = lib.DeleteBackupRepositories(runTimeClientForSuiteRun, namespace) gomega.Expect(err).To(gomega.BeNil()) err = lib.InstallApplication(v.Client, "./sample-applications/virtual-machines/cirros-test/cirros-rbac.yaml") gomega.Expect(err).To(gomega.BeNil()) - if v.Upstream { + // Fedora DataSource must be available in openshift-virtualization-os-images + // for the Fedora VM specs, regardless of upstream/downstream or community/GA HCO. 
+ if v.CheckDataSourceExists("openshift-virtualization-os-images", "fedora") { + log.Printf("Fedora DataSource already exists in openshift-virtualization-os-images, skipping creation") + } else { log.Printf("Creating fedora DataSource in openshift-virtualization-os-images namespace") pvcNamespace, pvcName, err := v.GetDataSourcePvc("kubevirt-os-images", "fedora") - gomega.Expect(err).To(gomega.BeNil()) - err = v.CreateTargetDataSourceFromPvc(pvcNamespace, "openshift-virtualization-os-images", pvcName, "fedora") - gomega.Expect(err).To(gomega.BeNil()) + if err != nil { + log.Printf("Fedora DataSource is not PVC-backed, trying snapshot: %v", err) + snapshotNamespace, snapshotName, snapErr := v.GetDataSourceSnapshot("kubevirt-os-images", "fedora") + gomega.Expect(snapErr).To(gomega.BeNil()) + err = v.CreateTargetDataSourceFromSnapshot(snapshotNamespace, "openshift-virtualization-os-images", snapshotName, "fedora") + gomega.Expect(err).To(gomega.BeNil()) + } else { + err = v.CreateTargetDataSourceFromPvc(pvcNamespace, "openshift-virtualization-os-images", pvcName, "fedora") + gomega.Expect(err).To(gomega.BeNil()) + } } + log.Printf("Enabling CBT feature gate and label selector for kubevirt-datamover tests") + err = v.EnableCBTFeatureGate(5 * time.Minute) + gomega.Expect(err).To(gomega.BeNil()) + err = v.EnableCBTLabelSelector(30 * time.Second) + gomega.Expect(err).To(gomega.BeNil()) + }) var _ = ginkgo.AfterAll(func() { @@ -219,34 +348,38 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { // using kopia to collect more info (DaemonSet) NewOADPDeploymentOperationDefault().Deploy(lib.KOPIA) - log.Printf("Creating real DataProtectionTest before must-gather") - bsls, err := dpaCR.ListBSLs() - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + log.Printf("skipMustGather: %v", skipMustGather) + if !skipMustGather { + log.Printf("Creating real DataProtectionTest before must-gather") + bsls, err := dpaCR.ListBSLs() + 
gomega.Expect(err).ToNot(gomega.HaveOccurred()) - bslName := bsls.Items[0].Name - err = lib.CreateUploadTestOnlyDPT(dpaCR.Client, dpaCR.Namespace, bslName) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + bslName := bsls.Items[0].Name + err = lib.CreateUploadTestOnlyDPT(dpaCR.Client, dpaCR.Namespace, bslName) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) - log.Printf("Running OADP must-gather") - err = lib.RunMustGather(artifact_dir, dpaCR.Client) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + log.Printf("Running OADP must-gather") + err = lib.RunMustGather(artifact_dir, dpaCR.Client) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + } err = dpaCR.Delete() gomega.Expect(err).ToNot(gomega.HaveOccurred()) + if v != nil { + log.Printf("Removing test storage classes") + _ = v.RemoveStorageClass("test-sc-immediate") + _ = v.RemoveStorageClass("test-sc-wffc") + } + if v != nil && cirrosDownloadedFromTest { v.RemoveDataSource(bootImageNamespace, "cirros") v.RemoveDataVolume(bootImageNamespace, "cirros", 2*time.Minute) } if v != nil && wasInstalledFromTest { - v.EnsureVirtRemoval() + log.Printf("Skipping HCO/virt removal — leaving installation intact for reuse") } - - err = v.RemoveStorageClass("test-sc-immediate") - gomega.Expect(err).To(gomega.BeNil()) - err = v.RemoveStorageClass("test-sc-wffc") - gomega.Expect(err).To(gomega.BeNil()) }) var _ = ginkgo.AfterEach(func(ctx ginkgo.SpecContext) { @@ -362,4 +495,34 @@ var _ = ginkgo.Describe("VM backup and restore tests", ginkgo.Ordered, func() { }, }, nil), ) + + ginkgo.DescribeTable("Kubevirt datamover backup with CBT", + func(brCase VmBackupRestoreCase, expectedError error) { + runCBTVmBackup(brCase, updateLastBRcase, v) + }, + + ginkgo.Entry("no-application kubevirt-datamover backup, CirrOS VM with CBT", ginkgo.Label("virt"), VmBackupRestoreCase{ + Template: "./sample-applications/virtual-machines/cirros-test/cirros-test-cbt.yaml", + BackupRestoreCase: BackupRestoreCase{ + Namespace: "cirros-test", + Name: 
"cirros-test", + SkipVerifyLogs: true, + BackupRestoreType: lib.CSIDataMover, + BackupTimeout: 20 * time.Minute, + }, + }, nil), + + // FEDORA is not yet ready for CI and CBT + // ginkgo.Entry("todolist kubevirt-datamover backup, Fedora VM with CBT", ginkgo.Label("virt"), VmBackupRestoreCase{ + // Template: "./sample-applications/virtual-machines/kubevirt-dm/fedora-todolist-cbt.yaml", + // InitDelay: 3 * time.Minute, + // BackupRestoreCase: BackupRestoreCase{ + // Namespace: "mysql-persistent", + // Name: "fedora-todolist", + // SkipVerifyLogs: true, + // BackupRestoreType: lib.CSIDataMover, + // BackupTimeout: 45 * time.Minute, + // }, + // }, nil), + ) })