diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 76cd1f190af4..baadc0a67c32 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -4,6 +4,7 @@ import static com.dotcms.content.index.IndexConfigHelper.isMigrationNotStarted; import static com.dotcms.content.index.IndexConfigHelper.isMigrationStarted; import static com.dotcms.content.index.IndexConfigHelper.isReadEnabled; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; import static com.dotmarketing.util.StringUtils.builder; import com.dotcms.api.system.event.message.MessageSeverity; @@ -175,7 +176,8 @@ public ContentletIndexAPIImpl() { CDIUtils.getBeanThrows(ContentletIndexOperationsOS.class)); } - /** Package-private constructor for testing. */ + /** Package-private constructor for testing: injects only the two provider operations. + * Still calls APILocator for the remaining dependencies. */ ContentletIndexAPIImpl(final ContentletIndexOperations operationsES, final ContentletIndexOperations operationsOS) { this.operationsES = operationsES; @@ -191,6 +193,31 @@ public ContentletIndexAPIImpl() { // Use getMappingAPI() for lazy initialization at first use. } + /** + * Full constructor for unit testing — injects all dependencies without calling + * {@link com.dotmarketing.business.APILocator}, allowing fully isolated tests. + * + * @param operationsES ES write operations provider + * @param operationsOS OS write operations provider + * @param indexAPI phase-aware index management API (controls list/cluster operations) + * @param legacyIndiciesAPI ES index pointer store (working/live slots) + * @param versionedIndicesAPI OS index pointer store (working/live slots) + */ + ContentletIndexAPIImpl( + final ContentletIndexOperations operationsES, + final ContentletIndexOperations operationsOS, + final IndexAPI indexAPI, + final IndiciesAPI legacyIndiciesAPI, + final VersionedIndicesAPI versionedIndicesAPI) { + this.operationsES = operationsES; + this.operationsOS = operationsOS; + this.router = new PhaseRouter<>(operationsES, operationsOS); + this.queueApi = null; // not needed for the methods under test + this.indexAPI = indexAPI; + this.legacyIndiciesAPI = legacyIndiciesAPI; + this.versionedIndicesAPI = versionedIndicesAPI; + } + /** * Lazy initializer avoids circular reference Stackoverflow error. * Thread-safe: uses {@link AtomicReference#updateAndGet} to ensure @@ -371,7 +398,7 @@ public void close() throws Exception { } catch (final Exception e) { if (entry.shadow) { // OS shadow write — fire-and-forget: log divergence, do not propagate. - Logger.warnAndDebug(CompositeBulkProcessor.class, + logShadowWriteFailure(CompositeBulkProcessor.class, "OS shadow processor failed to flush on close — ES flush succeeded; " + "OS index may diverge until next reindex. Cause: " + e.getMessage(), e); @@ -567,7 +594,9 @@ public synchronized boolean createContentIndex(final String indexName, final int result = false; } } - MappingHelper.getInstance().addCustomMapping(indexName); + if (result) { + MappingHelper.getInstance().addCustomMapping(indexName); + } return result; } diff --git a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java index 3168dc688ef1..f2fa38f6e214 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/IndexConfigHelper.java @@ -3,6 +3,7 @@ import com.dotcms.content.index.opensearch.OSIndexProperty; import com.dotcms.featureflag.FeatureFlagName; import com.dotmarketing.util.Config; +import com.dotmarketing.util.Logger; /** * Central helper for reading index-layer configuration properties. @@ -35,6 +36,35 @@ */ public interface IndexConfigHelper { + /** + * Config key controlling the log level for OS shadow write failures in dual-write phases. + * + *

Valid values: {@code DEBUG}, {@code INFO}, {@code WARN}, {@code ERROR} (default: {@code WARN}). + * Set to {@code ERROR} or {@code DEBUG} to increase/decrease visibility during migration QA.

+ */ + String SHADOW_WRITE_LOG_LEVEL_KEY = "DOTCMS_SHADOW_WRITE_LOG_LEVEL"; + + /** + * Logs an OS shadow write failure at the level configured by + * {@value #SHADOW_WRITE_LOG_LEVEL_KEY} (default: {@code WARN}). + * + * @param clazz the class to attribute the log entry to + * @param message the log message + * @param t the throwable, or {@code null} if none + */ + static void logShadowWriteFailure(final Class clazz, + final String message, + final Throwable t) { + final String level = Config.getStringProperty(SHADOW_WRITE_LOG_LEVEL_KEY, "WARN") + .toUpperCase(); + switch (level) { + case "DEBUG": Logger.debug(clazz, message, t); break; + case "INFO": Logger.info(clazz, message); break; + case "ERROR": Logger.error(clazz, message, t); break; + default: Logger.warn(clazz, message, t); break; + } + } + // ------------------------------------------------------------------------- // Migration phase // ------------------------------------------------------------------------- diff --git a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java index 905dc4c1f6e3..25298ed286d6 100644 --- a/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java +++ b/dotCMS/src/main/java/com/dotcms/content/index/PhaseRouter.java @@ -4,6 +4,8 @@ import static com.dotcms.content.index.IndexConfigHelper.isMigrationNotStarted; import static com.dotcms.content.index.IndexConfigHelper.isReadEnabled; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; + import com.dotmarketing.util.Logger; import java.util.List; import java.util.function.Consumer; @@ -246,7 +248,7 @@ public boolean writeBoolean(final Function fn) { if (impl == primary) { primaryEx = e; } else { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } @@ -332,7 +334,7 @@ public void writeChecked(final ThrowingConsumer action) throws Exception { if (impl == primary) { primaryEx = e; // record — shadow must still be called } else { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } @@ -372,7 +374,7 @@ public R writeReturningChecked(final ThrowingFunction fn) throws Excep try { fn.apply(shadow); } catch (Exception e) { - Logger.warn(PhaseRouter.class, + logShadowWriteFailure(PhaseRouter.class, "Shadow write failed (fire-and-forget in dual-write phase): " + e.getMessage(), e); } diff --git a/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java b/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java index 74ab14186e64..c3420f8491b1 100644 --- a/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java +++ b/dotCMS/src/main/java/com/dotmarketing/common/reindex/BulkProcessorListener.java @@ -1,5 +1,7 @@ package com.dotmarketing.common.reindex; +import static com.dotcms.content.index.IndexConfigHelper.logShadowWriteFailure; + import com.dotcms.content.index.IndexConfigHelper; import com.dotcms.content.index.IndexTag; import com.dotcms.content.index.domain.IndexBulkItemResult; @@ -111,8 +113,8 @@ public void afterBulk(final long executionId, final List re // OS shadow — fire-and-forget; log individual failures for observability only results.stream() .filter(IndexBulkItemResult::failed) - .forEach(r -> Logger.warn(this.getClass(), - "[OS] Index failure (fire-and-forget): " + r.failureMessage())); + .forEach(r -> logShadowWriteFailure(this.getClass(), + "[OS] Index failure (fire-and-forget): " + r.failureMessage(), null)); return; } Logger.debug(this.getClass(), "Bulk process completed"); @@ -155,7 +157,7 @@ public void afterBulk(final long executionId, final List re public void afterBulk(final long executionId, final Throwable failure) { final String msg = failure != null ? failure.getMessage() : "(no message)"; if (shadow) { - Logger.warnAndDebug(this.getClass(), + logShadowWriteFailure(this.getClass(), "[OS] Bulk process failed entirely (fire-and-forget): " + msg, failure); return; } diff --git a/dotCMS/src/main/resources/dotmarketing-config.properties b/dotCMS/src/main/resources/dotmarketing-config.properties index 79d8592946e8..b07a2cb5ca15 100644 --- a/dotCMS/src/main/resources/dotmarketing-config.properties +++ b/dotCMS/src/main/resources/dotmarketing-config.properties @@ -922,3 +922,14 @@ telemetry.collection.timeout.seconds=30 # Metrics taking longer than this will be logged as warnings for optimization # Default: 500 milliseconds telemetry.metric.slow.threshold.ms=500 + +## OpenSearch migration — shadow write observability +# Log level for OS shadow write failures during dual-write phases (1 and 2). +# Shadow failures are fire-and-forget: the ES (primary) result is returned to the +# caller regardless of OS outcome. This setting controls how loudly those failures +# are reported. +# Valid values: DEBUG, INFO, WARN, ERROR (default: WARN) +# Set to ERROR to surface shadow failures in dashboards/alerts. +# Set to DEBUG to suppress them during steady-state migration. +#DOTCMS_SHADOW_WRITE_LOG_LEVEL=WARN + diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java new file mode 100644 index 000000000000..77c82172f545 --- /dev/null +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplPhaseTest.java @@ -0,0 +1,636 @@ +package com.dotcms.content.elasticsearch.business; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import com.dotcms.content.index.ContentletIndexOperations; +import com.dotcms.content.index.IndexAPI; +import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.index.VersionedIndicesAPI; +import com.dotcms.content.index.VersionedIndicesImpl; +import com.dotcms.content.index.domain.ClusterIndexHealth; +import com.dotcms.content.index.domain.ClusterStats; +import com.dotcms.content.index.domain.CreateIndexStatus; +import com.dotcms.content.index.domain.IndexBulkListener; +import com.dotcms.content.index.domain.IndexBulkProcessor; +import com.dotcms.content.index.domain.IndexBulkRequest; +import com.dotcms.content.index.domain.IndexStats; +import com.dotcms.contenttype.model.type.ContentType; +import com.dotmarketing.exception.DotDataException; +import com.dotmarketing.util.Config; +import java.io.IOException; +import java.sql.Connection; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.junit.After; +import org.junit.Test; + +/** + * Unit tests for {@link ContentletIndexAPIImpl} phase-aware behavior + * in the realistic "two different index names" scenario: + * + *
    + *
  • dotCMS started in Phase 0 and created ES index {@code working_T0} / {@code live_T0}
  • + *
  • Migration was started and OS index {@code working_T1} / {@code live_T1} was created
  • + *
  • Both indices co-exist; callers may supply either name
  • + *
+ * + *

These tests document the current, observable behavior of each API method + * across migration phases without requiring a running Elasticsearch or OpenSearch cluster. + * All infrastructure is replaced by in-memory fakes injected via the package-private + * testing constructor.

+ */ +public class ContentletIndexAPIImplPhaseTest { + + // ── Logical index names ─────────────────────────────────────────────────── + /** ES index timestamp suffix (created first, in Phase 0). */ + private static final String ES_WORKING = "working_T0"; + private static final String ES_LIVE = "live_T0"; + + /** OS index timestamp suffix (created during migration catchup). */ + private static final String OS_WORKING = "working_T1"; + private static final String OS_LIVE = "live_T1"; + + /** Cluster prefix prepended by both providers' {@code toPhysicalName()}. */ + private static final String CLUSTER_PREFIX = "cluster_test."; + + // ── Test teardown ───────────────────────────────────────────────────────── + + @After + public void clearPhase() { + Config.setProperty(FLAG_KEY, null); + } + + // ========================================================================= + // isDotCMSIndexName — purely syntactic (prefix check, no provider query) + // ========================================================================= + + /** + * Given Scenario: any migration phase. + * The method is purely syntactic: it checks whether the name starts with + * {@code "working_"} or {@code "live_"}. + * + * When : isDotCMSIndexName() is called with the ES logical name. + * Then : returns true — the ES name is a valid dotCMS index name. + */ + @Test + public void test_isDotCMSIndexName_esLogicalName_isTrue() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertTrue("ES working name must be recognised as a dotCMS index", + api.isDotCMSIndexName(ES_WORKING)); + assertTrue("ES live name must be recognised as a dotCMS index", + api.isDotCMSIndexName(ES_LIVE)); + } + + /** + * Given Scenario: any migration phase. + * When : isDotCMSIndexName() is called with the OS logical name. + * Then : returns true — even though only OS has this index, the name itself + * starts with "working_" so it passes the syntactic check. + * The method does NOT query which provider actually holds the index. + */ + @Test + public void test_isDotCMSIndexName_osLogicalName_isTrue() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertTrue("OS working name must be recognised as a dotCMS index (syntactic check only)", + api.isDotCMSIndexName(OS_WORKING)); + assertTrue("OS live name must be recognised as a dotCMS index", + api.isDotCMSIndexName(OS_LIVE)); + } + + /** + * Given Scenario: any phase. + * When : isDotCMSIndexName() is called with a physical name (cluster prefix included). + * Then : returns false — physical names do NOT start with "working_" or "live_". + * Callers must strip the cluster prefix before invoking this method. + */ + @Test + public void test_isDotCMSIndexName_physicalNameWithClusterPrefix_isFalse() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertFalse("Physical name with cluster prefix must NOT be recognised", + api.isDotCMSIndexName(CLUSTER_PREFIX + ES_WORKING)); + assertFalse("Physical OS name with cluster prefix must NOT be recognised", + api.isDotCMSIndexName(CLUSTER_PREFIX + OS_WORKING)); + } + + /** + * Given Scenario: any phase. + * When : isDotCMSIndexName() is called with a name that has no recognised dotCMS prefix. + * Then : returns false. + */ + @Test + public void test_isDotCMSIndexName_unknownPrefix_isFalse() { + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + assertFalse("Unrecognised prefix must return false", + api.isDotCMSIndexName("notadotcmsindex_T0")); + assertFalse("Null must return false", api.isDotCMSIndexName(null)); + } + + // ========================================================================= + // listDotCMSIndices — pure delegation to IndexAPI.getIndices(true, false) + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). + * The fake IndexAPI is pre-loaded with ES indices only. + * When : listDotCMSIndices() is called. + * Then : returns exactly the ES indices — OS is not consulted in Phase 0. + * The phase-awareness is encapsulated inside IndexAPIImpl (the real + * implementation); ContentletIndexAPIImpl simply delegates. + */ + @Test + public void test_listDotCMSIndices_phase0_esIndicesOnly() { + setPhase(0); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(2, indices.size()); + assertTrue(indices.contains(ES_WORKING)); + assertTrue(indices.contains(ES_LIVE)); + assertFalse("OS index must not appear in Phase 0", indices.contains(OS_WORKING)); + } + + /** + * Given Scenario: Phase 1 or 2 (dual-write). The fake IndexAPI returns a merged + * list of ES and OS indices, simulating IndexAPIImpl's merge behavior. + * When : listDotCMSIndices() is called. + * Then : returns indices from both providers. + * Each provider contributes its own timestamped names — T0 from ES, T1 from OS. + */ + @Test + public void test_listDotCMSIndices_dualWrite_returnsBothProviders() { + setPhase(1); + final List merged = List.of(ES_WORKING, ES_LIVE, OS_WORKING, OS_LIVE); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(merged), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(4, indices.size()); + assertTrue(indices.contains(ES_WORKING)); + assertTrue(indices.contains(OS_WORKING)); + } + + /** + * Given Scenario: Phase 3 (OS only). The fake IndexAPI returns OS indices only. + * When : listDotCMSIndices() is called. + * Then : returns only OS indices — ES is decommissioned. + */ + @Test + public void test_listDotCMSIndices_phase3_osIndicesOnly() { + setPhase(3); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(OS_WORKING, OS_LIVE)), + new FakeIndiciesAPI(), + new FakeVersionedIndicesAPI()); + + final List indices = api.listDotCMSIndices(); + + assertEquals(2, indices.size()); + assertTrue(indices.contains(OS_WORKING)); + assertFalse("ES index must not appear in Phase 3", indices.contains(ES_WORKING)); + } + + // ========================================================================= + // activateIndex — writes to index pointer stores (IndiciesAPI / VersionedIndicesAPI) + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES store is empty. + * When : activateIndex("working_T0") is called (ES name). + * Then : ES store (legacyIndiciesAPI) is updated with the physical working name. + * OS store (versionedIndicesAPI) is NOT touched — migration has not started. + */ + @Test + public void test_activateIndex_phase0_esName_updatesEsStoreOnly() throws DotDataException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + fakeIndicie, + fakeVersioned); + + api.activateIndex(ES_WORKING); + + assertEquals("ES store must record the physical working name", + CLUSTER_PREFIX + ES_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertNull("OS store must not be touched in Phase 0", + fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 0 (ES only). Caller passes the OS index name (working_T1), + * even though OS is not active in Phase 0. + * When : activateIndex("working_T1") is called. + * Then : ES store is updated with the physical T1 name — activateIndex does NOT + * validate that the index physically exists; it only writes to the pointer store. + * OS store is NOT touched. + * + *

Observation: in Phase 0 the ES store can be pointed at an index + * name that does not physically exist in the ES cluster, because activateIndex is + * a pure pointer-store update with no existence check.

+ */ + @Test + public void test_activateIndex_phase0_osName_goesToEsStore_noOsWrite() throws DotDataException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of(ES_WORKING, ES_LIVE)), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); // OS name passed in Phase 0 + + assertEquals("ES store must record the OS physical name even in Phase 0", + CLUSTER_PREFIX + OS_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertNull("OS store must not be touched in Phase 0", fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller uses the ES index name T0. + * When : activateIndex("working_T0") is called. + * Then : ES store is updated with T0 (physical). + * OS store mirror is also updated — but it receives the SAME logical name T0, + * even though OS physically has T1. The mirror writes the name as-is, without + * checking which index actually exists in the OS cluster. + * + *

Key observation: after this call, the OS pointer store records + * "working_T0" as the active working index — a mismatch with the physical OS index. + * This is expected behavior during the catch-up window.

+ */ + @Test + public void test_activateIndex_phase2_esName_mirroredToOsStore() throws DotDataException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(ES_WORKING); + + // ES pointer store is updated ✓ + assertEquals("ES store must record the physical working name", + CLUSTER_PREFIX + ES_WORKING, fakeIndicie.loadIndicies().getWorking()); + + // OS store receives a mirror of the same logical name + assertTrue("OS store must be populated in Phase 2", + fakeVersioned.stored != null); + assertEquals("OS store must contain the mirrored working name (even if OS has a different physical index)", + CLUSTER_PREFIX + ES_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller uses the OS index name T1. + * When : activateIndex("working_T1") is called. + * Then : ES store is updated with T1 (even though ES physically has T0). + * OS store is also updated with T1 — in this case both stores are correct + * because the caller used the OS-native name. + */ + @Test + public void test_activateIndex_phase2_osName_updatesBothStores() throws DotDataException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); + + assertEquals("ES store must record the OS physical name", + CLUSTER_PREFIX + OS_WORKING, fakeIndicie.loadIndicies().getWorking()); + assertEquals("OS store must record its own physical name", + CLUSTER_PREFIX + OS_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + /** + * Given Scenario: Phase 3 (OS only). + * When : activateIndex("working_T1") is called. + * Then : only the OS store (versionedIndicesAPI) is updated. + * ES store (legacyIndiciesAPI) is NOT touched — ES is decommissioned. + */ + @Test + public void test_activateIndex_phase3_onlyOsStoreUpdated() throws DotDataException { + setPhase(3); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), + fakeIndicie, + fakeVersioned); + + api.activateIndex(OS_WORKING); + + assertNull("ES store must NOT be touched in Phase 3 (ES is decommissioned)", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("OS store must record the working name", + CLUSTER_PREFIX + OS_WORKING, + fakeVersioned.stored.working().orElse(null)); + } + + // ========================================================================= + // deactivateIndex — clears a slot from the pointer stores by index type + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES store has working=T0, live=T0-live. + * When : deactivateIndex("working_T0") is called. + * Then : ES store working slot is cleared (null); live slot is preserved. + * OS store is NOT touched. + */ + @Test + public void test_deactivateIndex_phase0_clearsEsWorkingSlotOnly() + throws DotDataException, IOException { + setPhase(0); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + fakeIndicie.setWorking(CLUSTER_PREFIX + ES_WORKING); + fakeIndicie.setLive(CLUSTER_PREFIX + ES_LIVE); + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), fakeIndicie, fakeVersioned); + + api.deactivateIndex(ES_WORKING); + + assertNull("Working slot must be cleared after deactivation", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("Live slot must be preserved", + CLUSTER_PREFIX + ES_LIVE, fakeIndicie.loadIndicies().getLive()); + assertNull("OS store must not be touched in Phase 0", fakeVersioned.stored); + } + + /** + * Given Scenario: Phase 2 (dual-write). Both ES and OS stores have their respective + * working indices recorded (T0 in ES, T1 in OS). + * When : deactivateIndex("working_T0") is called. + * Then : ES store working slot is cleared. + * OS store working slot is ALSO cleared — the deactivation clears the slot + * by INDEX TYPE (WORKING), not by the specific index name (T0 vs T1). + * The OS live slot is preserved. + * + *

Key observation: deactivateIndex identifies which slot to clear + * via the index-type prefix ("working_" / "live_"), not via the exact name. + * Passing "working_T0" in Phase 2 will clear the OS working slot even though OS + * physically records "working_T1" there.

+ */ + @Test + public void test_deactivateIndex_phase2_clearsBothStoresByIndexType() + throws DotDataException, IOException { + setPhase(2); + final FakeIndiciesAPI fakeIndicie = new FakeIndiciesAPI(); + fakeIndicie.setWorking(CLUSTER_PREFIX + ES_WORKING); + fakeIndicie.setLive(CLUSTER_PREFIX + ES_LIVE); + + final FakeVersionedIndicesAPI fakeVersioned = new FakeVersionedIndicesAPI(); + // Pre-populate OS store: OS working = T1, OS live = T1-live + fakeVersioned.stored = VersionedIndicesImpl.builder() + .working(CLUSTER_PREFIX + OS_WORKING) + .live(CLUSTER_PREFIX + OS_LIVE) + .build(); + + final ContentletIndexAPIImpl api = buildApi( + new FakeIndexAPI(List.of()), fakeIndicie, fakeVersioned); + + // Deactivate using the ES index name — but the SLOT (WORKING) is cleared in both stores + api.deactivateIndex(ES_WORKING); + + assertNull("ES working slot must be cleared", + fakeIndicie.loadIndicies().getWorking()); + assertEquals("ES live slot must be preserved", + CLUSTER_PREFIX + ES_LIVE, fakeIndicie.loadIndicies().getLive()); + + // OS mirror: working slot cleared (by index type), live preserved + assertNull("OS working slot must also be cleared (deactivation is by index type, not name)", + fakeVersioned.stored.working().orElse(null)); + assertEquals("OS live slot must be preserved", + CLUSTER_PREFIX + OS_LIVE, fakeVersioned.stored.live().orElse(null)); + } + + // ========================================================================= + // Factory and helpers + // ========================================================================= + + private static ContentletIndexAPIImpl buildApi( + final FakeIndexAPI fakeIndex, + final FakeIndiciesAPI fakeIndicie, + final FakeVersionedIndicesAPI fakeVersioned) { + return new ContentletIndexAPIImpl( + new FakeContentletIndexOperations(), + new FakeContentletIndexOperations(), + fakeIndex, + fakeIndicie, + fakeVersioned); + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } + + // ========================================================================= + // Fake implementations — in-memory stubs with no vendor dependencies + // ========================================================================= + + /** + * In-memory {@link IndexAPI} stub. + * Only the three methods used by the target methods are implemented; + * all others throw {@link UnsupportedOperationException}. + */ + static class FakeIndexAPI implements IndexAPI { + + private final List openIndices; + + FakeIndexAPI(final List openIndices) { + this.openIndices = new ArrayList<>(openIndices); + } + + @Override + public List getIndices(final boolean expandOpen, final boolean expandClosed) { + return Collections.unmodifiableList(openIndices); + } + + @Override + public List getClosedIndexes() { + return List.of(); + } + + @Override + public String getNameWithClusterIDPrefix(final String name) { + return name.startsWith(CLUSTER_PREFIX) ? name : CLUSTER_PREFIX + name; + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public Map getIndicesStats() { throw new UnsupportedOperationException(); } + @Override public Map flushCaches(List n) { throw new UnsupportedOperationException(); } + @Override public boolean optimize(List n) { throw new UnsupportedOperationException(); } + @Override public boolean delete(String n) { throw new UnsupportedOperationException(); } + @Override public boolean deleteMultiple(String... n) { throw new UnsupportedOperationException(); } + @Override public void deleteInactiveLiveWorkingIndices(int n) { throw new UnsupportedOperationException(); } + @Override public Set listIndices() { throw new UnsupportedOperationException(); } + @Override public boolean isIndexClosed(String n) { throw new UnsupportedOperationException(); } + @Override public boolean indexExists(String n) { throw new UnsupportedOperationException(); } + @Override public void createIndex(String n) { throw new UnsupportedOperationException(); } + @Override public CreateIndexStatus createIndex(String n, int s) { throw new UnsupportedOperationException(); } + @Override public void clearIndex(String n) { throw new UnsupportedOperationException(); } + @Override public CreateIndexStatus createIndex(String n, String s, int sh) { throw new UnsupportedOperationException(); } + @Override public String getDefaultIndexSettings() { throw new UnsupportedOperationException(); } + @Override public Map getClusterHealth() { throw new UnsupportedOperationException(); } + @Override public void updateReplicas(String n, int r) { throw new UnsupportedOperationException(); } + @Override public void createAlias(String n, String a) { throw new UnsupportedOperationException(); } + @Override public Map getIndexAlias(List n) { throw new UnsupportedOperationException(); } + @Override public Map getIndexAlias(String[] n) { throw new UnsupportedOperationException(); } + @Override public String getIndexAlias(String n) { throw new UnsupportedOperationException(); } + @Override public Map getAliasToIndexMap(List n) { throw new UnsupportedOperationException(); } + @Override public void closeIndex(String n) { throw new UnsupportedOperationException(); } + @Override public void openIndex(String n) { throw new UnsupportedOperationException(); } + @Override public List getLiveWorkingIndicesSortedByCreationDateDesc() { throw new UnsupportedOperationException(); } + @Override public Status getIndexStatus(String n) { throw new UnsupportedOperationException(); } + @Override public boolean waitUtilIndexReady() { throw new UnsupportedOperationException(); } + @Override public ClusterStats getClusterStats() { throw new UnsupportedOperationException(); } + } + + /** + * In-memory {@link IndiciesAPI} stub that stores the current index pointers as a + * mutable {@link IndiciesInfo}. + */ + @SuppressWarnings("deprecation") + static class FakeIndiciesAPI implements IndiciesAPI { + + private IndiciesInfo current = new IndiciesInfo.Builder().build(); + + void setWorking(final String working) { + current = new IndiciesInfo.Builder() + .setWorking(working) + .setLive(current.getLive()) + .build(); + } + + void setLive(final String live) { + current = new IndiciesInfo.Builder() + .setWorking(current.getWorking()) + .setLive(live) + .build(); + } + + @Override + public IndiciesInfo loadIndicies() { + return current; + } + + @Override + public IndiciesInfo loadIndicies(final Connection conn) { + return current; + } + + @Override + public void point(final IndiciesInfo newInfo) { + this.current = newInfo; + } + } + + /** + * In-memory {@link VersionedIndicesAPI} stub that stores a single + * {@link VersionedIndices} record (the "default" OS record). + */ + static class FakeVersionedIndicesAPI implements VersionedIndicesAPI { + + VersionedIndices stored = null; + + @Override + public Optional loadDefaultVersionedIndices() { + return Optional.ofNullable(stored); + } + + @Override + public void saveIndices(final VersionedIndices info) { + this.stored = info; + } + + @Override + public Optional loadNonVersionedIndices() { + return Optional.empty(); + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public Optional loadIndices(String v) { throw new UnsupportedOperationException(); } + @Override public List loadAllIndices() { throw new UnsupportedOperationException(); } + @Override public void removeVersion(String v) { throw new UnsupportedOperationException(); } + @Override public boolean versionExists(String v) { throw new UnsupportedOperationException(); } + @Override public int getIndicesCount(String v) { throw new UnsupportedOperationException(); } + @Override public Instant extractTimestamp(String n) { throw new UnsupportedOperationException(); } + @Override public void clearCache() { /* no-op */ } + } + + /** + * Minimal {@link ContentletIndexOperations} stub used only as a constructor argument. + * + *

Only {@link #toPhysicalName} is implemented (the default interface method is + * overridden to avoid calling a real {@link IndexAPI}). All bulk and lifecycle + * operations throw {@link UnsupportedOperationException} since none of the + * tested methods invoke them.

+ */ + static class FakeContentletIndexOperations implements ContentletIndexOperations { + + @Override + public String toPhysicalName(final String indexName) { + return indexName.startsWith(CLUSTER_PREFIX) ? indexName : CLUSTER_PREFIX + indexName; + } + + @Override + public IndexAPI indexAPI() { + throw new UnsupportedOperationException("indexAPI() not used by tests"); + } + + // ── unneeded methods ───────────────────────────────────────────────── + + @Override public IndexBulkRequest createBulkRequest() { throw new UnsupportedOperationException(); } + @Override public void addIndexOp(IndexBulkRequest r, String i, String d, String j) { throw new UnsupportedOperationException(); } + @Override public void addDeleteOp(IndexBulkRequest r, String i, String d) { throw new UnsupportedOperationException(); } + @Override public void setRefreshPolicy(IndexBulkRequest r, IndexBulkRequest.RefreshPolicy p) { throw new UnsupportedOperationException(); } + @Override public void putToIndex(IndexBulkRequest r) { throw new UnsupportedOperationException(); } + @Override public IndexBulkProcessor createBulkProcessor(IndexBulkListener l) { throw new UnsupportedOperationException(); } + @Override public void addIndexOpToProcessor(IndexBulkProcessor p, String i, String d, String j) { throw new UnsupportedOperationException(); } + @Override public void addDeleteOpToProcessor(IndexBulkProcessor p, String i, String d) { throw new UnsupportedOperationException(); } + @Override public boolean createContentIndex(String n, int s) { throw new UnsupportedOperationException(); } + @Override public void removeContentFromIndexByContentType(ContentType t) { throw new UnsupportedOperationException(); } + @Override public long getIndexDocumentCount(String n) { throw new UnsupportedOperationException(); } + } +} diff --git a/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java b/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java new file mode 100644 index 000000000000..c4323808cefe --- /dev/null +++ b/dotCMS/src/test/java/com/dotcms/content/index/PhaseRouterTest.java @@ -0,0 +1,340 @@ +package com.dotcms.content.index; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.dotmarketing.util.Config; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; +import org.junit.After; +import org.junit.Test; + +/** + * Unit tests for {@link PhaseRouter} fire-and-forget and failure-propagation behaviour. + * + *

Scenario simulated

+ *

In a common dual-write catch-up scenario the ES and OS indices carry different + * timestamp suffixes. For example, ES has {@code working_T0} and OS has {@code working_T1}. + * Calling an index lifecycle operation ({@code closeIndex}, {@code openIndex}, {@code delete}) + * with the ES index name succeeds on ES but throws + * {@code [index_not_found_exception]} on OS, because OS does not know about {@code working_T0}.

+ * + *

The routing table below shows who is primary (failure propagates) and who is + * shadow (failure is fire-and-forget) in each phase:

+ * + *
+ * Phase │ Primary │ Shadow │ ES index = T0, OS index = T1  →  closeIndex("T0")
+ * ──────┼─────────┼────────┼────────────────────────────────────────────────────
+ *   0   │ ES only │  —     │ ES ok                             → success
+ *   1   │ ES      │ OS     │ ES ok, OS throws (T0 not in OS)   → success (fire-and-forget)
+ *   2   │ OS      │ ES     │ OS throws (T0 not in OS, primary) → THROWS
+ *   3   │ OS only │  —     │ OS throws                         → THROWS
+ * 
+ * + *

The inverse scenario (OS has T1, ES has T0, caller uses T1) is also tested for Phase 2 + * to confirm that ES shadow failures are fire-and-forget when OS is the primary reader.

+ */ +public class PhaseRouterTest { + + // ── Test setup / teardown ───────────────────────────────────────────────── + + @After + public void clearPhase() { + Config.setProperty(FLAG_KEY, null); + } + + // ========================================================================= + // write() — void unchecked fan-out + // Used by: IndexAPIImpl.closeIndex(), openIndex(), createAlias() + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). ES has "working_T0"; OS has "working_T1". + * When : closeIndex("working_T0") is fanned out to both providers. + * Then : ES (primary) succeeds; OS (shadow) throws index_not_found — exception is swallowed. + * Both providers were called (shadow is never skipped even on primary success). + */ + @Test + public void test_write_phase1_osShadowIndexNotFound_isSwallowed() { + final AtomicBoolean esCalled = new AtomicBoolean(); + final AtomicBoolean osCalled = new AtomicBoolean(); + + // ES has working_T0 → operation succeeds + final Runnable esAction = () -> esCalled.set(true); + + // OS has working_T1 → operation fails (index_not_found for T0) + final Runnable osAction = () -> { + osCalled.set(true); + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0] — OS index is working_T1"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(1); + + // must not throw — OS shadow failure is fire-and-forget + router.write(Runnable::run); + + assertTrue("ES (primary) must be called", esCalled.get()); + assertTrue("OS (shadow) must also be called", osCalled.get()); + } + + /** + * Given Scenario: Phase 1. ES (primary) fails. + * When : write() is invoked. + * Then : exception propagates to the caller regardless of OS outcome. + * OS is still called (shadow must not be skipped). + */ + @Test + public void test_write_phase1_esPrimaryFailure_propagates() { + final AtomicBoolean osCalled = new AtomicBoolean(); + + final Runnable esAction = () -> { throw new RuntimeException("ES cluster unavailable"); }; + final Runnable osAction = () -> osCalled.set(true); + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(1); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + assertTrue("OS (shadow) must be called even when primary fails", osCalled.get()); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). ES has "working_T0"; OS has "working_T1". + * When : closeIndex("working_T0") is fanned out. + * Then : OS is now the primary reader — its failure (index_not_found) must propagate. + */ + @Test + public void test_write_phase2_osPrimaryIndexNotFound_propagates() { + // ES has T0 (it is shadow in Phase 2) → would succeed + final Runnable esAction = () -> {}; + + // OS has T1, not T0 (it is primary in Phase 2) → throws + final Runnable osAction = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0] — OS index is working_T1"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(2); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + } + + /** + * Given Scenario: Phase 2. Caller uses OS index name "working_T1". ES has "working_T0". + * When : closeIndex("working_T1") is fanned out. + * Then : OS (primary) succeeds; ES (shadow) throws index_not_found for T1 — swallowed. + */ + @Test + public void test_write_phase2_esShadowIndexNotFound_isSwallowed() { + // OS has T1 (primary in Phase 2) → succeeds + final Runnable osAction = () -> {}; + + // ES has T0 (shadow in Phase 2) → T1 not found on ES + final Runnable esAction = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T1] — ES index is working_T0"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(2); + + // must not throw — ES shadow failure is fire-and-forget in Phase 2 + router.write(Runnable::run); + } + + /** + * Given Scenario: Phase 3 (OS only). OS does not have the requested index. + * When : write() is invoked. + * Then : exception propagates (single provider, no fire-and-forget). + * ES must not be called. + */ + @Test + public void test_write_phase3_osFailure_propagates_esNeverCalled() { + final Runnable esAction = () -> fail("ES must NOT be called in Phase 3"); + final Runnable osAction = () -> { + throw new RuntimeException("[index_not_found_exception] no such index [working_T0]"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(3); + + assertThrows(RuntimeException.class, () -> router.write(Runnable::run)); + } + + /** + * Given Scenario: Phase 0 (ES only). OS would fail if called. + * When : write() is invoked. + * Then : only ES is called; OS is never reached. + */ + @Test + public void test_write_phase0_osNeverCalled() { + final AtomicBoolean osCalled = new AtomicBoolean(); + final Runnable esAction = () -> {}; + final Runnable osAction = () -> { + osCalled.set(true); + throw new RuntimeException("should not reach OS in Phase 0"); + }; + + final PhaseRouter router = new PhaseRouter<>(esAction, osAction); + setPhase(0); + + router.write(Runnable::run); + assertFalse("OS must NOT be called in Phase 0", osCalled.get()); + } + + // ========================================================================= + // writeBoolean() — boolean fan-out, returns primary result + // Used by: IndexAPIImpl.delete() + // ========================================================================= + + /** + * Given Scenario: Phase 1. ES has "working_T0" (delete returns true); OS has "working_T1". + * When : delete("working_T0") is fanned out. + * Then : OS throws index_not_found — swallowed; primary (ES) result {@code true} returned. + */ + @Test + public void test_writeBoolean_phase1_osShadowIndexNotFound_returnsEsResult() { + // ES.delete("working_T0") = acknowledged (true) + final Supplier esDelete = () -> true; + + // OS.delete("working_T0") = index_not_found + final Supplier osDelete = () -> { + throw new RuntimeException( + "[index_not_found_exception] no such index [working_T0]"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(1); + + final boolean result = router.writeBoolean(Supplier::get); + assertTrue("primary (ES) result must be returned when shadow fails", result); + } + + /** + * Given Scenario: Phase 1. ES.delete = false (e.g. not acknowledged); OS also fails. + * When : delete() is fanned out. + * Then : ES result false is returned (shadow swallowed); false is authoritative. + */ + @Test + public void test_writeBoolean_phase1_esReturnsFalse_shadowIgnored() { + final Supplier esDelete = () -> false; + final Supplier osDelete = () -> { + throw new RuntimeException("[index_not_found_exception] working_T0"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(1); + + final boolean result = router.writeBoolean(Supplier::get); + assertFalse("primary (ES) false result must be preserved", result); + } + + /** + * Given Scenario: Phase 2. OS.delete("working_T0") throws (OS is primary). + * When : delete() is fanned out. + * Then : exception propagates. + */ + @Test + public void test_writeBoolean_phase2_osPrimaryFailure_propagates() { + final Supplier esDelete = () -> true; + final Supplier osDelete = () -> { + throw new RuntimeException("[index_not_found_exception] working_T0"); + }; + + final PhaseRouter> router = new PhaseRouter<>(esDelete, osDelete); + setPhase(2); + + assertThrows(RuntimeException.class, () -> router.writeBoolean(Supplier::get)); + } + + // ========================================================================= + // writeChecked() — checked exception fan-out + // Used by: IndexAPIImpl.clearIndex(), createIndex(), updateReplicas() + // ========================================================================= + + /** + * Given Scenario: Phase 1. OS throws a checked IOException (index_not_found). + * When : writeChecked() is invoked. + * Then : checked exception is swallowed; no exception reaches the caller. + */ + @Test + public void test_writeChecked_phase1_osShadowCheckedFailure_isSwallowed() throws Exception { + final AtomicBoolean esCalled = new AtomicBoolean(); + + final PhaseRouter router = new PhaseRouter<>( + () -> esCalled.set(true), // ES succeeds + () -> { throw new IOException("[index_not_found_exception] working_T0"); } + ); + setPhase(1); + + // must not throw — checked exception from OS shadow is swallowed + router.writeChecked(ThrowingAction::run); + assertTrue("ES (primary) must be called", esCalled.get()); + } + + /** + * Given Scenario: Phase 1. ES (primary) throws a checked IOException. + * When : writeChecked() is invoked. + * Then : checked exception propagates to the caller. + */ + @Test + public void test_writeChecked_phase1_esPrimaryCheckedFailure_propagates() { + final PhaseRouter router = new PhaseRouter<>( + () -> { throw new IOException("ES index update failed"); }, + () -> {} // OS succeeds (shadow) + ); + setPhase(1); + + try { + router.writeChecked(ThrowingAction::run); + fail("expected IOException to propagate"); + } catch (IOException expected) { + assertTrue(expected.getMessage().contains("ES index update failed")); + } catch (Exception unexpected) { + fail("unexpected exception type: " + unexpected); + } + } + + /** + * Given Scenario: Phase 2. OS (primary) throws a checked IOException. + * When : writeChecked() is invoked. + * Then : checked exception propagates. + */ + @Test + public void test_writeChecked_phase2_osPrimaryCheckedFailure_propagates() { + final PhaseRouter router = new PhaseRouter<>( + () -> {}, // ES (shadow in Phase 2) succeeds + () -> { throw new IOException("[index_not_found_exception] working_T0"); } + ); + setPhase(2); + + try { + router.writeChecked(ThrowingAction::run); + fail("expected IOException to propagate"); + } catch (IOException expected) { + // correct + } catch (Exception unexpected) { + fail("unexpected exception type: " + unexpected); + } + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** Simulates a checked index operation (e.g. clearIndex, createIndex). */ + @FunctionalInterface + interface ThrowingAction { + void run() throws Exception; + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } +} diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java index fa81478f3d4b..0a8b93f95fe0 100644 --- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java +++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java @@ -1,5 +1,6 @@ package com.dotcms; +import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImplMigrationIT; import com.dotcms.content.index.opensearch.ContentFactoryIndexOperationsOSIntegrationTest; import com.dotcms.content.index.opensearch.ContentletIndexOperationsOSIntegrationTest; import com.dotcms.content.index.opensearch.OSCreateContentIndexIntegrationTest; @@ -38,7 +39,8 @@ OSCreateContentIndexIntegrationTest.class, ContentFactoryIndexOperationsOSIntegrationTest.class, OSClientProviderIntegrationTest.class, - OSClientConfigTest.class + OSClientConfigTest.class, + ContentletIndexAPIImplMigrationIT.class }) public class OpenSearchUpgradeSuite { } \ No newline at end of file diff --git a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java new file mode 100644 index 000000000000..7a5414147d8b --- /dev/null +++ b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIT.java @@ -0,0 +1,676 @@ +package com.dotcms.content.elasticsearch.business; + +import static com.dotcms.content.index.IndexConfigHelper.MigrationPhase.FLAG_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeFalse; + +import com.dotcms.DataProviderWeldRunner; +import com.dotcms.IntegrationTestBase; +import com.dotcms.content.index.IndexAPIImpl; +import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.index.opensearch.OSIndexAPIImpl; +import com.dotcms.util.IntegrationTestInitService; +import com.dotmarketing.business.APILocator; +import com.dotmarketing.exception.DotDataException; +import com.dotmarketing.util.Config; +import com.dotmarketing.util.Logger; +import io.vavr.control.Try; +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import javax.enterprise.context.ApplicationScoped; +import javax.inject.Inject; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** + * Integration tests for {@link ContentletIndexAPIImpl} that exercise the phase-aware routing + * behavior against live search clusters. + * + *

Scenario

+ *

The tests simulate the real catch-up situation that arises during the ES→OS migration: + * Elasticsearch (or an ES-compatible cluster) was the original search backend and holds index + * {@code working_T0} / {@code live_T0}. OpenSearch was brought up later and received index + * {@code working_T1} / {@code live_T1}. Both clusters are live simultaneously; the migration + * phase flag controls which provider is treated as primary.

+ * + *

Two-cluster vs single-cluster profiles

+ *

Some tests require separate ES and OS clusters to observe cluster-isolation + * behavior (e.g. "Phase 0 shows only ES indices"). When both clients point to the same cluster + * — as happens in the {@code opensearch-upgrade} Maven profile where + * {@code DOT_ES_ENDPOINTS == OS_ENDPOINTS} — those tests are automatically skipped via + * {@link org.junit.Assume}. Tests that only verify routing logic or DB pointer state work + * correctly in both single- and two-cluster setups.

+ * + *

Run command

+ *
+ *   ./mvnw verify -pl :dotcms-integration \
+ *       -Dcoreit.test.skip=false \
+ *       -Dopensearch.upgrade.test=true \
+ *       -Dit.test=ContentletIndexAPIImplMigrationIT
+ * 
+ * + * @author Fabrizzio Araya + */ +@ApplicationScoped +@RunWith(DataProviderWeldRunner.class) +public class ContentletIndexAPIImplMigrationIT extends IntegrationTestBase { + + // ── Unique suffix prevents cross-run index name collisions ──────────────── + private static final String RUN_ID = + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + + /** + * ES index name — represents the index created during Phase 0 (before OS existed). + */ + private static final String ES_WORKING = "working_t0_" + RUN_ID; + private static final String ES_LIVE = "live_t0_" + RUN_ID; + + /** + * OS index name — represents the index created during migration catch-up. + * Different timestamp suffix → different from the ES name. + */ + private static final String OS_WORKING = "working_t1_" + RUN_ID; + private static final String OS_LIVE = "live_t1_" + RUN_ID; + + /** + * Name used for the dual-write fan-out test — a single logical name that + * {@code createContentIndex()} sends to both providers simultaneously. + */ + private static final String DUAL_WORKING = "working_dual_" + RUN_ID; + + /** + * A name that both ES and OS reject at the cluster level: spaces are not allowed in index names. + * Used to verify that cluster-level validation errors always propagate — they are NOT + * swallowed by the fire-and-forget mechanism (which only applies to shadow {@code index_not_found}). + */ + private static final String INVALID_INDEX_NAME = "invalid name with spaces " + RUN_ID; + + /** + * A name that matches the {@code working} prefix (so {@link IndexType#WORKING} recognises it) + * but was never created in any cluster. + * Used to verify that {@code deactivateIndex} is a DB-pointer-only operation: it clears + * the slot based on the name pattern, without checking cluster existence. + */ + private static final String GHOST_WORKING = "working_ghost_" + RUN_ID; + + // ── CDI-injected direct OS handle (bypasses the phase router) ──────────── + @Inject + private OSIndexAPIImpl osIndexAPI; + + // ── Saved DB state — restored in @After to avoid polluting the running app ── + @SuppressWarnings("deprecation") + private IndiciesInfo savedEsInfo; + private Optional savedOsIndices; + + // ========================================================================= + // Lifecycle + // ========================================================================= + + @BeforeClass + public static void prepare() throws Exception { + IntegrationTestInitService.getInstance().init(); + } + + @Before + public void setUp() throws Exception { + savedEsInfo = APILocator.getIndiciesAPI().loadIndicies(); + savedOsIndices = APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + + cleanupTestIndices(); + + // Create ES indices directly against the ES cluster (bypasses phase routing) + esImpl().createIndex(ES_WORKING, 1); + esImpl().createIndex(ES_LIVE, 1); + + // Create OS indices directly against the OS cluster (bypasses phase routing) + osIndexAPI.createIndex(OS_WORKING, 1); + osIndexAPI.createIndex(OS_LIVE, 1); + } + + @After + public void tearDown() throws Exception { + Config.setProperty(FLAG_KEY, null); + cleanupTestIndices(); + + Try.run(() -> APILocator.getIndiciesAPI().point(savedEsInfo)); + savedOsIndices.ifPresent(v -> + Try.run(() -> APILocator.getVersionedIndicesAPI().saveIndices(v))); + } + + // ========================================================================= + // listDotCMSIndices — real cluster visibility by phase + // + // NOTE: Phase 0 and Phase 3 isolation tests require two *separate* clusters + // (ES on one endpoint, OS on another). They are skipped automatically when + // the profile routes both clients to the same cluster. + // ========================================================================= + + /** + * Given Scenario: Phase 0 (ES only). ES has T0; OS has T1. + * When : listDotCMSIndices() is called. + * Then : Only the ES indices are returned — OS cluster not queried. + * Skip : when ES and OS endpoints are the same cluster (single-cluster profile). + */ + @Test + public void test_listDotCMSIndices_phase0_returnsEsIndicesOnly() { + assumeFalse("Requires separate ES and OS clusters (ES and OS point to same endpoint)", + esSameAsOs()); + setPhase(0); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("ES working must appear in Phase 0", indices.contains(ES_WORKING)); + assertTrue("ES live must appear in Phase 0", indices.contains(ES_LIVE)); + assertFalse("OS working must NOT appear in Phase 0 (OS not queried)", + indices.contains(OS_WORKING)); + + Logger.info(this, "✅ listDotCMSIndices Phase 0 — ES only: " + indices); + } + + /** + * Given Scenario: Phase 1 (dual-write). Both clusters are live. + * When : listDotCMSIndices() is called. + * Then : Both ES (T0) and OS (T1) indices appear — different names from different clusters. + * Works in single-cluster profile too: both T0 and T1 exist in the shared cluster. + */ + @Test + public void test_listDotCMSIndices_phase1_returnsBothIndexNames() { + setPhase(1); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("ES working (T0) must appear in Phase 1", indices.contains(ES_WORKING)); + assertTrue("ES live (T0) must appear in Phase 1", indices.contains(ES_LIVE)); + assertTrue("OS working (T1) must appear in Phase 1", indices.contains(OS_WORKING)); + assertTrue("OS live (T1) must appear in Phase 1", indices.contains(OS_LIVE)); + + Logger.info(this, "✅ listDotCMSIndices Phase 1 — both index names: " + indices); + } + + /** + * Given Scenario: Phase 3 (OS only). OS has T1. + * When : listDotCMSIndices() is called. + * Then : Only OS indices appear — ES decommissioned. + * Skip : when ES and OS endpoints are the same cluster. + */ + @Test + public void test_listDotCMSIndices_phase3_returnsOsIndicesOnly() { + assumeFalse("Requires separate ES and OS clusters (ES and OS point to same endpoint)", + esSameAsOs()); + setPhase(3); + + final List indices = contentletIndexAPI().listDotCMSIndices(); + + assertTrue("OS working must appear in Phase 3", indices.contains(OS_WORKING)); + assertFalse("ES working must NOT appear in Phase 3 (ES decommissioned)", + indices.contains(ES_WORKING)); + + Logger.info(this, "✅ listDotCMSIndices Phase 3 — OS only: " + indices); + } + + // ========================================================================= + // createContentIndex — real dual-write fan-out + // ========================================================================= + + /** + * Given Scenario: Phase 0. A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created ONLY in ES. OS cluster unchanged. + * Skip : when ES and OS point to the same cluster. + */ + @Test + public void test_createContentIndex_phase0_createsOnlyInEs() throws IOException, DotIndexException { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(0); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertTrue("Must exist in ES after Phase 0 createContentIndex", + esImpl().indexExists(DUAL_WORKING)); + assertFalse("Must NOT exist in OS in Phase 0", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 0 — ES only: " + DUAL_WORKING); + } + + /** + * Given Scenario: Phase 1 (dual-write). A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created in BOTH clusters simultaneously — the core dual-write guarantee. + * Works in single-cluster profile too (same cluster receives both writes). + */ + @Test + public void test_createContentIndex_phase1_createsInBothClusters() throws IOException, DotIndexException { + setPhase(1); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertTrue("Must exist in ES after Phase 1 (fan-out)", + esImpl().indexExists(DUAL_WORKING)); + assertTrue("Must exist in OS after Phase 1 (fan-out)", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 1 — both: " + DUAL_WORKING); + } + + /** + * Given Scenario: Phase 3. A new content index is requested. + * When : createContentIndex() is called. + * Then : Index created ONLY in OS. + * Skip : when ES and OS point to the same cluster. + */ + @Test + public void test_createContentIndex_phase3_createsOnlyInOs() throws IOException, DotIndexException { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(3); + + assertFalse("Pre: not in ES yet", esImpl().indexExists(DUAL_WORKING)); + assertFalse("Pre: not in OS yet", osIndexAPI.indexExists(DUAL_WORKING)); + + contentletIndexAPI().createContentIndex(DUAL_WORKING, 1); + + assertFalse("Must NOT exist in ES in Phase 3 (ES decommissioned)", + esImpl().indexExists(DUAL_WORKING)); + assertTrue("Must exist in OS after Phase 3", + osIndexAPI.indexExists(DUAL_WORKING)); + + Logger.info(this, "✅ createContentIndex Phase 3 — OS only: " + DUAL_WORKING); + } + + // ========================================================================= + // activateIndex — pointer-store writes verified against real DB + // ========================================================================= + + /** + * Given Scenario: Phase 0. ES store is currently pointing at the pre-existing app index. + * When : activateIndex(ES_WORKING) is called. + * Then : ES DB working slot is updated to T0. + * OS DB is unchanged compared to what it was before this call + * (Phase 0 → isMigrationStarted() = false → OS mirror block never executes). + */ + @Test + public void test_activateIndex_phase0_updatesEsDbOnly() throws DotDataException { + setPhase(0); + // Capture OS state immediately before the call — for before/after comparison + final Optional osBefore = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + + contentletIndexAPI().activateIndex(ES_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB working slot must hold the T0 physical name", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(ES_WORKING)); + + // OS DB must be exactly as it was before — activateIndex in Phase 0 must not touch it + final Optional osAfter = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertEquals("OS DB working slot must be unchanged by Phase 0 activateIndex", + osBefore.flatMap(VersionedIndices::working).orElse(null), + osAfter.flatMap(VersionedIndices::working).orElse(null)); + + Logger.info(this, "✅ activateIndex Phase 0 — ES DB: " + esInfo.getWorking()); + } + + /** + * Given Scenario: Phase 2 (dual-write, OS reads). Caller passes the ES index name T0. + * When : activateIndex(ES_WORKING) is called. + * Then : ES DB updated with T0. + * OS DB mirror ALSO updated with T0 — even though OS physically holds T1. + * This documents the known mismatch: the OS DB pointer reflects the name + * passed in, regardless of which index the OS cluster actually holds. + */ + @Test + public void test_activateIndex_phase2_esName_mirroredToOsDb() throws DotDataException { + setPhase(2); + + contentletIndexAPI().activateIndex(ES_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB must hold T0 physical name", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(ES_WORKING)); + + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must be populated with the mirrored name", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(ES_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 2 (ES name) — ES DB: " + esInfo.getWorking() + + ", OS DB: " + osInfo.map(v -> v.working().orElse("(empty)")).orElse("(absent)")); + } + + /** + * Given Scenario: Phase 2. Caller passes the OS index name T1. + * When : activateIndex(OS_WORKING) is called. + * Then : Both pointer stores consistently point to T1. + */ + @Test + public void test_activateIndex_phase2_osName_updatesBothDbsConsistently() throws DotDataException { + setPhase(2); + + contentletIndexAPI().activateIndex(OS_WORKING); + + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertTrue("ES DB must point to T1", + esInfo.getWorking() != null && esInfo.getWorking().endsWith(OS_WORKING)); + + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must also point to T1", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(OS_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 2 (OS name) — both DBs point to T1"); + } + + /** + * Given Scenario: Phase 3. + * When : activateIndex(OS_WORKING) is called. + * Then : Only OS DB updated. Legacy ES DB unchanged. + */ + @Test + public void test_activateIndex_phase3_onlyOsDbUpdated() throws DotDataException { + setPhase(3); + final String esWorkingBefore = APILocator.getIndiciesAPI().loadIndicies().getWorking(); + + contentletIndexAPI().activateIndex(OS_WORKING); + + assertEquals("ES DB must NOT be touched in Phase 3", + esWorkingBefore, APILocator.getIndiciesAPI().loadIndicies().getWorking()); + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB must hold T1", + osInfo.isPresent() + && osInfo.get().working().map(w -> w.endsWith(OS_WORKING)).orElse(false)); + + Logger.info(this, "✅ activateIndex Phase 3 — only OS DB updated"); + } + + // ========================================================================= + // fire-and-forget — real index_not_found handling at cluster level + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). ES has T0; OS has T1. + * When : closeIndex("working_T0") is called — T0 exists in ES but NOT in OS. + * Then : ES close succeeds; OS throws {@code index_not_found} for T0 (shadow in Phase 1). + * Exception is swallowed — the call returns normally. + * This is the exact bug scenario from #35302. + * + *

Skip: when ES and OS point to the same cluster — in that case T0 exists in + * both client views so neither throws {@code index_not_found} and the + * fire-and-forget path is not exercised.

+ */ + @Test + public void test_closeIndex_phase1_osIndexNotFound_isSwallowed() { + assumeFalse("Requires separate ES and OS clusters to get real index_not_found from OS", + esSameAsOs()); + setPhase(1); + + try { + APILocator.getESIndexAPI().closeIndex(ES_WORKING); + Logger.info(this, "✅ closeIndex Phase 1 — OS index_not_found swallowed successfully"); + } catch (final RuntimeException e) { + throw new AssertionError( + "closeIndex must NOT throw in Phase 1 when OS has a different index name. Got: " + + e.getMessage(), e); + } + } + + /** + * Given Scenario: Phase 1. ES has T0; OS has T1. Caller uses the OS name T1. + * When : closeIndex("working_T1") is called — T1 exists in OS but NOT in ES. + * Then : ES is primary in Phase 1 → ES throws {@code index_not_found} for T1. + * Primary failure PROPAGATES — callers must not use the OS-native name in Phase 1. + * + *

Skip: when ES and OS point to the same cluster — T1 exists in the ES view too.

+ */ + @Test(expected = RuntimeException.class) + public void test_closeIndex_phase1_esIndexNotFound_propagates() { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(1); + + // OS_WORKING exists in OS but NOT in ES — ES is primary → must throw + APILocator.getESIndexAPI().closeIndex(OS_WORKING); + } + + /** + * Given Scenario: Phase 3 (OS only). OS has T1; T0 does not exist in OS. + * When : closeIndex("working_T0") is called. + * Then : OS is the only provider and is primary → exception propagates. + * + *

Skip: when ES and OS point to the same cluster — T0 also exists in the OS view.

+ */ + @Test(expected = RuntimeException.class) + public void test_closeIndex_phase3_osIndexNotFound_propagates() { + assumeFalse("Requires separate ES and OS clusters", esSameAsOs()); + setPhase(3); + + APILocator.getESIndexAPI().closeIndex(ES_WORKING); + } + + // ========================================================================= + // deactivateIndex — ghost index (name never created in any cluster) + // + // deactivateIndex is a pure pointer-store operation: it does NOT query the cluster + // to verify the index exists. It uses IndexType.WORKING.is(name) (a startsWith check) + // to decide which DB slot to clear, then writes the result back to the store. + // + // Calling it with a name that was never created must: + // - not throw (cluster absence is irrelevant) + // - clear the working slot in the primary pointer store + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). {@code GHOST_WORKING} starts with + * "working" but was never created in either cluster. + * When : deactivateIndex(GHOST_WORKING) is called. + * Then : No exception — {@code deactivateIndex} never validates cluster existence. + * The ES DB working slot is cleared (name matched {@link IndexType#WORKING}). + * The OS DB working slot is also cleared (mirrored in Phase 1). + * + *

This documents the fundamental contract: {@code deactivateIndex} is a + * pointer-store update driven by the name pattern, not by cluster state.

+ */ + @Test + @SuppressWarnings("deprecation") + public void test_deactivateIndex_ghostIndex_phase1_clearsWorkingSlot() + throws DotDataException, IOException { + setPhase(1); + + // Verify the ghost index truly doesn't exist in either cluster + assertFalse("Pre: ghost must not exist in ES", esImpl().indexExists(GHOST_WORKING)); + assertFalse("Pre: ghost must not exist in OS", osIndexAPI.indexExists(GHOST_WORKING)); + + // Give BOTH working AND live slots a known value. + // The live slot must survive the deactivate call so that the VersionedIndices + // builder always has at least one field set (saveIndices rejects empty builders). + contentletIndexAPI().activateIndex(ES_WORKING); + contentletIndexAPI().activateIndex(ES_LIVE); + assertNotNull("Pre: ES DB working slot must be non-null before deactivate", + APILocator.getIndiciesAPI().loadIndicies().getWorking()); + + // deactivateIndex with a ghost name must NOT throw + contentletIndexAPI().deactivateIndex(GHOST_WORKING); + + // ES DB working slot must be null — GHOST_WORKING starts with "working", + // so IndexType.WORKING.is(GHOST_WORKING) == true → builder.setWorking(null) + final IndiciesInfo esInfo = APILocator.getIndiciesAPI().loadIndicies(); + assertNull("ES DB working slot must be cleared after deactivating a ghost index", + esInfo.getWorking()); + + // OS DB working slot must also be cleared (mirrored in Phase 1). + // The live slot in OS must be preserved (builder kept it because IndexType.LIVE + // did not match GHOST_WORKING, so the save is valid). + final Optional osInfo = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB working slot must be cleared (Phase 1 mirrors ES deactivation)", + osInfo.map(v -> v.working().isEmpty()).orElse(true)); + assertTrue("OS DB live slot must be preserved after deactivating the working slot", + osInfo.flatMap(VersionedIndices::live).isPresent()); + + Logger.info(this, + "✅ deactivateIndex Phase 1 cleared both DB working slots for ghost index: " + + GHOST_WORKING); + } + + /** + * Given Scenario: Phase 3 (OS only). {@code GHOST_WORKING} was never created in OS. + * When : deactivateIndex(GHOST_WORKING) is called. + * Then : No exception — OS is the primary store but cluster existence is not validated. + * The OS DB working slot is cleared; the legacy ES DB is not touched. + */ + @Test + @SuppressWarnings("deprecation") + public void test_deactivateIndex_ghostIndex_phase3_clearsOsWorkingSlot() + throws DotDataException, IOException { + setPhase(3); + + assertFalse("Pre: ghost must not exist in OS", osIndexAPI.indexExists(GHOST_WORKING)); + + // Give BOTH working AND live slots a known value. + // The live slot must survive the deactivate call so that VersionedIndicesAPI + // never sees an empty builder (saveIndices rejects empty builders). + contentletIndexAPI().activateIndex(OS_WORKING); + contentletIndexAPI().activateIndex(OS_LIVE); + assertTrue("Pre: OS DB working slot must be set", + APILocator.getVersionedIndicesAPI() + .loadDefaultVersionedIndices() + .flatMap(VersionedIndices::working) + .isPresent()); + + // Capture ES DB state — Phase 3 must not touch it + final String esWorkingBefore = + APILocator.getIndiciesAPI().loadIndicies().getWorking(); + + // Must not throw + contentletIndexAPI().deactivateIndex(GHOST_WORKING); + + // OS DB working slot cleared; live slot preserved (builder had live → save valid) + final Optional osAfter = + APILocator.getVersionedIndicesAPI().loadDefaultVersionedIndices(); + assertTrue("OS DB working slot must be cleared after deactivating a ghost index", + osAfter.map(v -> v.working().isEmpty()).orElse(true)); + assertTrue("OS DB live slot must be preserved after deactivating the working slot", + osAfter.flatMap(VersionedIndices::live).isPresent()); + + // Legacy ES DB untouched in Phase 3 + final String esWorkingAfter = + APILocator.getIndiciesAPI().loadIndicies().getWorking(); + assertEquals("ES DB must NOT be touched by deactivateIndex in Phase 3", + esWorkingBefore, esWorkingAfter); + + Logger.info(this, + "✅ deactivateIndex Phase 3 cleared OS DB working slot without touching ES DB"); + } + + // ========================================================================= + // Invalid index names — cluster rejection must always propagate + // + // The fire-and-forget mechanism silences shadow index_not_found errors only. + // A cluster-level rejection of a syntactically invalid name is a primary failure + // and must propagate regardless of phase. + // ========================================================================= + + /** + * Given Scenario: Phase 1 (dual-write, ES reads). Index name contains spaces. + * When : createContentIndex(invalidName) is called. + * Then : Both providers reject the name at the cluster level. + * {@link ContentletIndexAPI#createContentIndex(String,int)} + * absorbs provider-level exceptions and converts them to a {@code false} return value — + * this is the documented soft-failure contract for that method. + * The return value {@code false} signals to the caller that no cluster acknowledged the request. + */ + @Test + public void test_createContentIndex_invalidName_phase1_returnsFalse() + throws IOException, DotIndexException { + setPhase(1); + + final boolean result = contentletIndexAPI().createContentIndex(INVALID_INDEX_NAME, 1); + + assertFalse( + "createContentIndex must return false when clusters reject an invalid index name (Phase 1)", + result); + Logger.info(this, "✅ createContentIndex Phase 1 returned false for invalid name (as expected)"); + } + + /** + * Given Scenario: Phase 3 (OS only). Index name contains spaces. + * When : createContentIndex(invalidName) is called. + * Then : OS is the sole provider and rejects the name. The soft-failure return value + * {@code false} surfaces — the same contract as Phase 1, with a single-provider path. + */ + @Test + public void test_createContentIndex_invalidName_phase3_returnsFalse() + throws IOException, DotIndexException { + setPhase(3); + + final boolean result = contentletIndexAPI().createContentIndex(INVALID_INDEX_NAME, 1); + + assertFalse( + "createContentIndex must return false when the cluster rejects an invalid index name (Phase 3)", + result); + Logger.info(this, "✅ createContentIndex Phase 3 returned false for invalid name (as expected)"); + } + + // ========================================================================= + // Helpers + // ========================================================================= + + /** + * Returns {@code true} when the ES client and OS client are configured to talk to + * the same cluster endpoint. This happens in the {@code opensearch-upgrade} Maven profile, + * where {@code DOT_ES_ENDPOINTS} is overridden to equal {@code OS_ENDPOINTS}. + * + *

Tests that require two physically separate clusters use this to skip themselves + * via {@link org.junit.Assume#assumeFalse}.

+ */ + private static boolean esSameAsOs() { + final String esEndpoint = Config.getStringProperty("DOT_ES_ENDPOINTS", + "http://localhost:9207"); + final String osEndpoint = Config.getStringProperty("OS_ENDPOINTS", + "http://localhost:9201"); + return esEndpoint.equalsIgnoreCase(osEndpoint.trim()); + } + + private static void setPhase(final int ordinal) { + Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); + } + + private static ContentletIndexAPI contentletIndexAPI() { + return APILocator.getContentletIndexAPI(); + } + + private static ESIndexAPI esImpl() { + return ((IndexAPIImpl) APILocator.getESIndexAPI()).esImpl(); + } + + private void cleanupTestIndices() { + final ESIndexAPI esIndex = esImpl(); + for (final String name : List.of(ES_WORKING, ES_LIVE, DUAL_WORKING)) { + Try.run(() -> { if (esIndex.indexExists(name)) esIndex.delete(name); }) + .onFailure(e -> Logger.warn(this, + "Cleanup: error removing ES index '" + name + "': " + e.getMessage())); + } + for (final String name : List.of(OS_WORKING, OS_LIVE, DUAL_WORKING)) { + Try.run(() -> { if (osIndexAPI.indexExists(name)) osIndexAPI.delete(name); }) + .onFailure(e -> Logger.warn(this, + "Cleanup: error removing OS index '" + name + "': " + e.getMessage())); + } + } +}