Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 15 additions & 49 deletions sdk/cosmos/azure-cosmos/azure/cosmos/_location_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,14 @@


def _clean_location_list(locations: Optional[Sequence[Optional[str]]]) -> list[str]:
"""Drop ``None``/empty/whitespace-only entries from a location list.
"""Return the list with None, empty, and whitespace-only entries removed.

We filter these out at every intake point because the region normalizer
turns ``None`` (and empty/whitespace input) into an empty string. If such
a value were allowed into an exclusion list, every endpoint that wasn't
found in our by-endpoint lookup map — which also returns an empty string
as its default — would compare equal to it and get silently excluded.
Stripping the bad inputs once, at the boundary, prevents that collision.
Used at every location-list entry point so blank inputs cannot accidentally
match real endpoints during later comparisons.

:param locations: The raw location list to clean, or ``None``. May contain
``None`` entries and empty/whitespace-only strings.
:param locations: The raw list of region names, or None.
:type locations: Optional[Sequence[Optional[str]]]
:return: A new list containing only the non-empty, non-whitespace string
entries from ``locations``. Returns an empty list when ``locations``
is ``None`` or empty.
:return: The cleaned list. Empty when input is None or empty.
:rtype: list[str]
"""
if not locations:
Expand All @@ -62,41 +55,15 @@ def _clean_location_list(locations: Optional[Sequence[Optional[str]]]) -> list[s


def _normalize_region_name(region_name: Optional[str]) -> str:
"""Canonicalize a region name for equality comparison.

This is the single function every region-name comparison in this module
routes through (exclusion matching, preferred-location resolution, the
by-endpoint normalized lookup, the config-mismatch warning emitter, and
the most-preferred check in ``should_refresh_endpoints``). Two names
normalize to the same string iff the SDK treats them as the same region.

The rule, in plain terms:

- **Stripped:** outer and inner whitespace (via ``.strip()`` followed by
``.split()`` / ``"".join(...)``), case (``.lower()``), and the two
separator characters customers commonly write — ``-`` and ``_``.
- **Preserved:** ASCII letters and **digits**. The digit invariant is the
load-bearing one: it is what keeps ``"East US"`` and ``"East US 2"``
distinct after normalization (``"eastus"`` vs ``"eastus2"``). Stripping
digits, even as part of a well-meaning cleanup, would silently collide
these regions and route each one's traffic to the other.
- **``None`` / empty / whitespace-only input maps to ``""``.** That empty
string is a sentinel that also appears as the default value of the
by-endpoint name lookup, so callers must filter such inputs out at the
configuration boundary with :func:`_clean_location_list` before they
reach this function. See that function's docstring for the collision
scenario.
- **Idempotent.** ``_normalize_region_name(_normalize_region_name(x)) ==
_normalize_region_name(x)`` for every ``x``. Defensive double-calls are
safe.

:param region_name: A region name to canonicalize, or ``None``. Customer
input (preferred/excluded locations) and account-side names from the
gateway both flow through here.
"""Return a canonical form of a region name for equality checks.

Lowercases the text and strips spaces, hyphens, and underscores so values
like "East US 2", "east-us-2", and "east_us_2" compare equal. Digits are
kept so "East US" and "East US 2" stay distinct. None becomes "".

:param region_name: A region name, or None.
:type region_name: Optional[str]
:return: The canonicalized form, suitable for direct ``==`` / ``in``
comparison against other canonicalized names. Returns ``""`` for
``None`` or whitespace-only input.
:return: The canonical form, or "" for None or whitespace-only input.
:rtype: str
"""
if region_name is None:
Expand Down Expand Up @@ -648,9 +615,8 @@ def update_location_cache(self, write_locations=None, read_locations=None, enabl
self._read_locations_by_normalized,
)

# Config-time visibility for misconfigured region names. Dedupe ensures periodic
# refreshes do not re-emit identical warnings; new mismatches still surface because
# the dedupe key includes the available account regions snapshot.
# Warn once when configured region names do not match any account region.
# Repeated identical warnings are suppressed; a different set of regions emits a new one.
if self.connection_policy.PreferredLocations:
self._emit_config_mismatch_warning_once(
self.connection_policy.PreferredLocations,
Expand Down
3 changes: 2 additions & 1 deletion sdk/cosmos/azure-cosmos/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"ppcb",
"reindexing",
"reranker",
"toctou"
"toctou",
"ufffd"
]
}
125 changes: 125 additions & 0 deletions sdk/cosmos/azure-cosmos/tests/routing/test_routing_map_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,131 @@ def refresher_fn():
self.assertEqual(none_seen['count'], 0,
"Cache entry should never be None during a refresh — it should be atomically replaced")

# The tests below run through SmartRoutingMapProvider to confirm that a
# bad cache snapshot surfaces as a CosmosHttpResponseError the caller can
# handle, not as a raw ValueError or AssertionError.

class _SequencedSnapshotClient(object):
"""Mock client that returns the next payload from response_sequence on
each fresh read, and an empty page when the If-None-Match matches the
last etag (acts like a 304 reply)."""

def __init__(self, response_sequence):
self.response_sequence = response_sequence
self.url_connection = "https://mock-sequenced-test.documents.azure.com:443/"
self.call_count = 0
self._last_etag = None

def _ReadPartitionKeyRanges(self, _collection_link, _feed_options=None, **kwargs):
headers_in = kwargs.get('headers') or {}
inm = headers_in.get('If-None-Match')
if inm is not None and inm == self._last_etag:
status_capture = kwargs.get('_internal_response_status_capture')
if status_capture is not None:
status_capture[0] = 304
captured_headers = kwargs.get('_internal_response_headers_capture')
if captured_headers is not None:
captured_headers.clear()
captured_headers.update({'ETag': self._last_etag})
return []
idx = min(self.call_count, len(self.response_sequence) - 1)
payload = self.response_sequence[idx]
self.call_count += 1
etag = f'"etag-{self.call_count}"'
self._last_etag = etag
captured_headers = kwargs.get('_internal_response_headers_capture')
if captured_headers is not None:
captured_headers.clear()
captured_headers.update({'ETag': etag})
status_capture = kwargs.get('_internal_response_status_capture')
if status_capture is not None:
status_capture[0] = 200
return payload

_OVERLAP_PAYLOAD = [
{'id': 'L', 'minInclusive': '', 'maxExclusive': '80'},
{'id': '10', 'minInclusive': '80', 'maxExclusive': 'A0'},
{'id': '10/0', 'minInclusive': '80', 'maxExclusive': '90'},
{'id': '10/1', 'minInclusive': '90', 'maxExclusive': 'A0'},
{'id': 'R', 'minInclusive': 'A0', 'maxExclusive': 'FF'},
]
_GAP_PAYLOAD = [
{'id': 'L', 'minInclusive': '', 'maxExclusive': '80'},
{'id': 'R', 'minInclusive': 'A0', 'maxExclusive': 'FF'},
]
_GOOD_PAYLOAD = [
{'id': 'L', 'minInclusive': '', 'maxExclusive': '80'},
{'id': '10/0', 'minInclusive': '80', 'maxExclusive': '90', 'parents': ['10']},
{'id': '10/1', 'minInclusive': '90', 'maxExclusive': 'A0', 'parents': ['10']},
{'id': 'R', 'minInclusive': 'A0', 'maxExclusive': 'FF'},
]

def _reset_shared_cache_state(self, provider):
"""Release the given provider and clear shared cache dicts so the next
sub-test or run starts with a clean slate."""
provider.release()
with _shared_cache_lock:
_shared_routing_map_cache.clear()
_shared_collection_locks.clear()
_shared_locks_locks.clear()
_shared_cache_refcounts.clear()

def test_smart_provider_does_not_leak_overlap_value_error_on_persistent_inconsistency(self):
"""A persistent overlap or gap snapshot must raise 503 with sub_status
21015 from SmartRoutingMapProvider.get_overlapping_ranges, not a bare
ValueError or AssertionError."""
full_range = routing_range.Range("", "FF", True, False)

for label, payload in (("overlap", self._OVERLAP_PAYLOAD), ("gap", self._GAP_PAYLOAD)):
with self.subTest(snapshot=label):
client = TestRoutingMapProvider._SequencedSnapshotClient([payload])
provider = SmartRoutingMapProvider(client)
try:
with patch(
'azure.cosmos._routing.routing_map_provider.time.sleep',
return_value=None,
):
with self.assertRaises(CosmosHttpResponseError) as ctx:
provider.get_overlapping_ranges(
"dbs/db/colls/container", [full_range]
)
exc = ctx.exception
self.assertEqual(
exc.status_code,
http_constants.StatusCodes.SERVICE_UNAVAILABLE,
f"Persistent {label} snapshot must surface as 503.",
)
self.assertEqual(
exc.sub_status,
http_constants.SubStatusCodes.ROUTING_MAP_SNAPSHOT_INCONSISTENT,
f"503 from a persistent {label} must set sub_status to 21015.",
)
self.assertNotIsInstance(exc, AssertionError)
self.assertFalse(isinstance(exc, ValueError))
finally:
self._reset_shared_cache_state(provider)

def test_smart_provider_recovers_through_full_stack_after_transient_overlap(self):
"""A bad overlap response followed by a good one must return the
expected ranges from get_overlapping_ranges."""
full_range = routing_range.Range("", "FF", True, False)
client = TestRoutingMapProvider._SequencedSnapshotClient(
[self._OVERLAP_PAYLOAD, self._GOOD_PAYLOAD]
)
provider = SmartRoutingMapProvider(client)
try:
with patch(
'azure.cosmos._routing.routing_map_provider.time.sleep',
return_value=None,
):
overlapping = provider.get_overlapping_ranges(
"dbs/db/colls/container", [full_range]
)
ids = [r['id'] for r in overlapping]
self.assertEqual(ids, ['L', '10/0', '10/1', 'R'])
finally:
self._reset_shared_cache_state(provider)

if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']
unittest.main()
Loading
Loading