diff --git a/CMakeLists.txt b/CMakeLists.txt index a356ad84d..f227b715d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -137,7 +137,8 @@ set(LIB_SOURCE_FILES src/h3lib/lib/iterators.c src/h3lib/lib/vertexGraph.c src/h3lib/lib/faceijk.c - src/h3lib/lib/baseCells.c) + src/h3lib/lib/baseCells.c + src/h3lib/lib/low52.c) set(APP_SOURCE_FILES src/apps/applib/include/kml.h src/apps/applib/include/benchmark.h @@ -210,6 +211,7 @@ set(OTHER_SOURCE_FILES src/apps/testapps/testCoordIjk.c src/apps/testapps/testH3Memory.c src/apps/testapps/testH3Iterators.c + src/apps/testapps/testLow52.c src/apps/miscapps/cellToBoundaryHier.c src/apps/miscapps/cellToLatLngHier.c src/apps/miscapps/generateBaseCellNeighbors.c @@ -607,6 +609,7 @@ if(H3_IS_ROOT_PROJECT AND BUILD_TESTING) add_h3_test(testBaseCells src/apps/testapps/testBaseCells.c) add_h3_test(testPentagonIndexes src/apps/testapps/testPentagonIndexes.c) add_h3_test(testH3Iterators src/apps/testapps/testH3Iterators.c) + add_h3_test(testLow52 src/apps/testapps/testLow52.c) add_h3_test_with_arg(testH3NeighborRotations src/apps/testapps/testH3NeighborRotations.c 0) add_h3_test_with_arg(testH3NeighborRotations src/apps/testapps/testH3NeighborRotations.c 1) diff --git a/dev-docs/RFCs/canonicalization.md b/dev-docs/RFCs/canonicalization.md new file mode 100644 index 000000000..e8628732f --- /dev/null +++ b/dev-docs/RFCs/canonicalization.md @@ -0,0 +1,131 @@ +# RFC: Canonicalization for H3 cell sets + +* **Authors**: AJ Friend +* **Date**: 2021-12-122 +* **Status**: Draft + +## Abstract + +We propose a canonical form for **sets** of H3 cells based on +the "lower 52 bit" ordering. We also introduce fast "spatial join" operations +on the cell sets (like cell-in-set, intersection, union, etc.) that exploit +the canonical structure for speed gains. + + +## Motivation + +A canonical form for cell sets is useful when testing if two sets are equal. +That is, we'd like to be able to tell if two H3 cell arrays represent +the same mathematical set of cells, ignoring ordering or duplicated cells. + +If we have a function to canonicalize an H3 cell array, then we would +consider two arrays to be equivalent (as sets) if they each canonicalize +to the exact same (canonical) cell array. + +A canonical form is also useful if a user wanted to deterministically +hash H3 cell sets, and wanted the hash to be independent of ordering +or duplicates. + +The canonical form we'll propose also has the added benefit of allowing +for fast "spatial join" operations on canonicalized sets. For example, +we'll be able to do a fast binary search to see if a cell is a member +of a set, and an **even faster** binary search if the set is both +compacted and canonicalized. + +We'll get the same benefits computing the intersection of sets, or +simply testing for intersection. + +The canonical form also suggests a new "in-memory" cell compaction algorithm, +which avoids any dynamic memory allocation. This new compact algorithm +has the added benefit of returning cell arrays already in canonical form. + +## Terminology + +We propose a canonical form based on the "lower 52 bit" ordering, that is, +the ordering you would get if you only considered the lower 52 bits of the +H3 cell indexes. The lower 52 bits of an H3 index consist of 7 bits for the +base cell, and 3 bits for each of the 15 resolution digits. That sums up +to `7 + 3*15 = 52`. + +We'll only define this ordering for H3 **cells**. We're not considering +vertices or edges in this RFC. + +The lower 52 bit ordering can be implemented, for example, by +the `cmpLow52` comparison function given below. + + +```c +int cmpLow52(H3Index a, H3Index b) { + a <<= 12; + b <<= 12; + + if (a < b) return -1; + if (a > b) return +1; + return 0; +``` + +This ordering has the property that children cells are always less than +their parent cells. Ordered in an array with cells of multiple resolutions, +children cells are always to the left of their parents. + +We can also get slightly richer ordering information with a comparison function +with a declaration like + +```c +int cmpCanon(H3Index a, H3Index b); +``` + +defined so that: + +- `cmpCanon(a, b) == 0` if `a` and `b` are the same cell +- `cmpCanon(a, b) == -1` if `a` is a child (or further descendant) of `b` +- `cmpCanon(a, b) == +1` if `b` ... `a` +- `cmpCanon(a, b) == -2` if `a` < `b` in the low52 ordering, but they are not related +- `cmpCanon(a, b) == +2` if `b` < `a` ... + +Note that these two functions produce the same ordering when given to +the C standard library's `qsort`. + +### Array classifications + +Given these comparison functions, we can define 3 increasingly strict properties +on arrays of H3 cells: + +1. "lower 52" ordered +2. canonical +3. compacted and canonical + +#### Low52 ordered + +An H3 cell array `a` is "low52 ordered" if its elements are such that + +- `cmpLow52(a[i-1], a[i]) <= 0` or, equivalently, +- `cmpCanon(a[i-1], a[i]) <= 0`. + +Note that in this classification, arrays can have duplicated cell. We can also +have the parents, children, ancestors, or descendants of other cells in +the array. + +### Canonical + +We'll define a "canonical" H3 cell array to be one that is low52 ordered and +has the additional property that no duplicates, parents, children, ancestors, +or descendants of other cells are in the array. + +We can check this property by ensuring that + +```c +cmpCanon(a[i-1], a[i]) == -2 +``` + +for each adjacent pair of cells in the array. + +### Compacted and canonical + +A compacted and canonical H3 set is just what it sounds like. + +Many of the fast spatial join operations will work on canonical sets, but +will be faster on compacted canonical sets. + +## Proposal + diff --git a/src/apps/testapps/testLow52.c b/src/apps/testapps/testLow52.c new file mode 100644 index 000000000..98bee0d7c --- /dev/null +++ b/src/apps/testapps/testLow52.c @@ -0,0 +1,341 @@ +/* + * Copyright 2020-2021 Uber Technologies, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** @file + * @brief tests "lower 52 bit" ordering, canonicalization, and spatial join + * algorithms + * + * usage: `testLow52` + */ + +#include "h3api.h" +#include "test.h" + +typedef struct { + H3Index *cells; + int64_t N; +} CellArray; + +CellArray ca_init(int64_t N) { + CellArray a = {.cells = calloc(N, sizeof(H3Index)), .N = N}; + + return a; +} + +CellArray ca_disk(H3Index h, int k) { + CellArray arr = ca_init(H3_EXPORT(maxGridDiskSize)(k)); + t_assertSuccess(H3_EXPORT(gridDisk)(h, k, arr.cells)); + + return arr; +} + +CellArray ca_ring(H3Index h, int k) { + CellArray A = ca_init(6 * k); + t_assertSuccess(H3_EXPORT(gridRingUnsafe)(h, k, A.cells)); + + return A; +} + +void ca_canon(CellArray *arr) { + int64_t N; + t_assertSuccess(canonicalizeCells(arr->cells, arr->N, &N)); + arr->N = N; +} + +CellArray ca_compact(CellArray arr) { + CellArray packed = ca_init(arr.N); + t_assertSuccess(H3_EXPORT(compactCells)(arr.cells, packed.cells, arr.N)); + + return packed; +} + +CellArray ca_uncompact(CellArray arr, int res) { + int64_t N; + t_assertSuccess(H3_EXPORT(uncompactCellsSize)(arr.cells, arr.N, res, &N)); + + CellArray out = ca_init(N); + t_assertSuccess( + H3_EXPORT(uncompactCells)(arr.cells, arr.N, out.cells, out.N, res)); + + return out; +} + +/* +Return all cells that are distance k1 <= d <= k2 from h. + +So: + +- ca_thickRing(h, k, k) is the same as gridRing(h, k) +- ca_thickRing(h, 0, k) is the same as gridDisk(h, k) + + */ +CellArray ca_thickRing(H3Index h, int k1, int k2) { + CellArray A = ca_disk(h, k2); + + for (int64_t i = 0; i < A.N; i++) { + int64_t d; + H3_EXPORT(gridDistance)(h, A.cells[i], &d); + + if (d < k1) { + A.cells[i] = 0; + } + } + + CellArray B = ca_compact(A); + ca_canon(&B); + + free(A.cells); + + return B; +} + +CellArray ca_missingRing(H3Index h, int k1, int k2, int K) { + CellArray A = ca_disk(h, K); + + for (int64_t i = 0; i < A.N; i++) { + int64_t d; + H3_EXPORT(gridDistance)(h, A.cells[i], &d); + + if ((k1 <= d) && (d <= k2)) { + A.cells[i] = 0; + } + } + + CellArray B = ca_compact(A); + ca_canon(&B); + + free(A.cells); + + return B; +} + +void t_intersects(CellArray A, CellArray B, bool result) { + t_assert(result == intersectTheyDo(A.cells, A.N, B.cells, B.N), ""); +} + +void t_contains(CellArray A, H3Index h, bool result) { + t_assert(result == canonSearch(A.cells, A.N, h), ""); +} + +void t_isLow52(CellArray A, bool result) { + t_assert(result == isLow52Sorted(A.cells, A.N), ""); +} +void t_isCanon(CellArray A, bool result) { + t_assert(result == isCanonicalCells(A.cells, A.N), ""); +} + +void t_diskIntersect(H3Index a, H3Index b, int ka, int kb, + bool shouldIntersect) { + CellArray A = ca_disk(a, ka); + CellArray B = ca_disk(b, kb); + + ca_canon(&A); + ca_canon(&B); + + t_intersects(A, B, shouldIntersect); + t_intersects(B, A, shouldIntersect); + + free(A.cells); + free(B.cells); +} + +void t_diskIntersectCompact(H3Index a, H3Index b, int ka, int kb, + bool shouldIntersect) { + CellArray A, B, cA, cB; + + A = ca_disk(a, ka); + B = ca_disk(b, kb); + + cA = ca_compact(A); + cB = ca_compact(B); + ca_canon(&cA); + ca_canon(&cB); + + t_intersects(cA, cB, shouldIntersect); + t_intersects(cB, cA, shouldIntersect); + + free(A.cells); + free(B.cells); + free(cA.cells); + free(cB.cells); +} + +SUITE(low52tests) { + TEST(basic_low52) { + H3Index h = 0x89283082e73ffff; + int k = 100; + + CellArray A = ca_disk(h, k); + CellArray Z = {.N = 0, .cells = NULL}; // empty cell array + + // low 52 tests + t_isLow52(A, false); // shouldn't be sorted yet + t_assertSuccess(low52Sort(A.cells, A.N)); + t_isLow52(A, true); // should be sorted now! + + // canonical tests + t_isCanon(A, true); // no duplicates, so should already be canon + int64_t numBefore = A.N; + ca_canon(&A); + t_assert(A.N == numBefore, "Expect no change from canonicalizing."); + + // binary search + t_contains(A, h, true); // Needs to be in there! + t_contains(Z, h, false); // h can't be in an empty set + + // intersection + t_intersects(A, A, true); + t_intersects(Z, A, false); // first is empty + t_intersects(A, Z, false); // second is empty + t_intersects(Z, Z, false); // both are empty + + free(A.cells); + } + + TEST(handling_zeroes) { + H3Index h = 0x89283082e73ffff; + int k = 100; + + CellArray A = ca_disk(h, k); + + int64_t numBefore = A.N; + ca_canon(&A); + t_assert(A.N == numBefore, "Expect no change from canonicalizing."); + + t_isLow52(A, true); + t_isCanon(A, true); + + // insert zero at start of array + // isLow52Sorted is OK with zeros/H3_NULL, but isCanonicalCells is not + A.cells[0] = 0; + t_isLow52(A, true); + t_isCanon(A, false); + + // canonicalizing again should remove the zero + ca_canon(&A); + t_assert(A.N == numBefore - 1, "Lose one cell."); + t_isCanon(A, true); + + free(A.cells); + } + + TEST(compact_canon) { + H3Index h = 0x89283082e73ffff; + int res = 9; + int k = 100; + + CellArray U = ca_disk(h, k); // uncompacted set + ca_canon(&U); + + CellArray C = ca_compact(U); // compacted set + ca_canon(&C); + + t_isCanon(U, true); + t_isCanon(C, true); + + t_contains(C, h, true); + t_intersects(C, C, true); + t_intersects(C, U, true); + t_intersects(U, C, true); + + // test that uncompact keeps things canonical + CellArray U2 = ca_uncompact(C, res); + t_isCanon(U2, true); + + free(C.cells); + free(U.cells); + free(U2.cells); + } + + TEST(ring_intersect) { + H3Index h = 0x89283082e73ffff; + int k = 10; + + CellArray A = ca_ring(h, k); + CellArray B = ca_ring(h, k + 1); + ca_canon(&A); + ca_canon(&B); + + t_contains(A, h, false); + t_contains(B, h, false); + t_contains(A, A.cells[0], true); + + t_intersects(A, B, false); + t_intersects(B, A, false); + t_intersects(A, A, true); + t_intersects(B, B, true); + + // add a cell from A to B, so they now intersect + B.cells[B.N / 2] = A.cells[A.N / 2]; + ca_canon(&B); + t_intersects(A, B, true); + t_intersects(A, B, true); + + free(A.cells); + free(B.cells); + } + + TEST(disk_overlap) { + H3Index a = 0x89283082e73ffff; + H3Index b = 0x89283095063ffff; + + int64_t k; + H3_EXPORT(gridDistance)(a, b, &k); + t_assert(k == 20, ""); + + // not compacted + t_diskIntersect(a, b, 9, 9, false); // not yet + t_diskIntersect(a, b, 9, 10, false); // just barely disjoint + t_diskIntersect(a, b, 10, 10, true); // overlap + t_diskIntersect(a, b, 11, 11, true); // more overlap + + // compacted + t_diskIntersectCompact(a, b, 9, 9, false); // not yet + t_diskIntersectCompact(a, b, 9, 10, false); // just barely disjoint + t_diskIntersectCompact(a, b, 10, 10, true); // overlap + t_diskIntersectCompact(a, b, 11, 11, true); // more overlap + } + + TEST(tricky_rings1) { + H3Index h = 0x89283082e73ffff; + int K = 100; + int k1 = 40; + int k2 = 60; + + CellArray A = ca_thickRing(h, k1, k2); + CellArray B = ca_missingRing(h, k1, k2, K); + + t_intersects(A, B, false); + + free(A.cells); + free(B.cells); + } + + TEST(tricky_rings2) { + H3Index h = 0x89283082e73ffff; + int K = 100; + int k1 = 40; + int k2 = 60; + + CellArray A = ca_thickRing(h, k1, k2 + 1); + CellArray B = ca_missingRing(h, k1, k2, K); + + t_intersects(A, B, true); + + free(A.cells); + free(B.cells); + } +} diff --git a/src/h3lib/include/h3api.h.in b/src/h3lib/include/h3api.h.in index 23b27587c..25793563a 100644 --- a/src/h3lib/include/h3api.h.in +++ b/src/h3lib/include/h3api.h.in @@ -532,9 +532,10 @@ DECLSPEC H3Index H3_EXPORT(cellToCenterChild)(H3Index h, int childRes); * @{ */ /** @brief compacts the given set of hexagons as best as possible */ -DECLSPEC H3Error H3_EXPORT(compactCells)(const H3Index *h3Set, - H3Index *compactedSet, - const int64_t numHexes); +DECLSPEC H3Error H3_EXPORT(compactCells)( + const H3Index *h3Set, H3Index *compactedSet, + const int64_t + numHexes); // compact should tell you how many cells there are /** @} */ /** @defgroup uncompactCells uncompactCells @@ -734,6 +735,17 @@ DECLSPEC H3Error H3_EXPORT(experimentalLocalIjToH3)(H3Index origin, H3Index *out); /** @} */ +int isLow52Sorted(const H3Index *cells, const int64_t N); +H3Error low52Sort(H3Index *cells, const int64_t N); + +int isCanonicalCells(const H3Index *cells, const int64_t N); +H3Error canonicalizeCells(H3Index *cells, const int64_t numBefore, + int64_t *numAfter); + +int canonSearch(const H3Index *cells, const int64_t N, const H3Index h); +int intersectTheyDo(const H3Index *_A, const int64_t aN, const H3Index *_B, + const int64_t bN); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/h3lib/include/mathExtensions.h b/src/h3lib/include/mathExtensions.h index 2ed47e331..74d242021 100644 --- a/src/h3lib/include/mathExtensions.h +++ b/src/h3lib/include/mathExtensions.h @@ -26,6 +26,7 @@ * MAX returns the maximum of two values. */ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) // Internal functions int64_t _ipow(int64_t base, int64_t exp); diff --git a/src/h3lib/lib/low52.c b/src/h3lib/lib/low52.c new file mode 100644 index 000000000..fec21eb44 --- /dev/null +++ b/src/h3lib/lib/low52.c @@ -0,0 +1,396 @@ +#include +#include +#include + +#include "h3api.h" +#include "mathExtensions.h" + +#define HIGH_BITS(h, t) ((h) >> (64 - (t))) + +static int cmpLow52(H3Index a, H3Index b) { + a <<= 12; + b <<= 12; + + if (a < b) return -1; + if (a > b) return +1; + return 0; +} + +// todo: could have a version that assumes a <= b already. save any time? +static int cmpCanon(H3Index a, H3Index b) { + // skip high, mode, reserved bits + a <<= 8; + b <<= 8; + + // pull 4 resolution bits + int res_a = HIGH_BITS(a, 4); + int res_b = HIGH_BITS(b, 4); + + // move past 4 resolution bits + a <<= 4; + b <<= 4; + + // 7 bits for base cell, 3 for each shared resolution level + int common = 7 + 3 * MIN(res_a, res_b); + bool are_related = (HIGH_BITS(a, common) == HIGH_BITS(b, common)); + + if (are_related) { + if (a < b) return -1; + if (a > b) return +1; + } else { + if (a < b) return -2; + if (a > b) return +2; + } + + return 0; +} + +static int cmpLow52Ptr(const void *a_, const void *b_) { + H3Index a = *(const H3Index *)a_; + H3Index b = *(const H3Index *)b_; + + return cmpLow52(a, b); +} + +int isLow52Sorted(const H3Index *cells, const int64_t N) { + // note: returns true if N == 0 or N == 1 + for (int64_t i = 1; i < N; i++) { + if (cmpLow52(cells[i - 1], cells[i]) <= 0) { + // this pair is OK + } else { + return false; + } + } + return true; +} + +/* +Return True if cells is low52 ordered, and there no children/descendants/dupes. + +TODO: could maybe speed up by keeping resolution calculation since cells +appear in both LHS and RHS + */ +int isCanonicalCells(const H3Index *cells, const int64_t N) { + for (int64_t i = 0; i < N; i++) { + if (!H3_EXPORT(isValidCell)(cells[i])) { + return false; + } + } + + // note: returns true if N == 0 or N == 1 + for (int64_t i = 1; i < N; i++) { + if (cmpCanon(cells[i - 1], cells[i]) == -2) { + // this pair is OK + } else { + return false; + } + } + return true; +} + +// note that this places any zeros at the start of the array +H3Error low52Sort(H3Index *cells, const int64_t N) { + qsort(cells, N, sizeof(H3Index), cmpLow52Ptr); + return E_SUCCESS; +} + +// if c is a descendant of p (including being the same cell) +static bool isDesc(H3Index c, H3Index p) { + int x = cmpCanon(c, p); + return ((x == -1) || (x == 0)); +} + +/* +Remove cells which have a parent in the sorted array. +Assume the cell array is ordered by the lower52 order. +Walk from the right to the left, looking for descendants. + +TODO: we can maybe do the descendant test even faster, since we +assume the array is sorted! + */ +static void setDescToZero(H3Index *cells, const int64_t N) { + H3Index p = 0; // current parent + + for (int64_t i = N - 1; i >= 0; i--) { + if (cells[i] == 0) { + continue; + } + + if (p == 0) { + p = cells[i]; + } else if (isDesc(cells[i], p)) { + cells[i] = 0; + } else { + p = cells[i]; + } + } +} + +/* +Shift all the nonzero elements of the array to the left while preserving +their order. Return the number of nonzero elements. + */ +static int64_t shiftOutZeros(H3Index *cells, const int64_t N) { + int64_t i, k; + + for (k = 0; k < N; k++) { + if (cells[k] == 0) { + break; + } + } + + for (i = 0; i < N; i++) { + if ((cells[i] != 0) && (k < i)) { + cells[k] = cells[i]; + cells[i] = 0; + k++; + } + } + + return k; +} + +/* +Canonicalize an array of cells. The array is permitted to have +valid H3 cells and H3_NULL as elements. + +numAfter is the number of nonzero cells in the canonicalized array. +All nonzero elements are moved to the front/left of the array. + */ +H3Error canonicalizeCells(H3Index *cells, const int64_t numBefore, + int64_t *numAfter) { + low52Sort(cells, numBefore); + setDescToZero(cells, numBefore); + *numAfter = shiftOutZeros(cells, numBefore); + + return E_SUCCESS; +} + +// bsearch works on any canonical (maybe just low52ordered), but is faster on +// canonical compact low52ordered < canonical < canonical compact? feature +// matrix comparison bool is_canonical_compacted(const H3Index *cells, const +// int64_t N); // don't add this + +/* +bswarch works on low52_sorted. faster on canonical. fastest on canonical +compacted. + */ + +/* + Compact hex set binary search. + + Determine if h is in s_hexes, or a child of any hex in s_hexes. + + s_hexes is sorted asc by lower52(). this means children are to the left + of parents. + + ## Loop invariant + + We know that h is not before i or after j, so we only search + in the interval [i, j]. + + */ + +/* + +i = 0 +j = 7 +k = 3 + +| . | . | . | X | . | . | . | +i k j + + +two outcomes if we don't find a match: + +| . | . | . | +i j + + + | . | . | . | + i j + + +i = 0 +j = 2 +k = 1 + +| . | X | + \ \ \ + i k j + + +i = 0 +j = 1 +k = 0 + +| X | +i j +k + + +| X | + \ \ + i,k \ + j + +i = 0 +j = 0 +k = 0 + +| (empty array) + \ + i,j,k + + */ + +/* +At each iteration, we can select any k from i to j-1. +Typically, we'll just select a k midway between i and j. +This strategy makes us test the endpoints of the array first, +hoping for an early exit if h is clearly not in the set. +This is totally a heuristic, but should work well on typical +"clumps" of geo data. + */ +static inline int64_t kStrategy(int64_t i, int64_t j, int64_t N) { + if (i == 0) return 0; + if (j == N) return N - 1; + + return i + (j - i) / 2; +} + +int canonSearch(const H3Index *cells, const int64_t N, const H3Index h) { + int64_t i = 0; + int64_t j = N; + + while (i < j) { + int64_t k = kStrategy(i, j, N); + int cmp = cmpCanon(h, cells[k]); + + if (cmp == -1 || cmp == 0) { + return true; + } else if (cmp == -2) { + j = k; + } else if (cmp == +2) { + i = k + 1; + } else { + // This conclusion depends on `cells` being canonical. + // Not true if it is only low 52 sorted. + // TODO: also provide one that works on just low 52 arrays? + return false; + } + } + + return false; +} + +typedef struct { + const H3Index *cells; + int64_t N, i, j; +} SearchInterval; + +// returns -1 if h intersects with cells[i:j] +static int64_t disjointInsertionPoint(const SearchInterval A, const H3Index h) { + int64_t i = A.i; + int64_t j = A.j; + + while (i < j) { + int64_t k = i + (j - i) / 2; + int cmp = cmpCanon(h, A.cells[k]); + + if (cmp == -2) { + j = k; + } else if (cmp == +2) { + i = k + 1; + } else { + // cmp == -1, +1, or 0 + return -1; // h intersects with cells! + } + } + + return i; +} + +static bool wayLessThan(const SearchInterval A, const SearchInterval B) { + if (A.N > 0 && B.N > 0 && (cmpCanon(A.cells[A.N - 1], B.cells[0]) == -2)) { + return true; + } else { + return false; + } +} + +static void ensureASmaller(SearchInterval *A, SearchInterval *B) { + // ensure A is the smaller of the two sets. + SearchInterval temp; + + if ((B->j - B->i) < (A->j - A->i)) { + temp = *A; + *A = *B; + *B = temp; + } +} + +/* +Double binary search for a fast intersection test on canonical sets. +Faster if compact and canonical. + +Yoda naming until we come up with something better + */ +int intersectTheyDo(const H3Index *_A, const int64_t aN, const H3Index *_B, + const int64_t bN) { + SearchInterval A = {.cells = _A, .N = aN, .i = 0, .j = aN}; + SearchInterval B = {.cells = _B, .N = bN, .i = 0, .j = bN}; + + // check for a quick exit + if (wayLessThan(A, B)) return false; + if (wayLessThan(B, A)) return false; + + bool usingLeft = true; + + while ((A.i < A.j) && (B.i < B.j)) { + ensureASmaller(&A, &B); + + // take A[i] or A[j-1] and see what happens when we look into B[i:j] + usingLeft = !usingLeft; + H3Index h = usingLeft ? A.cells[A.i] : A.cells[A.j - 1]; + int64_t k = disjointInsertionPoint(B, h); + + if (k == -1) return true; // h found in B, so they intersect! + + if (usingLeft) { + B.i = k; + A.i++; + } else { + B.j = k; + A.j--; + } + } + + return false; +} + +/* +Just for comparison: + +This implementation is also **correct**, but I'm guessing it will be slower +on real data. The implementation above has a few heuristics that I think +will help with speed. + */ +int intersectTheyDo_slow(const H3Index *_A, const int64_t aN, const H3Index *_B, + const int64_t bN) { + SearchInterval A = {.cells = _A, .N = aN, .i = 0, .j = aN}; + SearchInterval B = {.cells = _B, .N = bN, .i = 0, .j = bN}; + + while ((A.i < A.j) && (B.i < B.j)) { + // take A[i] and see what happens when we look into B[i:j] + H3Index h = A.cells[A.i]; + int64_t k = disjointInsertionPoint(B, h); + + if (k == -1) return true; // they intersect! + + B.i = k; + A.i++; + } + + return false; +}