@@ -83,6 +83,8 @@ struct RowContainerIterator {
8383 char * rowBegin{nullptr };
8484 // / First byte after the end of the range containing 'currentRow'.
8585 char * endOfRun{nullptr };
86+ /// Index into the container's row-pointer array of the next row that listRowsFast() will return.
87+ int32_t listRowCursor{0 };
8688
8789 // / Returns the current row, skipping a possible normalized key below the
8890 // / first byte of row.
@@ -273,6 +275,21 @@ class RowContainer {
273275 const std::vector<TypePtr>& keyTypes,
274276 const std::vector<TypePtr>& dependentTypes,
275277 memory::MemoryPool* pool)
278+ : RowContainer(
279+ keyTypes,
280+ dependentTypes,
281+ /* useListRowIndex=*/ false ,
282+ pool) {}
283+
284+ // / If 'useListRowIndex' is true, the container maintains an internal array of
285+ // / row pointers so that listRowsFast() can return rows without scanning
286+ // / underlying allocations or checking free/probe flags. It is intended to be
287+ // / used in SortBuffer and SortInputSpiller to improve performance.
288+ RowContainer (
289+ const std::vector<TypePtr>& keyTypes,
290+ const std::vector<TypePtr>& dependentTypes,
291+ bool useListRowIndex,
292+ memory::MemoryPool* pool)
276293 : RowContainer(
277294 keyTypes,
278295 true , // nullableKeys
@@ -282,6 +299,7 @@ class RowContainer {
282299 false , // isJoinBuild
283300 false , // hasProbedFlag
284301 false , // hasNormalizedKey
302+ useListRowIndex,
285303 pool) {}
286304
287305 ~RowContainer ();
@@ -313,6 +331,7 @@ class RowContainer {
313331 bool isJoinBuild,
314332 bool hasProbedFlag,
315333 bool hasNormalizedKey,
334+ bool useListRowIndex,
316335 memory::MemoryPool* pool);
317336
318337 // / Allocates a new row and initializes possible aggregates to null.
@@ -637,6 +656,20 @@ class RowContainer {
637656 return count;
638657 }
639658
659+ // / Fast path for `listRows` that returns `rowPointers_` directly. Used by
660+ // / `SortBuffer` and `SortInputSpiller`, so it skips checking the free and
661+ // / probe flags.
662+ int32_t listRowsFast (RowContainerIterator* iter, int32_t maxRows, char ** rows)
663+ const {
664+ int32_t count = 0 ;
665+ while (count < maxRows && iter->listRowCursor < rowPointers_.size ()) {
666+ char * row = rowPointers_[iter->listRowCursor ];
667+ rows[count++] = row;
668+ ++iter->listRowCursor ;
669+ }
670+ return count;
671+ }
672+
640673 // / Extracts up to 'maxRows' rows starting at the position of 'iter'. A
641674 // / default constructed or reset iter starts at the beginning. Returns the
642675 // / number of rows written to 'rows'. Returns 0 when at end. Stops after the
@@ -651,6 +684,9 @@ class RowContainer {
651684
652685 int32_t listRows (RowContainerIterator* iter, int32_t maxRows, char ** rows)
653686 const {
687+ if (useListRowIndex_) {
688+ return listRowsFast (iter, maxRows, rows);
689+ }
654690 return listRows<ProbeType::kAll >(iter, maxRows, kUnlimited , rows);
655691 }
656692
@@ -791,6 +827,10 @@ class RowContainer {
791827 return 0 ;
792828 }
793829
830+ const std::vector<char *, StlAllocator<char *>>& testingRowPointers () const {
831+ return rowPointers_;
832+ }
833+
794834 memory::MemoryPool* pool () const {
795835 return stringAllocator_->pool ();
796836 }
@@ -1471,7 +1511,8 @@ class RowContainer {
14711511 const bool isJoinBuild_;
14721512 // True if normalized keys are enabled in initial state.
14731513 const bool hasNormalizedKeys_;
1474-
1514+ // True if listRows() is served by listRowsFast() using 'rowPointers_'.
1515+ const bool useListRowIndex_;
14751516 const std::unique_ptr<HashStringAllocator> stringAllocator_;
14761517
14771518 // Indicates if we can add new row to this row container. It is set to false
@@ -1527,6 +1568,7 @@ class RowContainer {
15271568 uint64_t numFreeRows_ = 0 ;
15281569
15291570 memory::AllocationPool rows_;
1571+ std::vector<char *, StlAllocator<char *>> rowPointers_;
15301572
15311573 int alignment_ = 1 ;
15321574
0 commit comments