-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-29375: FULL OUTER JOIN is failing with Unexpected hash table key type DATE #6239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -495,6 +495,47 @@ public void testBigIntRowsExact() throws Exception { | |
| /* doClipping */ false, /* useExactBytes */ true); | ||
| } | ||
|
|
||
| @Test | ||
| public void testDateRowsExact() throws Exception { | ||
| random = new Random(44332); | ||
|
|
||
| // Use a large capacity that doesn't require expansion, yet. | ||
| VectorMapJoinFastLongHashMapContainer map = | ||
| new VectorMapJoinFastLongHashMapContainer( | ||
| false, | ||
| false, | ||
| HashTableKeyType.DATE, | ||
| LARGE_CAPACITY, | ||
| LOAD_FACTOR, | ||
| LARGE_WB_SIZE, | ||
| -1, | ||
| tableDesc, | ||
| 4); | ||
|
|
||
| VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap(); | ||
| VectorRandomRowSource valueSource = new VectorRandomRowSource(); | ||
|
|
||
| valueSource.init( | ||
| random, | ||
| VectorRandomRowSource.SupportedTypes.ALL, | ||
| 4, | ||
| /* allowNulls */ false, /* isUnicodeOk */ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Drop redundant comments
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack |
||
| false); | ||
|
|
||
| int rowCount = 1000; | ||
| Object[][] rows = valueSource.randomRows(rowCount); | ||
|
|
||
| addAndVerifyRows( | ||
| valueSource, | ||
| rows, | ||
| map, | ||
| HashTableKeyType.DATE, | ||
| verifyTable, | ||
| new String[] {"date"}, | ||
| /* doClipping */ false, /* useExactBytes */ | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Drop redundant comments
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack |
||
| true); | ||
| } | ||
|
|
||
| @Test | ||
| public void testIntRowsExact() throws Exception { | ||
| random = new Random(8238383); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| set hive.optimize.dynamic.partition.hashjoin=true; | ||
| set hive.auto.convert.join=true; | ||
|
|
||
| -- Test Date column | ||
| create table tbl1 (id int, event_date date); | ||
| create table tbl2 (id int, event_date date); | ||
|
|
||
| insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03'); | ||
| insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05'); | ||
|
|
||
| select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we are performing a join it would be nice to SELECT also columns from tbl2 otherwise we can't tell if the result is correct.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will use
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we also print the plan using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. |
||
|
|
||
| -- Test timestamp column | ||
| create table tbl3 (id int, event_date timestamp); | ||
| create table tbl4 (id int, event_date timestamp); | ||
|
|
||
| insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30'); | ||
| insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30'); | ||
|
|
||
| select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id; | ||
|
|
||
| -- Test Double column | ||
| create table tbl5 (id int, val double); | ||
| create table tbl6 (id int, val double); | ||
|
|
||
| insert into tbl5 values (1, 5.6D), (2, 3.2D); | ||
| insert into tbl6 values (2, 3.2D), (3, 7.2D); | ||
|
|
||
| select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id; | ||
|
Comment on lines
+13
to
+29
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we adding tests for TIMESTAMP and DOUBLE types? They don't seem to be in the same code path with DATE. Are we fixing anything with respect to those data types?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not fixing anything for DOUBLE and TIMESTAMP types; they were just added because they were not covered in |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| PREHOOK: query: create table tbl1 (id int, event_date date) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl1 | ||
| POSTHOOK: query: create table tbl1 (id int, event_date date) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl1 | ||
| PREHOOK: query: create table tbl2 (id int, event_date date) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl2 | ||
| POSTHOOK: query: create table tbl2 (id int, event_date date) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl2 | ||
| PREHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl1 | ||
| POSTHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl1 | ||
| POSTHOOK: Lineage: tbl1.event_date SCRIPT [] | ||
| POSTHOOK: Lineage: tbl1.id SCRIPT [] | ||
| PREHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl2 | ||
| POSTHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl2 | ||
| POSTHOOK: Lineage: tbl2.event_date SCRIPT [] | ||
| POSTHOOK: Lineage: tbl2.id SCRIPT [] | ||
| PREHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@tbl1 | ||
| PREHOOK: Input: default@tbl2 | ||
| #### A masked pattern was here #### | ||
| POSTHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@tbl1 | ||
| POSTHOOK: Input: default@tbl2 | ||
| #### A masked pattern was here #### | ||
| 1 2023-01-01 | ||
| 2 2023-01-02 | ||
| 3 2023-01-03 | ||
| NULL NULL | ||
| NULL NULL | ||
| PREHOOK: query: create table tbl3 (id int, event_date timestamp) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl3 | ||
| POSTHOOK: query: create table tbl3 (id int, event_date timestamp) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl3 | ||
| PREHOOK: query: create table tbl4 (id int, event_date timestamp) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl4 | ||
| POSTHOOK: query: create table tbl4 (id int, event_date timestamp) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl4 | ||
| PREHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl3 | ||
| POSTHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl3 | ||
| POSTHOOK: Lineage: tbl3.event_date SCRIPT [] | ||
| POSTHOOK: Lineage: tbl3.id SCRIPT [] | ||
| PREHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl4 | ||
| POSTHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl4 | ||
| POSTHOOK: Lineage: tbl4.event_date SCRIPT [] | ||
| POSTHOOK: Lineage: tbl4.id SCRIPT [] | ||
| PREHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@tbl3 | ||
| PREHOOK: Input: default@tbl4 | ||
| #### A masked pattern was here #### | ||
| POSTHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@tbl3 | ||
| POSTHOOK: Input: default@tbl4 | ||
| #### A masked pattern was here #### | ||
| 1 2025-12-17 10:20:30 | ||
| 2 2025-12-17 11:20:30 | ||
| NULL NULL | ||
| PREHOOK: query: create table tbl5 (id int, val double) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl5 | ||
| POSTHOOK: query: create table tbl5 (id int, val double) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl5 | ||
| PREHOOK: query: create table tbl6 (id int, val double) | ||
| PREHOOK: type: CREATETABLE | ||
| PREHOOK: Output: database:default | ||
| PREHOOK: Output: default@tbl6 | ||
| POSTHOOK: query: create table tbl6 (id int, val double) | ||
| POSTHOOK: type: CREATETABLE | ||
| POSTHOOK: Output: database:default | ||
| POSTHOOK: Output: default@tbl6 | ||
| PREHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl5 | ||
| POSTHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl5 | ||
| POSTHOOK: Lineage: tbl5.id SCRIPT [] | ||
| POSTHOOK: Lineage: tbl5.val SCRIPT [] | ||
| PREHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: _dummy_database@_dummy_table | ||
| PREHOOK: Output: default@tbl6 | ||
| POSTHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: _dummy_database@_dummy_table | ||
| POSTHOOK: Output: default@tbl6 | ||
| POSTHOOK: Lineage: tbl6.id SCRIPT [] | ||
| POSTHOOK: Lineage: tbl6.val SCRIPT [] | ||
| PREHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id | ||
| PREHOOK: type: QUERY | ||
| PREHOOK: Input: default@tbl5 | ||
| PREHOOK: Input: default@tbl6 | ||
| #### A masked pattern was here #### | ||
| POSTHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id | ||
| POSTHOOK: type: QUERY | ||
| POSTHOOK: Input: default@tbl5 | ||
| POSTHOOK: Input: default@tbl6 | ||
| #### A masked pattern was here #### | ||
| 1 5.6 | ||
| 2 3.2 | ||
| NULL NULL |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have unit tests exploiting this config? Do we need to add something in
`TestMapJoinOperator`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will add a test case `testDate0` in `TestMapJoinOperator`. The existing `testString0` does make use of the `DATE` type, but only as a value column, not as a join key.