Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos,
case SHORT:
case INT:
case LONG:
case DATE:
generateFullOuterLongKeySmallTableNoMatches();
break;
case STRING:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ public void init() {
case LONG:
integerTypeInfo = TypeInfoFactory.longTypeInfo;
break;
case DATE:
integerTypeInfo = TypeInfoFactory.dateTypeInfo;
break;
default:
throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType);
}
Expand Down Expand Up @@ -123,6 +126,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveExceptio
case LONG:
longValue = keyBinarySortableDeserializeRead.currentLong;
break;
case DATE:
longValue = keyBinarySortableDeserializeRead.currentDateWritable.getDays();
break;
default:
throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
case LONG:
hashTableKeyType = HashTableKeyType.LONG;
break;
case DATE:
hashTableKeyType = HashTableKeyType.DATE;
break;
Comment on lines +397 to +399
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have unit tests exploiting this config? Do we need to add something in TestMapJoinOperator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add a test case, testDate0, in TestMapJoinOperator. testString0 does make use of the DATE type, but only as a value column, not as a join key.

case STRING:
hashTableKeyType = HashTableKeyType.STRING;
break;
Expand Down Expand Up @@ -547,6 +550,7 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
case BYTE:
case SHORT:
case INT:
case DATE:
case LONG:
switch (VectorMapJoinVariation) {
case INNER:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.VerifyLazy;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
Expand Down Expand Up @@ -335,6 +336,7 @@ public void verify(VectorMapJoinFastHashTableContainerBase map,
case SHORT:
case INT:
case LONG:
case DATE:
{
Object[] keyRow = element.getKeyRow();
Object keyObject = keyRow[0];
Expand All @@ -357,6 +359,9 @@ public void verify(VectorMapJoinFastHashTableContainerBase map,
case LONG:
longKey = ((LongWritable) keyObject).get();
break;
case DATE:
longKey = ((DateWritableV2) keyObject).getDays();
break;
default:
throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,47 @@ public void testBigIntRowsExact() throws Exception {
/* doClipping */ false, /* useExactBytes */ true);
}

@Test
public void testDateRowsExact() throws Exception {
random = new Random(44332);

// Use a large capacity that doesn't require expansion, yet.
VectorMapJoinFastLongHashMapContainer map =
new VectorMapJoinFastLongHashMapContainer(
false,
false,
HashTableKeyType.DATE,
LARGE_CAPACITY,
LOAD_FACTOR,
LARGE_WB_SIZE,
-1,
tableDesc,
4);

VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
VectorRandomRowSource valueSource = new VectorRandomRowSource();

valueSource.init(
random,
VectorRandomRowSource.SupportedTypes.ALL,
4,
/* allowNulls */ false, /* isUnicodeOk */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Drop redundant comments

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack

false);

int rowCount = 1000;
Object[][] rows = valueSource.randomRows(rowCount);

addAndVerifyRows(
valueSource,
rows,
map,
HashTableKeyType.DATE,
verifyTable,
new String[] {"date"},
/* doClipping */ false, /* useExactBytes */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Drop redundant comments

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack

true);
}

@Test
public void testIntRowsExact() throws Exception {
random = new Random(8238383);
Expand Down
29 changes: 29 additions & 0 deletions ql/src/test/queries/clientpositive/vector_full_outer_join_date.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
set hive.optimize.dynamic.partition.hashjoin=true;
set hive.auto.convert.join=true;

-- Test Date column
create table tbl1 (id int, event_date date);
create table tbl2 (id int, event_date date);

insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03');
insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05');

select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are performing a join, it would be nice to also SELECT columns from tbl2; otherwise we can't tell if the result is correct.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will use select *

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also print the plan using explain vectorization detail in order to ensure that we are indeed using the expected vectorized operator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes.


-- Test timestamp column
create table tbl3 (id int, event_date timestamp);
create table tbl4 (id int, event_date timestamp);

insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30');
insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30');

select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id;

-- Test Double column
create table tbl5 (id int, val double);
create table tbl6 (id int, val double);

insert into tbl5 values (1, 5.6D), (2, 3.2D);
insert into tbl6 values (2, 3.2D), (3, 7.2D);

select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id;
Comment on lines +13 to +29
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we adding tests for TIMESTAMP and DOUBLE types? They don't seem to be in the same code path with DATE. Are we fixing anything with respect to those data types?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not fixing anything for the DOUBLE and TIMESTAMP types; I just added them because they were not covered in vector_full_outer_join.q or vector_full_outer_join2.q. Will remove them.

Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
PREHOOK: query: create table tbl1 (id int, event_date date)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl1
POSTHOOK: query: create table tbl1 (id int, event_date date)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl1
PREHOOK: query: create table tbl2 (id int, event_date date)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl2
POSTHOOK: query: create table tbl2 (id int, event_date date)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl2
PREHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl1
POSTHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl1
POSTHOOK: Lineage: tbl1.event_date SCRIPT []
POSTHOOK: Lineage: tbl1.id SCRIPT []
PREHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl2
POSTHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl2
POSTHOOK: Lineage: tbl2.event_date SCRIPT []
POSTHOOK: Lineage: tbl2.id SCRIPT []
PREHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl1
PREHOOK: Input: default@tbl2
#### A masked pattern was here ####
POSTHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl1
POSTHOOK: Input: default@tbl2
#### A masked pattern was here ####
1 2023-01-01
2 2023-01-02
3 2023-01-03
NULL NULL
NULL NULL
PREHOOK: query: create table tbl3 (id int, event_date timestamp)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl3
POSTHOOK: query: create table tbl3 (id int, event_date timestamp)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl3
PREHOOK: query: create table tbl4 (id int, event_date timestamp)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl4
POSTHOOK: query: create table tbl4 (id int, event_date timestamp)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl4
PREHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl3
POSTHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl3
POSTHOOK: Lineage: tbl3.event_date SCRIPT []
POSTHOOK: Lineage: tbl3.id SCRIPT []
PREHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl4
POSTHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl4
POSTHOOK: Lineage: tbl4.event_date SCRIPT []
POSTHOOK: Lineage: tbl4.id SCRIPT []
PREHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl3
PREHOOK: Input: default@tbl4
#### A masked pattern was here ####
POSTHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl3
POSTHOOK: Input: default@tbl4
#### A masked pattern was here ####
1 2025-12-17 10:20:30
2 2025-12-17 11:20:30
NULL NULL
PREHOOK: query: create table tbl5 (id int, val double)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl5
POSTHOOK: query: create table tbl5 (id int, val double)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl5
PREHOOK: query: create table tbl6 (id int, val double)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl6
POSTHOOK: query: create table tbl6 (id int, val double)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl6
PREHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl5
POSTHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl5
POSTHOOK: Lineage: tbl5.id SCRIPT []
POSTHOOK: Lineage: tbl5.val SCRIPT []
PREHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@tbl6
POSTHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl6
POSTHOOK: Lineage: tbl6.id SCRIPT []
POSTHOOK: Lineage: tbl6.val SCRIPT []
PREHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl5
PREHOOK: Input: default@tbl6
#### A masked pattern was here ####
POSTHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl5
POSTHOOK: Input: default@tbl6
#### A masked pattern was here ####
1 5.6
2 3.2
NULL NULL