Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q16.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name Q16
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM customer
----
true

expect_plan HashJoinExec

run
-- Q16: right-semi hash join with a tiny build side where every probe row
-- is expected to find a match (100% hit rate).
-- Build: nation (25 rows) | Probe: customer (1.5M rows)
-- Both join keys are cast to INT before being compared.
SELECT c.k
FROM (
    SELECT CAST(n_nationkey AS INT) AS k
    FROM nation
) AS n
RIGHT SEMI JOIN (
    SELECT CAST(c_nationkey AS INT) AS k
    FROM customer
) AS c
    ON n.k = c.k;
21 changes: 21 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q17.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name Q17
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q17: right-semi hash join with a medium build side where every probe row
-- is expected to find a match (100% hit rate).
-- Build: supplier (100K rows) | Probe: lineitem (60M rows)
-- Both join keys are cast to INT before being compared.
SELECT l.k
FROM (
    SELECT CAST(s_suppkey AS INT) AS k
    FROM supplier
) AS s
RIGHT SEMI JOIN (
    SELECT CAST(l_suppkey AS INT) AS k
    FROM lineitem
) AS l
    ON s.k = l.k;
24 changes: 24 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q18.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name Q18
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q18: right-semi hash join with a medium build side and a ~10% hit rate.
-- Build: supplier (100K rows) | Probe: lineitem (60M rows)
-- Probe keys whose l_suppkey is not a multiple of 10 are shifted by +1000000,
-- presumably past every s_suppkey, so only the multiples of 10 can match.
SELECT l.k
FROM (
    SELECT CAST(s_suppkey AS INT) AS k
    FROM supplier
) AS s
RIGHT SEMI JOIN (
    SELECT
        CAST(
            CASE
                WHEN l_suppkey % 10 = 0 THEN l_suppkey
                ELSE l_suppkey + 1000000
            END AS INT
        ) AS k
    FROM lineitem
) AS l
    ON s.k = l.k;
21 changes: 21 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q19.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name Q19
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM customer
----
true

expect_plan HashJoinExec

run
-- Q19: right-anti hash join with a tiny build side. Every probe row is
-- expected to match (100% hit rate), so the anti join should emit no rows.
-- Build: nation (25 rows) | Probe: customer (1.5M rows)
-- Both join keys are cast to INT before being compared.
SELECT c.k
FROM (
    SELECT CAST(n_nationkey AS INT) AS k
    FROM nation
) AS n
RIGHT ANTI JOIN (
    SELECT CAST(c_nationkey AS INT) AS k
    FROM customer
) AS c
    ON n.k = c.k;
21 changes: 21 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q20.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name Q20
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q20: right-anti hash join with a medium build side. Every probe row is
-- expected to match (100% hit rate), so the anti join should emit no rows.
-- Build: supplier (100K rows) | Probe: lineitem (60M rows)
-- Both join keys are cast to INT before being compared.
SELECT l.k
FROM (
    SELECT CAST(s_suppkey AS INT) AS k
    FROM supplier
) AS s
RIGHT ANTI JOIN (
    SELECT CAST(l_suppkey AS INT) AS k
    FROM lineitem
) AS l
    ON s.k = l.k;
24 changes: 24 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q21.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name Q21
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q21: right-anti hash join with a medium build side and a ~10% hit rate,
-- so roughly 90% of the probe rows are expected in the output.
-- Build: supplier (100K rows) | Probe: lineitem (60M rows)
-- Probe keys whose l_suppkey is not a multiple of 10 are shifted by +1000000,
-- presumably past every s_suppkey, so only the multiples of 10 can match.
SELECT l.k
FROM (
    SELECT CAST(s_suppkey AS INT) AS k
    FROM supplier
) AS s
RIGHT ANTI JOIN (
    SELECT
        CAST(
            CASE
                WHEN l_suppkey % 10 = 0 THEN l_suppkey
                ELSE l_suppkey + 1000000
            END AS INT
        ) AS k
    FROM lineitem
) AS l
    ON s.k = l.k;
28 changes: 28 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q22.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name Q22
group hj

init sql_benchmarks/hj/init/set_config.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q22: right-semi hash join against a build side full of duplicate keys.
-- Build: supplier (100K rows) folded onto at most 1000 distinct keys by
--        ((s_suppkey - 1) % 1000) + 1, i.e. about 100 build rows per key.
-- Probe: lineitem (60M rows), of which only ~1% are expected to hit a build key.
SELECT l.k
FROM (
    SELECT CAST(((s_suppkey - 1) % 1000) + 1 AS INT) AS k
    FROM supplier
) AS s
RIGHT SEMI JOIN (
    SELECT CAST(l_suppkey AS INT) AS k
    FROM lineitem
) AS l
    ON s.k = l.k;
31 changes: 31 additions & 0 deletions benchmarks/sql_benchmarks/hj/benchmarks/q23.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name Q23
group hj

init sql_benchmarks/hj/init/set_config_no_stats.sql

load sql_benchmarks/hj/init/load.sql

assert I
SELECT count(*) > 0 FROM lineitem
----
true

expect_plan HashJoinExec

run
-- Q23: inner hash join on string keys with heavy build-side duplication.
-- Build: supplier rows with s_suppkey <= 32340 (~32K rows) mapped onto 415
--        distinct keys, i.e. a fanout of about 78 build rows per key.
-- Probe: lineitem rows with l_orderkey % 265 = 0, every one carrying the
--        same key (suffix 1). Quoted estimates: ~2.3M probe rows and
--        ~176M output pairs.
-- Keys are a 28-character shared prefix plus a short numeric suffix. Long
-- keys make each per-pair key comparison costly, and count(*) keeps the
-- measurement focused on the match path.
-- The init script zeroes the single-partition thresholds to force
-- Partitioned mode, which simulates missing row-count statistics.
-- NOTE(review): ~2.3M probe rows under a 1-in-265 filter would imply about
-- 600M lineitem rows, while the other hj benchmarks quote 60M. Confirm
-- which scale factor these estimates assume.
SELECT count(*)
FROM (
    SELECT
        'high_fanout_string_join_key_'
            || CAST((s_suppkey % 415) + 1 AS VARCHAR) AS k
    FROM supplier
    WHERE s_suppkey <= 32340
) AS s
INNER JOIN (
    SELECT 'high_fanout_string_join_key_1' AS k
    FROM lineitem
    WHERE l_orderkey % 265 = 0
) AS l
    ON s.k = l.k;
1 change: 1 addition & 0 deletions benchmarks/sql_benchmarks/hj/init/set_config.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Turn off the optimizer's join reordering for the hj benchmarks.
-- NOTE(review): presumably this keeps the build and probe sides as each query writes them. Confirm.
SET datafusion.optimizer.join_reordering = false;
3 changes: 3 additions & 0 deletions benchmarks/sql_benchmarks/hj/init/set_config_no_stats.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Turn off the optimizer's join reordering for the hj benchmarks.
SET datafusion.optimizer.join_reordering = false;
-- Zero both single-partition thresholds (bytes and rows). The Q23 benchmark
-- relies on this to force Partitioned mode, simulating absent row-count stats.
SET datafusion.optimizer.hash_join_single_partition_threshold = 0;
SET datafusion.optimizer.hash_join_single_partition_threshold_rows = 0;
Loading
Loading