Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 85 additions & 38 deletions iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,105 @@
-- Turn on the table-level and column-level stats autogather settings for this script.
set hive.stats.autogather=true;
set hive.stats.column.autogather=true;

-- Create source table
-- src_ice is an external Iceberg table with columns (a int, b string, c int).
-- It is loaded with nine literal rows; the row (2, 'two', 51) appears three times.
-- Later statements in this script copy from it with "select * from src_ice".
drop table if exists src_ice;
create external table src_ice(
a int,
b string,
c int)
stored by iceberg;

insert into src_ice values
(1, 'one', 50),
(2, 'two', 51),
(2, 'two', 51),
(2, 'two', 51),
(3, 'three', 52),
(4, 'four', 53),
(5, 'five', 54),
(111, 'one', 55),
(333, 'two', 56);

-- Test hive.iceberg.stats.source = iceberg
set hive.iceberg.stats.source=iceberg;


-- Test NON-PARTITIONED table with hive.iceberg.stats.source=iceberg
-- Flow: (re)create tbl_ice_puffin, load it, inspect row count / plan / column
-- description, update column b, recompute column stats, then inspect again.
drop table if exists tbl_ice_puffin;
create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
-- The same nine literal rows are inserted twice.
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;
desc formatted tbl_ice_puffin b;
update tbl_ice_puffin set b='two' where b='one' or b='three';
-- NOTE(review): tbl_ice_puffin was already created above and is not dropped
-- before this second create; this looks like old and new revisions of the
-- script merged together — confirm which variant is intended.
create external table tbl_ice_puffin(
a int,
b string,
c int)
stored by iceberg;

-- Copy the contents of src_ice in twice, so every source row is present two times.
insert into tbl_ice_puffin select * from src_ice;
insert into tbl_ice_puffin select * from src_ice;

-- Inspect the table before the update: row count, plan of an aggregate over
-- a/b/c, and the description of column b (referenced here in upper case).
select count(*) from tbl_ice_puffin;
EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin;
desc formatted tbl_ice_puffin B;

-- Rewrite b to 'two' on every row whose b is 'one' or 'three'.
update tbl_ice_puffin
set b='two' where b='one' or b='three';

-- Explicitly recompute column statistics after the update, then repeat the
-- same inspections as before it.
analyze table tbl_ice_puffin compute statistics for columns;
explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;

select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin b;
EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin;
desc formatted tbl_ice_puffin B;


-- Test PARTITIONED table with hive.iceberg.stats.source=iceberg
-- Recreate tbl_ice_puffin with c as the partition column instead of a regular column.
drop table tbl_ice_puffin;
create external table tbl_ice_puffin(
a int,
b string
)
partitioned by (c int)
stored by iceberg;

-- Load from src_ice, then remove every row with a <= 2.
insert overwrite table tbl_ice_puffin select * from src_ice;
delete from tbl_ice_puffin where a <= 2;

-- Test if hive.iceberg.stats.source is empty
-- NOTE(review): no statement between the "set hive.iceberg.stats.source=iceberg"
-- earlier in this script and this point changes that setting, so the comment
-- above may be stale — confirm.
-- Column statistics are computed for columns A and C only (b is not listed).
analyze table tbl_ice_puffin compute statistics for columns A, C;

select count(*) from tbl_ice_puffin;
EXPLAIN select min(a), max(c) from tbl_ice_puffin;
desc formatted tbl_ice_puffin C;

-- Explain a join between src_ice and the partitioned table on column a, with
-- hive.stats.fetch.column.stats switched on.
set hive.stats.fetch.column.stats=true;
EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a);


-- Test hive.iceberg.stats.source is empty
-- The setting is assigned a blank value here and set back to "iceberg" further
-- down, after the table has been loaded.
set hive.iceberg.stats.source= ;
drop table if exists tbl_ice_puffin;
create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice_puffin order by a, b, c;

-- NOTE(review): the table is dropped and recreated several times in this
-- section (literal-values variant and src_ice variant); this looks like two
-- revisions of the script merged together — confirm which one is intended.
drop table tbl_ice_puffin;
create external table tbl_ice_puffin(
a int,
b string,
c int)
stored by iceberg;

insert into tbl_ice_puffin select * from src_ice;

-- Switch the stats source back to iceberg before inspecting the table.
set hive.iceberg.stats.source=iceberg;
drop table if exists tbl_ice_puffin;
create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;
-- Row count, description of column a (lower and upper case spelling) and the
-- plan of an aggregate over a/b/c.
select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin a;
EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin;
desc formatted tbl_ice_puffin A;


-- Test hive.iceberg.stats.source = metastore
-- This section works on a separate table, tbl_ice, with the same three columns.
set hive.iceberg.stats.source=metastore;

drop table if exists tbl_ice;
create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice order by a, b, c;
select * from tbl_ice order by a, b, c;
select count(*) from tbl_ice;
-- NOTE(review): tbl_ice was already created above and is not dropped before
-- this second create; this looks like old and new revisions of the script
-- merged together — confirm which variant is intended.
create external table tbl_ice(
a int,
b string,
c int)
stored by iceberg;

-- NOTE(review): the next statements switch the stats source back to iceberg
-- and operate on tbl_ice_puffin, not tbl_ice, in the middle of the metastore
-- section — confirm they belong here.
set hive.iceberg.stats.source=iceberg;
delete from tbl_ice_puffin where a = 2;
analyze table tbl_ice_puffin compute statistics for columns A, C;
explain select * from tbl_ice_puffin order by a, b, c;
select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin C;
-- Load tbl_ice from src_ice.
insert into tbl_ice select * from src_ice;

-- Two single-column Iceberg tables with no rows inserted; described and then
-- joined on t1.a = t2.b in an explain.
create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
describe formatted t1;
describe formatted t2;
explain select * from t1 join t2 on t1.a = t2.b;
-- Row count, aggregate plan and description of column A for tbl_ice.
select count(*) from tbl_ice;
EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice;
desc formatted tbl_ice A;
Original file line number Diff line number Diff line change
@@ -1,21 +1,47 @@
-- Mask random uuid
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
-- Script outline: create tbl_ice_puffin, then insert rows in three batches and
-- after each batch explain a full select and describe columns a, b and c.
set hive.fetch.task.conversion=none;

-- Turn on the table-level and column-level stats autogather settings.
set hive.stats.autogather=true;
set hive.stats.column.autogather=true;

set hive.iceberg.stats.source=iceberg;

drop table if exists tbl_ice_puffin;
create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2');
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
-- NOTE(review): tbl_ice_puffin was already created above and is not dropped
-- before this second create; this looks like old and new revisions of the
-- script merged together — confirm which variant is intended.
create external table tbl_ice_puffin(
a int,
b string,
c int)
stored by iceberg;

-- First batch: nine rows, a in 1..333 and c in 50..56.
insert into tbl_ice_puffin values
(1, 'one', 50),
(2, 'two', 51),
(2, 'two', 51),
(2, 'two', 51),
(3, 'three', 52),
(4, 'four', 53),
(5, 'five', 54),
(111, 'one', 55),
(333, 'two', 56);

explain select * from tbl_ice_puffin;
desc formatted tbl_ice_puffin a;
desc formatted tbl_ice_puffin b;
desc formatted tbl_ice_puffin c;
explain select * from tbl_ice_puffin order by a, b, c;
-- Second batch: raises the largest a and c values to 5000.
-- NOTE(review): two near-identical inserts follow (b = 'one' vs b = 'six');
-- presumably only one of them is meant to run — confirm.
insert into tbl_ice_puffin values (1000, 'one', 1000), (5000, 'two', 5000);

insert into tbl_ice_puffin values (1000, 'six', 1000), (5000, 'two', 5000);

explain select * from tbl_ice_puffin;
desc formatted tbl_ice_puffin a;
desc formatted tbl_ice_puffin b;
desc formatted tbl_ice_puffin c;
explain select * from tbl_ice_puffin order by a, b, c;
-- Third batch: raises the largest c value to 510000; a stays within 1..5000.
-- NOTE(review): again two near-identical inserts (b = 'one' vs b = 'six') — confirm.
insert into tbl_ice_puffin values (10, 'one', 100000), (5000, 'two', 510000);
explain select * from tbl_ice_puffin order by a, b, c;

insert into tbl_ice_puffin values (10, 'six', 100000), (5000, 'two', 510000);

explain select * from tbl_ice_puffin;
desc formatted tbl_ice_puffin a;
desc formatted tbl_ice_puffin b;
desc formatted tbl_ice_puffin c;

-- Result: a = (min: 1, max: 5000), c = (min: 50, max: 510000)
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
-- Script outline: create a small Iceberg table, tag its state as "checkpoint",
-- insert further all-null rows, and explain selects against both the table and
-- the tag before and after that insert.
set hive.fetch.task.conversion=none;

create external table default.tbl_ice_puffin_time_travel(a int, b string, c int) stored by iceberg;
insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2, 'two', 51);
alter table default.tbl_ice_puffin_time_travel create tag checkpoint;
-- NOTE(review): the table is created again here without the "default." prefix
-- and without a drop in between; presumably this and the statements above are
-- old and new revisions of the same lines merged together — confirm.
create external table tbl_ice_puffin_time_travel(
a int,
b string,
c int)
stored by iceberg;

explain select * from default.tbl_ice_puffin_time_travel;
insert into tbl_ice_puffin_time_travel values
(1, 'one', 50),
(2, 'two', 51);

-- Tag the table state that holds only the two rows inserted so far.
alter table tbl_ice_puffin_time_travel create tag checkpoint;

-- Explain a select on the table itself and on the tag (addressed as
-- <table>.tag_checkpoint) before any further rows are added.
explain select * from tbl_ice_puffin_time_travel;
explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;

-- Add twelve rows in which every column is null.
insert into tbl_ice_puffin_time_travel values
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null);
-- NOTE(review): the insert above is already terminated by ';', so the twelve
-- tuples below are not attached to any statement; they look like a second copy
-- of the same value list — confirm and remove one copy.
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null),
(null, null, null);

-- Repeat the explains after the insert, on the table and on the tag.
explain select * from default.tbl_ice_puffin_time_travel;
explain select * from tbl_ice_puffin_time_travel;
explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint;
Loading