Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions contrib/pax_storage/src/test/regress/expected/bfv_joins.out
Original file line number Diff line number Diff line change
Expand Up @@ -4190,6 +4190,36 @@ INSERT INTO ext_stats_tbl VALUES('tC', true);
ANALYZE ext_stats_tbl;
explain SELECT 1 FROM ext_stats_tbl t11 FULL JOIN ext_stats_tbl t12 ON t12.c2;
ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions
-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y1(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y2(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);
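-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.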
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;
c1 | y2c1
----+------
f |
f |
(2 rows)

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4215,6 +4215,36 @@ INSERT INTO ext_stats_tbl VALUES('tC', true);
ANALYZE ext_stats_tbl;
explain SELECT 1 FROM ext_stats_tbl t11 FULL JOIN ext_stats_tbl t12 ON t12.c2;
ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions
-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y1(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y2(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);
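-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.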
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;
c1 | y2c1
----+------
f |
f |
(2 rows)

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
20 changes: 20 additions & 0 deletions contrib/pax_storage/src/test/regress/sql/bfv_joins.sql
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,26 @@ ANALYZE ext_stats_tbl;

explain SELECT 1 FROM ext_stats_tbl t11 FULL JOIN ext_stats_tbl t12 ON t12.c2;

-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
create table loj_bool_y1(c1 boolean);
create table loj_bool_y2(c1 boolean);
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);

-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
2 changes: 1 addition & 1 deletion src/backend/gporca/libgpopt/src/xforms/CJoinOrderDPv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ CJoinOrderDPv2::AddSelectNodeForRemainingEdges(CExpression *join_expr)
// we will have to repeat this check
pedge->m_fUsed = false;
}
else
else if (0 == pedge->m_loj_num)
{
// found an unused edge, this one will need to go into
// a select node on top of the join
Expand Down
30 changes: 30 additions & 0 deletions src/test/regress/expected/bfv_joins.out
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,36 @@ select (trunc(extract(epoch from now())) - :unix_time1) < 100 is_ok;
(1 row)

reset optimizer;
-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y1(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y2(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);
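-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.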
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;
c1 | y2c1
----+------
f |
f |
(2 rows)

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
30 changes: 30 additions & 0 deletions src/test/regress/expected/bfv_joins_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -4252,6 +4252,36 @@ select (trunc(extract(epoch from now())) - :unix_time1) < 100 is_ok;
(1 row)

reset optimizer;
-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y1(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create table loj_bool_y2(c1 boolean);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);
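-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.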
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;
c1 | y2c1
----+------
f |
f |
(2 rows)

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
20 changes: 20 additions & 0 deletions src/test/regress/sql/bfv_joins.sql
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,26 @@ select (trunc(extract(epoch from now())) - :unix_time1) < 100 is_ok;

reset optimizer;

-- ORCA bug: a boolean ON-clause of a LEFT JOIN must not be pushed down as a
-- scan filter on the outer relation. When the same outer relation feeds
-- multiple LEFT JOINs whose ON-clauses use the same boolean column AND there
-- is a WHERE on top, the normalizer used to push the ON-pred onto the LOJ's
-- own outer child, discarding outer rows that should be null-padded.
create table loj_bool_x(c1 boolean);
create table loj_bool_y1(c1 boolean);
create table loj_bool_y2(c1 boolean);
insert into loj_bool_x values (true), (false), (false);
insert into loj_bool_y1 values (true);
insert into loj_bool_y2 values (true);

-- Expect 2 rows: the two FALSE rows in loj_bool_x, null-padded for loj_bool_y2.
-- The TRUE row joins to the loj_bool_y2 row and is dropped by the WHERE clause.
-- A correct plan has no "Filter: c1" on the Seq Scan of loj_bool_x. No EXPLAIN
-- is run here, so a regression surfaces as missing rows in this result instead.
select loj_bool_x.c1, loj_bool_y2.c1 as y2c1
from loj_bool_x left join loj_bool_y1 on loj_bool_x.c1
left join loj_bool_y2 on loj_bool_x.c1
where loj_bool_y2.c1 is null
order by 1, 2;

-- Clean up. None of the objects we create are very interesting to keep around.
reset search_path;
set client_min_messages='warning';
Expand Down
Loading