From ee9f52df4ccd6b8ee5b479948278a385fc031884 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Fri, 3 Jul 2026 18:13:24 +0500 Subject: [PATCH] Fix returnless UNION EXISTS with VLE Project returnless UNION leaves to a stable internal column so postgres set-op planning does not see branch-specific target widths from injected RETURN * outputs. Add regression coverage for VLE branches, UNION ALL, and nested returnless UNION. Co-authored-by: OpenAI Codex 5.5 xhigh --- regress/expected/cypher_subquery.out | 67 ++++++++++++++++++++++++++++ regress/sql/cypher_subquery.sql | 38 ++++++++++++++++ src/backend/parser/cypher_clause.c | 40 +++++++++++++++-- 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/regress/expected/cypher_subquery.out b/regress/expected/cypher_subquery.out index 456b3a2c9..97cb5ae27 100644 --- a/regress/expected/cypher_subquery.out +++ b/regress/expected/cypher_subquery.out @@ -785,6 +785,73 @@ MATCH (a) WHERE EXISTS {MATCH (a)-[]-()} RETURN a}]] RETURN a $$) AS (result agt {"id": 1688849860263940, "label": "pet", "properties": {"name": "Garfield"}}::vertex (4 rows) +-- +-- issue 2396: returnless UNION with VLE in EXISTS +-- +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:knows*1..2]->(:pet) + UNION + MATCH (a)-[:loved]->(:person) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + name +----------- + "Calvin" + "Charlie" + "Jon" + "Tony" +(4 rows) + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:loved]->(:person) + UNION + MATCH (a)-[:knows*1..2]->(:pet) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + name +----------- + "Calvin" + "Charlie" + "Jon" + "Tony" +(4 rows) + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:knows*1..2]->(:pet) + UNION ALL + MATCH (a)-[:loved]->(:person) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + name +----------- + "Calvin" + "Charlie" + "Jon" + "Tony" +(4 
rows) + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:loved]->(:person) + UNION + MATCH (a)-[:knows*1..2]->(:pet) + UNION + MATCH (a)-[:knows]->(:person) + WHERE a.name = 'Faye' + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + name +----------- + "Calvin" + "Charlie" + "Faye" + "Jon" + "Tony" +(5 rows) + -- -- Cleanup -- diff --git a/regress/sql/cypher_subquery.sql b/regress/sql/cypher_subquery.sql index 43c4db539..b25d5e834 100644 --- a/regress/sql/cypher_subquery.sql +++ b/regress/sql/cypher_subquery.sql @@ -395,6 +395,44 @@ MATCH (a) WHERE a.name = 'Hobbes' RETURN a}]] RETURN a $$) AS (result agtype); SELECT * FROM cypher('subquery', $$ MATCH (a:pet) WHERE [true] IN [[EXISTS { MATCH (a) WHERE EXISTS {MATCH (a)-[]-()} RETURN a}]] RETURN a $$) AS (result agtype); +-- +-- issue 2396: returnless UNION with VLE in EXISTS +-- +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:knows*1..2]->(:pet) + UNION + MATCH (a)-[:loved]->(:person) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:loved]->(:person) + UNION + MATCH (a)-[:knows*1..2]->(:pet) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:knows*1..2]->(:pet) + UNION ALL + MATCH (a)-[:loved]->(:person) + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + +SELECT * FROM cypher('subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a)-[:loved]->(:person) + UNION + MATCH (a)-[:knows*1..2]->(:pet) + UNION + MATCH (a)-[:knows]->(:person) + WHERE a.name = 'Faye' + } + RETURN a.name ORDER BY a.name $$) AS (name agtype); + -- -- Cleanup -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 147e3e74e..e6b2060c1 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -80,6 
+80,7 @@ #define AGE_VARNAME_ID AGE_DEFAULT_VARNAME_PREFIX"id" #define AGE_VARNAME_SET_CLAUSE AGE_DEFAULT_VARNAME_PREFIX"set_clause" #define AGE_VARNAME_SET_VALUE AGE_DEFAULT_VARNAME_PREFIX"set_value" +#define AGE_VARNAME_RETURNLESS_UNION AGE_DEFAULT_VARNAME_PREFIX"returnless_union" /* * In the transformation stage, we need to track @@ -295,9 +296,12 @@ static cypher_clause *make_cypher_clause(List *stmt); static Query *transform_cypher_union(cypher_parsestate *cpstate, cypher_clause *clause); +static void project_returnless_union_leaf(Query *query); + static Node * transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, bool isTopLevel, + bool returnless_union, List **targetlist); Query *cypher_parse_sub_analyze_union(cypher_clause *clause, @@ -649,6 +653,18 @@ static cypher_clause *make_cypher_clause(List *stmt) return clause; } +static void project_returnless_union_leaf(Query *query) +{ + TargetEntry *tle; + + tle = makeTargetEntry((Expr *) makeBoolConst(true, false), + 1, + AGE_VARNAME_RETURNLESS_UNION, + false); + + query->targetList = list_make1(tle); +} + /* * transform_cypher_union - * transforms a union tree, derived from postgresql's @@ -710,7 +726,9 @@ static Query *transform_cypher_union(cypher_parsestate *cpstate, * Recursively transform the components of the tree. 
*/ cypher_union_statement = (SetOperationStmt *) transform_cypher_union_tree(cpstate, - clause, true, NULL); + clause, true, + self->returnless_union, + NULL); Assert(cypher_union_statement); qry->setOperations = (Node *) cypher_union_statement; @@ -871,7 +889,8 @@ static Query *transform_cypher_union(cypher_parsestate *cpstate, */ static Node * transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, - bool isTopLevel, List **targetlist) + bool isTopLevel, bool returnless_union, + List **targetlist) { bool isLeaf; @@ -973,6 +992,17 @@ transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, } } + /* + * Returnless UNION branches use a parser-injected RETURN * only as a + * syntactic carrier. The branch output is unobservable, so expose one + * stable column to PostgreSQL's set-op planner instead of leaking the + * branch's current variable list into set-op metadata. + */ + if (returnless_union) + { + project_returnless_union_leaf(returnQuery); + } + /* * Extract a list of the non-junk TLEs for upper-level processing. 
*/ @@ -1019,8 +1049,10 @@ transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, ListCell *rtl; cypher_return *self = (cypher_return *) clause->self; const char *context; + bool child_returnless_union; context = "UNION"; + child_returnless_union = returnless_union || self->returnless_union; op->op = self->op; op->all = self->all_or_distinct; @@ -1031,6 +1063,7 @@ transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, op->larg = transform_cypher_union_tree(cpstate, (cypher_clause *) self->larg, false, + child_returnless_union, &ltargetlist); /* @@ -1053,6 +1086,7 @@ transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, op->rarg = transform_cypher_union_tree(cpstate, (cypher_clause *) self->rarg, false, + child_returnless_union, &rtargetlist); /* @@ -1062,7 +1096,7 @@ transform_cypher_union_tree(cypher_parsestate *cpstate, cypher_clause *clause, * matching, because they are not relevant to the end result. */ if (list_length(ltargetlist) != list_length(rtargetlist) && - self->returnless_union == false) + returnless_union == false) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),