From b3280865a5cf2242748a7fc462f8923ce28fc744 Mon Sep 17 00:00:00 2001 From: serdarmumcu Date: Sat, 20 Jun 2026 19:53:25 +0300 Subject: [PATCH 01/20] Make age extension usable from shared_preload_libraries (#2438) When AGE is loaded via shared_preload_libraries, its hooks (post_parse_analyze, set_rel_pathlist, object_access) are active before CREATE EXTENSION age is run. This causes errors when non-Cypher queries trigger those hooks and ag_catalog does not yet exist. Changes: - Add is_age_extension_exists() with a relcache callback cache so that checking pg_extension is not repeated on every hook invocation. - Guard post_parse_analyze, set_rel_pathlist, and object_access hooks with is_age_extension_exists() so they become no-ops when the extension is not installed. - Refactor ag_ProcessUtility_hook to detect CREATE/DROP EXTENSION age and broadcast a relcache invalidation via CacheInvalidateRelcacheByRelid(ExtensionRelationId) so other backends update their cached extension state. - Wrap DROP EXTENSION processing in PG_TRY/PG_CATCH to restore object_access_hook if the drop fails (e.g. dependent objects). - Skip _PG_init during pg_upgrade (IsBinaryUpgrade) to avoid hook registration when the binary-upgrade machinery is running. - Add regression tests that verify hooks do not error when ag_catalog schema is absent. 
--- regress/expected/drop.out | 12 +- regress/sql/drop.sql | 9 ++ src/backend/age.c | 9 +- src/backend/catalog/ag_catalog.c | 216 ++++++++++++++++++++------- src/backend/optimizer/cypher_paths.c | 8 + src/backend/parser/cypher_analyze.c | 6 + src/include/catalog/ag_catalog.h | 2 + 7 files changed, 202 insertions(+), 60 deletions(-) diff --git a/regress/expected/drop.out b/regress/expected/drop.out index 3cfa2cf28..43e0bf41a 100644 --- a/regress/expected/drop.out +++ b/regress/expected/drop.out @@ -40,6 +40,14 @@ SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = 'ag_catalog'; ----------- (0 rows) +-- When ag_catalog is missing extension hooks shouldn't fail with the +-- ERROR schema "ag_catalog" does not exist. +-- It might happen when 'age' is loaded but extension isn't created yet. +SET client_min_messages TO WARNING; +DROP SCHEMA IF EXISTS ag_catalog CASCADE; +RESET client_min_messages; +CREATE SCHEMA _regress_drop; +DROP SCHEMA _regress_drop; -- should'n produce the ERROR -- Recreate the extension and validate we can recreate a graph CREATE EXTENSION age; SELECT create_graph('drop'); @@ -115,7 +123,7 @@ NOTICE: label "issue_1305"."r" has been dropped (1 row) SELECT drop_label('issue_1305', 'r'); -ERROR: rel_name not found for label "r" +ERROR: label "r" does not exist SELECT drop_label('issue_1305', 'n', false); NOTICE: label "issue_1305"."n" has been dropped drop_label @@ -124,7 +132,7 @@ NOTICE: label "issue_1305"."n" has been dropped (1 row) SELECT drop_label('issue_1305', 'n'); -ERROR: rel_name not found for label "n" +ERROR: label "n" does not exist SELECT * FROM drop_graph('issue_1305', true); NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table issue_1305._ag_label_vertex diff --git a/regress/sql/drop.sql b/regress/sql/drop.sql index 564492bbc..a71d5a74c 100644 --- a/regress/sql/drop.sql +++ b/regress/sql/drop.sql @@ -28,6 +28,15 @@ SELECT nspname FROM pg_catalog.pg_namespace WHERE nspname = 'drop'; SELECT tablename FROM 
pg_catalog.pg_tables WHERE schemaname = 'ag_catalog'; +-- When ag_catalog is missing extension hooks shouldn't fail with the +-- ERROR schema "ag_catalog" does not exist. +-- It might happen when 'age' is loaded but extension isn't created yet. +SET client_min_messages TO WARNING; +DROP SCHEMA IF EXISTS ag_catalog CASCADE; +RESET client_min_messages; +CREATE SCHEMA _regress_drop; +DROP SCHEMA _regress_drop; -- should'n produce the ERROR + -- Recreate the extension and validate we can recreate a graph CREATE EXTENSION age; diff --git a/src/backend/age.c b/src/backend/age.c index 18085302c..24ae8456b 100644 --- a/src/backend/age.c +++ b/src/backend/age.c @@ -17,6 +17,9 @@ * under the License. */ +#include "postgres.h" +#include "miscadmin.h" + #include "catalog/ag_catalog.h" #include "nodes/ag_nodes.h" #include "optimizer/cypher_paths.h" @@ -25,7 +28,6 @@ #include "utils/age_global_graph.h" #if PG_VERSION_NUM < 170000 -#include "miscadmin.h" /* saved hook pointers for PG < 17 shmem path */ static shmem_request_hook_type prev_shmem_request_hook = NULL; @@ -56,6 +58,11 @@ void _PG_init(void); void _PG_init(void) { + if (IsBinaryUpgrade) + { + return; + } + register_ag_nodes(); set_rel_pathlist_init(); object_access_hook_init(); diff --git a/src/backend/catalog/ag_catalog.c b/src/backend/catalog/ag_catalog.c index f5276e092..107de370d 100644 --- a/src/backend/catalog/ag_catalog.c +++ b/src/backend/catalog/ag_catalog.c @@ -19,14 +19,18 @@ #include "postgres.h" +#include "access/xact.h" #include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/pg_class_d.h" +#include "catalog/pg_extension_d.h" #include "catalog/pg_namespace_d.h" #include "commands/defrem.h" +#include "commands/extension.h" #include "nodes/parsenodes.h" #include "tcop/utility.h" +#include "utils/inval.h" #include "utils/lsyscache.h" #include "catalog/ag_graph.h" @@ -34,6 +38,8 @@ #include "utils/ag_cache.h" #include "utils/age_global_graph.h" +static 
bool extension_cache_is_valid = false; +static bool age_extension_exists = false; static object_access_hook_type prev_object_access_hook; static ProcessUtility_hook_type prev_process_utility_hook; static bool prev_object_hook_is_set; @@ -45,8 +51,46 @@ void ag_ProcessUtility_hook(PlannedStmt *pstmt, const char *queryString, bool re QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc); -static bool is_age_drop(PlannedStmt *pstmt); -static void drop_age_extension(DropStmt *stmt); +static bool is_age_drop(DropStmt *drop_stmt); + +static void +invalidate_extension_cache_callback(Datum argument, Oid relationId) +{ + if (!OidIsValid(relationId) || relationId == ExtensionRelationId) + { + extension_cache_is_valid = false; + } +} + +/* + * We don't want most of hooks to do anything if the "age" extension isn't + * created. However, scanning pg_extension is a costly operation, therefore we + * implement a caching mechanism and reset it with the help of the relcache + * callback mechanism. + * + * Please also see ag_ProcessUtility_hook() function for more details. + */ +bool +is_age_extension_exists(void) +{ + static bool callback_registered = false; + + if (extension_cache_is_valid) + return age_extension_exists; + + if (!callback_registered) + { + CacheRegisterRelcacheCallback(invalidate_extension_cache_callback, + (Datum) 0); + callback_registered = true; + } + + age_extension_exists = OidIsValid(get_extension_oid("age", true)); + + extension_cache_is_valid = true; + + return age_extension_exists; +} void object_access_hook_init(void) { @@ -86,50 +130,97 @@ void process_utility_hook_fini(void) * information in the indexes and tables being dropped. To prevent an error * from being thrown, we need to disable the object_access_hook before dropping * the extension. + * + * Besides that, we want to notify other backends about the fact that "age" + * extension was probably created/dropped so that they can enable/disable + * hooks. 
*/ void ag_ProcessUtility_hook(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc) { - if (is_age_drop(pstmt)) - { - drop_age_extension((DropStmt *)pstmt->utilityStmt); - } - else + bool creating_age = false; + bool dropping_age = false; + + if (!IsAbortedTransactionBlockState()) { - /* - * Check for TRUNCATE on graph label tables. If any truncated - * table is a graph label table, increment the version counter - * for that graph to invalidate VLE caches. We do this before - * the truncate executes so the cache is invalidated regardless. - */ - if (IsA(pstmt->utilityStmt, TruncateStmt)) + Node *parsetree = pstmt->utilityStmt; + + switch (nodeTag(parsetree)) { - TruncateStmt *tstmt = (TruncateStmt *) pstmt->utilityStmt; - ListCell *lc; + case T_CreateExtensionStmt: + { + CreateExtensionStmt *stmt = + (CreateExtensionStmt *) parsetree; + creating_age = strcmp(stmt->extname, "age") == 0; + } + break; + case T_DropStmt: + { + DropStmt *stmt = (DropStmt *) parsetree; - foreach(lc, tstmt->relations) - { - RangeVar *rv = (RangeVar *) lfirst(lc); - Oid rel_oid = RangeVarGetRelid(rv, AccessShareLock, true); + if (stmt->removeType != OBJECT_EXTENSION) + break; - if (OidIsValid(rel_oid)) - { - Oid graph_oid = get_graph_oid_for_table(rel_oid); + if (!is_age_drop(stmt)) + break; - if (OidIsValid(graph_oid)) + dropping_age = true; + } + break; + case T_TruncateStmt: + { + /* + * Check for TRUNCATE on graph label tables. If any + * truncated table is a graph label table, increment the + * version counter for that graph to invalidate VLE caches. + * We do this before the truncate executes so the cache is + * invalidated regardless. 
+ */ + TruncateStmt *tstmt = (TruncateStmt *) parsetree; + ListCell *lc; + + foreach(lc, tstmt->relations) { - increment_graph_version(graph_oid); + RangeVar *rv = (RangeVar *) lfirst(lc); + Oid rel_oid = RangeVarGetRelid(rv, AccessShareLock, + true); + + if (OidIsValid(rel_oid)) + { + Oid graph_oid = + get_graph_oid_for_table(rel_oid); + + if (OidIsValid(graph_oid)) + { + increment_graph_version(graph_oid); + } + } } } - } + break; + default: + break; } + } + + if (dropping_age) + { + /* Remove all graphs */ + drop_graphs(get_graphnames()); + /* Remove the object access hook */ + object_access_hook_fini(); + } + + PG_TRY(); + { if (prev_process_utility_hook) { (*prev_process_utility_hook) (pstmt, queryString, readOnlyTree, - context, params, queryEnv, dest, qc); + context, params, queryEnv, dest, + qc); } else { @@ -141,38 +232,47 @@ void ag_ProcessUtility_hook(PlannedStmt *pstmt, const char *queryString, params, queryEnv, dest, qc); } } -} - -static void drop_age_extension(DropStmt *stmt) -{ - /* Remove all graphs */ - drop_graphs(get_graphnames()); + PG_CATCH(); + { + if (dropping_age) + { + /* + * We have to restore the disabled object_access_hook if + * DROP EXTENSION age failed. + */ + object_access_hook_init(); + } + PG_RE_THROW(); + } + PG_END_TRY(); - /* Remove the object access hook */ - object_access_hook_fini(); + if (dropping_age) + { + /* reset global variables for OIDs */ + clear_global_Oids_AGTYPE(); + clear_global_Oids_GRAPHID(); + clear_global_Oids_VERTEX_EDGE(); - /* - * Run Postgres' logic to perform the remaining work to drop the - * extension. - */ - RemoveObjects(stmt); + /* Restore the object access hook */ + object_access_hook_init(); + } - /* reset global variables for OIDs */ - clear_global_Oids_AGTYPE(); - clear_global_Oids_GRAPHID(); - clear_global_Oids_VERTEX_EDGE(); + if (creating_age || dropping_age) + { + /* Notify all backends that pg_extension was modified. 
*/ + CacheInvalidateRelcacheByRelid(ExtensionRelationId); + } } /* Check to see if the Utility Command is to drop the AGE Extension. */ -static bool is_age_drop(PlannedStmt *pstmt) +static bool is_age_drop(DropStmt *drop_stmt) { ListCell *lc; - DropStmt *drop_stmt; - if (!IsA(pstmt->utilityStmt, DropStmt)) + if (!is_age_extension_exists()) + { return false; - - drop_stmt = (DropStmt *)pstmt->utilityStmt; + } foreach(lc, drop_stmt->objects) { @@ -183,8 +283,10 @@ static bool is_age_drop(PlannedStmt *pstmt) String *val = (String *)obj; char *str = val->sval; - if (!pg_strcasecmp(str, "age")) + if (strcmp(str, "age") == 0) + { return true; + } } } @@ -205,16 +307,16 @@ static void object_access(ObjectAccessType access, Oid class_id, Oid object_id, if (prev_object_access_hook) prev_object_access_hook(access, class_id, object_id, sub_id, arg); - /* We are interested in DROP SCHEMA and DROP TABLE commands. */ - if (access != OAT_DROP) + if (!is_age_extension_exists()) + { return; + } - /* - * Age might be installed into shared_preload_libraries before extension is - * created. In this case we must bail out from this hook. - */ - if (!OidIsValid(get_namespace_oid("ag_catalog", true))) + /* We are interested in DROP SCHEMA and DROP TABLE commands. 
*/ + if (access != OAT_DROP) + { return; + } drop_arg = arg; diff --git a/src/backend/optimizer/cypher_paths.c b/src/backend/optimizer/cypher_paths.c index 6c4fd7e07..6d2e400c0 100644 --- a/src/backend/optimizer/cypher_paths.c +++ b/src/backend/optimizer/cypher_paths.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include "catalog/ag_catalog.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -64,7 +65,14 @@ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { if (prev_set_rel_pathlist_hook) + { prev_set_rel_pathlist_hook(root, rel, rti, rte); + } + + if (!is_age_extension_exists()) + { + return; + } switch (get_cypher_clause_kind(rte)) { diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index b2c9256ce..5b72f332e 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -19,6 +19,7 @@ #include "postgres.h" +#include "catalog/ag_catalog.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/analyze.h" @@ -86,6 +87,11 @@ static void post_parse_analyze(ParseState *pstate, Query *query, JumbleState *js prev_post_parse_analyze_hook(pstate, query, jstate); } + if (!is_age_extension_exists()) + { + return; + } + /* * extra_node is set in the parsing stage to keep track of EXPLAIN. * So it needs to be set to NULL prior to any cypher parsing. 
diff --git a/src/include/catalog/ag_catalog.h b/src/include/catalog/ag_catalog.h index 56aa84700..a9ced279d 100644 --- a/src/include/catalog/ag_catalog.h +++ b/src/include/catalog/ag_catalog.h @@ -24,6 +24,8 @@ #include "utils/agtype.h" +bool is_age_extension_exists(void); + void object_access_hook_init(void); void object_access_hook_fini(void); From 19a3b630f44dc7c6eb6f83041907c7d082c62a80 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 20 Jun 2026 09:55:09 -0700 Subject: [PATCH 02/20] feature: add create_subgraph() (#2441) Add the feature create_subgraph() for materialized induced-subgraph extraction. Add ag_catalog.create_subgraph(new_graph, from_graph, node_filter, relationship_filter) which materializes a new, persistent, fully Cypher-queryable AGE graph as the induced subgraph of an existing graph. Selection follows the graph-theory induced-subgraph definition as operationalized by Neo4j GDS gds.graph.filter(): * a vertex is kept iff node_filter holds ('*' keeps all); * an edge is kept iff relationship_filter holds AND both of its endpoints were kept (no dangling edges). Filters are arbitrary Cypher predicates bound to `n` (nodes) and `r` (relationships) and are evaluated by AGE's own Cypher engine against the source graph, so the full predicate language is available; label selection uses label(n)/label(r) since the match pattern is fixed. Implementation notes: * Result is a real, ACID, registered graph (create_graph + create_v/ elabel), not a virtual view; it composes with cypher() and itself. * Entity graphids are reassigned from the destination labels' own sequences (graphid encodes a per-graph label id), and edge endpoints are remapped through an old->new vertex map, enforcing the induced rule via inner joins. * Source label tables are read with FROM ONLY to avoid double-copying children under PostgreSQL table inheritance. * Properties of any agtype are preserved; self-loops and parallel edges (multigraph structure) are retained. 
* SECURITY INVOKER: reads respect the caller's table privileges and RLS; the new graph is owned by the caller. * Validates NULL/identical graph names, missing source, pre-existing destination, and a reserved dollar-quote token in predicates. Wire-up: * sql/age_subgraph.sql (new) registered in sql/sql_files after age_pg_upgrade; identical body added to age--1.7.0--y.y.y.sql so the upgrade-path catalog comparison matches. * regress/sql/subgraph.sql + expected output (new), added to REGRESS. Covers full copy, vertex-induced, node+rel, label-only edge drop, bipartite, empty result, composability, self-loops/parallel edges, property fidelity, and error cases over a ~4500-vertex / 2000-edge source graph. All 38 regression tests pass against PostgreSQL 18. Co-authored-by: GitHub Copilot (Claude Opus 4.8) <[email protected]> modified: Makefile modified: age--1.7.0--y.y.y.sql new file: regress/expected/subgraph.out new file: regress/sql/subgraph.sql new file: sql/age_subgraph.sql modified: sql/sql_files --- Makefile | 3 +- age--1.7.0--y.y.y.sql | 257 +++++++++++++++++++++++++ regress/expected/subgraph.out | 341 ++++++++++++++++++++++++++++++++++ regress/sql/subgraph.sql | 189 +++++++++++++++++++ sql/age_subgraph.sql | 294 +++++++++++++++++++++++++++++ sql/sql_files | 1 + 6 files changed, 1084 insertions(+), 1 deletion(-) create mode 100644 regress/expected/subgraph.out create mode 100644 regress/sql/subgraph.sql create mode 100644 sql/age_subgraph.sql diff --git a/Makefile b/Makefile index f2a0b9a62..3ea9236a6 100644 --- a/Makefile +++ b/Makefile @@ -184,7 +184,8 @@ REGRESS = scan \ security \ reserved_keyword_alias \ agtype_jsonb_cast \ - containment_selectivity + containment_selectivity \ + subgraph ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index a4cac0c5c..282eaa0f9 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -800,3 +800,260 @@ ALTER OPERATOR ag_catalog.?&(agtype, text[]) SET 
(RESTRICT = contsel, JOIN = contjoinsel); ALTER OPERATOR ag_catalog.?&(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); + +-- +-- create_subgraph(): materialized subgraph extraction (see sql/age_subgraph.sql). +-- Induced-subgraph semantics matching Neo4j GDS gds.graph.filter(): a vertex is +-- kept iff node_filter holds ('*' = all); an edge is kept iff relationship_filter +-- holds AND both endpoints are kept. Produces a persistent, Cypher-queryable graph. +-- +CREATE FUNCTION ag_catalog.create_subgraph(new_graph name, + from_graph name, + node_filter text DEFAULT '*', + relationship_filter text DEFAULT '*') + RETURNS TABLE(node_count bigint, relationship_count bigint) + LANGUAGE plpgsql + VOLATILE + SET search_path = ag_catalog, pg_catalog + AS $function$ +DECLARE + from_oid oid; + new_oid oid; + v_node_count bigint := 0; + v_rel_count bigint := 0; + rec RECORD; + cypher_q text; + where_clause text; + dst_label_id int; + dst_seq_fqn text; + dst_relation text; + inserted bigint; + has_rows boolean; +BEGIN + -- Argument validation. + IF new_graph IS NULL THEN + RAISE EXCEPTION 'new graph name must not be NULL'; + END IF; + IF from_graph IS NULL THEN + RAISE EXCEPTION 'source graph name must not be NULL'; + END IF; + IF new_graph = from_graph THEN + RAISE EXCEPTION 'cannot extract a subgraph of "%" into itself', from_graph; + END IF; + + -- NULL predicate is treated as the '*' wildcard (keep all). + IF node_filter IS NULL THEN + node_filter := '*'; + END IF; + IF relationship_filter IS NULL THEN + relationship_filter := '*'; + END IF; + + -- The predicates are embedded into a dollar-quoted cypher() query using the + -- $age_subgraph$ tag; reject predicates that contain the tag to keep the + -- quoting unambiguous. 
+ IF position('$age_subgraph$' IN node_filter) > 0 + OR position('$age_subgraph$' IN relationship_filter) > 0 THEN + RAISE EXCEPTION 'filter predicate must not contain the reserved token $age_subgraph$'; + END IF; + + -- Validate source graph exists. + SELECT graphid INTO from_oid + FROM ag_catalog.ag_graph WHERE name = from_graph; + IF from_oid IS NULL THEN + RAISE EXCEPTION 'graph "%" does not exist', from_graph; + END IF; + + -- Validate destination graph does not exist (create_graph also enforces + -- naming rules and uniqueness, but we give a clear early error). + IF EXISTS (SELECT 1 FROM ag_catalog.ag_graph WHERE name = new_graph) THEN + RAISE EXCEPTION 'graph "%" already exists', new_graph; + END IF; + + -- Create the destination graph (default labels are created automatically). + PERFORM ag_catalog.create_graph(new_graph); + + SELECT graphid INTO new_oid + FROM ag_catalog.ag_graph WHERE name = new_graph; + + -- Working sets / mapping (uniquely named to avoid colliding with user temps). + DROP TABLE IF EXISTS _ag_sg_kept_v; + DROP TABLE IF EXISTS _ag_sg_kept_e; + DROP TABLE IF EXISTS _ag_sg_vmap; + DROP TABLE IF EXISTS _ag_sg_vstage; + DROP TABLE IF EXISTS _ag_sg_estage; + + -- + -- Kept vertices: evaluate node_filter with AGE's Cypher engine. The node + -- variable `n` is bound exactly as in the spec; '*' selects all vertices. + -- + IF node_filter IS NULL OR btrim(node_filter) = '*' THEN + where_clause := ''; + ELSE + where_clause := ' WHERE ' || node_filter; + END IF; + cypher_q := 'MATCH (n)' || where_clause || ' RETURN id(n)'; + + EXECUTE format( + 'CREATE TEMP TABLE _ag_sg_kept_v ON COMMIT DROP AS ' + 'SELECT DISTINCT ag_catalog.agtype_to_graphid(vid) AS gid ' + 'FROM ag_catalog.cypher(%L, $age_subgraph$%s$age_subgraph$) AS (vid agtype)', + from_graph, cypher_q); + CREATE INDEX ON _ag_sg_kept_v (gid); + + -- + -- Kept edges: evaluate relationship_filter with AGE's Cypher engine. The + -- relationship variable `r` is bound exactly as in the spec. 
+ -- + IF relationship_filter IS NULL OR btrim(relationship_filter) = '*' THEN + where_clause := ''; + ELSE + where_clause := ' WHERE ' || relationship_filter; + END IF; + cypher_q := 'MATCH ()-[r]->()' || where_clause || ' RETURN id(r)'; + + EXECUTE format( + 'CREATE TEMP TABLE _ag_sg_kept_e ON COMMIT DROP AS ' + 'SELECT DISTINCT ag_catalog.agtype_to_graphid(eid) AS gid ' + 'FROM ag_catalog.cypher(%L, $age_subgraph$%s$age_subgraph$) AS (eid agtype)', + from_graph, cypher_q); + CREATE INDEX ON _ag_sg_kept_e (gid); + + -- old -> new vertex id mapping (graphid is unique within a graph). + CREATE TEMP TABLE _ag_sg_vmap (old_id graphid PRIMARY KEY, + new_id graphid NOT NULL) ON COMMIT DROP; + + -- + -- PASS 1: copy kept vertices, label by label, assigning new graphids and + -- recording the old->new mapping for edge remapping. + -- + FOR rec IN + SELECT name, id, relation, seq_name + FROM ag_catalog.ag_label + WHERE graph = from_oid AND kind = 'v' + ORDER BY id + LOOP + -- Skip labels with no surviving vertices. Read ONLY this label's own + -- rows: AGE label tables use table inheritance (custom labels inherit + -- from _ag_label_vertex), so a plain scan of a parent would also return + -- its children and copy them twice. + EXECUTE format( + 'SELECT EXISTS (SELECT 1 FROM ONLY %s t ' + 'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id))', + rec.relation::regclass::text) + INTO has_rows; + IF NOT has_rows THEN + CONTINUE; + END IF; + + -- Ensure the label exists in the destination graph. 
+ IF rec.name <> '_ag_label_vertex' THEN + PERFORM 1 FROM ag_catalog.ag_label + WHERE graph = new_oid AND name = rec.name; + IF NOT FOUND THEN + EXECUTE format('SELECT ag_catalog.create_vlabel(%L, %L)', + new_graph, rec.name); + END IF; + END IF; + + SELECT id, seq_name, relation::regclass::text + INTO dst_label_id, dst_seq_fqn, dst_relation + FROM ag_catalog.ag_label + WHERE graph = new_oid AND name = rec.name; + dst_seq_fqn := format('%I.%I', new_graph, dst_seq_fqn); + + -- Stage surviving vertices with freshly generated ids in a real temp + -- table (single evaluation), then copy to the label table and record + -- the old->new mapping. A materialized stage avoids any ambiguity from + -- referencing a nextval-bearing CTE more than once. + DROP TABLE IF EXISTS _ag_sg_vstage; + EXECUTE format( + 'CREATE TEMP TABLE _ag_sg_vstage ON COMMIT DROP AS ' + 'SELECT t.id AS old_id, ' + ' ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + ' t.properties AS props ' + 'FROM ONLY %s t ' + 'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id)', + dst_label_id, dst_seq_fqn, rec.relation::regclass::text); + + EXECUTE format('INSERT INTO %s (id, properties) ' + 'SELECT new_id, props FROM _ag_sg_vstage', dst_relation); + + INSERT INTO _ag_sg_vmap (old_id, new_id) + SELECT old_id, new_id FROM _ag_sg_vstage; + + DROP TABLE _ag_sg_vstage; + END LOOP; + + SELECT count(*) INTO v_node_count FROM _ag_sg_vmap; + + -- + -- PASS 2: copy kept edges, remapping endpoints. The joins to _ag_sg_vmap + -- enforce the induced rule (an edge survives only if BOTH endpoints were + -- kept); membership in _ag_sg_kept_e applies relationship_filter. + -- + FOR rec IN + SELECT name, id, relation, seq_name + FROM ag_catalog.ag_label + WHERE graph = from_oid AND kind = 'e' + ORDER BY id + LOOP + -- Skip labels with no surviving edges. Read ONLY this label's own rows + -- (see the vertex pass for why inheritance requires ONLY). 
+ EXECUTE format( + 'SELECT EXISTS (' + ' SELECT 1 FROM ONLY %s x ' + ' JOIN _ag_sg_vmap vs ON vs.old_id = x.start_id ' + ' JOIN _ag_sg_vmap ve ON ve.old_id = x.end_id ' + ' WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_e k WHERE k.gid = x.id))', + rec.relation::regclass::text) + INTO has_rows; + IF NOT has_rows THEN + CONTINUE; + END IF; + + IF rec.name <> '_ag_label_edge' THEN + PERFORM 1 FROM ag_catalog.ag_label + WHERE graph = new_oid AND name = rec.name; + IF NOT FOUND THEN + EXECUTE format('SELECT ag_catalog.create_elabel(%L, %L)', + new_graph, rec.name); + END IF; + END IF; + + SELECT id, seq_name, relation::regclass::text + INTO dst_label_id, dst_seq_fqn, dst_relation + FROM ag_catalog.ag_label + WHERE graph = new_oid AND name = rec.name; + dst_seq_fqn := format('%I.%I', new_graph, dst_seq_fqn); + + -- Stage surviving edges, remapping endpoints through _ag_sg_vmap. The + -- joins enforce the induced rule (both endpoints kept); membership in + -- _ag_sg_kept_e applies relationship_filter. 
+ DROP TABLE IF EXISTS _ag_sg_estage; + EXECUTE format( + 'CREATE TEMP TABLE _ag_sg_estage ON COMMIT DROP AS ' + 'SELECT ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + ' vs.new_id AS new_start, ve.new_id AS new_end, ' + ' x.properties AS props ' + 'FROM ONLY %s x ' + 'JOIN _ag_sg_vmap vs ON vs.old_id = x.start_id ' + 'JOIN _ag_sg_vmap ve ON ve.old_id = x.end_id ' + 'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_e k WHERE k.gid = x.id)', + dst_label_id, dst_seq_fqn, rec.relation::regclass::text); + + EXECUTE format('INSERT INTO %s (id, start_id, end_id, properties) ' + 'SELECT new_id, new_start, new_end, props ' + 'FROM _ag_sg_estage', dst_relation); + GET DIAGNOSTICS inserted = ROW_COUNT; + v_rel_count := v_rel_count + inserted; + + DROP TABLE _ag_sg_estage; + END LOOP; + + RETURN QUERY SELECT v_node_count, v_rel_count; +END; +$function$; + +COMMENT ON FUNCTION ag_catalog.create_subgraph(name, name, text, text) IS +'Materializes a new persistent graph as the induced subgraph of from_graph selected by a Cypher node predicate (on n) and relationship predicate (on r); ''*'' keeps all. An edge is kept only if its predicate holds and both endpoints are kept. Returns (node_count, relationship_count).'; diff --git a/regress/expected/subgraph.out b/regress/expected/subgraph.out new file mode 100644 index 000000000..a27569a5d --- /dev/null +++ b/regress/expected/subgraph.out @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- Suppress the create_graph / create_vlabel NOTICE chatter so the assertions +-- below are the deterministic output. (The feature is exercised regardless.) +SET client_min_messages = warning; +-- +-- Build a "somewhat large" source graph with NO MATCH (fast bulk CREATE): +-- * 2000 isolated components, each (:Person{pid,age})-[:KNOWS{w}]->(:Friend{pid}) +-- => 2000 Person + 2000 Friend vertices, 2000 KNOWS edges +-- * 500 isolated :Company vertices (no edges) +-- Totals: 4500 vertices, 2000 edges, label set {Person,Friend,Company,KNOWS}. +-- +SELECT create_graph('sg_src'); + create_graph +-------------- + +(1 row) + +SELECT count(*) FROM cypher('sg_src', $$ + UNWIND range(1, 2000) AS i + CREATE (:Person {pid: i, age: i % 100})-[:KNOWS {w: i}]->(:Friend {pid: i}) +$$) AS (a agtype); + count +------- + 0 +(1 row) + +SELECT count(*) FROM cypher('sg_src', $$ + UNWIND range(1, 500) AS i CREATE (:Company {cid: i}) +$$) AS (a agtype); + count +------- + 0 +(1 row) + +-- Source baseline (printed for reference; deterministic). +SELECT + (SELECT count(*) FROM cypher('sg_src', $$ MATCH (n) RETURN n $$) AS (n agtype)) AS src_vertices, + (SELECT count(*) FROM cypher('sg_src', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) AS src_edges; + src_vertices | src_edges +--------------+----------- + 4500 | 2000 +(1 row) + +-- +-- 1. Full copy ('*','*'): counts equal the source, and the new graph round-trips. 
+-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_all', 'sg_src', '*', '*'); + node_count | relationship_count +------------+-------------------- + 4500 | 2000 +(1 row) + +SELECT + (SELECT count(*) FROM cypher('sg_all', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_src', $$ MATCH (n) RETURN n $$) AS (n agtype)) AS nodes_match, + (SELECT count(*) FROM cypher('sg_all', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) AS edges_match; + nodes_match | edges_match +-------------+------------- + t | t +(1 row) + +-- +-- 2. Vertex-induced (node filter only): keep pid <= 1000. An edge survives iff +-- BOTH endpoints survive (induced rule), with no relationship filter. +-- node_count is asserted against the function return; correctness is verified +-- by recomputing the induced set from the source (robust booleans). +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_v', 'sg_src', 'n.pid <= 1000', '*'); + node_count | relationship_count +------------+-------------------- + 2000 | 1000 +(1 row) + +SELECT + (SELECT count(*) FROM cypher('sg_v', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (n) WHERE n.pid <= 1000 RETURN n $$) AS (n agtype)) AS nodes_ok, + (SELECT count(*) FROM cypher('sg_v', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (a)-[r]->(b) WHERE a.pid <= 1000 AND b.pid <= 1000 RETURN r $$) + AS (r agtype)) AS edges_ok; + nodes_ok | edges_ok +----------+---------- + t | t +(1 row) + +-- +-- 3. Node + relationship predicate: keep pid <= 1000 vertices and w <= 300 edges. +-- Edge survives iff w<=300 AND both endpoints pid<=1000. 
+-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_nr', 'sg_src', 'n.pid <= 1000', 'r.w <= 300'); + node_count | relationship_count +------------+-------------------- + 2000 | 300 +(1 row) + +SELECT + (SELECT count(*) FROM cypher('sg_nr', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (a)-[r]->(b) WHERE r.w <= 300 AND a.pid <= 1000 AND b.pid <= 1000 + RETURN r $$) AS (r agtype)) AS edges_ok; + edges_ok +---------- + t +(1 row) + +-- +-- 4. Label filter excludes one endpoint type: keep only :Person. Every KNOWS +-- edge points Person->Friend, so all edges must be dropped (induced rule). +-- (AGE evaluates label predicates with label(n); GDS uses n:Person -- same +-- containment semantics, different predicate syntax.) +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_person', 'sg_src', $f$label(n) = 'Person'$f$, '*'); + node_count | relationship_count +------------+-------------------- + 2000 | 0 +(1 row) + +-- +-- 5. Bipartite (type filter): keep Person+Friend and KNOWS edges => all 2000. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_bip', 'sg_src', + $f$label(n) = 'Person' OR label(n) = 'Friend'$f$, + $f$label(r) = 'KNOWS'$f$); + node_count | relationship_count +------------+-------------------- + 4000 | 2000 +(1 row) + +-- +-- 6. Empty result: a predicate matching nothing yields an empty subgraph +-- (not an error), with the default labels only. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_empty', 'sg_src', 'n.pid < 0', '*'); + node_count | relationship_count +------------+-------------------- + 0 | 0 +(1 row) + +SELECT count(*) AS empty_vertices +FROM cypher('sg_empty', $$ MATCH (n) RETURN n $$) AS (n agtype); + empty_vertices +---------------- + 0 +(1 row) + +-- +-- 7. Composability: extract a subgraph from an already-extracted subgraph. +-- From sg_v (pid<=1000) keep pid<=500; verify against recomputation on sg_v. 
+-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_v2', 'sg_v', 'n.pid <= 500', '*'); + node_count | relationship_count +------------+-------------------- + 1000 | 500 +(1 row) + +SELECT + (SELECT count(*) FROM cypher('sg_v2', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_v', + $$ MATCH (n) WHERE n.pid <= 500 RETURN n $$) AS (n agtype)) AS nodes_ok, + (SELECT count(*) FROM cypher('sg_v2', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_v', + $$ MATCH (a)-[r]->(b) WHERE a.pid <= 500 AND b.pid <= 500 RETURN r $$) + AS (r agtype)) AS edges_ok; + nodes_ok | edges_ok +----------+---------- + t | t +(1 row) + +-- +-- 8. Self-loops and parallel edges (multigraph structure) are preserved. +-- +SELECT create_graph('sg_multi'); + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sg_multi', $$ + CREATE (a:N {k: 1}) CREATE (a)-[:E {t: 1}]->(a) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('sg_multi', $$ + CREATE (a:N {k: 2}), (b:N {k: 3}), + (a)-[:E {t: 2}]->(b), (a)-[:E {t: 3}]->(b) +$$) AS (a agtype); + a +--- +(0 rows) + +SELECT node_count, relationship_count +FROM create_subgraph('sg_multi_sub', 'sg_multi', '*', '*'); + node_count | relationship_count +------------+-------------------- + 3 | 3 +(1 row) + +-- self-loop preserved (exactly one edge from a node to itself) +SELECT count(*) AS self_loops +FROM cypher('sg_multi_sub', $$ MATCH (a)-[r]->(a) RETURN r $$) AS (r agtype); + self_loops +------------ + 1 +(1 row) + +-- parallel edges preserved (two edges between k=2 and k=3) +SELECT count(*) AS parallel_edges +FROM cypher('sg_multi_sub', $$ MATCH (a {k: 2})-[r]->(b {k: 3}) RETURN r $$) AS (r agtype); + parallel_edges +---------------- + 2 +(1 row) + +-- +-- 9. Property fidelity: a copied vertex keeps its properties verbatim. 
+-- +SELECT count(*) AS person_500_age_ok +FROM cypher('sg_v', $$ MATCH (n:Person {pid: 500}) WHERE n.age = 0 RETURN n $$) AS (n agtype); + person_500_age_ok +------------------- + 1 +(1 row) + +-- +-- 10. Error handling / edge cases. +-- +-- NULL graph name +SELECT create_subgraph(NULL, 'sg_src', '*', '*'); +ERROR: new graph name must not be NULL +CONTEXT: PL/pgSQL function create_subgraph(name,name,text,text) line 18 at RAISE +-- source does not exist +SELECT create_subgraph('sg_x', 'no_such_graph', '*', '*'); +ERROR: graph "no_such_graph" does not exist +CONTEXT: PL/pgSQL function create_subgraph(name,name,text,text) line 47 at RAISE +-- extracting into the source itself +SELECT create_subgraph('sg_src', 'sg_src', '*', '*'); +ERROR: cannot extract a subgraph of "sg_src" into itself +CONTEXT: PL/pgSQL function create_subgraph(name,name,text,text) line 24 at RAISE +-- destination already exists +SELECT create_subgraph('sg_all', 'sg_src', '*', '*'); +ERROR: graph "sg_all" already exists +CONTEXT: PL/pgSQL function create_subgraph(name,name,text,text) line 53 at RAISE +-- invalid Cypher predicate is reported (propagated from the engine) +SELECT create_subgraph('sg_bad', 'sg_src', 'n.pid <<>> 1', '*'); +ERROR: operator does not exist: agtype <<>> agtype +LINE 1: ...her('sg_src', $age_subgraph$MATCH (n) WHERE n.pid <<>> 1 RET... + ^ +HINT: No operator matches the given name and argument types. You might need to add explicit type casts. 
+QUERY: CREATE TEMP TABLE _ag_sg_kept_v ON COMMIT DROP AS SELECT DISTINCT ag_catalog.agtype_to_graphid(vid) AS gid FROM ag_catalog.cypher('sg_src', $age_subgraph$MATCH (n) WHERE n.pid <<>> 1 RETURN id(n)$age_subgraph$) AS (vid agtype) +CONTEXT: PL/pgSQL function create_subgraph(name,name,text,text) line 80 at EXECUTE +-- cleanup +SELECT drop_graph('sg_v2', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_multi_sub', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_multi', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_empty', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_bip', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_person', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_nr', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_v', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_all', true); + drop_graph +------------ + +(1 row) + +SELECT drop_graph('sg_src', true); + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/subgraph.sql b/regress/sql/subgraph.sql new file mode 100644 index 000000000..0d01dfe60 --- /dev/null +++ b/regress/sql/subgraph.sql @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- Suppress the create_graph / create_vlabel NOTICE chatter so the assertions +-- below are the deterministic output. (The feature is exercised regardless.) +SET client_min_messages = warning; + +-- +-- Build a "somewhat large" source graph with NO MATCH (fast bulk CREATE): +-- * 2000 isolated components, each (:Person{pid,age})-[:KNOWS{w}]->(:Friend{pid}) +-- => 2000 Person + 2000 Friend vertices, 2000 KNOWS edges +-- * 500 isolated :Company vertices (no edges) +-- Totals: 4500 vertices, 2000 edges, label set {Person,Friend,Company,KNOWS}. +-- +SELECT create_graph('sg_src'); + +SELECT count(*) FROM cypher('sg_src', $$ + UNWIND range(1, 2000) AS i + CREATE (:Person {pid: i, age: i % 100})-[:KNOWS {w: i}]->(:Friend {pid: i}) +$$) AS (a agtype); + +SELECT count(*) FROM cypher('sg_src', $$ + UNWIND range(1, 500) AS i CREATE (:Company {cid: i}) +$$) AS (a agtype); + +-- Source baseline (printed for reference; deterministic). +SELECT + (SELECT count(*) FROM cypher('sg_src', $$ MATCH (n) RETURN n $$) AS (n agtype)) AS src_vertices, + (SELECT count(*) FROM cypher('sg_src', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) AS src_edges; + +-- +-- 1. Full copy ('*','*'): counts equal the source, and the new graph round-trips. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_all', 'sg_src', '*', '*'); + +SELECT + (SELECT count(*) FROM cypher('sg_all', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_src', $$ MATCH (n) RETURN n $$) AS (n agtype)) AS nodes_match, + (SELECT count(*) FROM cypher('sg_all', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) AS edges_match; + +-- +-- 2. Vertex-induced (node filter only): keep pid <= 1000. 
An edge survives iff +-- BOTH endpoints survive (induced rule), with no relationship filter. +-- node_count is asserted against the function return; correctness is verified +-- by recomputing the induced set from the source (robust booleans). +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_v', 'sg_src', 'n.pid <= 1000', '*'); + +SELECT + (SELECT count(*) FROM cypher('sg_v', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (n) WHERE n.pid <= 1000 RETURN n $$) AS (n agtype)) AS nodes_ok, + (SELECT count(*) FROM cypher('sg_v', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (a)-[r]->(b) WHERE a.pid <= 1000 AND b.pid <= 1000 RETURN r $$) + AS (r agtype)) AS edges_ok; + +-- +-- 3. Node + relationship predicate: keep pid <= 1000 vertices and w <= 300 edges. +-- Edge survives iff w<=300 AND both endpoints pid<=1000. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_nr', 'sg_src', 'n.pid <= 1000', 'r.w <= 300'); + +SELECT + (SELECT count(*) FROM cypher('sg_nr', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_src', + $$ MATCH (a)-[r]->(b) WHERE r.w <= 300 AND a.pid <= 1000 AND b.pid <= 1000 + RETURN r $$) AS (r agtype)) AS edges_ok; + +-- +-- 4. Label filter excludes one endpoint type: keep only :Person. Every KNOWS +-- edge points Person->Friend, so all edges must be dropped (induced rule). +-- (AGE evaluates label predicates with label(n); GDS uses n:Person -- same +-- containment semantics, different predicate syntax.) +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_person', 'sg_src', $f$label(n) = 'Person'$f$, '*'); + +-- +-- 5. Bipartite (type filter): keep Person+Friend and KNOWS edges => all 2000. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_bip', 'sg_src', + $f$label(n) = 'Person' OR label(n) = 'Friend'$f$, + $f$label(r) = 'KNOWS'$f$); + +-- +-- 6. 
Empty result: a predicate matching nothing yields an empty subgraph +-- (not an error), with the default labels only. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_empty', 'sg_src', 'n.pid < 0', '*'); + +SELECT count(*) AS empty_vertices +FROM cypher('sg_empty', $$ MATCH (n) RETURN n $$) AS (n agtype); + +-- +-- 7. Composability: extract a subgraph from an already-extracted subgraph. +-- From sg_v (pid<=1000) keep pid<=500; verify against recomputation on sg_v. +-- +SELECT node_count, relationship_count +FROM create_subgraph('sg_v2', 'sg_v', 'n.pid <= 500', '*'); + +SELECT + (SELECT count(*) FROM cypher('sg_v2', $$ MATCH (n) RETURN n $$) AS (n agtype)) + = (SELECT count(*) FROM cypher('sg_v', + $$ MATCH (n) WHERE n.pid <= 500 RETURN n $$) AS (n agtype)) AS nodes_ok, + (SELECT count(*) FROM cypher('sg_v2', $$ MATCH ()-[r]->() RETURN r $$) AS (r agtype)) + = (SELECT count(*) FROM cypher('sg_v', + $$ MATCH (a)-[r]->(b) WHERE a.pid <= 500 AND b.pid <= 500 RETURN r $$) + AS (r agtype)) AS edges_ok; + +-- +-- 8. Self-loops and parallel edges (multigraph structure) are preserved. +-- +SELECT create_graph('sg_multi'); +SELECT * FROM cypher('sg_multi', $$ + CREATE (a:N {k: 1}) CREATE (a)-[:E {t: 1}]->(a) +$$) AS (a agtype); +SELECT * FROM cypher('sg_multi', $$ + CREATE (a:N {k: 2}), (b:N {k: 3}), + (a)-[:E {t: 2}]->(b), (a)-[:E {t: 3}]->(b) +$$) AS (a agtype); + +SELECT node_count, relationship_count +FROM create_subgraph('sg_multi_sub', 'sg_multi', '*', '*'); + +-- self-loop preserved (exactly one edge from a node to itself) +SELECT count(*) AS self_loops +FROM cypher('sg_multi_sub', $$ MATCH (a)-[r]->(a) RETURN r $$) AS (r agtype); + +-- parallel edges preserved (two edges between k=2 and k=3) +SELECT count(*) AS parallel_edges +FROM cypher('sg_multi_sub', $$ MATCH (a {k: 2})-[r]->(b {k: 3}) RETURN r $$) AS (r agtype); + +-- +-- 9. Property fidelity: a copied vertex keeps its properties verbatim. 
+-- +SELECT count(*) AS person_500_age_ok +FROM cypher('sg_v', $$ MATCH (n:Person {pid: 500}) WHERE n.age = 0 RETURN n $$) AS (n agtype); + +-- +-- 10. Error handling / edge cases. +-- +-- NULL graph name +SELECT create_subgraph(NULL, 'sg_src', '*', '*'); +-- source does not exist +SELECT create_subgraph('sg_x', 'no_such_graph', '*', '*'); +-- extracting into the source itself +SELECT create_subgraph('sg_src', 'sg_src', '*', '*'); +-- destination already exists +SELECT create_subgraph('sg_all', 'sg_src', '*', '*'); +-- invalid Cypher predicate is reported (propagated from the engine) +SELECT create_subgraph('sg_bad', 'sg_src', 'n.pid <<>> 1', '*'); + +-- cleanup +SELECT drop_graph('sg_v2', true); +SELECT drop_graph('sg_multi_sub', true); +SELECT drop_graph('sg_multi', true); +SELECT drop_graph('sg_empty', true); +SELECT drop_graph('sg_bip', true); +SELECT drop_graph('sg_person', true); +SELECT drop_graph('sg_nr', true); +SELECT drop_graph('sg_v', true); +SELECT drop_graph('sg_all', true); +SELECT drop_graph('sg_src', true); diff --git a/sql/age_subgraph.sql b/sql/age_subgraph.sql new file mode 100644 index 000000000..960790ded --- /dev/null +++ b/sql/age_subgraph.sql @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +-- +-- create_subgraph(): materialized subgraph extraction. +-- +-- Builds a new, persistent AGE graph that is the subgraph of an existing graph +-- selected by a node predicate and a relationship predicate. The semantics +-- follow the graph-theory "induced subgraph" definition as operationalized by +-- Neo4j GDS gds.graph.filter(): +-- +-- * a vertex is kept iff node_filter evaluates true ('*' keeps all); +-- * an edge is kept iff relationship_filter evaluates true AND BOTH of its +-- endpoints were kept (the induced rule -- no dangling edges). +-- +-- Unlike the Neo4j in-memory projection, the result is a real, ACID, +-- fully-Cypher-queryable AGE graph; properties of any agtype are preserved, and +-- self-loops / parallel edges (multigraph structure) are kept. +-- +-- node_filter / relationship_filter are Cypher predicates bound to a single +-- entity -- the node variable is `n`, the relationship variable is `r` -- or +-- the literal '*' to keep all. They are evaluated by AGE's own Cypher engine +-- against the source graph, so the full Cypher predicate language is available. +-- +-- Internal entity ids (graphids) are reassigned in the new graph (a graphid +-- encodes the source graph's label id, which differs in the destination), and +-- edge endpoints are remapped accordingly. Properties are copied verbatim. 
+--
+CREATE FUNCTION ag_catalog.create_subgraph(new_graph name,
+                                           from_graph name,
+                                           node_filter text DEFAULT '*',
+                                           relationship_filter text DEFAULT '*')
+    RETURNS TABLE(node_count bigint, relationship_count bigint)
+    LANGUAGE plpgsql
+    VOLATILE
+    SET search_path = ag_catalog, pg_catalog
+    AS $function$
+DECLARE
+    from_oid oid;
+    new_oid oid;
+    v_node_count bigint := 0;
+    v_rel_count bigint := 0;
+    rec RECORD;
+    cypher_q text;
+    where_clause text;
+    dst_label_id int;
+    dst_seq_fqn text;
+    dst_relation text;
+    inserted bigint;
+    has_rows boolean;
+BEGIN
+    -- Argument validation (the regression test output pins these RAISE line numbers).
+    IF new_graph IS NULL THEN
+        RAISE EXCEPTION 'new graph name must not be NULL';
+    END IF;
+    IF from_graph IS NULL THEN
+        RAISE EXCEPTION 'source graph name must not be NULL';
+    END IF;
+    IF new_graph = from_graph THEN
+        RAISE EXCEPTION 'cannot extract a subgraph of "%" into itself', from_graph;
+    END IF;
+
+    -- NULL predicate is treated as the '*' wildcard (keep all).
+    IF node_filter IS NULL THEN
+        node_filter := '*';
+    END IF;
+    IF relationship_filter IS NULL THEN
+        relationship_filter := '*';
+    END IF;
+
+    -- The predicates are embedded into a dollar-quoted cypher() query using the
+    -- $age_subgraph$ tag; reject predicates that contain the tag to keep the
+    -- quoting unambiguous.
+    IF position('$age_subgraph$' IN node_filter) > 0
+       OR position('$age_subgraph$' IN relationship_filter) > 0 THEN
+        RAISE EXCEPTION 'filter predicate must not contain the reserved token $age_subgraph$';
+    END IF;
+
+    -- Look up the source graph; from_oid stays NULL when no such graph exists.
+    SELECT graphid INTO from_oid
+    FROM ag_catalog.ag_graph WHERE name = from_graph;
+    IF from_oid IS NULL THEN
+        RAISE EXCEPTION 'graph "%" does not exist', from_graph;
+    END IF;
+
+    -- Validate destination graph does not exist (create_graph also enforces
+    -- naming rules and uniqueness, but we give a clear early error).
+    IF EXISTS (SELECT 1 FROM ag_catalog.ag_graph WHERE name = new_graph) THEN
+        RAISE EXCEPTION 'graph "%" already exists', new_graph;
+    END IF;
+
+    -- Create the destination graph (default labels are created automatically).
+    PERFORM ag_catalog.create_graph(new_graph);
+
+    SELECT graphid INTO new_oid
+    FROM ag_catalog.ag_graph WHERE name = new_graph;
+
+    -- Drop working tables left by an earlier call in this transaction (_ag_sg_* names avoid user temps).
+    DROP TABLE IF EXISTS _ag_sg_kept_v;
+    DROP TABLE IF EXISTS _ag_sg_kept_e;
+    DROP TABLE IF EXISTS _ag_sg_vmap;
+    DROP TABLE IF EXISTS _ag_sg_vstage;
+    DROP TABLE IF EXISTS _ag_sg_estage;
+
+    --
+    -- Kept vertices: evaluate node_filter with AGE's Cypher engine. The node
+    -- variable is `n`; '*' (surrounding whitespace ignored) selects all vertices.
+    --
+    IF node_filter IS NULL OR btrim(node_filter) = '*' THEN
+        where_clause := '';
+    ELSE
+        where_clause := ' WHERE ' || node_filter;
+    END IF;
+    cypher_q := 'MATCH (n)' || where_clause || ' RETURN id(n)';
+
+    EXECUTE format(
+        'CREATE TEMP TABLE _ag_sg_kept_v ON COMMIT DROP AS '
+        'SELECT DISTINCT ag_catalog.agtype_to_graphid(vid) AS gid '
+        'FROM ag_catalog.cypher(%L, $age_subgraph$%s$age_subgraph$) AS (vid agtype)',
+        from_graph, cypher_q);
+    CREATE INDEX ON _ag_sg_kept_v (gid);
+
+    --
+    -- Kept edges: evaluate relationship_filter with AGE's Cypher engine. The
+    -- relationship variable is `r`; '*' selects all edges (before the induced rule).
+    --
+    IF relationship_filter IS NULL OR btrim(relationship_filter) = '*' THEN
+        where_clause := '';
+    ELSE
+        where_clause := ' WHERE ' || relationship_filter;
+    END IF;
+    cypher_q := 'MATCH ()-[r]->()' || where_clause || ' RETURN id(r)';
+
+    EXECUTE format(
+        'CREATE TEMP TABLE _ag_sg_kept_e ON COMMIT DROP AS '
+        'SELECT DISTINCT ag_catalog.agtype_to_graphid(eid) AS gid '
+        'FROM ag_catalog.cypher(%L, $age_subgraph$%s$age_subgraph$) AS (eid agtype)',
+        from_graph, cypher_q);
+    CREATE INDEX ON _ag_sg_kept_e (gid);
+
+    -- old -> new vertex id mapping (graphid is unique within a graph).
+    CREATE TEMP TABLE _ag_sg_vmap (old_id graphid PRIMARY KEY,
+                                   new_id graphid NOT NULL) ON COMMIT DROP;
+
+    --
+    -- PASS 1: copy kept vertices, label by label, assigning new graphids and
+    -- recording the old->new mapping for edge remapping.
+    --
+    FOR rec IN
+        SELECT name, id, relation, seq_name
+        FROM ag_catalog.ag_label
+        WHERE graph = from_oid AND kind = 'v'
+        ORDER BY id
+    LOOP
+        -- Skip labels with no surviving vertices. Read ONLY this label's own
+        -- rows: AGE label tables use table inheritance (custom labels inherit
+        -- from _ag_label_vertex), so a plain scan of a parent would also return
+        -- its children and copy them twice.
+        EXECUTE format(
+            'SELECT EXISTS (SELECT 1 FROM ONLY %s t '
+            'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id))',
+            rec.relation::regclass::text)
+        INTO has_rows;
+        IF NOT has_rows THEN
+            CONTINUE;
+        END IF;
+
+        -- Ensure the label exists in the destination graph.
+        IF rec.name <> '_ag_label_vertex' THEN
+            PERFORM 1 FROM ag_catalog.ag_label
+            WHERE graph = new_oid AND name = rec.name;
+            IF NOT FOUND THEN
+                EXECUTE format('SELECT ag_catalog.create_vlabel(%L, %L)',
+                               new_graph, rec.name);
+            END IF;
+        END IF;
+
+        SELECT id, seq_name, relation::regclass::text
+        INTO dst_label_id, dst_seq_fqn, dst_relation
+        FROM ag_catalog.ag_label
+        WHERE graph = new_oid AND name = rec.name;
+        dst_seq_fqn := format('%I.%I', new_graph, dst_seq_fqn);
+
+        -- Stage surviving vertices with freshly generated ids in a real temp
+        -- table (single evaluation), then copy to the label table and record
+        -- the old->new mapping. A materialized stage avoids any ambiguity from
+        -- referencing a nextval-bearing CTE more than once.
+        DROP TABLE IF EXISTS _ag_sg_vstage;
+        EXECUTE format(
+            'CREATE TEMP TABLE _ag_sg_vstage ON COMMIT DROP AS '
+            'SELECT t.id AS old_id, '
+            ' ag_catalog._graphid(%s, nextval(%L)) AS new_id, '
+            ' t.properties AS props '
+            'FROM ONLY %s t '
+            'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id)',
+            dst_label_id, dst_seq_fqn, rec.relation::regclass::text);
+
+        EXECUTE format('INSERT INTO %s (id, properties) '
+                       'SELECT new_id, props FROM _ag_sg_vstage', dst_relation);
+
+        INSERT INTO _ag_sg_vmap (old_id, new_id)
+        SELECT old_id, new_id FROM _ag_sg_vstage;
+
+        DROP TABLE _ag_sg_vstage;
+    END LOOP;
+
+    SELECT count(*) INTO v_node_count FROM _ag_sg_vmap;
+
+    --
+    -- PASS 2: copy kept edges, remapping endpoints. The joins to _ag_sg_vmap
+    -- enforce the induced rule (an edge survives only if BOTH endpoints were
+    -- kept); membership in _ag_sg_kept_e applies relationship_filter.
+    --
+    FOR rec IN
+        SELECT name, id, relation, seq_name
+        FROM ag_catalog.ag_label
+        WHERE graph = from_oid AND kind = 'e'
+        ORDER BY id
+    LOOP
+        -- Skip labels with no surviving edges. Read ONLY this label's own rows
+        -- (see the vertex pass for why inheritance requires ONLY).
+        EXECUTE format(
+            'SELECT EXISTS ('
+            ' SELECT 1 FROM ONLY %s x '
+            ' JOIN _ag_sg_vmap vs ON vs.old_id = x.start_id '
+            ' JOIN _ag_sg_vmap ve ON ve.old_id = x.end_id '
+            ' WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_e k WHERE k.gid = x.id))',
+            rec.relation::regclass::text)
+        INTO has_rows;
+        IF NOT has_rows THEN
+            CONTINUE;
+        END IF;
+
+        IF rec.name <> '_ag_label_edge' THEN
+            PERFORM 1 FROM ag_catalog.ag_label
+            WHERE graph = new_oid AND name = rec.name;
+            IF NOT FOUND THEN
+                EXECUTE format('SELECT ag_catalog.create_elabel(%L, %L)',
+                               new_graph, rec.name);
+            END IF;
+        END IF;
+
+        SELECT id, seq_name, relation::regclass::text
+        INTO dst_label_id, dst_seq_fqn, dst_relation
+        FROM ag_catalog.ag_label
+        WHERE graph = new_oid AND name = rec.name;
+        dst_seq_fqn := format('%I.%I', new_graph, dst_seq_fqn);
+
+        -- Stage surviving edges, remapping endpoints through _ag_sg_vmap. The
+        -- joins enforce the induced rule (both endpoints kept); membership in
+        -- _ag_sg_kept_e applies relationship_filter.
+        DROP TABLE IF EXISTS _ag_sg_estage;
+        EXECUTE format(
+            'CREATE TEMP TABLE _ag_sg_estage ON COMMIT DROP AS '
+            'SELECT ag_catalog._graphid(%s, nextval(%L)) AS new_id, '
+            ' vs.new_id AS new_start, ve.new_id AS new_end, '
+            ' x.properties AS props '
+            'FROM ONLY %s x '
+            'JOIN _ag_sg_vmap vs ON vs.old_id = x.start_id '
+            'JOIN _ag_sg_vmap ve ON ve.old_id = x.end_id '
+            'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_e k WHERE k.gid = x.id)',
+            dst_label_id, dst_seq_fqn, rec.relation::regclass::text);
+
+        EXECUTE format('INSERT INTO %s (id, start_id, end_id, properties) '
+                       'SELECT new_id, new_start, new_end, props '
+                       'FROM _ag_sg_estage', dst_relation);
+        GET DIAGNOSTICS inserted = ROW_COUNT;
+        v_rel_count := v_rel_count + inserted;
+
+        DROP TABLE _ag_sg_estage;
+    END LOOP;
+
+    RETURN QUERY SELECT v_node_count, v_rel_count;
+END;
+$function$;
+
+COMMENT ON FUNCTION ag_catalog.create_subgraph(name, name, text, text) IS
+'Materializes a new persistent graph as the induced subgraph of from_graph selected by a Cypher node predicate (on n) and relationship predicate (on r); ''*'' keeps all. An edge is kept only if its predicate holds and both endpoints are kept. Returns (node_count, relationship_count).';
diff --git a/sql/sql_files b/sql/sql_files
index 32f9a7099..996ad4b46 100644
--- a/sql/sql_files
+++ b/sql/sql_files
@@ -15,3 +15,4 @@ age_trig
 age_aggregate
 agtype_typecast
 age_pg_upgrade
+age_subgraph
From 1d55993fc15a2ed60235c5182a2aea486e711cdd Mon Sep 17 00:00:00 2001
From: John Gemignani
Date: Sat, 20 Jun 2026 09:56:48 -0700
Subject: [PATCH 03/20] cypher_vle: add ORDER BY to non-deterministic RETURN queries (#2434)

Several VLE regression queries RETURN multiple rows without an ORDER BY, so their row order depends on traversal/scan order and can vary between runs and platforms. Add ORDER BY ASC to those queries (on the path, edge-list, or graphid as appropriate) so the expected output is stable.
The queries are pinned by path (p), edge list (e), or graphid (id(u)/id(v)/id(e[n])) depending on what each RETURN projects. Full audit of cypher_vle: all 38 multi-row result blocks were checked. After this change, every multi-row RETURN is deterministically ordered except the two SELECT * FROM show_list_use_vle('list01') calls, which are already deterministic because the function body orders its results with RETURN v ORDER BY id(v) (added in #2417); their result blocks are unchanged by this commit. This is a test-only change (regress/sql/cypher_vle.sql and regress/expected/cypher_vle.out); no extension C code or SQL is modified. Row counts are unchanged (pure reordering). All 37 regression tests pass (installcheck) on PostgreSQL 18.3. Co-authored-by: GitHub Copilot modified: regress/expected/cypher_vle.out modified: regress/sql/cypher_vle.sql --- regress/expected/cypher_vle.out | 156 ++++++++++++++++---------------- regress/sql/cypher_vle.sql | 78 ++++++++-------- 2 files changed, 117 insertions(+), 117 deletions(-) diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 0f564015b..de85176b6 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -281,7 +281,7 @@ SELECT * FROM cypher('cypher_vle', $$MATCH ()-[*]->(v) RETURN count(*) $$) AS (e (1 row) -- Should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)<-[e*]-(v:end) RETURN e $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)<-[e*]-(v:end) RETURN e ORDER BY e ASC $$) AS (e agtype); e 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge] @@ -289,7 +289,7 @@ SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)<-[e*]-(v:end) RETURN e $$) (2 rows) -- Should find 5 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*1..1]-()-[]-() RETURN p ORDER BY p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*1..1]-()-[]-() RETURN p ORDER BY p ASC $$) AS (e agtype); e 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path @@ -307,54 +307,54 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*]->(v) RETURN count(*) $$) AS (1 row) -- Should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]->(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]->(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); e 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, 
"label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (2 rows) -- Should find 12 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]-(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]-(v:end) RETURN p ORDER BY p ASC $$) AS (e 
agtype); e ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", 
"properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 
844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": 
"poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": 
{}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": 
{}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 
1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 
1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": 
{}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, 
"label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": 
{"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", 
"properties": {}}::vertex]::path (12 rows) -- Each should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[*]-(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[*]-(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); e ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup 
edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (2 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); e 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", 
"number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (2 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN e $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN e ORDER BY e ASC $$) AS (e agtype); e 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge] [{"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, 
"start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge] (2 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*]-()<-[]-(:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*]-()<-[]-(:end) RETURN p ORDER BY p ASC $$) AS (e agtype); e ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 
7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path @@ -460,31 +460,31 @@ $$) AS (e1 agtype, e2 agtype); (1 row) -- Should return 1 path -SELECT * FROM cypher('cypher_vle', $$ MATCH p=()<-[e1*]-(:end)-[e2*]->(:begin) RETURN p $$) AS (result agtype); +SELECT * FROM cypher('cypher_vle', $$ MATCH p=()<-[e1*]-(:end)-[e2*]->(:begin) RETURN p ORDER BY p ASC $$) AS (result agtype); result ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path (1 row) -- Each should return 3 -SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)-[e*0..1]->(v) RETURN id(u), e, id(v) $$) AS (u agtype, e agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)-[e*0..1]->(v) RETURN id(u), e, id(v) ORDER BY id(u) ASC, e ASC, id(v) ASC $$) AS (u agtype, e agtype, v agtype); u | e | v 
-----------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------ 844424930131969 | [] | 844424930131969 - 844424930131969 | [{"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge] | 1407374883553281 844424930131969 | [{"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge] | 1407374883553281 + 844424930131969 | [{"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge] | 1407374883553281 (3 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[e*0..1]->(v) RETURN p $$) AS (p agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[e*0..1]->(v) RETURN p ORDER BY p ASC $$) AS (p agtype); p -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 
844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path (3 rows) -- Each should return 5 -SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[e*0..0]->(v) RETURN id(u), e, id(v) $$) AS (u agtype, e agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[e*0..0]->(v) RETURN id(u), e, id(v) ORDER BY id(u) ASC, e ASC, id(v) ASC $$) AS (u agtype, e agtype, v agtype); u | e | v ------------------+----+------------------ 844424930131969 | [] | 844424930131969 @@ -494,7 +494,7 @@ SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[e*0..0]->(v) RETURN id(u), e, id 1688849860263937 | [] | 1688849860263937 (5 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, id(v) $$) AS (u agtype, p agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, id(v) ORDER BY id(u) ASC, p ASC, id(v) ASC $$) AS (u agtype, p agtype, v agtype); u | p | v ------------------+-------------------------------------------------------------------------------+------------------ 844424930131969 | [{"id": 844424930131969, 
"label": "begin", "properties": {}}::vertex]::path | 844424930131969 @@ -505,13 +505,13 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, (5 rows) -- Each should return 13 and will be the same -SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) AS (p agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p ORDER BY p ASC $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": 
{}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path @@ -519,26 +519,26 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 
1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path 
(13 rows) -SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p $$) AS (p agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p ORDER BY p ASC $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": 
"poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": 
{}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": 
"poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path (13 rows) -- @@ -563,7 +563,7 @@ $$) AS (g1 agtype); /* should return 1 path with 1 edge */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge*]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g2 agtype); g2 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ 
-583,24 +583,24 @@ $$) AS (g3 agtype); /* should find 2 paths with 1 edge */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g4 agtype); g4 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path [{"id": 844424930131969, "label": "Node", "properties": {"name": "a"}}::vertex, {"id": 1125899906842627, "label": "Edge", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex]::path + [{"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path (2 rows) /* should return 3 paths, 2 with 1 edge, 1 with 2 edges */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge*]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g5 agtype); g5 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "Node", "properties": {"name": "a"}}::vertex, {"id": 1125899906842627, "label": "Edge", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path - [{"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path [{"id": 844424930131969, "label": "Node", "properties": {"name": "a"}}::vertex, {"id": 1125899906842627, "label": "Edge", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex]::path + [{"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path + [{"id": 844424930131969, "label": "Node", "properties": {"name": "a"}}::vertex, {"id": 1125899906842627, "label": "Edge", "end_id": 
844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "Node", "properties": {"name": "b"}}::vertex, {"id": 1125899906842626, "label": "Edge", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "Node", "properties": {"name": "c"}}::vertex]::path (3 rows) SELECT drop_graph('mygraph', true); @@ -842,22 +842,22 @@ SELECT * FROM cypher('access',$$ CREATE ()-[:knows {id:2, arry:[0,1,2,3,{name: " --------- (0 rows) -SELECT * FROM cypher('access', $$ MATCH (u)-[e*]->(v) RETURN e $$)as (edges agtype); +SELECT * FROM cypher('access', $$ MATCH (u)-[e*]->(v) RETURN e ORDER BY e ASC $$)as (edges agtype); edges ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 844424930131969, "label": "knows", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}::edge] + [{"id": 844424930131970, "label": "knows", "end_id": 281474976710661, "start_id": 281474976710660, "properties": {}}::edge] [{"id": 844424930131971, "label": "knows", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge] [{"id": 844424930131971, "label": "knows", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge, {"id": 844424930131970, "label": "knows", "end_id": 281474976710661, "start_id": 281474976710660, "properties": {}}::edge] - [{"id": 844424930131970, "label": "knows", "end_id": 281474976710661, "start_id": 281474976710660, "properties": {}}::edge] + [{"id": 844424930131972, "label": "knows", "end_id": 281474976710664, "start_id": 281474976710663, "properties": {"id": 1}}::edge] [{"id": 844424930131973, "label": 
"knows", "end_id": 281474976710663, "start_id": 281474976710662, "properties": {"id": 0}}::edge] [{"id": 844424930131973, "label": "knows", "end_id": 281474976710663, "start_id": 281474976710662, "properties": {"id": 0}}::edge, {"id": 844424930131972, "label": "knows", "end_id": 281474976710664, "start_id": 281474976710663, "properties": {"id": 1}}::edge] - [{"id": 844424930131972, "label": "knows", "end_id": 281474976710664, "start_id": 281474976710663, "properties": {"id": 1}}::edge] + [{"id": 844424930131974, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710666, "properties": {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]}}::edge] [{"id": 844424930131975, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710665, "properties": {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]}}::edge] [{"id": 844424930131975, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710665, "properties": {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]}}::edge, {"id": 844424930131974, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710666, "properties": {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]}}::edge] - [{"id": 844424930131974, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710666, "properties": {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]}}::edge] (10 rows) -SELECT * FROM cypher('access', $$ MATCH (u)-[e*2..2]->(v) RETURN e $$)as (edges agtype); +SELECT * FROM cypher('access', $$ MATCH (u)-[e*2..2]->(v) RETURN e ORDER BY e ASC $$)as (edges agtype); edges ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 844424930131971, 
"label": "knows", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge, {"id": 844424930131970, "label": "knows", "end_id": 281474976710661, "start_id": 281474976710660, "properties": {}}::edge] @@ -865,7 +865,7 @@ SELECT * FROM cypher('access', $$ MATCH (u)-[e*2..2]->(v) RETURN e $$)as (edges [{"id": 844424930131975, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710665, "properties": {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]}}::edge, {"id": 844424930131974, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710666, "properties": {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]}}::edge] (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[0]) $$) as (prop_first_edge agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[0]) ORDER BY id(e[0]) ASC $$) as (prop_first_edge agtype); prop_first_edge -------------------------------------------------- {} @@ -873,7 +873,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[0]) $ {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]} (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].id $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].id ORDER BY id(e[0]) ASC $$) as (results agtype); results --------- @@ -881,7 +881,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].id $$) as (re 2 (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].arry[2] $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].arry[2] ORDER BY id(e[0]) ASC $$) as (results agtype); results --------- @@ -889,7 +889,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].arry[2] $$) a 2 (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[1]) $$) as (prop_second_edge agtype); +SELECT * FROM 
cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[1]) ORDER BY id(e[1]) ASC $$) as (prop_second_edge agtype); prop_second_edge --------------------------------------------------------------------- {} @@ -897,7 +897,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[1]) $ {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]} (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].id $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].id ORDER BY id(e[1]) ASC $$) as (results agtype); results --------- @@ -905,7 +905,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].id $$) as (re 3 (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2] $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2] ORDER BY id(e[1]) ASC $$) as (results agtype); results ------------------------------------------ @@ -913,7 +913,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2] $$) a {"name": "john", "stats": {"age": 1000}} (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2].stats $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2].stats ORDER BY id(e[1]) ASC $$) as (results agtype); results --------------- @@ -921,7 +921,7 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2].stats {"age": 1000} (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[2]) $$) as (prop_third_edge agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[2]) ORDER BY id(e[2]) ASC $$) as (prop_third_edge agtype); prop_third_edge ----------------- @@ -929,37 +929,37 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[2]) $ (3 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN 
properties(e[0]), properties(e[1]) $$) as (prop_1st agtype, prop_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN properties(e[0]), properties(e[1]) ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (prop_1st agtype, prop_2nd agtype); prop_1st | prop_2nd ---------------------------------------------------------------------+--------------------------------------------------------------------- {} | {} | {} | {} {} | - {"id": 0} | - {"id": 0} | {"id": 1} {"id": 1} | - {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]} | - {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]} | {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]} + {"id": 0} | {"id": 1} + {"id": 0} | {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]} | + {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]} | {"id": 3, "arry": [1, 3, {"name": "john", "stats": {"age": 1000}}]} + {"id": 2, "arry": [0, 1, 2, 3, {"name": "joe"}]} | (10 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].id, e[1].id $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].id, e[1].id ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); results_1st | results_2nd -------------+------------- | | | | - 0 | - 0 | 1 1 | - 2 | - 2 | 3 + 0 | 1 + 0 | 3 | + 2 | 3 + 2 | (10 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry, e[1].arry $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry, e[1].arry ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); results_1st | results_2nd --------------------------------------------------+-------------------------------------------------- | @@ -969,12 +969,12 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry, e[1].arry $ | | | - [0, 1, 2, 3, {"name": "joe"}] | - [0, 1, 2, 3, {"name": "joe"}] | [1, 3, {"name": "john", 
"stats": {"age": 1000}}] [1, 3, {"name": "john", "stats": {"age": 1000}}] | + [0, 1, 2, 3, {"name": "joe"}] | [1, 3, {"name": "john", "stats": {"age": 1000}}] + [0, 1, 2, 3, {"name": "joe"}] | (10 rows) -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry[2], e[1].arry[2] $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry[2], e[1].arry[2] ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); results_1st | results_2nd ------------------------------------------+------------------------------------------ | @@ -984,9 +984,9 @@ SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry[2], e[1].arr | | | - 2 | - 2 | {"name": "john", "stats": {"age": 1000}} {"name": "john", "stats": {"age": 1000}} | + 2 | {"name": "john", "stats": {"age": 1000}} + 2 | (10 rows) SELECT drop_graph('access', true); @@ -1013,7 +1013,7 @@ SELECT * FROM cypher('issue_1043', $$ CREATE (n)-[:KNOWS {n:'hello'}]->({n:'hell --- (0 rows) -SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x $$) as (a agtype); +SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x ORDER BY id(x) ASC $$) as (a agtype); a ---------------------------------------------------------------------------- {"id": 281474976710658, "label": "", "properties": {"n": "hello"}}::vertex @@ -1024,13 +1024,13 @@ SELECT * FROM cypher('issue_1043', $$ CREATE (n)-[:KNOWS {n:'hello'}]->({n:'hell --- (0 rows) -SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x $$) as (a agtype); +SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x ORDER BY id(x) ASC $$) as (a agtype); a ---------------------------------------------------------------------------- {"id": 281474976710658, "label": "", "properties": {"n": "hello"}}::vertex - {"id": 281474976710660, "label": "", "properties": {"n": "hello"}}::vertex {"id": 
281474976710658, "label": "", "properties": {"n": "hello"}}::vertex {"id": 281474976710660, "label": "", "properties": {"n": "hello"}}::vertex + {"id": 281474976710660, "label": "", "properties": {"n": "hello"}}::vertex (4 rows) SELECT drop_graph('issue_1043', true); @@ -1053,7 +1053,7 @@ NOTICE: graph "issue_1910" has been created (1 row) SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*1]-({name: 'Willem Defoe'})) - RETURN n.full_name $$) AS (full_name agtype); + RETURN n.full_name ORDER BY id(n) ASC $$) AS (full_name agtype); full_name ----------- (0 rows) @@ -1075,7 +1075,7 @@ SELECT * FROM cypher('issue_1910', $$ MATCH (u {name: 'John Doe'}) (0 rows) SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); name ---------------- "Jane Doe" @@ -1084,7 +1084,7 @@ SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*]-({name: 'Wi (3 rows) SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*1]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); name ---------------- "John Doe" @@ -1092,7 +1092,7 @@ SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*1]-({name: 'W (2 rows) SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*2..2]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); name ------------ "Jane Doe" diff --git a/regress/sql/cypher_vle.sql b/regress/sql/cypher_vle.sql index 4592a7fbd..5f3f54ed2 100644 --- a/regress/sql/cypher_vle.sql +++ b/regress/sql/cypher_vle.sql @@ -105,20 +105,20 @@ SELECT * FROM cypher('cypher_vle', $$MATCH ()-[*]->() RETURN count(*) $$) AS (e SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[*]->() RETURN count(*) $$) AS (e agtype); SELECT * FROM cypher('cypher_vle', $$MATCH ()-[*]->(v) RETURN 
count(*) $$) AS (e agtype); -- Should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)<-[e*]-(v:end) RETURN e $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)<-[e*]-(v:end) RETURN e ORDER BY e ASC $$) AS (e agtype); -- Should find 5 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*1..1]-()-[]-() RETURN p ORDER BY p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*1..1]-()-[]-() RETURN p ORDER BY p ASC $$) AS (e agtype); -- Should find 2922 SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*]->(v) RETURN count(*) $$) AS (e agtype); -- Should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]->(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]->(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); -- Should find 12 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]-(v:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[*3..3]-(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); -- Each should find 2 -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[*]-(v:end) RETURN p $$) AS (e agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN p $$) AS (e agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN e $$) AS (e agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*]-()<-[]-(:end) RETURN p $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[*]-(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN p ORDER BY p ASC $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)<-[e*]-(v:end) RETURN e ORDER BY e ASC $$) AS (e agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(:begin)<-[*]-()<-[]-(:end) RETURN p ORDER BY p ASC $$) AS (e agtype); -- Each should return 31 SELECT count(*) FROM 
cypher('cypher_vle', $$ MATCH ()-[e1]->(v)-[e2]->() RETURN e1,e2 $$) AS (e1 agtype, e2 agtype); SELECT count(*) FROM cypher('cypher_vle', $$ @@ -163,16 +163,16 @@ FROM cypher('cypher_vle', $$ RETURN a, e $$) AS (e1 agtype, e2 agtype); -- Should return 1 path -SELECT * FROM cypher('cypher_vle', $$ MATCH p=()<-[e1*]-(:end)-[e2*]->(:begin) RETURN p $$) AS (result agtype); +SELECT * FROM cypher('cypher_vle', $$ MATCH p=()<-[e1*]-(:end)-[e2*]->(:begin) RETURN p ORDER BY p ASC $$) AS (result agtype); -- Each should return 3 -SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)-[e*0..1]->(v) RETURN id(u), e, id(v) $$) AS (u agtype, e agtype, v agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[e*0..1]->(v) RETURN p $$) AS (p agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u:begin)-[e*0..1]->(v) RETURN id(u), e, id(v) ORDER BY id(u) ASC, e ASC, id(v) ASC $$) AS (u agtype, e agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u:begin)-[e*0..1]->(v) RETURN p ORDER BY p ASC $$) AS (p agtype); -- Each should return 5 -SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[e*0..0]->(v) RETURN id(u), e, id(v) $$) AS (u agtype, e agtype, v agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, id(v) $$) AS (u agtype, p agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH (u)-[e*0..0]->(v) RETURN id(u), e, id(v) ORDER BY id(u) ASC, e ASC, id(v) ASC $$) AS (u agtype, e agtype, v agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, id(v) ORDER BY id(u) ASC, p ASC, id(v) ASC $$) AS (u agtype, p agtype, v agtype); -- Each should return 13 and will be the same -SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) AS (p agtype); -SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p $$) AS (p agtype); +SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p ORDER BY p ASC $$) AS (p agtype); +SELECT * FROM 
cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p ORDER BY p ASC $$) AS (p agtype); -- -- Test VLE inside of a BEGIN/COMMIT block @@ -189,7 +189,7 @@ $$) AS (g1 agtype); /* should return 1 path with 1 edge */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge*]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g2 agtype); /* should delete the original path and replace it with a path with 2 edges */ @@ -202,13 +202,13 @@ $$) AS (g3 agtype); /* should find 2 paths with 1 edge */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g4 agtype); /* should return 3 paths, 2 with 1 edge, 1 with 2 edges */ SELECT * FROM cypher('mygraph', $$ MATCH p = ()-[:Edge*]->() - RETURN p + RETURN p ORDER BY p ASC $$) AS (g5 agtype); SELECT drop_graph('mygraph', true); @@ -312,48 +312,48 @@ SELECT * FROM cypher('access',$$ CREATE ()-[:knows]->() $$) as (results agtype); SELECT * FROM cypher('access',$$ CREATE ()-[:knows]->()-[:knows]->()$$) as (results agtype); SELECT * FROM cypher('access',$$ CREATE ()-[:knows {id:0}]->()-[:knows {id: 1}]->() $$) as (results agtype); SELECT * FROM cypher('access',$$ CREATE ()-[:knows {id:2, arry:[0,1,2,3,{name: "joe"}]}]->()-[:knows {id: 3, arry:[1,3,{name:"john", stats: {age: 1000}}]}]->() $$) as (results agtype); -SELECT * FROM cypher('access', $$ MATCH (u)-[e*]->(v) RETURN e $$)as (edges agtype); -SELECT * FROM cypher('access', $$ MATCH (u)-[e*2..2]->(v) RETURN e $$)as (edges agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[0]) $$) as (prop_first_edge agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].id $$) as (results agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].arry[2] $$) as (results agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[1]) $$) as (prop_second_edge agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].id $$) as 
(results agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2] $$) as (results agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2].stats $$) as (results agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[2]) $$) as (prop_third_edge agtype); - -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN properties(e[0]), properties(e[1]) $$) as (prop_1st agtype, prop_2nd agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].id, e[1].id $$) as (results_1st agtype, results_2nd agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry, e[1].arry $$) as (results_1st agtype, results_2nd agtype); -SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry[2], e[1].arry[2] $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access', $$ MATCH (u)-[e*]->(v) RETURN e ORDER BY e ASC $$)as (edges agtype); +SELECT * FROM cypher('access', $$ MATCH (u)-[e*2..2]->(v) RETURN e ORDER BY e ASC $$)as (edges agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[0]) ORDER BY id(e[0]) ASC $$) as (prop_first_edge agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].id ORDER BY id(e[0]) ASC $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[0].arry[2] ORDER BY id(e[0]) ASC $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN properties(e[1]) ORDER BY id(e[1]) ASC $$) as (prop_second_edge agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].id ORDER BY id(e[1]) ASC $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2] ORDER BY id(e[1]) ASC $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*2..2]->() RETURN e[1].arry[2].stats ORDER BY id(e[1]) ASC $$) as (results agtype); +SELECT * FROM cypher('access',$$ MATCH 
()-[e*2..2]->() RETURN properties(e[2]) ORDER BY id(e[2]) ASC $$) as (prop_third_edge agtype); + +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN properties(e[0]), properties(e[1]) ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (prop_1st agtype, prop_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].id, e[1].id ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry, e[1].arry ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); +SELECT * FROM cypher('access',$$ MATCH ()-[e*]->() RETURN e[0].arry[2], e[1].arry[2] ORDER BY id(e[0]) ASC, id(e[1]) ASC $$) as (results_1st agtype, results_2nd agtype); SELECT drop_graph('access', true); -- issue 1043 SELECT create_graph('issue_1043'); SELECT * FROM cypher('issue_1043', $$ CREATE (n)-[:KNOWS {n:'hello'}]->({n:'hello'}) $$) as (a agtype); -SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x $$) as (a agtype); +SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x ORDER BY id(x) ASC $$) as (a agtype); SELECT * FROM cypher('issue_1043', $$ CREATE (n)-[:KNOWS {n:'hello'}]->({n:'hello'}) $$) as (a agtype); -SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x $$) as (a agtype); +SELECT * FROM cypher('issue_1043', $$ MATCH (x)<-[y *]-(),({n:y[0].n}) RETURN x ORDER BY id(x) ASC $$) as (a agtype); SELECT drop_graph('issue_1043', true); -- issue 1910 SELECT create_graph('issue_1910'); SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*1]-({name: 'Willem Defoe'})) - RETURN n.full_name $$) AS (full_name agtype); + RETURN n.full_name ORDER BY id(n) ASC $$) AS (full_name agtype); SELECT * FROM cypher('issue_1910', $$ CREATE ({name: 'Jane Doe'})-[:KNOWS]->({name: 'John Doe'}) $$) AS (result agtype); SELECT * FROM cypher('issue_1910', $$ CREATE ({name: 'Donald 
Defoe'})-[:KNOWS]->({name: 'Willem Defoe'}) $$) AS (result agtype); SELECT * FROM cypher('issue_1910', $$ MATCH (u {name: 'John Doe'}) MERGE (u)-[:KNOWS]->({name: 'Willem Defoe'}) $$) AS (result agtype); SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*1]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); SELECT * FROM cypher('issue_1910', $$ MATCH (n) WHERE EXISTS((n)-[*2..2]-({name: 'Willem Defoe'})) - RETURN n.name $$) AS (name agtype); + RETURN n.name ORDER BY id(n) ASC $$) AS (name agtype); SELECT drop_graph('issue_1910', true); From 7f45c7dc2d0290eec3c08b520a22d1b33921c065 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 20 Jun 2026 09:57:33 -0700 Subject: [PATCH 04/20] age_global_graph: stabilize regression tests (#2431) age_global_graph: stabilize regression tests under concurrent xid load Wrap both vertex_stats() context-building phases in a single BEGIN ISOLATION LEVEL REPEATABLE READ; ... COMMIT; transaction so the three calls share one snapshot. This prevents the snapshot-fallback path in is_ggctx_invalid() from purging an already-built graph context when concurrent xid activity (autovacuum, parallel installcheck, replication, shared CI) advances the snapshot between calls, which would otherwise make the targeted delete_global_graphs(name) checks return false instead of the expected true. Read Committed is insufficient because it acquires a fresh snapshot per statement; REPEATABLE READ pins one snapshot for the whole transaction. Also add explicit ORDER BY id to the three direct-SQL label-table SELECTs (_ag_label_vertex x2, _ag_label_edge) that return multiple rows, so their output no longer depends on heap scan order. 
This is a test-only change (regress/sql/age_global_graph.sql and regress/expected/age_global_graph.out); no extension C code or SQL is modified. All 37 regression tests pass (installcheck) on PostgreSQL 18.3. Co-authored-by: GitHub Copilot modified: regress/expected/age_global_graph.out modified: regress/sql/age_global_graph.sql --- regress/expected/age_global_graph.out | 19 ++++++++++++++++--- regress/sql/age_global_graph.sql | 19 ++++++++++++++++--- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/regress/expected/age_global_graph.out b/regress/expected/age_global_graph.out index cbfeb6f3c..4833511a7 100644 --- a/regress/expected/age_global_graph.out +++ b/regress/expected/age_global_graph.out @@ -44,6 +44,14 @@ SELECT * FROM cypher('ag_graph_3', $$ CREATE (v:vertex3) RETURN v $$) AS (v agt (1 row) -- load contexts using the vertex_stats command +-- Build all three graph contexts under one snapshot. The vertex_stats() +-- calls are wrapped in a single REPEATABLE READ transaction so they share +-- one snapshot; this keeps the snapshot-fallback path in is_ggctx_invalid() +-- from purging an already-built context when concurrent xid activity +-- (autovacuum, parallel installcheck, replication) advances the snapshot +-- between calls. Read Committed is insufficient: it takes a fresh snapshot +-- per statement. 
+BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT * FROM cypher('ag_graph_3', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); result ----------------------------------------------------------------------------------------------- @@ -62,6 +70,7 @@ SELECT * FROM cypher('ag_graph_1', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY {"id": 844424930131969, "label": "vertex1", "in_degree": 0, "out_degree": 0, "self_loops": 0} (1 row) +COMMIT; --- loading undefined contexts --- should throw exception - graph "ag_graph_4" does not exist SELECT * FROM cypher('ag_graph_4', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); @@ -130,6 +139,9 @@ LINE 1: SELECT * FROM cypher('ag_graph_4', $$ RETURN delete_global_g... -- delete_GRAPH_global_contexts -- -- load contexts again +-- Same REPEATABLE READ wrap as the first build phase above, for the same +-- snapshot-stability reason. +BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT * FROM cypher('ag_graph_3', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); result ----------------------------------------------------------------------------------------------- @@ -148,6 +160,7 @@ SELECT * FROM cypher('ag_graph_1', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY {"id": 844424930131969, "label": "vertex1", "in_degree": 0, "out_degree": 0, "self_loops": 0} (1 row) +COMMIT; -- delete all graph contexts -- should return true SELECT * FROM cypher('ag_graph_1', $$ RETURN delete_global_graphs(NULL) $$) AS (result agtype); @@ -306,7 +319,7 @@ SELECT * FROM cypher('ag_graph_1', $$ RETURN graph_stats('ag_graph_1') $$) AS (r (1 row) -- remove some vertices -SELECT * FROM ag_graph_1._ag_label_vertex; +SELECT * FROM ag_graph_1._ag_label_vertex ORDER BY id; id | properties -----------------+-------------------------------------- 281474976710657 | {} @@ -325,7 +338,7 @@ SELECT * FROM ag_graph_1._ag_label_vertex; DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710661'; 
DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710662'; DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710664'; -SELECT * FROM ag_graph_1._ag_label_vertex; +SELECT * FROM ag_graph_1._ag_label_vertex ORDER BY id; id | properties -----------------+-------------------------------------- 281474976710657 | {} @@ -338,7 +351,7 @@ SELECT * FROM ag_graph_1._ag_label_vertex; 844424930131969 | {} (8 rows) -SELECT * FROM ag_graph_1._ag_label_edge; +SELECT * FROM ag_graph_1._ag_label_edge ORDER BY id; id | start_id | end_id | properties ------------------+-----------------+-----------------+------------ 1125899906842625 | 281474976710659 | 281474976710660 | {} diff --git a/regress/sql/age_global_graph.sql b/regress/sql/age_global_graph.sql index 6ee25e1f3..9f4a1ce2d 100644 --- a/regress/sql/age_global_graph.sql +++ b/regress/sql/age_global_graph.sql @@ -16,9 +16,18 @@ SELECT * FROM create_graph('ag_graph_3'); SELECT * FROM cypher('ag_graph_3', $$ CREATE (v:vertex3) RETURN v $$) AS (v agtype); -- load contexts using the vertex_stats command +-- Build all three graph contexts under one snapshot. The vertex_stats() +-- calls are wrapped in a single REPEATABLE READ transaction so they share +-- one snapshot; this keeps the snapshot-fallback path in is_ggctx_invalid() +-- from purging an already-built context when concurrent xid activity +-- (autovacuum, parallel installcheck, replication) advances the snapshot +-- between calls. Read Committed is insufficient: it takes a fresh snapshot +-- per statement. 
+BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT * FROM cypher('ag_graph_3', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); SELECT * FROM cypher('ag_graph_2', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); SELECT * FROM cypher('ag_graph_1', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); +COMMIT; --- loading undefined contexts --- should throw exception - graph "ag_graph_4" does not exist @@ -55,9 +64,13 @@ SELECT * FROM cypher('ag_graph_4', $$ RETURN delete_global_graphs('ag_graph_4') -- -- load contexts again +-- Same REPEATABLE READ wrap as the first build phase above, for the same +-- snapshot-stability reason. +BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT * FROM cypher('ag_graph_3', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); SELECT * FROM cypher('ag_graph_2', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); SELECT * FROM cypher('ag_graph_1', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) AS (result agtype); +COMMIT; -- delete all graph contexts -- should return true @@ -115,12 +128,12 @@ SELECT * FROM cypher('ag_graph_1', $$ MATCH (u)-[e]->(v) RETURN u, e, v ORDER BY -- what is there now? 
SELECT * FROM cypher('ag_graph_1', $$ RETURN graph_stats('ag_graph_1') $$) AS (result agtype); -- remove some vertices -SELECT * FROM ag_graph_1._ag_label_vertex; +SELECT * FROM ag_graph_1._ag_label_vertex ORDER BY id; DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710661'; DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710662'; DELETE FROM ag_graph_1._ag_label_vertex WHERE id::text = '281474976710664'; -SELECT * FROM ag_graph_1._ag_label_vertex; -SELECT * FROM ag_graph_1._ag_label_edge; +SELECT * FROM ag_graph_1._ag_label_vertex ORDER BY id; +SELECT * FROM ag_graph_1._ag_label_edge ORDER BY id; -- The graph_stats query below will produce warnings for the dangling edges -- created by the DELETE commands above. The warnings appear in nondeterministic -- order because they come from iterating edge label tables (knows, stalks), From 22c65673eb32b1093dc10a92ea1d09d68ec3df49 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 20 Jun 2026 09:58:14 -0700 Subject: [PATCH 05/20] Makefile: add installcheck-existing target and improve readability (#2437) Add an "installcheck-existing" target that runs the regression suite against an already-running PostgreSQL server, complementing the default "installcheck" (which builds a private temp instance). It points pg_regress at the server via the standard libpq variables (PGHOST/PGPORT/PGUSER; PGDATABASE defaults to contrib_regression) and lets pg_regress create the database and load the extension through --load-extension=age. It deliberately avoids --use-existing -- that option skips database creation and disables --load-extension -- so no manual CREATE EXTENSION step is required. The upgrade test (age_upgrade) is excluded because it stages synthetic extension files into the local sharedir that a running server would not see. 
Readability and maintainability improvements (no behavior change): - Derive age_sql from AGE_CURR_VER (read from age.control) so the version number is defined in exactly one place. - Add section banners and a top-of-file layout/target index; group the scattered upgrade-test pieces and move the ag_scanner flex rule in with the other parser-generation rules. - Wrap the long REGRESS_OPTS and EXTRA_CLEAN assignments across lines. - Fix the DATA filter-out pattern to use a double dash (age--%--y.y.y.sql) matching the actual template filename; the prior single-dash pattern only matched via greedy '%' expansion. - Anchor the age.control version regex (/^default_version/). - Replace the hardcoded "31 tests" comment with generic wording and add a "help" target listing the common targets. Verified on PostgreSQL 18: make installcheck (temp instance) and make installcheck-existing both pass; clean rebuild and make clean unaffected. Co-authored-by: GitHub Copilot modified: Makefile --- Makefile | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 125 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 3ea9236a6..a4a93aaa1 100644 --- a/Makefile +++ b/Makefile @@ -15,11 +15,38 @@ # specific language governing permissions and limitations # under the License. 
+# =========================================================================== +# Apache AGE extension build +# +# File layout (top to bottom): +# * Module +# * Upgrade regression-test support (1/2: variables) +# * Extension SQL & data files +# * Regression test suite (REGRESS / REGRESS_OPTS) +# * PGXS include +# * Build rules +# * Upgrade regression-test support (2/2: rules + installcheck lifecycle) +# * installcheck-existing (run against a running server) +# * help +# +# Common targets: +# all Build the extension (default) +# install Install into the PostgreSQL tree +# installcheck Run regression tests in a private temp instance +# installcheck-existing Run regression tests against a running server +# clean Remove build artifacts +# help Show the target list +# =========================================================================== + +# ===== Module ===== MODULE_big = age -age_sql = age--1.7.0.sql - -# --- Extension upgrade regression test support --- +# ===== Upgrade regression-test support (1/2: variables) ===== +# +# This feature spans two sections (the PGXS include forces the split): +# * 1/2 (here, pre-include): variables -- must be defined before DATA, +# REGRESS, and EXTRA_CLEAN reference them. +# * 2/2 (below the PGXS include): build rules + installcheck lifecycle. # # Validates the upgrade template (age----y.y.y.sql) by simulating an # extension version upgrade entirely within "make installcheck". The test: @@ -53,7 +80,7 @@ age_sql = age--1.7.0.sql # (e.g., age--1.7.0--1.8.0.sql is committed): the synthetic test is # redundant because the real script ships with the extension. 
# Current version from age.control (e.g., "1.7.0") -AGE_CURR_VER := $(shell awk -F"'" '/default_version/ {print $$2}' age.control 2>/dev/null) +AGE_CURR_VER := $(shell awk -F"'" '/^default_version/ {print $$2}' age.control 2>/dev/null) # Git commit that last changed age.control — the "initial release" commit AGE_VER_COMMIT := $(shell git log -1 --format=%H -- age.control 2>/dev/null) # Synthetic initial version: current version with _initial suffix @@ -80,6 +107,7 @@ AGE_REAL_UPGRADE := $(shell git ls-files 'age--$(AGE_CURR_VER)--*.sql' 2>/dev/nu # supersedes the synthetic one and has its own validation path. AGE_HAS_UPGRADE_TEST = $(and $(AGE_VER_COMMIT),$(AGE_UPGRADE_TEMPLATE),$(if $(AGE_REAL_UPGRADE),,yes)) +# ===== Object files ===== OBJS = src/backend/age.o \ src/backend/catalog/ag_catalog.o \ src/backend/catalog/ag_graph.o \ @@ -134,6 +162,7 @@ OBJS = src/backend/age.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o +# ===== Extension SQL & data files ===== EXTENSION = age # to allow cleaning of previous (old) age--.sql files @@ -143,12 +172,18 @@ SQLS := $(shell cat sql/sql_files) SQLS := $(addprefix sql/,$(SQLS)) SQLS := $(addsuffix .sql,$(SQLS)) +# Name of the generated install SQL (age--.sql). +# Derived from AGE_CURR_VER (read from age.control above) so the version +# number lives in exactly one place. +age_sql = age--$(AGE_CURR_VER).sql + DATA_built = $(age_sql) # Git-tracked upgrade scripts shipped with the extension (e.g., age--1.6.0--1.7.0.sql). # Excludes the upgrade template (y.y.y) and the synthetic stamped test file. 
-DATA = $(filter-out age--%-y.y.y.sql $(age_upgrade_test_sql),$(wildcard age--*--*.sql)) +DATA = $(filter-out age--%--y.y.y.sql $(age_upgrade_test_sql),$(wildcard age--*--*.sql)) +# ===== Regression test suite ===== # sorted in dependency order REGRESS = scan \ graphid \ @@ -203,10 +238,23 @@ REGRESS += drop srcdir=`pwd` ag_regress_dir = $(srcdir)/regress -REGRESS_OPTS = --load-extension=age --inputdir=$(ag_regress_dir) --outputdir=$(ag_regress_dir) --temp-instance=$(ag_regress_dir)/instance --port=61958 --encoding=UTF-8 --temp-config $(ag_regress_dir)/age_regression.conf +REGRESS_OPTS = --load-extension=age \ + --inputdir=$(ag_regress_dir) \ + --outputdir=$(ag_regress_dir) \ + --temp-instance=$(ag_regress_dir)/instance \ + --port=61958 \ + --encoding=UTF-8 \ + --temp-config $(ag_regress_dir)/age_regression.conf ag_regress_out = instance/ log/ results/ regression.* -EXTRA_CLEAN = $(addprefix $(ag_regress_dir)/, $(ag_regress_out)) src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/include/parser/cypher_kwlist_d.h $(all_age_sql) $(age_init_sql) $(age_upgrade_test_sql) $(ag_regress_dir)/age_upgrade_cleanup.sh +EXTRA_CLEAN = $(addprefix $(ag_regress_dir)/, $(ag_regress_out)) \ + src/backend/parser/cypher_gram.c \ + src/include/parser/cypher_gram_def.h \ + src/include/parser/cypher_kwlist_d.h \ + $(all_age_sql) \ + $(age_init_sql) \ + $(age_upgrade_test_sql) \ + $(ag_regress_dir)/age_upgrade_cleanup.sh GEN_KEYWORDLIST = $(PERL) -I ./tools/ ./tools/gen_keywordlist.pl GEN_KEYWORDLIST_DEPS = ./tools/gen_keywordlist.pl tools/PerfectHash.pm @@ -214,10 +262,13 @@ GEN_KEYWORDLIST_DEPS = ./tools/gen_keywordlist.pl tools/PerfectHash.pm ag_include_dir = $(srcdir)/src/include PG_CPPFLAGS = -I$(ag_include_dir) -I$(ag_include_dir)/parser +# ===== PGXS ===== PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# ===== Build rules ===== + # 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4) # When 
SIZEOF_DATUM=4, PASSEDBYVALUE is stripped from graphid type for pass-by-reference. # If not specified, normal 64-bit behavior is used (PASSEDBYVALUE preserved). @@ -235,10 +286,11 @@ src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_keywords.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_keywords.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h +src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes # Build the default install SQL (age--.sql) from current HEAD's sql/sql_files. # This is what CREATE EXTENSION age installs — it contains ALL current functions. -# All 31 non-upgrade regression tests run against this complete SQL. +# Every non-upgrade regression test runs against this complete SQL. $(age_sql): $(SQLS) @echo "Building install SQL: $@ from HEAD" @cat $(SQLS) > $@ @@ -247,6 +299,12 @@ ifeq ($(SIZEOF_DATUM),4) @sed 's/^ PASSEDBYVALUE,$$/ -- PASSEDBYVALUE removed for 32-bit (see Makefile)/' $@ > $@.tmp && mv $@.tmp $@ endif +# ===== Upgrade regression-test support (2/2: rules + installcheck lifecycle) ===== +# +# Part 1/2 (variables) is above the PGXS include; the rules and target +# hooks below must follow the include. +# +# --- Synthetic SQL rules --- # Build synthetic "initial" version install SQL from the version-bump commit. # This represents the pre-upgrade state — the SQL at the time the version was # bumped in age.control. Used only by the upgrade test. 
@@ -266,9 +324,7 @@ $(age_upgrade_test_sql): $(AGE_UPGRADE_TEMPLATE) @sed -e "s/1\.X\.0/$(AGE_CURR_VER)/g" -e "s/y\.y\.y/$(AGE_CURR_VER)/g" $< > $@ endif -src/backend/parser/ag_scanner.c: FLEX_NO_BACKUP=yes - -# --- Upgrade test file lifecycle during installcheck --- +# --- installcheck lifecycle: stage synthetic files, then clean up --- # # Problem: The upgrade test needs age--.sql and age----.sql # in the PG extension directory for CREATE EXTENSION VERSION and ALTER @@ -288,7 +344,7 @@ SHAREDIR = $(shell $(PG_CONFIG) --sharedir) installcheck: export LC_COLLATE=C ifneq ($(AGE_HAS_UPGRADE_TEST),) .PHONY: _install_upgrade_test_files -_install_upgrade_test_files: $(age_init_sql) $(age_upgrade_test_sql) ## Build, install synthetic files, generate cleanup script +_install_upgrade_test_files: $(age_init_sql) $(age_upgrade_test_sql) # Build, install synthetic files, generate cleanup script @echo "Installing upgrade test files to $(SHAREDIR)/extension/" @$(INSTALL_DATA) $(age_init_sql) $(age_upgrade_test_sql) '$(SHAREDIR)/extension/' @printf '#!/bin/sh\nrm -f "$(SHAREDIR)/extension/$(age_init_sql)" "$(SHAREDIR)/extension/$(age_upgrade_test_sql)"\nrm -f "$(age_init_sql)" "$(age_upgrade_test_sql)" "$(ag_regress_dir)/age_upgrade_cleanup.sh"\n' > $(ag_regress_dir)/age_upgrade_cleanup.sh @@ -296,3 +352,60 @@ _install_upgrade_test_files: $(age_init_sql) $(age_upgrade_test_sql) ## Build, installcheck: _install_upgrade_test_files endif + +# ===== installcheck-existing: run tests against a running server ===== +# +# Runs the regression suite against an already-running PostgreSQL server +# instead of the private temp instance built by "make installcheck". +# +# "make installcheck" appends --temp-instance to REGRESS_OPTS, so it builds +# its own throwaway cluster and needs no running server. This target instead +# connects to the server selected by the standard libpq environment variables +# (PGHOST/PGPORT/PGUSER); PGDATABASE defaults to contrib_regression. 
Override +# any of them on the command line, e.g.: +# +# make installcheck-existing PGHOST=localhost PGPORT=5432 PGUSER=postgres +# +# pg_regress creates the database and loads the extension itself through +# --load-extension=age -- exactly as the temp-instance path does -- so no +# manual "CREATE EXTENSION" step is required. The connecting role must be +# allowed to CREATE DATABASE. +# +# This deliberately does NOT pass pg_regress --use-existing: that option skips +# database creation (which also disables --load-extension) and is only needed +# on clusters where the test role cannot CREATE DATABASE. For that narrow +# case, pre-create the database and extension and add --use-existing to +# EXTRA_REGRESS_OPTS. +# +# The upgrade test (age_upgrade) is excluded here: it installs synthetic +# extension files into the local $(SHAREDIR), which an existing or remote +# server would not see. Validate the upgrade path with "make installcheck". +# +# Locale note: locale-sensitive comparisons follow the existing server's own +# collation (fixed at its initdb time); the temp-instance locale flags do not +# apply to an already-running server. 
+PGDATABASE ?= contrib_regression +REGRESS_EXISTING = $(filter-out age_upgrade,$(REGRESS)) + +.PHONY: installcheck-existing +installcheck-existing: + $(pg_regress_installcheck) \ + --inputdir=$(ag_regress_dir) \ + --outputdir=$(ag_regress_dir) \ + --load-extension=age \ + $(if $(PGHOST),--host=$(PGHOST)) \ + $(if $(PGPORT),--port=$(PGPORT)) \ + $(if $(PGUSER),--user=$(PGUSER)) \ + --dbname=$(PGDATABASE) \ + $(REGRESS_EXISTING) + +# ===== Help ===== +.PHONY: help +help: + @echo "Apache AGE - common make targets:" + @echo " all Build the extension (default target)" + @echo " install Install the extension into the PostgreSQL tree" + @echo " installcheck Run the regression suite in a private temp instance" + @echo " installcheck-existing Run the regression suite against a running server" + @echo " clean Remove build artifacts" + @echo " help Show this message" From a6b547134dcdce06b8741bef7ec8308120ac8b6e Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 21 Jun 2026 14:33:12 -0700 Subject: [PATCH 06/20] Make ag_catalog ownership and built-in resolution explicit (#2440) AGE places all of its objects in the ag_catalog schema. Make the assumptions around that schema explicit so installs and upgrades behave predictably regardless of how a database is provisioned: - Ownership-checked install: CREATE EXTENSION age installs into ag_catalog only when that schema does not already exist under a different owner, keeping ownership of AGE's catalog well-defined. - Deterministic name resolution: the pg_upgrade helper functions resolve built-ins from pg_catalog first and schema-qualify their format()/hashtext() calls, so their behavior does not depend on what else is defined in ag_catalog. - README note describing ag_catalog ownership and the install-time check. The upgrade script applies the same helper changes so existing installations get them on ALTER EXTENSION UPDATE. 
Adds an extension_security regression test covering the ownership check and the qualified-call / search_path properties. Assisted-by: GitHub Copilot (Claude Opus 4.8) modified: Makefile modified: README.md modified: age--1.7.0--y.y.y.sql new file: regress/expected/extension_security.out new file: regress/sql/extension_security.sql modified: sql/age_main.sql modified: sql/age_pg_upgrade.sql Resolved Conflicts: Makefile --- Makefile | 3 +- README.md | 10 +++ age--1.7.0--y.y.y.sql | 34 ++++++--- regress/expected/extension_security.out | 99 +++++++++++++++++++++++++ regress/sql/extension_security.sql | 82 ++++++++++++++++++++ sql/age_main.sql | 27 +++++++ sql/age_pg_upgrade.sql | 32 +++++--- 7 files changed, 265 insertions(+), 22 deletions(-) create mode 100644 regress/expected/extension_security.out create mode 100644 regress/sql/extension_security.sql diff --git a/Makefile b/Makefile index a4a93aaa1..b2d93ff4b 100644 --- a/Makefile +++ b/Makefile @@ -220,7 +220,8 @@ REGRESS = scan \ reserved_keyword_alias \ agtype_jsonb_cast \ containment_selectivity \ - subgraph + subgraph \ + extension_security ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/README.md b/README.md index 819d9dcde..412b55e8d 100644 --- a/README.md +++ b/README.md @@ -215,6 +215,16 @@ LOAD 'age'; SET search_path = ag_catalog, "$user", public; ``` +### Note on `ag_catalog` ownership + +AGE installs all of its objects into the `ag_catalog` schema. Install AGE +(`CREATE EXTENSION age`) **before** granting the `CREATE` privilege on the +database to other roles. A role that can create schemas could otherwise +pre-create `ag_catalog` and own it; `CREATE EXTENSION age` therefore refuses to +install when `ag_catalog` already exists and is owned by a different role. If you +hit that error, drop the stray schema (`DROP SCHEMA ag_catalog CASCADE`) or +transfer its ownership to the installing role, then retry. +

  Using AGE with Non-Autocommit Clients (psycopg, JDBC, etc.)

If you are using AGE from a database client that does **not** default to autocommit — most commonly `psycopg` v3 or JDBC — you must understand how PostgreSQL's transaction semantics apply to AGE's setup and DDL-like functions. Otherwise, you may see graphs or labels that appear to be created successfully, but are not visible from new connections. diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index 282eaa0f9..ad2ce20fd 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -41,7 +41,10 @@ CREATE FUNCTION ag_catalog.age_prepare_pg_upgrade() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. + SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE graph_count integer; @@ -108,7 +111,10 @@ COMMENT ON FUNCTION ag_catalog.age_prepare_pg_upgrade() IS CREATE FUNCTION ag_catalog.age_finish_pg_upgrade() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. + SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE mapping_count integer; @@ -231,7 +237,7 @@ BEGIN -- and preserve original schema ownership. 
-- RAISE NOTICE 'Invalidating AGE caches...'; - PERFORM pg_catalog.pg_advisory_xact_lock(hashtext('age_finish_pg_upgrade')); + PERFORM pg_catalog.pg_advisory_xact_lock(pg_catalog.hashtext('age_finish_pg_upgrade')); DECLARE graph_rec RECORD; cache_invalidated boolean := false; @@ -245,8 +251,8 @@ BEGIN BEGIN -- Touch schema by changing owner to current_user then back to original -- This triggers cache invalidation without permanently changing ownership - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); cache_invalidated := true; EXCEPTION WHEN insufficient_privilege THEN -- If we can't change ownership, skip this schema @@ -273,7 +279,10 @@ COMMENT ON FUNCTION ag_catalog.age_finish_pg_upgrade() IS CREATE FUNCTION ag_catalog.age_revert_pg_upgrade_changes() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. 
+ SET search_path = pg_catalog, ag_catalog AS $function$ BEGIN -- Check if namespace column is oid type (needs reverting) @@ -306,7 +315,7 @@ BEGIN -- Invalidate AGE's internal caches by touching each graph's namespace -- We use xact-level advisory lock and preserve original ownership -- - PERFORM pg_catalog.pg_advisory_xact_lock(hashtext('age_revert_pg_upgrade')); + PERFORM pg_catalog.pg_advisory_xact_lock(pg_catalog.hashtext('age_revert_pg_upgrade')); DECLARE graph_rec RECORD; BEGIN @@ -318,8 +327,8 @@ BEGIN LOOP BEGIN -- Touch schema by changing owner to current_user then back to original - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); EXCEPTION WHEN insufficient_privilege THEN RAISE NOTICE 'Could not invalidate cache for schema % (insufficient privileges)', graph_rec.ns_name; END; @@ -345,7 +354,10 @@ CREATE FUNCTION ag_catalog.age_pg_upgrade_status() message text ) LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. 
+ SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE ns_type text; @@ -447,7 +459,7 @@ BEGIN AND t.tgname = '_age_cache_invalidate' ) THEN - EXECUTE format( + EXECUTE pg_catalog.format( 'CREATE TRIGGER _age_cache_invalidate ' 'AFTER INSERT OR UPDATE OR DELETE OR TRUNCATE ' 'ON %I.%I ' diff --git a/regress/expected/extension_security.out b/regress/expected/extension_security.out new file mode 100644 index 000000000..30241623a --- /dev/null +++ b/regress/expected/extension_security.out @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- pg_upgrade helper functions resolve built-ins from pg_catalog first. +-- +-- Each helper must place pg_catalog ahead of ag_catalog in its search_path, so +-- that built-in functions and operators always resolve to pg_catalog and are +-- not overridden by same-named objects defined in ag_catalog. 
+-- +SELECT p.proname, + array_to_string(p.proconfig, ', ') AS proconfig +FROM pg_proc p +JOIN pg_namespace n ON n.oid = p.pronamespace +WHERE n.nspname = 'ag_catalog' + AND p.proname IN ('age_prepare_pg_upgrade', 'age_finish_pg_upgrade', + 'age_revert_pg_upgrade_changes', 'age_pg_upgrade_status') +ORDER BY p.proname; + proname | proconfig +-------------------------------+------------------------------------ + age_finish_pg_upgrade | search_path=pg_catalog, ag_catalog + age_pg_upgrade_status | search_path=pg_catalog, ag_catalog + age_prepare_pg_upgrade | search_path=pg_catalog, ag_catalog + age_revert_pg_upgrade_changes | search_path=pg_catalog, ag_catalog +(4 rows) + +-- +-- The helper bodies must not contain unqualified format()/hashtext() calls; +-- those built-ins are explicitly schema-qualified to pg_catalog. +-- +SELECT p.proname, + (p.prosrc ~ '[^.]\mformat\s*\(') AS has_unqualified_format, + (p.prosrc ~ '[^.]\mhashtext\s*\(') AS has_unqualified_hashtext +FROM pg_proc p +JOIN pg_namespace n ON n.oid = p.pronamespace +WHERE n.nspname = 'ag_catalog' + AND p.proname IN ('age_finish_pg_upgrade', 'age_revert_pg_upgrade_changes') +ORDER BY p.proname; + proname | has_unqualified_format | has_unqualified_hashtext +-------------------------------+------------------------+-------------------------- + age_finish_pg_upgrade | f | f + age_revert_pg_upgrade_changes | f | f +(2 rows) + +-- +-- Install-time ownership check: CREATE EXTENSION age installs into ag_catalog +-- only when that schema does not already exist under a different owner. The +-- check compares schema ownership against the installing role. Verify the +-- underlying detection both ways with a probe schema, without disturbing the +-- already-installed extension. +-- +CREATE ROLE age_probe_role NOLOGIN; +CREATE SCHEMA age_probe AUTHORIZATION age_probe_role; +-- A schema owned by a different role is detected as foreign-owned. 
+SELECT EXISTS ( + SELECT 1 + FROM pg_catalog.pg_namespace n + WHERE n.nspname = 'age_probe' + AND n.nspowner <> (SELECT r.oid FROM pg_catalog.pg_roles r + WHERE r.rolname = current_user) +) AS foreign_owner_detected; + foreign_owner_detected +------------------------ + t +(1 row) + +-- ag_catalog, owned by the current (installing) role here, is not flagged +-- (the check does not false-positive on a normal install). +SELECT EXISTS ( + SELECT 1 + FROM pg_catalog.pg_namespace n + WHERE n.nspname = 'ag_catalog' + AND n.nspowner <> (SELECT r.oid FROM pg_catalog.pg_roles r + WHERE r.rolname = current_user) +) AS installer_owned_flagged; + installer_owned_flagged +------------------------- + f +(1 row) + +DROP SCHEMA age_probe; +DROP ROLE age_probe_role; diff --git a/regress/sql/extension_security.sql b/regress/sql/extension_security.sql new file mode 100644 index 000000000..433283989 --- /dev/null +++ b/regress/sql/extension_security.sql @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- pg_upgrade helper functions resolve built-ins from pg_catalog first. 
+-- +-- Each helper must place pg_catalog ahead of ag_catalog in its search_path, so +-- that built-in functions and operators always resolve to pg_catalog and are +-- not overridden by same-named objects defined in ag_catalog. +-- +SELECT p.proname, + array_to_string(p.proconfig, ', ') AS proconfig +FROM pg_proc p +JOIN pg_namespace n ON n.oid = p.pronamespace +WHERE n.nspname = 'ag_catalog' + AND p.proname IN ('age_prepare_pg_upgrade', 'age_finish_pg_upgrade', + 'age_revert_pg_upgrade_changes', 'age_pg_upgrade_status') +ORDER BY p.proname; + +-- +-- The helper bodies must not contain unqualified format()/hashtext() calls; +-- those built-ins are explicitly schema-qualified to pg_catalog. +-- +SELECT p.proname, + (p.prosrc ~ '[^.]\mformat\s*\(') AS has_unqualified_format, + (p.prosrc ~ '[^.]\mhashtext\s*\(') AS has_unqualified_hashtext +FROM pg_proc p +JOIN pg_namespace n ON n.oid = p.pronamespace +WHERE n.nspname = 'ag_catalog' + AND p.proname IN ('age_finish_pg_upgrade', 'age_revert_pg_upgrade_changes') +ORDER BY p.proname; + +-- +-- Install-time ownership check: CREATE EXTENSION age installs into ag_catalog +-- only when that schema does not already exist under a different owner. The +-- check compares schema ownership against the installing role. Verify the +-- underlying detection both ways with a probe schema, without disturbing the +-- already-installed extension. +-- +CREATE ROLE age_probe_role NOLOGIN; +CREATE SCHEMA age_probe AUTHORIZATION age_probe_role; + +-- A schema owned by a different role is detected as foreign-owned. +SELECT EXISTS ( + SELECT 1 + FROM pg_catalog.pg_namespace n + WHERE n.nspname = 'age_probe' + AND n.nspowner <> (SELECT r.oid FROM pg_catalog.pg_roles r + WHERE r.rolname = current_user) +) AS foreign_owner_detected; + +-- ag_catalog, owned by the current (installing) role here, is not flagged +-- (the check does not false-positive on a normal install). 
+SELECT EXISTS ( + SELECT 1 + FROM pg_catalog.pg_namespace n + WHERE n.nspname = 'ag_catalog' + AND n.nspowner <> (SELECT r.oid FROM pg_catalog.pg_roles r + WHERE r.rolname = current_user) +) AS installer_owned_flagged; + +DROP SCHEMA age_probe; +DROP ROLE age_probe_role; diff --git a/sql/age_main.sql b/sql/age_main.sql index 72f420002..233d0d23f 100644 --- a/sql/age_main.sql +++ b/sql/age_main.sql @@ -20,6 +20,33 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION age" to load this file. \quit +-- +-- Ensure ag_catalog is created and owned by the installing role. +-- +-- CREATE EXTENSION places all of AGE's objects in ag_catalog. A normal install +-- creates that schema, owned by the installer. If ag_catalog already exists and +-- is owned by a different role, that role would retain control over the schema +-- that holds AGE's catalog objects. To keep ownership well-defined, refuse to +-- install into a pre-existing ag_catalog owned by another role. Ownership is +-- compared directly (not via role membership) so the check is exact even for a +-- superuser, who is otherwise considered a member of every role. +-- +DO $age_install_guard$ +BEGIN + IF EXISTS ( + SELECT 1 + FROM pg_catalog.pg_namespace n + WHERE n.nspname = 'ag_catalog' + AND n.nspowner <> (SELECT r.oid + FROM pg_catalog.pg_roles r + WHERE r.rolname = current_user) + ) THEN + RAISE EXCEPTION 'schema "ag_catalog" already exists and is not owned by the installing role "%"', current_user + USING HINT = 'Apache AGE will not install into a pre-existing ag_catalog owned by another role. 
Drop it (DROP SCHEMA ag_catalog CASCADE) or transfer its ownership to the installing role, then retry CREATE EXTENSION age.'; + END IF; +END +$age_install_guard$; + -- -- catalog tables -- diff --git a/sql/age_pg_upgrade.sql b/sql/age_pg_upgrade.sql index 42a06ecd6..68fbd1513 100644 --- a/sql/age_pg_upgrade.sql +++ b/sql/age_pg_upgrade.sql @@ -55,7 +55,10 @@ CREATE FUNCTION ag_catalog.age_prepare_pg_upgrade() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. + SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE graph_count integer; @@ -143,7 +146,10 @@ COMMENT ON FUNCTION ag_catalog.age_prepare_pg_upgrade() IS CREATE FUNCTION ag_catalog.age_finish_pg_upgrade() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. + SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE mapping_count integer; @@ -266,7 +272,7 @@ BEGIN -- and preserve original schema ownership. 
-- RAISE NOTICE 'Invalidating AGE caches...'; - PERFORM pg_catalog.pg_advisory_xact_lock(hashtext('age_finish_pg_upgrade')); + PERFORM pg_catalog.pg_advisory_xact_lock(pg_catalog.hashtext('age_finish_pg_upgrade')); DECLARE graph_rec RECORD; cache_invalidated boolean := false; @@ -280,8 +286,8 @@ BEGIN BEGIN -- Touch schema by changing owner to current_user then back to original -- This triggers cache invalidation without permanently changing ownership - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); cache_invalidated := true; EXCEPTION WHEN insufficient_privilege THEN -- If we can't change ownership, skip this schema @@ -330,7 +336,10 @@ COMMENT ON FUNCTION ag_catalog.age_finish_pg_upgrade() IS CREATE FUNCTION ag_catalog.age_revert_pg_upgrade_changes() RETURNS void LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. 
+ SET search_path = pg_catalog, ag_catalog AS $function$ BEGIN -- Check if namespace column is oid type (needs reverting) @@ -363,7 +372,7 @@ BEGIN -- Invalidate AGE's internal caches by touching each graph's namespace -- We use xact-level advisory lock and preserve original ownership -- - PERFORM pg_catalog.pg_advisory_xact_lock(hashtext('age_revert_pg_upgrade')); + PERFORM pg_catalog.pg_advisory_xact_lock(pg_catalog.hashtext('age_revert_pg_upgrade')); DECLARE graph_rec RECORD; BEGIN @@ -375,8 +384,8 @@ BEGIN LOOP BEGIN -- Touch schema by changing owner to current_user then back to original - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); - EXECUTE format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, current_user); + EXECUTE pg_catalog.format('ALTER SCHEMA %I OWNER TO %I', graph_rec.ns_name, graph_rec.owner_name); EXCEPTION WHEN insufficient_privilege THEN RAISE NOTICE 'Could not invalidate cache for schema % (insufficient privileges)', graph_rec.ns_name; END; @@ -410,7 +419,10 @@ CREATE FUNCTION ag_catalog.age_pg_upgrade_status() message text ) LANGUAGE plpgsql - SET search_path = ag_catalog, pg_catalog + -- Resolve built-in functions and operators from pg_catalog first so they + -- are not overridden by same-named objects defined in ag_catalog. The + -- ag_catalog objects referenced here are schema-qualified. + SET search_path = pg_catalog, ag_catalog AS $function$ DECLARE ns_type text; From b53c32ed1d31a09ded5e82d229feda4a48d85104 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 21 Jun 2026 14:34:37 -0700 Subject: [PATCH 07/20] cypher_with: add ORDER BY to non-deterministic RETURN queries (#2436) Several cypher_with regression queries RETURN multiple rows without an ORDER BY, so their row order depends on heap/scan order and can vary between runs, build types, and platforms. 
Add an explicit ascending ORDER BY (ORDER BY ... ASC) to those queries so the expected output is stable. Ordering keys use id() of the matched vertex or edge (a single int64 that bypasses the locale-sensitive string comparison path and is reproducible from the test's deterministic setup order), or the projected path/scalar where that is what the query returns. Where the underlying vertex/edge was dropped by a WITH projection, its id is threaded through as an alias rather than reordering the projection. Full audit of cypher_with: all 23 multi-row result blocks were checked. After this change, every multi-row, non-EXPLAIN RETURN yields deterministic output. The remaining unordered multi-row blocks fall into two groups, both left as-is: - "RETURN lbl" returns two identical "Person" rows, so order cannot drift; - the 13 EXPLAIN (VERBOSE, COSTS OFF) plan blocks emit a fixed serial plan (no parallel/gather nodes), so their row order is already deterministic. This is a test-only change (regress/sql/cypher_with.sql and regress/expected/cypher_with.out); no extension C code or SQL is modified. Row counts are unchanged (pure reordering). All 37 regression tests pass (installcheck) on PostgreSQL 18.3. 
Co-authored-by: GitHub Copilot modified: regress/expected/cypher_with.out modified: regress/sql/cypher_with.sql --- regress/expected/cypher_with.out | 25 ++++++++++++++++++------- regress/sql/cypher_with.sql | 13 ++++++++++++- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/regress/expected/cypher_with.out b/regress/expected/cypher_with.out index 8864f026f..2fc330616 100644 --- a/regress/expected/cypher_with.out +++ b/regress/expected/cypher_with.out @@ -52,13 +52,14 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n,e,m RETURN n,e,m + ORDER BY id(n) ASC, id(e) ASC, id(m) ASC $$) AS (N1 agtype, edge agtype, N2 agtype); n1 | edge | n2 --------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------- {"id": 281474976710657, "label": "", "properties": {"age": 36, "name": "Andres"}}::vertex | {"id": 844424930131969, "label": "BLOCKS", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}::edge | {"id": 281474976710658, "label": "", "properties": {"age": 25, "name": "Caesar"}}::vertex - {"id": 281474976710659, "label": "", "properties": {"age": 55, "name": "Bossman"}}::vertex | {"id": 844424930131970, "label": "BLOCKS", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge | {"id": 281474976710660, "label": "", "properties": {"age": 35, "name": "David"}}::vertex {"id": 281474976710657, "label": "", "properties": {"age": 36, "name": "Andres"}}::vertex | {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge | {"id": 281474976710659, "label": "", "properties": {"age": 55, "name": "Bossman"}}::vertex {"id": 281474976710658, "label": "", "properties": {"age": 25, "name": "Caesar"}}::vertex 
| {"id": 1125899906842626, "label": "KNOWS", "end_id": 281474976710661, "start_id": 281474976710658, "properties": {}}::edge | {"id": 281474976710661, "label": "", "properties": {"age": 37, "name": "George"}}::vertex + {"id": 281474976710659, "label": "", "properties": {"age": 55, "name": "Bossman"}}::vertex | {"id": 844424930131970, "label": "BLOCKS", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge | {"id": 281474976710660, "label": "", "properties": {"age": 35, "name": "David"}}::vertex {"id": 281474976710659, "label": "", "properties": {"age": 55, "name": "Bossman"}}::vertex | {"id": 1125899906842627, "label": "KNOWS", "end_id": 281474976710661, "start_id": 281474976710659, "properties": {}}::edge | {"id": 281474976710661, "label": "", "properties": {"age": 37, "name": "George"}}::vertex {"id": 281474976710660, "label": "", "properties": {"age": 35, "name": "David"}}::vertex | {"id": 1125899906842628, "label": "KNOWS", "end_id": 281474976710657, "start_id": 281474976710660, "properties": {}}::edge | {"id": 281474976710657, "label": "", "properties": {"age": 36, "name": "Andres"}}::vertex (6 rows) @@ -68,6 +69,7 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n.name AS n1, e as edge, m.name as n2 RETURN n1,label(edge),n2 + ORDER BY id(edge) ASC $$) AS (start_node agtype,edge agtype, end_node agtype); start_node | edge | end_node ------------+----------+----------- @@ -83,13 +85,14 @@ SELECT * FROM cypher('cypher_with',$$ MATCH (person)-[r]->(otherPerson) WITH *, type(r) AS connectionType RETURN person.name, connectionType, otherPerson.name + ORDER BY id(person) ASC, id(otherPerson) ASC $$) AS (start_node agtype, connection agtype, end_node agtype); start_node | connection | end_node ------------+------------+----------- "Andres" | "BLOCKS" | "Caesar" - "Bossman" | "BLOCKS" | "David" "Andres" | "KNOWS" | "Bossman" "Caesar" | "KNOWS" | "George" + "Bossman" | "BLOCKS" | "David" "Bossman" | "KNOWS" | "George" "David" 
| "KNOWS" | "Andres" (6 rows) @@ -109,6 +112,7 @@ MATCH (george {name: 'George'})<-[]-(otherPerson) WITH otherPerson, toUpper(otherPerson.name) AS upperCaseName WHERE upperCaseName STARTS WITH 'C' RETURN otherPerson.name + ORDER BY id(otherPerson) ASC $$) as (name agtype); name ---------- @@ -120,6 +124,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH otherPerson, count(*) AS foaf WHERE foaf > 1 RETURN otherPerson.name + ORDER BY id(otherPerson) ASC $$) as (name agtype); name ---------- @@ -131,15 +136,16 @@ SELECT * FROM cypher('cypher_with', $$ WITH p, length(p) AS path_length WHERE path_length > 1 RETURN p + ORDER BY p ASC $$) AS (pattern agtype); pattern -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 281474976710661, "start_id": 281474976710659, "properties": {}}::edge, {"id": 281474976710661, "label": "_ag_label_vertex", "properties": {"age": 37, "name": "George"}}::vertex]::path - [{"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 
281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex, {"id": 844424930131970, "label": "BLOCKS", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge, {"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex]::path [{"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 844424930131969, "label": "BLOCKS", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710658, "label": "_ag_label_vertex", "properties": {"age": 25, "name": "Caesar"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 281474976710661, "start_id": 281474976710658, "properties": {}}::edge, {"id": 281474976710661, "label": "_ag_label_vertex", "properties": {"age": 37, "name": "George"}}::vertex]::path + [{"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex, {"id": 844424930131970, "label": "BLOCKS", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge, {"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex]::path + [{"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 281474976710661, "start_id": 
281474976710659, "properties": {}}::edge, {"id": 281474976710661, "label": "_ag_label_vertex", "properties": {"age": 37, "name": "George"}}::vertex]::path [{"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex, {"id": 844424930131970, "label": "BLOCKS", "end_id": 281474976710660, "start_id": 281474976710659, "properties": {}}::edge, {"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 281474976710657, "start_id": 281474976710660, "properties": {}}::edge, {"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex]::path - [{"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 281474976710657, "start_id": 281474976710660, "properties": {}}::edge, {"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex]::path [{"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 281474976710657, "start_id": 281474976710660, "properties": {}}::edge, {"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 844424930131969, "label": "BLOCKS", "end_id": 281474976710658, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710658, "label": "_ag_label_vertex", "properties": {"age": 25, "name": "Caesar"}}::vertex]::path + [{"id": 281474976710660, "label": "_ag_label_vertex", "properties": {"age": 35, "name": "David"}}::vertex, {"id": 
1125899906842628, "label": "KNOWS", "end_id": 281474976710657, "start_id": 281474976710660, "properties": {}}::edge, {"id": 281474976710657, "label": "_ag_label_vertex", "properties": {"age": 36, "name": "Andres"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 281474976710659, "start_id": 281474976710657, "properties": {}}::edge, {"id": 281474976710659, "label": "_ag_label_vertex", "properties": {"age": 55, "name": "Bossman"}}::vertex]::path (6 rows) -- MATCH/WHERE with WITH/WHERE @@ -149,6 +155,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH * WHERE m.name = 'Andres' RETURN m.name,label(e),b.name + ORDER BY id(m) ASC, id(e) ASC, id(b) ASC $$) AS (N1 agtype, edge agtype, N2 agtype); n1 | edge | n2 ----------+---------+----------- @@ -201,9 +208,10 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n, e, m WHERE label(e) = 'KNOWS' - WITH n.name as n1, label(e) as edge, m.name as n2 + WITH id(e) AS eid, n.name as n1, label(e) as edge, m.name as n2 WHERE n1 = 'Andres' RETURN n1,edge,n2 + ORDER BY eid ASC $$) AS (N1 agtype, edge agtype, N2 agtype); n1 | edge | n2 ----------+---------+----------- @@ -217,6 +225,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH x LIMIT 5 RETURN x + ORDER BY x ASC $$) as (name agtype); name ------ @@ -233,11 +242,12 @@ SELECT * FROM cypher('cypher_with', $$ WITH m as start_node, b as end_node WHERE end_node.name = 'George' RETURN id(start_node),start_node.name,id(end_node),end_node.name + ORDER BY id(start_node) ASC, id(end_node) ASC $$) AS (id1 agtype, name1 agtype, id2 agtype, name2 agtype); id1 | name1 | id2 | name2 -----------------+-----------+-----------------+---------- - 281474976710659 | "Bossman" | 281474976710661 | "George" 281474976710658 | "Caesar" | 281474976710661 | "George" + 281474976710659 | "Bossman" | 281474976710661 | "George" (2 rows) -- Expression item must be aliased. 
@@ -471,6 +481,7 @@ SELECT * FROM cypher('with_accessor_opt', $$ MATCH (n:Person) WITH n as m RETURN m + ORDER BY id(m) ASC $$) AS (n vertex); n --------------------------------------------------------------------- diff --git a/regress/sql/cypher_with.sql b/regress/sql/cypher_with.sql index 25e22b2a2..145356446 100644 --- a/regress/sql/cypher_with.sql +++ b/regress/sql/cypher_with.sql @@ -47,6 +47,7 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n,e,m RETURN n,e,m + ORDER BY id(n) ASC, id(e) ASC, id(m) ASC $$) AS (N1 agtype, edge agtype, N2 agtype); -- WITH/AS @@ -55,12 +56,14 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n.name AS n1, e as edge, m.name as n2 RETURN n1,label(edge),n2 + ORDER BY id(edge) ASC $$) AS (start_node agtype,edge agtype, end_node agtype); SELECT * FROM cypher('cypher_with',$$ MATCH (person)-[r]->(otherPerson) WITH *, type(r) AS connectionType RETURN person.name, connectionType, otherPerson.name + ORDER BY id(person) ASC, id(otherPerson) ASC $$) AS (start_node agtype, connection agtype, end_node agtype); SELECT * FROM cypher('cypher_with', $$ @@ -75,6 +78,7 @@ MATCH (george {name: 'George'})<-[]-(otherPerson) WITH otherPerson, toUpper(otherPerson.name) AS upperCaseName WHERE upperCaseName STARTS WITH 'C' RETURN otherPerson.name + ORDER BY id(otherPerson) ASC $$) as (name agtype); SELECT * FROM cypher('cypher_with', $$ @@ -82,6 +86,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH otherPerson, count(*) AS foaf WHERE foaf > 1 RETURN otherPerson.name + ORDER BY id(otherPerson) ASC $$) as (name agtype); SELECT * FROM cypher('cypher_with', $$ @@ -89,6 +94,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH p, length(p) AS path_length WHERE path_length > 1 RETURN p + ORDER BY p ASC $$) AS (pattern agtype); -- MATCH/WHERE with WITH/WHERE @@ -99,6 +105,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH * WHERE m.name = 'Andres' RETURN m.name,label(e),b.name + ORDER BY id(m) ASC, id(e) ASC, id(b) ASC $$) AS (N1 
agtype, edge agtype, N2 agtype); -- WITH/ORDER BY @@ -133,9 +140,10 @@ SELECT * FROM cypher('cypher_with', $$ MATCH (n)-[e]->(m) WITH n, e, m WHERE label(e) = 'KNOWS' - WITH n.name as n1, label(e) as edge, m.name as n2 + WITH id(e) AS eid, n.name as n1, label(e) as edge, m.name as n2 WHERE n1 = 'Andres' RETURN n1,edge,n2 + ORDER BY eid ASC $$) AS (N1 agtype, edge agtype, N2 agtype); SELECT * FROM cypher('cypher_with', $$ @@ -145,6 +153,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH x LIMIT 5 RETURN x + ORDER BY x ASC $$) as (name agtype); SELECT * FROM cypher('cypher_with', $$ @@ -154,6 +163,7 @@ SELECT * FROM cypher('cypher_with', $$ WITH m as start_node, b as end_node WHERE end_node.name = 'George' RETURN id(start_node),start_node.name,id(end_node),end_node.name + ORDER BY id(start_node) ASC, id(end_node) ASC $$) AS (id1 agtype, name1 agtype, id2 agtype, name2 agtype); -- Expression item must be aliased. @@ -284,6 +294,7 @@ SELECT * FROM cypher('with_accessor_opt', $$ MATCH (n:Person) WITH n as m RETURN m + ORDER BY id(m) ASC $$) AS (n vertex); SELECT * FROM cypher('with_accessor_opt', $$ From 8690da28318073c8ac511967845bcb8099caa1ce Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sun, 21 Jun 2026 14:35:36 -0700 Subject: [PATCH 08/20] Add shortest_path / all_shortest_paths SRFs (#2430) Add two C set-returning functions that compute unweighted (hop-count) shortest paths over the cached global graph adjacency via BFS, callable both at the SQL top level and inside a cypher() RETURN: - age_shortest_path(...) -> the single shortest path (0 or 1 rows) - age_all_shortest_paths(...) -> every shortest path, one per row The signature follows the natural Cypher argument order (graph, start, end, edge_types, direction, min_hops, max_hops), registered in sql/agtype_typecast.sql (install) and age--1.7.0--y.y.y.sql (upgrade). Unimplemented parameters fail loudly: multiple relationship types and a non-zero min_hops raise ERRCODE_FEATURE_NOT_SUPPORTED. 
A single edge type (string or one-element array) is honored, and a NULL endpoint yields no rows per Cypher null semantics (wrong-typed endpoints / NULL graph still error). To call the SRFs inside a cypher() RETURN, transform_cypher_return now sets query->hasTargetSRFs (it was the only results-producing clause that didn't, so the planner never added a ProjectSet node), and transform_FuncCall auto-prepends the graph name for snake_case shortest_path / all_shortest_paths. camelCase names are reserved for the future native grammar. Robustness: - BFS guards against non-existent endpoints (returns 0 rows instead of crashing) and honors CHECK_FOR_INTERRUPTS. - An unknown edge label now matches no edges instead of silently traversing all of them (get_label_relation returns InvalidOid). Adds the age_shortest_path regression test (directed/undirected, label filtering, parallel edges, self-loops, max_hops, the not-supported stubs, NULL and non-existent endpoint/graph guards). 38/38 installcheck pass. 
Co-authored-by: Copilot modified: Makefile modified: age--1.7.0--y.y.y.sql modified: sql/agtype_typecast.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_expr.c modified: src/backend/utils/adt/age_vle.c new file: regress/expected/age_shortest_path.out new file: regress/sql/age_shortest_path.sql --- Makefile | 1 + age--1.7.0--y.y.y.sql | 31 + regress/expected/age_shortest_path.out | 977 +++++++++++++++++++++++++ regress/sql/age_shortest_path.sql | 630 ++++++++++++++++ sql/agtype_typecast.sql | 31 + src/backend/parser/cypher_clause.c | 1 + src/backend/parser/cypher_expr.c | 13 +- src/backend/utils/adt/age_vle.c | 796 ++++++++++++++++++++ 8 files changed, 2475 insertions(+), 5 deletions(-) create mode 100644 regress/expected/age_shortest_path.out create mode 100644 regress/sql/age_shortest_path.sql diff --git a/Makefile b/Makefile index b2d93ff4b..41208ee02 100644 --- a/Makefile +++ b/Makefile @@ -201,6 +201,7 @@ REGRESS = scan \ cypher_delete \ cypher_with \ cypher_vle \ + age_shortest_path \ cypher_union \ cypher_call \ cypher_merge \ diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index ad2ce20fd..b40cde092 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -549,6 +549,37 @@ CALLED ON NULL INPUT PARALLEL UNSAFE AS 'MODULE_PATHNAME'; +-- Unweighted (hop-count) shortest path between two vertices, computed over the +-- cached global graph adjacency via BFS. Returns a single path (0 or 1 rows). 
+-- Argument order mirrors the Cypher shortestPath() pattern +-- (a)-[:type*min_hops..max_hops]->(b): +-- (graph_name, start, end, edge_types, direction, min_hops, max_hops) +CREATE FUNCTION ag_catalog.age_shortest_path(IN agtype, IN agtype, IN agtype, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL) + RETURNS SETOF agtype +LANGUAGE C +STABLE +CALLED ON NULL INPUT +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + +-- All unweighted shortest paths between two vertices (one path per row). +-- Same argument order as age_shortest_path. +CREATE FUNCTION ag_catalog.age_all_shortest_paths(IN agtype, IN agtype, IN agtype, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL) + RETURNS SETOF agtype +LANGUAGE C +STABLE +CALLED ON NULL INPUT +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + -- -- Composite types for vertex and edge -- diff --git a/regress/expected/age_shortest_path.out b/regress/expected/age_shortest_path.out new file mode 100644 index 000000000..7eb751d12 --- /dev/null +++ b/regress/expected/age_shortest_path.out @@ -0,0 +1,977 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- +-- age_shortest_path / age_all_shortest_paths +-- +SELECT * FROM create_graph('sp_graph'); +NOTICE: graph "sp_graph" has been created + create_graph +-------------- + +(1 row) + +-- Build a small deterministic graph: +-- +-- A +-- / \ +-- B C (A->B, A->C, B->D, C->D : two shortest A..D paths) +-- \ / +-- D +-- | +-- E (D->E : unique 3-hop path A..E) +-- +-- Z (isolated, unreachable) +-- +SELECT * FROM cypher('sp_graph', $$ + CREATE (a:Person {name: 'A'}), + (b:Person {name: 'B'}), + (c:Person {name: 'C'}), + (d:Person {name: 'D'}), + (e:Person {name: 'E'}), + (z:Person {name: 'Z'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c), + (b)-[:KNOWS]->(d), + (c)-[:KNOWS]->(d), + (d)-[:KNOWS]->(e) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_graph', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +---------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Person", "in_degree": 0, "out_degree": 2, "self_loops": 0} + {"id": 844424930131970, "label": "Person", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131971, "label": "Person", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131972, "label": "Person", "in_degree": 2, "out_degree": 1, "self_loops": 0} + {"id": 844424930131973, "label": "Person", "in_degree": 1, "out_degree": 0, "self_loops": 0} + {"id": 844424930131974, "label": "Person", "in_degree": 0, "out_degree": 0, "self_loops": 0} +(6 rows) + +-- A -> D shortest path (length 2); expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +); + path_count 
+------------ + 1 +(1 row) + +-- all shortest A -> D; expected: 2 paths (A-B-D and A-C-D), each length 2 +SELECT path +FROM age_all_shortest_paths( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + path +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "Person", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "Person", "properties": {"name": "B"}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131972, "label": "Person", "properties": {"name": "D"}}::vertex]::path + [{"id": 844424930131969, "label": "Person", "properties": {"name": "A"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "Person", "properties": {"name": "C"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131972, "label": "Person", "properties": {"name": "D"}}::vertex]::path +(2 rows) + +-- A -> E unique 3-hop path; expected: path_count = 1 +SELECT count(*) AS path_count +FROM 
age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'E'}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 1 +(1 row) + +-- A -> E with max_hops = 2; expected: path_count = 0 (E is 3 hops away) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'E'}) RETURN id(n) $$) AS (id agtype)), + NULL, NULL, NULL, 2::agtype +); + path_count +------------ + 0 +(1 row) + +-- zero-length path, start == end; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 1 +(1 row) + +-- unreachable vertex Z; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'Z'}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 0 +(1 row) + +-- direction 'in': D -> A traversing edges backwards; expected: path_count = 2 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"in"'::agtype +); + path_count +------------ + 2 +(1 row) + +-- direction 'out': D -> A not reachable forwards; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + 
'"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + path_count +------------ + 0 +(1 row) + +-- label filter 'KNOWS': A -> D still found; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype +); + path_count +------------ + 1 +(1 row) + +-- error: invalid direction string; expected: ERROR (must be 'out', 'in', or 'any') +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"sideways"'::agtype +); +ERROR: direction argument must be one of 'out', 'in', or 'any' +-- error: start argument is neither a vertex nor an integer id; expected: ERROR +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + '"not_a_vertex"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +); +ERROR: start vertex argument must be a vertex or the integer id +-- +-- Non-existent endpoint guards. These must NOT crash the backend and must +-- return no rows (a path can only exist between vertices in the graph). +-- Previously, start == end on a non-existent vertex id was matched at BFS +-- depth 0 and path reconstruction dereferenced a missing vertex, crashing +-- the server. 
+-- +-- start == end on a non-existent integer id; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path('"sp_graph"'::agtype, 999999::agtype, 999999::agtype); + path_count +------------ + 0 +(1 row) + +-- existing start -> non-existent end; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + 999999::agtype +); + path_count +------------ + 0 +(1 row) + +-- non-existent start -> existing end; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + 999999::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 0 +(1 row) + +-- all-shortest-paths with start == end non-existent; expected: 0 rows +SELECT count(*) AS path_count +FROM age_all_shortest_paths('"sp_graph"'::agtype, 999999::agtype, 999999::agtype); + path_count +------------ + 0 +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_graph', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table sp_graph._ag_label_vertex +drop cascades to table sp_graph._ag_label_edge +drop cascades to table sp_graph."Person" +drop cascades to table sp_graph."KNOWS" +NOTICE: graph "sp_graph" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Empty graph: a graph that exists but has no vertices must return no rows +-- (and must not hang or crash) for any endpoint query. 
+-- +SELECT * FROM create_graph('sp_empty'); +NOTICE: graph "sp_empty" has been created + create_graph +-------------- + +(1 row) + +SELECT count(*) AS path_count +FROM age_shortest_path('"sp_empty"'::agtype, 0::agtype, 1::agtype); + path_count +------------ + 0 +(1 row) + +SELECT count(*) AS path_count +FROM age_all_shortest_paths('"sp_empty"'::agtype, 0::agtype, 0::agtype); + path_count +------------ + 0 +(1 row) + +SELECT * FROM drop_graph('sp_empty', true); +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table sp_empty._ag_label_vertex +drop cascades to table sp_empty._ag_label_edge +NOTICE: graph "sp_empty" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- A large, programmatically generated graph (120 nodes) exercising long +-- shortest paths (length up to 20), high-multiplicity all-shortest-paths, +-- label filtering, and directed vs. undirected reachability. +-- +-- Nodes: (:N {id: 0..119}). Structures built on top of them: +-- +-- * Main chain 0 -> 1 -> ... -> 20 (unique 20-hop path) +-- * Alternate chain 0 -> 50 -> 51 -> ... -> 68 -> 20 +-- (a second, disjoint 20-hop path 0..20) +-- => all-shortest-paths 0..20 under KNOWS = 2 paths of length 20 +-- * 3x3 lattice on ids 70..78, id = 70 + 3*row + col, edges go right +-- (id->id+1) and down (id->id+3). Monotone 70..78 paths: +-- => all-shortest-paths 70..78 = C(4,2) = 6 paths of length 4 +-- * LIKES shortcut 0 -[:LIKES]-> 20 (1 hop; only visible when the edge +-- label filter is NOT restricted to KNOWS) +-- * Back-edge triangle 0 -> 96 -> 95 -> 0 +-- => directed 0->95 = 2 hops (0-96-95); undirected 0..95 = 1 hop +-- * Many unused ids (e.g. 119) remain isolated / unreachable. 
+-- +SELECT * FROM create_graph('sp_big'); +NOTICE: graph "sp_big" has been created + create_graph +-------------- + +(1 row) + +-- 120 vertices, ids 0..119 +SELECT * FROM cypher('sp_big', $$ + UNWIND range(0, 119) AS i CREATE (:N {id: i}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- main chain 0->1->...->20 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + UNWIND range(0, 19) AS i + MATCH (a:N {id: i}), (b:N {id: i + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- alternate, disjoint 20-hop path 0->50->51->...->68->20 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 50}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('sp_big', $$ + UNWIND range(50, 67) AS i + MATCH (a:N {id: i}), (b:N {id: i + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 68}), (b:N {id: 20}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- 3x3 lattice on ids 70..78: right edges (id -> id+1) +SELECT * FROM cypher('sp_big', $$ + UNWIND [0, 1, 2] AS r + UNWIND [0, 1] AS c + MATCH (a:N {id: 70 + 3 * r + c}), (b:N {id: 70 + 3 * r + c + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- 3x3 lattice: down edges (id -> id+3) +SELECT * FROM cypher('sp_big', $$ + UNWIND [0, 1] AS r + UNWIND [0, 1, 2] AS c + MATCH (a:N {id: 70 + 3 * r + c}), (b:N {id: 70 + 3 * (r + 1) + c}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- back-edge triangle 0 -> 96 -> 95 -> 0 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 96}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 96}), (b:N {id: 95}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + 
+SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 95}), (b:N {id: 0}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- labelled shortcut 0 -[:LIKES]-> 20 +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 20}) CREATE (a)-[:LIKES]->(b) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- sanity: vertex count (also materializes the global context); expected: count = 120 +SELECT * FROM cypher('sp_big', $$ MATCH (n) RETURN count(n) $$) AS (n agtype); + n +----- + 120 +(1 row) + +-- all shortest 0 -> 20 under KNOWS (main chain + disjoint alternate); +-- expected: 2 paths, each exactly 20 hops +SELECT path +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +) AS path +ORDER BY path; + path +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"id": 0}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"id": 1}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"id": 2}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131972, "label": "N", "properties": {"id": 3}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge, {"id": 844424930131973, "label": "N", "properties": {"id": 4}}::vertex, {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131974, "start_id": 844424930131973, "properties": {}}::edge, {"id": 
844424930131974, "label": "N", "properties": {"id": 5}}::vertex, {"id": 1125899906842630, "label": "KNOWS", "end_id": 844424930131975, "start_id": 844424930131974, "properties": {}}::edge, {"id": 844424930131975, "label": "N", "properties": {"id": 6}}::vertex, {"id": 1125899906842631, "label": "KNOWS", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge, {"id": 844424930131976, "label": "N", "properties": {"id": 7}}::vertex, {"id": 1125899906842632, "label": "KNOWS", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge, {"id": 844424930131977, "label": "N", "properties": {"id": 8}}::vertex, {"id": 1125899906842633, "label": "KNOWS", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge, {"id": 844424930131978, "label": "N", "properties": {"id": 9}}::vertex, {"id": 1125899906842634, "label": "KNOWS", "end_id": 844424930131979, "start_id": 844424930131978, "properties": {}}::edge, {"id": 844424930131979, "label": "N", "properties": {"id": 10}}::vertex, {"id": 1125899906842635, "label": "KNOWS", "end_id": 844424930131980, "start_id": 844424930131979, "properties": {}}::edge, {"id": 844424930131980, "label": "N", "properties": {"id": 11}}::vertex, {"id": 1125899906842636, "label": "KNOWS", "end_id": 844424930131981, "start_id": 844424930131980, "properties": {}}::edge, {"id": 844424930131981, "label": "N", "properties": {"id": 12}}::vertex, {"id": 1125899906842637, "label": "KNOWS", "end_id": 844424930131982, "start_id": 844424930131981, "properties": {}}::edge, {"id": 844424930131982, "label": "N", "properties": {"id": 13}}::vertex, {"id": 1125899906842638, "label": "KNOWS", "end_id": 844424930131983, "start_id": 844424930131982, "properties": {}}::edge, {"id": 844424930131983, "label": "N", "properties": {"id": 14}}::vertex, {"id": 1125899906842639, "label": "KNOWS", "end_id": 844424930131984, "start_id": 844424930131983, "properties": {}}::edge, {"id": 844424930131984, "label": "N", 
"properties": {"id": 15}}::vertex, {"id": 1125899906842640, "label": "KNOWS", "end_id": 844424930131985, "start_id": 844424930131984, "properties": {}}::edge, {"id": 844424930131985, "label": "N", "properties": {"id": 16}}::vertex, {"id": 1125899906842641, "label": "KNOWS", "end_id": 844424930131986, "start_id": 844424930131985, "properties": {}}::edge, {"id": 844424930131986, "label": "N", "properties": {"id": 17}}::vertex, {"id": 1125899906842642, "label": "KNOWS", "end_id": 844424930131987, "start_id": 844424930131986, "properties": {}}::edge, {"id": 844424930131987, "label": "N", "properties": {"id": 18}}::vertex, {"id": 1125899906842643, "label": "KNOWS", "end_id": 844424930131988, "start_id": 844424930131987, "properties": {}}::edge, {"id": 844424930131988, "label": "N", "properties": {"id": 19}}::vertex, {"id": 1125899906842644, "label": "KNOWS", "end_id": 844424930131989, "start_id": 844424930131988, "properties": {}}::edge, {"id": 844424930131989, "label": "N", "properties": {"id": 20}}::vertex]::path + [{"id": 844424930131969, "label": "N", "properties": {"id": 0}}::vertex, {"id": 1125899906842645, "label": "KNOWS", "end_id": 844424930132019, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930132019, "label": "N", "properties": {"id": 50}}::vertex, {"id": 1125899906842646, "label": "KNOWS", "end_id": 844424930132020, "start_id": 844424930132019, "properties": {}}::edge, {"id": 844424930132020, "label": "N", "properties": {"id": 51}}::vertex, {"id": 1125899906842647, "label": "KNOWS", "end_id": 844424930132021, "start_id": 844424930132020, "properties": {}}::edge, {"id": 844424930132021, "label": "N", "properties": {"id": 52}}::vertex, {"id": 1125899906842648, "label": "KNOWS", "end_id": 844424930132022, "start_id": 844424930132021, "properties": {}}::edge, {"id": 844424930132022, "label": "N", "properties": {"id": 53}}::vertex, {"id": 1125899906842649, "label": "KNOWS", "end_id": 844424930132023, "start_id": 844424930132022, 
"properties": {}}::edge, {"id": 844424930132023, "label": "N", "properties": {"id": 54}}::vertex, {"id": 1125899906842650, "label": "KNOWS", "end_id": 844424930132024, "start_id": 844424930132023, "properties": {}}::edge, {"id": 844424930132024, "label": "N", "properties": {"id": 55}}::vertex, {"id": 1125899906842651, "label": "KNOWS", "end_id": 844424930132025, "start_id": 844424930132024, "properties": {}}::edge, {"id": 844424930132025, "label": "N", "properties": {"id": 56}}::vertex, {"id": 1125899906842652, "label": "KNOWS", "end_id": 844424930132026, "start_id": 844424930132025, "properties": {}}::edge, {"id": 844424930132026, "label": "N", "properties": {"id": 57}}::vertex, {"id": 1125899906842653, "label": "KNOWS", "end_id": 844424930132027, "start_id": 844424930132026, "properties": {}}::edge, {"id": 844424930132027, "label": "N", "properties": {"id": 58}}::vertex, {"id": 1125899906842654, "label": "KNOWS", "end_id": 844424930132028, "start_id": 844424930132027, "properties": {}}::edge, {"id": 844424930132028, "label": "N", "properties": {"id": 59}}::vertex, {"id": 1125899906842655, "label": "KNOWS", "end_id": 844424930132029, "start_id": 844424930132028, "properties": {}}::edge, {"id": 844424930132029, "label": "N", "properties": {"id": 60}}::vertex, {"id": 1125899906842656, "label": "KNOWS", "end_id": 844424930132030, "start_id": 844424930132029, "properties": {}}::edge, {"id": 844424930132030, "label": "N", "properties": {"id": 61}}::vertex, {"id": 1125899906842657, "label": "KNOWS", "end_id": 844424930132031, "start_id": 844424930132030, "properties": {}}::edge, {"id": 844424930132031, "label": "N", "properties": {"id": 62}}::vertex, {"id": 1125899906842658, "label": "KNOWS", "end_id": 844424930132032, "start_id": 844424930132031, "properties": {}}::edge, {"id": 844424930132032, "label": "N", "properties": {"id": 63}}::vertex, {"id": 1125899906842659, "label": "KNOWS", "end_id": 844424930132033, "start_id": 844424930132032, "properties": {}}::edge, 
{"id": 844424930132033, "label": "N", "properties": {"id": 64}}::vertex, {"id": 1125899906842660, "label": "KNOWS", "end_id": 844424930132034, "start_id": 844424930132033, "properties": {}}::edge, {"id": 844424930132034, "label": "N", "properties": {"id": 65}}::vertex, {"id": 1125899906842661, "label": "KNOWS", "end_id": 844424930132035, "start_id": 844424930132034, "properties": {}}::edge, {"id": 844424930132035, "label": "N", "properties": {"id": 66}}::vertex, {"id": 1125899906842662, "label": "KNOWS", "end_id": 844424930132036, "start_id": 844424930132035, "properties": {}}::edge, {"id": 844424930132036, "label": "N", "properties": {"id": 67}}::vertex, {"id": 1125899906842663, "label": "KNOWS", "end_id": 844424930132037, "start_id": 844424930132036, "properties": {}}::edge, {"id": 844424930132037, "label": "N", "properties": {"id": 68}}::vertex, {"id": 1125899906842664, "label": "KNOWS", "end_id": 844424930131989, "start_id": 844424930132037, "properties": {}}::edge, {"id": 844424930131989, "label": "N", "properties": {"id": 20}}::vertex]::path +(2 rows) + +-- any label: the LIKES shortcut collapses 0 -> 20; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + path_count +------------ + 1 +(1 row) + +-- all shortest 70 -> 78 across the 3x3 lattice; expected: path_count = 6 (C(4,2)) +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +); + path_count +------------ + 6 +(1 row) + +-- the lattice paths listed; expected: 6 paths, each 4 hops +SELECT 
path +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +) AS path +ORDER BY path; + path +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842665, "label": "KNOWS", "end_id": 844424930132040, "start_id": 844424930132039, "properties": {}}::edge, {"id": 844424930132040, "label": "N", "properties": {"id": 71}}::vertex, {"id": 1125899906842666, "label": "KNOWS", "end_id": 844424930132041, "start_id": 844424930132040, "properties": {}}::edge, {"id": 844424930132041, "label": "N", "properties": {"id": 72}}::vertex, {"id": 1125899906842673, "label": "KNOWS", "end_id": 844424930132044, "start_id": 844424930132041, "properties": {}}::edge, {"id": 844424930132044, "label": "N", "properties": {"id": 75}}::vertex, {"id": 1125899906842676, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132044, "properties": {}}::edge, {"id": 844424930132047, 
"label": "N", "properties": {"id": 78}}::vertex]::path + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842665, "label": "KNOWS", "end_id": 844424930132040, "start_id": 844424930132039, "properties": {}}::edge, {"id": 844424930132040, "label": "N", "properties": {"id": 71}}::vertex, {"id": 1125899906842672, "label": "KNOWS", "end_id": 844424930132043, "start_id": 844424930132040, "properties": {}}::edge, {"id": 844424930132043, "label": "N", "properties": {"id": 74}}::vertex, {"id": 1125899906842668, "label": "KNOWS", "end_id": 844424930132044, "start_id": 844424930132043, "properties": {}}::edge, {"id": 844424930132044, "label": "N", "properties": {"id": 75}}::vertex, {"id": 1125899906842676, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132044, "properties": {}}::edge, {"id": 844424930132047, "label": "N", "properties": {"id": 78}}::vertex]::path + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842665, "label": "KNOWS", "end_id": 844424930132040, "start_id": 844424930132039, "properties": {}}::edge, {"id": 844424930132040, "label": "N", "properties": {"id": 71}}::vertex, {"id": 1125899906842672, "label": "KNOWS", "end_id": 844424930132043, "start_id": 844424930132040, "properties": {}}::edge, {"id": 844424930132043, "label": "N", "properties": {"id": 74}}::vertex, {"id": 1125899906842675, "label": "KNOWS", "end_id": 844424930132046, "start_id": 844424930132043, "properties": {}}::edge, {"id": 844424930132046, "label": "N", "properties": {"id": 77}}::vertex, {"id": 1125899906842670, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132046, "properties": {}}::edge, {"id": 844424930132047, "label": "N", "properties": {"id": 78}}::vertex]::path + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842671, "label": "KNOWS", "end_id": 844424930132042, "start_id": 844424930132039, "properties": {}}::edge, 
{"id": 844424930132042, "label": "N", "properties": {"id": 73}}::vertex, {"id": 1125899906842667, "label": "KNOWS", "end_id": 844424930132043, "start_id": 844424930132042, "properties": {}}::edge, {"id": 844424930132043, "label": "N", "properties": {"id": 74}}::vertex, {"id": 1125899906842668, "label": "KNOWS", "end_id": 844424930132044, "start_id": 844424930132043, "properties": {}}::edge, {"id": 844424930132044, "label": "N", "properties": {"id": 75}}::vertex, {"id": 1125899906842676, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132044, "properties": {}}::edge, {"id": 844424930132047, "label": "N", "properties": {"id": 78}}::vertex]::path + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842671, "label": "KNOWS", "end_id": 844424930132042, "start_id": 844424930132039, "properties": {}}::edge, {"id": 844424930132042, "label": "N", "properties": {"id": 73}}::vertex, {"id": 1125899906842667, "label": "KNOWS", "end_id": 844424930132043, "start_id": 844424930132042, "properties": {}}::edge, {"id": 844424930132043, "label": "N", "properties": {"id": 74}}::vertex, {"id": 1125899906842675, "label": "KNOWS", "end_id": 844424930132046, "start_id": 844424930132043, "properties": {}}::edge, {"id": 844424930132046, "label": "N", "properties": {"id": 77}}::vertex, {"id": 1125899906842670, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132046, "properties": {}}::edge, {"id": 844424930132047, "label": "N", "properties": {"id": 78}}::vertex]::path + [{"id": 844424930132039, "label": "N", "properties": {"id": 70}}::vertex, {"id": 1125899906842671, "label": "KNOWS", "end_id": 844424930132042, "start_id": 844424930132039, "properties": {}}::edge, {"id": 844424930132042, "label": "N", "properties": {"id": 73}}::vertex, {"id": 1125899906842674, "label": "KNOWS", "end_id": 844424930132045, "start_id": 844424930132042, "properties": {}}::edge, {"id": 844424930132045, "label": "N", "properties": {"id": 
76}}::vertex, {"id": 1125899906842669, "label": "KNOWS", "end_id": 844424930132046, "start_id": 844424930132045, "properties": {}}::edge, {"id": 844424930132046, "label": "N", "properties": {"id": 77}}::vertex, {"id": 1125899906842670, "label": "KNOWS", "end_id": 844424930132047, "start_id": 844424930132046, "properties": {}}::edge, {"id": 844424930132047, "label": "N", "properties": {"id": 78}}::vertex]::path +(6 rows) + +-- max_hops = 19, one short of the 20-hop route; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL, 19::agtype +); + path_count +------------ + 0 +(1 row) + +-- max_hops = 20 admits the full route; expected: path_count = 2 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL, 20::agtype +); + path_count +------------ + 2 +(1 row) + +-- DIRECTED out: 0 -> 95 must traverse 0->96->95; expected: 1 path (length 2) +SELECT path +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 95}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +) AS path +ORDER BY path; + path 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 844424930131969, "label": "N", "properties": {"id": 0}}::vertex, {"id": 1125899906842677, "label": "KNOWS", "end_id": 844424930132065, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930132065, "label": "N", "properties": {"id": 96}}::vertex, {"id": 1125899906842678, "label": "KNOWS", "end_id": 844424930132064, "start_id": 844424930132065, "properties": {}}::edge, {"id": 844424930132064, "label": "N", "properties": {"id": 95}}::vertex]::path +(1 row) + +-- UNDIRECTED: 0 .. 95 via the 95->0 back edge; expected: 1 path (length 1) +SELECT path +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 95}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + path +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"id": 0}}::vertex, {"id": 1125899906842679, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930132064, "properties": {}}::edge, {"id": 844424930132064, "label": "N", "properties": {"id": 95}}::vertex]::path +(1 row) + +-- DIRECTED out: 78 -> 70 against lattice flow; expected: path_count = 0 +SELECT count(*) AS path_count +FROM 
age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + path_count +------------ + 0 +(1 row) + +-- UNDIRECTED: 78 .. 70 reverses the lattice; expected: path_count = 6 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 6 +(1 row) + +-- isolated id 119 unreachable from 0; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 119}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 0 +(1 row) + +-- zero-length path, start == end; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)) +); + path_count +------------ + 1 +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_big', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table sp_big._ag_label_vertex +drop cascades to table sp_big._ag_label_edge +drop cascades to table sp_big."N" +drop cascades to table sp_big."KNOWS" +drop cascades to table sp_big."LIKES" +NOTICE: graph "sp_big" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Calling the age_* SRFs from inside cypher() (Tier 1). 
+-- +-- Because the functions are prefixed with age_, the cypher() parser resolves +-- the unqualified names 'shortest_path' and 'all_shortest_paths' to +-- ag_catalog.age_shortest_path / ag_catalog.age_all_shortest_paths, and the +-- graph name is auto-injected as the first argument (like vle/vertex_stats), +-- so callers pass only the bound endpoints. A whole vertex implicitly casts to +-- agtype, so the argument types resolve. The SRFs are set-returning and now +-- work in a cypher RETURN projection (ProjectSet), returning one row per path. +-- +SELECT * FROM create_graph('sp_cy'); +NOTICE: graph "sp_cy" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_cy', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_cy', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 1, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 0, "self_loops": 0} +(3 rows) + +-- shortest_path() inside a cypher RETURN; the graph name is auto-injected and +-- the bound vertices are passed; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c) +$$) AS (path agtype); + path 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex]::path +(1 row) + +-- all_shortest_paths() inside a cypher RETURN; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c) +$$) AS (path agtype); + path +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, 
"label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex]::path +(1 row) + +-- in-cypher with an explicit edge-label filter; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, 'KNOWS') +$$) AS (path agtype); + path +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex]::path +(1 row) + +-- still supported: call the SRF at the top level; expected: 1 path A..C (length 2) +SELECT path +FROM age_shortest_path( + '"sp_cy"'::agtype, + (SELECT id FROM cypher('sp_cy', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_cy', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + path 
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex]::path +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_cy', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table sp_cy._ag_label_vertex +drop cascades to table sp_cy._ag_label_edge +drop cascades to table sp_cy."N" +drop cascades to table sp_cy."KNOWS" +NOTICE: graph "sp_cy" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Edge cases: parallel/multi-edges, self-loops, unknown edge labels, +-- max_hops boundaries (0 and negative), explicit 'any' direction, and +-- NULL / unknown-graph argument errors. +-- +SELECT * FROM create_graph('sp_edge'); +NOTICE: graph "sp_edge" has been created + create_graph +-------------- + +(1 row) + +-- A and B are connected by TWO parallel KNOWS edges plus one LIKES edge. +-- B->C is a single KNOWS edge. S has a self-loop. These exercise the +-- multi-predecessor (parallel edge) logic and the label filter. 
+SELECT * FROM cypher('sp_edge', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (s:N {name: 'S'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(b), + (a)-[:LIKES]->(b), + (b)-[:KNOWS]->(c), + (s)-[:KNOWS]->(s) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_edge', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 3, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 3, "out_degree": 1, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 0, "self_loops": 0} + {"id": 844424930131972, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 1} +(4 rows) + +-- parallel edges: two distinct KNOWS edges A->B are two distinct shortest +-- paths; expected count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + count +------- + 2 +(1 row) + +-- no label filter: 2 KNOWS + 1 LIKES edge A->B are three distinct shortest +-- paths; expected count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, '"out"'::agtype); + count +------- + 3 +(1 row) + +-- single shortest path A->B picks exactly one of the parallel edges; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id 
agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype))); + count +------- + 1 +(1 row) + +-- self-loop: a vertex with an edge to itself yields only the zero-length +-- path for start == end (the self-loop is never used); count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype))); + count +------- + 1 +(1 row) + +-- all_shortest_paths with start == end (existing vertex): one zero-length +-- path; count 1 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype))); + count +------- + 1 +(1 row) + +-- unknown relationship type matches no edges: A..C filtered by a label that +-- does not exist must return no path (NOT silently fall back to all edges); +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + count +------- + 0 +(1 row) + +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + count +------- + 0 +(1 row) + +-- the zero-length (start == end) path has no edges, so an unknown label +-- still matches it; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n 
{name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- existing label that does not connect the endpoints: LIKES only exists on +-- A->B, so A..C filtered by LIKES is unreachable; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"LIKES"'::agtype, '"out"'::agtype); + count +------- + 0 +(1 row) + +-- max_hops = 0 with start == end: the zero-length path is still returned; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '0'::agtype); + count +------- + 1 +(1 row) + +-- max_hops = 0 with adjacent distinct endpoints: no path within zero hops; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '0'::agtype); + count +------- + 0 +(1 row) + +-- negative max_hops is treated as unbounded: A..C (length 2) is found; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '-1'::agtype); + count +------- + 1 +(1 row) + +-- explicit 'any' direction 
string (vs the default NULL == undirected); +-- two parallel KNOWS edges A->B give two shortest paths; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"any"'::agtype); + count +------- + 2 +(1 row) + +-- NULL start (or end) vertex yields no rows (Cypher null semantics: a null +-- endpoint simply produces no match, it is not an error); count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + NULL::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype))); + count +------- + 0 +(1 row) + +-- NULL end vertex likewise yields no rows; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype); + count +------- + 0 +(1 row) + +-- all_shortest_paths with a NULL endpoint also yields no rows; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype); + count +------- + 0 +(1 row) + +-- a single relationship type may be passed as a one-element array; expected: +-- same as the bare-string form, A..C under KNOWS (length 2); count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS"]'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- multiple relationship types are not yet supported; expected: ERROR +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n 
{name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); +ERROR: age_shortest_path: multiple relationship types are not yet supported +-- a non-zero minimum hop count is not yet supported; expected: ERROR +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); +ERROR: age_shortest_path: a minimum hop count is not yet supported +-- a minimum hop count of 0 is the default and is accepted; A..C (length 2); +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 0::agtype); + count +------- + 1 +(1 row) + +-- a graph name that does not exist is an error +SELECT count(*) FROM age_shortest_path('"no_such_graph"'::agtype, '1'::agtype, '2'::agtype); +ERROR: schema "no_such_graph" does not exist +-- cleanup +SELECT * FROM drop_graph('sp_edge', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table sp_edge._ag_label_vertex +drop cascades to table sp_edge._ag_label_edge +drop cascades to table sp_edge."N" +drop cascades to table sp_edge."KNOWS" +drop cascades to table sp_edge."LIKES" +NOTICE: graph "sp_edge" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_shortest_path.sql b/regress/sql/age_shortest_path.sql new file mode 100644 index 000000000..82b4d66bb --- /dev/null +++ b/regress/sql/age_shortest_path.sql @@ -0,0 +1,630 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- +-- age_shortest_path / age_all_shortest_paths +-- + +SELECT * FROM create_graph('sp_graph'); + +-- Build a small deterministic graph: +-- +-- A +-- / \ +-- B C (A->B, A->C, B->D, C->D : two shortest A..D paths) +-- \ / +-- D +-- | +-- E (D->E : unique 3-hop path A..E) +-- +-- Z (isolated, unreachable) +-- +SELECT * FROM cypher('sp_graph', $$ + CREATE (a:Person {name: 'A'}), + (b:Person {name: 'B'}), + (c:Person {name: 'C'}), + (d:Person {name: 'D'}), + (e:Person {name: 'E'}), + (z:Person {name: 'Z'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c), + (b)-[:KNOWS]->(d), + (c)-[:KNOWS]->(d), + (d)-[:KNOWS]->(e) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_graph', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- A -> D shortest path (length 2); expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +); + +-- all shortest A -> D; expected: 2 paths (A-B-D and A-C-D), each 
length 2 +SELECT path +FROM age_all_shortest_paths( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + +-- A -> E unique 3-hop path; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'E'}) RETURN id(n) $$) AS (id agtype)) +); + +-- A -> E with max_hops = 2; expected: path_count = 0 (E is 3 hops away) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'E'}) RETURN id(n) $$) AS (id agtype)), + NULL, NULL, NULL, 2::agtype +); + +-- zero-length path, start == end; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)) +); + +-- unreachable vertex Z; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'Z'}) RETURN id(n) $$) AS (id agtype)) +); + +-- direction 'in': D -> A traversing edges backwards; expected: path_count = 2 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) 
$$) AS (id agtype)), + NULL, '"in"'::agtype +); + +-- direction 'out': D -> A not reachable forwards; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + +-- label filter 'KNOWS': A -> D still found; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype +); + +-- error: invalid direction string; expected: ERROR (must be 'out', 'in', or 'any') +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"sideways"'::agtype +); + +-- error: start argument is neither a vertex nor an integer id; expected: ERROR +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + '"not_a_vertex"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) +); + +-- +-- Non-existent endpoint guards. These must NOT crash the backend and must +-- return no rows (a path can only exist between vertices in the graph). +-- Previously, start == end on a non-existent vertex id was matched at BFS +-- depth 0 and path reconstruction dereferenced a missing vertex, crashing +-- the server. 
+-- + +-- start == end on a non-existent integer id; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path('"sp_graph"'::agtype, 999999::agtype, 999999::agtype); + +-- existing start -> non-existent end; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + 999999::agtype +); + +-- non-existent start -> existing end; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + 999999::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)) +); + +-- all-shortest-paths with start == end non-existent; expected: 0 rows +SELECT count(*) AS path_count +FROM age_all_shortest_paths('"sp_graph"'::agtype, 999999::agtype, 999999::agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_graph', true); + + +-- +-- Empty graph: a graph that exists but has no vertices must return no rows +-- (and must not hang or crash) for any endpoint query. +-- +SELECT * FROM create_graph('sp_empty'); +SELECT count(*) AS path_count +FROM age_shortest_path('"sp_empty"'::agtype, 0::agtype, 1::agtype); +SELECT count(*) AS path_count +FROM age_all_shortest_paths('"sp_empty"'::agtype, 0::agtype, 0::agtype); +SELECT * FROM drop_graph('sp_empty', true); + + + +-- +-- A large, programmatically generated graph (120 nodes) exercising long +-- shortest paths (length up to 20), high-multiplicity all-shortest-paths, +-- label filtering, and directed vs. undirected reachability. +-- +-- Nodes: (:N {id: 0..119}). Structures built on top of them: +-- +-- * Main chain 0 -> 1 -> ... -> 20 (unique 20-hop path) +-- * Alternate chain 0 -> 50 -> 51 -> ... 
-> 68 -> 20 +-- (a second, disjoint 20-hop path 0..20) +-- => all-shortest-paths 0..20 under KNOWS = 2 paths of length 20 +-- * 3x3 lattice on ids 70..78, id = 70 + 3*row + col, edges go right +-- (id->id+1) and down (id->id+3). Monotone 70..78 paths: +-- => all-shortest-paths 70..78 = C(4,2) = 6 paths of length 4 +-- * LIKES shortcut 0 -[:LIKES]-> 20 (1 hop; only visible when the edge +-- label filter is NOT restricted to KNOWS) +-- * Back-edge triangle 0 -> 96 -> 95 -> 0 +-- => directed 0->95 = 2 hops (0-96-95); undirected 0..95 = 1 hop +-- * Many unused ids (e.g. 119) remain isolated / unreachable. +-- +SELECT * FROM create_graph('sp_big'); + +-- 120 vertices, ids 0..119 +SELECT * FROM cypher('sp_big', $$ + UNWIND range(0, 119) AS i CREATE (:N {id: i}) +$$) AS (result agtype); + +-- main chain 0->1->...->20 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + UNWIND range(0, 19) AS i + MATCH (a:N {id: i}), (b:N {id: i + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + +-- alternate, disjoint 20-hop path 0->50->51->...->68->20 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 50}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); +SELECT * FROM cypher('sp_big', $$ + UNWIND range(50, 67) AS i + MATCH (a:N {id: i}), (b:N {id: i + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 68}), (b:N {id: 20}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + +-- 3x3 lattice on ids 70..78: right edges (id -> id+1) +SELECT * FROM cypher('sp_big', $$ + UNWIND [0, 1, 2] AS r + UNWIND [0, 1] AS c + MATCH (a:N {id: 70 + 3 * r + c}), (b:N {id: 70 + 3 * r + c + 1}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + +-- 3x3 lattice: down edges (id -> id+3) +SELECT * FROM cypher('sp_big', $$ + UNWIND [0, 1] AS r + UNWIND [0, 1, 2] AS c + MATCH (a:N {id: 70 + 3 * r + c}), (b:N {id: 70 + 3 * (r + 1) + c}) + CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + +-- back-edge triangle 0 -> 96 -> 95 -> 
0 (KNOWS) +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 96}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 96}), (b:N {id: 95}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 95}), (b:N {id: 0}) CREATE (a)-[:KNOWS]->(b) +$$) AS (result agtype); + +-- labelled shortcut 0 -[:LIKES]-> 20 +SELECT * FROM cypher('sp_big', $$ + MATCH (a:N {id: 0}), (b:N {id: 20}) CREATE (a)-[:LIKES]->(b) +$$) AS (result agtype); + +-- sanity: vertex count (also materializes the global context); expected: count = 120 +SELECT * FROM cypher('sp_big', $$ MATCH (n) RETURN count(n) $$) AS (n agtype); + +-- all shortest 0 -> 20 under KNOWS (main chain + disjoint alternate); +-- expected: 2 paths, each exactly 20 hops +SELECT path +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +) AS path +ORDER BY path; + +-- any label: the LIKES shortcut collapses 0 -> 20; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + +-- all shortest 70 -> 78 across the 3x3 lattice; expected: path_count = 6 (C(4,2)) +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +); + +-- the lattice paths listed; expected: 6 paths, each 4 hops +SELECT path +FROM 
age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype +) AS path +ORDER BY path; + +-- max_hops = 19, one short of the 20-hop route; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL, 19::agtype +); + +-- max_hops = 20 admits the full route; expected: path_count = 2 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 20}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL, 20::agtype +); + +-- DIRECTED out: 0 -> 95 must traverse 0->96->95; expected: 1 path (length 2) +SELECT path +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 95}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +) AS path +ORDER BY path; + +-- UNDIRECTED: 0 .. 
95 via the 95->0 back edge; expected: 1 path (length 1) +SELECT path +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 95}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + +-- DIRECTED out: 78 -> 70 against lattice flow; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)), + NULL, '"out"'::agtype +); + +-- UNDIRECTED: 78 .. 70 reverses the lattice; expected: path_count = 6 +SELECT count(*) AS path_count +FROM age_all_shortest_paths( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 78}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 70}) RETURN id(n) $$) AS (id agtype)) +); + +-- isolated id 119 unreachable from 0; expected: path_count = 0 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 119}) RETURN id(n) $$) AS (id agtype)) +); + +-- zero-length path, start == end; expected: path_count = 1 +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_big"'::agtype, + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_big', $$ MATCH (n:N {id: 0}) RETURN id(n) $$) AS (id agtype)) +); + +-- cleanup +SELECT * FROM drop_graph('sp_big', true); + +-- +-- Calling the age_* SRFs from inside cypher() (Tier 1). 
+-- +-- Because the functions are prefixed with age_, the cypher() parser resolves +-- the unqualified names 'shortest_path' and 'all_shortest_paths' to +-- ag_catalog.age_shortest_path / ag_catalog.age_all_shortest_paths, and the +-- graph name is auto-injected as the first argument (like vle/vertex_stats), +-- so callers pass only the bound endpoints. A whole vertex implicitly casts to +-- agtype, so the argument types resolve. The SRFs are set-returning and now +-- work in a cypher RETURN projection (ProjectSet), returning one row per path. +-- +SELECT * FROM create_graph('sp_cy'); + +SELECT * FROM cypher('sp_cy', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_cy', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- shortest_path() inside a cypher RETURN; the graph name is auto-injected and +-- the bound vertices are passed; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c) +$$) AS (path agtype); + +-- all_shortest_paths() inside a cypher RETURN; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c) +$$) AS (path agtype); + +-- in-cypher with an explicit edge-label filter; expected: 1 path A..C (length 2) +SELECT * FROM cypher('sp_cy', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, 'KNOWS') +$$) AS (path agtype); + +-- still supported: call the SRF at the top level; expected: 1 path A..C (length 2) +SELECT path +FROM age_shortest_path( + '"sp_cy"'::agtype, + (SELECT id FROM cypher('sp_cy', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_cy', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)) +) AS path +ORDER BY path; + +-- 
cleanup +SELECT * FROM drop_graph('sp_cy', true); + +-- +-- Edge cases: parallel/multi-edges, self-loops, unknown edge labels, +-- max_hops boundaries (0 and negative), explicit 'any' direction, and +-- NULL / unknown-graph argument errors. +-- +SELECT * FROM create_graph('sp_edge'); + +-- A and B are connected by TWO parallel KNOWS edges plus one LIKES edge. +-- B->C is a single KNOWS edge. S has a self-loop. These exercise the +-- multi-predecessor (parallel edge) logic and the label filter. +SELECT * FROM cypher('sp_edge', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (s:N {name: 'S'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(b), + (a)-[:LIKES]->(b), + (b)-[:KNOWS]->(c), + (s)-[:KNOWS]->(s) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_edge', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- parallel edges: two distinct KNOWS edges A->B are two distinct shortest +-- paths; expected count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + +-- no label filter: 2 KNOWS + 1 LIKES edge A->B are three distinct shortest +-- paths; expected count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, '"out"'::agtype); + +-- single shortest path A->B picks exactly one of the parallel edges; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n 
{name:'B'}) RETURN id(n) $$) AS (id agtype))); + +-- self-loop: a vertex with an edge to itself yields only the zero-length +-- path for start == end (the self-loop is never used); count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype))); + +-- all_shortest_paths with start == end (existing vertex): one zero-length +-- path; count 1 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype))); + +-- unknown relationship type matches no edges: A..C filtered by a label that +-- does not exist must return no path (NOT silently fall back to all edges); +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + +-- the zero-length (start == end) path has no edges, so an unknown label +-- still matches it; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'S'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype); + +-- existing label that 
does not connect the endpoints: LIKES only exists on +-- A->B, so A..C filtered by LIKES is unreachable; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"LIKES"'::agtype, '"out"'::agtype); + +-- max_hops = 0 with start == end: the zero-length path is still returned; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '0'::agtype); + +-- max_hops = 0 with adjacent distinct endpoints: no path within zero hops; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '0'::agtype); + +-- negative max_hops is treated as unbounded: A..C (length 2) is found; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, NULL::agtype, NULL::agtype, '-1'::agtype); + +-- explicit 'any' direction string (vs the default NULL == undirected); +-- two parallel KNOWS edges A->B give two shortest paths; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id 
agtype)), + '"KNOWS"'::agtype, '"any"'::agtype); + +-- NULL start (or end) vertex yields no rows (Cypher null semantics: a null +-- endpoint simply produces no match, it is not an error); count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + NULL::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype))); + +-- NULL end vertex likewise yields no rows; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype); + +-- all_shortest_paths with a NULL endpoint also yields no rows; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype); + +-- a single relationship type may be passed as a one-element array; expected: +-- same as the bare-string form, A..C under KNOWS (length 2); count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS"]'::agtype, '"out"'::agtype); + +-- multiple relationship types are not yet supported; expected: ERROR +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); + +-- a non-zero minimum hop count is not yet supported; expected: ERROR +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id 
agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + +-- a minimum hop count of 0 is the default and is accepted; A..C (length 2); +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 0::agtype); + +-- a graph name that does not exist is an error +SELECT count(*) FROM age_shortest_path('"no_such_graph"'::agtype, '1'::agtype, '2'::agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_edge', true); diff --git a/sql/agtype_typecast.sql b/sql/agtype_typecast.sql index abca5e518..f12f215f6 100644 --- a/sql/agtype_typecast.sql +++ b/sql/agtype_typecast.sql @@ -98,6 +98,37 @@ CALLED ON NULL INPUT PARALLEL UNSAFE -- might be safe AS 'MODULE_PATHNAME'; +-- Unweighted (hop-count) shortest path between two vertices, computed over the +-- cached global graph adjacency via BFS. Returns a single path (0 or 1 rows). +-- Argument order mirrors the Cypher shortestPath() pattern +-- (a)-[:type*min_hops..max_hops]->(b): +-- (graph_name, start, end, edge_types, direction, min_hops, max_hops) +CREATE FUNCTION ag_catalog.age_shortest_path(IN agtype, IN agtype, IN agtype, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL) + RETURNS SETOF agtype +LANGUAGE C +STABLE +CALLED ON NULL INPUT +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + +-- All unweighted shortest paths between two vertices (one path per row). +-- Same argument order as age_shortest_path. 
+CREATE FUNCTION ag_catalog.age_all_shortest_paths(IN agtype, IN agtype, IN agtype, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL, + IN agtype DEFAULT NULL) + RETURNS SETOF agtype +LANGUAGE C +STABLE +CALLED ON NULL INPUT +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + -- function to build an edge for a VLE match CREATE FUNCTION ag_catalog.age_build_vle_match_edge(agtype, agtype) RETURNS agtype diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index dcd31b9df..5ac9dea65 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2785,6 +2785,7 @@ static Query *transform_cypher_return(cypher_parsestate *cpstate, query->jointree = makeFromExpr(pstate->p_joinlist, NULL); query->hasAggs = pstate->p_hasAggs; query->hasSubLinks = pstate->p_hasSubLinks; + query->hasTargetSRFs = pstate->p_hasTargetSRFs; assign_query_collations(pstate, query); diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index e62f2c6e7..7e4f44600 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -2265,16 +2265,19 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) fname = list_make2(makeString("ag_catalog"), makeString(ag_name)); /* - * Currently 3 functions need the graph name passed in as the first - * argument - in addition to the other arguments: startNode, endNode, - * and vle. So, check for those 3 functions here and that the arg list - * is not empty. Then prepend the graph name if necessary. + * Currently these functions need the graph name passed in as the + * first argument - in addition to the other arguments: startNode, + * endNode, vle, vertex_stats, shortest_path, and all_shortest_paths. + * So, check for those functions here and that the arg list is not + * empty. Then prepend the graph name if necessary. 
*/ if ((list_length(targs) != 0) && (strcasecmp("startNode", name) == 0 || strcasecmp("endNode", name) == 0 || strcasecmp("vle", name) == 0 || - strcasecmp("vertex_stats", name) == 0)) + strcasecmp("vertex_stats", name) == 0 || + strcasecmp("shortest_path", name) == 0 || + strcasecmp("all_shortest_paths", name) == 0)) { char *graph_name = cpstate->graph_name; Datum d = string_to_agtype(graph_name); diff --git a/src/backend/utils/adt/age_vle.c b/src/backend/utils/adt/age_vle.c index 9e433b9e2..804d9e17e 100644 --- a/src/backend/utils/adt/age_vle.c +++ b/src/backend/utils/adt/age_vle.c @@ -65,6 +65,8 @@ #include "common/hashfn.h" #include "funcapi.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" #include "utils/datum.h" #include "utils/lsyscache.h" @@ -1065,6 +1067,14 @@ static bool dfs_find_a_path_between(VLE_local_context *vlelctx) bool found = false; uint32 edge_hashvalue; + /* + * Allow this traversal to be cancelled (e.g. by a user Ctrl-C or a + * statement_timeout). On a large or densely connected graph this DFS + * can run for a long time, so we must yield to interrupt processing + * on every iteration. + */ + CHECK_FOR_INTERRUPTS(); + /* get an edge, but leave it on the stack for now */ edge_id = gid_stack_peek(edge_stack); /* @@ -1200,6 +1210,14 @@ static bool dfs_find_a_path_from(VLE_local_context *vlelctx) bool found = false; uint32 edge_hashvalue; + /* + * Allow this traversal to be cancelled (e.g. by a user Ctrl-C or a + * statement_timeout). On a large or densely connected graph this DFS + * can run for a long time, so we must yield to interrupt processing + * on every iteration. 
+ */ + CHECK_FOR_INTERRUPTS(); + /* get an edge, but leave it on the stack for now */ edge_id = gid_stack_peek(edge_stack); /* @@ -2774,3 +2792,781 @@ Datum _ag_enforce_edge_uniqueness(PG_FUNCTION_ARGS) hash_destroy(exists_hash); PG_RETURN_BOOL(true); } + +/* + * --------------------------------------------------------------------------- + * Shortest path / all shortest paths + * --------------------------------------------------------------------------- + * + * Plain (non-grammar) set-returning functions that compute the unweighted + * (hop-count) shortest path between two vertices, built directly on top of the + * cached global graph (GRAPH_global_context) and its flat-array adjacency + * (VertexEdgeArray). These do NOT go through the VLE grammar/transform path; + * they are user-callable helpers: + * + * ag_catalog.age_shortest_path(graph, start, end + * [, edge_types [, direction [, min_hops [, max_hops]]]]) + * ag_catalog.age_all_shortest_paths(graph, start, end + * [, edge_types [, direction [, min_hops [, max_hops]]]]) + * + * Both perform a breadth-first search from the start vertex. age_shortest_path + * returns a single path (0 or 1 rows); age_all_shortest_paths returns every + * path whose length equals the minimum hop count (one row per path), by + * recording a predecessor multiset during the BFS and enumerating the + * resulting shortest-path DAG. + * + * Because BFS depth strictly increases, every emitted path is simple (no + * repeated vertex and therefore no repeated edge), satisfying openCypher + * edge-isomorphism for these fixed-length results. + */ + +/* Simple FIFO queue of graphids for the BFS frontier. */ +typedef struct sp_queue +{ + graphid *data; + int64 head; + int64 tail; + int64 cap; +} sp_queue; + +/* One predecessor edge on a shortest path (all-shortest-paths mode). */ +typedef struct sp_pred +{ + graphid edge; + graphid parent_vertex; +} sp_pred; + +/* Per-vertex BFS bookkeeping, keyed by vertex_id in the visited hashtable. 
*/ +typedef struct sp_visit_entry +{ + graphid vertex_id; /* hash key — must be first */ + int64 depth; /* BFS depth from the source vertex */ + graphid parent_edge; /* single-path reconstruction */ + graphid parent_vertex; /* single-path reconstruction */ + List *preds; /* sp_pred * list for all-shortest-paths mode */ +} sp_visit_entry; + +/* Cross-call SRF state: the precomputed result paths streamed one per call. */ +typedef struct sp_srf_state +{ + Datum *paths; + int64 npaths; + int64 next; +} sp_srf_state; + +static void sp_queue_init(sp_queue *q) +{ + q->cap = 1024; + q->head = 0; + q->tail = 0; + q->data = palloc(sizeof(graphid) * q->cap); +} + +static void sp_queue_push(sp_queue *q, graphid v) +{ + if (q->tail == q->cap) + { + q->cap = q->cap * 2; + q->data = repalloc(q->data, sizeof(graphid) * q->cap); + } + q->data[q->tail] = v; + q->tail = q->tail + 1; +} + +static bool sp_queue_is_empty(sp_queue *q) +{ + return q->head == q->tail; +} + +static graphid sp_queue_pop(sp_queue *q) +{ + graphid v = q->data[q->head]; + + q->head = q->head + 1; + return v; +} + +/* Resolve a vertex argument (a vertex agtype or an integer id) to a graphid. */ +static graphid sp_agtype_to_graphid(agtype *agt, const char *argname) +{ + agtype_value *agtv = NULL; + + agtv = get_agtype_value("age_shortest_path", agt, AGTV_VERTEX, false); + + if (agtv != NULL && agtv->type == AGTV_VERTEX) + { + agtv = GET_AGTYPE_VALUE_OBJECT_VALUE(agtv, "id"); + } + else if (agtv == NULL || agtv->type != AGTV_INTEGER) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("%s argument must be a vertex or the integer id", + argname))); + } + + return agtv->val.int_value; +} + +/* Resolve the optional direction argument; NULL defaults to undirected. 
*/ +static cypher_rel_dir sp_agtype_to_direction(agtype *agt) +{ + agtype_value *agtv = NULL; + char *s = NULL; + cypher_rel_dir dir = CYPHER_REL_DIR_NONE; + + if (agt == NULL) + { + return CYPHER_REL_DIR_NONE; + } + + agtv = get_agtype_value("age_shortest_path", agt, AGTV_STRING, true); + s = pnstrdup(agtv->val.string.val, agtv->val.string.len); + + if (pg_strcasecmp(s, "out") == 0) + { + dir = CYPHER_REL_DIR_RIGHT; + } + else if (pg_strcasecmp(s, "in") == 0) + { + dir = CYPHER_REL_DIR_LEFT; + } + else if (pg_strcasecmp(s, "any") == 0) + { + dir = CYPHER_REL_DIR_NONE; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("direction argument must be one of 'out', 'in', or 'any'"))); + } + + pfree_if_not_null(s); + return dir; +} + +/* + * Wrap an interleaved [vertex, edge, vertex, ... , vertex] graphid array in a + * VLE_path_container and materialize it as an AGTV_PATH agtype Datum. + */ +static Datum sp_build_path_datum(Oid graph_oid, graphid *alt, int64 alt_len) +{ + VLE_path_container *vpc = NULL; + graphid *arr = NULL; + agtype_value *agtv_path = NULL; + agtype *agt = NULL; + + vpc = create_VLE_path_container(alt_len); + vpc->graph_oid = graph_oid; + + arr = GET_GRAPHID_ARRAY_FROM_CONTAINER(vpc); + memcpy(arr, alt, sizeof(graphid) * alt_len); + + vpc->start_vid = alt[0]; + vpc->end_vid = alt[alt_len - 1]; + + agtv_path = build_path(vpc); + agt = agtype_value_to_agtype(agtv_path); + + return AGTYPE_P_GET_DATUM(agt); +} + +/* + * Breadth-first search from source toward target over the flat-array + * adjacency. Returns the visited hashtable; sets *out_found and (if found) + * *out_target_depth (the shortest hop count). In all-shortest-paths mode + * (collect_all) every shortest-path predecessor is recorded per vertex. 
+ */ +static HTAB *sp_run_bfs(GRAPH_global_context *ggctx, graphid source, + graphid target, bool filter_edges, Oid edge_label_oid, + cypher_rel_dir dir, int64 max_hops, bool collect_all, + int64 *out_target_depth, bool *out_found) +{ + HASHCTL ctl; + HTAB *visited = NULL; + sp_queue q; + sp_visit_entry *se = NULL; + bool found = false; + int64 target_depth = -1; + bool dir_out = (dir == CYPHER_REL_DIR_RIGHT || dir == CYPHER_REL_DIR_NONE); + bool dir_in = (dir == CYPHER_REL_DIR_LEFT || dir == CYPHER_REL_DIR_NONE); + + /* visited hashtable: graphid -> sp_visit_entry */ + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(int64); + ctl.entrysize = sizeof(sp_visit_entry); + ctl.hash = graphid_hash; + visited = hash_create("age shortest path visited", 1024, &ctl, + HASH_ELEM | HASH_FUNCTION); + + /* + * A path can only exist between vertices that actually exist in the graph. + * If either endpoint is missing we are done: report "not found" and return + * the (empty) visited table. This guard is critical: without it a source + * that equals a non-existent target would be matched at depth 0 (see the + * "u == target" check below), and path reconstruction would then try to + * materialize a vertex that does not exist, dereferencing invalid memory + * and crashing the backend. + */ + if (get_vertex_entry(ggctx, source) == NULL || + get_vertex_entry(ggctx, target) == NULL) + { + *out_target_depth = -1; + *out_found = false; + return visited; + } + + sp_queue_init(&q); + + /* seed the frontier with the source vertex at depth 0 */ + se = (sp_visit_entry *) hash_search(visited, &source, HASH_ENTER, NULL); + se->vertex_id = source; + se->depth = 0; + se->parent_edge = 0; + se->parent_vertex = source; + se->preds = NIL; + sp_queue_push(&q, source); + + while (!sp_queue_is_empty(&q)) + { + graphid u = sp_queue_pop(&q); + sp_visit_entry *ue = NULL; + vertex_entry *ve = NULL; + int64 du = 0; + int pass = 0; + + /* + * Allow this search to be cancelled (e.g. 
by a user Ctrl-C or a + * statement_timeout). On a large graph the BFS frontier can grow very + * large, so we must yield to interrupt processing on every iteration. + */ + CHECK_FOR_INTERRUPTS(); + + ue = (sp_visit_entry *) hash_search(visited, &u, HASH_FIND, NULL); + du = ue->depth; + + /* target reached: record its (shortest) depth */ + if (u == target) + { + found = true; + if (target_depth < 0) + { + target_depth = du; + } + /* single-path mode: the first discovery is sufficient */ + if (!collect_all) + { + break; + } + } + + /* never expand at or beyond the shortest target depth */ + if (target_depth >= 0 && du >= target_depth) + { + continue; + } + + /* respect the optional upper hop bound */ + if (max_hops >= 0 && du >= max_hops) + { + continue; + } + + ve = get_vertex_entry(ggctx, u); + if (ve == NULL) + { + continue; + } + + /* pass 0 = outgoing edges, pass 1 = incoming edges */ + for (pass = 0; pass < 2; pass++) + { + VertexEdgeArray *edges = NULL; + int32 i = 0; + + if (pass == 0) + { + if (!dir_out) + { + continue; + } + edges = get_vertex_entry_edges_out_array(ve); + } + else + { + if (!dir_in) + { + continue; + } + edges = get_vertex_entry_edges_in_array(ve); + } + + if (edges == NULL || edges->array == NULL) + { + continue; + } + + for (i = 0; i < edges->size; i++) + { + graphid eid = edges->array[i]; + edge_entry *ee = NULL; + graphid v = 0; + sp_visit_entry *vse = NULL; + bool was_present = false; + + ee = get_edge_entry(ggctx, eid); + if (ee == NULL) + { + continue; + } + + /* + * Optional edge label filter. When a label filter is active + * we keep only edges whose label table oid matches. Note that + * a label name which does not exist in this graph resolves to + * InvalidOid; because no real edge can have an InvalidOid + * label table, every edge is then skipped and only the + * zero-length (start == end) path can match -- matching the + * openCypher semantics that an unknown relationship type + * matches no relationships. 
+ */ + if (filter_edges && + get_edge_entry_label_table_oid(ee) != edge_label_oid) + { + continue; + } + + /* the neighbor depends on which side of the edge u is on */ + if (pass == 0) + { + v = get_edge_entry_end_vertex_id(ee); + } + else + { + v = get_edge_entry_start_vertex_id(ee); + } + + /* self loops never shorten a path to a different vertex */ + if (v == u) + { + continue; + } + + vse = (sp_visit_entry *) hash_search(visited, &v, HASH_ENTER, + &was_present); + if (!was_present) + { + vse->vertex_id = v; + vse->depth = du + 1; + vse->parent_edge = eid; + vse->parent_vertex = u; + vse->preds = NIL; + + if (collect_all) + { + sp_pred *p = palloc(sizeof(sp_pred)); + + p->edge = eid; + p->parent_vertex = u; + vse->preds = lappend(vse->preds, p); + } + + sp_queue_push(&q, v); + } + else if (collect_all && vse->depth == du + 1) + { + /* another equally-short predecessor of v */ + sp_pred *p = palloc(sizeof(sp_pred)); + + p->edge = eid; + p->parent_vertex = u; + vse->preds = lappend(vse->preds, p); + } + } + } + } + + *out_target_depth = target_depth; + *out_found = found; + return visited; +} + +/* + * Recursively enumerate every shortest path by walking the predecessor DAG + * from target back to source. Each completed path is appended to *out as a + * freshly allocated interleaved graphid array of length alt_len. + */ +static void sp_enumerate(HTAB *visited, graphid source, graphid cur, + graphid *alt, int64 alt_len, int64 pos, List **out) +{ + sp_visit_entry *e = NULL; + ListCell *lc = NULL; + + /* + * Enumerating every shortest path can be combinatorially expensive, so + * allow the user to cancel (Ctrl-C / statement_timeout) at each step. 
+ */ + CHECK_FOR_INTERRUPTS(); + + alt[pos] = cur; + + if (cur == source) + { + /* a complete path only when we have consumed the whole array */ + if (pos == 0) + { + graphid *copy = palloc(sizeof(graphid) * alt_len); + + memcpy(copy, alt, sizeof(graphid) * alt_len); + *out = lappend(*out, copy); + } + return; + } + + e = (sp_visit_entry *) hash_search(visited, &cur, HASH_FIND, NULL); + if (e == NULL) + { + return; + } + + foreach(lc, e->preds) + { + sp_pred *p = (sp_pred *) lfirst(lc); + + alt[pos - 1] = p->edge; + sp_enumerate(visited, source, p->parent_vertex, alt, alt_len, pos - 2, + out); + } +} + +/* + * Resolve arguments, run the BFS, and materialize the result path(s) as an + * array of AGTV_PATH agtype Datums. Returns NULL with *out_count == 0 when no + * path exists. Caller must run in a context that survives the SRF. + */ +static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, + agtype *end_agt, agtype *label_agt, + agtype *dir_agt, agtype *minhops_agt, + agtype *maxhops_agt, bool collect_all, + int64 *out_count) +{ + agtype_value *agtv_temp = NULL; + char *graph_name = NULL; + Oid graph_oid = InvalidOid; + GRAPH_global_context *ggctx = NULL; + graphid source = 0; + graphid target = 0; + bool filter_edges = false; + Oid edge_label_oid = InvalidOid; + cypher_rel_dir dir = CYPHER_REL_DIR_NONE; + int64 max_hops = -1; + HTAB *visited = NULL; + int64 target_depth = -1; + bool found = false; + Datum *paths = NULL; + + *out_count = 0; + + /* the graph name is required */ + if (graph_name_agt == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("age_shortest_path: graph name cannot be NULL"))); + } + + agtv_temp = get_agtype_value("age_shortest_path", graph_name_agt, + AGTV_STRING, true); + graph_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); + graph_oid = get_graph_oid(graph_name); + + /* + * A NULL start or end vertex yields no rows, matching Cypher semantics + * where a null endpoint 
simply produces no match (it is not an error). + */ + if (start_agt == NULL || end_agt == NULL) + { + return NULL; + } + + source = sp_agtype_to_graphid(start_agt, "start vertex"); + target = sp_agtype_to_graphid(end_agt, "end vertex"); + + /* + * Optional edge type filter. A single relationship type may be supplied + * either as a bare string or as a one-element array. Multiple relationship + * types (an array with more than one element) are not yet supported. + */ + if (label_agt != NULL) + { + char *label_name = NULL; + + if (AGT_ROOT_IS_ARRAY(label_agt) && !AGT_ROOT_IS_SCALAR(label_agt)) + { + int nelems = AGT_ROOT_COUNT(label_agt); + + if (nelems > 1) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("age_shortest_path: multiple relationship types are not yet supported"))); + } + + if (nelems == 1) + { + agtv_temp = get_ith_agtype_value_from_container( + &label_agt->root, 0); + if (agtv_temp->type != AGTV_STRING) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("age_shortest_path: relationship type must be a string"))); + } + if (agtv_temp->val.string.len != 0) + { + label_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); + edge_label_oid = get_label_relation(label_name, graph_oid); + filter_edges = true; + } + } + } + else + { + agtv_temp = get_agtype_value("age_shortest_path", label_agt, + AGTV_STRING, true); + if (agtv_temp->val.string.len != 0) + { + label_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); + edge_label_oid = get_label_relation(label_name, graph_oid); + filter_edges = true; + } + } + } + + /* optional direction (defaults to undirected) */ + dir = sp_agtype_to_direction(dir_agt); + + /* + * Optional minimum hop count. A genuine minimum-length constraint needs a + * different search than plain BFS, so for now only the default (NULL or 0) + * is accepted; any other value is rejected loudly. 
+ */ + if (minhops_agt != NULL) + { + int64 min_hops = 0; + + agtv_temp = get_agtype_value("age_shortest_path", minhops_agt, + AGTV_INTEGER, true); + min_hops = agtv_temp->val.int_value; + if (min_hops != 0) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("age_shortest_path: a minimum hop count is not yet supported"))); + } + } + + /* optional upper hop bound (NULL or negative means unbounded) */ + if (maxhops_agt != NULL) + { + agtv_temp = get_agtype_value("age_shortest_path", maxhops_agt, + AGTV_INTEGER, true); + max_hops = agtv_temp->val.int_value; + if (max_hops < 0) + { + max_hops = -1; + } + } + + /* build / fetch the global graph cache for this graph */ + ggctx = manage_GRAPH_global_contexts(graph_name, graph_oid); + if (ggctx == NULL) + { + return NULL; + } + + /* run the breadth-first search */ + visited = sp_run_bfs(ggctx, source, target, filter_edges, edge_label_oid, + dir, max_hops, collect_all, &target_depth, &found); + + if (!found) + { + hash_destroy(visited); + return NULL; + } + + if (!collect_all) + { + /* reconstruct the single shortest path from the parent pointers */ + int64 alt_len = (2 * target_depth) + 1; + graphid *alt = palloc(sizeof(graphid) * alt_len); + int64 pos = alt_len - 1; + graphid cur = target; + + alt[pos] = cur; + pos = pos - 1; + while (cur != source) + { + sp_visit_entry *e = NULL; + + e = (sp_visit_entry *) hash_search(visited, &cur, HASH_FIND, NULL); + alt[pos] = e->parent_edge; + pos = pos - 1; + alt[pos] = e->parent_vertex; + pos = pos - 1; + cur = e->parent_vertex; + } + + paths = palloc(sizeof(Datum)); + paths[0] = sp_build_path_datum(graph_oid, alt, alt_len); + *out_count = 1; + } + else + { + /* enumerate every equal-length shortest path */ + int64 alt_len = (2 * target_depth) + 1; + graphid *alt = palloc(sizeof(graphid) * alt_len); + List *arrays = NIL; + ListCell *lc = NULL; + int64 n = 0; + int64 idx = 0; + + sp_enumerate(visited, source, target, alt, alt_len, alt_len - 1, + &arrays); + + n 
= list_length(arrays); + paths = palloc(sizeof(Datum) * (n > 0 ? n : 1)); + foreach(lc, arrays) + { + graphid *a = (graphid *) lfirst(lc); + + paths[idx] = sp_build_path_datum(graph_oid, a, alt_len); + idx = idx + 1; + } + *out_count = n; + } + + hash_destroy(visited); + return paths; +} + +/* + * Shared SRF driver for age_shortest_path / age_all_shortest_paths. The first + * call computes every result path up front and stores them; subsequent calls + * stream them one per row. + */ +static Datum sp_srf_impl(FunctionCallInfo fcinfo, bool collect_all) +{ + FuncCallContext *funcctx = NULL; + sp_srf_state *state = NULL; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldctx; + agtype *a_graph = NULL; + agtype *a_start = NULL; + agtype *a_end = NULL; + agtype *a_label = NULL; + agtype *a_dir = NULL; + agtype *a_min = NULL; + agtype *a_max = NULL; + + funcctx = SRF_FIRSTCALL_INIT(); + oldctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* + * Argument order mirrors the Cypher shortestPath() pattern + * (a)-[:type*min_hops..max_hops]->(b): + * 0 graph, 1 start, 2 end, 3 edge_types, 4 direction, + * 5 min_hops, 6 max_hops + */ + a_graph = PG_ARGISNULL(0) ? NULL : AG_GET_ARG_AGTYPE_P(0); + a_start = PG_ARGISNULL(1) ? NULL : AG_GET_ARG_AGTYPE_P(1); + a_end = PG_ARGISNULL(2) ? NULL : AG_GET_ARG_AGTYPE_P(2); + a_label = PG_ARGISNULL(3) ? NULL : AG_GET_ARG_AGTYPE_P(3); + a_dir = PG_ARGISNULL(4) ? NULL : AG_GET_ARG_AGTYPE_P(4); + a_min = PG_ARGISNULL(5) ? NULL : AG_GET_ARG_AGTYPE_P(5); + a_max = PG_ARGISNULL(6) ? 
NULL : AG_GET_ARG_AGTYPE_P(6); + + /* treat an explicit agtype null the same as a SQL NULL */ + if (a_start != NULL && is_agtype_null(a_start)) + { + a_start = NULL; + } + if (a_end != NULL && is_agtype_null(a_end)) + { + a_end = NULL; + } + if (a_label != NULL && is_agtype_null(a_label)) + { + a_label = NULL; + } + if (a_dir != NULL && is_agtype_null(a_dir)) + { + a_dir = NULL; + } + if (a_min != NULL && is_agtype_null(a_min)) + { + a_min = NULL; + } + if (a_max != NULL && is_agtype_null(a_max)) + { + a_max = NULL; + } + + state = palloc0(sizeof(sp_srf_state)); + state->next = 0; + state->paths = sp_compute_paths(a_graph, a_start, a_end, a_label, + a_dir, a_min, a_max, collect_all, + &state->npaths); + funcctx->user_fctx = state; + + MemoryContextSwitchTo(oldctx); + } + + funcctx = SRF_PERCALL_SETUP(); + state = (sp_srf_state *) funcctx->user_fctx; + + if (state->next < state->npaths) + { + Datum d = state->paths[state->next]; + + state->next = state->next + 1; + SRF_RETURN_NEXT(funcctx, d); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * age_shortest_path(graph_name, start, end [, edge_types [, direction + * [, min_hops [, max_hops]]]]) -> SETOF agtype + * + * Returns the single unweighted shortest path (as an AGTV_PATH) between the + * start and end vertices, or no rows if unreachable. + */ +PG_FUNCTION_INFO_V1(age_shortest_path); + +Datum age_shortest_path(PG_FUNCTION_ARGS) +{ + return sp_srf_impl(fcinfo, false); +} + +/* + * age_all_shortest_paths(graph_name, start, end [, edge_types [, direction + * [, min_hops [, max_hops]]]]) -> SETOF agtype + * + * Returns every unweighted shortest path (one AGTV_PATH per row) between the + * start and end vertices, i.e. all paths whose length equals the minimum hop + * count, or no rows if unreachable. 
+ */ +PG_FUNCTION_INFO_V1(age_all_shortest_paths); + +Datum age_all_shortest_paths(PG_FUNCTION_ARGS) +{ + return sp_srf_impl(fcinfo, true); +} From 88c1c3363cd90763c080a1c611d5525fbf5b9630 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Mon, 22 Jun 2026 12:34:31 -0400 Subject: [PATCH 09/20] Support pattern expressions as boolean expressions (#2360) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Support pattern expressions in WHERE clause via GLR parser (issue #1577) Enable bare graph patterns as boolean expressions in WHERE clauses: MATCH (a:Person), (b:Person) WHERE (a)-[:KNOWS]->(b) -- now valid, equivalent to EXISTS(...) RETURN a.name, b.name Previously, this required wrapping in EXISTS(): WHERE EXISTS((a)-[:KNOWS]->(b)) The bare pattern syntax is standard openCypher and is used extensively in Neo4j. Its absence was the most frequently cited migration blocker. Implementation approach: - Switch the Cypher parser from LALR(1) to Bison GLR mode. GLR handles the inherent ambiguity between parenthesized expressions '(' expr ')' and graph path nodes '(' var_name label_opt props ')' by forking the parse stack and discarding the failing path. - Add anonymous_path as an expr_atom alternative with %dprec 1 (lower priority than expression path at %dprec 2). The action wraps the pattern in a cypher_sub_pattern + EXISTS SubLink, reusing the same transform_cypher_sub_pattern() machinery as explicit EXISTS(). - Extract make_exists_pattern_sublink() helper shared by both EXISTS(pattern) and bare pattern rules. - Fix YYLLOC_DEFAULT to use YYRHSLOC() for GLR compatibility. - %dprec annotations on expr_var/var_name_opt resolve the reduce/reduce conflict between expression variables and pattern node variables. Conflict budget: 7 shift/reduce (path extension vs arithmetic on -/<), 3 reduce/reduce (expr_var vs var_name_opt on )/}/=). All are expected and handled correctly by GLR forking + %dprec disambiguation. 
All 32 regression tests pass (31 existing + 1 new). New pattern_expression test covers: bare patterns, NOT patterns, labeled nodes, AND/OR combinations, left-directed patterns, anonymous nodes, multi-hop patterns, EXISTS() backward compatibility, and non-pattern expression regression checks. Co-Authored-By: Claude Opus 4.6 (1M context) * Address Copilot review: comment placement, %expect docs, test wording 1. Move "Helper function to create an ExplainStmt node" comment from above make_exists_pattern_sublink() to above make_explain_stmt() where it belongs. 2. Add block comment documenting the %expect/%expect-rr conflict budget: 7 S/R from path vs arithmetic on - and <, 3 R/R from expr_var vs var_name_opt on ) } =. 3. Clarify test comment: "Regular expressions" -> "Regular (non-pattern) expressions" to avoid confusion with regex. Regression test: pattern_expression OK. Co-Authored-By: Claude Opus 4.6 (1M context) * Address Copilot round 3: broaden scope, remove %expect fragility - Pattern expressions are now accepted anywhere an expr is valid (RETURN, WITH, SET, CASE, boolean combinations), not only WHERE. This matches openCypher semantics and documents the broader surface area that was already implicitly enabled by adding anonymous_path to expr_atom. Added regression tests for each new context: RETURN projection (bare and AS-aliased), mixed with other projections, CASE WHEN, boolean AND/OR combinators, SET to persist a computed boolean property, and WITH ... WHERE pipeline. - Remove the hardcoded `%expect 7` / `%expect-rr 3` conflict budget from cypher_gram.y. The exact conflict counts can drift across Bison versions and distros, which would break builds even though the grammar is correct (GLR handles the conflicts at runtime via fork + %dprec). Instead, pass -Wno-conflicts-sr / -Wno-conflicts-rr via BISONFLAGS in the Makefile so the build stays clean without binding us to a specific Bison release. 
Kept a block comment in the grammar explaining why GLR conflicts are expected and how they resolve. Co-Authored-By: Claude Opus 4.6 (1M context) * Address jrgemignani review: keep -Werror, restore %expect budget Reverts the broad `-Werror` drop and the no-%expect approach from the prior round on jrgemignani's request. The earlier framing — that conflict counts drift across Bison versions, so %expect is fragile — overcorrected: it removed the only build-time alarm bell for unintended new conflicts. Makefile: keep -Werror so any unexpected Bison warning (unused rules, undeclared types, etc.) still fails the build; downgrade only the two conflict categories to plain warnings via -Wno-error=conflicts-sr -Wno-error=conflicts-rr. pgxs auto-adds -Wno-deprecated, so existing %name-prefix= / %pure-parser directives remain non-erroring. cypher_gram.y: add `%expect 7` and `%expect-rr 3` matching the Bison 3.8.2 totals. Bison treats %expect as exact-match, not as a ceiling — any deviation fails the build and forces an audit of the new conflicts. Comment updated to reflect that future Bison versions reporting different counts should bump the numbers explicitly with a version note in the commit message, rather than removing the directive. No grammar or runtime change. Cassert installcheck 34/34 AGE tests green. * Add follow-up regression coverage for pattern expressions (#2360) Addresses the non-blocking test-coverage follow-ups from the review: pattern expressions in additional contexts opened up by allowing anonymous_path as an expr_atom. New cases (all verified against a PG18 build): - Single-node pattern on a bound variable (a:Label). Documented as an EXISTS existence check, NOT an openCypher label predicate: a matching label is always true, and a non-matching label hits AGE's pre-existing "multiple labels for variable" restriction (captured as expected error). - Pattern expressions inside list and map literals. 
- Pattern expressions as function arguments: collect() shows correct per-row booleans; count() counts all rows (non-null bool) -- documented so the value is not mistaken for a bug. - Pattern expression in OPTIONAL MATCH ... WHERE (null-preserving). - EXISTS() and a bare pattern combined in one predicate. make installcheck: 33/33 green. --------- Co-authored-by: Claude Opus 4.6 (1M context) --- Makefile | 17 +- regress/expected/pattern_expression.out | 457 ++++++++++++++++++++++++ regress/sql/pattern_expression.sql | 305 ++++++++++++++++ src/backend/parser/cypher_gram.y | 101 ++++-- 4 files changed, 859 insertions(+), 21 deletions(-) create mode 100644 regress/expected/pattern_expression.out create mode 100644 regress/sql/pattern_expression.sql diff --git a/Makefile b/Makefile index 41208ee02..b0059213c 100644 --- a/Makefile +++ b/Makefile @@ -215,6 +215,7 @@ REGRESS = scan \ jsonb_operators \ list_comprehension \ predicate_functions \ + pattern_expression \ map_projection \ direct_field_access \ security \ @@ -282,7 +283,21 @@ src/include/parser/cypher_kwlist_d.h: src/include/parser/cypher_kwlist.h $(GEN_K src/include/parser/cypher_gram_def.h: src/backend/parser/cypher_gram.c -src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -Werror +# +# The Cypher grammar uses GLR mode with a number of inherent shift/reduce +# and reduce/reduce conflicts arising from the ambiguity between +# parenthesized expressions and graph patterns (both start with '('). +# GLR handles these correctly at runtime by forking at the conflict +# point; %dprec annotations resolve cases where both forks succeed. +# +# We keep -Werror so any unexpected Bison warning (unused rules, undeclared +# types, etc.) still fails the build; we downgrade only the two conflict +# categories to plain warnings via -Wno-error=. 
The exact conflict totals +# are pinned by %expect / %expect-rr in cypher_gram.y, which Bison treats +# as exact-match: any deviation fails the build and forces an audit of +# the new conflicts. +# +src/backend/parser/cypher_gram.c: BISONFLAGS += --defines=src/include/parser/cypher_gram_def.h -Werror -Wno-error=conflicts-sr -Wno-error=conflicts-rr src/backend/parser/cypher_parser.o: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h src/backend/parser/cypher_parser.bc: src/backend/parser/cypher_gram.c src/include/parser/cypher_gram_def.h diff --git a/regress/expected/pattern_expression.out b/regress/expected/pattern_expression.out new file mode 100644 index 000000000..0494d49b9 --- /dev/null +++ b/regress/expected/pattern_expression.out @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('pattern_expr'); +NOTICE: graph "pattern_expr" has been created + create_graph +-------------- + +(1 row) + +-- +-- Setup test data +-- +SELECT * FROM cypher('pattern_expr', $$ + CREATE (alice:Person {name: 'Alice'})-[:KNOWS]->(bob:Person {name: 'Bob'}), + (alice)-[:WORKS_WITH]->(charlie:Person {name: 'Charlie'}), + (dave:Person {name: 'Dave'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- +-- Basic pattern expression in WHERE +-- +-- Bare pattern: (a)-[:REL]->(b) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- NOT pattern expression +-- +-- Find people who don't KNOW anyone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE NOT (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +----------- + "Bob" + "Charlie" + "Dave" +(3 rows) + +-- +-- Pattern with labeled first node +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a:Person)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern combined with AND +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) AND a.name = 'Alice' + RETURN a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern combined with OR +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) OR (a)-[:WORKS_WITH]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + a | b +---------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" +(2 rows) + +-- +-- Left-directed pattern +-- +SELECT * FROM 
cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)<-[:KNOWS]-(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +-------+--------- + "Bob" | "Alice" +(1 row) + +-- +-- Pattern with anonymous nodes +-- +-- Find anyone who has any outgoing KNOWS relationship +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->() + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Multiple relationship pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (c:Person) + WHERE (a)-[:KNOWS]->()-[:WORKS_WITH]->(c) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + a | c +---+--- +(0 rows) + +-- +-- Existing EXISTS() syntax still works (backward compatibility) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE EXISTS((a)-[:KNOWS]->(b)) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + a | b +---------+------- + "Alice" | "Bob" +(1 row) + +-- +-- Pattern expression produces same results as EXISTS() +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Regular (non-pattern) expressions still work (no regression) +-- +SELECT * FROM cypher('pattern_expr', $$ + RETURN (1 + 2) +$$) AS (result agtype); + result +-------- + 3 +(1 row) + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (n:Person) + WHERE n.name = 'Alice' + RETURN (n.name) +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Pattern expressions in RETURN (boolean projection) +-- +-- Each person gets a column showing whether they know 
someone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person) AS knows_someone + ORDER BY a.name +$$) AS (name agtype, knows_someone agtype); + name | knows_someone +-----------+--------------- + "Alice" | true + "Bob" | false + "Charlie" | false + "Dave" | false +(4 rows) + +-- Mix pattern expression with other projections +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person) + ORDER BY a.name +$$) AS (name agtype, knows agtype, works_with agtype); + name | knows | works_with +-----------+-------+------------ + "Alice" | true | true + "Bob" | false | false + "Charlie" | false | false + "Dave" | false | false +(4 rows) + +-- +-- Pattern expressions in CASE WHEN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + CASE WHEN (a)-[:KNOWS]->(:Person) THEN 'social' + ELSE 'loner' + END + ORDER BY a.name +$$) AS (name agtype, kind agtype); + name | kind +-----------+---------- + "Alice" | "social" + "Bob" | "loner" + "Charlie" | "loner" + "Dave" | "loner" +(4 rows) + +-- +-- Pattern expressions combined with boolean operators in RETURN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + (a)-[:KNOWS]->(:Person) AND (a)-[:WORKS_WITH]->(:Person) AS has_both, + (a)-[:KNOWS]->(:Person) OR (a)-[:WORKS_WITH]->(:Person) AS has_either + ORDER BY a.name +$$) AS (name agtype, has_both agtype, has_either agtype); + name | has_both | has_either +-----------+----------+------------ + "Alice" | true | true + "Bob" | false | false + "Charlie" | false | false + "Dave" | false | false +(4 rows) + +-- +-- Pattern expression in SET (store boolean as property) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + SET a.is_social = (a)-[:KNOWS]->(:Person) + RETURN a.name, a.is_social + ORDER BY a.name +$$) AS (name agtype, is_social agtype); + name | is_social +-----------+----------- + "Alice" | 
true + "Bob" | false + "Charlie" | false + "Dave" | false +(4 rows) + +-- +-- Pattern expression in WITH (carry boolean through pipeline) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a.name AS name, (a)-[:KNOWS]->(:Person) AS knows + WHERE knows + RETURN name + ORDER BY name +$$) AS (result agtype); + result +--------- + "Alice" +(1 row) + +-- +-- Follow-up coverage (review #2360): pattern expressions in additional +-- expression contexts opened up by allowing anonymous_path as an expr_atom. +-- +-- +-- Single-node pattern on an already-bound variable: (a:Label) +-- +-- NOTE: this is an EXISTS existence check on the bound variable, NOT an +-- openCypher label predicate. A matching label is therefore always true +-- (the variable is already bound), and a *different* label is rejected by +-- AGE's pre-existing "multiple labels for variable" restriction rather than +-- evaluating to false. Both behaviours are captured here so any future change +-- to single-node-pattern semantics is caught by this test. 
+SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a:Person) + ORDER BY a.name +$$) AS (name agtype, is_person agtype); + name | is_person +-----------+----------- + "Alice" | true + "Bob" | true + "Charlie" | true + "Dave" | true +(4 rows) + +-- A non-matching label errors (pre-existing limitation, not a regression) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a:Animal) + ORDER BY a.name +$$) AS (name agtype, is_animal agtype); +ERROR: multiple labels for variable 'a' are not supported +LINE 3: RETURN a.name, (a:Animal) + ^ +-- +-- Pattern expressions inside a list literal +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, [(a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person)] + ORDER BY a.name +$$) AS (name agtype, flags agtype); + name | flags +-----------+---------------- + "Alice" | [true, true] + "Bob" | [false, false] + "Charlie" | [false, false] + "Dave" | [false, false] +(4 rows) + +-- +-- Pattern expressions inside a map literal +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, {knows: (a)-[:KNOWS]->(:Person), works: (a)-[:WORKS_WITH]->(:Person)} + ORDER BY a.name +$$) AS (name agtype, m agtype); + name | m +-----------+---------------------------------- + "Alice" | {"knows": true, "works": true} + "Bob" | {"knows": false, "works": false} + "Charlie" | {"knows": false, "works": false} + "Dave" | {"knows": false, "works": false} +(4 rows) + +-- +-- Pattern expressions as function arguments +-- +-- collect() shows the per-row boolean values are correct (ORDER BY before +-- the aggregate so the collected order is deterministic across scan plans). 
+SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a ORDER BY a.name + RETURN collect((a)-[:KNOWS]->(:Person)) +$$) AS (vals agtype); + vals +----------------------------- + [true, false, false, false] +(1 row) + +-- count() counts non-null values; a boolean (including false) is non-null, +-- so this counts every row rather than only the matching ones. This is the +-- expected SQL aggregate semantics, documented here so the value is not +-- mistaken for a bug. +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN count((a)-[:KNOWS]->(:Person)) +$$) AS (c agtype); + c +--- + 4 +(1 row) + +-- +-- Pattern expression in OPTIONAL MATCH ... WHERE (null-preserving) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + OPTIONAL MATCH (b:Person) WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + a | b +-----------+------- + "Alice" | "Bob" + "Bob" | + "Charlie" | + "Dave" | +(4 rows) + +-- +-- EXISTS() and a bare pattern combined in a single predicate +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) AND (a)-[:WORKS_WITH]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +--------- + "Alice" +(1 row) + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('pattern_expr', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table pattern_expr._ag_label_vertex +drop cascades to table pattern_expr._ag_label_edge +drop cascades to table pattern_expr."Person" +drop cascades to table pattern_expr."KNOWS" +drop cascades to table pattern_expr."WORKS_WITH" +NOTICE: graph "pattern_expr" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/pattern_expression.sql b/regress/sql/pattern_expression.sql new file mode 100644 index 000000000..fff8476e5 --- /dev/null +++ b/regress/sql/pattern_expression.sql @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('pattern_expr'); + +-- +-- Setup test data +-- +SELECT * FROM cypher('pattern_expr', $$ + CREATE (alice:Person {name: 'Alice'})-[:KNOWS]->(bob:Person {name: 'Bob'}), + (alice)-[:WORKS_WITH]->(charlie:Person {name: 'Charlie'}), + (dave:Person {name: 'Dave'}) +$$) AS (result agtype); + +-- +-- Basic pattern expression in WHERE +-- +-- Bare pattern: (a)-[:REL]->(b) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- NOT pattern expression +-- +-- Find people who don't KNOW anyone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE NOT (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Pattern with labeled first node +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a:Person)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern combined with AND +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) AND a.name = 'Alice' + 
RETURN a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern combined with OR +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)-[:KNOWS]->(b) OR (a)-[:WORKS_WITH]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- Left-directed pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE (a)<-[:KNOWS]-(b) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern with anonymous nodes +-- +-- Find anyone who has any outgoing KNOWS relationship +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->() + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Multiple relationship pattern +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (c:Person) + WHERE (a)-[:KNOWS]->()-[:WORKS_WITH]->(c) + RETURN a.name, c.name + ORDER BY a.name +$$) AS (a agtype, c agtype); + +-- +-- Existing EXISTS() syntax still works (backward compatibility) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person), (b:Person) + WHERE EXISTS((a)-[:KNOWS]->(b)) + RETURN a.name, b.name + ORDER BY a.name +$$) AS (a agtype, b agtype); + +-- +-- Pattern expression produces same results as EXISTS() +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE (a)-[:KNOWS]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) + RETURN a.name + ORDER BY a.name +$$) AS (result agtype); + +-- +-- Regular (non-pattern) expressions still work (no regression) +-- +SELECT * FROM cypher('pattern_expr', $$ + RETURN (1 + 2) +$$) AS (result agtype); + +SELECT * FROM cypher('pattern_expr', $$ + MATCH (n:Person) + WHERE n.name = 'Alice' + RETURN (n.name) +$$) AS (result agtype); + +-- +-- Pattern expressions in RETURN (boolean projection) +-- +-- Each person gets a column showing 
whether they know someone +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person) AS knows_someone + ORDER BY a.name +$$) AS (name agtype, knows_someone agtype); + +-- Mix pattern expression with other projections +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person) + ORDER BY a.name +$$) AS (name agtype, knows agtype, works_with agtype); + +-- +-- Pattern expressions in CASE WHEN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + CASE WHEN (a)-[:KNOWS]->(:Person) THEN 'social' + ELSE 'loner' + END + ORDER BY a.name +$$) AS (name agtype, kind agtype); + +-- +-- Pattern expressions combined with boolean operators in RETURN +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, + (a)-[:KNOWS]->(:Person) AND (a)-[:WORKS_WITH]->(:Person) AS has_both, + (a)-[:KNOWS]->(:Person) OR (a)-[:WORKS_WITH]->(:Person) AS has_either + ORDER BY a.name +$$) AS (name agtype, has_both agtype, has_either agtype); + +-- +-- Pattern expression in SET (store boolean as property) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + SET a.is_social = (a)-[:KNOWS]->(:Person) + RETURN a.name, a.is_social + ORDER BY a.name +$$) AS (name agtype, is_social agtype); + +-- +-- Pattern expression in WITH (carry boolean through pipeline) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a.name AS name, (a)-[:KNOWS]->(:Person) AS knows + WHERE knows + RETURN name + ORDER BY name +$$) AS (result agtype); + +-- +-- Follow-up coverage (review #2360): pattern expressions in additional +-- expression contexts opened up by allowing anonymous_path as an expr_atom. +-- + +-- +-- Single-node pattern on an already-bound variable: (a:Label) +-- +-- NOTE: this is an EXISTS existence check on the bound variable, NOT an +-- openCypher label predicate. 
A matching label is therefore always true +-- (the variable is already bound), and a *different* label is rejected by +-- AGE's pre-existing "multiple labels for variable" restriction rather than +-- evaluating to false. Both behaviours are captured here so any future change +-- to single-node-pattern semantics is caught by this test. +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a:Person) + ORDER BY a.name +$$) AS (name agtype, is_person agtype); + +-- A non-matching label errors (pre-existing limitation, not a regression) +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, (a:Animal) + ORDER BY a.name +$$) AS (name agtype, is_animal agtype); + +-- +-- Pattern expressions inside a list literal +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, [(a)-[:KNOWS]->(:Person), (a)-[:WORKS_WITH]->(:Person)] + ORDER BY a.name +$$) AS (name agtype, flags agtype); + +-- +-- Pattern expressions inside a map literal +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN a.name, {knows: (a)-[:KNOWS]->(:Person), works: (a)-[:WORKS_WITH]->(:Person)} + ORDER BY a.name +$$) AS (name agtype, m agtype); + +-- +-- Pattern expressions as function arguments +-- +-- collect() shows the per-row boolean values are correct (ORDER BY before +-- the aggregate so the collected order is deterministic across scan plans). +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WITH a ORDER BY a.name + RETURN collect((a)-[:KNOWS]->(:Person)) +$$) AS (vals agtype); + +-- count() counts non-null values; a boolean (including false) is non-null, +-- so this counts every row rather than only the matching ones. This is the +-- expected SQL aggregate semantics, documented here so the value is not +-- mistaken for a bug. +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + RETURN count((a)-[:KNOWS]->(:Person)) +$$) AS (c agtype); + +-- +-- Pattern expression in OPTIONAL MATCH ... 
WHERE (null-preserving) +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + OPTIONAL MATCH (b:Person) WHERE (a)-[:KNOWS]->(b) + RETURN a.name, b.name + ORDER BY a.name, b.name +$$) AS (a agtype, b agtype); + +-- +-- EXISTS() and a bare pattern combined in a single predicate +-- +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a:Person) + WHERE EXISTS((a)-[:KNOWS]->(:Person)) AND (a)-[:WORKS_WITH]->(:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('pattern_expr', true); diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index c614e1dbe..b23fa705a 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -34,7 +34,7 @@ do \ { \ if ((n) > 0) \ - current = (rhs)[1]; \ + current = YYRHSLOC(rhs, 1); \ else \ current = -1; \ } while (0) @@ -49,6 +49,43 @@ %locations %name-prefix="cypher_yy" %pure-parser +/* + * GLR mode handles the ambiguity between parenthesized expressions and + * graph patterns. For example, WHERE (a)-[:KNOWS]->(b) starts with (a) + * which is valid as both an expression and a path_node. The parser forks + * at the conflict point and discards the failing path. %dprec annotations + * on expr_var/var_name_opt and '(' expr ')'/anonymous_path resolve cases + * where both paths succeed (bare (a) prefers the expression interpretation). + */ +%glr-parser +/* + * GLR conflicts are expected and correct for this grammar. They arise + * from the inherent ambiguity between parenthesized expressions and + * graph patterns: the shift/reduce conflicts on '-', '<', '{', + * PARAMETER and ')' all come from path extension vs. arithmetic or + * parenthesized-expression alternatives after a leading '(', and the + * reduce/reduce conflicts on ')', '}' and '=' come from the overlap + * between expr_var and var_name_opt. 
GLR handles all of these by + * forking at the conflict point and discarding the failing alternative; + * %dprec annotations on expr_var/var_name_opt and '(' expr ')' / + * anonymous_path resolve cases where both forks succeed (bare (a) + * prefers the expression interpretation). + * + * The %expect / %expect-rr counts below match the Bison-reported totals + * (7 SR / 3 RR on Bison 3.8.2). Bison treats %expect as exact, not as + * a ceiling: any deviation up or down fails the build. That is the + * alarm bell — if a grammar change moves either count, the build stops + * and the conflicts must be audited to confirm they remain the inherent + * '(' expr ')' vs anonymous_path ambiguities (resolved by %dprec at + * runtime) rather than an unintended new ambiguity. The Makefile + * downgrades -Wconflicts-sr / -Wconflicts-rr from errors to warnings + * (-Wno-error=conflicts-{sr,rr}) so %expect, not the warning category, + * controls the build-fail threshold. If a future Bison version reports + * different counts for the same grammar, update these numbers and note + * the version in the commit message. 
+ */ +%expect 7 +%expect-rr 3 %lex-param {ag_scanner_t scanner} %parse-param {ag_scanner_t scanner} @@ -292,6 +329,9 @@ static Node *build_predicate_function_node(cypher_predicate_function_kind kind, Node *var, Node *expr, Node *where, int location); +/* pattern expression helper */ +static Node *make_exists_pattern_sublink(Node *pattern, int location); + /* helper functions */ static ExplainStmt *make_explain_stmt(List *options); static void validate_return_item_aliases(List *items, ag_scanner_t scanner); @@ -1876,21 +1916,7 @@ expr_func_subexpr: } | EXISTS '(' anonymous_path ')' { - cypher_sub_pattern *sub; - SubLink *n; - - sub = make_ag_node(cypher_sub_pattern); - sub->kind = CSP_EXISTS; - sub->pattern = list_make1($3); - - n = makeNode(SubLink); - n->subLinkType = EXISTS_SUBLINK; - n->subLinkId = 0; - n->testexpr = NULL; - n->operName = NIL; - n->subselect = (Node *) sub; - n->location = @1; - $$ = (Node *)node_to_agtype((Node *)n, "boolean", @1); + $$ = make_exists_pattern_sublink($3, @1); } | EXISTS '(' property_value ')' { @@ -2026,7 +2052,7 @@ expr_atom: $$ = (Node *)n; } - | '(' expr ')' + | '(' expr ')' %dprec 2 { Node *n = $2; @@ -2037,6 +2063,17 @@ expr_atom: } $$ = n; } + | anonymous_path %dprec 1 + { + /* + * Bare pattern in expression context is semantically + * equivalent to EXISTS(pattern). Example: + * WHERE (a)-[:KNOWS]->(b) + * becomes + * WHERE EXISTS((a)-[:KNOWS]->(b)) + */ + $$ = make_exists_pattern_sublink($1, @1); + } | expr_case | expr_var | expr_func @@ -2288,7 +2325,7 @@ expr_case_default: ; expr_var: - var_name + var_name %dprec 2 { ColumnRef *n; @@ -2374,11 +2411,11 @@ var_name_alias: ; var_name_opt: - /* empty */ + /* empty */ %dprec 1 { $$ = NULL; } - | var_name + | var_name %dprec 1 ; label_name: @@ -3585,6 +3622,30 @@ static Node *build_predicate_function_node(cypher_predicate_function_kind kind, } } +/* + * Wrap a graph pattern in an EXISTS SubLink. Used by both + * EXISTS(pattern) syntax and bare pattern expressions in WHERE. 
+ */ +static Node *make_exists_pattern_sublink(Node *pattern, int location) +{ + cypher_sub_pattern *sub; + SubLink *n; + + sub = make_ag_node(cypher_sub_pattern); + sub->kind = CSP_EXISTS; + sub->pattern = list_make1(pattern); + + n = makeNode(SubLink); + n->subLinkType = EXISTS_SUBLINK; + n->subLinkId = 0; + n->testexpr = NULL; + n->operName = NIL; + n->subselect = (Node *) sub; + n->location = location; + + return (Node *)node_to_agtype((Node *)n, "boolean", location); +} + /* Helper function to create an ExplainStmt node */ static ExplainStmt *make_explain_stmt(List *options) { From 277308078cea0abd056ffe74990df85983cf83d5 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 26 Jun 2026 15:54:54 -0700 Subject: [PATCH 10/20] Add reduce() list folding function (#2435) Implement the openCypher reduce(acc = init, var IN list | body) expression, which folds an arbitrary expression over a list, threading an accumulator across the elements in list order. This closes a long-standing gap (reduce() was previously unsupported) and works both at the SQL top level and inside a cypher() RETURN/WHERE. Implementation -------------- reduce() is desugared, at transform time, into a correlated scalar subquery over a new ordered aggregate rather than a new executor node, so no PostgreSQL core changes are required: CASE WHEN list IS NULL THEN NULL ELSE COALESCE((SELECT ag_catalog.age_reduce(init, 'serialized-body'::text, r.elem ORDER BY r.ord) FROM unnest(list) WITH ORDINALITY AS r(elem, ord)), init) END - A new cypher_reduce extensible node carries the accumulator/element names and the init/list/body expressions (grammar production, keyword, and the copy/out/read serialization plumbing). - The fold body is transformed against a throwaway two-column agtype namespace, its accumulator and element Var references are rewritten to PARAM_EXEC params 0 and 1, and it is serialized with nodeToString() into a text argument. 
- age_reduce_transfn (a custom agtype aggregate transition function) deserializes and compiles the body once per group with ExecInitExpr, then evaluates it per element with ExecEvalExpr, rebinding the two params. The body is normalized to agtype at transform time so a boolean or other non-agtype result cannot be misread as a by-reference Datum. Semantics --------- - List order is preserved (unnest WITH ORDINALITY + aggregate ORDER BY). - An empty list yields the initial value; a NULL list yields NULL. - The list and initial value may reference outer-query variables (e.g. reduce(total = 0, n IN nodes(p) | total + n.age)); the body may reference only the accumulator and element. - Arithmetic, string, list-building, boolean/comparison (AND/OR/=/>), CASE, and element property-access bodies are all supported. - Outer-variable, query-parameter, nested-reduce, and aggregate references inside the body raise a clean ERRCODE_FEATURE_NOT_SUPPORTED error. - reduce is registered as a safe keyword so it remains usable as a property or map key, preserving backward compatibility. Tests ----- Adds the age_reduce regression test (registered in the install SQL and the upgrade template so age_upgrade passes), covering: arithmetic/product/string folds; order sensitivity; empty/NULL list; NULL element and NULL init; list-building and CASE bodies; boolean and comparison bodies; element property access; multiple and nested (in list/init) reduce(); reduce() in boolean expressions, WHERE, and list comprehensions; folds over collected nodes and node list properties; the not-supported rejections; and reduce as a map key. 
Following reviewer feedback, three further semantics-coverage gaps are pinned directly so the mechanisms that make the aggregate desugaring correct are exercised by tests rather than only correct by inspection: - A fold body that produces null mid-fold and then recovers: the agtype 'null' running state is a readable value, so a later element folds back out of it (distinct from "null propagates to a null result", which was already covered). - An empty list with a NULL initial value: COALESCE(fold_result, init) yields NULL, kept distinct from a body that legitimately folds to agtype 'null', which must not be resurrected to the initial value. - A type error and a runtime division-by-zero error in the body: both abort cleanly out of the standalone per-element evaluator rather than corrupting the running aggregate state. All multi-row results are ordered. 42/42 installcheck pass. Future work ----------- The body restriction (accumulator and element only) is a property of the standalone expression evaluation and can be relaxed without core changes: - Allow loop-invariant outer-variable and cypher $parameter references in the body by capturing them as additional eager aggregate arguments bound to extra param slots. - Support a nested reduce() inside the body via an SPI-based evaluation fallback for subquery-bearing bodies. Aggregates inside the body remain intentionally unsupported, matching the openCypher specification. 
Co-Authored-By: Claude Opus 4.8 (1M context) modified: Makefile modified: age--1.7.0--y.y.y.sql new file: regress/expected/age_reduce.out new file: regress/sql/age_reduce.sql modified: sql/age_aggregate.sql modified: src/backend/nodes/ag_nodes.c modified: src/backend/nodes/cypher_copyfuncs.c modified: src/backend/nodes/cypher_outfuncs.c modified: src/backend/nodes/cypher_readfuncs.c modified: src/backend/parser/cypher_analyze.c modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_gram.y modified: src/backend/utils/adt/agtype.c modified: src/include/nodes/ag_nodes.h modified: src/include/nodes/cypher_copyfuncs.h modified: src/include/nodes/cypher_nodes.h modified: src/include/nodes/cypher_outfuncs.h modified: src/include/nodes/cypher_readfuncs.h modified: src/include/parser/cypher_kwlist.h --- Makefile | 1 + age--1.7.0--y.y.y.sql | 22 ++ regress/expected/age_reduce.out | 538 +++++++++++++++++++++++++++ regress/sql/age_reduce.sql | 350 +++++++++++++++++ sql/age_aggregate.sql | 22 ++ src/backend/nodes/ag_nodes.c | 6 +- src/backend/nodes/cypher_copyfuncs.c | 12 + src/backend/nodes/cypher_outfuncs.c | 12 + src/backend/nodes/cypher_readfuncs.c | 14 + src/backend/parser/cypher_analyze.c | 19 + src/backend/parser/cypher_clause.c | 421 +++++++++++++++++++++ src/backend/parser/cypher_gram.y | 92 ++++- src/backend/utils/adt/agtype.c | 165 ++++++++ src/include/nodes/ag_nodes.h | 4 +- src/include/nodes/cypher_copyfuncs.h | 4 + src/include/nodes/cypher_nodes.h | 18 + src/include/nodes/cypher_outfuncs.h | 1 + src/include/nodes/cypher_readfuncs.h | 3 + src/include/parser/cypher_kwlist.h | 1 + 19 files changed, 1701 insertions(+), 4 deletions(-) create mode 100644 regress/expected/age_reduce.out create mode 100644 regress/sql/age_reduce.sql diff --git a/Makefile b/Makefile index b0059213c..21c7f81f2 100644 --- a/Makefile +++ b/Makefile @@ -216,6 +216,7 @@ REGRESS = scan \ list_comprehension \ predicate_functions \ pattern_expression \ + age_reduce \ 
map_projection \ direct_field_access \ security \ diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index b40cde092..ec909d2bb 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -1100,3 +1100,25 @@ $function$; COMMENT ON FUNCTION ag_catalog.create_subgraph(name, name, text, text) IS 'Materializes a new persistent graph as the induced subgraph of from_graph selected by a Cypher node predicate (on n) and relationship predicate (on r); ''*'' keeps all. An edge is kept only if its predicate holds and both endpoints are kept. Returns (node_count, relationship_count).'; + +-- +-- reduce(acc = init, var IN list | body) fold support +-- +-- Transition function for the age_reduce aggregate. The fold body is compiled +-- by transform_cypher_reduce() with the accumulator and element rewritten to +-- PARAM_EXEC params 0 and 1 and serialized into the text argument; the +-- transition evaluates it for each element in list order. It must be callable +-- with a NULL transition state (no initcond), so it is intentionally not STRICT. +CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype) + RETURNS agtype + LANGUAGE c +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + +-- aggregate definition for reduce(); direct arguments are +-- (init, serialized-body, element), with the element fed ORDER BY ordinality. +CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype) +( + stype = agtype, + sfunc = ag_catalog.age_reduce_transfn +); diff --git a/regress/expected/age_reduce.out b/regress/expected/age_reduce.out new file mode 100644 index 000000000..8a198965a --- /dev/null +++ b/regress/expected/age_reduce.out @@ -0,0 +1,538 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT create_graph('reduce'); +NOTICE: graph "reduce" has been created + create_graph +-------------- + +(1 row) + +-- +-- Basic folds +-- +-- sum +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + result +-------- + 6 +(1 row) + +-- sum of a longer list +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | s + x) +$$) AS (result agtype); + result +-------- + 55 +(1 row) + +-- product (factorial) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(p = 1, x IN [1, 2, 3, 4, 5] | p * x) +$$) AS (result agtype); + result +-------- + 120 +(1 row) + +-- non-zero initial accumulator +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 100, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + result +-------- + 106 +(1 row) + +-- single element +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [42] | s + x) +$$) AS (result agtype); + result +-------- + 42 +(1 row) + +-- +-- List order is significant +-- +-- left-associative subtraction: ((((0-1)-2)-3)-4) = -10 +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | s - x) +$$) AS (result agtype); + result +-------- + -10 +(1 row) + +-- forward string concatenation +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN ['a', 'b', 'c'] | s + x) +$$) AS 
(result agtype); + result +-------- + "abc" +(1 row) + +-- reverse string concatenation (element before accumulator) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN ['a', 'b', 'c'] | x + s) +$$) AS (result agtype); + result +-------- + "cba" +(1 row) + +-- +-- Empty and NULL list semantics +-- +-- empty list returns the initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [] | s + x) +$$) AS (result agtype); + result +-------- + 0 +(1 row) + +-- empty list returns the initial value (non-zero) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 999, x IN [] | s + x) +$$) AS (result agtype); + result +-------- + 999 +(1 row) + +-- NULL list returns NULL +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN null | s + x) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- empty list with a NULL initial value yields NULL: the list is empty (not +-- null) so the fold runs over zero rows, and COALESCE(, init) is +-- COALESCE(NULL, NULL) -> NULL. (Distinct from a NULL *list*, which the outer +-- CASE short-circuits to NULL, and from a non-empty list with a NULL init, +-- which seeds the accumulator with agtype 'null'.) 
+SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = null, x IN [] | s + x) +$$) AS (result agtype); + result +-------- + +(1 row) + +-- +-- NULL handling within the fold +-- +-- a NULL element propagates through arithmetic to NULL +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, null, 3] | s + x) +$$) AS (result agtype); + result +-------- + null +(1 row) + +-- NULL initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = null, x IN [1, 2, 3] | s) +$$) AS (result agtype); + result +-------- + null +(1 row) + +-- a body that always evaluates to null yields null, NOT the initial value: +-- every step stores agtype 'null' as the running state, so the final state is +-- a real agtype 'null' and the empty-list COALESCE(..., init) guard must not +-- resurrect the initial value here (the load-bearing fold-to-null vs empty-list +-- distinction) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 7, x IN [1, 2, 3] | null) +$$) AS (result agtype); + result +-------- + null +(1 row) + +-- the accumulator legitimately becomes null mid-fold and the body climbs back +-- out of it: element 2 sets the accumulator to null, element 3 produces a fresh +-- non-null value, and element 4 reads that recovered state (999 + 4), proving a +-- null intermediate state does not poison the rest of the fold +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | + CASE WHEN x = 2 THEN null + WHEN x = 3 THEN 999 + ELSE s + x END) +$$) AS (result agtype); + result +-------- + 1003 +(1 row) + +-- +-- Errors raised from the fold body propagate cleanly +-- +-- a type error in the body (agtype number + map) aborts the statement rather +-- than corrupting the running aggregate state or crashing the backend +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + {a: 1}) +$$) AS (result agtype); +ERROR: invalid left operand for agtype concatenation +-- a runtime arithmetic error in the body (division by zero) likewise aborts 
+-- the fold; the error surfaces from the standalone per-element evaluator +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 1, x IN [1, 0, 2] | s / x) +$$) AS (result agtype); +ERROR: division by zero +-- +-- Building a list with the accumulator +-- +-- collect squares +SELECT * FROM cypher('reduce', $$ + RETURN reduce(acc = [], x IN [1, 2, 3] | acc + [x * x]) +$$) AS (result agtype); + result +----------- + [1, 4, 9] +(1 row) + +-- +-- A conditional body (CASE) +-- +-- sum of even elements only +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4, 5, 6] | CASE WHEN x % 2 = 0 THEN s + x ELSE s END) +$$) AS (result agtype); + result +-------- + 12 +(1 row) + +-- +-- Boolean and comparison fold bodies +-- +-- the body evaluates to a boolean, which is normalized to an agtype boolean +-- (a boolean accumulator is a real Cypher use case for "all"/"any" style folds) +-- logical AND fold: all true? +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [true, true, false] | s AND x) +$$) AS (result agtype); + result +-------- + false +(1 row) + +-- logical OR fold: any true? +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = false, x IN [false, true, false] | s OR x) +$$) AS (result agtype); + result +-------- + true +(1 row) + +-- a comparison body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = false, x IN [1, 2, 3] | x = 2) +$$) AS (result agtype); + result +-------- + false +(1 row) + +-- "does any element equal 2?" (search fold) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(found = false, x IN [1, 2, 3] | found OR x = 2) +$$) AS (result agtype); + result +-------- + true +(1 row) + +-- "are all elements positive?" 
(using a comparison inside the fold) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [1, 2, 3] | s AND x > 0) +$$) AS (result agtype); + result +-------- + true +(1 row) + +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [1, -2, 3] | s AND x > 0) +$$) AS (result agtype); + result +-------- + false +(1 row) + +-- +-- Property access on the element variable +-- +-- sum a field across a list of maps +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [{n: 1}, {n: 2}, {n: 3}] | s + x.n) +$$) AS (result agtype); + result +-------- + 6 +(1 row) + +-- concatenate a string field across a list of maps +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN [{w: 'a'}, {w: 'b'}, {w: 'c'}] | s + x.w) +$$) AS (result agtype); + result +-------- + "abc" +(1 row) + +-- +-- Multiple reduce() in one expression +-- +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) + reduce(p = 1, y IN [2, 3] | p * y) +$$) AS (result agtype); + result +-------- + 12 +(1 row) + +-- +-- reduce() in a boolean expression +-- +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) > 5 + AND reduce(p = 1, y IN [2, 3] | p * y) < 10 +$$) AS (result agtype); + result +-------- + true +(1 row) + +-- +-- reduce() nested in the list or initial value of another reduce() +-- +-- nesting is allowed in the list and the initial value (both evaluated in the +-- outer context) even though it is rejected inside the fold body. 
+-- nested reduce() in the list +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [reduce(a = 0, y IN [1, 2, 3] | a + y), 10] | s + x) +$$) AS (result agtype); + result +-------- + 16 +(1 row) + +-- nested reduce() in the initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = reduce(a = 0, y IN [1, 2, 3] | a + y), x IN [10, 20] | s + x) +$$) AS (result agtype); + result +-------- + 36 +(1 row) + +-- +-- reduce() over a correlated (per-row) list +-- +SELECT * FROM cypher('reduce', $$ + UNWIND [[1, 2], [3, 4, 5], []] AS arr + RETURN reduce(s = 0, x IN arr | s + x) AS total + ORDER BY total +$$) AS (result agtype); + result +-------- + 0 + 3 + 12 +(3 rows) + +-- +-- reduce() with the list and initial value bound in an outer clause +-- +SELECT * FROM cypher('reduce', $$ + WITH [10, 20, 30] AS ns + RETURN reduce(t = 0, n IN ns | t + n) +$$) AS (result agtype); + result +-------- + 60 +(1 row) + +-- the initial value may reference an outer variable (correlation is allowed +-- in the init and the list, only not in the body) +SELECT * FROM cypher('reduce', $$ + WITH 5 AS base + RETURN reduce(s = base, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + result +-------- + 11 +(1 row) + +-- +-- reduce() nested inside a list comprehension +-- +SELECT * FROM cypher('reduce', $$ + RETURN [v IN [1, 2, 3] | reduce(s = 0, x IN [v, v, v] | s + x)] +$$) AS (result agtype); + result +----------- + [3, 6, 9] +(1 row) + +-- +-- reduce() in a WHERE clause +-- +SELECT * FROM cypher('reduce', $$ + UNWIND [[1, 2, 3], [1, 1], [10]] AS l + WITH l WHERE reduce(s = 0, x IN l | s + x) > 3 + RETURN l + ORDER BY l +$$) AS (result agtype); + result +----------- + [1, 2, 3] + [10] +(2 rows) + +-- +-- reduce() over graph data (the canonical Cypher example) +-- +SELECT * FROM cypher('reduce', $$ + CREATE (:person {name: 'Alice', age: 38}), + (:person {name: 'Bob', age: 25}), + (:person {name: 'Daniel', age: 54}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- sum 
the ages of all person nodes +SELECT * FROM cypher('reduce', $$ + MATCH (p:person) + WITH collect(p) AS people + RETURN reduce(total = 0, n IN people | total + n.age) +$$) AS (result agtype); + result +-------- + 117 +(1 row) + +-- +-- reduce() over a graph node's list property +-- +SELECT * FROM cypher('reduce', $$ + CREATE (:bag {name: 'low', vals: [1, 2, 3]}), + (:bag {name: 'mid', vals: [5, 5, 5]}), + (:bag {name: 'high', vals: [10, 20, 30]}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- filter nodes by a reduce() over their list property +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) WHERE reduce(s = 0, x IN u.vals | s + x) > 10 + RETURN u.name + ORDER BY u.name +$$) AS (result agtype); + result +-------- + "high" + "mid" +(2 rows) + +-- compute a reduce() value per node and order by it +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) + RETURN u.name AS name, reduce(s = 0, x IN u.vals | s + x) AS total + ORDER BY total +$$) AS (name agtype, total agtype); + name | total +--------+------- + "low" | 6 + "mid" | 15 + "high" | 60 +(3 rows) + +-- +-- Not-yet-supported constructs raise a clean feature error +-- +-- an outer variable referenced in the body +SELECT * FROM cypher('reduce', $$ + WITH 5 AS w + RETURN reduce(s = 0, x IN [1, 2] | s + x + w) +$$) AS (result agtype); +ERROR: a reduce() expression may only reference its accumulator and element variables +LINE 1: SELECT * FROM cypher('reduce', $$ + ^ +-- a nested reduce() in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + reduce(t = 0, y IN [x] | t + y)) +$$) AS (result agtype); +ERROR: subqueries (including a nested reduce()) are not supported in a reduce() expression +LINE 1: SELECT * FROM cypher('reduce', $$ + ^ +-- an aggregate function in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + count(x)) +$$) AS (result agtype); +ERROR: aggregate functions are not supported in a reduce() expression +LINE 1: SELECT * FROM 
cypher('reduce', $$ + ^ +-- +-- "reduce" as a property key name (safe_keywords backward compatibility): +-- because reduce() introduced a reserved keyword, confirm the word is still +-- usable as a map key, the same way any/none/single are. +-- +SELECT * FROM cypher('reduce', $$ + RETURN {reduce: 1, any: 2, none: 3} +$$) AS (result agtype); + result +------------------------------------ + {"any": 2, "none": 3, "reduce": 1} +(1 row) + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('reduce', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table reduce._ag_label_vertex +drop cascades to table reduce._ag_label_edge +drop cascades to table reduce.person +drop cascades to table reduce.bag +NOTICE: graph "reduce" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_reduce.sql b/regress/sql/age_reduce.sql new file mode 100644 index 000000000..cf1261010 --- /dev/null +++ b/regress/sql/age_reduce.sql @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT create_graph('reduce'); + +-- +-- Basic folds +-- +-- sum +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + +-- sum of a longer list +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | s + x) +$$) AS (result agtype); + +-- product (factorial) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(p = 1, x IN [1, 2, 3, 4, 5] | p * x) +$$) AS (result agtype); + +-- non-zero initial accumulator +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 100, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + +-- single element +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [42] | s + x) +$$) AS (result agtype); + +-- +-- List order is significant +-- +-- left-associative subtraction: ((((0-1)-2)-3)-4) = -10 +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | s - x) +$$) AS (result agtype); + +-- forward string concatenation +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN ['a', 'b', 'c'] | s + x) +$$) AS (result agtype); + +-- reverse string concatenation (element before accumulator) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN ['a', 'b', 'c'] | x + s) +$$) AS (result agtype); + +-- +-- Empty and NULL list semantics +-- +-- empty list returns the initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [] | s + x) +$$) AS (result agtype); + +-- empty list returns the initial value (non-zero) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 999, x IN [] | s + x) +$$) AS (result agtype); + +-- NULL list returns NULL +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN null | s + x) +$$) AS (result agtype); + +-- empty list with a NULL initial value yields NULL: the list is empty (not +-- null) so the fold runs over zero rows, and COALESCE(, init) is +-- COALESCE(NULL, NULL) -> NULL. 
(Distinct from a NULL *list*, which the outer +-- CASE short-circuits to NULL, and from a non-empty list with a NULL init, +-- which seeds the accumulator with agtype 'null'.) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = null, x IN [] | s + x) +$$) AS (result agtype); + +-- +-- NULL handling within the fold +-- +-- a NULL element propagates through arithmetic to NULL +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, null, 3] | s + x) +$$) AS (result agtype); + +-- NULL initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = null, x IN [1, 2, 3] | s) +$$) AS (result agtype); + +-- a body that always evaluates to null yields null, NOT the initial value: +-- every step stores agtype 'null' as the running state, so the final state is +-- a real agtype 'null' and the empty-list COALESCE(..., init) guard must not +-- resurrect the initial value here (the load-bearing fold-to-null vs empty-list +-- distinction) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 7, x IN [1, 2, 3] | null) +$$) AS (result agtype); + +-- the accumulator legitimately becomes null mid-fold and the body climbs back +-- out of it: element 2 sets the accumulator to null, element 3 produces a fresh +-- non-null value, and element 4 reads that recovered state (999 + 4), proving a +-- null intermediate state does not poison the rest of the fold +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | + CASE WHEN x = 2 THEN null + WHEN x = 3 THEN 999 + ELSE s + x END) +$$) AS (result agtype); + +-- +-- Errors raised from the fold body propagate cleanly +-- +-- a type error in the body (agtype number + map) aborts the statement rather +-- than corrupting the running aggregate state or crashing the backend +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + {a: 1}) +$$) AS (result agtype); + +-- a runtime arithmetic error in the body (division by zero) likewise aborts +-- the fold; the error surfaces from the 
standalone per-element evaluator +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 1, x IN [1, 0, 2] | s / x) +$$) AS (result agtype); + +-- +-- Building a list with the accumulator +-- +-- collect squares +SELECT * FROM cypher('reduce', $$ + RETURN reduce(acc = [], x IN [1, 2, 3] | acc + [x * x]) +$$) AS (result agtype); + +-- +-- A conditional body (CASE) +-- +-- sum of even elements only +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3, 4, 5, 6] | CASE WHEN x % 2 = 0 THEN s + x ELSE s END) +$$) AS (result agtype); + +-- +-- Boolean and comparison fold bodies +-- +-- the body evaluates to a boolean, which is normalized to an agtype boolean +-- (a boolean accumulator is a real Cypher use case for "all"/"any" style folds) +-- logical AND fold: all true? +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [true, true, false] | s AND x) +$$) AS (result agtype); + +-- logical OR fold: any true? +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = false, x IN [false, true, false] | s OR x) +$$) AS (result agtype); + +-- a comparison body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = false, x IN [1, 2, 3] | x = 2) +$$) AS (result agtype); + +-- "does any element equal 2?" (search fold) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(found = false, x IN [1, 2, 3] | found OR x = 2) +$$) AS (result agtype); + +-- "are all elements positive?" 
(using a comparison inside the fold) +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [1, 2, 3] | s AND x > 0) +$$) AS (result agtype); + +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = true, x IN [1, -2, 3] | s AND x > 0) +$$) AS (result agtype); + +-- +-- Property access on the element variable +-- +-- sum a field across a list of maps +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [{n: 1}, {n: 2}, {n: 3}] | s + x.n) +$$) AS (result agtype); + +-- concatenate a string field across a list of maps +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = '', x IN [{w: 'a'}, {w: 'b'}, {w: 'c'}] | s + x.w) +$$) AS (result agtype); + +-- +-- Multiple reduce() in one expression +-- +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) + reduce(p = 1, y IN [2, 3] | p * y) +$$) AS (result agtype); + +-- +-- reduce() in a boolean expression +-- +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) > 5 + AND reduce(p = 1, y IN [2, 3] | p * y) < 10 +$$) AS (result agtype); + +-- +-- reduce() nested in the list or initial value of another reduce() +-- +-- nesting is allowed in the list and the initial value (both evaluated in the +-- outer context) even though it is rejected inside the fold body. 
+-- nested reduce() in the list +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [reduce(a = 0, y IN [1, 2, 3] | a + y), 10] | s + x) +$$) AS (result agtype); + +-- nested reduce() in the initial value +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = reduce(a = 0, y IN [1, 2, 3] | a + y), x IN [10, 20] | s + x) +$$) AS (result agtype); + +-- +-- reduce() over a correlated (per-row) list +-- +SELECT * FROM cypher('reduce', $$ + UNWIND [[1, 2], [3, 4, 5], []] AS arr + RETURN reduce(s = 0, x IN arr | s + x) AS total + ORDER BY total +$$) AS (result agtype); + +-- +-- reduce() with the list and initial value bound in an outer clause +-- +SELECT * FROM cypher('reduce', $$ + WITH [10, 20, 30] AS ns + RETURN reduce(t = 0, n IN ns | t + n) +$$) AS (result agtype); + +-- the initial value may reference an outer variable (correlation is allowed +-- in the init and the list, only not in the body) +SELECT * FROM cypher('reduce', $$ + WITH 5 AS base + RETURN reduce(s = base, x IN [1, 2, 3] | s + x) +$$) AS (result agtype); + +-- +-- reduce() nested inside a list comprehension +-- +SELECT * FROM cypher('reduce', $$ + RETURN [v IN [1, 2, 3] | reduce(s = 0, x IN [v, v, v] | s + x)] +$$) AS (result agtype); + +-- +-- reduce() in a WHERE clause +-- +SELECT * FROM cypher('reduce', $$ + UNWIND [[1, 2, 3], [1, 1], [10]] AS l + WITH l WHERE reduce(s = 0, x IN l | s + x) > 3 + RETURN l + ORDER BY l +$$) AS (result agtype); + +-- +-- reduce() over graph data (the canonical Cypher example) +-- +SELECT * FROM cypher('reduce', $$ + CREATE (:person {name: 'Alice', age: 38}), + (:person {name: 'Bob', age: 25}), + (:person {name: 'Daniel', age: 54}) +$$) AS (result agtype); + +-- sum the ages of all person nodes +SELECT * FROM cypher('reduce', $$ + MATCH (p:person) + WITH collect(p) AS people + RETURN reduce(total = 0, n IN people | total + n.age) +$$) AS (result agtype); + +-- +-- reduce() over a graph node's list property +-- +SELECT * FROM cypher('reduce', $$ + CREATE 
(:bag {name: 'low', vals: [1, 2, 3]}), + (:bag {name: 'mid', vals: [5, 5, 5]}), + (:bag {name: 'high', vals: [10, 20, 30]}) +$$) AS (result agtype); + +-- filter nodes by a reduce() over their list property +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) WHERE reduce(s = 0, x IN u.vals | s + x) > 10 + RETURN u.name + ORDER BY u.name +$$) AS (result agtype); + +-- compute a reduce() value per node and order by it +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) + RETURN u.name AS name, reduce(s = 0, x IN u.vals | s + x) AS total + ORDER BY total +$$) AS (name agtype, total agtype); + +-- +-- Not-yet-supported constructs raise a clean feature error +-- +-- an outer variable referenced in the body +SELECT * FROM cypher('reduce', $$ + WITH 5 AS w + RETURN reduce(s = 0, x IN [1, 2] | s + x + w) +$$) AS (result agtype); + +-- a nested reduce() in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + reduce(t = 0, y IN [x] | t + y)) +$$) AS (result agtype); + +-- an aggregate function in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2] | s + count(x)) +$$) AS (result agtype); + +-- +-- "reduce" as a property key name (safe_keywords backward compatibility): +-- because reduce() introduced a reserved keyword, confirm the word is still +-- usable as a map key, the same way any/none/single are. +-- +SELECT * FROM cypher('reduce', $$ + RETURN {reduce: 1, any: 2, none: 3} +$$) AS (result agtype); + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('reduce', true); diff --git a/sql/age_aggregate.sql b/sql/age_aggregate.sql index a8ea425cc..fb258e5c5 100644 --- a/sql/age_aggregate.sql +++ b/sql/age_aggregate.sql @@ -216,3 +216,25 @@ CREATE AGGREGATE ag_catalog.age_collect(variadic "any") finalfunc = ag_catalog.age_collect_aggfinalfn, parallel = safe ); + +-- +-- reduce(acc = init, var IN list | body) fold support +-- +-- Transition function for the age_reduce aggregate. 
The fold body is compiled +-- by transform_cypher_reduce() with the accumulator and element rewritten to +-- PARAM_EXEC params 0 and 1 and serialized into the text argument; the +-- transition evaluates it for each element in list order. It must be callable +-- with a NULL transition state (no initcond), so it is intentionally not STRICT. +CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype) + RETURNS agtype + LANGUAGE c +PARALLEL UNSAFE +AS 'MODULE_PATHNAME'; + +-- aggregate definition for reduce(); direct arguments are +-- (init, serialized-body, element), with the element fed ORDER BY ordinality. +CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype) +( + stype = agtype, + sfunc = ag_catalog.age_reduce_transfn +); diff --git a/src/backend/nodes/ag_nodes.c b/src/backend/nodes/ag_nodes.c index bd78549ca..ac659e1b6 100644 --- a/src/backend/nodes/ag_nodes.c +++ b/src/backend/nodes/ag_nodes.c @@ -65,7 +65,8 @@ const char *node_names[] = { "cypher_delete_information", "cypher_delete_item", "cypher_merge_information", - "cypher_predicate_function" + "cypher_predicate_function", + "cypher_reduce" }; /* @@ -134,7 +135,8 @@ const ExtensibleNodeMethods node_methods[] = { DEFINE_NODE_METHODS_EXTENDED(cypher_delete_information), DEFINE_NODE_METHODS_EXTENDED(cypher_delete_item), DEFINE_NODE_METHODS_EXTENDED(cypher_merge_information), - DEFINE_NODE_METHODS_EXTENDED(cypher_predicate_function) + DEFINE_NODE_METHODS_EXTENDED(cypher_predicate_function), + DEFINE_NODE_METHODS_EXTENDED(cypher_reduce) }; static bool equal_ag_node(const ExtensibleNode *a, const ExtensibleNode *b) diff --git a/src/backend/nodes/cypher_copyfuncs.c b/src/backend/nodes/cypher_copyfuncs.c index 283096ca7..549218759 100644 --- a/src/backend/nodes/cypher_copyfuncs.c +++ b/src/backend/nodes/cypher_copyfuncs.c @@ -185,3 +185,15 @@ void copy_cypher_predicate_function(ExtensibleNode *newnode, COPY_NODE_FIELD(expr); COPY_NODE_FIELD(where); } + +/* copy function for cypher_reduce */ 
+void copy_cypher_reduce(ExtensibleNode *newnode, const ExtensibleNode *from) +{ + COPY_LOCALS(cypher_reduce); + + COPY_STRING_FIELD(acc_varname); + COPY_NODE_FIELD(init_expr); + COPY_STRING_FIELD(elem_varname); + COPY_NODE_FIELD(list_expr); + COPY_NODE_FIELD(body_expr); +} diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index 84d32a8f8..4a35be02f 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -200,6 +200,18 @@ void out_cypher_predicate_function(StringInfo str, const ExtensibleNode *node) WRITE_NODE_FIELD(where); } +/* serialization function for the cypher_reduce ExtensibleNode. */ +void out_cypher_reduce(StringInfo str, const ExtensibleNode *node) +{ + DEFINE_AG_NODE(cypher_reduce); + + WRITE_STRING_FIELD(acc_varname); + WRITE_NODE_FIELD(init_expr); + WRITE_STRING_FIELD(elem_varname); + WRITE_NODE_FIELD(list_expr); + WRITE_NODE_FIELD(body_expr); +} + /* serialization function for the cypher_merge ExtensibleNode. */ void out_cypher_merge(StringInfo str, const ExtensibleNode *node) { diff --git a/src/backend/nodes/cypher_readfuncs.c b/src/backend/nodes/cypher_readfuncs.c index 1e7e0ef82..a9a2ffabd 100644 --- a/src/backend/nodes/cypher_readfuncs.c +++ b/src/backend/nodes/cypher_readfuncs.c @@ -329,3 +329,17 @@ void read_cypher_predicate_function(struct ExtensibleNode *node) READ_NODE_FIELD(expr); READ_NODE_FIELD(where); } + +/* + * Deserialize a string representing the cypher_reduce data structure. 
+ */ +void read_cypher_reduce(struct ExtensibleNode *node) +{ + READ_LOCALS(cypher_reduce); + + READ_STRING_FIELD(acc_varname); + READ_NODE_FIELD(init_expr); + READ_STRING_FIELD(elem_varname); + READ_NODE_FIELD(list_expr); + READ_NODE_FIELD(body_expr); +} diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 5b72f332e..5dd53dcd0 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -865,6 +865,25 @@ bool cypher_raw_expr_tree_walker_impl(Node *node, return true; } } + else if (is_ag_node(node, cypher_reduce)) + { + cypher_reduce *rd = (cypher_reduce *)node; + + if (WALK(rd->init_expr)) + { + return true; + } + + if (WALK(rd->list_expr)) + { + return true; + } + + if (WALK(rd->body_expr)) + { + return true; + } + } /* Add more node types here as needed */ else { diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 5ac9dea65..a3a1a5044 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -50,6 +50,7 @@ #include "parser/cypher_expr.h" #include "parser/cypher_item.h" #include "parser/cypher_parse_agg.h" +#include "utils/agtype.h" #include "parser/cypher_transform_entity.h" #include "utils/ag_cache.h" #include "utils/ag_func.h" @@ -315,6 +316,10 @@ static Query *transform_cypher_list_comprehension(cypher_parsestate *cpstate, static Query *transform_cypher_predicate_function(cypher_parsestate *cpstate, cypher_clause *clause); +/* reduce */ +static Query *transform_cypher_reduce(cypher_parsestate *cpstate, + cypher_clause *clause); + /* merge */ static Query *transform_cypher_merge(cypher_parsestate *cpstate, cypher_clause *clause); @@ -579,6 +584,10 @@ Query *transform_cypher_clause(cypher_parsestate *cpstate, { result = transform_cypher_predicate_function(cpstate, clause); } + else if (is_ag_node(self, cypher_reduce)) + { + result = transform_cypher_reduce(cpstate, clause); + } else { ereport(ERROR, 
(errmsg_internal("unexpected Node for cypher_clause"))); @@ -2016,6 +2025,418 @@ static Query *transform_cypher_predicate_function(cypher_parsestate *cpstate, } } +/* + * Mutator context for rewriting the fold body's accumulator/element Vars + * (columns 1 and 2 of the throwaway namespace RTE) into PARAM_EXEC params. + */ +typedef struct reduce_var_param_context +{ + int varno; /* rangetable index of the dummy (acc, elem) RTE */ +} reduce_var_param_context; + +/* + * Rewrite Var(varno, 1) -> Param(PARAM_EXEC, 0) [accumulator] and + * Var(varno, 2) -> Param(PARAM_EXEC, 1) [element] in the transformed fold + * body, so the body can be evaluated standalone inside age_reduce_transfn + * with the two params rebound for every element. + */ +static Node *reduce_var_to_param_mutator(Node *node, void *context) +{ + reduce_var_param_context *ctx = (reduce_var_param_context *) context; + + if (node == NULL) + { + return NULL; + } + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* + * Only the dummy (acc, elem) RTE at this level is rewritten. The + * varlevelsup == 0 check is essential: an outer-query RTE can share + * the same varno (each parse state's range table is numbered from 1), + * so without it a correlated outer reference at attno 1/2 would be + * silently rewritten into the accumulator/element param. Outer Vars + * are instead left in place and rejected by reduce_body_check_walker. 
+ */ + if (var->varno == ctx->varno && var->varlevelsup == 0 && + (var->varattno == 1 || var->varattno == 2)) + { + Param *param = makeNode(Param); + + param->paramkind = PARAM_EXEC; + param->paramid = var->varattno - 1; + param->paramtype = AGTYPEOID; + param->paramtypmod = -1; + param->paramcollid = InvalidOid; + param->location = -1; + + return (Node *) param; + } + } + + return expression_tree_mutator(node, reduce_var_to_param_mutator, context); +} + +/* + * Build a throwaway subquery "SELECT NULL::agtype AS <acc>, NULL::agtype AS + * <elem>" used only to give the fold body a namespace in which the accumulator + * and element variables resolve to agtype columns. Those references are later + * rewritten to PARAM_EXEC params and the subquery is discarded. + */ +static Query *make_reduce_var_subquery(char *acc_name, char *elem_name) +{ + Query *subquery = makeNode(Query); + Const *acc_const; + Const *elem_const; + TargetEntry *acc_te; + TargetEntry *elem_te; + + acc_const = makeConst(AGTYPEOID, -1, InvalidOid, -1, (Datum) 0, true, false); + elem_const = makeConst(AGTYPEOID, -1, InvalidOid, -1, (Datum) 0, true, false); + + acc_te = makeTargetEntry((Expr *) acc_const, 1, acc_name, false); + elem_te = makeTargetEntry((Expr *) elem_const, 2, elem_name, false); + + subquery->commandType = CMD_SELECT; + subquery->targetList = list_make2(acc_te, elem_te); + subquery->jointree = makeFromExpr(NIL, NULL); + subquery->rtable = NIL; + subquery->rteperminfos = NIL; + + return subquery; +} + +/* + * Validate a transformed-and-mutated reduce() fold body. After + * reduce_var_to_param_mutator() has replaced the accumulator and element with + * PARAM_EXEC params 0 and 1, a valid body is a pure expression over those two + * params: it must contain no other Vars (outer-query references), no other + * params, and no aggregates or subqueries, because the body is evaluated + * standalone (ExecEvalExpr) inside age_reduce_transfn with only those two + * param slots bound. 
+ */ +static bool reduce_body_check_walker(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Var)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("a reduce() expression may only reference its accumulator and element variables"))); + } + + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind != PARAM_EXEC || + (param->paramid != 0 && param->paramid != 1)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("a reduce() expression may not reference query parameters"))); + } + } + + if (IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, WindowFunc)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("aggregate functions are not supported in a reduce() expression"))); + } + + if (IsA(node, SubLink)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("subqueries (including a nested reduce()) are not supported in a reduce() expression"))); + } + + return expression_tree_walker(node, reduce_body_check_walker, context); +} + +/* + * Transform a cypher_reduce node into a query tree. + * + * reduce(acc = init, var IN list | body) is rewritten into a scalar subquery + * over the age_reduce aggregate, with the list unnested WITH ORDINALITY and the + * aggregate ordered by that ordinality so the fold runs in list order: + * + * SELECT ag_catalog.age_reduce(<init>, '<serialized body>'::text, + * r.elem ORDER BY r.ord) + * FROM unnest(<list>) WITH ORDINALITY AS r(elem, ord) + * + * The fold body is transformed separately with the accumulator and element + * rewritten to PARAM_EXEC params 0 and 1, serialized into the text argument, + * and evaluated per element inside age_reduce_transfn. + * + * The null/empty-list guard + * (CASE WHEN list IS NULL THEN NULL ELSE COALESCE(<fold>, init) END) is built + * at the grammar level in build_reduce_node(). 
+ */ +static Query *transform_cypher_reduce(cypher_parsestate *cpstate, + cypher_clause *clause) +{ + cypher_reduce *reduce = (cypher_reduce *) clause->self; + Query *query; + Query *var_subquery; + cypher_parsestate *body_cpstate; + ParseState *body_pstate; + ParseNamespaceItem *body_pnsi; + Node *body_node; + char *body_serialized; + reduce_var_param_context mutator_ctx; + cypher_parsestate *child_cpstate; + ParseState *child_pstate; + FuncCall *unnest_fc; + RangeFunction *rf; + RangeTblEntry *rte = NULL; + int rtindex = 0; + List *namespace = NULL; + Node *from_item; + Node *init_node; + Node *elem_var; + Var *ord_var; + TargetEntry *ord_te; + SortGroupClause *sortcl; + Oid sort_ltop; + Oid sort_eqop; + bool sort_hashable; + Const *body_const; + Aggref *agg; + Oid agg_oid; + Oid agg_argtypes[3]; + TargetEntry *result_te; + + /* + * 1. Resolve the fold body's accumulator and element variables against a + * throwaway 2-column agtype subquery, rewrite those Vars to PARAM_EXEC + * params, validate it is a pure expression over those params, and + * serialize the body for age_reduce_transfn. + */ + body_cpstate = make_cypher_parsestate(cpstate); + body_pstate = (ParseState *) body_cpstate; + + var_subquery = make_reduce_var_subquery(reduce->acc_varname, + reduce->elem_varname); + body_pnsi = addRangeTableEntryForSubquery(body_pstate, var_subquery, + makeAlias("reduce_vars", NIL), + false, true); + addNSItemToQuery(body_pstate, body_pnsi, false, true, true); + + body_node = transform_cypher_expr(body_cpstate, reduce->body_expr, + EXPR_KIND_SELECT_TARGET); + + /* + * The accumulator is always an agtype value (the aggregate's stype is + * agtype). A fold body can legitimately produce a non-agtype scalar -- for + * example "s AND x" or "x = 2" yield a boolean -- so normalize the body to + * agtype here. Without this the transition function would treat a by-value + * Datum (e.g. bool) as a by-reference varlena and crash. 
A boolean is + * wrapped in ag_catalog.bool_to_agtype() (AGE registers no implicit + * boolean-to-agtype cast); any other non-agtype type is coerced through + * the normal cast machinery, which raises a clean error if impossible. + */ + if (exprType(body_node) != AGTYPEOID) + { + if (exprType(body_node) == BOOLOID) + { + Oid bool_to_agtype_oid = get_ag_func_oid("bool_to_agtype", 1, + BOOLOID); + + body_node = (Node *) makeFuncExpr(bool_to_agtype_oid, AGTYPEOID, + list_make1(body_node), + InvalidOid, InvalidOid, + COERCE_EXPLICIT_CALL); + } + else + { + body_node = coerce_to_common_type(body_pstate, body_node, + AGTYPEOID, "reduce"); + } + } + + mutator_ctx.varno = body_pnsi->p_rtindex; + body_node = reduce_var_to_param_mutator(body_node, &mutator_ctx); + + reduce_body_check_walker(body_node, NULL); + + body_serialized = nodeToString(body_node); + + free_cypher_parsestate(body_cpstate); + + /* + * 2. Build the outer aggregate query: + * SELECT age_reduce(, ''::text, r.elem ORDER BY r.ord) + * FROM unnest() WITH ORDINALITY AS r(elem, ord) + */ + query = makeNode(Query); + query->commandType = CMD_SELECT; + + child_cpstate = make_cypher_parsestate(cpstate); + child_pstate = (ParseState *) child_cpstate; + + unnest_fc = makeFuncCall(list_make1(makeString("unnest")), + list_make1(reduce->list_expr), + COERCE_SQL_SYNTAX, -1); + rf = makeNode(RangeFunction); + rf->lateral = false; + rf->ordinality = true; + rf->is_rowsfrom = false; + rf->functions = list_make1(list_make2((Node *) unnest_fc, NIL)); + rf->alias = makeAlias("reduce_src", + list_make2(makeString(reduce->elem_varname), + makeString("reduce_ordinality"))); + rf->coldeflist = NIL; + + from_item = transform_from_clause_item(child_cpstate, (Node *) rf, + &rte, &rtindex, &namespace); + checkNameSpaceConflicts(child_pstate, child_pstate->p_namespace, namespace); + child_pstate->p_joinlist = lappend(child_pstate->p_joinlist, from_item); + child_pstate->p_namespace = list_concat(child_pstate->p_namespace, + 
namespace); + setNamespaceLateralState(child_pstate->p_namespace, false, true); + + /* arguments to age_reduce: init, serialized body text, element column */ + init_node = transform_cypher_expr(child_cpstate, reduce->init_expr, + EXPR_KIND_SELECT_TARGET); + elem_var = colNameToVar(child_pstate, reduce->elem_varname, false, -1); + body_const = makeConst(TEXTOID, -1, InvalidOid, -1, + CStringGetTextDatum(body_serialized), false, false); + + /* the WITH ORDINALITY column (bigint), used only to order the fold */ + ord_var = makeVar(rtindex, 2, INT8OID, -1, InvalidOid, 0); + get_sort_group_operators(INT8OID, true, true, false, + &sort_ltop, &sort_eqop, NULL, &sort_hashable); + + ord_te = makeTargetEntry((Expr *) ord_var, 4, NULL, true); + ord_te->ressortgroupref = 1; + + sortcl = makeNode(SortGroupClause); + sortcl->tleSortGroupRef = 1; + sortcl->eqop = sort_eqop; + sortcl->sortop = sort_ltop; + sortcl->reverse_sort = false; + sortcl->nulls_first = false; + sortcl->hashable = sort_hashable; + + /* + * Evaluate exactly once per reduce() instead of once per element. + * A regular aggregate argument is evaluated by the executor for every + * input row, but age_reduce_transfn only reads the init argument on the + * first transition (when the running state is still NULL). Re-evaluating + * an expensive init wastes work, and a volatile init would fire its side + * effects once per element. + * + * Rows are fed to the aggregate in ascending ordinality order, so the + * first transition is always the row with ordinality 1. Wrapping init in + * CASE WHEN reduce_ordinality = 1 THEN ELSE NULL::agtype END + * computes on exactly that row (CASE only evaluates the matching + * branch's result) and passes a NULL init -- which the transition + * function ignores -- on every other row. The empty-list case is handled + * separately by the COALESCE(..., init) guard in build_reduce_node(). 
+ */ + { + OpExpr *ord_is_first; + Const *one_const; + CaseWhen *init_when; + CaseExpr *init_case; + Const *null_init; + + one_const = makeConst(INT8OID, -1, InvalidOid, sizeof(int64), + Int64GetDatum(1), false, true); + + ord_is_first = makeNode(OpExpr); + ord_is_first->opno = sort_eqop; /* int8 equality */ + ord_is_first->opfuncid = get_opcode(sort_eqop); + ord_is_first->opresulttype = BOOLOID; + ord_is_first->opretset = false; + ord_is_first->opcollid = InvalidOid; + ord_is_first->inputcollid = InvalidOid; + ord_is_first->args = list_make2(copyObject(ord_var), one_const); + ord_is_first->location = -1; + + null_init = makeConst(AGTYPEOID, -1, InvalidOid, -1, (Datum) 0, + true, false); + + init_when = makeNode(CaseWhen); + init_when->expr = (Expr *) ord_is_first; + init_when->result = (Expr *) init_node; + init_when->location = -1; + + init_case = makeNode(CaseExpr); + init_case->casetype = AGTYPEOID; + init_case->casecollid = InvalidOid; + init_case->arg = NULL; + init_case->args = list_make1(init_when); + init_case->defresult = (Expr *) null_init; + init_case->location = -1; + + init_node = (Node *) init_case; + } + + /* look up the age_reduce(agtype, text, agtype) aggregate */ + agg_argtypes[0] = AGTYPEOID; + agg_argtypes[1] = TEXTOID; + agg_argtypes[2] = AGTYPEOID; + agg_oid = LookupFuncName(list_make2(makeString("ag_catalog"), + makeString("age_reduce")), + 3, agg_argtypes, false); + + agg = makeNode(Aggref); + agg->aggfnoid = agg_oid; + agg->aggtype = AGTYPEOID; + agg->aggcollid = InvalidOid; + agg->inputcollid = InvalidOid; + agg->aggtranstype = InvalidOid; /* filled by the planner */ + agg->aggargtypes = list_make3_oid(AGTYPEOID, TEXTOID, AGTYPEOID); + agg->aggdirectargs = NIL; + agg->args = list_make4(makeTargetEntry((Expr *) init_node, 1, NULL, false), + makeTargetEntry((Expr *) body_const, 2, NULL, false), + makeTargetEntry((Expr *) elem_var, 3, NULL, false), + ord_te); + agg->aggorder = list_make1(sortcl); + agg->aggdistinct = NIL; + agg->aggfilter = 
NULL; + agg->aggstar = false; + agg->aggvariadic = false; + agg->aggkind = AGGKIND_NORMAL; + agg->aggpresorted = false; + agg->agglevelsup = 0; + agg->aggsplit = AGGSPLIT_SIMPLE; + agg->aggno = -1; + agg->aggtransno = -1; + agg->location = -1; + + child_pstate->p_hasAggs = true; + + result_te = makeTargetEntry((Expr *) agg, + (AttrNumber) child_pstate->p_next_resno++, + "reduce", false); + + query->targetList = list_make1(result_te); + query->jointree = makeFromExpr(child_pstate->p_joinlist, NULL); + query->rtable = child_pstate->p_rtable; + query->rteperminfos = child_pstate->p_rteperminfos; + query->hasAggs = true; + query->hasSubLinks = child_pstate->p_hasSubLinks; + query->hasTargetSRFs = child_pstate->p_hasTargetSRFs; + + assign_query_collations(child_pstate, query); + parse_check_aggregates(child_pstate, query); + + free_cypher_parsestate(child_cpstate); + + return query; +} + /* * Iterate through the list of items to delete and extract the variable name. * Then find the resno that the variable name belongs to. 
diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index b23fa705a..83d69c83b 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -131,7 +131,7 @@ MATCH MERGE NONE NOT NULL_P ON OPERATOR OPTIONAL OR ORDER - REMOVE RETURN + REDUCE REMOVE RETURN SET SINGLE SKIP STARTS THEN TRUE_P UNION UNWIND @@ -332,6 +332,11 @@ static Node *build_predicate_function_node(cypher_predicate_function_kind kind, /* pattern expression helper */ static Node *make_exists_pattern_sublink(Node *pattern, int location); +/* reduce(acc = init, var IN list | body) */ +static Node *build_reduce_node(char *acc_varname, Node *init_expr, + char *elem_varname, Node *list_expr, + Node *body_expr, int location); + /* helper functions */ static ExplainStmt *make_explain_stmt(List *options); static void validate_return_item_aliases(List *items, ag_scanner_t scanner); @@ -1963,6 +1968,10 @@ expr_func_subexpr: { $$ = build_predicate_function_node(CPFK_SINGLE, $3, $5, $7, @1); } + | REDUCE '(' var_name '=' expr ',' var_name IN expr '|' expr ')' + { + $$ = build_reduce_node($3, $5, $7, $9, $11, @1); + } ; expr_subquery: @@ -2561,6 +2570,7 @@ safe_keywords: | OPTIONAL { $$ = KEYWORD_STRDUP($1); } | OR { $$ = KEYWORD_STRDUP($1); } | ORDER { $$ = KEYWORD_STRDUP($1); } + | REDUCE { $$ = KEYWORD_STRDUP($1); } | REMOVE { $$ = KEYWORD_STRDUP($1); } | RETURN { $$ = KEYWORD_STRDUP($1); } | SET { $$ = KEYWORD_STRDUP($1); } @@ -3646,6 +3656,86 @@ static Node *make_exists_pattern_sublink(Node *pattern, int location) return (Node *)node_to_agtype((Node *)n, "boolean", location); } +/* + * Helper function to build a reduce() grammar node. + * + * Follows the openCypher syntax: + * reduce(acc = init, var IN list | body) + * + * The accumulator `acc` is seeded with `init` and threaded across the + * elements of `list` (bound to `var`) in list order, with `body` producing + * the next accumulator value at each step.
The result is the final + * accumulator value, or `init` when the list is empty. + * + * The reduce node is wrapped in an EXPR_SUBLINK (scalar subquery) whose + * subselect is the cypher_reduce node; transform_cypher_reduce() in + * cypher_clause.c rewrites it into a correlated scalar subquery over an + * ordered aggregate. + * + * The whole thing is then wrapped so the openCypher null/empty-list semantics + * hold without the transform layer having to special-case them: + * + * CASE WHEN list IS NULL THEN NULL + * ELSE COALESCE((reduce subquery), init) END + * + * A NULL list yields NULL; an empty list yields `init` (the aggregate runs + * over zero rows and returns SQL NULL, which COALESCE replaces with init); + * a non-empty list yields the fold result. The list and init grammar nodes + * are shared between the reduce node and this guard, which is safe because + * AGE's expression transformer builds new nodes rather than mutating in place. + */ +static Node *build_reduce_node(char *acc_varname, Node *init_expr, + char *elem_varname, Node *list_expr, + Node *body_expr, int location) +{ + SubLink *sub; + cypher_reduce *reduce_node = NULL; + CoalesceExpr *coalesce; + NullTest *null_test; + CaseWhen *case_when; + CaseExpr *guard; + + reduce_node = make_ag_node(cypher_reduce); + reduce_node->acc_varname = acc_varname; + reduce_node->init_expr = init_expr; + reduce_node->elem_varname = elem_varname; + reduce_node->list_expr = list_expr; + reduce_node->body_expr = body_expr; + + sub = makeNode(SubLink); + sub->subLinkId = 0; + sub->testexpr = NULL; + sub->operName = NIL; + sub->subselect = (Node *) reduce_node; + sub->location = location; + sub->subLinkType = EXPR_SUBLINK; + + /* COALESCE((reduce subquery), init) -- empty list falls back to init */ + coalesce = makeNode(CoalesceExpr); + coalesce->args = list_make2((Node *) sub, init_expr); + coalesce->location = location; + + /* CASE WHEN list IS NULL THEN NULL ELSE COALESCE(...) END */ + null_test = makeNode(NullTest); +
null_test->arg = (Expr *) list_expr; + null_test->nulltesttype = IS_NULL; + null_test->argisrow = false; + null_test->location = location; + + case_when = makeNode(CaseWhen); + case_when->expr = (Expr *) null_test; + case_when->result = (Expr *) make_null_const(location); + case_when->location = location; + + guard = makeNode(CaseExpr); + guard->arg = NULL; + guard->args = list_make1(case_when); + guard->defresult = (Expr *) coalesce; + guard->location = location; + + return (Node *) guard; +} + /* Helper function to create an ExplainStmt node */ static ExplainStmt *make_explain_stmt(List *options) { diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index cfff8138f..bf69bf1fa 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -47,8 +47,11 @@ #include "miscadmin.h" #include "parser/parse_coerce.h" #include "nodes/nodes.h" +#include "nodes/nodeFuncs.h" +#include "executor/executor.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/datum.h" #include "executor/cypher_utils.h" #include "utils/float.h" #include "utils/lsyscache.h" @@ -11575,6 +11578,168 @@ Datum age_float8_stddev_pop_aggfinalfn(PG_FUNCTION_ARGS) PG_RETURN_POINTER(agtype_value_to_agtype(&agtv_float)); } +/* + * Evaluation state for reduce(), cached in fn_extra and reused across groups. + * Holds the compiled fold-body expression and a standalone ExprContext whose + * PARAM_EXEC slots (0 = accumulator, 1 = current element) are rebound per element. + */ +typedef struct reduce_eval_ctx +{ + ExprState *body_state; /* compiled fold-body expression */ + ExprContext *econtext; /* eval context carrying the param slots */ + ParamExecData *params; /* [0] = accumulator, [1] = current element */ +} reduce_eval_ctx; + +/* Build an agtype 'null' Datum (a real agtype value, not a SQL NULL).
*/ +static Datum reduce_agtype_null(void) +{ + agtype_value agtv; + + agtv.type = AGTV_NULL; + return AGTYPE_P_GET_DATUM(agtype_value_to_agtype(&agtv)); +} + +/* + * age_reduce_transfn(state agtype, init agtype, body text, element agtype) + * + * Transition function for the age_reduce aggregate that implements the Cypher + * reduce(acc = init, var IN list | body) fold. The fold body is compiled by + * transform_cypher_reduce() with the accumulator and element rewritten to + * PARAM_EXEC params 0 and 1, then serialized into the `body` text argument. + * + * On the first element of a group the accumulator is seeded from `init` + * (the running state is NULL because the aggregate uses no initcond); on + * every element the body is evaluated with the params rebound, and the result + * becomes the next accumulator state. + * + * The accumulator and element are normalized to a non-NULL agtype 'null' + * before evaluation so that (a) the fold body sees agtype values and Cypher + * null semantics apply, and (b) the running state is never a SQL NULL, which + * keeps PG_ARGISNULL(0) a reliable "first element of the group" signal even + * when the fold legitimately produces null. + */ +PG_FUNCTION_INFO_V1(age_reduce_transfn); + +Datum age_reduce_transfn(PG_FUNCTION_ARGS) +{ + MemoryContext aggcontext; + MemoryContext oldctx; + reduce_eval_ctx *rc; + Datum acc; + Datum element; + Datum result; + bool result_isnull; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("age_reduce_transfn called in a non-aggregate context"))); + } + + /* the fold can run over a large list; stay responsive to cancellation */ + CHECK_FOR_INTERRUPTS(); + + /* + * One-time per-FmgrInfo setup: deserialize and compile the fold body, and + * build the standalone ExprContext plus its two PARAM_EXEC slots. The body + * text is a query constant, so caching the compiled state across groups is + * correct. 
+ */ + rc = (reduce_eval_ctx *) fcinfo->flinfo->fn_extra; + if (rc == NULL) + { + text *body_txt; + char *body_str; + Node *body_node; + + if (PG_ARGISNULL(2)) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("age_reduce: missing fold expression"))); + } + + oldctx = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt); + rc = (reduce_eval_ctx *) palloc0(sizeof(reduce_eval_ctx)); + body_txt = PG_GETARG_TEXT_PP(2); + body_str = text_to_cstring(body_txt); + body_node = (Node *) stringToNode(body_str); + + /* + * age_reduce() is SQL-callable, so the serialized body argument is + * not guaranteed to have come from transform_cypher_reduce(). The + * running state is stored as an agtype varlena (the datumCopy() below + * uses typbyval=false, typlen=-1), so a body that evaluates to a + * by-value type (e.g. a bare boolean or integer) would have its Datum + * misread as a pointer and could crash the backend. Reject any body + * whose result type is not agtype. transform_cypher_reduce() always + * normalizes the fold body to agtype, so a planner-generated reduce() + * is never rejected here. + */ + if (exprType(body_node) != AGTYPEOID) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("age_reduce: fold expression must return agtype"))); + } + + rc->body_state = ExecInitExpr((Expr *) body_node, NULL); + rc->econtext = CreateStandaloneExprContext(); + rc->params = (ParamExecData *) palloc0(sizeof(ParamExecData) * 2); + rc->econtext->ecxt_param_exec_vals = rc->params; + fcinfo->flinfo->fn_extra = rc; + MemoryContextSwitchTo(oldctx); + } + + /* + * Seed the accumulator. The aggregate declares no initcond, so on the + * first element the running state (arg 0) is NULL and we use `init` + * (arg 1); thereafter the accumulator is the prior state. A NULL init is + * normalized to agtype 'null'. + */ + if (PG_ARGISNULL(0)) + { + acc = PG_ARGISNULL(1) ? 
reduce_agtype_null() : PG_GETARG_DATUM(1); + } + else + { + acc = PG_GETARG_DATUM(0); + } + + /* a NULL element is likewise normalized to agtype 'null' */ + element = PG_ARGISNULL(3) ? reduce_agtype_null() : PG_GETARG_DATUM(3); + + /* bind PARAM_EXEC 0 = accumulator, 1 = current element */ + rc->params[0].value = acc; + rc->params[0].isnull = false; + rc->params[0].execPlan = NULL; + rc->params[1].value = element; + rc->params[1].isnull = false; + rc->params[1].execPlan = NULL; + + /* evaluate the fold body for this element */ + ResetExprContext(rc->econtext); + result = ExecEvalExpr(rc->body_state, rc->econtext, &result_isnull); + + /* + * Never let the running state become a SQL NULL: a null fold result is + * stored as agtype 'null' so the next element is not mistaken for the + * first one (see PG_ARGISNULL(0) above). + */ + if (result_isnull) + { + result = reduce_agtype_null(); + } + + /* the new state must survive in the aggregate context across elements */ + oldctx = MemoryContextSwitchTo(aggcontext); + result = datumCopy(result, false, -1); + MemoryContextSwitchTo(oldctx); + + PG_RETURN_DATUM(result); +} + PG_FUNCTION_INFO_V1(age_agtype_larger_aggtransfn); Datum age_agtype_larger_aggtransfn(PG_FUNCTION_ARGS) diff --git a/src/include/nodes/ag_nodes.h b/src/include/nodes/ag_nodes.h index 47c55041b..57b2deabc 100644 --- a/src/include/nodes/ag_nodes.h +++ b/src/include/nodes/ag_nodes.h @@ -77,7 +77,9 @@ typedef enum ag_node_tag cypher_delete_item_t, cypher_merge_information_t, /* predicate functions */ - cypher_predicate_function_t + cypher_predicate_function_t, + /* reduce */ + cypher_reduce_t } ag_node_tag; extern const char *node_names[]; diff --git a/src/include/nodes/cypher_copyfuncs.h b/src/include/nodes/cypher_copyfuncs.h index e770cebe2..fcad598b7 100644 --- a/src/include/nodes/cypher_copyfuncs.h +++ b/src/include/nodes/cypher_copyfuncs.h @@ -56,4 +56,8 @@ void copy_cypher_merge_information(ExtensibleNode *newnode, /* predicate function data structure 
*/ void copy_cypher_predicate_function(ExtensibleNode *newnode, const ExtensibleNode *from); + +/* reduce data structure */ +void copy_cypher_reduce(ExtensibleNode *newnode, + const ExtensibleNode *from); #endif diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index 3433bebb0..5efbe95f7 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -247,6 +247,24 @@ typedef struct cypher_predicate_function Node *where; /* the predicate to test */ } cypher_predicate_function; +/* + * reduce(acc = init, var IN list | body) + * + * Folds `body` over `list`, threading an accumulator `acc` (seeded with + * `init`) across the elements in list order, binding each element to `var`. + * Transformed into a correlated scalar subquery over an ordered aggregate + * by transform_cypher_reduce() in cypher_clause.c. + */ +typedef struct cypher_reduce +{ + ExtensibleNode extensible; + char *acc_varname; /* accumulator variable name */ + Node *init_expr; /* initial accumulator value */ + char *elem_varname; /* per-element variable name */ + Node *list_expr; /* the list to fold over */ + Node *body_expr; /* the fold expression evaluated per element */ +} cypher_reduce; + typedef enum cypher_map_projection_element_type { PROPERTY_SELECTOR = 0, /* map_var { .key } */ diff --git a/src/include/nodes/cypher_outfuncs.h b/src/include/nodes/cypher_outfuncs.h index 55285bdba..fc2a830b9 100644 --- a/src/include/nodes/cypher_outfuncs.h +++ b/src/include/nodes/cypher_outfuncs.h @@ -50,6 +50,7 @@ void out_cypher_map_projection(StringInfo str, const ExtensibleNode *node); void out_cypher_list(StringInfo str, const ExtensibleNode *node); void out_cypher_list_comprehension(StringInfo str, const ExtensibleNode *node); void out_cypher_predicate_function(StringInfo str, const ExtensibleNode *node); +void out_cypher_reduce(StringInfo str, const ExtensibleNode *node); /* comparison expression */ void out_cypher_comparison_aexpr(StringInfo str, 
const ExtensibleNode *node); diff --git a/src/include/nodes/cypher_readfuncs.h b/src/include/nodes/cypher_readfuncs.h index 9202ba511..2f27f91cd 100644 --- a/src/include/nodes/cypher_readfuncs.h +++ b/src/include/nodes/cypher_readfuncs.h @@ -54,4 +54,7 @@ void read_cypher_merge_information(struct ExtensibleNode *node); /* predicate function data structure */ void read_cypher_predicate_function(struct ExtensibleNode *node); +/* reduce data structure */ +void read_cypher_reduce(struct ExtensibleNode *node); + #endif diff --git a/src/include/parser/cypher_kwlist.h b/src/include/parser/cypher_kwlist.h index 44ac09452..909b5d272 100644 --- a/src/include/parser/cypher_kwlist.h +++ b/src/include/parser/cypher_kwlist.h @@ -36,6 +36,7 @@ PG_KEYWORD("operator", OPERATOR, RESERVED_KEYWORD) PG_KEYWORD("optional", OPTIONAL, RESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) PG_KEYWORD("order", ORDER, RESERVED_KEYWORD) +PG_KEYWORD("reduce", REDUCE, RESERVED_KEYWORD) PG_KEYWORD("remove", REMOVE, RESERVED_KEYWORD) PG_KEYWORD("return", RETURN, RESERVED_KEYWORD) PG_KEYWORD("set", SET, RESERVED_KEYWORD) From bdbb7177818c32e03474c1ee17a21d03a2cf7251 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Mon, 29 Jun 2026 06:47:55 -0700 Subject: [PATCH 11/20] resolve subgraph staging sequences via regclass (#2446) The vertex/edge staging copies in create_subgraph() generated new graphids with nextval(%L), which binds the sequence as a string literal and invokes the nextval(text) overload. That re-resolves the schema-qualified sequence name on each call. Cast the literal to regclass (nextval(%L::regclass)) so the sequence is resolved once to its OID, matching how AGE defines its label id defaults (nextval('schema.seq'::regclass)). Applied to both the vertex and edge staging queries, in sql/age_subgraph.sql and the identical body in the age--1.7.0--y.y.y.sql upgrade template so the upgrade-path catalog comparison still matches. 
Behavior is unchanged; all 38 regression tests pass against PostgreSQL 18. Addresses Copilot review feedback on apache/age#2441. Co-authored-by: GitHub Copilot (Claude Opus 4.8) <[email protected]> modified: age--1.7.0--y.y.y.sql modified: sql/age_subgraph.sql --- age--1.7.0--y.y.y.sql | 4 ++-- sql/age_subgraph.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index ec909d2bb..6dc8f707f 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -1013,7 +1013,7 @@ BEGIN EXECUTE format( 'CREATE TEMP TABLE _ag_sg_vstage ON COMMIT DROP AS ' 'SELECT t.id AS old_id, ' - ' ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + ' ag_catalog._graphid(%s, nextval(%L::regclass)) AS new_id, ' ' t.properties AS props ' 'FROM ONLY %s t ' 'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id)', @@ -1076,7 +1076,7 @@ BEGIN DROP TABLE IF EXISTS _ag_sg_estage; EXECUTE format( 'CREATE TEMP TABLE _ag_sg_estage ON COMMIT DROP AS ' - 'SELECT ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + 'SELECT ag_catalog._graphid(%s, nextval(%L::regclass)) AS new_id, ' ' vs.new_id AS new_start, ve.new_id AS new_end, ' ' x.properties AS props ' 'FROM ONLY %s x ' diff --git a/sql/age_subgraph.sql b/sql/age_subgraph.sql index 960790ded..0d7e3648d 100644 --- a/sql/age_subgraph.sql +++ b/sql/age_subgraph.sql @@ -205,7 +205,7 @@ BEGIN EXECUTE format( 'CREATE TEMP TABLE _ag_sg_vstage ON COMMIT DROP AS ' 'SELECT t.id AS old_id, ' - ' ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + ' ag_catalog._graphid(%s, nextval(%L::regclass)) AS new_id, ' ' t.properties AS props ' 'FROM ONLY %s t ' 'WHERE EXISTS (SELECT 1 FROM _ag_sg_kept_v k WHERE k.gid = t.id)', @@ -268,7 +268,7 @@ BEGIN DROP TABLE IF EXISTS _ag_sg_estage; EXECUTE format( 'CREATE TEMP TABLE _ag_sg_estage ON COMMIT DROP AS ' - 'SELECT ag_catalog._graphid(%s, nextval(%L)) AS new_id, ' + 'SELECT ag_catalog._graphid(%s, nextval(%L::regclass)) AS new_id, ' ' 
vs.new_id AS new_start, ve.new_id AS new_end, ' ' x.properties AS props ' 'FROM ONLY %s x ' From 7c7f3aef277426bb92a34dbde12be23299d3ff9d Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Mon, 29 Jun 2026 06:48:05 -0700 Subject: [PATCH 12/20] Support relationship-type filters and a minimum hop count (#2442) Support relationship-type filters and a minimum hop count in shortest_path SRFs age_shortest_path / age_all_shortest_paths gain two related capabilities, both following openCypher / Neo4j semantics. Relationship-type filtering: the edge_types argument now accepts an array of types; an edge matches when its label is any one of the requested types. A bare string or a one-element array keeps the single-type behaviour, an empty string/array or NULL means no filter, and an unknown type matches nothing. sp_run_bfs takes an Oid set rather than a single oid, and sp_compute_paths resolves the argument into that set. Minimum hop count: the new min_hops argument is a lower bound on the path length. When it does not exceed the true shortest distance it imposes no constraint, so the normal BFS shortest-path result is returned. When it exceeds the shortest distance, BFS cannot produce a qualifying path, so the search falls back to the variable-length-edge depth-first engine (sp_minhops_fallback), which enumerates edge-distinct paths (relationship-uniqueness / trail semantics) and returns the shortest path(s) whose length is at least min_hops. This regime permits revisiting a vertex and closed walks back to the start, but never reusing an edge. A private memory context bounds the search and a cost guard caps the number of examined paths, raising PROGRAM_LIMIT_EXCEEDED (with a hint to bound the search with a maximum hop count) when the cap is exceeded. The hard regime combined with multiple relationship types is unsupported, because the VLE engine matches a single label; that case raises FEATURE_NOT_SUPPORTED. 
Regression coverage spans single- and multi-type filters, directed and undirected reachability, multiplicity of equal-length paths, max_hops bounds, NULL and non-existent endpoints, and both min_hops regimes, including a vertex-revisiting longer path (sp_revisit) and a closed-walk cycle back to the start (sp_tri). The in-cypher() Tier 1 call forms are exercised as well. Review feedback addressed: 1. Error messages now report the function actually called. age_shortest_path and age_all_shortest_paths share their argument-resolution helpers, which hard-coded an "age_shortest_path" prefix regardless of the caller; the caller's name is now threaded through so each function reports its own (this also corrects a mislabeled multi-type min_hops error). A new regression case (sp_errname) pins the behaviour for both functions. 2. age_all_shortest_paths now bounds the number of materialized result paths. The shortest-path DAG can contain exponentially many equal-length paths, all built up front before the first row streams; enumeration is capped at SP_MAX_RESULT_PATHS (1,000,000), raising PROGRAM_LIMIT_EXCEEDED with a hint to narrow the search, mirroring the existing min-hops candidate cap. 3. The BFS search state (visited table, frontier queue, predecessor multiset, and intermediate path arrays) now lives in a private scratch memory context that is deleted once the surviving result Datums are built in the SRF context, rather than persisting in multi_call_memory_ctx for the life of the SRF. This bounds peak memory to the result set plus one search and matches the pattern sp_minhops_fallback already used. 4. 
A second review round tightened memory hygiene and reporting: the pnstrdup'd relationship-type name is freed once resolved to an oid (it was retained for the life of the SRF) in both the array and scalar cases; the invalid-direction error now carries the called function's name like the other argument errors; the min-hops fallback's private context is renamed to a caller-neutral "age shortest path minhops" (it is shared by both SRFs); and the multi-type label-filter comment is corrected to note that an unknown type merely contributes no matches -- known types in the same set still match, and only an all-unknown set leaves just the zero-length path. 41/41 installcheck. Co-authored-by: Copilot modified: regress/expected/age_shortest_path.out modified: regress/sql/age_shortest_path.sql modified: src/backend/utils/adt/age_vle.c --- regress/expected/age_shortest_path.out | 762 ++++++++++++++++++++++++- regress/sql/age_shortest_path.sql | 487 +++++++++++++++- src/backend/utils/adt/age_vle.c | 445 +++++++++++++-- 3 files changed, 1627 insertions(+), 67 deletions(-) diff --git a/regress/expected/age_shortest_path.out b/regress/expected/age_shortest_path.out index 7eb751d12..f7f8f1300 100644 --- a/regress/expected/age_shortest_path.out +++ b/regress/expected/age_shortest_path.out @@ -158,6 +158,20 @@ FROM age_all_shortest_paths( 2 (1 row) +-- single shortest path with direction 'in': D -> A backwards; expected: +-- path_count = 1 (the single-path variant picks one of the two routes) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"in"'::agtype +); + path_count +------------ + 1 +(1 row) + -- direction 'out': D -> A not reachable forwards; expected: path_count = 0 SELECT count(*) AS path_count FROM age_shortest_path( @@ -192,7 +206,7 @@ FROM 
age_shortest_path( (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), NULL, '"sideways"'::agtype ); -ERROR: direction argument must be one of 'out', 'in', or 'any' +ERROR: age_shortest_path: direction argument must be one of 'out', 'in', or 'any' -- error: start argument is neither a vertex nor an integer id; expected: ERROR SELECT count(*) AS path_count FROM age_shortest_path( @@ -201,6 +215,15 @@ FROM age_shortest_path( (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) ); ERROR: start vertex argument must be a vertex or the integer id +-- error: end argument is neither a vertex nor an integer id; expected: ERROR +-- (symmetric to the start-vertex check above) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"not_a_vertex"'::agtype +); +ERROR: end vertex argument must be a vertex or the integer id -- -- Non-existent endpoint guards. These must NOT crash the backend and must -- return no rows (a path can only exist between vertices in the graph). @@ -932,20 +955,113 @@ SELECT count(*) FROM age_shortest_path( 1 (1 row) --- multiple relationship types are not yet supported; expected: ERROR +-- multiple relationship types: an array of types matches an edge whose type +-- is any one of them. 
A..C single shortest under {KNOWS, LIKES} (length 2); +-- count 1 SELECT count(*) FROM age_shortest_path( '"sp_edge"'::agtype, (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); -ERROR: age_shortest_path: multiple relationship types are not yet supported --- a non-zero minimum hop count is not yet supported; expected: ERROR + count +------- + 1 +(1 row) + +-- all shortest A..C under {KNOWS, LIKES}: three A->B edges (2 KNOWS + 1 LIKES) +-- each extend by the single B->C KNOWS edge; count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); + count +------- + 3 +(1 row) + +-- all shortest A..B under {KNOWS, LIKES}: the two parallel KNOWS edges plus +-- the one LIKES edge are three distinct one-hop paths; count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"any"'::agtype); + count +------- + 3 +(1 row) + +-- a multi-type array containing an unknown type ignores the unknown member: +-- {NOSUCHLABEL, KNOWS} still finds A..C via KNOWS; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["NOSUCHLABEL", "KNOWS"]'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- a multi-type array 
of only types that do not connect the endpoints: +-- {LIKES} reaches B but B..C has no LIKES edge, so A..C is unreachable; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["LIKES", "NOSUCHLABEL"]'::agtype, '"out"'::agtype); + count +------- + 0 +(1 row) + +-- an empty relationship-type array imposes no filter (same as NULL): A..C +-- (length 2) is found; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '[]'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- a non-string element in the relationship-type array is an error +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", 7]'::agtype, '"out"'::agtype); +ERROR: age_shortest_path: relationship type must be a string +-- a minimum hop count that does not exceed the shortest distance imposes no +-- extra constraint; A..C via KNOWS has length 2, so min_hops=2 is accepted and +-- returns the length-2 path; count 1 SELECT count(*) FROM age_shortest_path( '"sp_edge"'::agtype, (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); -ERROR: age_shortest_path: a minimum hop count is not yet supported + count +------- + 1 +(1 row) + +-- a minimum hop count greater than the shortest distance falls back to the +-- 
exhaustive (VLE) search; A..C in this DAG has no length-3 path (every longer +-- route would have to reuse an edge), so min_hops=3 yields no rows; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + count +------- + 0 +(1 row) + -- a minimum hop count of 0 is the default and is accepted; A..C (length 2); -- count 1 SELECT count(*) FROM age_shortest_path( @@ -961,6 +1077,39 @@ SELECT count(*) FROM age_shortest_path( -- a graph name that does not exist is an error SELECT count(*) FROM age_shortest_path('"no_such_graph"'::agtype, '1'::agtype, '2'::agtype); ERROR: schema "no_such_graph" does not exist +-- a NULL graph name is an error (the graph name is required, unlike the +-- endpoints which accept NULL as "no match") +SELECT count(*) FROM age_shortest_path( + NULL::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype))); +ERROR: age_shortest_path: graph name cannot be NULL +-- a non-integer max_hops is an error (the hop bounds must be integers) +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL::agtype, '"not_an_int"'::agtype); +ERROR: age_shortest_path: agtype argument of wrong type +-- a non-integer min_hops is an error (symmetric to max_hops above) +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM 
cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, '"not_an_int"'::agtype); +ERROR: age_shortest_path: agtype argument of wrong type +-- a negative min_hops is clamped to 0 (no constraint), so A..C (length 2) is +-- still found; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, '-3'::agtype); + count +------- + 1 +(1 row) + -- cleanup SELECT * FROM drop_graph('sp_edge', true); NOTICE: drop cascades to 5 other objects @@ -975,3 +1124,606 @@ NOTICE: graph "sp_edge" has been dropped (1 row) +-- +-- Calling shortest_path / all_shortest_paths from inside cypher() (Tier 1) +-- WITH a relationship-type filter -- both a single type (bare string) and +-- multiple types (a list literal). The graph name is auto-injected, so the +-- in-cypher call passes only the bound endpoints and the type filter. +-- +-- Graph: A and B are joined by two parallel KNOWS edges plus one LIKES edge; +-- B->C is a single KNOWS edge. This lets the all_shortest_paths variant return +-- more than one path so the multiplicity is visible. 
+-- +SELECT * FROM create_graph('sp_cy_lbl'); +NOTICE: graph "sp_cy_lbl" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_cy_lbl', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(b), + (a)-[:LIKES]->(b), + (b)-[:KNOWS]->(c) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_cy_lbl', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 3, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 3, "out_degree": 1, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 0, "self_loops": 0} +(3 rows) + +-- shortest_path() in-cypher with a single relationship type; A..C via KNOWS +-- (length 2); expected: 1 path +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, 'KNOWS') +$$) AS (path agtype); + count +------- + 1 +(1 row) + +-- all_shortest_paths() in-cypher with a single relationship type; the two +-- parallel KNOWS edges A->B make two distinct shortest A..C paths; expected: 2 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, 'KNOWS') +$$) AS (path agtype); + count +------- + 2 +(1 row) + +-- shortest_path() in-cypher with multiple relationship types passed as a list +-- literal; A..C under {KNOWS, LIKES} (length 2); expected: 1 path +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + count +------- + 1 +(1 row) + +-- all_shortest_paths() in-cypher with multiple relationship types; the three +-- A->B edges (2 KNOWS + 1 
LIKES) each extend by the single B->C KNOWS edge, +-- giving three distinct shortest A..C paths; expected: 3 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + count +------- + 3 +(1 row) + +-- all_shortest_paths() in-cypher, multiple types, adjacent endpoints: the two +-- parallel KNOWS edges plus the one LIKES edge are three one-hop A..B paths; +-- expected: 3 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (b:N {name:'B'}) + RETURN all_shortest_paths(a, b, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + count +------- + 3 +(1 row) + +-- multiple types where only one connects the endpoints: {LIKES} reaches B but +-- B->C has no LIKES edge, so A..C is unreachable in-cypher; expected: 0 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, ['LIKES']) +$$) AS (path agtype); + count +------- + 0 +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_cy_lbl', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table sp_cy_lbl._ag_label_vertex +drop cascades to table sp_cy_lbl._ag_label_edge +drop cascades to table sp_cy_lbl."N" +drop cascades to table sp_cy_lbl."KNOWS" +drop cascades to table sp_cy_lbl."LIKES" +NOTICE: graph "sp_cy_lbl" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Minimum hop count fallback (Tier: VLE exhaustive search). When the requested +-- minimum hop count exceeds the true shortest distance, the BFS shortest-path +-- cannot satisfy it (it needs longer paths), so the implementation falls back +-- to the variable-length-edge depth-first engine and returns the shortest +-- path(s) whose length is at least min_hops. 
+-- +-- Graph: A reaches C directly (length 1) and also via two distinct +-- intermediate vertices B1 and B2 (length 2 each): +-- A->C, A->B1->C, A->B2->C +-- +SELECT * FROM create_graph('sp_min'); +NOTICE: graph "sp_min" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_min', $$ + CREATE (a:N {name: 'A'}), + (c:N {name: 'C'}), + (b1:N {name: 'B1'}), + (b2:N {name: 'B2'}), + (a)-[:KNOWS]->(c), + (a)-[:KNOWS]->(b1), + (b1)-[:KNOWS]->(c), + (a)-[:KNOWS]->(b2), + (b2)-[:KNOWS]->(c) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_min', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 3, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 3, "out_degree": 0, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131972, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} +(4 rows) + +-- baseline: the shortest A..C is the direct length-1 edge; count 1 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- min_hops=2 excludes the direct edge and falls back to the exhaustive search; +-- the two length-2 routes A->B1->C and A->B2->C are the shortest qualifying +-- paths; all_shortest_paths returns both; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', 
$$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + count +------- + 2 +(1 row) + +-- single shortest_path with min_hops=2 picks exactly one of the two length-2 +-- routes; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + count +------- + 1 +(1 row) + +-- the qualifying length-2 paths materialize correctly; all_shortest_paths +-- returns the full, order-stable set (the single shortest_path variant would +-- return an arbitrary one of the two equal-length routes, which is not a +-- deterministic choice), so both A->B1->C and A->B2->C are listed +SELECT path FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype) AS path +ORDER BY path; + path +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "B1"}}::vertex, {"id": 1125899906842627, "label": 
"KNOWS", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "C"}}::vertex]::path + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131972, "label": "N", "properties": {"name": "B2"}}::vertex, {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131972, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "C"}}::vertex]::path +(2 rows) + +-- min_hops=2 with a matching max_hops=2 returns the same two length-2 paths; +-- count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype, 2::agtype); + count +------- + 2 +(1 row) + +-- min_hops greater than max_hops is unsatisfiable; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype, 1::agtype); + count +------- + 0 +(1 row) + +-- min_hops=3 has no qualifying path (a length-3 A..C would have to reuse an +-- edge, which relationship-uniqueness forbids); count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + count +------- + 0 +(1 row) + +-- no 
edge-type filter also reaches the fallback; the two length-2 routes are +-- returned; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, '"out"'::agtype, 2::agtype); + count +------- + 2 +(1 row) + +-- the fallback honours direction: traversing edges backwards (C..A, 'in') with +-- min_hops=2 also returns the two length-2 routes; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"in"'::agtype, 2::agtype); + count +------- + 2 +(1 row) + +-- the fallback respects direction: there is no forward C..A path, so 'out' +-- with min_hops=2 returns nothing; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + count +------- + 0 +(1 row) + +-- an unknown relationship type in the fallback matches no edges, so even the +-- shortest qualifying path cannot be formed; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype, 2::agtype); + count +------- + 0 +(1 row) + +-- a minimum hop count greater than the shortest distance combined with +-- multiple relationship types is not supported (the VLE engine matches a +-- single 
label only); expected: ERROR +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype, 2::agtype); +ERROR: age_all_shortest_paths: a minimum hop count greater than the shortest path length is not supported with multiple relationship types +-- cleanup +SELECT * FROM drop_graph('sp_min', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table sp_min._ag_label_vertex +drop cascades to table sp_min._ag_label_edge +drop cascades to table sp_min."N" +drop cascades to table sp_min."KNOWS" +NOTICE: graph "sp_min" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Minimum hop count fallback with a VERTEX-REVISITING longer path. This is the +-- defining behaviour of the exhaustive-search regime: relationship-uniqueness +-- (Cypher trail semantics) forbids reusing an EDGE but permits revisiting a +-- VERTEX, so a qualifying path longer than the shortest distance may loop back +-- through an already-seen node. 
+-- +-- Graph: A -> B -> C, C -> B (back edge), B -> D +-- A..D shortest distance = 2 (A->B->D) +-- there is no edge-distinct length-3 A..D path +-- the only edge-distinct length-4 A..D path is A->B->C->B->D, which +-- revisits vertex B but uses each of the four edges exactly once +-- +SELECT * FROM create_graph('sp_revisit'); +NOTICE: graph "sp_revisit" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_revisit', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (d:N {name: 'D'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c), + (c)-[:KNOWS]->(b), + (b)-[:KNOWS]->(d) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_revisit', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 1, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 2, "out_degree": 2, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131972, "label": "N", "in_degree": 1, "out_degree": 0, "self_loops": 0} +(4 rows) + +-- min_hops=2 equals the shortest distance, so the easy (BFS) regime returns the +-- direct A->B->D route; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + count +------- + 1 +(1 row) + +-- min_hops is a lower bound, not an exact length: with min_hops=3 there is no +-- length-3 edge-distinct path, so the search returns the next-shortest +-- qualifying path, the length-4 route A->B->C->B->D; count 1 +SELECT 
count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + count +------- + 1 +(1 row) + +-- min_hops=4 is satisfied only by the vertex-revisiting A->B->C->B->D path; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype); + count +------- + 1 +(1 row) + +-- the length-4 path is unique, so its materialized form is deterministic; it +-- visits B twice (B appears at positions 2 and 4) yet repeats no edge +SELECT path FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype) AS path +ORDER BY path; + path 
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842628, "label": "KNOWS", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131972, "label": "N", "properties": {"name": "D"}}::vertex]::path +(1 row) + +-- min_hops=5 exhausts the four edges without an edge-distinct path; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN 
id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 5::agtype); + count +------- + 0 +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_revisit', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table sp_revisit._ag_label_vertex +drop cascades to table sp_revisit._ag_label_edge +drop cascades to table sp_revisit."N" +drop cascades to table sp_revisit."KNOWS" +NOTICE: graph "sp_revisit" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Minimum hop count fallback with a CLOSED WALK (start == end through a cycle). +-- When start and end are the same vertex the shortest distance is 0, so any +-- positive min_hops forces the exhaustive search to look for a cycle that +-- returns to the start using edge-distinct steps. +-- +-- Graph: a single directed triangle A -> B -> C -> A +-- A..A shortest distance = 0 (the zero-length path) +-- the only edge-distinct closed walk is the length-3 triangle A->B->C->A +-- +SELECT * FROM create_graph('sp_tri'); +NOTICE: graph "sp_tri" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_tri', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c), + (c)-[:KNOWS]->(a) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_tri', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} + {"id": 844424930131971, "label": "N", "in_degree": 1, "out_degree": 1, "self_loops": 0} +(3 rows) + +-- no min_hops: start == end yields the zero-length 
path; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + count +------- + 1 +(1 row) + +-- min_hops=3 forces the exhaustive search to find the closed triangle walk +-- A->B->C->A; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + count +------- + 1 +(1 row) + +-- the closed walk is unique, so its materialized form is deterministic; it +-- starts and ends at A and traverses each triangle edge once +SELECT path FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype) AS path +ORDER BY path; + path +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + [{"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex, 
{"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge, {"id": 844424930131970, "label": "N", "properties": {"name": "B"}}::vertex, {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge, {"id": 844424930131971, "label": "N", "properties": {"name": "C"}}::vertex, {"id": 1125899906842627, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131971, "properties": {}}::edge, {"id": 844424930131969, "label": "N", "properties": {"name": "A"}}::vertex]::path +(1 row) + +-- min_hops=4 cannot be met without reusing an edge of the three-edge triangle; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype); + count +------- + 0 +(1 row) + +-- cleanup +SELECT * FROM drop_graph('sp_tri', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table sp_tri._ag_label_vertex +drop cascades to table sp_tri._ag_label_edge +drop cascades to table sp_tri."N" +drop cascades to table sp_tri."KNOWS" +NOTICE: graph "sp_tri" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- Error messages report the actual SRF that was called. age_shortest_path and +-- age_all_shortest_paths share their argument-resolution helpers; these cases +-- confirm each reports its own name in the error text rather than a single +-- hard-coded "age_shortest_path" prefix. 
+-- +SELECT * FROM create_graph('sp_errname'); +NOTICE: graph "sp_errname" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('sp_errname', $$ + CREATE (a:N {name: 'A'})-[:KNOWS]->(b:N {name: 'B'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- materialize the global graph context +SELECT * FROM cypher('sp_errname', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + result +----------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "N", "in_degree": 0, "out_degree": 1, "self_loops": 0} + {"id": 844424930131970, "label": "N", "in_degree": 1, "out_degree": 0, "self_loops": 0} +(2 rows) + +-- a NULL graph name errors with the called function's name as the prefix +SELECT count(*) FROM age_shortest_path(NULL::agtype, 0::agtype, 1::agtype); +ERROR: age_shortest_path: graph name cannot be NULL +SELECT count(*) FROM age_all_shortest_paths(NULL::agtype, 0::agtype, 1::agtype); +ERROR: age_all_shortest_paths: graph name cannot be NULL +-- a non-string relationship type errors with the called function's name as the +-- prefix (the array element 1 is an integer, not a string) +SELECT count(*) FROM age_shortest_path( + '"sp_errname"'::agtype, + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '[1]'::agtype); +ERROR: age_shortest_path: relationship type must be a string +SELECT count(*) FROM age_all_shortest_paths( + '"sp_errname"'::agtype, + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '[1]'::agtype); +ERROR: age_all_shortest_paths: relationship type must be a string +-- cleanup +SELECT * FROM drop_graph('sp_errname', true); +NOTICE: drop cascades to 4 
other objects +DETAIL: drop cascades to table sp_errname._ag_label_vertex +drop cascades to table sp_errname._ag_label_edge +drop cascades to table sp_errname."N" +drop cascades to table sp_errname."KNOWS" +NOTICE: graph "sp_errname" has been dropped + drop_graph +------------ + +(1 row) + diff --git a/regress/sql/age_shortest_path.sql b/regress/sql/age_shortest_path.sql index 82b4d66bb..5bf86d364 100644 --- a/regress/sql/age_shortest_path.sql +++ b/regress/sql/age_shortest_path.sql @@ -115,6 +115,16 @@ FROM age_all_shortest_paths( NULL, '"in"'::agtype ); +-- single shortest path with direction 'in': D -> A backwards; expected: +-- path_count = 1 (the single-path variant picks one of the two routes) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + NULL, '"in"'::agtype +); + -- direction 'out': D -> A not reachable forwards; expected: path_count = 0 SELECT count(*) AS path_count FROM age_shortest_path( @@ -150,6 +160,15 @@ FROM age_shortest_path( (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)) ); +-- error: end argument is neither a vertex nor an integer id; expected: ERROR +-- (symmetric to the start-vertex check above) +SELECT count(*) AS path_count +FROM age_shortest_path( + '"sp_graph"'::agtype, + (SELECT id FROM cypher('sp_graph', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"not_a_vertex"'::agtype +); + -- -- Non-existent endpoint guards. These must NOT crash the backend and must -- return no rows (a path can only exist between vertices in the graph). 
@@ -601,20 +620,81 @@ SELECT count(*) FROM age_shortest_path( (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), '["KNOWS"]'::agtype, '"out"'::agtype); --- multiple relationship types are not yet supported; expected: ERROR +-- multiple relationship types: an array of types matches an edge whose type +-- is any one of them. A..C single shortest under {KNOWS, LIKES} (length 2); +-- count 1 SELECT count(*) FROM age_shortest_path( '"sp_edge"'::agtype, (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); --- a non-zero minimum hop count is not yet supported; expected: ERROR +-- all shortest A..C under {KNOWS, LIKES}: three A->B edges (2 KNOWS + 1 LIKES) +-- each extend by the single B->C KNOWS edge; count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype); + +-- all shortest A..B under {KNOWS, LIKES}: the two parallel KNOWS edges plus +-- the one LIKES edge are three distinct one-hop paths; count 3 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"any"'::agtype); + +-- a multi-type array containing an unknown type ignores the unknown member: +-- {NOSUCHLABEL, KNOWS} still finds A..C via KNOWS; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT 
id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["NOSUCHLABEL", "KNOWS"]'::agtype, '"out"'::agtype); + +-- a multi-type array of only types that do not connect the endpoints: +-- {LIKES} reaches B but B..C has no LIKES edge, so A..C is unreachable; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["LIKES", "NOSUCHLABEL"]'::agtype, '"out"'::agtype); + +-- an empty relationship-type array imposes no filter (same as NULL): A..C +-- (length 2) is found; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '[]'::agtype, '"out"'::agtype); + +-- a non-string element in the relationship-type array is an error +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", 7]'::agtype, '"out"'::agtype); + +-- a minimum hop count that does not exceed the shortest distance imposes no +-- extra constraint; A..C via KNOWS has length 2, so min_hops=2 is accepted and +-- returns the length-2 path; count 1 SELECT count(*) FROM age_shortest_path( '"sp_edge"'::agtype, (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); +-- a minimum hop count greater than the shortest distance falls back to the +-- exhaustive (VLE) search; A..C in this DAG has no 
length-3 path (every longer +-- route would have to reuse an edge), so min_hops=3 yields no rows; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + -- a minimum hop count of 0 is the default and is accepted; A..C (length 2); -- count 1 SELECT count(*) FROM age_shortest_path( @@ -626,5 +706,408 @@ SELECT count(*) FROM age_shortest_path( -- a graph name that does not exist is an error SELECT count(*) FROM age_shortest_path('"no_such_graph"'::agtype, '1'::agtype, '2'::agtype); +-- a NULL graph name is an error (the graph name is required, unlike the +-- endpoints which accept NULL as "no match") +SELECT count(*) FROM age_shortest_path( + NULL::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype))); + +-- a non-integer max_hops is an error (the hop bounds must be integers) +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, NULL::agtype, '"not_an_int"'::agtype); + +-- a non-integer min_hops is an error (symmetric to max_hops above) +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, '"not_an_int"'::agtype); + +-- a negative min_hops is clamped to 0 (no constraint), so A..C (length 2) is +-- still found; 
count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_edge"'::agtype, + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_edge', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, '-3'::agtype); + -- cleanup SELECT * FROM drop_graph('sp_edge', true); + +-- +-- Calling shortest_path / all_shortest_paths from inside cypher() (Tier 1) +-- WITH a relationship-type filter -- both a single type (bare string) and +-- multiple types (a list literal). The graph name is auto-injected, so the +-- in-cypher call passes only the bound endpoints and the type filter. +-- +-- Graph: A and B are joined by two parallel KNOWS edges plus one LIKES edge; +-- B->C is a single KNOWS edge. This lets the all_shortest_paths variant return +-- more than one path so the multiplicity is visible. +-- +SELECT * FROM create_graph('sp_cy_lbl'); + +SELECT * FROM cypher('sp_cy_lbl', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(b), + (a)-[:LIKES]->(b), + (b)-[:KNOWS]->(c) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_cy_lbl', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- shortest_path() in-cypher with a single relationship type; A..C via KNOWS +-- (length 2); expected: 1 path +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, 'KNOWS') +$$) AS (path agtype); + +-- all_shortest_paths() in-cypher with a single relationship type; the two +-- parallel KNOWS edges A->B make two distinct shortest A..C paths; expected: 2 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, 'KNOWS') +$$) AS (path agtype); + +-- shortest_path() in-cypher with multiple relationship types passed as a list +-- literal; A..C under 
{KNOWS, LIKES} (length 2); expected: 1 path +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN shortest_path(a, c, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + +-- all_shortest_paths() in-cypher with multiple relationship types; the three +-- A->B edges (2 KNOWS + 1 LIKES) each extend by the single B->C KNOWS edge, +-- giving three distinct shortest A..C paths; expected: 3 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + +-- all_shortest_paths() in-cypher, multiple types, adjacent endpoints: the two +-- parallel KNOWS edges plus the one LIKES edge are three one-hop A..B paths; +-- expected: 3 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (b:N {name:'B'}) + RETURN all_shortest_paths(a, b, ['KNOWS', 'LIKES']) +$$) AS (path agtype); + +-- multiple types where only one connects the endpoints: {LIKES} reaches B but +-- B->C has no LIKES edge, so A..C is unreachable in-cypher; expected: 0 +SELECT count(*) FROM cypher('sp_cy_lbl', $$ + MATCH (a:N {name:'A'}), (c:N {name:'C'}) + RETURN all_shortest_paths(a, c, ['LIKES']) +$$) AS (path agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_cy_lbl', true); + +-- +-- Minimum hop count fallback (Tier: VLE exhaustive search). When the requested +-- minimum hop count exceeds the true shortest distance, the BFS shortest-path +-- cannot satisfy it (it needs longer paths), so the implementation falls back +-- to the variable-length-edge depth-first engine and returns the shortest +-- path(s) whose length is at least min_hops. 
+-- +-- Graph: A reaches C directly (length 1) and also via two distinct +-- intermediate vertices B1 and B2 (length 2 each): +-- A->C, A->B1->C, A->B2->C +-- +SELECT * FROM create_graph('sp_min'); + +SELECT * FROM cypher('sp_min', $$ + CREATE (a:N {name: 'A'}), + (c:N {name: 'C'}), + (b1:N {name: 'B1'}), + (b2:N {name: 'B2'}), + (a)-[:KNOWS]->(c), + (a)-[:KNOWS]->(b1), + (b1)-[:KNOWS]->(c), + (a)-[:KNOWS]->(b2), + (b2)-[:KNOWS]->(c) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_min', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- baseline: the shortest A..C is the direct length-1 edge; count 1 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + +-- min_hops=2 excludes the direct edge and falls back to the exhaustive search; +-- the two length-2 routes A->B1->C and A->B2->C are the shortest qualifying +-- paths; all_shortest_paths returns both; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + +-- single shortest_path with min_hops=2 picks exactly one of the two length-2 +-- routes; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + +-- the qualifying length-2 paths materialize correctly; all_shortest_paths +-- returns the full, 
order-stable set (the single shortest_path variant would +-- return an arbitrary one of the two equal-length routes, which is not a +-- deterministic choice), so both A->B1->C and A->B2->C are listed +SELECT path FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype) AS path +ORDER BY path; + +-- min_hops=2 with a matching max_hops=2 returns the same two length-2 paths; +-- count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype, 2::agtype); + +-- min_hops greater than max_hops is unsatisfiable; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype, 1::agtype); + +-- min_hops=3 has no qualifying path (a length-3 A..C would have to reuse an +-- edge, which relationship-uniqueness forbids); count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + +-- no edge-type filter also reaches the fallback; the two length-2 routes are +-- returned; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) 
AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + NULL::agtype, '"out"'::agtype, 2::agtype); + +-- the fallback honours direction: traversing edges backwards (C..A, 'in') with +-- min_hops=2 also returns the two length-2 routes; count 2 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"in"'::agtype, 2::agtype); + +-- the fallback respects direction: there is no forward C..A path, so 'out' +-- with min_hops=2 returns nothing; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + +-- an unknown relationship type in the fallback matches no edges, so even the +-- shortest qualifying path cannot be formed; count 0 +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '"NOSUCHLABEL"'::agtype, '"out"'::agtype, 2::agtype); + +-- a minimum hop count greater than the shortest distance combined with +-- multiple relationship types is not supported (the VLE engine matches a +-- single label only); expected: ERROR +SELECT count(*) FROM age_all_shortest_paths( + '"sp_min"'::agtype, + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_min', $$ MATCH (n {name:'C'}) RETURN id(n) $$) AS (id agtype)), + '["KNOWS", "LIKES"]'::agtype, '"out"'::agtype, 2::agtype); + +-- cleanup 
+SELECT * FROM drop_graph('sp_min', true); + +-- +-- Minimum hop count fallback with a VERTEX-REVISITING longer path. This is the +-- defining behaviour of the exhaustive-search regime: relationship-uniqueness +-- (Cypher trail semantics) forbids reusing an EDGE but permits revisiting a +-- VERTEX, so a qualifying path longer than the shortest distance may loop back +-- through an already-seen node. +-- +-- Graph: A -> B -> C, C -> B (back edge), B -> D +-- A..D shortest distance = 2 (A->B->D) +-- there is no edge-distinct length-3 A..D path +-- the only edge-distinct length-4 A..D path is A->B->C->B->D, which +-- revisits vertex B but uses each of the four edges exactly once +-- +SELECT * FROM create_graph('sp_revisit'); + +SELECT * FROM cypher('sp_revisit', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (d:N {name: 'D'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c), + (c)-[:KNOWS]->(b), + (b)-[:KNOWS]->(d) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_revisit', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- min_hops=2 equals the shortest distance, so the easy (BFS) regime returns the +-- direct A->B->D route; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 2::agtype); + +-- min_hops is a lower bound, not an exact length: with min_hops=3 there is no +-- length-3 edge-distinct path, so the search returns the next-shortest +-- qualifying path, the length-4 route A->B->C->B->D; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n 
{name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + +-- min_hops=4 is satisfied only by the vertex-revisiting A->B->C->B->D path; +-- count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype); + +-- the length-4 path is unique, so its materialized form is deterministic; it +-- visits B twice (B appears at positions 2 and 4) yet repeats no edge +SELECT path FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype) AS path +ORDER BY path; + +-- min_hops=5 exhausts the four edges without an edge-distinct path; count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_revisit"'::agtype, + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_revisit', $$ MATCH (n {name:'D'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 5::agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_revisit', true); + +-- +-- Minimum hop count fallback with a CLOSED WALK (start == end through a cycle). +-- When start and end are the same vertex the shortest distance is 0, so any +-- positive min_hops forces the exhaustive search to look for a cycle that +-- returns to the start using edge-distinct steps. 
+-- +-- Graph: a single directed triangle A -> B -> C -> A +-- A..A shortest distance = 0 (the zero-length path) +-- the only edge-distinct closed walk is the length-3 triangle A->B->C->A +-- +SELECT * FROM create_graph('sp_tri'); + +SELECT * FROM cypher('sp_tri', $$ + CREATE (a:N {name: 'A'}), + (b:N {name: 'B'}), + (c:N {name: 'C'}), + (a)-[:KNOWS]->(b), + (b)-[:KNOWS]->(c), + (c)-[:KNOWS]->(a) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_tri', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- no min_hops: start == end yields the zero-length path; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype); + +-- min_hops=3 forces the exhaustive search to find the closed triangle walk +-- A->B->C->A; count 1 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype); + +-- the closed walk is unique, so its materialized form is deterministic; it +-- starts and ends at A and traverses each triangle edge once +SELECT path FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 3::agtype) AS path +ORDER BY path; + +-- min_hops=4 cannot be met without reusing an edge of the three-edge triangle; +-- count 0 +SELECT count(*) FROM age_shortest_path( + '"sp_tri"'::agtype, + (SELECT id FROM cypher('sp_tri', $$ MATCH 
(n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_tri', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + '"KNOWS"'::agtype, '"out"'::agtype, 4::agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_tri', true); + +-- +-- Error messages report the actual SRF that was called. age_shortest_path and +-- age_all_shortest_paths share their argument-resolution helpers; these cases +-- confirm each reports its own name in the error text rather than a single +-- hard-coded "age_shortest_path" prefix. +-- +SELECT * FROM create_graph('sp_errname'); + +SELECT * FROM cypher('sp_errname', $$ + CREATE (a:N {name: 'A'})-[:KNOWS]->(b:N {name: 'B'}) +$$) AS (result agtype); + +-- materialize the global graph context +SELECT * FROM cypher('sp_errname', $$ MATCH (u) RETURN vertex_stats(u) ORDER BY id(u) $$) + AS (result agtype); + +-- a NULL graph name errors with the called function's name as the prefix +SELECT count(*) FROM age_shortest_path(NULL::agtype, 0::agtype, 1::agtype); +SELECT count(*) FROM age_all_shortest_paths(NULL::agtype, 0::agtype, 1::agtype); + +-- a non-string relationship type errors with the called function's name as the +-- prefix (the array element 1 is an integer, not a string) +SELECT count(*) FROM age_shortest_path( + '"sp_errname"'::agtype, + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '[1]'::agtype); +SELECT count(*) FROM age_all_shortest_paths( + '"sp_errname"'::agtype, + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'A'}) RETURN id(n) $$) AS (id agtype)), + (SELECT id FROM cypher('sp_errname', $$ MATCH (n {name:'B'}) RETURN id(n) $$) AS (id agtype)), + '[1]'::agtype); + +-- cleanup +SELECT * FROM drop_graph('sp_errname', true); diff --git a/src/backend/utils/adt/age_vle.c b/src/backend/utils/adt/age_vle.c index 804d9e17e..cb036b154 100644 --- 
a/src/backend/utils/adt/age_vle.c +++ b/src/backend/utils/adt/age_vle.c @@ -2887,11 +2887,12 @@ static graphid sp_queue_pop(sp_queue *q) } /* Resolve a vertex argument (a vertex agtype or an integer id) to a graphid. */ -static graphid sp_agtype_to_graphid(agtype *agt, const char *argname) +static graphid sp_agtype_to_graphid(agtype *agt, char *fname, + const char *argname) { agtype_value *agtv = NULL; - agtv = get_agtype_value("age_shortest_path", agt, AGTV_VERTEX, false); + agtv = get_agtype_value(fname, agt, AGTV_VERTEX, false); if (agtv != NULL && agtv->type == AGTV_VERTEX) { @@ -2909,7 +2910,7 @@ static graphid sp_agtype_to_graphid(agtype *agt, const char *argname) } /* Resolve the optional direction argument; NULL defaults to undirected. */ -static cypher_rel_dir sp_agtype_to_direction(agtype *agt) +static cypher_rel_dir sp_agtype_to_direction(agtype *agt, char *fname) { agtype_value *agtv = NULL; char *s = NULL; @@ -2920,7 +2921,7 @@ static cypher_rel_dir sp_agtype_to_direction(agtype *agt) return CYPHER_REL_DIR_NONE; } - agtv = get_agtype_value("age_shortest_path", agt, AGTV_STRING, true); + agtv = get_agtype_value(fname, agt, AGTV_STRING, true); s = pnstrdup(agtv->val.string.val, agtv->val.string.len); if (pg_strcasecmp(s, "out") == 0) @@ -2939,7 +2940,8 @@ static cypher_rel_dir sp_agtype_to_direction(agtype *agt) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("direction argument must be one of 'out', 'in', or 'any'"))); + errmsg("%s: direction argument must be one of 'out', 'in', or 'any'", + fname))); } pfree_if_not_null(s); @@ -2979,7 +2981,7 @@ static Datum sp_build_path_datum(Oid graph_oid, graphid *alt, int64 alt_len) * (collect_all) every shortest-path predecessor is recorded per vertex. 
*/ static HTAB *sp_run_bfs(GRAPH_global_context *ggctx, graphid source, - graphid target, bool filter_edges, Oid edge_label_oid, + graphid target, Oid *label_oids, int n_label_oids, cypher_rel_dir dir, int64 max_hops, bool collect_all, int64 *out_target_depth, bool *out_found) { @@ -3123,18 +3125,35 @@ static HTAB *sp_run_bfs(GRAPH_global_context *ggctx, graphid source, /* * Optional edge label filter. When a label filter is active - * we keep only edges whose label table oid matches. Note that - * a label name which does not exist in this graph resolves to - * InvalidOid; because no real edge can have an InvalidOid - * label table, every edge is then skipped and only the - * zero-length (start == end) path can match -- matching the - * openCypher semantics that an unknown relationship type - * matches no relationships. + * (n_label_oids > 0) we keep only edges whose label table oid + * is one of the requested relationship types. A requested type + * that does not exist in this graph resolves to InvalidOid; + * since no real edge can have an InvalidOid label table, such a + * type contributes no matches and simply drops out of the set, + * while edges of any of the other (known) requested types still + * match. Only when every requested type is unknown does the + * filter match no edges, leaving just the zero-length + * (start == end) path -- matching the openCypher semantics that + * an unknown relationship type matches no relationships. 
*/ - if (filter_edges && - get_edge_entry_label_table_oid(ee) != edge_label_oid) + if (n_label_oids > 0) { - continue; + Oid ee_label_oid = get_edge_entry_label_table_oid(ee); + bool label_match = false; + int li = 0; + + for (li = 0; li < n_label_oids; li++) + { + if (label_oids[li] == ee_label_oid) + { + label_match = true; + break; + } + } + if (!label_match) + { + continue; + } } /* the neighbor depends on which side of the edge u is on */ @@ -3192,13 +3211,25 @@ static HTAB *sp_run_bfs(GRAPH_global_context *ggctx, graphid source, return visited; } +/* + * Maximum number of result paths age_all_shortest_paths will materialize + * before raising an error. The shortest-path DAG can contain exponentially + * many equal-length paths (grid-like or multi-edge graphs), and they are all + * built up front in the SRF's memory context, so this is a backstop against + * unbounded memory growth. CHECK_FOR_INTERRUPTS() in sp_enumerate still allows + * cancellation, but a fast explosion can outrun a statement_timeout. + */ +#define SP_MAX_RESULT_PATHS 1000000 + /* * Recursively enumerate every shortest path by walking the predecessor DAG * from target back to source. Each completed path is appended to *out as a - * freshly allocated interleaved graphid array of length alt_len. + * freshly allocated interleaved graphid array of length alt_len. The running + * total is capped at SP_MAX_RESULT_PATHS to bound peak memory. */ static void sp_enumerate(HTAB *visited, graphid source, graphid cur, - graphid *alt, int64 alt_len, int64 pos, List **out) + graphid *alt, int64 alt_len, int64 pos, + char *fname, List **out) { sp_visit_entry *e = NULL; ListCell *lc = NULL; @@ -3220,6 +3251,20 @@ static void sp_enumerate(HTAB *visited, graphid source, graphid cur, memcpy(copy, alt, sizeof(graphid) * alt_len); *out = lappend(*out, copy); + + /* + * Bound the number of materialized paths. 
Without a ceiling, a + * combinatorial shortest-path DAG could exhaust memory before the + * first row is returned. + */ + if (list_length(*out) > SP_MAX_RESULT_PATHS) + { + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("%s: shortest path count exceeded %d", + fname, SP_MAX_RESULT_PATHS), + errhint("Narrow the search with a relationship type or a maximum hop count, or use age_shortest_path for a single path."))); + } } return; } @@ -3236,10 +3281,193 @@ static void sp_enumerate(HTAB *visited, graphid source, graphid cur, alt[pos - 1] = p->edge; sp_enumerate(visited, source, p->parent_vertex, alt, alt_len, pos - 2, - out); + fname, out); } } +/* + * Maximum number of distinct paths the minimum-hop fallback will enumerate + * before giving up. The exhaustive DFS used for a minimum hop count greater + * than the shortest distance can explode on dense graphs, so this acts as a + * safety valve alongside CHECK_FOR_INTERRUPTS()/statement_timeout in the DFS. + */ +#define SP_MINHOPS_MAX_PATHS 1000000 + +/* + * Fallback for the "minimum hop count greater than the shortest distance" + * regime, which plain BFS cannot satisfy (it requires longer, vertex-revisiting + * paths under relationship-uniqueness). This reuses the VLE depth-first engine + * directly: it builds a VLE_local_context by hand (no fcinfo), enumerates every + * path whose length is within [min_hops, max_hops], and keeps only those of the + * smallest qualifying length. For shortest_path one such path is returned; for + * all_shortest_paths every tie at that length is returned. Returns NULL with + * *out_count == 0 when no qualifying path exists. + * + * The VLE engine matches a single edge label oid only, so a multi-type filter + * is rejected by the caller before reaching here. A single label_oid of + * InvalidOid means "any edge label". 
+ */ +static Datum *sp_minhops_fallback(GRAPH_global_context *ggctx, Oid graph_oid, + const char *graph_name, char *fname, + graphid source, graphid target, Oid label_oid, + cypher_rel_dir dir, int64 min_hops, + int64 max_hops, bool collect_all, + int64 *out_count) +{ + MemoryContext oldctx = CurrentMemoryContext; + MemoryContext tmpctx = NULL; + VLE_local_context *vlelctx = NULL; + agtype_value av_empty; + agtype *empty_constraint = NULL; + List *best = NIL; + ListCell *lc = NULL; + int64 best_len = PG_INT64_MAX; + int64 examined = 0; + int64 result_len = 0; + int64 n = 0; + int64 idx = 0; + Datum *paths = NULL; + + *out_count = 0; + + /* do the VLE enumeration in a private context we can throw away at the end */ + tmpctx = AllocSetContextCreate(oldctx, "age shortest path minhops", + ALLOCSET_DEFAULT_SIZES); + MemoryContextSwitchTo(tmpctx); + + /* an empty property constraint object: every edge satisfies it */ + av_empty.type = AGTV_OBJECT; + av_empty.val.object.num_pairs = 0; + av_empty.val.object.pairs = NULL; + empty_constraint = agtype_value_to_agtype(&av_empty); + + /* build the VLE local context by hand (no fcinfo, no caching) */ + vlelctx = palloc0(sizeof(VLE_local_context)); + vlelctx->graph_name = pnstrdup(graph_name, strlen(graph_name)); + vlelctx->graph_oid = graph_oid; + vlelctx->ggctx = ggctx; + vlelctx->path_function = VLE_FUNCTION_PATHS_BETWEEN; + vlelctx->next_vertex = NULL; + vlelctx->vsid = source; + vlelctx->veid = target; + vlelctx->edge_property_constraint = empty_constraint; + vlelctx->edge_property_constraint_datum = + AGTYPE_P_GET_DATUM(empty_constraint); + vlelctx->edge_property_constraint_hash = + datum_image_hash(vlelctx->edge_property_constraint_datum, false, -1); + vlelctx->edge_label_name = NULL; + vlelctx->edge_label_name_oid = label_oid; + vlelctx->lidx = (min_hops > 0) ? 
min_hops : 1; + if (max_hops < 0) + { + vlelctx->uidx_infinite = true; + vlelctx->uidx = 0; + } + else + { + vlelctx->uidx_infinite = false; + vlelctx->uidx = max_hops; + } + vlelctx->edge_direction = dir; + vlelctx->use_cache = false; + vlelctx->vle_grammar_node_id = 0; + vlelctx->next = NULL; + vlelctx->is_dirty = true; + + create_VLE_local_state_hashtable(vlelctx); + vlelctx->dfs_vertex_stack = new_gid_stack(); + vlelctx->dfs_edge_stack = new_gid_stack(); + vlelctx->dfs_path_stack = new_gid_stack(); + load_initial_dfs_stacks(vlelctx); + + /* + * Enumerate qualifying paths, keeping only those of the smallest length + * seen. The DFS yields paths in no particular length order, so a strictly + * shorter path resets the kept set. + */ + while (dfs_find_a_path_between(vlelctx)) + { + int64 hops = gid_stack_size(vlelctx->dfs_path_stack); + bool take = false; + bool reset = false; + + examined = examined + 1; + if (examined > SP_MINHOPS_MAX_PATHS) + { + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("%s: minimum hop count search exceeded %d candidate paths", + fname, SP_MINHOPS_MAX_PATHS), + errhint("Provide a maximum hop count to bound the search."))); + } + + if (hops < best_len) + { + take = true; + reset = true; + } + else if (hops == best_len && collect_all) + { + take = true; + } + + if (take) + { + VLE_path_container *vpc = NULL; + graphid *garr = NULL; + int64 arrlen = 0; + + vpc = build_VLE_path_container(vlelctx); + garr = GET_GRAPHID_ARRAY_FROM_CONTAINER(vpc); + arrlen = vpc->graphid_array_size; + + /* copy the path into the surviving context and record it */ + MemoryContextSwitchTo(oldctx); + if (reset) + { + list_free_deep(best); + best = NIL; + best_len = hops; + } + { + graphid *copy = palloc(sizeof(graphid) * arrlen); + + memcpy(copy, garr, sizeof(graphid) * arrlen); + best = lappend(best, copy); + } + MemoryContextSwitchTo(tmpctx); + + pfree(vpc); + } + } + + /* tear down the VLE engine state, then drop the whole scratch context */ 
+ free_VLE_local_context(vlelctx); + MemoryContextSwitchTo(oldctx); + MemoryContextDelete(tmpctx); + + n = list_length(best); + if (n == 0) + { + return NULL; + } + + /* every kept path has the same (minimum qualifying) length */ + result_len = (2 * best_len) + 1; + paths = palloc(sizeof(Datum) * n); + foreach(lc, best) + { + graphid *a = (graphid *) lfirst(lc); + + paths[idx] = sp_build_path_datum(graph_oid, a, result_len); + idx = idx + 1; + } + + list_free_deep(best); + *out_count = n; + return paths; +} + /* * Resolve arguments, run the BFS, and materialize the result path(s) as an * array of AGTV_PATH agtype Datums. Returns NULL with *out_count == 0 when no @@ -3248,8 +3476,8 @@ static void sp_enumerate(HTAB *visited, graphid source, graphid cur, static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, agtype *end_agt, agtype *label_agt, agtype *dir_agt, agtype *minhops_agt, - agtype *maxhops_agt, bool collect_all, - int64 *out_count) + agtype *maxhops_agt, char *fname, + bool collect_all, int64 *out_count) { agtype_value *agtv_temp = NULL; char *graph_name = NULL; @@ -3257,14 +3485,17 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, GRAPH_global_context *ggctx = NULL; graphid source = 0; graphid target = 0; - bool filter_edges = false; - Oid edge_label_oid = InvalidOid; + Oid *label_oids = NULL; + int n_label_oids = 0; cypher_rel_dir dir = CYPHER_REL_DIR_NONE; + int64 min_hops = 0; int64 max_hops = -1; HTAB *visited = NULL; int64 target_depth = -1; bool found = false; Datum *paths = NULL; + MemoryContext oldctx = CurrentMemoryContext; + MemoryContext scratch = NULL; *out_count = 0; @@ -3273,10 +3504,10 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("age_shortest_path: graph name cannot be NULL"))); + errmsg("%s: graph name cannot be NULL", fname))); } - agtv_temp = get_agtype_value("age_shortest_path", graph_name_agt, 
+ agtv_temp = get_agtype_value(fname, graph_name_agt, AGTV_STRING, true); graph_name = pnstrdup(agtv_temp->val.string.val, agtv_temp->val.string.len); @@ -3288,16 +3519,21 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, */ if (start_agt == NULL || end_agt == NULL) { + pfree_if_not_null(graph_name); return NULL; } - source = sp_agtype_to_graphid(start_agt, "start vertex"); - target = sp_agtype_to_graphid(end_agt, "end vertex"); + source = sp_agtype_to_graphid(start_agt, fname, "start vertex"); + target = sp_agtype_to_graphid(end_agt, fname, "end vertex"); /* - * Optional edge type filter. A single relationship type may be supplied - * either as a bare string or as a one-element array. Multiple relationship - * types (an array with more than one element) are not yet supported. + * Optional edge type filter. A relationship type may be supplied as a + * bare string, or one or more types may be supplied as an array of + * strings. Each (non-empty) type name is resolved to its edge label table + * oid; an edge is kept when its label oid is one of the requested set. An + * empty string, an empty array, or NULL means no filter (every edge is + * traversed). An unknown type resolves to InvalidOid and so matches no + * edges in the BFS (the minimum-hop fallback instead reads a lone InvalidOid as "any edge label").
*/ if (label_agt != NULL) { @@ -3306,74 +3542,84 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, if (AGT_ROOT_IS_ARRAY(label_agt) && !AGT_ROOT_IS_SCALAR(label_agt)) { int nelems = AGT_ROOT_COUNT(label_agt); + int i = 0; - if (nelems > 1) + if (nelems > 0) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("age_shortest_path: multiple relationship types are not yet supported"))); + label_oids = palloc(sizeof(Oid) * nelems); } - if (nelems == 1) + for (i = 0; i < nelems; i++) { agtv_temp = get_ith_agtype_value_from_container( - &label_agt->root, 0); + &label_agt->root, i); if (agtv_temp->type != AGTV_STRING) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("age_shortest_path: relationship type must be a string"))); + errmsg("%s: relationship type must be a string", + fname))); } + /* skip empty type names; they impose no constraint */ if (agtv_temp->val.string.len != 0) { label_name = pnstrdup(agtv_temp->val.string.val, agtv_temp->val.string.len); - edge_label_oid = get_label_relation(label_name, graph_oid); - filter_edges = true; + label_oids[n_label_oids] = + get_label_relation(label_name, graph_oid); + n_label_oids = n_label_oids + 1; + + /* the resolved oid is all we keep; free the type name */ + pfree(label_name); + label_name = NULL; } } } else { - agtv_temp = get_agtype_value("age_shortest_path", label_agt, + agtv_temp = get_agtype_value(fname, label_agt, AGTV_STRING, true); if (agtv_temp->val.string.len != 0) { label_name = pnstrdup(agtv_temp->val.string.val, agtv_temp->val.string.len); - edge_label_oid = get_label_relation(label_name, graph_oid); - filter_edges = true; + label_oids = palloc(sizeof(Oid)); + label_oids[0] = get_label_relation(label_name, graph_oid); + n_label_oids = 1; + + /* the resolved oid is all we keep; free the type name */ + pfree(label_name); + label_name = NULL; } } } /* optional direction (defaults to undirected) */ - dir = sp_agtype_to_direction(dir_agt); + dir = 
sp_agtype_to_direction(dir_agt, fname); /* - * Optional minimum hop count. A genuine minimum-length constraint needs a - * different search than plain BFS, so for now only the default (NULL or 0) - * is accepted; any other value is rejected loudly. + * Optional minimum hop count (NULL or negative means none). A minimum + * that does not exceed the true shortest distance imposes no additional + * constraint, so it is handled directly by the BFS result below. A + * minimum greater than the shortest distance requires enumerating longer, + * vertex-revisiting paths, which plain BFS cannot do; that case falls + * back to the VLE depth-first engine after the search (see below). */ if (minhops_agt != NULL) { - int64 min_hops = 0; - - agtv_temp = get_agtype_value("age_shortest_path", minhops_agt, + agtv_temp = get_agtype_value(fname, minhops_agt, AGTV_INTEGER, true); min_hops = agtv_temp->val.int_value; - if (min_hops != 0) + if (min_hops < 0) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("age_shortest_path: a minimum hop count is not yet supported"))); + min_hops = 0; } } /* optional upper hop bound (NULL or negative means unbounded) */ if (maxhops_agt != NULL) { - agtv_temp = get_agtype_value("age_shortest_path", maxhops_agt, + agtv_temp = get_agtype_value(fname, maxhops_agt, AGTV_INTEGER, true); max_hops = agtv_temp->val.int_value; if (max_hops < 0) @@ -3386,19 +3632,87 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, ggctx = manage_GRAPH_global_contexts(graph_name, graph_oid); if (ggctx == NULL) { + pfree_if_not_null(graph_name); + pfree_if_not_null(label_oids); return NULL; } + /* + * Run the search and reconstruct the result path(s) in a private scratch + * context. 
The BFS bookkeeping (visited table, frontier queue, predecessor + * multiset) and the intermediate path arrays are only needed while we + * compute; the surviving result Datums are built in the caller's + * (SRF-lifetime) context and copied out before the scratch context is + * deleted. This bounds peak memory to the result set plus one search, + * rather than retaining the whole search state for the life of the SRF. + */ + scratch = AllocSetContextCreate(oldctx, "age shortest path scratch", + ALLOCSET_DEFAULT_SIZES); + MemoryContextSwitchTo(scratch); + /* run the breadth-first search */ - visited = sp_run_bfs(ggctx, source, target, filter_edges, edge_label_oid, + visited = sp_run_bfs(ggctx, source, target, label_oids, n_label_oids, dir, max_hops, collect_all, &target_depth, &found); if (!found) { - hash_destroy(visited); + MemoryContextSwitchTo(oldctx); + MemoryContextDelete(scratch); + pfree_if_not_null(graph_name); + pfree_if_not_null(label_oids); return NULL; } + /* + * A minimum hop count greater than the true shortest distance can only be + * satisfied by longer, vertex-revisiting paths (Neo4j's exhaustive search + * regime). Plain BFS cannot produce those, so fall back to the VLE + * depth-first engine for that case. When min_hops <= target_depth the + * bound imposes no additional constraint and the shortest path(s) already + * found are returned unchanged. + * + * The VLE engine matches a single edge label only, so a multi-type filter + * combined with this regime is still unsupported. 
+ */ + if (min_hops > 0 && target_depth < min_hops) + { + Oid fallback_label_oid = InvalidOid; + + /* the BFS scratch is no longer needed; the fallback uses its own */ + MemoryContextSwitchTo(oldctx); + MemoryContextDelete(scratch); + + if (n_label_oids > 1) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("%s: a minimum hop count greater than the shortest path length is not supported with multiple relationship types", + fname))); + } + + if (n_label_oids == 1) + { + fallback_label_oid = label_oids[0]; + } + + /* + * The fallback duplicates graph_name internally and only needs the + * resolved label oid, so the temporaries are freed here once its + * result is captured rather than retained for the SRF's lifetime. + */ + { + Datum *fb_paths; + + fb_paths = sp_minhops_fallback(ggctx, graph_oid, graph_name, fname, + source, target, fallback_label_oid, + dir, min_hops, max_hops, collect_all, + out_count); + pfree_if_not_null(graph_name); + pfree_if_not_null(label_oids); + return fb_paths; + } + } + if (!collect_all) { /* reconstruct the single shortest path from the parent pointers */ @@ -3421,6 +3735,8 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, cur = e->parent_vertex; } + /* build the surviving result Datum in the caller's context */ + MemoryContextSwitchTo(oldctx); paths = palloc(sizeof(Datum)); paths[0] = sp_build_path_datum(graph_oid, alt, alt_len); *out_count = 1; @@ -3436,9 +3752,12 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, int64 idx = 0; sp_enumerate(visited, source, target, alt, alt_len, alt_len - 1, - &arrays); + fname, &arrays); n = list_length(arrays); + + /* build the surviving result Datums in the caller's context */ + MemoryContextSwitchTo(oldctx); paths = palloc(sizeof(Datum) * (n > 0 ? 
n : 1)); foreach(lc, arrays) { @@ -3450,7 +3769,11 @@ static Datum *sp_compute_paths(agtype *graph_name_agt, agtype *start_agt, *out_count = n; } - hash_destroy(visited); + /* results are copied out; drop the BFS/enumeration scratch */ + MemoryContextSwitchTo(oldctx); + MemoryContextDelete(scratch); + pfree_if_not_null(graph_name); + pfree_if_not_null(label_oids); return paths; } @@ -3521,8 +3844,10 @@ static Datum sp_srf_impl(FunctionCallInfo fcinfo, bool collect_all) state = palloc0(sizeof(sp_srf_state)); state->next = 0; state->paths = sp_compute_paths(a_graph, a_start, a_end, a_label, - a_dir, a_min, a_max, collect_all, - &state->npaths); + a_dir, a_min, a_max, + collect_all ? "age_all_shortest_paths" + : "age_shortest_path", + collect_all, &state->npaths); funcctx->user_fctx = state; MemoryContextSwitchTo(oldctx); From 10e0f7461cd475edd51ce5abe7dccf89622adbd2 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Mon, 29 Jun 2026 12:57:18 -0400 Subject: [PATCH 13/20] Fix single-node labeled pattern expressions not filtering by label (#2443) (#2444) A single-node labeled pattern used as a boolean expression -- e.g. `WHERE (a:Person)`, `WHERE EXISTS((a:Person))` -- was accepted but did not test the bound vertex's label. It desugars to an EXISTS sub-pattern, and make_path_join_quals() returned early for vertex-only patterns (list_length(entities) < 3), emitting no quals. With no edge to carry a correlation, the sub-pattern referenced nothing from the enclosing query, so the planner produced an uncorrelated one-time InitPlan that was trivially true whenever any vertex of that label existed -- the predicate matched every outer row. Emit an explicit label-id filter for a vertex-only pattern whose vertex carries a non-default label and whose variable is declared in an ancestor parse state (i.e. a correlated reference). 
make_qual() builds a name-based id reference that resolves to the outer variable, so the filter both correlates the sub-pattern to that variable and enforces the label. Freshly scanned, non-correlated vertices (no ancestor binding) are untouched, so plain MATCH (a:Person) and "does any X exist" EXISTS checks are unaffected. Add regression coverage in pattern_expression: WHERE (a:Person), WHERE NOT (a:Person), and EXISTS((a:Company)) against a graph with a non-Person vertex. All 41 regression tests pass. --- regress/expected/pattern_expression.out | 70 ++++++++++++++++++++++--- regress/sql/pattern_expression.sql | 49 ++++++++++++++--- src/backend/parser/cypher_clause.c | 50 +++++++++++++++++- 3 files changed, 154 insertions(+), 15 deletions(-) diff --git a/regress/expected/pattern_expression.out b/regress/expected/pattern_expression.out index 0494d49b9..93a02e3fa 100644 --- a/regress/expected/pattern_expression.out +++ b/regress/expected/pattern_expression.out @@ -320,12 +320,15 @@ $$) AS (result agtype); -- -- Single-node pattern on an already-bound variable: (a:Label) -- --- NOTE: this is an EXISTS existence check on the bound variable, NOT an --- openCypher label predicate. A matching label is therefore always true --- (the variable is already bound), and a *different* label is rejected by --- AGE's pre-existing "multiple labels for variable" restriction rather than --- evaluating to false. Both behaviours are captured here so any future change --- to single-node-pattern semantics is caught by this test. +-- NOTE: as of #2443 a single-node labeled pattern is a correlated label +-- predicate -- in WHERE / EXISTS it tests whether the bound vertex actually +-- has the label (see the WHERE (a:Person) / EXISTS((a:Company)) cases in the +-- #2443 section below). Here the variable is already bound to the SAME label, +-- so the predicate is trivially true (the label matches). 
A *different* label +-- on an already-bound variable is still rejected by AGE's pre-existing +-- "multiple labels for variable" restriction rather than evaluating to false; +-- that is an orthogonal limitation, captured here so any future change to +-- single-node-pattern semantics is caught by this test. SELECT * FROM cypher('pattern_expr', $$ MATCH (a:Person) RETURN a.name, (a:Person) @@ -439,16 +442,69 @@ $$) AS (name agtype); "Alice" (1 row) +-- +-- Single-node labeled pattern as a boolean (#2443) +-- +-- A bound vertex carrying a label, e.g. (a:Person), must test that vertex's +-- label rather than be trivially true. Add a non-Person vertex so the filter +-- is observable (every other vertex in this graph is a :Person). +SELECT * FROM cypher('pattern_expr', $$ + CREATE (:Company {name: 'Acme'}) +$$) AS (result agtype); + result +-------- +(0 rows) + +-- bare single-node label predicate in WHERE: only the :Person vertices +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +----------- + "Alice" + "Bob" + "Charlie" + "Dave" +(4 rows) + +-- negated: only the non-Person vertex +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE NOT (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +-------- + "Acme" +(1 row) + +-- EXISTS() form of a single-node label predicate +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE EXISTS((a:Company)) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + name +-------- + "Acme" +(1 row) + -- -- Cleanup -- SELECT * FROM drop_graph('pattern_expr', true); -NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table pattern_expr._ag_label_vertex drop cascades to table pattern_expr._ag_label_edge drop cascades to table pattern_expr."Person" drop cascades to table pattern_expr."KNOWS" drop cascades to table pattern_expr."WORKS_WITH" +drop cascades to table 
pattern_expr."Company" NOTICE: graph "pattern_expr" has been dropped drop_graph ------------ diff --git a/regress/sql/pattern_expression.sql b/regress/sql/pattern_expression.sql index fff8476e5..9ded819ef 100644 --- a/regress/sql/pattern_expression.sql +++ b/regress/sql/pattern_expression.sql @@ -222,12 +222,15 @@ $$) AS (result agtype); -- -- Single-node pattern on an already-bound variable: (a:Label) -- --- NOTE: this is an EXISTS existence check on the bound variable, NOT an --- openCypher label predicate. A matching label is therefore always true --- (the variable is already bound), and a *different* label is rejected by --- AGE's pre-existing "multiple labels for variable" restriction rather than --- evaluating to false. Both behaviours are captured here so any future change --- to single-node-pattern semantics is caught by this test. +-- NOTE: as of #2443 a single-node labeled pattern is a correlated label +-- predicate -- in WHERE / EXISTS it tests whether the bound vertex actually +-- has the label (see the WHERE (a:Person) / EXISTS((a:Company)) cases in the +-- #2443 section below). Here the variable is already bound to the SAME label, +-- so the predicate is trivially true (the label matches). A *different* label +-- on an already-bound variable is still rejected by AGE's pre-existing +-- "multiple labels for variable" restriction rather than evaluating to false; +-- that is an orthogonal limitation, captured here so any future change to +-- single-node-pattern semantics is caught by this test. SELECT * FROM cypher('pattern_expr', $$ MATCH (a:Person) RETURN a.name, (a:Person) @@ -299,6 +302,40 @@ SELECT * FROM cypher('pattern_expr', $$ ORDER BY a.name $$) AS (name agtype); +-- +-- Single-node labeled pattern as a boolean (#2443) +-- +-- A bound vertex carrying a label, e.g. (a:Person), must test that vertex's +-- label rather than be trivially true. Add a non-Person vertex so the filter +-- is observable (every other vertex in this graph is a :Person). 
+SELECT * FROM cypher('pattern_expr', $$ + CREATE (:Company {name: 'Acme'}) +$$) AS (result agtype); + +-- bare single-node label predicate in WHERE: only the :Person vertices +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + +-- negated: only the non-Person vertex +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE NOT (a:Person) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + +-- EXISTS() form of a single-node label predicate +SELECT * FROM cypher('pattern_expr', $$ + MATCH (a) + WHERE EXISTS((a:Company)) + RETURN a.name + ORDER BY a.name +$$) AS (name agtype); + -- -- Cleanup -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index a3a1a5044..6582ff8d1 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -5873,10 +5873,56 @@ static List *make_path_join_quals(cypher_parsestate *cpstate, List *entities) List *quals = NIL; List *join_quals; - /* for vertex only queries, there is no work to do */ + /* + * Vertex-only patterns have no edges, so the edge-driven correlation and + * label-filter logic below never runs. That is correct for a freshly + * scanned vertex -- its label comes from its label-table scan. But a + * vertex that refers to a variable from an ENCLOSING query -- e.g. the + * (a:Person) in MATCH (a) WHERE (a:Person) / EXISTS((a:Person)) -- is not + * scanned from its label table here. Without an explicit filter such a + * sub-pattern is uncorrelated and trivially true (the label is never + * tested). If the vertex carries a non-default label and its variable + * exists in an ancestor parse state, emit a label-id filter: make_qual + * builds a name-based id reference that resolves to the outer variable, + * which both correlates the sub-pattern to it and enforces the label. 
+ */ if (list_length(entities) < 3) { - return NIL; + cypher_parsestate *parent_cpstate = + (cypher_parsestate *) cpstate->pstate.parentParseState; + ListCell *vlc; + + if (parent_cpstate != NULL) + { + foreach (vlc, entities) + { + transform_entity *ent = lfirst(vlc); + char *label; + char *name; + + if (ent->type != ENT_VERTEX) + { + continue; + } + + label = ent->entity.node->label; + name = ent->entity.node->name; + + if (label != NULL && !IS_DEFAULT_LABEL_VERTEX(label) && + name != NULL && + find_variable(parent_cpstate, name) != NULL) + { + Node *id_field = make_qual(cpstate, ent, "id"); + + quals = lappend(quals, + filter_vertices_on_label_id(cpstate, + id_field, + label)); + } + } + } + + return quals; } lc = list_head(entities); From a7e10f478b7be7c5083ea16aea32e419c805b249 Mon Sep 17 00:00:00 2001 From: Prashant Chinnam <5108573+crprashant@users.noreply.github.com> Date: Mon, 29 Jun 2026 10:16:57 -0700 Subject: [PATCH 14/20] Preserve null-valued keys in map literals (#2391) (#2412) * Preserve null-valued keys in map literals (#2391) Map literals such as RETURN {a: null} previously dropped keys whose values were null, producing {} instead of {"a": null}. This diverged from the openCypher / Neo4j semantics where map literals preserve every key the user wrote, including those bound to null. Root cause: cypher_map.keep_null defaulted to false (zero-initialised), so the grammar-produced node fed agtype_build_map_nonull, which strips null entries. Call sites that legitimately need strip-null semantics (CREATE node/edge property maps and SET = assignments) already set keep_null=false explicitly, and the MATCH pattern path sets it to true explicitly. Flipping the grammar default to true therefore only affects the cases that were buggy (bare map expressions and nested map values), and leaves CREATE/SET behaviour unchanged. 
Two preexisting tests encoded the old buggy output and are updated: expr.out (bare RETURN maps now keep the null value) and agtype.out (a nested map inside an orderability test no longer drops its null entry, shifting one row in the ORDER BY result). Dedicated regression coverage for #2391 is added to regress/sql/expr.sql. * Address review: move 'End of tests' marker and drop order claim - Move 'End of tests' to after the Issue 2391 test block so it marks the actual end of the file. - Remove 'in order' from the mixed-values comment since map key ordering is not guaranteed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Update accessor EXPLAIN expected output for null-preserving map literals Map literals now build with agtype_build_map (keep_null) instead of agtype_build_map_nonull, so the accessor-optimization EXPLAIN plan in expr.out must reflect the new function name. Also strip a stray trailing CRLF on the last line of expr.sql/expr.out that leaked a carriage return into the regression output. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Add nested-map write coverage and clarify top-level strip wording A reviewer noted the keep_null=true default reaches further than the PR summary stated: CREATE / SET = only override keep_null=false on the top-level property map, not recursively. A nested map value is its own cypher_map node, so it now inherits the new default and preserves its null-valued keys (e.g. CREATE (n {a: {b: null}}) stores a -> {"b": null}). - regress/sql/expr.sql, regress/expected/expr.out: pin the nested case under a write with CREATE (n:Nested {a: {b: null}}), MATCH ... SET n = {a: {b: null}}, and a MATCH verify. - src/backend/parser/cypher_gram.y: clarify the map: rule comment to state the CREATE / SET override is top-level only and nested map values keep the null-preserving default. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- regress/expected/agtype.out | 4 +- regress/expected/expr.out | 148 +++++++++++++++++++++++++++++-- regress/sql/expr.sql | 54 +++++++++++ src/backend/parser/cypher_gram.y | 10 +++ 4 files changed, 205 insertions(+), 11 deletions(-) diff --git a/regress/expected/agtype.out b/regress/expected/agtype.out index b2f3b83e1..0f4775b88 100644 --- a/regress/expected/agtype.out +++ b/regress/expected/agtype.out @@ -2207,8 +2207,8 @@ SELECT * FROM cypher('orderability_graph', $$ MATCH (n) RETURN n ORDER BY n.prop {"id": 844424930131981, "label": "vertex", "properties": {"prop": [{"id": 0, "label": "v", "properties": {"i": 0}}::vertex, {"id": 2, "label": "e", "end_id": 1, "start_id": 0, "properties": {"i": 0}}::edge, {"id": 1, "label": "v", "properties": {"i": 0}}::vertex]::path}}::vertex {"id": 844424930131980, "label": "vertex", "properties": {"prop": {"id": 2, "label": "e", "end_id": 1, "start_id": 0, "properties": {"i": 0}}::edge}}::vertex {"id": 844424930131979, "label": "vertex", "properties": {"prop": {"id": 0, "label": "v", "properties": {"i": 0}}::vertex}}::vertex - {"id": 844424930131978, "label": "vertex", "properties": {"prop": {"bool": true}}}::vertex {"id": 844424930131977, "label": "vertex", "properties": {"prop": {"i": 0, "bool": true}}}::vertex + {"id": 844424930131978, "label": "vertex", "properties": {"prop": {"i": null, "bool": true}}}::vertex {"id": 844424930131975, "label": "vertex", "properties": {"prop": [1, 2, 3]}}::vertex {"id": 844424930131976, "label": "vertex", "properties": {"prop": [1, 2, 3, 4, 5]}}::vertex {"id": 844424930131973, "label": "vertex", "properties": {"prop": "string"}}::vertex @@ -2230,8 +2230,8 @@ SELECT * FROM cypher('orderability_graph', $$ MATCH (n) RETURN n ORDER BY n.prop {"id": 844424930131973, "label": "vertex", "properties": {"prop": "string"}}::vertex {"id": 844424930131976, 
"label": "vertex", "properties": {"prop": [1, 2, 3, 4, 5]}}::vertex {"id": 844424930131975, "label": "vertex", "properties": {"prop": [1, 2, 3]}}::vertex + {"id": 844424930131978, "label": "vertex", "properties": {"prop": {"i": null, "bool": true}}}::vertex {"id": 844424930131977, "label": "vertex", "properties": {"prop": {"i": 0, "bool": true}}}::vertex - {"id": 844424930131978, "label": "vertex", "properties": {"prop": {"bool": true}}}::vertex {"id": 844424930131979, "label": "vertex", "properties": {"prop": {"id": 0, "label": "v", "properties": {"i": 0}}::vertex}}::vertex {"id": 844424930131980, "label": "vertex", "properties": {"prop": {"id": 2, "label": "e", "end_id": 1, "start_id": 0, "properties": {"i": 0}}::edge}}::vertex {"id": 844424930131981, "label": "vertex", "properties": {"prop": [{"id": 0, "label": "v", "properties": {"i": 0}}::vertex, {"id": 2, "label": "e", "end_id": 1, "start_id": 0, "properties": {"i": 0}}::edge, {"id": 1, "label": "v", "properties": {"i": 0}}::vertex]::path}}::vertex diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 3c80bcae5..d6b9ac155 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -40,18 +40,18 @@ SELECT * FROM cypher('expr', $$RETURN {}$$) AS r(c agtype); SELECT * FROM cypher('expr', $$ RETURN {s: 's', i: 1, f: 1.0, b: true, z: null} $$) AS r(c agtype); - c ------------------------------------------ - {"b": true, "f": 1.0, "i": 1, "s": "s"} + c +---------------------------------------------------- + {"b": true, "f": 1.0, "i": 1, "s": "s", "z": null} (1 row) -- nested maps SELECT * FROM cypher('expr', $$ RETURN {s: {s: 's'}, t: {i: 1, e: {f: 1.0}, s: {a: {b: true}}}, z: null} $$) AS r(c agtype); - c ----------------------------------------------------------------------------- - {"s": {"s": "s"}, "t": {"e": {"f": 1.0}, "i": 1, "s": {"a": {"b": true}}}} + c +--------------------------------------------------------------------------------------- + {"s": {"s": "s"}, "t": {"e": 
{"f": 1.0}, "i": 1, "s": {"a": {"b": true}}}, "z": null} (1 row) -- @@ -9866,10 +9866,10 @@ SELECT * FROM cypher('accessor_opt', $$ MATCH (n:Person) RETURN {id: id(n), name: n.name} $$) AS (plan agtype); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ Seq Scan on accessor_opt."Person" n - Output: agtype_build_map_nonull('id'::text, n.id, 'name'::text, agtype_access_operator(VARIADIC ARRAY[n.properties, '"name"'::agtype])) + Output: agtype_build_map('id'::text, n.id, 'name'::text, agtype_access_operator(VARIADIC ARRAY[n.properties, '"name"'::agtype])) (2 rows) SELECT * FROM cypher('accessor_opt', $$ @@ -10558,6 +10558,136 @@ NOTICE: graph "list" has been dropped (1 row) +-- +-- Issue 2391 - map literals must preserve keys whose values are null +-- +SELECT create_graph('issue_2391'); +NOTICE: graph "issue_2391" has been created + create_graph +-------------- + +(1 row) + +-- single-key null +SELECT * FROM cypher('issue_2391', $$ + RETURN {a: null} AS m +$$) AS (m agtype); + m +------------- + {"a": null} +(1 row) + +-- multiple null values +SELECT * FROM cypher('issue_2391', $$ + RETURN {companyName: null, sinceYear: null} AS m +$$) AS (m agtype); + m +------------------------------------------ + {"sinceYear": null, "companyName": null} +(1 row) + +-- keys() must see the null-valued key +SELECT * FROM cypher('issue_2391', $$ + RETURN keys({a: null}) AS ks +$$) AS (ks agtype); + ks +------- + ["a"] +(1 row) + +-- coalesce passes a non-null map (map itself is not null) through +SELECT * FROM cypher('issue_2391', $$ + RETURN coalesce({a: null}, null) AS m +$$) AS (m agtype); + m +------------- + {"a": null} +(1 row) + +-- nested map values inside an expression also preserve nulls +SELECT * FROM cypher('issue_2391', $$ + 
RETURN {outer: {inner: null, kept: 1}} AS m +$$) AS (m agtype); + m +--------------------------------------- + {"outer": {"kept": 1, "inner": null}} +(1 row) + +-- mixed non-null and null values are all preserved +SELECT * FROM cypher('issue_2391', $$ + RETURN {a: 1, b: null, c: 'x'} AS m +$$) AS (m agtype); + m +------------------------------- + {"a": 1, "b": null, "c": "x"} +(1 row) + +-- control: empty map is still empty +SELECT * FROM cypher('issue_2391', $$ + RETURN {} AS m +$$) AS (m agtype); + m +---- + {} +(1 row) + +-- control: CREATE must still strip top-level null properties so +-- setting a property to null removes it from storage +SELECT * FROM cypher('issue_2391', $$ + CREATE (n:Item {keep: 1, drop: null}) RETURN n +$$) AS (n agtype); + n +----------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Item", "properties": {"keep": 1}}::vertex +(1 row) + +SELECT * FROM cypher('issue_2391', $$ + MATCH (n:Item) RETURN n +$$) AS (n agtype); + n +----------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Item", "properties": {"keep": 1}}::vertex +(1 row) + +-- nested map values under a write (CREATE / SET =) are preserved: the +-- top-level property map is null-stripped, but a nested map literal is +-- its own node and keeps its null-valued keys +SELECT * FROM cypher('issue_2391', $$ + CREATE (n:Nested {a: {b: null}}) RETURN n +$$) AS (n agtype); + n +--------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "Nested", "properties": {"a": {"b": null}}}::vertex +(1 row) + +SELECT * FROM cypher('issue_2391', $$ + MATCH (n:Nested) SET n = {a: {b: null}} RETURN n +$$) AS (n agtype); + n +--------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "Nested", "properties": {"a": {"b": null}}}::vertex +(1 row) + +SELECT * FROM 
cypher('issue_2391', $$ + MATCH (n:Nested) RETURN n +$$) AS (n agtype); + n +--------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "Nested", "properties": {"a": {"b": null}}}::vertex +(1 row) + +SELECT * FROM drop_graph('issue_2391', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_2391._ag_label_vertex +drop cascades to table issue_2391._ag_label_edge +drop cascades to table issue_2391."Item" +drop cascades to table issue_2391."Nested" +NOTICE: graph "issue_2391" has been dropped + drop_graph +------------ + +(1 row) + -- -- End of tests -- diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index b951a2367..251349cef 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -4136,6 +4136,60 @@ SELECT * FROM drop_graph('regex', true); SELECT * FROM drop_graph('keys', true); SELECT * FROM drop_graph('list', true); +-- +-- Issue 2391 - map literals must preserve keys whose values are null +-- +SELECT create_graph('issue_2391'); +-- single-key null +SELECT * FROM cypher('issue_2391', $$ + RETURN {a: null} AS m +$$) AS (m agtype); +-- multiple null values +SELECT * FROM cypher('issue_2391', $$ + RETURN {companyName: null, sinceYear: null} AS m +$$) AS (m agtype); +-- keys() must see the null-valued key +SELECT * FROM cypher('issue_2391', $$ + RETURN keys({a: null}) AS ks +$$) AS (ks agtype); +-- coalesce passes a non-null map (map itself is not null) through +SELECT * FROM cypher('issue_2391', $$ + RETURN coalesce({a: null}, null) AS m +$$) AS (m agtype); +-- nested map values inside an expression also preserve nulls +SELECT * FROM cypher('issue_2391', $$ + RETURN {outer: {inner: null, kept: 1}} AS m +$$) AS (m agtype); +-- mixed non-null and null values are all preserved +SELECT * FROM cypher('issue_2391', $$ + RETURN {a: 1, b: null, c: 'x'} AS m +$$) AS (m agtype); +-- control: empty map is still empty +SELECT * FROM cypher('issue_2391', $$ + RETURN 
{} AS m +$$) AS (m agtype); +-- control: CREATE must still strip top-level null properties so +-- setting a property to null removes it from storage +SELECT * FROM cypher('issue_2391', $$ + CREATE (n:Item {keep: 1, drop: null}) RETURN n +$$) AS (n agtype); +SELECT * FROM cypher('issue_2391', $$ + MATCH (n:Item) RETURN n +$$) AS (n agtype); +-- nested map values under a write (CREATE / SET =) are preserved: the +-- top-level property map is null-stripped, but a nested map literal is +-- its own node and keeps its null-valued keys +SELECT * FROM cypher('issue_2391', $$ + CREATE (n:Nested {a: {b: null}}) RETURN n +$$) AS (n agtype); +SELECT * FROM cypher('issue_2391', $$ + MATCH (n:Nested) SET n = {a: {b: null}} RETURN n +$$) AS (n agtype); +SELECT * FROM cypher('issue_2391', $$ + MATCH (n:Nested) RETURN n +$$) AS (n agtype); +SELECT * FROM drop_graph('issue_2391', true); + -- -- End of tests -- diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 83d69c83b..ddd62d7ee 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -2127,6 +2127,16 @@ map: n = make_ag_node(cypher_map); n->keyvals = $2; + /* + * By default, a Cypher map literal preserves keys whose + * values are null (openCypher / Neo4j semantics: e.g. + * RETURN {a: null} yields {a: null}, not {}). CREATE and + * SET = override this to false on the top-level property + * map in cypher_clause.c so null properties are stripped + * on write; a nested map value is its own node and keeps + * this default, preserving its null-valued keys. + */ + n->keep_null = true; $$ = (Node *)n; } From 934ad255b7e2dfad64214fdc52205c44bda1fe43 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Wed, 1 Jul 2026 19:01:42 -0700 Subject: [PATCH 15/20] Fix Node.js driver CI build broken by @types/node drift (#2452) The Node.js driver CI (npm install -> npm run build -> tsc) failed with parser errors in node_modules/@types/node/ffi.d.ts (TS1139/TS1005/TS1109/ TS1128). 
package-lock.json is gitignored, so CI resolves dependencies purely from package.json. @types/node was only pulled transitively via a wildcard range (@types/pg and jest depend on @types/node@*), so a fresh install grabbed the latest (26.x). That version uses `const` type parameters (a TypeScript 5.0 feature) in ffi.d.ts, which typescript@4.9 cannot parse. skipLibCheck does not suppress these parser-level errors. The runtime Node version is unrelated: @types/node is resolved from the npm dependency graph, not the Node.js runtime. Fix: - Add a bounded direct devDependency "@types/node": "^20.19.0" so a fresh install constrains the typings to the Node 20 LTS line, which is compatible with typescript@4.9 and keeps the toolchain consistent (eslint 7 / typescript-eslint 4 / TS 4.9 / Node 20 typings). - Pin CI to Node 20 (setup-node@v4, node-version: 20) for reproducibility and to match the pinned typings; replaces the deprecated setup-node@v3 and floating node-version: latest. Verified: a clean, no-lockfile install (matching CI) now resolves @types/node@20.19.43 and tsc builds successfully. 
Co-authored-by: Copilot modified: .github/workflows/nodejs-driver.yaml modified: drivers/nodejs/package.json --- .github/workflows/nodejs-driver.yaml | 4 ++-- drivers/nodejs/package.json | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nodejs-driver.yaml b/.github/workflows/nodejs-driver.yaml index d0557fdf9..e6a9e4610 100644 --- a/.github/workflows/nodejs-driver.yaml +++ b/.github/workflows/nodejs-driver.yaml @@ -22,9 +22,9 @@ jobs: run: docker compose up -d - name: Set up Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: - node-version: latest + node-version: 20 - name: Install dependencies run: npm install diff --git a/drivers/nodejs/package.json b/drivers/nodejs/package.json index 15c2371f4..d17aa3b32 100644 --- a/drivers/nodejs/package.json +++ b/drivers/nodejs/package.json @@ -34,6 +34,7 @@ }, "devDependencies": { "@types/jest": "^29.5.14", + "@types/node": "^20.19.0", "@types/pg": "^7.14.10", "@typescript-eslint/eslint-plugin": "^4.22.1", "@typescript-eslint/parser": "^4.22.1", From 2e6db9152ee5ae3689d0a5da7e75c37e91b5c131 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Thu, 2 Jul 2026 10:39:37 -0400 Subject: [PATCH 16/20] Fix stack overflow and precision loss in toFloatList() conversion (#2451) toFloatList()'s AGTV_FLOAT branch formatted each element with sprintf(buffer, "%f", ...) into a fixed 64-byte stack buffer and then re-parsed the string back into a float. This had two defects: 1. Stack overflow. "%f" prints the full integer part with no width limit, so a large magnitude overflows the 64-byte buffer. The value is query-reachable: RETURN toFloatList([1.0e308]) needs ~317 bytes (309 integer digits + ".000000") and smashes the stack. This is the issue reported in #2410. 2. Precision loss. "%f" emits only 6 fractional digits, so the format-and-reparse round trip was lossy -- toFloatList([0.123456789]) returned 0.123457. 
The element is already a float8, so the whole format/reparse step is unnecessary. Assign elem->val.float_value directly. This removes the stack buffer entirely (no magic buffer size to justify) and fixes both the overflow and the precision loss at once. Also harden toStringList(): its "%.*g"/"%ld" conversions use bounded formats and were never overflow-prone, but switch them from sprintf to snprintf as defensive depth. Add regression coverage to regress/sql/expr.sql for both the large magnitude case (no overflow) and precision preservation. This reimplements the fix originally proposed by David Christensen in #2410, whose report identified the sprintf overflow. Co-authored-by: David Christensen --- regress/expected/expr.out | 20 ++++++++++++++++++++ regress/sql/expr.sql | 10 ++++++++++ src/backend/utils/adt/agtype.c | 19 +++++++++++-------- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/regress/expected/expr.out b/regress/expected/expr.out index d6b9ac155..806a6f65c 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -3548,6 +3548,26 @@ $$) AS (toFloatList agtype); [1.20002] (1 row) +-- large magnitudes must not overflow the conversion (regression: unbounded +-- sprintf into a fixed stack buffer overflowed for values like 1.0e308) +SELECT * FROM cypher('expr', $$ + RETURN toFloatList([1.0e308, -1.0e308]) +$$) AS (toFloatList agtype); + tofloatlist +------------------- + [1e+308, -1e+308] +(1 row) + +-- precision must be preserved (regression: "%f" format truncated to 6 digits, +-- so 0.123456789 came back as 0.123457) +SELECT * FROM cypher('expr', $$ + RETURN toFloatList([0.123456789]) +$$) AS (toFloatList agtype); + tofloatlist +--------------- + [0.123456789] +(1 row) + -- should return null SELECT * FROM cypher('expr', $$ RETURN toFloatList(['true']) diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 251349cef..d4d900a1c 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -1520,6 +1520,16 @@ $$) AS 
(toFloatList agtype); SELECT * FROM cypher('expr', $$ RETURN toFloatList([1.20002]) $$) AS (toFloatList agtype); +-- large magnitudes must not overflow the conversion (regression: unbounded +-- sprintf into a fixed stack buffer overflowed for values like 1.0e308) +SELECT * FROM cypher('expr', $$ + RETURN toFloatList([1.0e308, -1.0e308]) +$$) AS (toFloatList agtype); +-- precision must be preserved (regression: "%f" format truncated to 6 digits, +-- so 0.123456789 came back as 0.123457) +SELECT * FROM cypher('expr', $$ + RETURN toFloatList([0.123456789]) +$$) AS (toFloatList agtype); -- should return null SELECT * FROM cypher('expr', $$ RETURN toFloatList(['true']) diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index bf69bf1fa..0e1a7963f 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -7099,8 +7099,6 @@ Datum age_tofloatlist(PG_FUNCTION_ARGS) int count; int i; bool is_valid = false; - float8 float_num; - char buffer[64]; /* check for null */ if (PG_ARGISNULL(0)) @@ -7160,11 +7158,16 @@ Datum age_tofloatlist(PG_FUNCTION_ARGS) case AGTV_FLOAT: + /* + * The element is already a float8, so assign it directly. The + * previous approach formatted it to a string with sprintf() and + * re-parsed it: that both overflowed a fixed 64-byte stack buffer + * for large magnitudes (e.g. 1.0e308 needs ~317 chars) and lost + * precision, since "%f" rounds to 6 fractional digits. Direct + * assignment avoids both problems.
+ */ float_elem.type = AGTV_FLOAT; - float_num = elem->val.float_value; - sprintf(buffer, "%f", float_num); - string = buffer; - float_elem.val.float_value = float8in_internal_null(string, NULL, "double precision", string, &is_valid); + float_elem.val.float_value = elem->val.float_value; agis_result.res = push_agtype_value(&agis_result.parse_state, WAGT_ELEM, &float_elem); break; @@ -8146,7 +8149,7 @@ Datum age_tostringlist(PG_FUNCTION_ARGS) case AGTV_FLOAT: - sprintf(buffer, "%.*g", DBL_DIG, elem->val.float_value); + snprintf(buffer, sizeof(buffer), "%.*g", DBL_DIG, elem->val.float_value); string_elem.val.string.val = pstrdup(buffer); string_elem.val.string.len = strlen(buffer); @@ -8157,7 +8160,7 @@ Datum age_tostringlist(PG_FUNCTION_ARGS) case AGTV_INTEGER: - sprintf(buffer, "%ld", elem->val.int_value); + snprintf(buffer, sizeof(buffer), "%ld", elem->val.int_value); string_elem.val.string.val = pstrdup(buffer); string_elem.val.string.len = strlen(buffer); From d53c1b9ff9555947eeaf3abf52c61b3ff9d7033f Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 2 Jul 2026 09:58:56 -0700 Subject: [PATCH 17/20] Support outer references in reduce() fold bodies (#2448) Allow a reduce(acc = init, var IN list | body) fold body to reference loop-invariant values from the enclosing query -- outer-query variables and cypher() parameters -- in addition to the accumulator and element. These were previously rejected with ERRCODE_FEATURE_NOT_SUPPORTED. How it works ------------ The fold body is still compiled to a standalone expression evaluated by age_reduce_transfn, so an outer reference (which cannot be evaluated there) is captured at transform time and supplied as a value: - After the accumulator and element are rewritten to PARAM_EXEC params 0 and 1, transform_cypher_reduce() walks the body and replaces each loop-invariant outer reference -- one that references an outer Var or a cypher() $parameter but not the accumulator/element -- with a new PARAM_EXEC param 2, 3, ... 
in body order. Capture is at leaf granularity: only the bare outer value is hoisted out of the body, while the operators, function calls and CASE/AND/OR/coalesce branches around it stay in the serialized body. Because a captured value becomes an aggregate argument that the executor evaluates eagerly and unconditionally, hoisting a whole computed subtree (for example "1/z" under a never-taken CASE branch) would defeat the fold's short-circuiting; capturing only the leaf keeps evaluation under the body's own control flow. The one exception is an outer reference that is not itself agtype-typed -- most commonly the graphid inside a graph vertex/edge variable -- whose smallest enclosing agtype-typed subtree is captured whole, since it cannot stand alone as an agtype[] extra. - The captured expressions are passed to the aggregate as a trailing agtype[] argument; age_reduce(agtype, text, agtype, agtype[]) and its transition function gain this argument. - age_reduce_transfn sizes its param array to 2 + the number of captures and binds the captured values to params 2.. on every row. Because the captures are evaluated in the outer query context as ordinary aggregate arguments, a correlated capture is re-evaluated per group, so an outer value that varies per row (for example under UNWIND) is folded with the correct value. Each capture slot is rebound on every row, and the trailing extras argument is read only when the aggregate actually passes it (PG_NARGS), keeping the transition safe under direct age_reduce() SQL calls and an older 4-argument signature. This keeps the no-core-patch design: the body is still a serialized standalone expression, and the only new machinery is the captured-value plumbing. 
Still rejected -------------- Subqueries in the body (including a nested reduce()) and aggregate functions remain unsupported and raise a clean ERRCODE_FEATURE_NOT_SUPPORTED error: a subquery cannot be planned as a plain aggregate argument, and an aggregate in a per-element fold is undefined per the openCypher specification. Tests ----- age_reduce gains an "Outer references in the fold body" section covering a plain outer variable, an outer variable used as a multiplier, two distinct outer variables, a property of an outer graph variable, the same outer variable referenced more than once, a property of an outer map, a subexpression that mixes an outer reference with the element (only the loop-invariant part is captured), an outer reference inside a CASE branch of the body, a NULL outer value propagating through the fold, multiple captures mixing a NULL and a non-NULL outer value, an outer variable that changes per row (captured per group), and a cypher() parameter supplied via a prepared statement. A "Short-circuit evaluation is preserved for outer references in the body" section verifies that a guarded outer sub-expression is not evaluated on a branch that is not taken: a never-taken CASE THEN branch, a never-taken CASE ELSE branch, an OR and an AND that short-circuit, and a coalesce -- each of which would divide by zero if the outer "1/w" were hoisted into an eagerly evaluated aggregate argument -- plus a guarded branch that is taken and evaluates its outer division normally. The previously-rejected outer-variable case is moved out of the not-supported section, which now covers a nested reduce() (any subquery in the body is unsupported) and an aggregate in the body. 
The same change also broadens the base reduce() coverage with value-type folds (a float accumulator, negative numbers, a map accumulator passed through unchanged, and list elements indexed in the body), function calls in the fold body (a scalar function over the element and the list itself produced by a function), reduce() composed with surrounding expressions (consumed by another function and used in a comparison), and syntax-error checks for each required piece of the form -- the "= init", ", var IN list", and "| body" clauses, plus a rejected qualified iterator variable. 42/42 installcheck pass. Co-authored-by: Copilot modified: age--1.7.0--y.y.y.sql modified: regress/expected/age_reduce.out modified: regress/sql/age_reduce.sql modified: sql/age_aggregate.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/utils/adt/agtype.c --- age--1.7.0--y.y.y.sql | 14 +- regress/expected/age_reduce.out | 326 ++++++++++++++++++++++++++- regress/sql/age_reduce.sql | 206 ++++++++++++++++- sql/age_aggregate.sql | 14 +- src/backend/parser/cypher_clause.c | 349 +++++++++++++++++++++++++++-- src/backend/utils/adt/agtype.c | 94 +++++++- 6 files changed, 952 insertions(+), 51 deletions(-) diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index 6dc8f707f..fd1f28160 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -1107,17 +1107,21 @@ COMMENT ON FUNCTION ag_catalog.create_subgraph(name, name, text, text) IS -- Transition function for the age_reduce aggregate. The fold body is compiled -- by transform_cypher_reduce() with the accumulator and element rewritten to -- PARAM_EXEC params 0 and 1 and serialized into the text argument; the --- transition evaluates it for each element in list order. It must be callable --- with a NULL transition state (no initcond), so it is intentionally not STRICT. -CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype) +-- transition evaluates it for each element in list order. 
The trailing +-- agtype[] argument carries the loop-invariant outer values (outer-query +-- variables and cypher() parameters) referenced by the body, bound to +-- PARAM_EXEC params 2, 3, ... It must be callable with a NULL transition state +-- (no initcond), so it is intentionally not STRICT. +CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype, agtype[]) RETURNS agtype LANGUAGE c PARALLEL UNSAFE AS 'MODULE_PATHNAME'; -- aggregate definition for reduce(); direct arguments are --- (init, serialized-body, element), with the element fed ORDER BY ordinality. -CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype) +-- (init, serialized-body, element, captured-outer-values), with the element +-- fed ORDER BY ordinality. +CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype, agtype[]) ( stype = agtype, sfunc = ag_catalog.age_reduce_transfn diff --git a/regress/expected/age_reduce.out b/regress/expected/age_reduce.out index 8a198965a..5f3bf29f8 100644 --- a/regress/expected/age_reduce.out +++ b/regress/expected/age_reduce.out @@ -222,6 +222,87 @@ $$) AS (result agtype); [1, 4, 9] (1 row) +-- +-- Value types in the fold +-- +-- a float accumulator and float elements +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0.0, x IN [1.5, 2.5, 3.0] | s + x) +$$) AS (result agtype); + result +-------- + 7.0 +(1 row) + +-- negative numbers +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [-1, -2, -3] | s + x) +$$) AS (result agtype); + result +-------- + -6 +(1 row) + +-- a map accumulator passed through unchanged +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = {n: 0}, x IN [1, 2, 3] | s) +$$) AS (result agtype); + result +---------- + {"n": 0} +(1 row) + +-- elements that are themselves lists, indexed in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [[1, 2], [3, 4], [5, 6]] | s + x[0]) +$$) AS (result agtype); + result +-------- + 9 +(1 row) + +-- +-- Function calls in the fold body +-- +-- a 
scalar function applied to the element +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN ['a', 'bb', 'ccc'] | s + size(x)) +$$) AS (result agtype); + result +-------- + 6 +(1 row) + +-- the list itself produced by a function +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN range(1, 5) | s + x) +$$) AS (result agtype); + result +-------- + 15 +(1 row) + +-- +-- Composing reduce() with surrounding expressions +-- +-- the reduce() result consumed by another function +SELECT * FROM cypher('reduce', $$ + RETURN size(reduce(s = [], x IN [1, 2, 3, 4] | s + [x])) +$$) AS (result agtype); + result +-------- + 4 +(1 row) + +-- the reduce() result used in a comparison +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) = 6 +$$) AS (result agtype); + result +-------- + true +(1 row) + -- -- A conditional body (CASE) -- @@ -484,17 +565,197 @@ $$) AS (name agtype, total agtype); (3 rows) -- --- Not-yet-supported constructs raise a clean feature error +-- Outer references in the fold body -- --- an outer variable referenced in the body +-- The body may reference loop-invariant values from the enclosing query: an +-- outer variable, a property of an outer variable, or a cypher() parameter. 
+-- a plain outer variable in the body SELECT * FROM cypher('reduce', $$ WITH 5 AS w - RETURN reduce(s = 0, x IN [1, 2] | s + x + w) + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + w) $$) AS (result agtype); -ERROR: a reduce() expression may only reference its accumulator and element variables -LINE 1: SELECT * FROM cypher('reduce', $$ - ^ --- a nested reduce() in the body + result +-------- + 21 +(1 row) + +-- an outer variable used as a multiplier +SELECT * FROM cypher('reduce', $$ + WITH 3 AS factor + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * factor) +$$) AS (result agtype); + result +-------- + 18 +(1 row) + +-- two distinct outer variables in the body +SELECT * FROM cypher('reduce', $$ + WITH 2 AS a, 100 AS b + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * a + b) +$$) AS (result agtype); + result +-------- + 312 +(1 row) + +-- a property of an outer (graph) variable in the body +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) WHERE u.name = 'mid' + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + u.vals[0]) +$$) AS (result agtype); + result +-------- + 21 +(1 row) + +-- the same outer variable referenced more than once in the body +SELECT * FROM cypher('reduce', $$ + WITH 7 AS k + RETURN reduce(s = 0, x IN [1, 2, 3] | s + k + k) +$$) AS (result agtype); + result +-------- + 42 +(1 row) + +-- a property of an outer map referenced in the body +SELECT * FROM cypher('reduce', $$ + WITH {factor: 10} AS m + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * m.factor) +$$) AS (result agtype); + result +-------- + 60 +(1 row) + +-- a subexpression that mixes an outer reference with the element: only the +-- loop-invariant part (the outer list) is captured, the element index is not +SELECT * FROM cypher('reduce', $$ + WITH [10, 20, 30] AS lookup + RETURN reduce(s = 0, x IN [1, 2, 3] | s + lookup[x - 1]) +$$) AS (result agtype); + result +-------- + 60 +(1 row) + +-- an outer reference inside a CASE branch of the body is captured +SELECT * FROM cypher('reduce', $$ + WITH 
10 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN x % 2 = 0 THEN s + w ELSE s + x END) +$$) AS (result agtype); + result +-------- + 14 +(1 row) + +-- a NULL outer value propagates through the fold +SELECT * FROM cypher('reduce', $$ + WITH null AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + w) +$$) AS (result agtype); + result +-------- + null +(1 row) + +-- multiple outer captures with a mix of NULL and non-NULL: each is bound to its +-- own slot (the non-NULL multiplier is bound and the NULL still propagates) +SELECT * FROM cypher('reduce', $$ + WITH 3 AS a, null AS b + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * a + b) +$$) AS (result agtype); + result +-------- + null +(1 row) + +-- an outer variable that changes per row is captured per group +SELECT * FROM cypher('reduce', $$ + UNWIND [1, 2, 3] AS m + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | s + x * m) AS total + ORDER BY total +$$) AS (result agtype); + result +-------- + 10 + 20 + 30 +(3 rows) + +-- +-- Short-circuit evaluation is preserved for outer references in the body +-- +-- Only the outer leaf is captured; operators and CASE/AND/OR branches stay in +-- the body, so a guarded outer sub-expression is not evaluated on a branch +-- that is not taken. Each case below would divide by zero if the whole "1/w" +-- were hoisted into an eagerly evaluated aggregate argument instead. 
+-- the THEN branch is never taken, so "1/w" is not evaluated (expect 6) +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN false THEN s + 1/w ELSE s + x END) +$$) AS (result agtype); + result +-------- + 6 +(1 row) + +-- the ELSE branch is never taken, so "1/w" is not evaluated (expect 6) +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN true THEN s + x ELSE s + 1/w END) +$$) AS (result agtype); + result +-------- + 6 +(1 row) + +-- OR short-circuits once "w = 0" is true, so "1/w > 0" is not evaluated +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = true, x IN [1, 2, 3] | s AND (w = 0 OR 1/w > 0)) +$$) AS (result agtype); + result +-------- + true +(1 row) + +-- AND short-circuits once "w <> 0" is false, so "1/w > 0" is not evaluated +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = true, x IN [1, 2, 3] | s AND (w <> 0 AND 1/w > 0)) +$$) AS (result agtype); + result +-------- + false +(1 row) + +-- coalesce short-circuits: "1/w" is not evaluated when arg 1 is non-null +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | s + coalesce(w, 1/w)) +$$) AS (result agtype); + result +-------- + 0 +(1 row) + +-- when the guarded branch is taken, the outer sub-expression is evaluated +-- normally (division by a non-zero outer value): x = 2 -> s + 10/2 (expect 9) +SELECT * FROM cypher('reduce', $$ + WITH 2 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN x % 2 = 0 THEN s + 10/w ELSE s + x END) +$$) AS (result agtype); + result +-------- + 9 +(1 row) + +-- +-- Not-yet-supported constructs raise a clean feature error +-- +-- a nested reduce() in the body (any subquery in the body is unsupported) SELECT * FROM cypher('reduce', $$ RETURN reduce(s = 0, x IN [1, 2] | s + reduce(t = 0, y IN [x] | t + y)) $$) AS (result agtype); @@ -509,6 +770,57 @@ ERROR: aggregate functions are not supported in a reduce() 
expression LINE 1: SELECT * FROM cypher('reduce', $$ ^ -- +-- Syntax errors: each required piece of the reduce() form is enforced +-- +-- missing "= init" +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s, x IN [1, 2] | s + x) +$$) AS (result agtype); +ERROR: syntax error at or near "," +LINE 2: RETURN reduce(s, x IN [1, 2] | s + x) + ^ +-- missing ", var IN list" +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0 | s) +$$) AS (result agtype); +ERROR: syntax error at or near "|" +LINE 2: RETURN reduce(s = 0 | s) + ^ +-- missing "| body" +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2]) +$$) AS (result agtype); +ERROR: syntax error at or near ")" +LINE 2: RETURN reduce(s = 0, x IN [1, 2]) + ^ +-- a qualified iterator variable is not allowed +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x.y IN [1, 2] | s) +$$) AS (result agtype); +ERROR: syntax error at or near "." +LINE 2: RETURN reduce(s = 0, x.y IN [1, 2] | s) + ^ +-- +-- cypher() parameter referenced in the fold body (via a prepared statement) +-- +PREPARE reduce_param(agtype) AS + SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + $p) + $$, $1) AS (result agtype); +EXECUTE reduce_param('{"p": 10}'); + result +-------- + 36 +(1 row) + +EXECUTE reduce_param('{"p": 100}'); + result +-------- + 306 +(1 row) + +DEALLOCATE reduce_param; +-- -- "reduce" as a property key name (safe_keywords backward compatibility): -- because reduce() introduced a reserved keyword, confirm the word is still -- usable as a map key, the same way any/none/single are. 
diff --git a/regress/sql/age_reduce.sql b/regress/sql/age_reduce.sql index cf1261010..fbe324cd1 100644 --- a/regress/sql/age_reduce.sql +++ b/regress/sql/age_reduce.sql @@ -151,6 +151,55 @@ SELECT * FROM cypher('reduce', $$ RETURN reduce(acc = [], x IN [1, 2, 3] | acc + [x * x]) $$) AS (result agtype); +-- +-- Value types in the fold +-- +-- a float accumulator and float elements +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0.0, x IN [1.5, 2.5, 3.0] | s + x) +$$) AS (result agtype); + +-- negative numbers +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [-1, -2, -3] | s + x) +$$) AS (result agtype); + +-- a map accumulator passed through unchanged +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = {n: 0}, x IN [1, 2, 3] | s) +$$) AS (result agtype); + +-- elements that are themselves lists, indexed in the body +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [[1, 2], [3, 4], [5, 6]] | s + x[0]) +$$) AS (result agtype); + +-- +-- Function calls in the fold body +-- +-- a scalar function applied to the element +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN ['a', 'bb', 'ccc'] | s + size(x)) +$$) AS (result agtype); + +-- the list itself produced by a function +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN range(1, 5) | s + x) +$$) AS (result agtype); + +-- +-- Composing reduce() with surrounding expressions +-- +-- the reduce() result consumed by another function +SELECT * FROM cypher('reduce', $$ + RETURN size(reduce(s = [], x IN [1, 2, 3, 4] | s + [x])) +$$) AS (result agtype); + +-- the reduce() result used in a comparison +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x) = 6 +$$) AS (result agtype); + -- -- A conditional body (CASE) -- @@ -317,15 +366,127 @@ SELECT * FROM cypher('reduce', $$ $$) AS (name agtype, total agtype); -- --- Not-yet-supported constructs raise a clean feature error +-- Outer references in the fold body -- --- an outer variable 
referenced in the body +-- The body may reference loop-invariant values from the enclosing query: an +-- outer variable, a property of an outer variable, or a cypher() parameter. +-- a plain outer variable in the body SELECT * FROM cypher('reduce', $$ WITH 5 AS w - RETURN reduce(s = 0, x IN [1, 2] | s + x + w) + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + w) +$$) AS (result agtype); + +-- an outer variable used as a multiplier +SELECT * FROM cypher('reduce', $$ + WITH 3 AS factor + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * factor) +$$) AS (result agtype); + +-- two distinct outer variables in the body +SELECT * FROM cypher('reduce', $$ + WITH 2 AS a, 100 AS b + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * a + b) +$$) AS (result agtype); + +-- a property of an outer (graph) variable in the body +SELECT * FROM cypher('reduce', $$ + MATCH (u:bag) WHERE u.name = 'mid' + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + u.vals[0]) $$) AS (result agtype); --- a nested reduce() in the body +-- the same outer variable referenced more than once in the body +SELECT * FROM cypher('reduce', $$ + WITH 7 AS k + RETURN reduce(s = 0, x IN [1, 2, 3] | s + k + k) +$$) AS (result agtype); + +-- a property of an outer map referenced in the body +SELECT * FROM cypher('reduce', $$ + WITH {factor: 10} AS m + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * m.factor) +$$) AS (result agtype); + +-- a subexpression that mixes an outer reference with the element: only the +-- loop-invariant part (the outer list) is captured, the element index is not +SELECT * FROM cypher('reduce', $$ + WITH [10, 20, 30] AS lookup + RETURN reduce(s = 0, x IN [1, 2, 3] | s + lookup[x - 1]) +$$) AS (result agtype); + +-- an outer reference inside a CASE branch of the body is captured +SELECT * FROM cypher('reduce', $$ + WITH 10 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN x % 2 = 0 THEN s + w ELSE s + x END) +$$) AS (result agtype); + +-- a NULL outer value propagates through the fold +SELECT * FROM 
cypher('reduce', $$ + WITH null AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + w) +$$) AS (result agtype); + +-- multiple outer captures with a mix of NULL and non-NULL: each is bound to its +-- own slot (the non-NULL multiplier is bound and the NULL still propagates) +SELECT * FROM cypher('reduce', $$ + WITH 3 AS a, null AS b + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x * a + b) +$$) AS (result agtype); + +-- an outer variable that changes per row is captured per group +SELECT * FROM cypher('reduce', $$ + UNWIND [1, 2, 3] AS m + RETURN reduce(s = 0, x IN [1, 2, 3, 4] | s + x * m) AS total + ORDER BY total +$$) AS (result agtype); + +-- +-- Short-circuit evaluation is preserved for outer references in the body +-- +-- Only the outer leaf is captured; operators and CASE/AND/OR branches stay in +-- the body, so a guarded outer sub-expression is not evaluated on a branch +-- that is not taken. Each case below would divide by zero if the whole "1/w" +-- were hoisted into an eagerly evaluated aggregate argument instead. 
+-- the THEN branch is never taken, so "1/w" is not evaluated (expect 6) +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN false THEN s + 1/w ELSE s + x END) +$$) AS (result agtype); + +-- the ELSE branch is never taken, so "1/w" is not evaluated (expect 6) +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN true THEN s + x ELSE s + 1/w END) +$$) AS (result agtype); + +-- OR short-circuits once "w = 0" is true, so "1/w > 0" is not evaluated +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = true, x IN [1, 2, 3] | s AND (w = 0 OR 1/w > 0)) +$$) AS (result agtype); + +-- AND short-circuits once "w <> 0" is false, so "1/w > 0" is not evaluated +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = true, x IN [1, 2, 3] | s AND (w <> 0 AND 1/w > 0)) +$$) AS (result agtype); + +-- coalesce short-circuits: "1/w" is not evaluated when arg 1 is non-null +SELECT * FROM cypher('reduce', $$ + WITH 0 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | s + coalesce(w, 1/w)) +$$) AS (result agtype); + +-- when the guarded branch is taken, the outer sub-expression is evaluated +-- normally (division by a non-zero outer value): x = 2 -> s + 10/2 (expect 9) +SELECT * FROM cypher('reduce', $$ + WITH 2 AS w + RETURN reduce(s = 0, x IN [1, 2, 3] | CASE WHEN x % 2 = 0 THEN s + 10/w ELSE s + x END) +$$) AS (result agtype); + +-- +-- Not-yet-supported constructs raise a clean feature error +-- +-- a nested reduce() in the body (any subquery in the body is unsupported) SELECT * FROM cypher('reduce', $$ RETURN reduce(s = 0, x IN [1, 2] | s + reduce(t = 0, y IN [x] | t + y)) $$) AS (result agtype); @@ -335,6 +496,43 @@ SELECT * FROM cypher('reduce', $$ RETURN reduce(s = 0, x IN [1, 2] | s + count(x)) $$) AS (result agtype); +-- +-- Syntax errors: each required piece of the reduce() form is enforced +-- +-- missing "= init" +SELECT * FROM cypher('reduce', $$ + RETURN 
reduce(s, x IN [1, 2] | s + x) +$$) AS (result agtype); + +-- missing ", var IN list" +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0 | s) +$$) AS (result agtype); + +-- missing "| body" +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2]) +$$) AS (result agtype); + +-- a qualified iterator variable is not allowed +SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x.y IN [1, 2] | s) +$$) AS (result agtype); + +-- +-- cypher() parameter referenced in the fold body (via a prepared statement) +-- +PREPARE reduce_param(agtype) AS + SELECT * FROM cypher('reduce', $$ + RETURN reduce(s = 0, x IN [1, 2, 3] | s + x + $p) + $$, $1) AS (result agtype); + +EXECUTE reduce_param('{"p": 10}'); + +EXECUTE reduce_param('{"p": 100}'); + +DEALLOCATE reduce_param; + -- -- "reduce" as a property key name (safe_keywords backward compatibility): -- because reduce() introduced a reserved keyword, confirm the word is still diff --git a/sql/age_aggregate.sql b/sql/age_aggregate.sql index fb258e5c5..9ad715683 100644 --- a/sql/age_aggregate.sql +++ b/sql/age_aggregate.sql @@ -223,17 +223,21 @@ CREATE AGGREGATE ag_catalog.age_collect(variadic "any") -- Transition function for the age_reduce aggregate. The fold body is compiled -- by transform_cypher_reduce() with the accumulator and element rewritten to -- PARAM_EXEC params 0 and 1 and serialized into the text argument; the --- transition evaluates it for each element in list order. It must be callable --- with a NULL transition state (no initcond), so it is intentionally not STRICT. -CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype) +-- transition evaluates it for each element in list order. The trailing +-- agtype[] argument carries the loop-invariant outer values (outer-query +-- variables and cypher() parameters) referenced by the body, bound to +-- PARAM_EXEC params 2, 3, ... It must be callable with a NULL transition state +-- (no initcond), so it is intentionally not STRICT. 
+CREATE FUNCTION ag_catalog.age_reduce_transfn(agtype, agtype, text, agtype, agtype[]) RETURNS agtype LANGUAGE c PARALLEL UNSAFE AS 'MODULE_PATHNAME'; -- aggregate definition for reduce(); direct arguments are --- (init, serialized-body, element), with the element fed ORDER BY ordinality. -CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype) +-- (init, serialized-body, element, captured-outer-values), with the element +-- fed ORDER BY ordinality. +CREATE AGGREGATE ag_catalog.age_reduce(agtype, text, agtype, agtype[]) ( stype = agtype, sfunc = ag_catalog.age_reduce_transfn diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 6582ff8d1..72370ba06 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2110,15 +2110,60 @@ static Query *make_reduce_var_subquery(char *acc_name, char *elem_name) } /* - * Validate a transformed-and-mutated reduce() fold body. After - * reduce_var_to_param_mutator() has replaced the accumulator and element with - * PARAM_EXEC params 0 and 1, a valid body is a pure expression over those two - * params: it must contain no other Vars (outer-query references), no other - * params, and no aggregates or subqueries, because the body is evaluated - * standalone (ExecEvalExpr) inside age_reduce_transfn with only those two - * param slots bound. + * Walker: true if the subtree references the reduce() accumulator or element, + * i.e. it contains PARAM_EXEC param 0 or 1 (assigned by + * reduce_var_to_param_mutator). Such a subtree changes per element and cannot + * be captured as a loop-invariant outer value. 
*/ -static bool reduce_body_check_walker(Node *node, void *context) +static bool reduce_expr_has_acc_elem(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind == PARAM_EXEC && + (param->paramid == 0 || param->paramid == 1)) + { + return true; + } + } + + return expression_tree_walker(node, reduce_expr_has_acc_elem, context); +} + +/* + * Walker: true if the subtree contains an aggregate, grouping, or window + * function. Such a node cannot be evaluated standalone and must not be folded + * into a captured outer value (it would become an illegal nested aggregate). + */ +static bool reduce_expr_has_aggregate(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, WindowFunc)) + { + return true; + } + + return expression_tree_walker(node, reduce_expr_has_aggregate, context); +} + +/* + * Walker: true if the subtree references anything that cannot be evaluated + * standalone -- an outer-query Var or a non-PARAM_EXEC parameter (e.g. a + * cypher() $parameter, which transforms to agtype_access_operator over a + * PARAM_EXTERN). Such a subtree must be captured and supplied to the fold via + * the extras array. A subtree of only constants does not need capturing. 
+ */ +static bool reduce_expr_needs_capture(Node *node, void *context) { if (node == NULL) { @@ -2127,24 +2172,150 @@ static bool reduce_body_check_walker(Node *node, void *context) if (IsA(node, Var)) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("a reduce() expression may only reference its accumulator and element variables"))); + return true; } if (IsA(node, Param)) { Param *param = (Param *) node; - if (param->paramkind != PARAM_EXEC || - (param->paramid != 0 && param->paramid != 1)) + if (param->paramkind != PARAM_EXEC) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("a reduce() expression may not reference query parameters"))); + return true; + } + } + + return expression_tree_walker(node, reduce_expr_needs_capture, context); +} + +/* + * Walker: true if the subtree contains a subquery (SubLink). A captured outer + * value is supplied to the aggregate as a plain expression argument, which the + * standalone fold evaluator cannot plan, so a subtree containing a subquery is + * never captured -- it falls through to the explicit rejection instead. + */ +static bool reduce_expr_has_sublink(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, SubLink)) + { + return true; + } + + return expression_tree_walker(node, reduce_expr_has_sublink, context); +} + +/* + * Walker: true if the subtree contains an outer reference that is not itself + * agtype-typed -- a non-agtype Var, or a non-agtype non-PARAM_EXEC Param. The + * common case is the graphid component of a graph vertex/edge variable: a + * pattern variable expands to a builder over its underlying columns, one of + * which is a graphid Var. Such a value cannot stand alone as an agtype[] extra, + * so its smallest enclosing agtype-typed subtree is captured whole rather than + * being decomposed to leaves. 
+ */ +static bool reduce_expr_has_nonagtype_outer(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Var)) + { + if (((Var *) node)->vartype != AGTYPEOID) + { + return true; + } + } + + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind != PARAM_EXEC && param->paramtype != AGTYPEOID) + { + return true; } } + return expression_tree_walker(node, reduce_expr_has_nonagtype_outer, + context); +} + +/* + * Mutator context for capturing loop-invariant outer references in a reduce() + * fold body. Each captured subtree is assigned the next PARAM_EXEC id (starting + * at 2, after the accumulator and element) and collected, in id order, so the + * caller can supply the values to age_reduce_transfn through the extras array. + */ +typedef struct reduce_capture_context +{ + int next_slot; /* next PARAM_EXEC id to assign (starts at 2) */ + List *captured; /* captured outer-reference exprs, in slot order */ +} reduce_capture_context; + +/* + * Capture the loop-invariant outer references in a reduce() fold body. + * + * After reduce_var_to_param_mutator() has rewritten the accumulator and + * element to PARAM_EXEC params 0 and 1, the remaining outer references (outer- + * query variables and cypher() $parameters) are replaced by new PARAM_EXEC + * params 2, 3, ... and collected in slot order. Each captured value is + * loop-invariant within a fold, so the executor evaluates it once per row in + * the outer query context (as an aggregate argument) and binds it to its slot; + * the body expression is then evaluated per element by the fold. + * + * Capture is as fine-grained as the agtype[] extras argument allows, because a + * captured value becomes an aggregate argument that the executor evaluates + * eagerly and unconditionally. 
Hoisting a whole computed subtree out of the + * body would defeat short-circuiting: in + * reduce(s = 0, x IN [1] | CASE WHEN false THEN s + 1/z ELSE s END) + * capturing "1/z" would divide by zero even though the WHEN branch is never + * taken. So when every outer reference in a loop-invariant subtree is itself + * agtype-typed, the mutator recurses and captures only the bare leaves (here + * "z"), leaving the operators and CASE/AND/OR branches in the body under the + * fold's own control flow. A leaf read cannot raise an error, so evaluating it + * eagerly is safe; the only cost is re-evaluating a loop-invariant + * sub-computation per element, which is always correct. + * + * The exception is an outer reference that is not agtype-typed and so cannot be + * an agtype[] extra on its own -- most commonly the graphid inside a graph + * vertex/edge variable, which expands to a builder over its underlying columns. + * Such a subtree cannot be decomposed to agtype leaves, so its smallest + * enclosing agtype-typed subtree is captured whole (for example the scalar + * value of "u.vals[0]"). A property read like that cannot raise an error + * either, so eager evaluation is still safe. + * + * Aggregates and subqueries (including a nested reduce()) are rejected + * outright: an aggregate is undefined inside a per-element fold, and a subquery + * cannot be supplied as a plain aggregate argument or evaluated standalone. + */ +static Node *reduce_capture_mutator(Node *node, void *context) +{ + reduce_capture_context *ctx = (reduce_capture_context *) context; + + if (node == NULL) + { + return NULL; + } + + /* + * Container / support nodes that expression_tree_mutator hands us are not + * themselves typed expressions (calling exprType on them errors), so just + * recurse into them. For an agtype scalar fold body these are List nodes + * (argument lists) and CaseWhen nodes (CASE branches). 
+ */ + if (IsA(node, List) || IsA(node, CaseWhen)) + { + return expression_tree_mutator(node, reduce_capture_mutator, context); + } + + /* an aggregate in the fold body is never supported */ if (IsA(node, Aggref) || IsA(node, GroupingFunc) || IsA(node, WindowFunc)) { ereport(ERROR, @@ -2152,6 +2323,62 @@ static bool reduce_body_check_walker(Node *node, void *context) errmsg("aggregate functions are not supported in a reduce() expression"))); } + /* + * A loop-invariant, agtype-typed subtree that references an outer value and + * embeds no aggregate or subquery is a capture candidate. The exprType + * guard is evaluated first and the accumulator/element and subquery tests + * short-circuit the rest, so the walkers are never run on a non-agtype + * wrapper or descended into a nested reduce()'s subquery -- both of which + * would otherwise trip the tree walker on a node it cannot type. + */ + if (exprType(node) == AGTYPEOID && + !reduce_expr_has_acc_elem(node, NULL) && + !reduce_expr_has_aggregate(node, NULL) && + !reduce_expr_has_sublink(node, NULL) && + reduce_expr_needs_capture(node, NULL)) + { + /* + * Capture the whole subtree when it is a bare outer leaf (a Var or a + * cypher() $parameter Param) or the smallest enclosing agtype-typed + * wrapper of a non-agtype outer reference -- most commonly the graphid + * of a graph vertex/edge variable, which cannot stand alone as an + * agtype[] extra. Such a value is a plain read that cannot raise an + * error, so evaluating it eagerly as an aggregate argument is safe. 
+ */ + if (IsA(node, Var) || + (IsA(node, Param) && ((Param *) node)->paramkind != PARAM_EXEC) || + reduce_expr_has_nonagtype_outer(node, NULL)) + { + Param *param = makeNode(Param); + + param->paramkind = PARAM_EXEC; + param->paramid = ctx->next_slot++; + param->paramtype = AGTYPEOID; + param->paramtypmod = -1; + param->paramcollid = InvalidOid; + param->location = -1; + + ctx->captured = lappend(ctx->captured, copyObject(node)); + + return (Node *) param; + } + + /* + * Otherwise every outer reference in the subtree is agtype-typed and + * the node itself is a computation (an operator, function call, or CASE + * result). Recurse to capture those outer leaves individually and leave + * the computation in the body, so the fold's own control flow -- not an + * eagerly evaluated aggregate argument -- decides whether it runs. This + * is what preserves CASE/AND/OR short-circuiting. + */ + return expression_tree_mutator(node, reduce_capture_mutator, context); + } + + /* + * A subquery in the body (for example a nested reduce()) is never captured + * -- the capture test above excludes it -- and cannot be evaluated + * standalone by the fold; reject it. + */ if (IsA(node, SubLink)) { ereport(ERROR, @@ -2159,6 +2386,43 @@ static bool reduce_body_check_walker(Node *node, void *context) errmsg("subqueries (including a nested reduce()) are not supported in a reduce() expression"))); } + return expression_tree_mutator(node, reduce_capture_mutator, context); +} + +/* + * Safety net run after reduce_capture_mutator(). A valid body now references + * only PARAM_EXEC params (0/1 for the accumulator and element, 2.. for the + * captured outer values) and constants. Any remaining Var or non-PARAM_EXEC + * parameter is an outer reference that could not be captured (for example a + * non-agtype-typed one); reject it cleanly rather than letting it reach the + * standalone evaluator. 
+ */ +static bool reduce_body_check_walker(Node *node, void *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Var)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("a reduce() expression references a value that cannot be used in the fold body"))); + } + + if (IsA(node, Param)) + { + Param *param = (Param *) node; + + if (param->paramkind != PARAM_EXEC) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("a reduce() expression references a value that cannot be used in the fold body"))); + } + } + return expression_tree_walker(node, reduce_body_check_walker, context); } @@ -2170,12 +2434,17 @@ static bool reduce_body_check_walker(Node *node, void *context) * aggregate ordered by that ordinality so the fold runs in list order: * * SELECT ag_catalog.age_reduce(<init>, '<serialized-body>'::text, - * r.elem ORDER BY r.ord) + * r.elem, + * <captured-outer-values> ORDER BY r.ord) * FROM unnest(<list>) WITH ORDINALITY AS r(elem, ord) * * The fold body is transformed separately with the accumulator and element * rewritten to PARAM_EXEC params 0 and 1, serialized into the text argument, - * and evaluated per element inside age_reduce_transfn. + * and evaluated per element inside age_reduce_transfn. Loop-invariant outer + * references in the body (outer-query variables and cypher() parameters) are + * captured as PARAM_EXEC params 2.. and passed through the trailing agtype[] + * argument so the body can use values from the enclosing query; correlated + * captures are re-evaluated per group.
* * The null/empty-list guard * (CASE WHEN list IS NULL THEN NULL ELSE COALESCE(<aggregate>, init) END) is built @@ -2193,6 +2462,7 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, Node *body_node; char *body_serialized; reduce_var_param_context mutator_ctx; + reduce_capture_context capture_ctx; cypher_parsestate *child_cpstate; ParseState *child_pstate; FuncCall *unnest_fc; @@ -2210,9 +2480,11 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, Oid sort_eqop; bool sort_hashable; Const *body_const; + ArrayExpr *extras_arr; + List *extras_exprs = NIL; Aggref *agg; Oid agg_oid; - Oid agg_argtypes[3]; + Oid agg_argtypes[4]; TargetEntry *result_te; /* @@ -2266,6 +2538,18 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, mutator_ctx.varno = body_pnsi->p_rtindex; body_node = reduce_var_to_param_mutator(body_node, &mutator_ctx); + /* + * Capture loop-invariant outer references (outer-query variables and + * cypher() parameters) in the body as PARAM_EXEC params 2.. and collect + * them in slot order; their values are supplied to the fold through the + * extras array argument built below.
+ */ + capture_ctx.next_slot = 2; + capture_ctx.captured = NIL; + body_node = reduce_capture_mutator(body_node, &capture_ctx); + extras_exprs = capture_ctx.captured; + + /* reject anything in the body that could not be captured or evaluated */ reduce_body_check_walker(body_node, NULL); body_serialized = nodeToString(body_node); @@ -2316,7 +2600,7 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, get_sort_group_operators(INT8OID, true, true, false, &sort_ltop, &sort_eqop, NULL, &sort_hashable); - ord_te = makeTargetEntry((Expr *) ord_var, 4, NULL, true); + ord_te = makeTargetEntry((Expr *) ord_var, 5, NULL, true); ord_te->ressortgroupref = 1; sortcl = makeNode(SortGroupClause); @@ -2382,13 +2666,28 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, init_node = (Node *) init_case; } - /* look up the age_reduce(agtype, text, agtype) aggregate */ + /* + * The captured loop-invariant outer values (outer-query variables and + * cypher() parameters referenced by the body) are passed to the aggregate + * as an agtype[] argument, in the same order their PARAM_EXEC params 2, 3, + * ... were assigned. When the body references nothing outside the + * accumulator and element this is an empty array. 
+ */ + extras_arr = makeNode(ArrayExpr); + extras_arr->array_typeid = AGTYPEARRAYOID; + extras_arr->element_typeid = AGTYPEOID; + extras_arr->elements = extras_exprs; + extras_arr->multidims = false; + extras_arr->location = -1; + + /* look up the age_reduce(agtype, text, agtype, agtype[]) aggregate */ agg_argtypes[0] = AGTYPEOID; agg_argtypes[1] = TEXTOID; agg_argtypes[2] = AGTYPEOID; + agg_argtypes[3] = AGTYPEARRAYOID; agg_oid = LookupFuncName(list_make2(makeString("ag_catalog"), makeString("age_reduce")), - 3, agg_argtypes, false); + 4, agg_argtypes, false); agg = makeNode(Aggref); agg->aggfnoid = agg_oid; @@ -2396,11 +2695,13 @@ static Query *transform_cypher_reduce(cypher_parsestate *cpstate, agg->aggcollid = InvalidOid; agg->inputcollid = InvalidOid; agg->aggtranstype = InvalidOid; /* filled by the planner */ - agg->aggargtypes = list_make3_oid(AGTYPEOID, TEXTOID, AGTYPEOID); + agg->aggargtypes = list_make4_oid(AGTYPEOID, TEXTOID, AGTYPEOID, + AGTYPEARRAYOID); agg->aggdirectargs = NIL; - agg->args = list_make4(makeTargetEntry((Expr *) init_node, 1, NULL, false), + agg->args = list_make5(makeTargetEntry((Expr *) init_node, 1, NULL, false), makeTargetEntry((Expr *) body_const, 2, NULL, false), makeTargetEntry((Expr *) elem_var, 3, NULL, false), + makeTargetEntry((Expr *) extras_arr, 4, NULL, false), ord_te); agg->aggorder = list_make1(sortcl); agg->aggdistinct = NIL; diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index 0e1a7963f..10d95ee43 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -11584,13 +11584,16 @@ Datum age_float8_stddev_pop_aggfinalfn(PG_FUNCTION_ARGS) /* * Per-aggregate-group evaluation state for reduce(). Caches the compiled * fold-body expression and a standalone ExprContext whose PARAM_EXEC slots - * (0 = accumulator, 1 = current element) are rebound on every element. + * are rebound on every element. Slot 0 = accumulator, slot 1 = current + * element, and slots 2 .. 
nparams-1 = captured loop-invariant outer values + * (outer-query variables and cypher() parameters referenced by the body). */ typedef struct reduce_eval_ctx { ExprState *body_state; /* compiled fold-body expression */ ExprContext *econtext; /* eval context carrying the param slots */ - ParamExecData *params; /* [0] = accumulator, [1] = current element */ + ParamExecData *params; /* [0]=accumulator, [1]=element, [2..]=outer refs */ + int nparams; /* total param slots = 2 + number of captures */ } reduce_eval_ctx; /* Build an agtype 'null' Datum (a real agtype value, not a SQL NULL). */ @@ -11603,12 +11606,17 @@ static Datum reduce_agtype_null(void) } /* - * age_reduce_transfn(state agtype, init agtype, body text, element agtype) + * age_reduce_transfn(state agtype, init agtype, body text, element agtype, + * extras agtype[]) * * Transition function for the age_reduce aggregate that implements the Cypher * reduce(acc = init, var IN list | body) fold. The fold body is compiled by * transform_cypher_reduce() with the accumulator and element rewritten to * PARAM_EXEC params 0 and 1, then serialized into the `body` text argument. + * Any loop-invariant outer-query variable or cypher() parameter referenced by + * the body is captured into the `extras` agtype array and rewritten to a + * PARAM_EXEC param 2, 3, ... in body order; those slots are bound from the + * array here. 
* * On the first element of a group the accumulator is seeded from `init` * (the running state is NULL because the aggregate uses no initcond); on @@ -11655,6 +11663,7 @@ Datum age_reduce_transfn(PG_FUNCTION_ARGS) text *body_txt; char *body_str; Node *body_node; + int n_extras = 0; if (PG_ARGISNULL(2)) { @@ -11663,6 +11672,25 @@ Datum age_reduce_transfn(PG_FUNCTION_ARGS) errmsg("age_reduce: missing fold expression"))); } + /* + * The number of captured outer values is fixed for this aggregate + * call (the body's structure does not change between rows), so it is + * read once here to size the param array. Their values are bound per + * row below because a correlated capture changes between groups. + * + * The PG_NARGS() guard lets the function tolerate being reached + * through an older 4-argument aggregate definition (for example a + * stale catalog paired with a newer age.so): a missing extras + * argument is simply treated as zero captures. + */ + if (PG_NARGS() > 4 && !PG_ARGISNULL(4)) + { + ArrayType *extras_arr = PG_GETARG_ARRAYTYPE_P(4); + + n_extras = ArrayGetNItems(ARR_NDIM(extras_arr), + ARR_DIMS(extras_arr)); + } + oldctx = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt); rc = (reduce_eval_ctx *) palloc0(sizeof(reduce_eval_ctx)); body_txt = PG_GETARG_TEXT_PP(2); @@ -11689,7 +11717,9 @@ Datum age_reduce_transfn(PG_FUNCTION_ARGS) rc->body_state = ExecInitExpr((Expr *) body_node, NULL); rc->econtext = CreateStandaloneExprContext(); - rc->params = (ParamExecData *) palloc0(sizeof(ParamExecData) * 2); + rc->nparams = 2 + n_extras; + rc->params = (ParamExecData *) palloc0(sizeof(ParamExecData) * + rc->nparams); rc->econtext->ecxt_param_exec_vals = rc->params; fcinfo->flinfo->fn_extra = rc; MemoryContextSwitchTo(oldctx); @@ -11713,6 +11743,9 @@ Datum age_reduce_transfn(PG_FUNCTION_ARGS) /* a NULL element is likewise normalized to agtype 'null' */ element = PG_ARGISNULL(3) ? 
reduce_agtype_null() : PG_GETARG_DATUM(3); + /* evaluate the fold body for this element */ + ResetExprContext(rc->econtext); + /* bind PARAM_EXEC 0 = accumulator, 1 = current element */ rc->params[0].value = acc; rc->params[0].isnull = false; @@ -11721,8 +11754,57 @@ Datum age_reduce_transfn(PG_FUNCTION_ARGS) rc->params[1].isnull = false; rc->params[1].execPlan = NULL; - /* evaluate the fold body for this element */ - ResetExprContext(rc->econtext); + /* + * Bind the captured loop-invariant outer values to params 2 .. The values + * are pulled from the extras array every row because correlated captures + * differ between groups; the per-row deconstruction is done in the + * econtext's per-tuple memory (reset above) so it does not leak. A NULL + * array element is normalized to agtype 'null' like the accumulator and + * element. + * + * Every slot 2 .. nparams-1 is rebound on every row, so a slot never + * retains a value from a previous row -- which, after the per-tuple reset + * above, would be a dangling pointer. If the extras array supplies fewer + * values than there are capture slots (only reachable through a direct SQL + * call with a varying-length array), the unsupplied slots are filled with + * agtype 'null'. The PG_NARGS() guard keeps the arg-4 access safe under an + * older 4-argument signature. 
+ */ + if (rc->nparams > 2 && PG_NARGS() > 4 && !PG_ARGISNULL(4)) + { + ArrayType *extras_arr = PG_GETARG_ARRAYTYPE_P(4); + Oid elemtype = ARR_ELEMTYPE(extras_arr); + int16 typlen; + bool typbyval; + char typalign; + Datum *ex_vals; + bool *ex_nulls; + int ex_n; + int i; + MemoryContext per_tuple = rc->econtext->ecxt_per_tuple_memory; + MemoryContext save = MemoryContextSwitchTo(per_tuple); + + get_typlenbyvalalign(elemtype, &typlen, &typbyval, &typalign); + deconstruct_array(extras_arr, elemtype, typlen, typbyval, typalign, + &ex_vals, &ex_nulls, &ex_n); + + for (i = 0; (2 + i) < rc->nparams; i++) + { + if (i < ex_n && !ex_nulls[i]) + { + rc->params[2 + i].value = ex_vals[i]; + } + else + { + rc->params[2 + i].value = reduce_agtype_null(); + } + rc->params[2 + i].isnull = false; + rc->params[2 + i].execPlan = NULL; + } + + MemoryContextSwitchTo(save); + } + result = ExecEvalExpr(rc->body_state, rc->econtext, &result_isnull); /* From 373da2dca16b0de4d706b29d2df1a2cdf2389e3d Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Thu, 2 Jul 2026 13:00:47 -0400 Subject: [PATCH 18/20] Fix segfault and out-of-bounds reads in file loaders on malformed rows (#2453) * Fix segfault and out-of-bounds reads in file loaders on malformed rows load_edges_from_file() and load_labels_from_file() build their COPY parser with only format=csv and header=false, so COPY uses its default comma delimiter. A file delimited by anything else (or a malformed row) then parses with an unexpected column count, and the loaders indexed the parsed fields without validating that count: - process_edge_row() reads the four fixed fields fields[0..3] unconditionally. A non-comma-delimited edge file parses as a single column, so fields[1..3] are out of bounds -> segfault (issue #2449). - create_agtype_from_list()/_i() pair header[i] with fields[i] for all i < nfields, so a row with more fields than the header reads header[i] out of bounds. 
Add bounds validation that turns these into clear errors: - Edge header must have >= 4 columns; a smaller count almost always means the wrong delimiter, so the error carries a hint. - Each edge row must have >= 4 columns and no more than the header's. - Each label row must have no more than the header's column count. Rows with fewer trailing columns than the header remain allowed, matching existing behavior (exercised by the existing conversion tests). This closes the segfault and out-of-bounds reads. The silent mis-parsing of a non-comma file whose header and rows share the same (wrong) column count is not detectable here; adding a delimiter option to the load functions is a separate follow-up. Adds a regression test in age_load using a pipe-delimited edge file. Addresses #2449. * loader guards: clarify error wording and add per-row regression coverage Address review feedback on the nfields guards: - Error messages now say "the header's %d columns" (was "the header's %d"), making the count's unit explicit. - Add regression cases exercising the per-row guards, which previously only had coverage for the mis-delimited-header path: * an edge row with fewer than 4 columns * an edge row with more columns than the header * a label row with more columns than the header Each asserts a clean ERROR (these were the out-of-bounds reads the guards now catch). 
--- regress/age_load/data/bad_delim_edges.csv | 2 + regress/age_load/data/edges_long_row.csv | 2 + regress/age_load/data/edges_short_row.csv | 2 + regress/age_load/data/labels_long_row.csv | 2 + regress/expected/age_load.out | 57 +++++++++++++++++++++++ regress/sql/age_load.sql | 29 ++++++++++++ src/backend/utils/load/ag_load_edges.c | 39 ++++++++++++++++ src/backend/utils/load/ag_load_labels.c | 17 +++++++ 8 files changed, 150 insertions(+) create mode 100644 regress/age_load/data/bad_delim_edges.csv create mode 100644 regress/age_load/data/edges_long_row.csv create mode 100644 regress/age_load/data/edges_short_row.csv create mode 100644 regress/age_load/data/labels_long_row.csv diff --git a/regress/age_load/data/bad_delim_edges.csv b/regress/age_load/data/bad_delim_edges.csv new file mode 100644 index 000000000..c72170a6a --- /dev/null +++ b/regress/age_load/data/bad_delim_edges.csv @@ -0,0 +1,2 @@ +start_id|start_vertex_type|end_id|end_vertex_type +1|V|2|V diff --git a/regress/age_load/data/edges_long_row.csv b/regress/age_load/data/edges_long_row.csv new file mode 100644 index 000000000..2036f534a --- /dev/null +++ b/regress/age_load/data/edges_long_row.csv @@ -0,0 +1,2 @@ +start_id,start_vertex_type,end_id,end_vertex_type +1,V,2,V,extra diff --git a/regress/age_load/data/edges_short_row.csv b/regress/age_load/data/edges_short_row.csv new file mode 100644 index 000000000..e307927b3 --- /dev/null +++ b/regress/age_load/data/edges_short_row.csv @@ -0,0 +1,2 @@ +start_id,start_vertex_type,end_id,end_vertex_type +1,V diff --git a/regress/age_load/data/labels_long_row.csv b/regress/age_load/data/labels_long_row.csv new file mode 100644 index 000000000..72ec2a305 --- /dev/null +++ b/regress/age_load/data/labels_long_row.csv @@ -0,0 +1,2 @@ +id,name +1,Alice,extra diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 1f76c31ce..17c5ecc27 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -454,6 +454,63 @@ 
NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Issue 2449: mis-delimited / malformed load files must fail with a clear +-- error instead of segfaulting or silently corrupting data. Edge files +-- require the 4 fixed columns; a file that is not comma-delimited parses as +-- a single column, so this must be rejected at the header. +-- +SELECT create_graph('agload_delim'); +NOTICE: graph "agload_delim" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_delim', 'V'); +NOTICE: VLabel "V" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_delim', 'E'); +NOTICE: ELabel "E" has been created + create_elabel +--------------- + +(1 row) + +-- pipe-delimited edge file -> parses to 1 column -> clean error at the header +-- (was a segfault) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/bad_delim_edges.csv'); +ERROR: edge file must have at least 4 columns (start_id, start_vertex_type, end_id, end_vertex_type), but the header has 1 +HINT: load_edges_from_file expects a comma-delimited CSV; check the file's delimiter. 
+-- per-row guards (header is valid, but an individual data row is ragged): +-- an edge row with fewer than 4 columns -> clean error (was an OOB read of +-- the fixed fields[1..3]) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_short_row.csv'); +ERROR: edge file row has 2 columns; expected at least 4 and no more than the header's 4 columns +-- an edge row with more columns than the header -> clean error (was an OOB +-- read of header[i] in create_agtype_from_list_i) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_long_row.csv'); +ERROR: edge file row has 5 columns; expected at least 4 and no more than the header's 4 columns +-- a label row with more columns than the header -> clean error (was an OOB +-- read of header[i] in create_agtype_from_list) +SELECT load_labels_from_file('agload_delim', 'V', 'age_load/labels_long_row.csv'); +ERROR: label file row has 3 columns, more than the header's 2 columns +SELECT drop_graph('agload_delim', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table agload_delim._ag_label_vertex +drop cascades to table agload_delim._ag_label_edge +drop cascades to table agload_delim."V" +drop cascades to table agload_delim."E" +NOTICE: graph "agload_delim" has been dropped + drop_graph +------------ + +(1 row) + -- -- Test security and permissions -- diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 976f050af..196b09806 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -194,6 +194,35 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t -- SELECT drop_graph('agload_conversion', true); +-- +-- Issue 2449: mis-delimited / malformed load files must fail with a clear +-- error instead of segfaulting or silently corrupting data. Edge files +-- require the 4 fixed columns; a file that is not comma-delimited parses as +-- a single column, so this must be rejected at the header. 
+-- +SELECT create_graph('agload_delim'); +SELECT create_vlabel('agload_delim', 'V'); +SELECT create_elabel('agload_delim', 'E'); + +-- pipe-delimited edge file -> parses to 1 column -> clean error at the header +-- (was a segfault) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/bad_delim_edges.csv'); + +-- per-row guards (header is valid, but an individual data row is ragged): +-- an edge row with fewer than 4 columns -> clean error (was an OOB read of +-- the fixed fields[1..3]) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_short_row.csv'); + +-- an edge row with more columns than the header -> clean error (was an OOB +-- read of header[i] in create_agtype_from_list_i) +SELECT load_edges_from_file('agload_delim', 'E', 'age_load/edges_long_row.csv'); + +-- a label row with more columns than the header -> clean error (was an OOB +-- read of header[i] in create_agtype_from_list) +SELECT load_labels_from_file('agload_delim', 'V', 'age_load/labels_long_row.csv'); + +SELECT drop_graph('agload_delim', true); + -- -- Test security and permissions -- diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index c05bf3352..01585bab0 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -56,6 +56,24 @@ static void process_edge_row(char **fields, int nfields, char *end_vertex_type; agtype *edge_properties; + /* + * Guard the fixed fields[0..3] accesses below and the header[i]/fields[i] + * pairing in create_agtype_from_list_i() against out-of-bounds reads on + * malformed or mis-delimited rows. A row must have at least the 4 fixed + * columns and no more columns than the header (rows with fewer trailing + * property columns than the header are allowed, matching existing + * behavior). A single-column row from a non-comma-delimited file is + * rejected here (previously it segfaulted). 
+ */ + if (nfields < 4 || nfields > header_count) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("edge file row has %d columns; expected at least 4 " + "and no more than the header's %d columns", + nfields, header_count))); + } + /* Generate edge ID */ entry_id = nextval_internal(label_seq_relid, true); edge_id = make_graphid(label_id, entry_id); @@ -219,6 +237,27 @@ int create_edges_from_csv_file(char *file_path, header[i] = trim_whitespace(fields[i]); } + /* + * Edge files require the four fixed columns start_id, + * start_vertex_type, end_id and end_vertex_type. A smaller + * count almost always means the file is not comma-delimited + * (COPY in CSV format defaults to comma). Fail clearly here instead of + * reading past the parsed fields in process_edge_row(), which + * previously caused a segfault. + */ + if (header_count < 4) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("edge file must have at least 4 columns " + "(start_id, start_vertex_type, end_id, " + "end_vertex_type), but the header has %d", + header_count), + errhint("load_edges_from_file expects a " + "comma-delimited CSV; check the file's " + "delimiter."))); + } + is_first_row = false; } else diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 5b11f68b8..236d47a1d 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -46,6 +46,23 @@ static void process_vertex_row(char **fields, int nfields, TupleTableSlot *slot; agtype *vertex_properties; + /* + * Guard the header[i]/fields[i] pairing in create_agtype_from_list() + * against out-of-bounds reads on malformed rows that have more fields + * than the header. Rows with fewer fields than the header are allowed + * (matching existing behavior).
Note: a file delimited by something + * other than comma is parsed as a single column throughout, so header + * and rows still match and the data lands in properties verbatim -- + * specifying the delimiter is the separate fix for that. + */ + if (nfields > header_count) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("label file row has %d columns, more than the " + "header's %d columns", nfields, header_count))); + } + /* Generate or use provided entry_id */ if (id_field_exists) { From 8373dc80ed28cbbdd682e3195cabc60cad2a4038 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Thu, 2 Jul 2026 13:02:07 -0400 Subject: [PATCH 19/20] ci: pin Build/Regression runner to ubuntu-24.04 and guard Bison version for GLR grammar (#2445) * ci: pin runner to ubuntu-24.04 + guard Bison version for GLR grammar The Cypher GLR grammar pins exact shift/reduce and reduce/reduce conflict counts via %expect / %expect-rr in cypher_gram.y, and Bison treats %expect as exact-match: a different Bison version can report different counts and break the build. ubuntu-latest floats to new Ubuntu releases (and new Bison versions), which would silently shift those counts. Pin runs-on to ubuntu-24.04 to freeze Bison at 3.8.x, and add a guard step that fails loudly with a pointer to cypher_gram.y if Bison ever drifts off 3.8.x. Reproducibility comes from pinning the variable rather than widening the conflict-count tolerance, keeping the exact-match alarm for genuinely new grammar conflicts intact. * ci: make Bison version parse robust (awk + explicit empty guard) Address Copilot review on #2445: the previous grep-based parse could silently yield an empty version and fail with a confusing 'Bison != 3.8.x' message. Parse the version field with awk and error explicitly when it can't be determined. 
Resolved conflict: .github/workflows/installcheck.yaml --- .github/workflows/installcheck.yaml | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml index 9ea32e214..61687cd15 100644 --- a/.github/workflows/installcheck.yaml +++ b/.github/workflows/installcheck.yaml @@ -9,7 +9,12 @@ on: jobs: build: - runs-on: ubuntu-latest + # Pinned (not ubuntu-latest) so the Bison version stays fixed at 3.8.x. + # The Cypher GLR grammar pins exact conflict counts via %expect / %expect-rr + # in src/backend/parser/cypher_gram.y, and Bison treats %expect as exact-match: + # a different Bison version can report different counts and break the build. + # Freezing the runner image freezes Bison; bump both together, intentionally. + runs-on: ubuntu-24.04 steps: - name: Get latest commit id of PostgreSQL 19 @@ -28,6 +33,27 @@ jobs: sudo apt-get update sudo apt-get install -y build-essential libreadline-dev zlib1g-dev flex bison + - name: Verify Bison version (grammar conflict counts are pinned) + run: | + ver=$(bison --version | awk 'NR==1 {print $NF}') + if [ -z "$ver" ]; then + echo "::error::Could not determine Bison version from 'bison --version'." + echo "::error::Expected the first line to end with a version (e.g. '... 3.8.2')." + exit 1 + fi + echo "bison $ver" + case "$ver" in + 3.8.*) ;; + *) + echo "::error::Bison $ver != 3.8.x. The Cypher GLR grammar pins exact" + echo "::error::%expect / %expect-rr conflict counts in src/backend/parser/cypher_gram.y." + echo "::error::A new Bison version may report different counts. Re-run bison locally," + echo "::error::update the %expect/%expect-rr numbers (and the comment block), then bump" + echo "::error::the pinned runner image and this guard together." 
+ exit 1 + ;; + esac + - name: Install PostgreSQL 19 and some extensions if: steps.pg19cache.outputs.cache-hit != 'true' run: | From 772c7eb17684cab91c2c5227db098944d6583bac Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 2 Jul 2026 10:23:39 -0700 Subject: [PATCH 20/20] Add automatic header-dependency tracking to the Makefile (#2454) AGE lists OBJS explicitly and relies on PGXS, whose built-in dependency tracking only runs when the server was built with --enable-depend (often disabled). Consequently, editing a header did not recompile the .c files that include it, leaving stale .o files. This is especially dangerous for node/struct headers: a stale ag_nodes.o keeps an outdated node_size, so _readExtensibleNode under-allocates and readNode corrupts the heap ("unrecognized node type"). Emit a .d file beside each object via -MMD -MP and -include them, deriving DEPFILES from OBJS. The mechanism is self-contained (independent of --enable-depend): -MMD skips system headers and -MP tolerates deleted headers. On servers built with --enable-depend, PGXS appends its own -MF after CFLAGS (last -MF wins), so this degrades cleanly to PGXS's tracking. Add DEPFILES to EXTRA_CLEAN and *.d to .gitignore. Co-authored-by: Copilot modified: .gitignore modified: Makefile --- .gitignore | 1 + Makefile | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1e2f8f674..98f4a7f9a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.o +*.d *.so build.sh .idea diff --git a/Makefile b/Makefile index 21c7f81f2..32f34d6e6 100644 --- a/Makefile +++ b/Makefile @@ -162,6 +162,10 @@ OBJS = src/backend/age.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o +# Per-object header-dependency files (see "Automatic header-dependency +# tracking" below the PGXS include). One .d is generated beside each .o. 
+DEPFILES = $(OBJS:.o=.d) + # ===== Extension SQL & data files ===== EXTENSION = age @@ -258,7 +262,8 @@ EXTRA_CLEAN = $(addprefix $(ag_regress_dir)/, $(ag_regress_out)) \ $(all_age_sql) \ $(age_init_sql) \ $(age_upgrade_test_sql) \ - $(ag_regress_dir)/age_upgrade_cleanup.sh + $(ag_regress_dir)/age_upgrade_cleanup.sh \ + $(DEPFILES) GEN_KEYWORDLIST = $(PERL) -I ./tools/ ./tools/gen_keywordlist.pl GEN_KEYWORDLIST_DEPS = ./tools/gen_keywordlist.pl tools/PerfectHash.pm @@ -271,6 +276,23 @@ PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# ===== Automatic header-dependency tracking ===== +# +# AGE lists OBJS explicitly, and PGXS's built-in .deps tracking only runs when +# the *server* was built with --enable-depend (often off). Without the lines +# below, editing a header does NOT rebuild the .c files that include it, leaving +# STALE .o files. This is especially dangerous for node/struct headers: a stale +# ag_nodes.o keeps an old node_size, so _readExtensibleNode under-allocates and +# readNode corrupts the heap ("unrecognized node type"). +# +# The compiler emits a .d file next to each object (-MMD = user headers only; +# -MP adds phony targets so deleting a header does not break the build). With +# "-o foo.o", -MMD writes "foo.d" automatically (no -MF, no basename clashes). +# On servers that DO set --enable-depend, PGXS appends its own "-MF .deps/*.Po" +# after $(CFLAGS) (last -MF wins), so this degrades cleanly to that mechanism. +override CFLAGS += -MMD -MP +-include $(DEPFILES) + # ===== Build rules ===== # 32-bit platform support: pass SIZEOF_DATUM=4 to enable (e.g., make SIZEOF_DATUM=4)