Skip to content

Commit 6d25d66

Browse files
rafsun42 authored and jrgemignani committed
Optimize performance of detach delete (#1271)
Previously, for each vertex to be deleted, all edge tables were scanned once to process the connected edges. Now, this task is postponed until all vertices are deleted. So, the connected edges can be processed in only one scan of the edge tables regardless of the number of deleted vertices.
1 parent 7b6c108 commit 6d25d66

File tree

4 files changed

+236
-55
lines changed

4 files changed

+236
-55
lines changed

regress/expected/cypher_delete.out

Lines changed: 111 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,9 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (:v)-[:e]->(:v)$$) AS (a agtype);
6262

6363
--Should Fail
6464
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n1 RETURN n1$$) AS (a agtype);
65-
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
65+
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
6666
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n2 RETURN n2$$) AS (a agtype);
67-
ERROR: Cannot delete vertex n2, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
67+
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
6868
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE e RETURN e$$) AS (a agtype);
6969
a
7070
------------------------------------------------------------------------------------------------------------------------
@@ -188,7 +188,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v) CREATE (n)-[:e]-
188188
(0 rows)
189189

190190
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[]->() DELETE n1 RETURN n1$$) AS (a agtype);
191-
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
191+
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
192192
--Cleanup
193193
SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype);
194194
a
@@ -234,7 +234,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v)$$) AS (a agtype)
234234
(0 rows)
235235

236236
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->() DELETE n1, e RETURN n1$$) AS (a agtype);
237-
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
237+
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
238238
--Cleanup
239239
SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype);
240240
a
@@ -651,6 +651,113 @@ SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (resu
651651
--------
652652
(0 rows)
653653

654+
--
655+
-- Detach Delete
656+
--
657+
SELECT create_graph('detach_delete');
658+
NOTICE: graph "detach_delete" has been created
659+
create_graph
660+
--------------
661+
662+
(1 row)
663+
664+
SELECT * FROM cypher('detach_delete',
665+
$$
666+
CREATE (x:Label3{name:'x', delete: true}),
667+
(y:Label3{name:'y', delete: true}),
668+
(a:Label1{name:'a', delete: true}),
669+
(b:Label5{name:'b'}),
670+
(c:Label5{name:'c'}),
671+
(d:Label5{name:'d'}),
672+
(m:Label7{name:'m', delete: true}),
673+
(n:Label2{name:'n'}),
674+
(p:Label2{name:'p'}),
675+
(q:Label2{name:'q'}),
676+
(a)-[:rel1{name:'ab'}]->(b),
677+
(c)-[:rel2{name:'cd'}]->(d),
678+
(n)-[:rel3{name:'nm'}]->(m),
679+
(a)-[:rel4{name:'am'}]->(m),
680+
(p)-[:rel5{name:'pq'}]->(q)
681+
$$
682+
) as (a agtype);
683+
a
684+
---
685+
(0 rows)
686+
687+
-- no vertices or edges are deleted (error is expected)
688+
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
689+
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
690+
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
691+
vname
692+
-------
693+
"x"
694+
"y"
695+
"a"
696+
"b"
697+
"c"
698+
"d"
699+
"m"
700+
"n"
701+
"p"
702+
"q"
703+
(10 rows)
704+
705+
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
706+
ename
707+
-------
708+
"ab"
709+
"cd"
710+
"nm"
711+
"am"
712+
"pq"
713+
(5 rows)
714+
715+
-- x, y, a, m, ab, nm, am are deleted
716+
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
717+
a
718+
---
719+
1
720+
1
721+
(2 rows)
722+
723+
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
724+
vname
725+
-------
726+
"b"
727+
"c"
728+
"d"
729+
"n"
730+
"p"
731+
"q"
732+
(6 rows)
733+
734+
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
735+
ename
736+
-------
737+
"cd"
738+
"pq"
739+
(2 rows)
740+
741+
SELECT drop_graph('detach_delete', true);
742+
NOTICE: drop cascades to 12 other objects
743+
DETAIL: drop cascades to table detach_delete._ag_label_vertex
744+
drop cascades to table detach_delete._ag_label_edge
745+
drop cascades to table detach_delete."Label3"
746+
drop cascades to table detach_delete."Label1"
747+
drop cascades to table detach_delete."Label5"
748+
drop cascades to table detach_delete."Label7"
749+
drop cascades to table detach_delete."Label2"
750+
drop cascades to table detach_delete.rel1
751+
drop cascades to table detach_delete.rel2
752+
drop cascades to table detach_delete.rel3
753+
drop cascades to table detach_delete.rel4
754+
drop cascades to table detach_delete.rel5
755+
NOTICE: graph "detach_delete" has been dropped
756+
drop_graph
757+
------------
758+
759+
(1 row)
760+
654761
--
655762
-- Clean up
656763
--

regress/sql/cypher_delete.sql

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,43 @@ END;
246246

247247
SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (result agtype);
248248

249+
--
250+
-- Detach Delete
251+
--
252+
253+
SELECT create_graph('detach_delete');
254+
SELECT * FROM cypher('detach_delete',
255+
$$
256+
CREATE (x:Label3{name:'x', delete: true}),
257+
(y:Label3{name:'y', delete: true}),
258+
(a:Label1{name:'a', delete: true}),
259+
(b:Label5{name:'b'}),
260+
(c:Label5{name:'c'}),
261+
(d:Label5{name:'d'}),
262+
(m:Label7{name:'m', delete: true}),
263+
(n:Label2{name:'n'}),
264+
(p:Label2{name:'p'}),
265+
(q:Label2{name:'q'}),
266+
(a)-[:rel1{name:'ab'}]->(b),
267+
(c)-[:rel2{name:'cd'}]->(d),
268+
(n)-[:rel3{name:'nm'}]->(m),
269+
(a)-[:rel4{name:'am'}]->(m),
270+
(p)-[:rel5{name:'pq'}]->(q)
271+
$$
272+
) as (a agtype);
273+
274+
-- no vertices or edges are deleted (error is expected)
275+
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
276+
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
277+
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
278+
279+
-- x, y, a, m, ab, nm, am are deleted
280+
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
281+
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
282+
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
283+
284+
SELECT drop_graph('detach_delete', true);
285+
249286
--
250287
-- Clean up
251288
--

src/backend/executor/cypher_delete.c

Lines changed: 67 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
#include "parser/parsetree.h"
3333
#include "storage/bufmgr.h"
3434
#include "utils/rel.h"
35+
#include "common/hashfn.h"
3536

3637
#include "catalog/ag_label.h"
3738
#include "executor/cypher_executor.h"
@@ -48,9 +49,7 @@ static void rescan_cypher_delete(CustomScanState *node);
4849

4950
static void process_delete_list(CustomScanState *node);
5051

51-
static void find_connected_edges(CustomScanState *node, char *graph_name,
52-
List *labels, char *var_name, graphid id,
53-
bool detach_delete);
52+
static void check_for_connected_edges(CustomScanState *node);
5453
static agtype_value *extract_entity(CustomScanState *node,
5554
TupleTableSlot *scanTupleSlot,
5655
int entity_position);
@@ -83,6 +82,7 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate,
8382
cypher_delete_custom_scan_state *css =
8483
(cypher_delete_custom_scan_state *)node;
8584
Plan *subplan;
85+
HASHCTL hashctl;
8686

8787
Assert(list_length(css->cs->custom_plans) == 1);
8888

@@ -112,6 +112,16 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate,
112112
*/
113113
css->edge_labels = get_all_edge_labels_per_graph(estate, css->delete_data->graph_oid);
114114

115+
/* init vertex_id_htab */
116+
MemSet(&hashctl, 0, sizeof(hashctl));
117+
hashctl.keysize = sizeof(graphid);
118+
hashctl.entrysize =
119+
sizeof(graphid); // entries are not used, but entrysize must >= keysize
120+
hashctl.hash = tag_hash;
121+
css->vertex_id_htab = hash_create(DELETE_VERTEX_HTAB_NAME,
122+
DELETE_VERTEX_HTAB_SIZE, &hashctl,
123+
HASH_ELEM | HASH_FUNCTION);
124+
115125
/*
116126
* Postgres does not assign the es_output_cid in queries that do
117127
* not write to disk, ie: SELECT commands. We need the command id
@@ -194,6 +204,10 @@ static TupleTableSlot *exec_cypher_delete(CustomScanState *node)
194204
*/
195205
static void end_cypher_delete(CustomScanState *node)
196206
{
207+
check_for_connected_edges(node);
208+
209+
hash_destroy(((cypher_delete_custom_scan_state *)node)->vertex_id_htab);
210+
197211
ExecEndNode(node->ss.ps.lefttree);
198212
}
199213

@@ -443,15 +457,15 @@ static void process_delete_list(CustomScanState *node)
443457
}
444458

445459
/*
446-
* For vertices, we need to check if the vertex is connected to any
447-
* edges, * if there are, we need to delete them or throw an error,
448-
* depending on if the query specified the DETACH option.
460+
* For vertices, we insert the vertex ID in the hashtable
461+
* vertex_id_htab. This hashtable is used later to process
462+
* connected edges.
449463
*/
450464
if (original_entity_value->type == AGTV_VERTEX)
451465
{
452-
find_connected_edges(node, css->delete_data->graph_name,
453-
css->edge_labels, item->var_name,
454-
id->val.int_value, css->delete_data->detach);
466+
bool found;
467+
hash_search(css->vertex_id_htab, (void *)&(id->val.int_value),
468+
HASH_ENTER, &found);
455469
}
456470

457471
/* At this point, we are ready to delete the node/vertex. */
@@ -464,85 +478,87 @@ static void process_delete_list(CustomScanState *node)
464478
}
465479

466480
/*
467-
* Find the edges connected to the given node. If there is any edges either
468-
* delete them or throw an error, depending on the detach delete option.
481+
* Scans the edge tables and checks if the deleted vertices are connected to
482+
* any edge(s). For DETACH DELETE, the connected edges are deleted. Otherwise,
483+
* an error is thrown.
469484
*/
470-
static void find_connected_edges(CustomScanState *node, char *graph_name,
471-
List *labels, char *var_name, graphid id,
472-
bool detach_delete)
485+
static void check_for_connected_edges(CustomScanState *node)
473486
{
487+
ListCell *lc;
474488
cypher_delete_custom_scan_state *css =
475489
(cypher_delete_custom_scan_state *)node;
476490
EState *estate = css->css.ss.ps.state;
477-
ListCell *lc;
491+
char *graph_name = css->delete_data->graph_name;
478492

479-
Increment_Estate_CommandId(estate);
480-
481-
/*
482-
* We need to scan through all the edges to see if this vertex has
483-
* any edges attached to it.
484-
*
485-
* XXX: If we implement an on-disc graph storage system. Such as
486-
* an adjacency matrix, the performance of this check can be massively
487-
* improved. However, right now we have to scan every edge to see if
488-
* one has this vertex as a start or end vertex.
489-
*/
490-
foreach(lc, labels)
493+
/* scans each label from css->edge_labels */
494+
foreach (lc, css->edge_labels)
491495
{
492496
char *label_name = lfirst(lc);
493497
ResultRelInfo *resultRelInfo;
494498
TableScanDesc scan_desc;
495499
HeapTuple tuple;
496500
TupleTableSlot *slot;
497501

498-
resultRelInfo = create_entity_result_rel_info(estate,
499-
graph_name, label_name);
500-
502+
resultRelInfo = create_entity_result_rel_info(estate, graph_name,
503+
label_name);
501504
scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc,
502505
estate->es_snapshot, 0, NULL);
503-
504506
slot = ExecInitExtraTupleSlot(
505507
estate, RelationGetDescr(resultRelInfo->ri_RelationDesc),
506508
&TTSOpsHeapTuple);
507509

508-
// scan the table
509-
while(true)
510+
/* for each row */
511+
while (true)
510512
{
511-
graphid startid, endid;
513+
graphid startid;
514+
graphid endid;
512515
bool isNull;
516+
bool found_startid = false;
517+
bool found_endid = false;
513518

514519
tuple = heap_getnext(scan_desc, ForwardScanDirection);
515520

516-
// no more tuples to process, break and scan the next label.
521+
/* no more tuples to process, break and scan the next label. */
517522
if (!HeapTupleIsValid(tuple))
523+
{
518524
break;
525+
}
519526

520527
ExecStoreHeapTuple(tuple, slot, false);
521528

522-
startid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_start_id, &isNull));
523-
endid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull));
529+
startid = GRAPHID_GET_DATUM(slot_getattr(
530+
slot, Anum_ag_label_edge_table_start_id, &isNull));
531+
endid = GRAPHID_GET_DATUM(
532+
slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull));
533+
534+
hash_search(css->vertex_id_htab, (void *)&startid, HASH_FIND,
535+
&found_startid);
524536

525-
if (id == startid || id == endid)
537+
if (!found_startid)
526538
{
527-
/*
528-
* We have found an edge that uses the vertex. Either delete the
529-
* edge or throw an error. Depending on whether the DETACH
530-
* option was specified in the query.
531-
*/
532-
if (detach_delete)
539+
hash_search(css->vertex_id_htab, (void *)&endid, HASH_FIND,
540+
&found_endid);
541+
}
542+
543+
if (found_startid || found_endid)
544+
{
545+
if (css->delete_data->detach)
546+
{
533547
delete_entity(estate, resultRelInfo, tuple);
548+
}
534549
else
535-
ereport(ERROR,
536-
(errcode(ERRCODE_INTERNAL_ERROR),
537-
errmsg("Cannot delete vertex %s, because it still has edges attached. "
538-
"To delete this vertex, you must first delete the attached edges.",
539-
var_name)));
550+
{
551+
ereport(
552+
ERROR,
553+
(errcode(ERRCODE_INTERNAL_ERROR),
554+
errmsg(
555+
"Cannot delete a vertex that has edge(s). "
556+
"Delete the edge(s) first, or try DETACH DELETE.")));
557+
}
540558
}
541559
}
542560

543561
table_endscan(scan_desc);
544562
destroy_entity_result_rel_info(resultRelInfo);
545563
}
546-
547-
Decrement_Estate_CommandId(estate);
548564
}

0 commit comments

Comments
 (0)