// KnowledgeGraphQueriesPlayground.txt
// Cypher query snippets for a knowledge graph made of CONCEPT and DATASET nodes.
//+++++++ QUALITY CONTROL CHECKS++++++++
// SPOT SELF-LOOPS
// Reports pairs of concepts joined by exactly one outgoing relationship
// whose two ends carry the same id.
MATCH (source:CONCEPT)-[*1..1]->(target:CONCEPT)
WHERE target.id = source.id
RETURN source AS c, target AS c2
// SPOT TRANSITIVE LOOPS (max length of the path: 100)
// Reports directed paths of 2 to 100 hops that end on a concept carrying
// the same id as the concept they started from.
MATCH (source:CONCEPT)-[*2..100]->(target:CONCEPT)
WHERE target.id = source.id
RETURN source AS c, target AS c2
// SPOT ISOLATED NODES
// Returns the concepts that take part in no relationship at all (in either
// direction). A MATCH on (c)-[r]-() only produces rows where a relationship
// exists, so `r` can never be NULL there; the absence of relationships has to
// be tested with a negated pattern predicate on the node instead.
MATCH (c:CONCEPT)
WHERE NOT (c)--()
RETURN c
// +++++++++++++++++++++++++++++++++++++++
//+++++++ META QUERIES++++++++
// FIND 'DENSELY' CONNECTED NODES (more than 20 relationships)
// Only relationships whose other end is also a CONCEPT are counted,
// regardless of their direction.
MATCH (c:CONCEPT)-[rel]-(:CONCEPT)
WITH c, count(rel) AS rel_count
WHERE rel_count > 20
RETURN c.id
//++++++++++++++++++++++++++++
// GET CONCEPT ANCESTORS OF A SPECIFIC CONCEPT
// Ancestors are the concepts reachable from the given one by following
// outgoing relationships of any type, over any number of hops.
MATCH (c:CONCEPT)-[*]->(c2:CONCEPT)
WHERE c.id = 'xxxx'
RETURN DISTINCT c2.id
// GET CONCEPT ANCESTORS OF LIST (LIST NOT INCLUDED)
// Same traversal as for a single concept, started from every concept whose id
// appears in the list; the starting concepts themselves are not added.
WITH ['xxx', 'yyyy', 'zzzz....'] AS concept_ids
MATCH (c:CONCEPT)-[*]->(c2:CONCEPT)
WHERE c.id IN concept_ids
RETURN DISTINCT c2.id
// GET CONCEPT ANCESTORS OF LIST (LIST INCLUDED)
// Returns one list holding the ids of the listed concepts plus the ids of all
// concepts reachable from them through outgoing relationships.
// The zero-hop lower bound lets every listed concept match itself, so a
// concept without any ancestor is still part of the result (a plain [*] needs
// at least one hop and would silently drop it). Collecting once with DISTINCT
// also keeps an id from appearing twice when a listed concept is an ancestor
// of another listed concept.
MATCH (c:CONCEPT)-[*0..]->(c2:CONCEPT)
WHERE c.id IN ['xxxx', 'yyyy', 'zzzz']
RETURN collect(DISTINCT c2.id) AS results
// GET CONCEPTS ASSOCIATED (explicitly + implicitly) TO A DATASET
// Explicit concepts are the ones the dataset is TAGGED_WITH; implicit concepts
// are the ones reachable from those through outgoing relationships.
// The zero-hop lower bound keeps tagged concepts that have no ancestor (a
// plain [*] requires at least one hop and would drop them), and DISTINCT
// removes the repeats that arise when a tagged concept has several ancestors
// or when two tags share an ancestor.
MATCH (d:DATASET {id: 'xxxx'})-[:TAGGED_WITH]->(c:CONCEPT)-[*0..]->(c2:CONCEPT)
RETURN collect(DISTINCT c2.id) AS concepts
// GET CONCEPTS ASSOCIATED (explicitly) TO A DATASET
// Only the concepts the dataset points at through a TAGGED_WITH relationship.
MATCH (d:DATASET)-[:TAGGED_WITH]->(c:CONCEPT)
WHERE d.id = 'xxxx'
RETURN c.id
// GET DATASETS EXPLICITLY TAGGED WITH ALL THE TAGS PROVIDED
// Gathers the ids of the concepts each dataset is tagged with, then keeps the
// datasets whose tag list contains every requested id.
WITH ['xxx', 'yyyy', 'zzz...'] AS required_tags
MATCH (d:DATASET)-[:TAGGED_WITH]->(c:CONCEPT)
WITH d, required_tags, collect(c.id) AS tagged_ids
WHERE ALL (tag IN required_tags WHERE tag IN tagged_ids)
RETURN d.id
// GET DATASETS TAGGED WITH ALL THE TAGS PROVIDED (or a descendant of them)
// For each dataset, gathers the ids of its tagged concepts together with the
// ids of every concept that has an outgoing path to one of them, then keeps
// the datasets whose gathered ids contain every requested tag.
// The zero-hop lower bound makes each tagged concept count for itself, so a
// dataset is no longer dropped just because none of its tags has an incoming
// concept path; DISTINCT keeps the returned list free of repeated ids.
// NOTE(review): the traversal direction is kept exactly as originally written
// (related concept -> tagged concept). The ancestor queries in this file follow
// outgoing relationships to reach ancestors, which would make these related
// concepts descendants of the dataset's tags rather than ancestors -- confirm
// this is the intended reading of "or a descendant of them".
MATCH (d:DATASET)-[:TAGGED_WITH]->(:CONCEPT)<-[*0..]-(c1:CONCEPT)
WITH d, collect(DISTINCT c1.id) AS concepts
WHERE ALL (v IN ['global', 'water', 'precipitation'] WHERE v IN concepts)
RETURN d.id, concepts
// GET DATASETS TAGGED WITH A COMMON TAG (or a more specific tag that is a descendant of the set of tags provided)
// Returns one de-duplicated list with the ids of the datasets tagged either
// with one of the listed concepts (zero hops) or with a concept reachable from
// a listed concept through outgoing relationships (one or more hops).
// Doing this in a single pattern matters: in the two-step form the concept
// variable is not carried through the intermediate WITH, so a second
// MATCH (c)<-[:TAGGED_WITH]-(...) binds a brand-new `c` to ANY node and pulls
// in every tagged dataset in the graph. DISTINCT is applied inside collect()
// because RETURN DISTINCT on a single list value does not de-duplicate the
// elements of that list.
// NOTE(review): the traversal direction (listed concept -> tagged concept) is
// kept as originally written -- confirm it matches the "descendant" wording.
MATCH (c:CONCEPT)-[*0..]->(:CONCEPT)<-[:TAGGED_WITH]-(d:DATASET)
WHERE c.id IN ['xxx', 'yyyy', 'zzzz....']
RETURN collect(DISTINCT d.id) AS results
// GET SIMILAR DATASETS 1 (datasets tagged with at least one of the concepts related to the dataset provided) (limit 3)
// Two datasets are considered similar here when both are TAGGED_WITH the same
// concept; at most three distinct ids are returned.
MATCH (d:DATASET)-[:TAGGED_WITH]->(:CONCEPT)<-[:TAGGED_WITH]-(d2:DATASET)
WHERE d.id = 'xxxx'
RETURN DISTINCT d2.id
LIMIT 3
// GET SIMILAR DATASETS 2 (datasets tagged with at least one of the concepts -or concept ancestors- related to the dataset provided) (limit 3)
// Returns a list of up to three distinct ids of other datasets tagged with one
// of the given dataset's concepts (zero hops) or with a concept reachable from
// them through outgoing relationships (one or more hops).
// A single pattern is used on purpose: in the two-step form the concept
// variable is not carried through the intermediate WITH, so a second
// MATCH (c)-[*]->... starts from ANY node instead of the dataset's concepts.
// The given dataset is excluded explicitly because it can be tagged with both
// a concept and one of that concept's ancestors, which would otherwise make it
// "similar" to itself. De-duplication and the limit of 3 announced in the
// header are applied to individual ids before they are collected, since
// RETURN DISTINCT on a single list value does not de-duplicate its elements.
MATCH (d:DATASET {id: 'xxxx'})-[:TAGGED_WITH]->(:CONCEPT)-[*0..]->(:CONCEPT)<-[:TAGGED_WITH]-(d2:DATASET)
WHERE d2 <> d
WITH DISTINCT d2.id AS dataset_id
LIMIT 3
RETURN collect(dataset_id) AS result
// EXPORT DATABASE TO JSON
// Builds a single map with two lists: `edges` (one entry per directed
// relationship between two concepts) and `nodes` (one entry per concept).
MATCH (src:CONCEPT)-[rel]->(dst:CONCEPT)
WITH collect({source: src.id, target: dst.id, relType: type(rel)}) AS edges
// The node pass is a separate MATCH so that concepts without any
// concept-to-concept relationship are still listed.
MATCH (n:CONCEPT)
WITH edges, collect({
  id: n.id,
  label: n.label,
  synonyms: n.synonyms,
  default_parent: n.default_parent,
  labels: labels(n)
}) AS nodes
RETURN { edges: edges, nodes: nodes }
// EXPORT WHOLE DATABASE
// Same shape as the concept-only export, but over every node and every
// directed relationship; each edge also carries the relationship's property
// keys and the labels of both of its end nodes.
MATCH (src)-[rel]->(dst)
WITH collect({
  source: src.id,
  target: dst.id,
  relType: type(rel),
  properties: keys(rel),
  sourceType: labels(src),
  targetType: labels(dst)
}) AS edges
MATCH (n)
WITH edges, collect({
  id: n.id,
  label: n.label,
  synonyms: n.synonyms,
  default_parent: n.default_parent,
  labels: labels(n)
}) AS nodes
RETURN { edges: edges, nodes: nodes }
// UPDATE DATASETS THAT ARE ONLY USED IN PREP
// Sets application = 'prep' on every TAGGED_WITH relationship that starts at
// one of the listed datasets. Fill the list with the dataset ids to update.
// A relationship pattern must be closed by a node on both sides, and
// TAGGED_WITH is written from the dataset towards the concept everywhere else
// in this file, so the pattern goes (DATASET)-[:TAGGED_WITH]->().
MATCH (d:DATASET)-[t:TAGGED_WITH]->()
WHERE d.id IN ['']
SET t.application = 'prep'
// DELETE EVERYTHING
// Destructive: removes every node in the database; DETACH also removes the
// relationships attached to each node before the node itself is deleted.
MATCH (node)
DETACH DELETE node