Skip to content

Commit 6273ff2

Browse files
committed
Added initial version of a schema viewer for property graphs
1 parent 5848a19 commit 6273ff2

File tree

3 files changed

+351
-1
lines changed

3 files changed

+351
-1
lines changed

src/graph_notebook/magics/graph_magic.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from graph_notebook.decorators.decorators import display_exceptions, magic_variables, \
4545
neptune_db_only, neptune_graph_only
4646
from graph_notebook.magics.ml import neptune_ml_magic_handler, generate_neptune_ml_parser
47+
from graph_notebook.magics.schema import get_schema
4748
from graph_notebook.magics.streams import StreamViewer
4849
from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
4950
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
@@ -66,6 +67,7 @@
6667
from graph_notebook.options import OPTIONS_DEFAULT_DIRECTED, vis_options_merge
6768
from graph_notebook.magics.metadata import build_sparql_metadata_from_query, build_gremlin_metadata_from_query, \
6869
build_opencypher_metadata_from_query
70+
from dataclasses import asdict
6971

7072
sparql_table_template = retrieve_template("sparql_table.html")
7173
sparql_explain_template = retrieve_template("sparql_explain.html")
@@ -3967,3 +3969,60 @@ def handle_opencypher_status(self, line, local_ns):
39673969
store_to_ns(args.store_to, js, local_ns)
39683970
if not args.silent:
39693971
print(json.dumps(js, indent=2))
3972+
3973+
@line_magic
3974+
@needs_local_scope
3975+
@display_exceptions
3976+
def graph_schema(self, line='', local_ns=None):
3977+
logger.info(f'calling for schema on endpoint {self.graph_notebook_config.host}')
3978+
parser = argparse.ArgumentParser()
3979+
parser.add_argument('language', nargs='?', type=str.lower, default="propertygraph",
3980+
help=f'The language endpoint to use. Valid inputs: {STATISTICS_LANGUAGE_INPUTS}. '
3981+
f'Default: propertygraph.',
3982+
choices=STATISTICS_LANGUAGE_INPUTS)
3983+
parser.add_argument('--silent', action='store_true', default=False, help="Display no output.")
3984+
parser.add_argument('--store-to', type=str, default='', help='store query result to this variable')
3985+
args = parser.parse_args(line.split())
3986+
3987+
if not args.language == 'propertygraph':
3988+
print("Currently, only fetching the schema of property graphs is supported.")
3989+
else:
3990+
try:
3991+
if not args.silent:
3992+
tab = widgets.Tab()
3993+
titles = []
3994+
children = []
3995+
3996+
schema = get_schema(self.client, self.graph_notebook_config)
3997+
if not args.silent:
3998+
# Display GRAPH tab
3999+
gn = OCNetwork(group_by_property='~label')
4000+
logger.info("Creating schema network")
4001+
gn.create_schema_network(schema)
4002+
logger.debug(f'number of nodes is {len(gn.graph.nodes)}')
4003+
if len(gn.graph.nodes) > 0:
4004+
options = self.graph_notebook_vis_options.copy()
4005+
options['edges']['smooth'] = 'dynamic'
4006+
force_graph_output = Force(network=gn, options=options)
4007+
titles.append('Graph')
4008+
children.append(force_graph_output)
4009+
4010+
# Display JSON tab
4011+
json_output = widgets.Output(layout=DEFAULT_LAYOUT)
4012+
with json_output:
4013+
print(json.dumps(asdict(schema), indent=2))
4014+
children.append(json_output)
4015+
titles.append('JSON')
4016+
4017+
tab.children = children
4018+
4019+
for i in range(len(titles)):
4020+
tab.set_title(i, titles[i])
4021+
display(tab)
4022+
4023+
store_to_ns(args.store_to, schema, local_ns)
4024+
except Exception as e:
4025+
if not args.silent:
4026+
print("Encountered an error when attempting to retrieve graph schema:\n")
4027+
print(e)
4028+
store_to_ns(args.store_to, e, local_ns)
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
import logging
2+
from graph_notebook.neptune.client import Client, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME
3+
from graph_notebook.configuration.generate_config import Configuration
4+
from typing import Any, Dict, List, Optional, Tuple
5+
from dataclasses import dataclass, field
6+
7+
logger = logging.getLogger("graph_magic")
8+
9+
@dataclass
10+
class Property:
11+
"""Represents a property definition for nodes and relationships in the graph.
12+
13+
Properties are key-value pairs that can be attached to both nodes and
14+
relationships, storing additional metadata about these graph elements.
15+
16+
Attributes:
17+
name (str): The name/key of the property
18+
type (str): The data type of the property value
19+
"""
20+
21+
name: str
22+
type: List[str]
23+
24+
25+
@dataclass
26+
class Node:
27+
"""Defines a node type in the graph schema.
28+
29+
Nodes represent entities in the graph database and can have labels
30+
and properties that describe their characteristics.
31+
32+
Attributes:
33+
labels (str): The label(s) that categorize this node type
34+
properties (List[Property]): List of properties that can be assigned to this node type
35+
"""
36+
37+
labels: str
38+
properties: List[Property] = field(default_factory=list)
39+
40+
41+
@dataclass
42+
class Relationship:
43+
"""Defines a relationship type in the graph schema.
44+
45+
Relationships represent connections between nodes in the graph and can
46+
have their own properties to describe the nature of the connection.
47+
48+
Attributes:
49+
type (str): The type/category of the relationship
50+
properties (List[Property]): List of properties that can be assigned to this relationship type
51+
"""
52+
53+
type: str
54+
properties: List[Property] = field(default_factory=list)
55+
56+
57+
@dataclass
58+
class RelationshipPattern:
59+
"""Defines a valid relationship pattern between nodes in the graph.
60+
61+
Relationship patterns describe the allowed connections between different
62+
types of nodes in the graph schema.
63+
64+
Attributes:
65+
left_node (str): The label of the source/starting node
66+
right_node (str): The label of the target/ending node
67+
relation (str): The type of relationship connecting the nodes
68+
"""
69+
70+
left_node: str
71+
right_node: str
72+
relation: str
73+
74+
75+
@dataclass
76+
class GraphSchema:
77+
"""Represents the complete schema definition for the graph database.
78+
79+
The graph schema defines all possible node types, relationship types,
80+
and valid patterns of connections between nodes.
81+
82+
Attributes:
83+
nodes (List[Node]): List of all node types defined in the schema
84+
relationships (List[Relationship]): List of all relationship types defined in the schema
85+
relationship_patterns (List[RelationshipPattern]): List of valid relationship patterns
86+
"""
87+
88+
nodes: List[Node]
89+
relationships: List[Relationship]
90+
relationship_patterns: List[RelationshipPattern]
91+
92+
93+
def _get_labels(summary) -> Tuple[List[str], List[str]]:
94+
"""Get node and edge labels from the Neptune statistics summary.
95+
96+
Returns:
97+
Tuple[List[str], List[str]]: A tuple containing two lists:
98+
1. List of node labels
99+
2. List of edge labels
100+
"""
101+
n_labels = summary['nodeLabels']
102+
e_labels = summary['edgeLabels']
103+
return n_labels, e_labels
104+
105+
def _get_triples(client:Client, e_labels: List[str]) -> List[RelationshipPattern]:
106+
triple_query = """
107+
MATCH (a)-[e:`{e_label}`]->(b)
108+
WITH a,e,b LIMIT 3000
109+
RETURN DISTINCT labels(a) AS from, type(e) AS edge, labels(b) AS to
110+
LIMIT 10
111+
"""
112+
113+
triple_schema = []
114+
for label in e_labels:
115+
logger.debug(f'Running get triples for {label}')
116+
q = triple_query.format(e_label=label)
117+
data = client.opencypher_http(q).json()
118+
119+
for d in data['results']:
120+
triple = RelationshipPattern(d["from"][0], d["to"][0], d["edge"])
121+
triple_schema.append(triple)
122+
123+
return triple_schema
124+
125+
def _get_node_properties(client:Client,
126+
n_labels: List[str], types: Dict[str, str]
127+
) -> List:
128+
node_properties_query = """
129+
MATCH (a:`{n_label}`)
130+
RETURN properties(a) AS props
131+
LIMIT 100
132+
"""
133+
nodes = []
134+
for label in n_labels:
135+
logger.debug(f'Running get node properties for {label}')
136+
q = node_properties_query.format(n_label=label)
137+
data = {"label": label, "properties": client.opencypher_http(q).json()['results']}
138+
s = set({})
139+
for p in data["properties"]:
140+
props = {}
141+
142+
for k, v in p['props'].items():
143+
prop_type = types[type(v).__name__]
144+
if k not in props:
145+
props[k] = {prop_type}
146+
else:
147+
props[k].update([prop_type])
148+
149+
properties = []
150+
for k, v in props.items():
151+
properties.append(Property(name=k, type=list(v)))
152+
153+
np = {
154+
"properties": [{"property": k, "type": v} for k, v in s],
155+
"labels": label,
156+
}
157+
nodes.append(Node(labels=label, properties=properties))
158+
159+
return nodes
160+
161+
def _get_edge_properties(client:Client,
162+
e_labels: List[str], types: Dict[str, str]
163+
) -> List:
164+
edge_properties_query = """
165+
MATCH ()-[e:`{e_label}`]->()
166+
RETURN properties(e) AS props
167+
LIMIT 100
168+
"""
169+
edges = []
170+
for label in e_labels:
171+
logger.debug(f'Running get edge properties for {label}')
172+
q = edge_properties_query.format(e_label=label)
173+
data = {"label": label, "properties": client.opencypher_http(q).json()['results']}
174+
s = set({})
175+
for p in data["properties"]:
176+
from typing import cast
177+
178+
p_dict = cast(Dict[str, Any], p)
179+
props = cast(Dict[str, Any], p_dict["props"])
180+
181+
props = {}
182+
for k, v in p['props'].items():
183+
prop_type = types[type(v).__name__]
184+
if k not in props:
185+
props[k] = {prop_type}
186+
else:
187+
props[k].update([prop_type])
188+
properties = []
189+
for k, v in props.items():
190+
properties.append(Property(name=k, type=list(v)))
191+
192+
edges.append(Relationship(type=label, properties=properties))
193+
194+
return edges
195+
196+
def get_schema(client:Client, config:Configuration) -> GraphSchema:
197+
if config.neptune_service == NEPTUNE_DB_SERVICE_NAME:
198+
logger.info("Finding Schema for Neptune Database")
199+
summary = client.statistics('propertygraph', True, 'basic', False)
200+
logger.info("Summary retrieved")
201+
logger.debug(summary.json()['payload']['graphSummary'])
202+
summary=summary.json()['payload']['graphSummary']
203+
types = {
204+
'str': 'STRING',
205+
'float': 'DOUBLE',
206+
'int': 'INTEGER',
207+
'list': 'LIST',
208+
'dict': 'MAP',
209+
'bool': 'BOOLEAN',
210+
}
211+
n_labels, e_labels = _get_labels(summary)
212+
logger.info("Getting Triples")
213+
triple_schema = _get_triples(client, e_labels)
214+
logger.debug(triple_schema)
215+
logger.info("Node Properties retrieved")
216+
nodes = _get_node_properties(client, n_labels, types)
217+
logger.debug(nodes)
218+
logger.info("Edge Properties retrieved")
219+
rels = _get_edge_properties(client, e_labels, types)
220+
logger.debug(rels)
221+
graph = GraphSchema(nodes=nodes, relationships=rels, relationship_patterns=triple_schema)
222+
return graph
223+
elif config.neptune_service == NEPTUNE_ANALYTICS_SERVICE_NAME:
224+
logger.info("Finding Schema for Neptune Analytics")
225+
res = client.opencypher_http("CALL neptune.graph.pg_schema()")
226+
raw_schema = res.json()['results'][0]['schema']
227+
graph = GraphSchema(nodes=[], relationships=[], relationship_patterns=[])
228+
for i in raw_schema['labelTriples']:
229+
graph.relationship_patterns.append(
230+
RelationshipPattern(left_node=i['~from'], relation=i['~type'], right_node=i['~to'])
231+
)
232+
233+
# Process node labels and properties
234+
for l in raw_schema['nodeLabels']:
235+
details = raw_schema['nodeLabelDetails'][l]
236+
props = []
237+
for p in details['properties']:
238+
props.append(Property(name=p, type=details['properties'][p]['datatypes']))
239+
graph.nodes.append(Node(labels=l, properties=props))
240+
241+
# Process edge labels and properties
242+
for l in raw_schema['edgeLabels']:
243+
details = raw_schema['edgeLabelDetails'][l]
244+
props = []
245+
for p in details['properties']:
246+
props.append(Property(name=p, type=details['properties'][p]['datatypes']))
247+
graph.relationships.append(Relationship(type=l, properties=props))
248+
return graph
249+
else:
250+
raise NotImplementedError

src/graph_notebook/network/opencypher/OCNetwork.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77

88
from graph_notebook.network.EventfulNetwork import EventfulNetwork, DEFAULT_GRP, DEPTH_GRP_KEY, DEFAULT_RAW_GRP_KEY
99
from networkx import MultiDiGraph
10+
from graph_notebook.magics.schema import GraphSchema
1011

1112
logging.basicConfig()
12-
logger = logging.getLogger(__file__)
13+
logger = logging.getLogger("graph_magic")
1314

1415
DEFAULT_LABEL_MAX_LENGTH = 10
1516
ENTITY_KEY = "~entityType"
@@ -224,3 +225,43 @@ def add_results(self, results):
224225
logger.debug(f'Property {res_sublist} in list results set is invalid, skipping')
225226
logger.debug(f'Error: {e}')
226227
continue
228+
229+
def create_schema_network(self, schema:GraphSchema):
230+
try:
231+
for item in schema.nodes:
232+
props = {}
233+
logger.debug(item)
234+
for p in item.properties:
235+
props[p.name] = p.type
236+
props['~label'] = item.labels
237+
self.add_node(
238+
node_id=item.labels,
239+
data={
240+
"properties": props,
241+
"label": item.labels,
242+
"title": item.labels,
243+
},
244+
)
245+
246+
for item in schema.relationship_patterns:
247+
props = {}
248+
edge = [i for i in schema.relationships if i.type == item.relation]
249+
for p in edge[0].properties:
250+
props[p.name] = p.type
251+
props['~label'] = item.relation
252+
self.add_edge(
253+
from_id=item.left_node,
254+
to_id=item.right_node,
255+
edge_id=str(item),
256+
label=item.relation,
257+
title=item.relation,
258+
data={
259+
"properties": props,
260+
"label": item.relation,
261+
"title": item.relation,
262+
},
263+
)
264+
265+
except (TypeError, ValueError) as network_creation_error:
266+
print(network_creation_error)
267+
logger.debug(f"Error: {network_creation_error}")

0 commit comments

Comments
 (0)