1+ import logging
2+ from graph_notebook .neptune .client import Client , NEPTUNE_DB_SERVICE_NAME , NEPTUNE_ANALYTICS_SERVICE_NAME
3+ from graph_notebook .configuration .generate_config import Configuration
4+ from typing import Any , Dict , List , Optional , Tuple
5+ from dataclasses import dataclass , field
6+
7+ logger = logging .getLogger ("graph_magic" )
8+
@dataclass
class Property:
    """Represents a property definition for nodes and relationships in the graph.

    Properties are key-value pairs that can be attached to both nodes and
    relationships, storing additional metadata about these graph elements.

    Attributes:
        name (str): The name/key of the property
        type (List[str]): The data type name(s) recorded for the property
            value; a list because a single property key may be associated
            with more than one data type
    """

    name: str
    type: List[str]
23+
24+
@dataclass
class Node:
    """Schema entry describing one kind of node in the graph.

    A node type is identified by its label and carries the list of
    properties that were found for (or declared on) nodes of that label.

    Attributes:
        labels (str): Label identifying this node type
        properties (List[Property]): Properties associated with this node
            type; defaults to a new empty list for every instance
    """

    labels: str
    properties: List[Property] = field(default_factory=list)
39+
40+
@dataclass
class Relationship:
    """Schema entry describing one kind of relationship (edge) in the graph.

    A relationship type is identified by its type name and carries the list
    of properties that were found for (or declared on) edges of that type.

    Attributes:
        type (str): Name of the relationship type
        properties (List[Property]): Properties associated with this
            relationship type; defaults to a new empty list for every instance
    """

    type: str
    properties: List[Property] = field(default_factory=list)
55+
56+
@dataclass
class RelationshipPattern:
    """A (source label, relationship type, target label) connection in the schema.

    Each pattern records that nodes with one label are connected to nodes
    with another label through a given relationship type.

    Note that the positional field order is ``left_node, right_node,
    relation`` - the relationship type comes last.

    Attributes:
        left_node (str): Label of the node the relationship starts from
        right_node (str): Label of the node the relationship points to
        relation (str): Type of the relationship joining the two nodes
    """

    left_node: str
    right_node: str
    relation: str
73+
74+
@dataclass
class GraphSchema:
    """Container for the full schema description of a graph.

    Bundles together the node types, the relationship types, and the
    patterns describing which node labels are connected by which
    relationship types.

    Attributes:
        nodes (List[Node]): Node types present in the schema
        relationships (List[Relationship]): Relationship types present in the schema
        relationship_patterns (List[RelationshipPattern]): Connection patterns
            between node labels
    """

    nodes: List[Node]
    relationships: List[Relationship]
    relationship_patterns: List[RelationshipPattern]
91+
92+
93+ def _get_labels (summary ) -> Tuple [List [str ], List [str ]]:
94+ """Get node and edge labels from the Neptune statistics summary.
95+
96+ Returns:
97+ Tuple[List[str], List[str]]: A tuple containing two lists:
98+ 1. List of node labels
99+ 2. List of edge labels
100+ """
101+ n_labels = summary ['nodeLabels' ]
102+ e_labels = summary ['edgeLabels' ]
103+ return n_labels , e_labels
104+
def _get_triples(client: Client, e_labels: List[str]) -> List[RelationshipPattern]:
    """Collect (from label, edge type, to label) patterns for each edge label.

    One openCypher query is issued per edge label. The query looks at up to
    3000 matching edges and returns at most 10 distinct label combinations.
    Only the first label of each endpoint node is kept.

    Args:
        client: Client used to run the openCypher queries.
        e_labels: Edge labels to inspect.

    Returns:
        List[RelationshipPattern]: Patterns found, in edge-label order.
    """
    triple_query = """
    MATCH (a)-[e:`{e_label}`]->(b)
    WITH a,e,b LIMIT 3000
    RETURN DISTINCT labels(a) AS from, type(e) AS edge, labels(b) AS to
    LIMIT 10
    """

    patterns = []
    for edge_label in e_labels:
        logger.debug(f'Running get triples for {edge_label}')
        response = client.opencypher_http(triple_query.format(e_label=edge_label))
        # Positional order is (left_node, right_node, relation).
        patterns.extend(
            RelationshipPattern(row["from"][0], row["to"][0], row["edge"])
            for row in response.json()['results']
        )

    return patterns
124+
def _get_node_properties(client: Client,
                         n_labels: List[str], types: Dict[str, str]
                         ) -> List[Node]:
    """Infer the properties of each node label by sampling nodes.

    For every label, up to 100 nodes are fetched and the Python type of each
    property value is translated to a schema type name through ``types``.
    Type names are accumulated across all sampled nodes, so a property that
    holds values of different types is reported with every type seen.

    Args:
        client: Client used to run the openCypher queries.
        n_labels: Node labels to inspect.
        types: Mapping from Python type name (e.g. ``'str'``) to schema type
            name (e.g. ``'STRING'``).

    Returns:
        List[Node]: One ``Node`` per label, in the order of ``n_labels``. A
        label with no sampled nodes yields a ``Node`` with no properties.

    Raises:
        KeyError: If a property value has a Python type missing from ``types``.
    """
    node_properties_query = """
    MATCH (a:`{n_label}`)
    RETURN properties(a) AS props
    LIMIT 100
    """
    nodes = []
    for label in n_labels:
        logger.debug('Running get node properties for %s', label)
        q = node_properties_query.format(n_label=label)
        rows = client.opencypher_http(q).json()['results']

        # Initialised once per label (not once per row) so that type
        # information from every sampled node is merged, and so that a label
        # with no rows still produces a well-defined, empty result.
        seen_types = {}
        for row in rows:
            for key, value in row['props'].items():
                seen_types.setdefault(key, set()).add(types[type(value).__name__])

        # sorted() gives a stable type order; plain set iteration order is
        # not reproducible between interpreter runs.
        properties = [
            Property(name=key, type=sorted(type_names))
            for key, type_names in seen_types.items()
        ]
        nodes.append(Node(labels=label, properties=properties))

    return nodes
160+
def _get_edge_properties(client: Client,
                         e_labels: List[str], types: Dict[str, str]
                         ) -> List[Relationship]:
    """Infer the properties of each edge label by sampling edges.

    For every label, up to 100 edges are fetched and the Python type of each
    property value is translated to a schema type name through ``types``.
    Type names are accumulated across all sampled edges, so a property that
    holds values of different types is reported with every type seen.

    Args:
        client: Client used to run the openCypher queries.
        e_labels: Edge labels to inspect.
        types: Mapping from Python type name (e.g. ``'str'``) to schema type
            name (e.g. ``'STRING'``).

    Returns:
        List[Relationship]: One ``Relationship`` per label, in the order of
        ``e_labels``. A label with no sampled edges yields a ``Relationship``
        with no properties.

    Raises:
        KeyError: If a property value has a Python type missing from ``types``.
    """
    edge_properties_query = """
    MATCH ()-[e:`{e_label}`]->()
    RETURN properties(e) AS props
    LIMIT 100
    """
    edges = []
    for label in e_labels:
        logger.debug('Running get edge properties for %s', label)
        q = edge_properties_query.format(e_label=label)
        rows = client.opencypher_http(q).json()['results']

        # Initialised once per label (not once per row) so that type
        # information from every sampled edge is merged, and so that a label
        # with no rows still produces a well-defined, empty result.
        seen_types = {}
        for row in rows:
            for key, value in row['props'].items():
                seen_types.setdefault(key, set()).add(types[type(value).__name__])

        # sorted() gives a stable type order; plain set iteration order is
        # not reproducible between interpreter runs.
        properties = [
            Property(name=key, type=sorted(type_names))
            for key, type_names in seen_types.items()
        ]
        edges.append(Relationship(type=label, properties=properties))

    return edges
195+
def get_schema(client: Client, config: Configuration) -> GraphSchema:
    """Build a ``GraphSchema`` for the Neptune service named in ``config``.

    For Neptune Database the schema is inferred: labels come from the
    statistics summary, and relationship patterns and properties come from
    the sampling helpers ``_get_triples``, ``_get_node_properties`` and
    ``_get_edge_properties``.

    For Neptune Analytics the schema is read from the result of a single
    ``CALL neptune.graph.pg_schema()`` query.

    Args:
        client: Client used to call the statistics endpoint and run
            openCypher queries.
        config: Configuration whose ``neptune_service`` selects the strategy.

    Returns:
        GraphSchema: Nodes, relationships and relationship patterns found.

    Raises:
        NotImplementedError: If ``config.neptune_service`` is neither the
            Neptune Database nor the Neptune Analytics service name.
    """
    if config.neptune_service == NEPTUNE_DB_SERVICE_NAME:
        logger.info("Finding Schema for Neptune Database")
        # NOTE(review): arguments are passed through unchanged from the
        # original call; confirm their meaning against Client.statistics.
        response = client.statistics('propertygraph', True, 'basic', False)
        logger.info("Summary retrieved")
        # Parse the response body once and reuse it for logging and lookup.
        summary = response.json()['payload']['graphSummary']
        logger.debug(summary)

        # Python type name of a sampled value -> schema type name.
        types = {
            'str': 'STRING',
            'float': 'DOUBLE',
            'int': 'INTEGER',
            'list': 'LIST',
            'dict': 'MAP',
            'bool': 'BOOLEAN',
        }
        n_labels, e_labels = _get_labels(summary)

        logger.info("Getting Triples")
        triple_schema = _get_triples(client, e_labels)
        logger.debug(triple_schema)

        logger.info("Getting Node Properties")
        nodes = _get_node_properties(client, n_labels, types)
        logger.debug(nodes)

        logger.info("Getting Edge Properties")
        rels = _get_edge_properties(client, e_labels, types)
        logger.debug(rels)

        return GraphSchema(nodes=nodes, relationships=rels, relationship_patterns=triple_schema)

    if config.neptune_service == NEPTUNE_ANALYTICS_SERVICE_NAME:
        logger.info("Finding Schema for Neptune Analytics")
        res = client.opencypher_http("CALL neptune.graph.pg_schema()")
        raw_schema = res.json()['results'][0]['schema']

        relationship_patterns = [
            RelationshipPattern(
                left_node=triple['~from'],
                relation=triple['~type'],
                right_node=triple['~to'],
            )
            for triple in raw_schema['labelTriples']
        ]

        # Process node labels and properties
        nodes = []
        for label in raw_schema['nodeLabels']:
            label_props = raw_schema['nodeLabelDetails'][label]['properties']
            nodes.append(Node(
                labels=label,
                properties=[
                    Property(name=prop_name, type=prop_details['datatypes'])
                    for prop_name, prop_details in label_props.items()
                ],
            ))

        # Process edge labels and properties
        relationships = []
        for label in raw_schema['edgeLabels']:
            label_props = raw_schema['edgeLabelDetails'][label]['properties']
            relationships.append(Relationship(
                type=label,
                properties=[
                    Property(name=prop_name, type=prop_details['datatypes'])
                    for prop_name, prop_details in label_props.items()
                ],
            ))

        return GraphSchema(
            nodes=nodes,
            relationships=relationships,
            relationship_patterns=relationship_patterns,
        )

    raise NotImplementedError(
        f"Schema retrieval is not supported for Neptune service: {config.neptune_service!r}"
    )
0 commit comments