88import sys
99from decimal import Decimal , InvalidOperation
1010import os
11+ import codecs
1112from collections import OrderedDict
13+
1214import openpyxl
1315from six import text_type
1416from warnings import warn
1517import traceback
1618import datetime
19+ import json
1720import pytz
1821from openpyxl .utils import _get_column_letter , column_index_from_string
22+ from flattentool .lib import decimal_default , Cell
23+ import tempfile
1924
2025WITH_CELLS = True
2126
22- class Cell :
23- def __init__ (self , cell_value , cell_location ):
24- self .cell_value = cell_value
25- self .cell_location = cell_location
26- self .sub_cells = []
2727
2828# The "pylint: disable" lines suppress warnings about imports that are expected to fail.
2929
@@ -231,26 +231,41 @@ def inthere(unflattened, id_name):
231231 else :
232232 main_sheet_by_ocid [root_id_or_none ].append (unflattened )
233233 temporarydicts_to_lists (main_sheet_by_ocid )
234+
234235 return sum (main_sheet_by_ocid .values (), [])
235236
237+
def unflatten(self):
    """Unflatten the input and return the resulting data structure.

    When ``WITH_CELLS`` is false this is simply ``self.do_unflatten()``.

    When ``WITH_CELLS`` is true, ``self.results_from_cell_tree`` is asked to
    write the result as JSON (under the key ``'main'``) to a file inside a
    freshly created temporary directory; that file is then parsed back with
    ``OrderedDict`` as the mapping type and the value stored under ``'main'``
    is returned.

    The temporary directory is always removed before returning, including
    when writing or parsing raises.
    """
    if not WITH_CELLS:
        return self.do_unflatten()

    # Local import: nothing else in this block's view of the file needs it.
    import shutil

    tmp_directory = tempfile.mkdtemp()
    try:
        file_name = os.path.join(tmp_directory, 'unflattened.json')
        self.results_from_cell_tree({}, 'main', file_name)
        # The file is written as UTF-8 with ensure_ascii=False, so it must be
        # decoded as UTF-8 explicitly rather than with the platform default.
        with codecs.open(file_name, encoding='utf-8') as unflattened:
            return json.load(unflattened, object_pairs_hook=OrderedDict)['main']
    finally:
        # mkdtemp() leaves deletion to the caller; clean up best-effort so a
        # cleanup failure never masks the real result or exception.
        shutil.rmtree(tmp_directory, ignore_errors=True)
246+
247+
def extract_error_path(self, cell_tree):
    """Return the sorted ``(path, cells)`` items for ``cell_tree``.

    The mapping is produced by ``extract_list_to_error_path``, seeded with
    ``self.root_list_path`` as the first path component; its items are
    returned as a list in ascending order.
    """
    error_paths = extract_list_to_error_path([self.root_list_path], cell_tree)
    ordered_items = list(error_paths.items())
    ordered_items.sort()
    return ordered_items
250+
241251
242- def fancy_unflatten (self ):
def results_from_cell_tree(self, base, main_sheet_name, output_name):
    """Unflatten, write the result to ``output_name`` as JSON, return source paths.

    The tree returned by ``self.do_unflatten()`` is stored in ``base`` under
    ``main_sheet_name`` (``base`` is mutated in place), and ``base`` is then
    dumped as UTF-8 JSON with a four-space indent to ``output_name``.

    Returns whatever ``self.extract_error_path`` yields for the same tree.
    """
    unflattened_tree = self.do_unflatten()
    base[main_sheet_name] = unflattened_tree

    with codecs.open(output_name, 'w', encoding='utf-8') as output_file:
        json.dump(
            base,
            output_file,
            indent=4,
            default=decimal_default,
            ensure_ascii=False,
        )

    return self.extract_error_path(unflattened_tree)
258+
259+
260+ def fancy_unflatten (self , base , main_sheet_name , output_name , cell_source_map , heading_source_map ):
243261 if not WITH_CELLS :
244262 raise Exception ('Can only do a fancy_unflatten() if WITH_CELLS=True' )
245- cell_tree = self .do_unflatten ()
246- result = extract_list_to_value (cell_tree )
247- cell_source_map = extract_list_to_error_path ([self .root_list_path ], cell_tree )
248- ordered_items = sorted (cell_source_map .items ())
249- ordered_cell_source_map = OrderedDict (( '/' .join (str (x ) for x in path ), location ) for path , location in ordered_items )
263+ ordered_items = self .results_from_cell_tree (base , main_sheet_name , output_name )
264+ if not cell_source_map and not heading_source_map :
265+ return
250266 row_source_map = OrderedDict ()
251- heading_source_map = OrderedDict ()
252- for path , _ in ordered_items :
253- cells = cell_source_map [path ]
267+ heading_source_map_data = OrderedDict ()
268+ for path , cells in ordered_items :
254269 # Prepare row_source_map key
255270 key = '/' .join (str (x ) for x in path [:- 1 ])
256271 if not key in row_source_map :
@@ -263,19 +278,28 @@ def fancy_unflatten(self):
263278 except :
264279 header_path_parts .append (x )
265280 header_path = '/' .join (header_path_parts )
266- if header_path not in heading_source_map :
267- heading_source_map [header_path ] = []
281+ if header_path not in heading_source_map_data :
282+ heading_source_map_data [header_path ] = []
268283 # Populate the row and header source maps
269284 for cell in cells :
270285 sheet , col , row , header = cell
271286 if (sheet , row ) not in row_source_map [key ]:
272287 row_source_map [key ].append ((sheet , row ))
273- if (sheet , header ) not in heading_source_map [header_path ]:
274- heading_source_map [header_path ].append ((sheet , header ))
288+ if (sheet , header ) not in heading_source_map_data [header_path ]:
289+ heading_source_map_data [header_path ].append ((sheet , header ))
275290 for key in row_source_map :
276- assert key not in ordered_cell_source_map , 'Row/cell collision: {}' .format (key )
277- ordered_cell_source_map [key ] = row_source_map [key ]
278- return result , ordered_cell_source_map , heading_source_map
291+ ordered_items .append ((key .split ('/' ), row_source_map [key ]))
292+
293+ if cell_source_map :
294+ with codecs .open (cell_source_map , 'w' , encoding = 'utf-8' ) as fp :
295+ json .dump (
296+ OrderedDict (( '/' .join (str (x ) for x in path ), location ) for path , location in ordered_items ),
297+ fp , default = decimal_default , ensure_ascii = False , indent = 4
298+ )
299+ if heading_source_map :
300+ with codecs .open (heading_source_map , 'w' , encoding = 'utf-8' ) as fp :
301+ json .dump (heading_source_map_data , fp , indent = 4 , default = decimal_default , ensure_ascii = False )
302+
279303
280304def extract_list_to_error_path (path , input ):
281305 output = {}
@@ -310,24 +334,6 @@ def extract_dict_to_error_path(path, input):
310334 raise Exception ('Unexpected result type in the JSON cell tree: {}' .format (input [k ]))
311335 return output
312336
313- def extract_list_to_value (input ):
314- output = []
315- for item in input :
316- output .append (extract_dict_to_value (item ))
317- return output
318-
319- def extract_dict_to_value (input ):
320- output = OrderedDict ()
321- for k in input :
322- if isinstance (input [k ], list ):
323- output [k ] = extract_list_to_value (input [k ])
324- elif isinstance (input [k ], dict ):
325- output [k ] = extract_dict_to_value (input [k ])
326- elif isinstance (input [k ], Cell ):
327- output [k ] = input [k ].cell_value
328- else :
329- raise Exception ('Unexpected result type in the JSON cell tree: {}' .format (input [k ]))
330- return output
331337
332338class CSVInput (SpreadsheetInput ):
333339 encoding = 'utf-8'
@@ -538,6 +544,7 @@ def path_search(nested_dict, path_list, id_fields=None, path=None, top=False, to
538544
539545
540546class TemporaryDict (UserDict ):
547+ __slots__ = ['keyfield' , 'items_no_keyfield' , 'data' , 'top_sheet' ]
541548 def __init__ (self , keyfield , top_sheet = False ):
542549 self .keyfield = keyfield
543550 self .items_no_keyfield = []
0 commit comments