3434array_signed_int = array .array ('l' , [1 , 2 , 3 , 4 , 5 ])
3535array_signed_int_empty = array .array ('l' )
3636# should show as hello alpha and omega
37- array_unicode = array .array ('u ' , 'hello \u03B1 and \u03C9 ' )
37+ array_unicode = array .array ('w ' , 'hello \u03B1 and \u03C9 ' )
3838
3939# list
4040list_empty = []
@@ -192,7 +192,7 @@ def test_plot_returning_ax_and_using_show():
192192la_long_axes_names = la .zeros ('first_axis=a0,a1; second_axis=b0,b1' )
193193
# la_wb is only created when the xlwings package can be located; otherwise the
# name is still bound (to None) so that later code can reference it safely.
if importlib.util.find_spec('xlwings') is not None:
    # the workbook fixture lives in the data/ directory next to the other
    # test files (data/test.zip, data/test.duckdb, ...)
    la_wb = la.open_excel('data/test.xlsx')
else:
    print("skipping larray.Workbook test (xlwings not installed)")
    la_wb = None
@@ -231,13 +231,6 @@ def test_plot_returning_ax_and_using_show():
231231# compare(la_int_2d, la_int_2d + 1.0, names=['la_int_2d', 'la_int_2d + 1.0'])
232232# compare(np.random.normal(0, 1, size=(10, 2)), np.random.normal(0, 1, size=(10, 2)))
233233
234- # sess1 = la.Session(arr4=arr4, arr3=la_arr3, data=data3)
235- # sess1.save('sess1.h5')
236- # sess2 = la.Session(arr4=arr4 + 1.0, arr3=la_arr3 * 2.0, data=data3 * 1.05)
237- # compare('sess1.h5', sess2) # sess1.h5/data is nan because np arrays are not saved to H5
238- # compare(Path('sess1.h5'), sess2)
239- # compare(la.Session(arr2=arr2, arr3=la_arr3),
240- # la.Session(arr2=arr2 + 1.0, arr3=la_arr3 * 2.0))
241234arr1 = la .ndtest ((2 , 3 ))
242235arr2 = la .ndtest ((3 , 4 ))
243236arr1bis = arr1 .copy ()
@@ -313,6 +306,24 @@ def test_plot_returning_ax_and_using_show():
313306# compare(arr1, arr6)
314307# compare(arr6, arr1)
315308
def test_compare_with_file_path():
    """Call compare() with a session file given first as a str, then as a Path.

    A first session is saved to 'sess1.h5' in the current working directory
    and compared against a second, in-memory session built from modified
    versions of the same objects. The file is removed afterwards, including
    when one of the compare() calls raises.
    """
    from larray_editor.api import compare

    sess1 = la.Session(arr4=la_int_2d, arr3=la_float_round_values,
                       data=np_arr2d)
    sess1.save('sess1.h5')
    try:
        sess2 = la.Session(arr4=la_int_2d + 1.0,
                           arr3=la_float_round_values * 2.0,
                           data=np_arr2d * 1.05)
        # sess1.h5/data is nan because np arrays are not saved to H5
        # using a string path
        compare('sess1.h5', sess2)
        # using a Path object
        compare(Path('sess1.h5'), sess2)
    finally:
        # do not leave the temporary session file behind if compare() fails
        Path('sess1.h5').unlink()
323+
324+ # test_compare_with_file_path()
325+
326+
316327# test for arr.plot(show=True) which is the default
317328# =================================================
318329# arr = la.ndtest((20, 5)) + la.random.randint(0, 3, axes="a=a0..a19;b=b0..b4")
@@ -328,9 +339,9 @@ def test_run_editor_on_exception(local_arr):
328339# run_editor_on_exception(usercode_traceback=False, usercode_frame=False)
329340
330341# test_run_editor_on_exception(arr2)
331- def make_test_df (size ):
342+ def make_test_df (size , offset = 0 ):
332343 return pd .DataFrame ({
333- 'name' : la .sequence (size ).apply (lambda i : f'name{ i } ' ).to_series (),
344+ 'name' : la .sequence (size , initial = offset ).apply (lambda i : f'name{ i } ' ).to_series (),
334345 'age' : la .random .randint (0 , 105 , axes = size ).to_series (),
335346 'male' : (la .random .randint (0 , 2 , axes = size ) == 1 ).to_series (),
336347 'height' : la .random .normal (1.75 , 0.07 , axes = size ).to_series ()
@@ -345,9 +356,61 @@ def make_test_df(size):
345356pd_series = pd_df2 .stack ()
346357
347358pd_df_big = la_big3d .df
348- # _big_no_idx = pd_df_big.reset_index()
349- # _big_no_idx.to_parquet('big.parquet')
350- # _big_no_idx.to_feather('big.feather')
359+
# Each "big" fixture file is only written when it is not already present in
# the data/ directory.
if not Path('data/big.parquet').exists():
    print("Generating big.parquet test files (this may take a while)...", end=' ', flush=True)
    # Only the parquet file is written from the pandas DataFrame: Polars seems
    # to have issues with Feather files written by Pandas.
    pd_df_big.reset_index().to_parquet('data/big.parquet')
    print("done.")

if not Path('data/big.h5').exists():
    print("Generating big.h5 test file...", end=' ', flush=True)
    la_big3d.to_hdf('data/big.h5', key='data')
    print("done.")

if not Path('data/big.csv').exists():
    print("Generating big.csv test file...", end=' ', flush=True)
    la_big3d.to_csv('data/big.csv')
    print("done.")
379+
try:
    import pyarrow as pa
    import pyarrow.parquet as pq

    # small in-memory pyarrow objects
    pyarrow_int_array = pa.array([2, 4, 5, 42])
    pyarrow_str_array = pa.array(["Hello", "from", "Arrow", "!"])
    pyarrow_table = pa.Table.from_arrays([pyarrow_int_array, pyarrow_str_array],
                                         names=["int_col", "str_col"])

    pyarrow_parquet_file = pq.ParquetFile('data/big.parquet')

    def gen_feather_file(fpath):
        """Write a large Arrow IPC (feather) test file to *fpath*.

        The file is made of NUM_BATCHES record batches of BATCH_SIZE rows
        each, every batch being produced by make_test_df() with an offset so
        that the 'name' column keeps increasing across batches.

        The data is first written to '<fpath>.tmp' and only renamed to *fpath*
        once complete, so that an interrupted run cannot leave a truncated
        file which a later "does the file exist?" check would take as valid.
        """
        print("Generating big.feather test file...", end=' ', flush=True)
        BATCH_SIZE = 10_000
        NUM_BATCHES = 10_000
        schema = pa.schema([
            pa.field('name', pa.string()),
            pa.field('age', pa.int32()),
            pa.field('male', pa.bool_()),
            pa.field('height', pa.float32()),
        ])
        tmp_path = Path(f'{fpath}.tmp')
        try:
            with pa.OSFile(str(tmp_path), 'wb') as sink:
                with pa.ipc.new_file(sink, schema) as writer:
                    for batch_num in range(NUM_BATCHES):
                        batch_df = make_test_df(BATCH_SIZE,
                                                offset=batch_num * BATCH_SIZE)
                        batch = pa.RecordBatch.from_pandas(batch_df, schema=schema)
                        writer.write(batch)
            # publish the file only once it has been written entirely
            tmp_path.replace(fpath)
        finally:
            # still present only if something went wrong before the rename
            if tmp_path.exists():
                tmp_path.unlink()
        print("done.")

    if not Path('data/big.feather').exists():
        gen_feather_file('data/big.feather')
except ImportError:
    print("skipping pyarrow tests (not installed)")
351414
352415try :
353416 import polars as pl
@@ -359,8 +422,8 @@ def make_test_df(size):
359422 pl_df3 = pl_df1 .select (pl .from_epoch (pl .col ('M' )).alias ('datetime_col' ), 'M' ).limit (5 )
360423 pl_df_big = pl .from_pandas (pd_df_big , include_index = True )
361424 pl_df_mixed = pl .from_pandas (pd_df_mixed , include_index = False )
362- pl_lf_parquet = pl .scan_parquet ('big.parquet' )
363- pl_lf_feather = pl .scan_ipc ('big.feather' )
425+ pl_lf_parquet = pl .scan_parquet ('data/ big.parquet' )
426+ pl_lf_feather = pl .scan_ipc ('data/ big.feather' )
364427
365428 try :
366429 import narwhals as nw
@@ -376,33 +439,7 @@ def make_test_df(size):
376439
377440path_dir = Path ('.' )
378441path_py = Path ('test_adapter.py' )
379- path_csv = Path ('be.csv' )
380-
381- try :
382- import pyarrow as pa
383- import pyarrow .parquet as pq
384-
385- pyarrow_int_array = pa .array ([2 , 4 , 5 , 42 ])
386- pyarrow_str_array = pa .array (["Hello" , "from" , "Arrow" , "!" ])
387- pyarrow_table = pa .Table .from_arrays ([pyarrow_int_array , pyarrow_str_array ],
388- names = ["int_col" , "str_col" ])
389-
390- pyarrow_parquet_file = pq .ParquetFile ('c:/tmp/exiobase/full/L.parquet' )
391-
392- # to generate a big feather/arrow test file, use something like (just add more columns):
393- # BATCH_SIZE = 10000
394- # NUM_BATCHES = 1000
395- # schema = pa.schema([pa.field('nums', pa.int32())])
396- # with pa.OSFile('bigfile.arrow', 'wb') as sink:
397- # with pa.ipc.new_file(sink, schema) as writer:
398- # for row in range(NUM_BATCHES):
399- # batch = pa.record_batch([pa.array(range(BATCH_SIZE), type=pa.int32())], schema)
400- # writer.write(batch)
401- # from pyarrow.dataset import dataset
402-
403- # d = dataset('OIN/data.feather', format='ipc')
404- except ImportError :
405- print ("skipping pyarrow tests (not installed)" )
442+ path_csv = Path ('data/big.csv' )
406443
407444# import cProfile as profile
408445# profile.runctx('edit(la.Session(arr2=arr2))', vars(), {},
@@ -419,18 +456,28 @@ def make_test_df(size):
try:
    import duckdb

    # in-memory duckdb database
    duckdb_con = duckdb.connect(":memory:")
    duckdb_con.execute("create table lang (name VARCHAR, first_appeared INTEGER)")
    duckdb_con.executemany("insert into lang values (?, ?)", list_mixed_tuples)
    duckdb_table = duckdb_con.table('lang')

    # file-based duckdb database, only created when it does not exist yet
    if not Path('data/test.duckdb').exists():
        print("Generating test.duckdb test file...", end=' ', flush=True)
        # copy the content of the in-memory database to the file
        duckdb_con.execute("""
        ATTACH 'data/test.duckdb' AS file_db;
        COPY FROM DATABASE memory TO file_db;
        DETACH file_db;""")
        # add a large table to the file; the context manager closes the
        # connection even if the statement fails
        with duckdb.connect('data/test.duckdb') as duckdb_file_con:
            duckdb_file_con.execute("CREATE TABLE big AS SELECT * FROM "
                                    "read_parquet('data/big.parquet')")
        print("done.")

except ImportError:
    print("skipping duckdb tests (not installed)")
428479
429- zipf = zipfile .ZipFile ('c:/Users/gdm/Downloads/active_directory-0.6.7.zip' )
430-
431- # from pandasgui.datasets import pokemon, titanic, mi_manufacturing, trump_tweets, all_datasets
432- # from pandasgui import show
433- # gui = show(pokemon, titanic, mi_manufacturing)
480+ zipf = zipfile .ZipFile ('data/test.zip' )
434481
435482edit ()
436483# debug()
0 commit comments