Skip to content

Commit f68f0a8

Browse files
committed
CLN: only use committed files in tests
1 parent ab29402 commit f68f0a8

File tree

3 files changed

+95
-48
lines changed

3 files changed

+95
-48
lines changed

larray_editor/tests/data/test.xlsx

10.2 KB
Binary file not shown.

larray_editor/tests/data/test.zip

632 Bytes
Binary file not shown.

larray_editor/tests/test_api_larray.py

Lines changed: 95 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
array_signed_int = array.array('l', [1, 2, 3, 4, 5])
3535
array_signed_int_empty = array.array('l')
3636
# should show as hello alpha and omega
37-
array_unicode = array.array('u', 'hello \u03B1 and \u03C9')
37+
array_unicode = array.array('w', 'hello \u03B1 and \u03C9')
3838

3939
# list
4040
list_empty = []
@@ -192,7 +192,7 @@ def test_plot_returning_ax_and_using_show():
192192
la_long_axes_names = la.zeros('first_axis=a0,a1; second_axis=b0,b1')
193193

194194
if importlib.util.find_spec('xlwings') is not None:
195-
la_wb = la.open_excel('test.xlsx')
195+
la_wb = la.open_excel('data/test.xlsx')
196196
else:
197197
print("skipping larray.Workbook test (xlwings not installed)")
198198
la_wb = None
@@ -231,13 +231,6 @@ def test_plot_returning_ax_and_using_show():
231231
# compare(la_int_2d, la_int_2d + 1.0, names=['la_int_2d', 'la_int_2d + 1.0'])
232232
# compare(np.random.normal(0, 1, size=(10, 2)), np.random.normal(0, 1, size=(10, 2)))
233233

234-
# sess1 = la.Session(arr4=arr4, arr3=la_arr3, data=data3)
235-
# sess1.save('sess1.h5')
236-
# sess2 = la.Session(arr4=arr4 + 1.0, arr3=la_arr3 * 2.0, data=data3 * 1.05)
237-
# compare('sess1.h5', sess2) # sess1.h5/data is nan because np arrays are not saved to H5
238-
# compare(Path('sess1.h5'), sess2)
239-
# compare(la.Session(arr2=arr2, arr3=la_arr3),
240-
# la.Session(arr2=arr2 + 1.0, arr3=la_arr3 * 2.0))
241234
arr1 = la.ndtest((2, 3))
242235
arr2 = la.ndtest((3, 4))
243236
arr1bis = arr1.copy()
@@ -313,6 +306,24 @@ def test_plot_returning_ax_and_using_show():
313306
# compare(arr1, arr6)
314307
# compare(arr6, arr1)
315308

309+
def test_compare_with_file_path():
    """Check that compare() accepts a saved session given as a str or a Path.

    A first session is saved to ``sess1.h5`` in the current working
    directory and compared with a second in-memory session holding modified
    values, once using a string path and once using a ``Path`` object.

    The file is removed afterwards, even when saving or comparing fails, so
    that a failed run does not leave an uncommitted file behind.
    """
    from larray_editor.api import compare

    h5_path = Path('sess1.h5')
    sess1 = la.Session(arr4=la_int_2d, arr3=la_float_round_values,
                       data=np_arr2d)
    sess2 = la.Session(arr4=la_int_2d + 1.0, arr3=la_float_round_values * 2.0,
                       data=np_arr2d * 1.05)
    try:
        sess1.save('sess1.h5')
        # sess1.h5/data is nan because np arrays are not saved to H5
        # using a string path
        compare('sess1.h5', sess2)
        # using a Path object
        compare(h5_path, sess2)
    finally:
        # missing_ok: save() itself may have failed before creating the file
        h5_path.unlink(missing_ok=True)
323+
324+
# test_compare_with_file_path()
325+
326+
316327
# test for arr.plot(show=True) which is the default
317328
# =================================================
318329
# arr = la.ndtest((20, 5)) + la.random.randint(0, 3, axes="a=a0..a19;b=b0..b4")
@@ -328,9 +339,9 @@ def test_run_editor_on_exception(local_arr):
328339
# run_editor_on_exception(usercode_traceback=False, usercode_frame=False)
329340

330341
# test_run_editor_on_exception(arr2)
331-
def make_test_df(size):
342+
def make_test_df(size, offset=0):
332343
return pd.DataFrame({
333-
'name': la.sequence(size).apply(lambda i: f'name{i}').to_series(),
344+
'name': la.sequence(size, initial=offset).apply(lambda i: f'name{i}').to_series(),
334345
'age': la.random.randint(0, 105, axes=size).to_series(),
335346
'male': (la.random.randint(0, 2, axes=size) == 1).to_series(),
336347
'height': la.random.normal(1.75, 0.07, axes=size).to_series()
@@ -345,9 +356,61 @@ def make_test_df(size):
345356
pd_series = pd_df2.stack()
346357

347358
pd_df_big = la_big3d.df
348-
# _big_no_idx = pd_df_big.reset_index()
349-
# _big_no_idx.to_parquet('big.parquet')
350-
# _big_no_idx.to_feather('big.feather')
359+
360+
if not Path('data/big.parquet').exists():
361+
print("Generating big.parquet test files (this may take a while)...",
362+
end=' ', flush=True)
363+
_big_no_idx = pd_df_big.reset_index()
364+
_big_no_idx.to_parquet('data/big.parquet')
365+
# Polars seems to have issues with Feather files written by Pandas
366+
# _big_no_idx.to_feather('data/big.feather')
367+
del _big_no_idx
368+
print("done.")
369+
370+
if not Path('data/big.h5').exists():
371+
print("Generating big.h5 test file...", end=' ', flush=True)
372+
la_big3d.to_hdf('data/big.h5', key='data')
373+
print("done.")
374+
375+
if not Path('data/big.csv').exists():
376+
print("Generating big.csv test file...", end=' ', flush=True)
377+
la_big3d.to_csv('data/big.csv')
378+
print("done.")
379+
380+
try:
381+
import pyarrow as pa
382+
import pyarrow.parquet as pq
383+
384+
pyarrow_int_array = pa.array([2, 4, 5, 42])
385+
pyarrow_str_array = pa.array(["Hello", "from", "Arrow", "!"])
386+
pyarrow_table = pa.Table.from_arrays([pyarrow_int_array, pyarrow_str_array],
387+
names=["int_col", "str_col"])
388+
389+
pyarrow_parquet_file = pq.ParquetFile('data/big.parquet')
390+
391+
def gen_feather_file(fpath):
    """Generate a big test file in the Arrow IPC file format at `fpath`.

    The file is written as NUM_BATCHES record batches of BATCH_SIZE rows
    each. Every batch is produced by make_test_df(), with an offset so that
    the 'name' column keeps increasing across batches.

    The data is first written to a sibling temporary file which is renamed
    to `fpath` only once it is complete. Callers guard generation with an
    "exists" check, so a truncated file left by an interrupted run would
    otherwise never be regenerated.

    Parameters
    ----------
    fpath : str or Path
        Path of the file to create.
    """
    print("Generating big.feather test file...", end=' ', flush=True)
    BATCH_SIZE = 10_000
    NUM_BATCHES = 10_000
    schema = pa.schema([
        pa.field('name', pa.string()),
        pa.field('age', pa.int32()),
        pa.field('male', pa.bool_()),
        pa.field('height', pa.float32()),
    ])
    final_path = Path(fpath)
    tmp_path = final_path.with_name(final_path.name + '.tmp')
    try:
        with pa.OSFile(str(tmp_path), 'wb') as sink:
            with pa.ipc.new_file(sink, schema) as writer:
                for batch_num in range(NUM_BATCHES):
                    batch_df = make_test_df(BATCH_SIZE,
                                            offset=batch_num * BATCH_SIZE)
                    batch = pa.RecordBatch.from_pandas(batch_df, schema=schema)
                    writer.write(batch)
        # only expose the file under its final name once it is complete
        tmp_path.replace(final_path)
    finally:
        # no-op after a successful rename; removes the partial file otherwise
        tmp_path.unlink(missing_ok=True)
    print("done.")
409+
410+
if not Path('data/big.feather').exists():
411+
gen_feather_file('data/big.feather')
412+
except ImportError:
413+
print("skipping pyarrow tests (not installed)")
351414

352415
try:
353416
import polars as pl
@@ -359,8 +422,8 @@ def make_test_df(size):
359422
pl_df3 = pl_df1.select(pl.from_epoch(pl.col('M')).alias('datetime_col'), 'M').limit(5)
360423
pl_df_big = pl.from_pandas(pd_df_big, include_index=True)
361424
pl_df_mixed = pl.from_pandas(pd_df_mixed, include_index=False)
362-
pl_lf_parquet = pl.scan_parquet('big.parquet')
363-
pl_lf_feather = pl.scan_ipc('big.feather')
425+
pl_lf_parquet = pl.scan_parquet('data/big.parquet')
426+
pl_lf_feather = pl.scan_ipc('data/big.feather')
364427

365428
try:
366429
import narwhals as nw
@@ -376,33 +439,7 @@ def make_test_df(size):
376439

377440
path_dir = Path('.')
378441
path_py = Path('test_adapter.py')
379-
path_csv = Path('be.csv')
380-
381-
try:
382-
import pyarrow as pa
383-
import pyarrow.parquet as pq
384-
385-
pyarrow_int_array = pa.array([2, 4, 5, 42])
386-
pyarrow_str_array = pa.array(["Hello", "from", "Arrow", "!"])
387-
pyarrow_table = pa.Table.from_arrays([pyarrow_int_array, pyarrow_str_array],
388-
names=["int_col", "str_col"])
389-
390-
pyarrow_parquet_file = pq.ParquetFile('c:/tmp/exiobase/full/L.parquet')
391-
392-
# to generate a big feather/arrow test file, use something like (just add more columns):
393-
# BATCH_SIZE = 10000
394-
# NUM_BATCHES = 1000
395-
# schema = pa.schema([pa.field('nums', pa.int32())])
396-
# with pa.OSFile('bigfile.arrow', 'wb') as sink:
397-
# with pa.ipc.new_file(sink, schema) as writer:
398-
# for row in range(NUM_BATCHES):
399-
# batch = pa.record_batch([pa.array(range(BATCH_SIZE), type=pa.int32())], schema)
400-
# writer.write(batch)
401-
# from pyarrow.dataset import dataset
402-
403-
# d = dataset('OIN/data.feather', format='ipc')
404-
except ImportError:
405-
print("skipping pyarrow tests (not installed)")
442+
path_csv = Path('data/big.csv')
406443

407444
# import cProfile as profile
408445
# profile.runctx('edit(la.Session(arr2=arr2))', vars(), {},
@@ -419,18 +456,28 @@ def make_test_df(size):
419456
try:
420457
import duckdb
421458

459+
# in-memory duckdb database
422460
duckdb_con = duckdb.connect(":memory:")
423461
duckdb_con.execute("create table lang (name VARCHAR, first_appeared INTEGER)")
424462
duckdb_con.executemany("insert into lang values (?, ?)", list_mixed_tuples)
425463
duckdb_table = duckdb_con.table('lang')
464+
465+
if not Path('data/test.duckdb').exists():
466+
print("Generating test.duckdb test file...", end=' ', flush=True)
467+
duckdb_con.execute("""
468+
ATTACH 'data/test.duckdb' AS file_db;
469+
COPY FROM DATABASE memory TO file_db;
470+
DETACH file_db;""")
471+
duckdb_file_con = duckdb.connect('data/test.duckdb')
472+
duckdb_file_con.execute("CREATE TABLE big AS SELECT * FROM "
473+
"read_parquet('data/big.parquet')")
474+
duckdb_file_con.close()
475+
print("done.")
476+
426477
except ImportError:
427478
print("skipping duckdb tests (not installed)")
428479

429-
zipf = zipfile.ZipFile('c:/Users/gdm/Downloads/active_directory-0.6.7.zip')
430-
431-
# from pandasgui.datasets import pokemon, titanic, mi_manufacturing, trump_tweets, all_datasets
432-
# from pandasgui import show
433-
# gui = show(pokemon, titanic, mi_manufacturing)
480+
zipf = zipfile.ZipFile('data/test.zip')
434481

435482
edit()
436483
# debug()

0 commit comments

Comments
 (0)