diff --git a/modules/util/hdf5_conversion.py b/modules/util/hdf5_conversion.py index 0aa0eb33bd77c252fd2dd24919462d965d175589..60b5617abdbb668858826a930479f7878071a948 100644 --- a/modules/util/hdf5_conversion.py +++ b/modules/util/hdf5_conversion.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=1000): +def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000): """ Convert HDF5 files to NumPy's NPZ and CSV formats in chunks. Only values where the boolean mask is True are included. @@ -16,10 +16,10 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=1000): # Step 1: Open HDF5 file with h5py.File(hdf5_filename, "r") as file: - mask = np.asarray(file["mask"]) # If mask needs to be applied, load it into memory + mask = np.asarray(file["FOV_mask"]) # If mask needs to be applied, load it into memory # For each dataset - for dataset_name in file.keys(): + for dataset_name in filter(lambda key: key != "FOV_mask", file.keys()): dataset = file[dataset_name] # Determine how many chunks are needed (rounded up)