diff --git a/modules/util/hdf5_conversion.py b/modules/util/hdf5_conversion.py index 08dc02e09003dcadfb0aa8c62daa922c483a1351..d92fb4292bea7c272429a2e826d686ccbb4abe85 100644 --- a/modules/util/hdf5_conversion.py +++ b/modules/util/hdf5_conversion.py @@ -2,6 +2,7 @@ import h5py import numpy as np import pandas as pd import random +import os def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000): @@ -14,17 +15,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000): output_file_prefix (str): Prefix for the output NPZ and CSV files. chunk_size (int): Size of chunks to process at once (default is 1000). """ + # Get the directory from hdf5_filename + dirpath = os.path.dirname(hdf5_filename) - # # New step: keep only 10 mask values == True - # mask_indices = list(np.nonzero(mask)[0]) - # if len(mask_indices) > 10: - # selected_indices = random.sample(mask_indices, 10) - # new_mask = np.zeros(mask.size).astype(np.bool) - # new_mask[selected_indices] = True - # mask = new_mask - # keep_array.append(data[mask]) - - # Step 1: Open HDF5 file with h5py.File(hdf5_filename, "r") as file: fov_mask = np.asarray(file["FOV_mask"]) # these are 1D arrays that we'll broadcast from below @@ -85,9 +78,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000): df = pd.DataFrame(combined_dict) # Write the DataFrame to a file - df.to_csv(f"{output_file_prefix}_1D.csv", index=False) + df.to_csv(os.path.join(dirpath, f"{output_file_prefix}_1D.csv"), index=False) # Write the combined_dict to a new HDF5 file - with h5py.File(f"{output_file_prefix}_1D.h5", 'w') as output_file: + with h5py.File(os.path.join(dirpath, f"{output_file_prefix}_1D.h5"), 'w') as output_file: for key, data in combined_dict.items(): output_file.create_dataset(key, data=data) \ No newline at end of file