snapshot...

a83e34b3 · tomrink · c10b0129 · a83e34b3
Commit a83e34b3 authored Apr 24, 2024 by tomrink
--- a/modules/util/hdf5_conversion.py
+++ b/modules/util/hdf5_conversion.py
@@ -2,6 +2,7 @@ import h5py
 import numpy as np
 import pandas as pd
 import random
+import os


 def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
@@ -14,17 +15,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
    output_file_prefix (str): Prefix for the output NPZ and CSV files.
    chunk_size (int): Size of chunks to process at once (default is 10000).
    """
+    # Get the directory from hdf5_filename
+    dirpath = os.path.dirname(hdf5_filename)

-    # # New step: keep only 10 mask values == True
-    # mask_indices = list(np.nonzero(mask)[0])
-    # if len(mask_indices) > 10:
-    #     selected_indices = random.sample(mask_indices, 10)
-    #     new_mask = np.zeros(mask.size).astype(np.bool)
-    #     new_mask[selected_indices] = True
-    #     mask = new_mask
-    #     keep_array.append(data[mask])
-
-    # Step 1: Open HDF5 file
    with h5py.File(hdf5_filename, "r") as file:
        fov_mask = np.asarray(file["FOV_mask"])
        # these are 1D arrays that we'll broadcast from below
@@ -85,9 +78,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
        df = pd.DataFrame(combined_dict)

        # Write the DataFrame to a file
-        df.to_csv(f"{output_file_prefix}_1D.csv", index=False)
+        df.to_csv(os.path.join(dirpath, f"{output_file_prefix}_1D.csv"), index=False)

        # Write the combined_dict to a new HDF5 file
-        with h5py.File(f"{output_file_prefix}_1D.h5", 'w') as output_file:
+        with h5py.File(os.path.join(dirpath, f"{output_file_prefix}_1D.h5"), 'w') as output_file:
            for key, data in combined_dict.items():
                output_file.create_dataset(key, data=data)
\ No newline at end of file