Skip to content
Snippets Groups Projects
Commit a83e34b3 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent c10b0129
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@ import h5py
import numpy as np
import pandas as pd
import random
import os
def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
......@@ -14,17 +15,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
output_file_prefix (str): Prefix for the output NPZ and CSV files.
chunk_size (int): Size of chunks to process at once (default is 10000).
"""
# Get the directory from hdf5_filename
dirpath = os.path.dirname(hdf5_filename)
# # New step: keep only 10 mask values == True
# mask_indices = list(np.nonzero(mask)[0])
# if len(mask_indices) > 10:
# selected_indices = random.sample(mask_indices, 10)
# new_mask = np.zeros(mask.size).astype(np.bool)
# new_mask[selected_indices] = True
# mask = new_mask
# keep_array.append(data[mask])
# Step 1: Open HDF5 file
with h5py.File(hdf5_filename, "r") as file:
fov_mask = np.asarray(file["FOV_mask"])
# these are 1D arrays that we'll broadcast from below
......@@ -85,9 +78,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
df = pd.DataFrame(combined_dict)
# Write the DataFrame to a file
df.to_csv(f"{output_file_prefix}_1D.csv", index=False)
df.to_csv(os.path.join(dirpath, f"{output_file_prefix}_1D.csv"), index=False)
# Write the combined_dict to a new HDF5 file
with h5py.File(f"{output_file_prefix}_1D.h5", 'w') as output_file:
with h5py.File(os.path.join(dirpath, f"{output_file_prefix}_1D.h5"), 'w') as output_file:
for key, data in combined_dict.items():
output_file.create_dataset(key, data=data)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment