Skip to content
Snippets Groups Projects
Commit a83e34b3 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent c10b0129
Branches
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@ import h5py
import numpy as np
import pandas as pd
import random
import os
def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
......@@ -14,17 +15,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
output_file_prefix (str): Prefix for the output NPZ and CSV files.
chunk_size (int): Size of chunks to process at once (default is 10000).
"""
# Get the directory from hdf5_filename
dirpath = os.path.dirname(hdf5_filename)
# # New step: keep only 10 mask values == True
# mask_indices = list(np.nonzero(mask)[0])
# if len(mask_indices) > 10:
# selected_indices = random.sample(mask_indices, 10)
# new_mask = np.zeros(mask.size).astype(np.bool)
# new_mask[selected_indices] = True
# mask = new_mask
# keep_array.append(data[mask])
# Step 1: Open HDF5 file
with h5py.File(hdf5_filename, "r") as file:
fov_mask = np.asarray(file["FOV_mask"])
# these are 1D arrays that we'll broadcast from below
......@@ -85,9 +78,9 @@ def hdf5_to_npz_csv(hdf5_filename, output_file_prefix, chunk_size=10000):
df = pd.DataFrame(combined_dict)
# Write the DataFrame to a file
df.to_csv(f"{output_file_prefix}_1D.csv", index=False)
df.to_csv(os.path.join(dirpath, f"{output_file_prefix}_1D.csv"), index=False)
# Write the combined_dict to a new HDF5 file
with h5py.File(f"{output_file_prefix}_1D.h5", 'w') as output_file:
with h5py.File(os.path.join(dirpath, f"{output_file_prefix}_1D.h5"), 'w') as output_file:
for key, data in combined_dict.items():
output_file.create_dataset(key, data=data)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment