# Origin: modules/util/split_nc4.py, introduced in commit
# 0f1c3c756f50dd86049e9507e44573caca30113f ("snapshot...").
"""Split a netCDF file into several smaller files along one dimension."""

import netCDF4 as nc
import numpy as np


def split_dataset(input_file, output_pattern, dim_name, chunk_size):
    """Split a netCDF file into consecutive chunks along one dimension.

    For every chunk a new file is written that holds all dimensions,
    variables, variable attributes and global attributes found directly on
    the opened input dataset (groups are not traversed). Variables that use
    ``dim_name`` receive only the chunk's slice along that dimension; every
    other variable is copied whole into each output file.

    In the output files ``dim_name`` is created with a fixed size equal to
    the chunk length. Other dimensions keep their length, or stay unlimited
    if they are unlimited in the input.

    Args:
        input_file: Path of the netCDF file to read.
        output_pattern: ``str.format`` template for the output paths. It is
            formatted with the zero-based chunk index as its only positional
            argument, e.g. ``'output_{}.nc'``. A pattern without a
            placeholder makes every chunk overwrite the same file.
        dim_name: Name of the dimension to split along.
        chunk_size: Maximum length of ``dim_name`` in each output file. The
            last chunk is shorter when the dimension length is not a
            multiple of ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If ``dim_name`` is not a dimension of the input dataset.

    Example:
        split_dataset('input.nc', 'output_{}.nc', 'time', 10)
    """
    if chunk_size < 1:
        raise ValueError(
            "chunk_size must be a positive integer, got {!r}".format(chunk_size)
        )

    with nc.Dataset(input_file, 'r') as ds:
        # Total length of the split dimension. Kept in its own name so the
        # per-dimension sizes computed below can never overwrite it.
        total = len(ds.dimensions[dim_name])
        num_chunks = int(np.ceil(total / chunk_size))

        # Every chunk is written, including the (possibly shorter) last one.
        for i in range(num_chunks):
            start = i * chunk_size
            end = min(start + chunk_size, total)
            chunk = slice(start, end)

            with nc.Dataset(output_pattern.format(i), 'w') as ds_out:
                # Copy dimensions, shrinking only the split dimension.
                for name, dim in ds.dimensions.items():
                    if name == dim_name:
                        out_size = end - start
                    elif dim.isunlimited():
                        out_size = None  # None keeps the dimension unlimited
                    else:
                        out_size = len(dim)
                    ds_out.createDimension(name, out_size)

                # Copy variables.
                for name, var in ds.variables.items():
                    attrs = {k: var.getncattr(k) for k in var.ncattrs()}

                    # _FillValue cannot be assigned after creation; netCDF4
                    # only accepts it through createVariable(fill_value=...).
                    fill_value = attrs.pop('_FillValue', None)
                    out_var = ds_out.createVariable(
                        name,
                        var.datatype,
                        var.dimensions,
                        fill_value=fill_value,
                    )
                    out_var.setncatts(attrs)

                    if dim_name in var.dimensions:
                        # Slice along whichever axis carries dim_name; it is
                        # not necessarily the variable's first dimension.
                        index = tuple(
                            chunk if d == dim_name else slice(None)
                            for d in var.dimensions
                        )
                        out_var[:] = var[index]
                    else:
                        out_var[:] = var[:]

                # Copy global attributes.
                ds_out.setncatts({k: ds.getncattr(k) for k in ds.ncattrs()})