Skip to content
Snippets Groups Projects
Commit 14f89207 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 0f1c3c75
No related branches found
No related tags found
No related merge requests found
...@@ -4,51 +4,56 @@ import numpy as np ...@@ -4,51 +4,56 @@ import numpy as np
def split_dataset(input_file, output_pattern, dim_name, chunk_size,
                  skip_attr_vars=('gs_1c_spect',)):
    """Split a NetCDF file into consecutive chunks along one dimension.

    Every chunk is written to its own NETCDF4 file. All dimensions,
    variables, variable attributes and global attributes are copied;
    variables that use ``dim_name`` are sliced along that dimension, all
    other variables are copied whole.

    Args:
        input_file: Path of the NetCDF file to read.
        output_pattern: ``str.format`` pattern for the output paths; it
            receives the zero-based chunk index, e.g. ``'output_{}.nc'``.
        dim_name: Name of the dimension to split along.
        chunk_size: Maximum length of ``dim_name`` in each output file.
            The final chunk is shorter when the dimension length is not a
            multiple of ``chunk_size``.
        skip_attr_vars: Names of variables whose attributes are NOT copied.
            Defaults to ``('gs_1c_spect',)`` because the original file has
            bad metadata for that field.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If ``dim_name`` is not a dimension of the input file.
    """
    if chunk_size < 1:
        raise ValueError(
            "chunk_size must be a positive integer, got {!r}".format(chunk_size))

    # Context managers close both files even if copying fails part-way.
    with nc.Dataset(input_file, 'r') as ds:
        total_size = len(ds.dimensions[dim_name])

        # One iteration per chunk, including the trailing partial one.
        for i, start in enumerate(range(0, total_size, chunk_size)):
            end = min(start + chunk_size, total_size)
            chunk = slice(start, end)

            output_file = output_pattern.format(i)
            with nc.Dataset(output_file, 'w', format='NETCDF4') as ds_out:
                # Copy dimensions, shrinking only the split dimension.
                # A separate name is used so total_size is never clobbered.
                for name, dim in ds.dimensions.items():
                    if name == dim_name:
                        out_size = end - start
                    elif dim.isunlimited():
                        out_size = None
                    else:
                        out_size = len(dim)
                    ds_out.createDimension(name, out_size)

                # Copy variables
                for name, var in ds.variables.items():
                    # Disable automatic masking/scaling on both sides so the
                    # stored values are transferred unchanged.
                    var.set_auto_maskandscale(False)
                    out_var = ds_out.createVariable(
                        name, var.datatype, var.dimensions)
                    out_var.set_auto_maskandscale(False)

                    # Copy variable attributes (before any data is written)
                    if name not in skip_attr_vars:
                        out_var.setncatts(
                            {k: var.getncattr(k) for k in var.ncattrs()})

                    # Slice along the split dimension wherever it sits in the
                    # variable's dimension tuple; keep other variables as is.
                    if dim_name in var.dimensions:
                        index = tuple(
                            chunk if d == dim_name else slice(None)
                            for d in var.dimensions)
                        out_var[:] = var[index]
                    else:
                        out_var[:] = var[:]

                # Copy global attributes
                ds_out.setncatts({k: ds.getncattr(k) for k in ds.ncattrs()})
# Call the function
# split_dataset('input.nc', 'output_{}.nc', 'time', 10)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment