Skip to content
Snippets Groups Projects
Commit 14f89207 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 0f1c3c75
No related branches found
No related tags found
No related merge requests found
......@@ -4,51 +4,56 @@ import numpy as np
def split_dataset(input_file, output_pattern, dim_name, chunk_size,
                  skip_attr_vars=('gs_1c_spect',)):
    """Split a NetCDF file into several smaller files along one dimension.

    Every output file receives all dimensions, variables and global
    attributes of the input. Variables that use ``dim_name`` are cut down to
    the chunk's index range along that dimension; all other variables are
    copied whole into every output file.

    Args:
        input_file: Path of the NetCDF file to read.
        output_pattern: ``str.format`` pattern for the output paths; it is
            formatted with the zero-based chunk index, e.g. ``'out_{}.nc'``.
        dim_name: Name of the dimension to split along.
        chunk_size: Maximum number of entries of ``dim_name`` per output
            file. The final file holds the remainder and may be shorter.
        skip_attr_vars: Names of variables whose attributes are NOT copied
            to the output. Defaults to ``('gs_1c_spect',)``, which the
            original code skipped because of bad metadata in its input file.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If ``dim_name`` is not a dimension of the input file.
    """
    if chunk_size < 1:
        raise ValueError(
            'chunk_size must be a positive integer, got {!r}'.format(chunk_size))

    with nc.Dataset(input_file, 'r') as ds:
        # Full extent of the split dimension. It lives in its own name so the
        # per-dimension sizes computed below can never overwrite it.
        total_size = len(ds.dimensions[dim_name])

        # Stepping by chunk_size visits every chunk, including the final
        # (possibly partial) one.
        for i, start in enumerate(range(0, total_size, chunk_size)):
            end = min(start + chunk_size, total_size)
            split_slice = slice(start, end)

            output_file = output_pattern.format(i)
            with nc.Dataset(output_file, 'w', format='NETCDF4') as ds_out:
                # Copy dimensions; only the split dimension changes size.
                for name, dim in ds.dimensions.items():
                    if name == dim_name:
                        out_size = end - start
                    elif dim.isunlimited():
                        out_size = None  # None creates an unlimited dimension
                    else:
                        out_size = len(dim)
                    ds_out.createDimension(name, out_size)

                # Copy variables.
                for name, var in ds.variables.items():
                    # Move the stored values as-is: no masking and no
                    # scale/offset conversion on either side.
                    var.set_auto_maskandscale(False)
                    out_var = ds_out.createVariable(
                        name, var.datatype, var.dimensions)
                    out_var.set_auto_maskandscale(False)

                    if name not in skip_attr_vars:
                        out_var.setncatts(
                            {k: var.getncattr(k) for k in var.ncattrs()})

                    if dim_name in var.dimensions:
                        # Put the chunk slice on whichever axis carries the
                        # split dimension; take every other axis in full.
                        index = tuple(
                            split_slice if d == dim_name else slice(None)
                            for d in var.dimensions)
                        out_var[:] = var[index]
                    else:
                        out_var[:] = var[:]

                # Copy global attributes.
                ds_out.setncatts({k: ds.getncattr(k) for k in ds.ncattrs()})
# Call the function
# split_dataset('input.nc', 'output_{}.nc', 'time', 10)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment