Skip to content
Snippets Groups Projects
Commit 14f89207 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 0f1c3c75
No related branches found
No related tags found
No related merge requests found
...@@ -4,7 +4,7 @@ import numpy as np ...@@ -4,7 +4,7 @@ import numpy as np
def split_dataset(input_file, output_pattern, dim_name, chunk_size): def split_dataset(input_file, output_pattern, dim_name, chunk_size):
# Load the input dataset # Load the input dataset
with nc.Dataset(input_file, 'r') as ds: ds = nc.Dataset(input_file, 'r', format='NETCDF4')
dim_size = len(ds.dimensions[dim_name]) dim_size = len(ds.dimensions[dim_name])
# Calculate the number of chunks # Calculate the number of chunks
...@@ -21,8 +21,8 @@ def split_dataset(input_file, output_pattern, dim_name, chunk_size): ...@@ -21,8 +21,8 @@ def split_dataset(input_file, output_pattern, dim_name, chunk_size):
# Create a new output file for this chunk # Create a new output file for this chunk
output_file = output_pattern.format(i) output_file = output_pattern.format(i)
rootgrp = nc.Dataset(output_file, 'w', format='NETCDF4')
with nc.Dataset(output_file, 'w') as ds_out:
# Copy dimensions # Copy dimensions
for name, dim in ds.dimensions.items(): for name, dim in ds.dimensions.items():
# Adjust the dimension size for the split dimension # Adjust the dimension size for the split dimension
...@@ -31,24 +31,29 @@ def split_dataset(input_file, output_pattern, dim_name, chunk_size): ...@@ -31,24 +31,29 @@ def split_dataset(input_file, output_pattern, dim_name, chunk_size):
else: else:
dim_size = len(dim) if not dim.isunlimited() else None dim_size = len(dim) if not dim.isunlimited() else None
ds_out.createDimension(name, dim_size) rootgrp.createDimension(name, dim_size)
# Copy variables # Copy variables
for name, var in ds.variables.items(): for name, var in ds.variables.items():
outVar = ds_out.createVariable(name, var.datatype, var.dimensions) var.set_auto_maskandscale(False)
outVar = rootgrp.createVariable(name, var.datatype, var.dimensions)
outVar.set_auto_maskandscale(False)
# Copy variable attributes # Copy variable attributes
if name != 'gs_1c_spect': # The original file has bad metadata for this, and possibly other fields
outVar.setncatts({k: var.getncattr(k) for k in var.ncattrs()}) outVar.setncatts({k: var.getncattr(k) for k in var.ncattrs()})
# Divide variable data for the split dimension, keep others as is # Divide variable data for the split dimension, keep others as is
if dim_name in var.dimensions: if dim_name in var.dimensions:
print(name, outVar.shape, var.shape)
outVar[:,] = var[slice_indices,] outVar[:,] = var[slice_indices,]
else: else:
outVar[:,] = var[:,] outVar[:,] = var[:,]
# Copy global attributes # Copy global attributes
ds_out.setncatts({k: ds.getncattr(k) for k in ds.ncattrs()}) rootgrp.setncatts({k: ds.getncattr(k) for k in ds.ncattrs()})
rootgrp.close()
ds.close()
# Call the function # Call the function
# split_dataset('input.nc', 'output_{}.nc', 'time', 10) # split_dataset('input.nc', 'output_{}.nc', 'time', 10)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment