import numpy as np import h5py import xarray as xr from util.util import get_grid_values_all def acspo_validate(oper_file, cspp_file): h5f_oper = h5py.File(oper_file, 'r') h5f_cspp = h5py.File(cspp_file, 'r') lon_cspp = get_grid_values_all(h5f_cspp, 'lon') lat_cspp = get_grid_values_all(h5f_cspp, 'lat') print('cspp shape: ', lat_cspp.shape) c_idx = lat_cspp.shape[1] // 2 cntr_lon_cspp = lon_cspp[:, c_idx] cntr_lat_cspp = lat_cspp[:, c_idx] sst_cspp = get_grid_values_all(h5f_cspp, 'sea_surface_temperature')[0, ] l2p_flags_cspp = get_grid_values_all(h5f_cspp, 'l2p_flags')[0, ] lon_oper = get_grid_values_all(h5f_oper, 'lon') lat_oper = get_grid_values_all(h5f_oper, 'lat') cntr_lon_oper = lon_oper[:, c_idx] cntr_lat_oper = lat_oper[:, c_idx] print('oper shape: ', lat_oper.shape) sst_oper = get_grid_values_all(h5f_oper, 'sea_surface_temperature')[0, ] l2p_flags_oper = get_grid_values_all(h5f_oper, 'l2p_flags')[0, ] # generate a ndarray of boolean cspp_clear = (l2p_flags_cspp & (1 << 15)) != 0 oper_clear = (l2p_flags_oper & (1 << 15)) != 0 start_idx_oper = -1 stop_idx_oper = -1 start_idx_cspp = -1 stop_idx_cspp = -1 for k in range(len(cntr_lat_oper)): c_a = np.isclose(cntr_lat_oper[k], cntr_lat_cspp) if np.size(np.nonzero(c_a)[0]) == 1: if start_idx_oper == -1: start_idx_oper = k start_idx_cspp = np.nonzero(c_a)[0][0] else: stop_idx_oper = k stop_idx_cspp = np.nonzero(c_a)[0][0] print('oper start, stop ', start_idx_oper, stop_idx_oper) print('cspp start, stop ', start_idx_cspp, stop_idx_cspp) lon_cspp = lon_cspp[start_idx_cspp:stop_idx_cspp, :] lat_cspp = lat_cspp[start_idx_cspp:stop_idx_cspp, :] lon_oper = lon_oper[start_idx_oper:stop_idx_oper, :] lat_oper = lat_oper[start_idx_oper:stop_idx_oper, :] cspp_clear = cspp_clear[start_idx_oper:stop_idx_oper, :] oper_clear = oper_clear[start_idx_oper:stop_idx_oper, :] overlap_shape = lon_cspp.shape print('overlap shape, size: ', overlap_shape, np.size(lon_cspp)) print(np.sum(np.isclose(lon_cspp, lon_oper, rtol=0.001))) print(np.sum(np.isclose(lat_cspp, lat_oper, rtol=0.001))) sst_cspp = sst_cspp[start_idx_cspp:stop_idx_cspp, :].flatten() sst_oper = sst_oper[start_idx_oper:stop_idx_oper, :].flatten() oper_clear = oper_clear.flatten() cspp_clear = cspp_clear.flatten() both_clear = oper_clear & cspp_clear sst_cspp = sst_cspp[both_clear] sst_oper = sst_oper[both_clear] lon_cspp = lon_cspp.flatten() lat_cspp = lat_cspp.flatten() lon_oper = lon_oper.flatten() lat_oper = lat_oper.flatten() lon_cspp = lon_cspp[both_clear] lat_cspp = lat_cspp[both_clear] lon_oper = lon_oper[both_clear] lat_oper = lat_oper[both_clear] both_valid = np.invert(np.isnan(sst_cspp)) & np.invert(np.isnan(sst_oper)) print('number of clear and valid SSTs in both: ', np.sum(both_valid)) valid_sst_cspp = sst_cspp[both_valid] valid_sst_oper = sst_oper[both_valid] valid_lon_cspp = lon_cspp[both_valid] valid_lat_cspp = lat_cspp[both_valid] valid_lon_oper = lon_oper[both_valid] valid_lat_oper = lat_oper[both_valid] xarray_data = xr.Dataset({ 'sst_cspp': xr.DataArray([valid_sst_cspp], coords=None, dims=None, name='sst_cspp'), 'sst_oper': xr.DataArray([valid_sst_oper], coords=None, dims=None, name='sst_oper'), 'cspp_lat': xr.DataArray([valid_lat_cspp], coords=None, dims=None, name='cspp_lat'), 'cspp_lon': xr.DataArray([valid_lon_cspp], coords=None, dims=None, name='cspp_lon'), 'oper_lat': xr.DataArray([valid_lat_oper], coords=None, dims=None, name='oper_lat'), 'oper_lon': xr.DataArray([valid_lon_oper], coords=None, dims=None, name='oper_lon'), }) xarray_data.to_netcdf('/Users/tomrink/sst_values.nc') print('fraction approx equal: ', np.sum(np.isclose(valid_sst_cspp, valid_sst_oper, rtol=0.001))/np.sum(both_valid)) # print(np.histogram((sst_cspp[both_valid] - sst_oper[both_valid]), bins=10)) h5f_oper.close() h5f_cspp.close()