Skip to content
Snippets Groups Projects
Commit 0d7af0c9 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 03a3cc13
No related branches found
No related tags found
No related merge requests found
...@@ -3,38 +3,14 @@ import h5py ...@@ -3,38 +3,14 @@ import h5py
from util.util import get_grid_values, is_day from util.util import get_grid_values, is_day
import glob import glob
keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc', target_param = 'cloud_probability'
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc', # target_param = 'cld_opd_dcomp'
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc']
keep_out = keep_out_opd
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'
group_name_i = 'super/' group_name_i = 'super/'
group_name_m = 'orig/' group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith' solzen_name = group_name_m + 'solar_zenith'
# params_i = [group_name_i+'temp_11_0um', group_name_i+'refl_0_65um', group_name_i+target_param]
# params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param] params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param]
params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param] params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param]
...@@ -86,7 +62,6 @@ def process_cld_opd(grd_k): ...@@ -86,7 +62,6 @@ def process_cld_opd(grd_k):
grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0 grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k return grd_k
# glob.glob('/ships19/cloud/scratch/cphillips/super_abi_l2/goodwin_creek_v3/2020/*/0[1-4]/*/*.nc', recursive=True)
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10): def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
cnt = start cnt = start
...@@ -97,21 +72,19 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -97,21 +72,19 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
path = directory + '**' + '/' + pattern path = directory + '**' + '/' + pattern
all_files = glob.glob(path, recursive=True) all_files = glob.glob(path, recursive=True)
data_files = [f for f in all_files if f not in keep_out] valid_files = glob.glob(directory + '*/0[1-4]/*/*.nc', recursive=True)
# data_files = glob.glob(path, recursive=True) train_files = [f for f in all_files if f not in valid_files]
valid_tiles_i = [] data_tiles_i = []
train_tiles_i = [] data_tiles_m = []
valid_tiles_m = []
train_tiles_m = []
f_cnt = 0 f_cnt = 0
num_files = len(data_files) num_files = len(all_files)
print('Start, number of files: ', num_files) print('Start, number of files: ', num_files)
total_num_not_missing = 0 total_num_not_missing = 0
for idx, data_f in enumerate(data_files): for idx, data_f in enumerate(valid_files):
# if idx % 4 == 0: # if we want to skip some files # if idx % 4 == 0: # if we want to skip some files
if True: if True:
try: try:
...@@ -121,8 +94,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -121,8 +94,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
continue continue
try: try:
num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m, num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
params_i, train_tiles_i, valid_tiles_i,
num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night) num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night)
except Exception as e: except Exception as e:
print(e) print(e)
...@@ -134,59 +106,98 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -134,59 +106,98 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
total_num_not_missing += num_not_missing total_num_not_missing += num_not_missing
if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0: if len(data_tiles_m) == 0:
continue continue
if (f_cnt % 20) == 0: if (f_cnt % 20) == 0:
num_valid_samples = 0 num_valid_samples = 0
if len(valid_tiles_m) > 0: if len(data_tiles_m) > 0:
valid_i = np.stack(valid_tiles_i) valid_i = np.stack(data_tiles_i)
valid_m = np.stack(valid_tiles_m) valid_m = np.stack(data_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0] num_valid_samples = valid_m.shape[0]
num_train_samples = 0 data_tiles_i = []
if len(train_tiles_m) > 0: data_tiles_m = []
train_i = np.stack(train_tiles_i)
train_m = np.stack(train_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres_' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
valid_tiles_i = []
train_tiles_i = []
valid_tiles_m = []
train_tiles_m = []
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) print(' num_valid_samples, progress % : ', num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------') print('--------------------------------------------------')
cnt += 1 cnt += 1
# Write out leftover, if any. Maybe make this better someday # Write out leftover, if any. Maybe make this better someday
num_valid_samples = 0 num_valid_samples = 0
if len(valid_tiles_m) > 0: if len(data_tiles_m) > 0:
valid_i = np.stack(valid_tiles_i) valid_i = np.stack(data_tiles_i)
valid_m = np.stack(valid_tiles_m) valid_m = np.stack(data_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0] num_valid_samples = valid_m.shape[0]
data_tiles_i = []
data_tiles_m = []
f_cnt = 0
total_num_not_missing = 0
for idx, data_f in enumerate(train_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
try:
h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
try:
num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night)
except Exception as e:
print(e)
h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
total_num_not_missing += num_not_missing
if len(data_tiles_m) == 0:
continue
if (f_cnt % 20) == 0:
num_train_samples = 0
if len(data_tiles_m) > 0:
train_i = np.stack(data_tiles_i)
train_m = np.stack(data_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres_' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
data_tiles_i = []
data_tiles_m = []
print(' num_train_samples, progress % : ', num_train_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
print('--------------------------------------------------')
cnt += 1
# Write out leftover, if any. Maybe make this better someday
num_train_samples = 0 num_train_samples = 0
if len(train_tiles_m) > 0: if len(data_tiles_m) > 0:
train_i = np.stack(train_tiles_i) train_i = np.stack(data_tiles_i)
train_m = np.stack(train_tiles_m) train_m = np.stack(data_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i) np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres_' + str(cnt), train_m) np.save(out_directory + 'train_mres_' + str(cnt), train_m)
num_train_samples = train_m.shape[0] num_train_samples = train_m.shape[0]
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples,
int((f_cnt / num_files) * 100)) int((f_cnt / num_files) * 100))
total_num_train_samples += num_train_samples total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
...@@ -198,8 +209,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -198,8 +209,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
# tile_width: Must be even! # tile_width: Must be even!
# kernel_size: Must be odd! # kernel_size: Must be odd!
def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i, def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
...@@ -235,15 +245,11 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va ...@@ -235,15 +245,11 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va
i_skip = tile_width i_skip = tile_width
j_skip = tile_width j_skip = tile_width
# i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
# j_start = 0
i_start = border - 1 # zero-based i_start = border - 1 # zero-based
j_start = border - 1 # zero-based j_start = border - 1 # zero-based
num_y_tiles = int(num_lines / tile_width) - 1 num_y_tiles = int(num_lines / tile_width) - 1
data_tiles_m = []
data_tiles_i = []
num_not_missing = 0 num_not_missing = 0
for j in range(num_y_tiles): for j in range(num_y_tiles):
...@@ -270,16 +276,16 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va ...@@ -270,16 +276,16 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va
data_tiles_m.append(nda_m) data_tiles_m.append(nda_m)
data_tiles_i.append(nda_i) data_tiles_i.append(nda_i)
num_tiles = len(data_tiles_i) # num_tiles = len(data_tiles_i)
num_valid = int(num_tiles * 0.10) # num_valid = int(num_tiles * 0.10)
num_train = num_tiles - num_valid # num_train = num_tiles - num_valid
#
for k in range(num_train): # for k in range(num_train):
train_tiles_m.append(data_tiles_m[k]) # train_tiles_m.append(data_tiles_m[k])
train_tiles_i.append(data_tiles_i[k]) # train_tiles_i.append(data_tiles_i[k])
#
for k in range(num_valid): # for k in range(num_valid):
valid_tiles_m.append(data_tiles_m[num_train + k]) # valid_tiles_m.append(data_tiles_m[num_train + k])
valid_tiles_i.append(data_tiles_i[num_train + k]) # valid_tiles_i.append(data_tiles_i[num_train + k])
return num_not_missing return num_not_missing
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment