Skip to content
Snippets Groups Projects
Commit 50519005 authored by tomrink
Browse files

snapshot...

parent e12a0003
No related merge requests found
......@@ -80,103 +80,101 @@ def run_all(directory, out_directory, day_night='ANY'):
total_num_train_samples = 0
total_num_valid_samples = 0
for p in os.scandir(directory):
if not p.is_dir():
continue
print(p.name)
# data_files = glob.glob(directory + p.name+'/'+'clavrx*highres*.nc')
data_files = glob.glob(directory + p.name+'/'+'clavrx_snpp_viirs*.uwssec*.nc')
# data_files = glob.glob(directory + p.name + '/' + 'VNP02MOD*.uwssec.nc')
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
f_cnt = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
# w_o_ext, ext = os.path.splitext(data_f)
# pname, fname = os.path.split(data_f)
# toks = fname.split('.')
# label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
# if not os.path.exists(label_f):
# continue
try:
data_h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
# try:
# label_h5f = h5py.File(label_f, 'r')
# except:
# print('cant open file: ', label_f)
# data_h5f.close()
# continue
data_tiles = []
label_tiles = []
try:
run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
# run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
except Exception as e:
print(e)
data_h5f.close()
#label_h5f.close()
continue
# pattern = 'clavrx*highres*.nc'
# pattern = 'VNP02MOD*.uwssec.nc'
pattern = 'clavrx_snpp_viirs*.uwssec*.nc'
path = directory + '**' + '/' + pattern
data_files = glob.glob(path, recursive=True)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
f_cnt = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
# w_o_ext, ext = os.path.splitext(data_f)
# pname, fname = os.path.split(data_f)
# toks = fname.split('.')
# label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
# if not os.path.exists(label_f):
# continue
try:
data_h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
# try:
# label_h5f = h5py.File(label_f, 'r')
# except:
# print('cant open file: ', label_f)
# data_h5f.close()
# continue
data_tiles = []
label_tiles = []
try:
run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
# run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
except Exception as e:
print(e)
data_h5f.close()
#label_h5f.close()
continue
data_h5f.close()
#label_h5f.close()
# if len(data_tiles) == 0 or len(label_tiles) == 0:
# continue
# if len(data_tiles) != len(label_tiles):
# print('weirdness: ', data_f)
# continue
if len(data_tiles) == 0:
continue
# if len(data_tiles) == 0 or len(label_tiles) == 0:
# continue
# if len(data_tiles) != len(label_tiles):
# print('weirdness: ', data_f)
# continue
if len(data_tiles) == 0:
continue
num = len(data_tiles)
n_vld = int(num * 0.1)
# [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
# [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
[data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
[data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
f_cnt += 1
if f_cnt == 5:
f_cnt = 0
# label_valid = np.stack(label_valid_tiles)
# label_train = np.stack(label_train_tiles)
data_valid = np.stack(data_valid_tiles)
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
np.save(out_directory+'data_valid_' + str(cnt), data_valid)
# np.save(out_directory+'label_train_' + str(cnt), label_train)
# np.save(out_directory+'label_valid_' + str(cnt), label_valid)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
num_train_samples = data_train.shape[0]
num_valid_samples = data_valid.shape[0]
print(' file # done: ', cnt)
print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
cnt += 1
num = len(data_tiles)
n_vld = int(num * 0.1)
# [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
# [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
[data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
[data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
f_cnt += 1
if f_cnt == 5:
f_cnt = 0
# label_valid = np.stack(label_valid_tiles)
# label_train = np.stack(label_train_tiles)
data_valid = np.stack(data_valid_tiles)
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
np.save(out_directory+'data_valid_' + str(cnt), data_valid)
# np.save(out_directory+'label_train_' + str(cnt), label_train)
# np.save(out_directory+'label_valid_' + str(cnt), label_valid)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
num_train_samples = data_train.shape[0]
num_valid_samples = data_valid.shape[0]
print(' file # done: ', cnt)
print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
cnt += 1
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment