Skip to content
Snippets Groups Projects
Commit 50519005 authored by tomrink
Browse files

snapshot...

parent e12a0003
No related branches found
No related tags found
No related merge requests found
......@@ -80,103 +80,101 @@ def run_all(directory, out_directory, day_night='ANY'):
total_num_train_samples = 0
total_num_valid_samples = 0
for p in os.scandir(directory):
if not p.is_dir():
continue
print(p.name)
# data_files = glob.glob(directory + p.name+'/'+'clavrx*highres*.nc')
data_files = glob.glob(directory + p.name+'/'+'clavrx_snpp_viirs*.uwssec*.nc')
# data_files = glob.glob(directory + p.name + '/' + 'VNP02MOD*.uwssec.nc')
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
f_cnt = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
# w_o_ext, ext = os.path.splitext(data_f)
# pname, fname = os.path.split(data_f)
# toks = fname.split('.')
# label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
# if not os.path.exists(label_f):
# continue
try:
data_h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
# try:
# label_h5f = h5py.File(label_f, 'r')
# except:
# print('cant open file: ', label_f)
# data_h5f.close()
# continue
data_tiles = []
label_tiles = []
try:
run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
# run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
except Exception as e:
print(e)
data_h5f.close()
#label_h5f.close()
continue
# pattern = 'clavrx*highres*.nc'
# pattern = 'VNP02MOD*.uwssec.nc'
pattern = 'clavrx_snpp_viirs*.uwssec*.nc'
path = directory + '**' + '/' + pattern
data_files = glob.glob(path, recursive=True)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
f_cnt = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
# w_o_ext, ext = os.path.splitext(data_f)
# pname, fname = os.path.split(data_f)
# toks = fname.split('.')
# label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
# if not os.path.exists(label_f):
# continue
try:
data_h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
# try:
# label_h5f = h5py.File(label_f, 'r')
# except:
# print('cant open file: ', label_f)
# data_h5f.close()
# continue
data_tiles = []
label_tiles = []
try:
run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
# run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
except Exception as e:
print(e)
data_h5f.close()
#label_h5f.close()
continue
data_h5f.close()
#label_h5f.close()
# if len(data_tiles) == 0 or len(label_tiles) == 0:
# continue
# if len(data_tiles) != len(label_tiles):
# print('weirdness: ', data_f)
# continue
if len(data_tiles) == 0:
continue
# if len(data_tiles) == 0 or len(label_tiles) == 0:
# continue
# if len(data_tiles) != len(label_tiles):
# print('weirdness: ', data_f)
# continue
if len(data_tiles) == 0:
continue
num = len(data_tiles)
n_vld = int(num * 0.1)
# [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
# [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
[data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
[data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
f_cnt += 1
if f_cnt == 5:
f_cnt = 0
# label_valid = np.stack(label_valid_tiles)
# label_train = np.stack(label_train_tiles)
data_valid = np.stack(data_valid_tiles)
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
np.save(out_directory+'data_valid_' + str(cnt), data_valid)
# np.save(out_directory+'label_train_' + str(cnt), label_train)
# np.save(out_directory+'label_valid_' + str(cnt), label_valid)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
num_train_samples = data_train.shape[0]
num_valid_samples = data_valid.shape[0]
print(' file # done: ', cnt)
print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
cnt += 1
num = len(data_tiles)
n_vld = int(num * 0.1)
# [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
# [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
[data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
[data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
f_cnt += 1
if f_cnt == 5:
f_cnt = 0
# label_valid = np.stack(label_valid_tiles)
# label_train = np.stack(label_train_tiles)
data_valid = np.stack(data_valid_tiles)
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
np.save(out_directory+'data_valid_' + str(cnt), data_valid)
# np.save(out_directory+'label_train_' + str(cnt), label_train)
# np.save(out_directory+'label_valid_' + str(cnt), label_valid)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
num_train_samples = data_train.shape[0]
num_valid_samples = data_valid.shape[0]
print(' file # done: ', cnt)
print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
cnt += 1
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment