From 505190054ed5dcc0bcb17461933adf8d24144655 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Thu, 6 Oct 2022 12:13:06 -0500
Subject: [PATCH] snapshot...

---
 modules/util/viirs_l1b_l2.py | 184 +++++++++++++++++------------------
 1 file changed, 91 insertions(+), 93 deletions(-)

diff --git a/modules/util/viirs_l1b_l2.py b/modules/util/viirs_l1b_l2.py
index 8804f92e..a9b08dd3 100644
--- a/modules/util/viirs_l1b_l2.py
+++ b/modules/util/viirs_l1b_l2.py
@@ -80,103 +80,101 @@ def run_all(directory, out_directory, day_night='ANY'):
     total_num_train_samples = 0
     total_num_valid_samples = 0
 
-    for p in os.scandir(directory):
-        if not p.is_dir():
-            continue
-        print(p.name)
-
-        # data_files = glob.glob(directory + p.name+'/'+'clavrx*highres*.nc')
-        data_files = glob.glob(directory + p.name+'/'+'clavrx_snpp_viirs*.uwssec*.nc')
-        # data_files = glob.glob(directory + p.name + '/' + 'VNP02MOD*.uwssec.nc')
-
-        label_valid_tiles = []
-        label_train_tiles = []
-        data_valid_tiles = []
-        data_train_tiles = []
-        f_cnt = 0
-
-        for idx, data_f in enumerate(data_files):
-            # if idx % 4 == 0:  # if we want to skip some files
-            if True:
-                # w_o_ext, ext = os.path.splitext(data_f)
-                # pname, fname = os.path.split(data_f)
-                # toks = fname.split('.')
-                # label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
-                # if not os.path.exists(label_f):
-                #     continue
-
-                try:
-                    data_h5f = h5py.File(data_f, 'r')
-                except:
-                    print('cant open file: ', data_f)
-                    continue
-
-                # try:
-                #     label_h5f = h5py.File(label_f, 'r')
-                # except:
-                #     print('cant open file: ', label_f)
-                #     data_h5f.close()
-                #     continue
-
-                data_tiles = []
-                label_tiles = []
-
-                try:
-                    run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
-                    # run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
-                except Exception as e:
-                    print(e)
-                    data_h5f.close()
-                    #label_h5f.close()
-                    continue
+    # pattern = 'clavrx*highres*.nc'
+    # pattern = 'VNP02MOD*.uwssec.nc'
+    pattern = 'clavrx_snpp_viirs*.uwssec*.nc'
+    path = directory + '**' + '/' + pattern
+
+    data_files = glob.glob(path, recursive=True)
+
+    label_valid_tiles = []
+    label_train_tiles = []
+    data_valid_tiles = []
+    data_train_tiles = []
+    f_cnt = 0
+
+    for idx, data_f in enumerate(data_files):
+        # if idx % 4 == 0:  # if we want to skip some files
+        if True:
+            # w_o_ext, ext = os.path.splitext(data_f)
+            # pname, fname = os.path.split(data_f)
+            # toks = fname.split('.')
+            # label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
+            # if not os.path.exists(label_f):
+            #     continue
+
+            try:
+                data_h5f = h5py.File(data_f, 'r')
+            except:
+                print('cant open file: ', data_f)
+                continue
 
+            # try:
+            #     label_h5f = h5py.File(label_f, 'r')
+            # except:
+            #     print('cant open file: ', label_f)
+            #     data_h5f.close()
+            #     continue
+
+            data_tiles = []
+            label_tiles = []
+
+            try:
+                run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
+                # run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
+            except Exception as e:
+                print(e)
                 data_h5f.close()
                 #label_h5f.close()
+                continue
+
+            data_h5f.close()
+            #label_h5f.close()
+
+            # if len(data_tiles) == 0 or len(label_tiles) == 0:
+            #     continue
+            # if len(data_tiles) != len(label_tiles):
+            #     print('weirdness: ', data_f)
+            #     continue
+
+            if len(data_tiles) == 0:
+                continue
 
-                # if len(data_tiles) == 0 or len(label_tiles) == 0:
-                #     continue
-                # if len(data_tiles) != len(label_tiles):
-                #     print('weirdness: ', data_f)
-                #     continue
-
-                if len(data_tiles) == 0:
-                    continue
-
-                num = len(data_tiles)
-                n_vld = int(num * 0.1)
-
-                # [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
-                # [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
-                [data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
-                [data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
-
-                f_cnt += 1
-                if f_cnt == 5:
-                    f_cnt = 0
-
-                    # label_valid = np.stack(label_valid_tiles)
-                    # label_train = np.stack(label_train_tiles)
-                    data_valid = np.stack(data_valid_tiles)
-                    data_train = np.stack(data_train_tiles)
-
-                    np.save(out_directory+'data_train_' + str(cnt), data_train)
-                    np.save(out_directory+'data_valid_' + str(cnt), data_valid)
-                    # np.save(out_directory+'label_train_' + str(cnt), label_train)
-                    # np.save(out_directory+'label_valid_' + str(cnt), label_valid)
-
-                    label_valid_tiles = []
-                    label_train_tiles = []
-                    data_valid_tiles = []
-                    data_train_tiles = []
-
-                    num_train_samples = data_train.shape[0]
-                    num_valid_samples = data_valid.shape[0]
-                    print('   file # done: ', cnt)
-                    print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
-                    total_num_train_samples += num_train_samples
-                    total_num_valid_samples += num_valid_samples
-
-                    cnt += 1
+            num = len(data_tiles)
+            n_vld = int(num * 0.1)
+
+            # [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)]
+            # [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)]
+            [data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)]
+            [data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)]
+
+            f_cnt += 1
+            if f_cnt == 5:
+                f_cnt = 0
+
+                # label_valid = np.stack(label_valid_tiles)
+                # label_train = np.stack(label_train_tiles)
+                data_valid = np.stack(data_valid_tiles)
+                data_train = np.stack(data_train_tiles)
+
+                np.save(out_directory+'data_train_' + str(cnt), data_train)
+                np.save(out_directory+'data_valid_' + str(cnt), data_valid)
+                # np.save(out_directory+'label_train_' + str(cnt), label_train)
+                # np.save(out_directory+'label_valid_' + str(cnt), label_valid)
+
+                label_valid_tiles = []
+                label_train_tiles = []
+                data_valid_tiles = []
+                data_train_tiles = []
+
+                num_train_samples = data_train.shape[0]
+                num_valid_samples = data_valid.shape[0]
+                print('   file # done: ', cnt)
+                print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
+                total_num_train_samples += num_train_samples
+                total_num_valid_samples += num_valid_samples
+
+                cnt += 1
 
     print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
 
-- 
GitLab