diff --git a/modules/icing/pirep_goes.py b/modules/icing/pirep_goes.py
index 0f8b4506f7f4e7da0c5436a338ad2e5e18e0f610..60e0f96e647e71596449bd7767d1b093cc92e2d6 100644
--- a/modules/icing/pirep_goes.py
+++ b/modules/icing/pirep_goes.py
@@ -49,7 +49,7 @@ no_icing_files = ['no_icing_2018010600_2018033022_DAY.h5', 'no_icing_2018040100_
 no_icing_l1b_files = []
 
 train_params_day = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha', 'solar_zenith_angle',
-                    'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp', 'cloud_phase']
+                    'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp', 'cloud_phase', 'cloud_mask']  # box_extract reads each name from every input file and looks it up in ds_list
 
 
 def setup():
@@ -917,6 +917,102 @@ def fov_extract(outfile='/Users/tomrink/fovs_out.h5'):
     icing_int_ds = h5f_out.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
     icing_int_ds.attrs.create('long_name', data='From PIREP. -1:No Icing, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')
 
+    # copy relevant attributes
+    for ds_name in train_params_day:
+        h5f_ds = h5f_out[ds_name]
+        h5f_ds.attrs.create('standard_name', data=h5f_expl[ds_name].attrs.get('standard_name'))
+        h5f_ds.attrs.create('long_name', data=h5f_expl[ds_name].attrs.get('long_name'))
+        h5f_ds.attrs.create('units', data=h5f_expl[ds_name].attrs.get('units'))
+
+    # --- close files
+    for h5f in h5_s_icing:
+        h5f.close()
+
+    for h5f in h5_s_no_icing:
+        h5f.close()
+
+    h5f_out.close()
+    h5f_expl.close()
+
+
+def box_extract(outfile='/Users/tomrink/box_out.h5'):
+    """Write shuffled box samples from the icing and no-icing files to `outfile`.
+
+    For every observation in `icing_files` and `no_icing_files`, the
+    [12:28, 12:28] window of each dataset named in `train_params_day` is
+    collected. Icing samples keep the 'icing_intensity' value read from
+    their file; no-icing samples are labeled -1. Data and labels are
+    shuffled with one common random permutation, then written to `outfile`.
+    """
+    icing_int_s = []
+
+    h5_s_icing = []
+    h5_s_no_icing = []
+
+    icing_data_dct = {ds: [] for ds in train_params_day}
+    no_icing_data_dct = {ds: [] for ds in train_params_day}
+
+    num_ice = 0
+    for fname in icing_files:
+        f = h5py.File(data_dir+fname, 'r')
+        h5_s_icing.append(f)
+
+        num_obs = len(f['time'])
+        icing_int = f['icing_intensity'][:]
+
+        for i in range(num_obs):
+            for ds_name in train_params_day:
+                icing_data_dct[ds_name].append(f[ds_name][i, 12:28, 12:28])
+            icing_int_s.append(icing_int[i])
+            # Count the sample: the shuffle index below spans num_ice + num_no_ice.
+            num_ice += 1
+
+        print(fname)
+
+    for ds_name in train_params_day:
+        icing_data_dct[ds_name] = np.stack(icing_data_dct[ds_name], axis=0)
+    icing_int_s = np.array(icing_int_s)
+
+    num_no_ice = 0
+    for fname in no_icing_files:
+        f = h5py.File(data_dir+fname, 'r')
+        h5_s_no_icing.append(f)
+
+        num_obs = len(f['time'])
+
+        for i in range(num_obs):
+            for ds_name in train_params_day:
+                no_icing_data_dct[ds_name].append(f[ds_name][i, 12:28, 12:28])
+            num_no_ice += 1
+
+        print(fname)
+
+    for ds_name in train_params_day:
+        no_icing_data_dct[ds_name] = np.stack(no_icing_data_dct[ds_name], axis=0)
+    # No-icing samples carry the label -1.
+    no_icing_int_s = np.full(num_no_ice, -1)
+
+    # One permutation over all samples keeps data and labels aligned.
+    ds_indexes = np.arange(num_ice + num_no_ice)
+    np.random.shuffle(ds_indexes)
+
+    icing_intensity = np.concatenate([icing_int_s, no_icing_int_s])[ds_indexes]
+
+    data_dct = {}
+    for ds_name in train_params_day:
+        merged = np.concatenate([icing_data_dct[ds_name], no_icing_data_dct[ds_name]])
+        data_dct[ds_name] = merged[ds_indexes]
+
+    h5f_expl = h5py.File(a_clvr_file, 'r')
+    h5f_out = h5py.File(outfile, 'w')
+
+    for ds_name in train_params_day:
+        dt = ds_types[ds_list.index(ds_name)]
+        h5f_out.create_dataset(ds_name, data=data_dct[ds_name], dtype=dt)
+
+    icing_int_ds = h5f_out.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
+    icing_int_ds.attrs.create('long_name', data='From PIREP. -1:No Icing, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')
+
     # copy relevant attributes
     for ds_name in train_params_day:
         h5f_ds = h5f_out[ds_name]