Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
J
jupyter_lab_slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
David Hoese
jupyter_lab_slurm
Commits
932df900
Commit
932df900
authored
Feb 3, 2019
by
David Hoese
Browse files
Options
Downloads
Patches
Plain Diff
Add initial notebook examples
parent
381a0bb2
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
g16_abi_20190117.ipynb
+274
-0
274 additions, 0 deletions
g16_abi_20190117.ipynb
pangeo_test.ipynb
+173
-0
173 additions, 0 deletions
pangeo_test.ipynb
with
447 additions
and
0 deletions
g16_abi_20190117.ipynb
0 → 100644
+
274
−
0
View file @
932df900
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['HDF5_USE_FILE_LOCKING'] = \"FALSE\"\n",
"os.environ['PYTROLL_CHUNK_SIZE'] = \"2048\"\n",
"from glob import glob\n",
"from satpy import MultiScene\n",
"import dask\n",
"from multiprocessing.pool import ThreadPool\n",
"from dask.diagnostics import ProgressBar"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12\n"
]
}
],
"source": [
"input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_17_017/abi/L1b/RadC/*C01*s201901712*.nc')\n",
"# input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_17_017/abi/L1b/RadC/*s2019017[12]*.nc')\n",
"#input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_18_018/abi/L1b/RadC/*s2019018[12]*.nc')\n",
"#input_files += glob('/arcdata/goes/grb/goes16/2019/2019_01_19_019/abi/L1b/RadC/*s20190190*.nc')\n",
"print(len(input_files))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"mscn = MultiScene.from_files(input_files, reader='abi_l1b')\n",
"mscn.load(['C01'])\n",
"# mscn.load(['true_color_night'])\n",
"# res_mscn = mscn.resample(resampler='native')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/davidh/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/bokeh/core.py:56: UserWarning: \n",
"Port 33121 is already in use. \n",
"Perhaps you already have a cluster running?\n",
"Hosting the diagnostics dashboard on a random port instead.\n",
" warnings.warn('\\n' + msg)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "98fe775311a4451091c5efb79475d267",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='<h2>SLURMCluster</h2>'), HBox(children=(HTML(value='\\n<div>\\n <style scoped>\\n …"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import sys\n",
"from dask_jobqueue import SLURMCluster\n",
"cluster = SLURMCluster(partition='all',\n",
" walltime='02:00:00',\n",
" name='davidh_dask',\n",
" cores=4,\n",
" processes=2,\n",
" memory='20GB',\n",
" python=sys.executable, # '/home/davidh/miniconda3/envs/pangeo/bin/python',\n",
" local_directory='/scratch',\n",
" diagnostics_port=int(os.getenv('DASK_PORT', 8787)),\n",
" )\n",
"cluster.scale(5)\n",
"cluster"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://10.23.255.247:33990\n",
" <li><b>Dashboard: </b><a href='http://10.23.255.247:35285/status' target='_blank'>http://10.23.255.247:35285/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>0</li>\n",
" <li><b>Cores: </b>0</li>\n",
" <li><b>Memory: </b>0 B</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://10.23.255.247:33990' processes=0 cores=0>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dask.distributed import Client\n",
"client = Client(cluster)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"c01_avg = np.mean([scn['C01'] for scn in mscn], axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c01_avg"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Scene 0\n",
"Scene 1\n",
"Scene 2\n",
"Scene 3\n",
"Scene 4\n",
"Scene 5\n",
"Scene 6\n",
"Scene 7\n",
"Scene 8\n",
"Scene 9\n",
"Scene 10\n",
"CPU times: user 39.1 s, sys: 6.36 s, total: 45.4 s\n",
"Wall time: 1min 7s\n"
]
}
],
"source": [
"%%time\n",
"all_jobs = []\n",
"# NOTE(review): this cell previously iterated `res_mscn`, but the resample that\n",
"# defines it is commented out in an earlier cell, so a fresh top-to-bottom run\n",
"# raised NameError. Iterate the loaded `mscn` directly instead.\n",
"for idx, scn in enumerate(mscn):\n",
"    print(\"Scene {:d}\".format(idx))\n",
"    # compute=False defers the writes so every scene's output can be computed in one dask graph below\n",
"    res = scn.save_datasets(writer='scmi', base_dir='/odyssey/isis/tmp/davidh', sector_id=\"GOES_EAST\", source_name=\"SSEC\", compute=False)\n",
"    all_jobs.append(res)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"all_jobs = [x for y in all_jobs for x in y]\n",
"print(len(all_jobs))\n",
"all_jobs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dask.compute(all_jobs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"mscn.save_animation('/odyssey/isis/tmp/davidh/g16_abi_{name}_{start_time:%Y%m%d_%H%M%S}/g16_abi_{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=24, batch_size=16)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!png2mp4.sh /odyssey/isis/tmp/davidh/g16_abi_true_color_night.mp4 /odyssey/isis/tmp/davidh/g16_abi_true_color_night_2*/*.png"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del client\n",
"del cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:
``` python
import os
os.environ['HDF5_USE_FILE_LOCKING'] = "FALSE"
os.environ['PYTROLL_CHUNK_SIZE'] = "2048"
from glob import glob
from satpy import MultiScene
import dask
from multiprocessing.pool import ThreadPool
from dask.diagnostics import ProgressBar
```
%% Cell type:code id: tags:
``` python
input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_17_017/abi/L1b/RadC/*C01*s201901712*.nc')
# input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_17_017/abi/L1b/RadC/*s2019017[12]*.nc')
#input_files = glob('/arcdata/goes/grb/goes16/2019/2019_01_18_018/abi/L1b/RadC/*s2019018[12]*.nc')
#input_files += glob('/arcdata/goes/grb/goes16/2019/2019_01_19_019/abi/L1b/RadC/*s20190190*.nc')
print(len(input_files))
```
%% Output
12
%% Cell type:code id: tags:
``` python
mscn = MultiScene.from_files(input_files, reader='abi_l1b')
mscn.load(['C01'])
# mscn.load(['true_color_night'])
# res_mscn = mscn.resample(resampler='native')
```
%% Cell type:code id: tags:
``` python
import sys
from dask_jobqueue import SLURMCluster
cluster = SLURMCluster(partition='all',
                       walltime='02:00:00',
                       name='davidh_dask',
                       cores=4,
                       processes=2,
                       memory='20GB',
                       python=sys.executable,  # '/home/davidh/miniconda3/envs/pangeo/bin/python',
                       local_directory='/scratch',
                       diagnostics_port=int(os.getenv('DASK_PORT', 8787)),
                       )
cluster.scale(5)
cluster
```
%% Output
/home/davidh/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/bokeh/core.py:56: UserWarning:
Port 33121 is already in use.
Perhaps you already have a cluster running?
Hosting the diagnostics dashboard on a random port instead.
warnings.warn('\n' + msg)
%% Cell type:code id: tags:
``` python
from dask.distributed import Client
client = Client(cluster)
client
```
%% Output
<Client: scheduler='tcp://10.23.255.247:33990' processes=0 cores=0>
%% Cell type:code id: tags:
``` python
import numpy as np
c01_avg = np.mean([scn['C01'] for scn in mscn], axis=0)
```
%% Cell type:code id: tags:
```
python
c01_avg
```
%% Cell type:code id: tags:
``` python
%%time
all_jobs = []
for idx, scn in enumerate(res_mscn):
    print("Scene {:d}".format(idx))
    res = scn.save_datasets(writer='scmi', base_dir='/odyssey/isis/tmp/davidh', sector_id="GOES_EAST", source_name="SSEC", compute=False)
    all_jobs.append(res)
```
%% Output
Scene 0
Scene 1
Scene 2
Scene 3
Scene 4
Scene 5
Scene 6
Scene 7
Scene 8
Scene 9
Scene 10
CPU times: user 39.1 s, sys: 6.36 s, total: 45.4 s
Wall time: 1min 7s
%% Cell type:code id: tags:
``` python
all_jobs = [x for y in all_jobs for x in y]
print(len(all_jobs))
all_jobs
```
%% Cell type:code id: tags:
``` python
dask.compute(all_jobs)
```
%% Cell type:code id: tags:
``` python
%%time
mscn.save_animation('/odyssey/isis/tmp/davidh/g16_abi_{name}_{start_time:%Y%m%d_%H%M%S}/g16_abi_{name}_{start_time:%Y%m%d_%H%M%S}.mp4', fps=24, batch_size=16)
```
%% Cell type:code id: tags:
``` python
!png2mp4.sh /odyssey/isis/tmp/davidh/g16_abi_true_color_night.mp4 /odyssey/isis/tmp/davidh/g16_abi_true_color_night_2*/*.png
```
%% Cell type:code id: tags:
``` python
del client
del cluster
```
%% Cell type:code id: tags:
```
python
```
This diff is collapsed.
Click to expand it.
pangeo_test.ipynb
0 → 100644
+
173
−
0
View file @
932df900
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic Dask Test on a Slurm Cluster\n",
"\n",
"See additional options for Dask's SLURMCluster class [here](http://jobqueue.dask.org/en/latest/generated/dask_jobqueue.SLURMCluster.html).\n",
"\n",
"Note the \"diagnostics_port\" keyword argument is only available in dask-jobqueue >0.4.1 which as of this writing has not been released."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"from dask_jobqueue import SLURMCluster\n",
"cluster = SLURMCluster(partition='all',\n",
" walltime='02:00:00',\n",
" name='davidh_dask_pangeo',\n",
" cores=4,\n",
" processes=2,\n",
" memory='20GB',\n",
" python=sys.executable, # '/home/davidh/miniconda3/envs/pangeo/bin/python',\n",
" local_directory='/scratch',\n",
" diagnostics_port=int(os.getenv('DASK_PORT', 8787)),\n",
" )\n",
"cluster.scale(5)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c145c20c6e6e4da398f2adcb073b3ac5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='<h2>SLURMCluster</h2>'), HBox(children=(HTML(value='\\n<div>\\n <style scoped>\\n …"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cluster"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import Client\n",
"client = Client(cluster)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>tcp://10.23.255.247:47704\n",
" <li><b>Dashboard: </b><a href='http://10.23.255.247:33121/status' target='_blank'>http://10.23.255.247:33121/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>6</li>\n",
" <li><b>Cores: </b>12</li>\n",
" <li><b>Memory: </b>60.00 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='tcp://10.23.255.247:47704' processes=6 cores=12>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"def dask_test():\n",
"    \"\"\"Benchmark a distributed QR round-trip on a random Dask array.\n",
"\n",
"    Builds a chunked 50000x1000 random array, factors it with QR,\n",
"    multiplies the factors back together, and computes the result on\n",
"    the cluster. Returns the reconstructed array (previously it was\n",
"    assigned to an unused local and discarded).\n",
"    \"\"\"\n",
"    a = da.random.random(size=(50000, 1000), chunks=(1000, 1000))\n",
"    q, r = da.linalg.qr(a)\n",
"    a2 = q.dot(r)\n",
"    return a2.compute()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 4.31 s, sys: 1.36 s, total: 5.67 s\n",
"Wall time: 15.6 s\n"
]
}
],
"source": [
"%%time\n",
"dask_test()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:markdown id: tags:
# Basic Dask Test on a Slurm Cluster
See additional options for Dask's SLURMCluster class
[
here
](
http://jobqueue.dask.org/en/latest/generated/dask_jobqueue.SLURMCluster.html
)
.
Note the "diagnostics_port" keyword argument is only available in dask-jobqueue >0.4.1 which as of this writing has not been released.
%% Cell type:code id: tags:
``` python
import os
import sys
from dask_jobqueue import SLURMCluster
cluster = SLURMCluster(partition='all',
                       walltime='02:00:00',
                       name='davidh_dask_pangeo',
                       cores=4,
                       processes=2,
                       memory='20GB',
                       python=sys.executable,  # '/home/davidh/miniconda3/envs/pangeo/bin/python',
                       local_directory='/scratch',
                       diagnostics_port=int(os.getenv('DASK_PORT', 8787)),
                       )
cluster.scale(5)
```
%% Cell type:code id: tags:
```
python
cluster
```
%% Output
%% Cell type:code id: tags:
``` python
from dask.distributed import Client
client = Client(cluster)
```
%% Cell type:code id: tags:
```
python
client
```
%% Output
<Client: scheduler='tcp://10.23.255.247:47704' processes=6 cores=12>
%% Cell type:code id: tags:
``` python
import dask.array as da
def dask_test():
    a = da.random.random(size=(50000, 1000), chunks=(1000, 1000))
    q, r = da.linalg.qr(a)
    a2 = q.dot(r)
    out = a2.compute()
```
%% Cell type:code id: tags:
``` python
%%time
dask_test()
```
%% Output
CPU times: user 4.31 s, sys: 1.36 s, total: 5.67 s
Wall time: 15.6 s
%% Cell type:code id: tags:
```
python
```
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment