From df43a87117d8ee475a61b14ed74b4f5515847546 Mon Sep 17 00:00:00 2001 From: David Hoese <david.hoese@ssec.wisc.edu> Date: Fri, 23 Aug 2019 16:49:15 -0500 Subject: [PATCH] Add slideshow notebook --- .gitignore | 5 + README.md | 20 + slideshow.ipynb | 2930 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 2955 insertions(+) create mode 100644 .gitignore create mode 100644 slideshow.ipynb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abb86db --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +test.npy +test_binary.dat +test_multiple.npz +slideshow.slides.html +.ipynb_checkpoints diff --git a/README.md b/README.md index 823a26d..4b0a492 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,11 @@ This repository provides Jupyter Notebooks and other information to provide an overview of the NumPy python library. The information covers concepts for beginners like why you should use NumPy to advanced concepts and usages. +This was first presented at the SSEC Programmer Brown Bag on +September 9th, 2019. + +## Install + To use these lessons interactively: 1. Clone the repository. @@ -13,3 +18,18 @@ To use these lessons interactively: notebooks in this repository one at a time and step through the instructions. +## Create Slideshow + +The notebook `slideshow.ipynb` is formatted specially to be presentable +as a series of slide shows. This uses the Jupyter `nbconvert` tool to +generate a `reveal.js` presentation. These presentations, once started, +have one or more slides that can be traversed using the right (next) and +left (previous) arrow keys. Slides may also include "subslides" which +can be accessed using the up/down arrow keys. Use `Esc` to see an overview +of the entire presentation. 
+ +To convert the notebook to a slideshow presentation and serve it immediately: + +```bash +jupyter nbconvert slideshow.ipynb --to slides --post serve +``` \ No newline at end of file diff --git a/slideshow.ipynb b/slideshow.ipynb new file mode 100644 index 0000000..961a227 --- /dev/null +++ b/slideshow.ipynb @@ -0,0 +1,2930 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# NumPy - A to Z\n", + "\n", + "By: David Hoese (@djhoese)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Why NumPy?\n", + "\n", + "Lists are cool, but not for arithmetic." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr = [1, 2, 3]\n", + "my_arr" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[6, 7, 8]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = []\n", + "for val in my_arr:\n", + " result.append(val + 5)\n", + "result" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "# NumPy Arrays\n", + "\n", + "NumPy Arrays are memory efficient, easy to use, and perform calculations fast." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([6, 7, 8])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "my_arr = np.array([1, 2, 3])\n", + "my_arr + 5" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Useful Array Attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 5)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr = np.zeros((10, 5))\n", + "my_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.size" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "400" + ] + }, + "execution_count": 
15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.nbytes" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.T" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 10)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.T.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Creating Arrays\n", + "\n", + "All of the functions can be found in NumPy's documentation [here](https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.array-creation.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array([1, 2, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.2, 2.2, 3.2])" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array([1.2, 2.2, 3.2])" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1., 2., 3.], dtype=float32)" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array([1, 2, 3], dtype=np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, False, True, True, True, True])" + ] + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.array([-1, 0, 1, 2, 3, 4], dtype=np.bool)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.zeros((2, 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ 
+ { + "data": { + "text/plain": [ + "array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.ones((2, 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.empty((2, 10)) # \"random\"" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35],\n", + " [2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35, 2.35]])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.full((2, 10), 2.35)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],\n", + " [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.full((2, 10), np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 5)" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_arr.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + 
"slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.],\n", + " [1., 1., 1., 1., 1.]])" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.ones_like(my_arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., 0., 0.],\n", + " [0., 1., 0., 0., 0.],\n", + " [0., 0., 1., 0., 0.],\n", + " [0., 0., 0., 1., 0.],\n", + " [0., 0., 0., 0., 1.]])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.eye(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.eye(5, 10, k=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 3, 4])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "slideshow": { + 
"slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 1., 2., 3., 4.])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(5.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([5., 6., 7.])" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(5.0, 8.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5. , 10.2, 15.4, 20.6])" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(5.0, 25.0, 5.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([], dtype=float64)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(8.0, 5.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([8. , 6.8, 5.6])" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(8.0, 5.0, -1.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5. 
, 5.45833333, 5.91666667, 6.375 , 6.83333333,\n", + " 7.29166667, 7.75 , 8.20833333, 8.66666667, 9.125 ,\n", + " 9.58333333, 10.04166667, 10.5 , 10.95833333, 11.41666667,\n", + " 11.875 , 12.33333333, 12.79166667, 13.25 , 13.70833333,\n", + " 14.16666667, 14.625 , 15.08333333, 15.54166667, 16. ])" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linspace(5, 16, 25)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5. , 5.44, 5.88, 6.32, 6.76, 7.2 , 7.64, 8.08, 8.52,\n", + " 8.96, 9.4 , 9.84, 10.28, 10.72, 11.16, 11.6 , 12.04, 12.48,\n", + " 12.92, 13.36, 13.8 , 14.24, 14.68, 15.12, 15.56])" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linspace(5, 16, 25, endpoint=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 5. , 5.45833333, 5.91666667, 6.375 , 6.83333333,\n", + " 7.29166667, 7.75 , 8.20833333, 8.66666667, 9.125 ,\n", + " 9.58333333, 10.04166667, 10.5 , 10.95833333, 11.41666667,\n", + " 11.875 , 12.33333333, 12.79166667, 13.25 , 13.70833333,\n", + " 14.16666667, 14.625 , 15.08333333, 15.54166667, 16. ]), 0.4583333333333333)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.linspace(5, 16, 25, retstep=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 100. , 187.38174229, 351.11917342, 657.93322466,\n", + " 1232.84673944, 2310.12970008, 4328.76128108, 8111.3083079 ,\n", + " 15199.11082953, 28480.35868436, 53366.99231206, 100000. 
])" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.logspace(2, 5, num=12)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 4. , 4.36203093, 4.75682846, 5.18735822, 5.65685425,\n", + " 6.1688433 , 6.72717132, 7.33603235, 8. , 8.72406186,\n", + " 9.51365692, 10.37471644, 11.3137085 , 12.3376866 , 13.45434264,\n", + " 14.67206469, 16. , 17.44812372, 19.02731384, 20.74943287,\n", + " 22.627417 , 24.67537321, 26.90868529, 29.34412938, 32. ])" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.logspace(2, 5, num=25, base=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5, 6, 7],\n", + " [5, 6, 7],\n", + " [5, 6, 7],\n", + " [5, 6, 7]])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mg = np.meshgrid(np.arange(5, 8), np.arange(12, 16))\n", + "mg[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 12, 12],\n", + " [13, 13, 13],\n", + " [14, 14, 14],\n", + " [15, 15, 15]])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mg[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[[12, 12, 12],\n", + " [13, 13, 13],\n", + " [14, 14, 14],\n", + " [15, 15, 15]],\n", + "\n", + " [[ 5, 6, 7],\n", + " [ 5, 6, 7],\n", + " [ 5, 6, 7],\n", + " [ 5, 6, 7]]])" + ] + }, 
+ "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# without creating temporary arrays\n", + "np.mgrid[12:16, 5:8]" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 10)" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.repeat(my_arr, 2, axis=1).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 2, 3, 4],\n", + " [5, 6, 7, 8, 9]])" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.arange(10).reshape((2, 5))" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 0, 1, 0, 1],\n", + " [2, 3, 2, 3, 2, 3],\n", + " [0, 1, 0, 1, 0, 1],\n", + " [2, 3, 2, 3, 2, 3]])" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.arange(4).reshape((2, 2))\n", + "np.tile(a, (2, 3))" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.637855 , 0.11200493],\n", + " [0.89870718, 0.27657074]])" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.random((2, 2)) # alias for np.random.random_sample\n", + "# Similar: np.random.rand(2, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + 
"9" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(5, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 9, 8, 6],\n", + " [14, 11, 10]])" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(5, 15, size=(2, 3))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Flatten a multidimensional array\n", + "\n", + "`.flatten` always returns a copy" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0.])" + ] + }, + "execution_count": 217, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.zeros((2, 2))\n", + "a.flatten()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Get a flattened version of the array that is contiguous in memory. If the array is contiguous already then this is a \"view\" and data is not copied." + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0.])" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.ravel()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Get flattened view that doesn't have to be contiguous in memory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 219, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0.])" + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.reshape((-1,))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "Flatten one or more dimensions:" + ] + }, + { + "cell_type": "code", + "execution_count": 233, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 20, 6)" + ] + }, + "execution_count": 233, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.zeros((3, 4, 5, 6))\n", + "a.reshape((3, -1, 6)).shape" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + " C_CONTIGUOUS : True\n", + " F_CONTIGUOUS : False\n", + " OWNDATA : True\n", + " WRITEABLE : True\n", + " ALIGNED : True\n", + " WRITEBACKIFCOPY : False\n", + " UPDATEIFCOPY : False" + ] + }, + "execution_count": 234, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.flags" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + " C_CONTIGUOUS : False\n", + " F_CONTIGUOUS : False\n", + " OWNDATA : False\n", + " WRITEABLE : True\n", + " ALIGNED : True\n", + " WRITEBACKIFCOPY : False\n", + " UPDATEIFCOPY : False" + ] + }, + "execution_count": 236, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[::2].flags" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + " C_CONTIGUOUS : 
False\n", + " F_CONTIGUOUS : True\n", + " OWNDATA : True\n", + " WRITEABLE : True\n", + " ALIGNED : True\n", + " WRITEBACKIFCOPY : False\n", + " UPDATEIFCOPY : False" + ] + }, + "execution_count": 240, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.asfortranarray(a).flags" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Universal functions and Broadcasting\n", + "\n", + "Universal functions (ufuncs) perform operations on arrays element-by-element. If arrays are **not** the same shape, we can broadcast the array elements to match shapes and perform the function. NumPy's available universal functions are documented [here](https://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs)." + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[2 2 2 2 2 2 2 2 2 2]\n", + " [2 2 2 2 2 2 2 2 2 2]]\n", + "[0 1 2 3 4 5 6 7 8 9]\n" + ] + } + ], + "source": [ + "a = np.full((2, 10), 2)\n", + "b = np.arange(10)\n", + "print(a)\n", + "print(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],\n", + " [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]])" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a + b" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512],\n", + " [ 1, 2, 4, 8, 16, 32, 64, 128, 256, 512]])" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "np.power(a, b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "NumPy also automatically casts data types when needed." + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3.2, 5.5, 7.6], dtype=float32)" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.array([1, 2, 3], dtype=np.uint8)\n", + "b = np.array([2.2, 3.5, 4.6], dtype=np.float32)\n", + "a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "# Logical and Bitwise Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 3, 4])" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.arange(5)\n", + "a " + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, True, True])" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a > 2" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, True, True, False, False])" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(a > 0) & (a < 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, True, True, False, False])" + ] + }, + 
"execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.logical_and(a > 0, a < 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-175-81e674c5a685>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()" + ] + } + ], + "source": [ + "(a > 0) and (a < 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 2, 4])" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = np.arange(3, 8)\n", + "a & b" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3, 5, 7, 7, 7])" + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a | b" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3, 5, 7, 5, 3])" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a ^ b" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, False, False])" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a == b" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, True, True, True, True])" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a != b" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, False, False, True, True])" 
+ ] + }, + "execution_count": 182, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = a > 2\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ True, True, True, False, False])" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "~c" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Indexing and Slicing" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "a = np.zeros((5, 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[1, 2] # index-1 row, index-2 column" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0., 0., 0., 0., 0.])" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 
0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[0:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[1:5:2] # every other" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[:, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 1, 10, 1)" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[:, None, :, None].shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "This is useful if you need an array to have a certain number of dimensions."
+ ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4,\n", + " 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8,\n", + " 8, 9, 9, 9, 9, 9])" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.repeat(np.arange(10), 5)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0, 0, 0, 0],\n", + " [1, 1, 1, 1, 1],\n", + " [2, 2, 2, 2, 2],\n", + " [3, 3, 3, 3, 3],\n", + " [4, 4, 4, 4, 4],\n", + " [5, 5, 5, 5, 5],\n", + " [6, 6, 6, 6, 6],\n", + " [7, 7, 7, 7, 7],\n", + " [8, 8, 8, 8, 8],\n", + " [9, 9, 9, 9, 9]])" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.repeat(np.arange(10)[:, None], 5, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "What if I don't know how many dimensions there are?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 229, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 4, 5)" + ] + }, + "execution_count": 229, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.zeros((3, 4, 5, 6)) # 4 dimensions\n", + "a[:, :, :, 0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 4, 5)" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[..., 0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Ellipsis" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# '...' 
maps to the Ellipsis object\n", + "Ellipsis # this is a builtin python object" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Boolean Masks and Index Arrays" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "metadata": {}, + "outputs": [], + "source": [ + "a = np.arange(40).reshape((5, 8))\n", + "b = np.random.randint(5, 17, size=(5, 8))" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ True, False, True, True, False, False, True, False],\n", + " [False, True, False, True, False, False, False, False],\n", + " [ True, False, True, False, False, True, False, True],\n", + " [ True, True, False, True, False, False, True, False],\n", + " [ True, False, True, False, False, True, False, False]])" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b > 12" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 2, 3, 6, 9, 11, 16, 18, 21, 23, 24, 25, 27, 30, 32, 34, 37])" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[b > 12]" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 2, 4, 20, 24, 28, 32, 34, 36, 40, 46, 52, 64, 66, 72, 76])" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[b > 12] * 2" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 0, 0, 3, 4, 5, 6, 7],\n", + " [ 8, 9, 0, 
11, 0, 13, 0, 15],\n", + " [ 0, 0, 0, 19, 0, 21, 22, 0],\n", + " [24, 25, 0, 27, 28, 29, 30, 31],\n", + " [ 0, 0, 34, 35, 0, 37, 0, 39]])" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = a.copy()\n", + "c[b > 12] = 0\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[100, 1, 2, 103, 104, 105, 106, 107],\n", + " [108, 109, 10, 111, 12, 113, 14, 115],\n", + " [ 16, 17, 18, 119, 20, 121, 122, 23],\n", + " [124, 125, 26, 127, 128, 129, 130, 131],\n", + " [ 32, 33, 134, 135, 36, 137, 38, 139]])" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# <condition>, <x>, <y>\n", + "# result = <x> if <condition> else <y>\n", + "np.where(b > 12, a, a + 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "What if we wanted the locations of where things are True?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 189, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "idx = np.nonzero(b > 12)" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 3, 8, 16, 17, 20, 21, 22, 24, 25, 28, 29, 36])" + ] + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a[idx]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Tons of Functions\n", + "\n", + "* np.sum\n", + "* np.mean\n", + "* np.std\n", + "* np.min\n", + "* np.max\n", + "* np.argmin\n", + "* np.argmax" + ] + }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "780" + ] + }, + "execution_count": 191, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "780" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sum(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 197, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 197, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 8, 16, 24, 32])" + ] + }, + "execution_count": 198, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.min(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "39" + ] + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.argmax(a) # index in to flattened array" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 7)" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.unravel_index(np.argmax(a), a.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Masked Arrays\n", + "\n", + "Use Masked Arrays to \"hide\" bad values in your data.\n", + "\n", + "My Opinion: Don't use masked arrays unless you really have to. Use higher level libraries like pandas and xarray where NaN (`np.nan`) is used as a \"sentinel\" value. Masked arrays use more memory and calculations can be slower when using them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "masked_array(data=[1, --, 3],\n", + " mask=[False, True, False],\n", + " fill_value=999999)" + ] + }, + "execution_count": 254, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.ma.masked_array([1, 2, 3], mask=[False, True, False])\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "masked_array(data=[2, --, 4],\n", + " mask=[False, True, False],\n", + " fill_value=999999)" + ] + }, + "execution_count": 243, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3])" + ] + }, + "execution_count": 244, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.data" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, True, False])" + ] + }, + "execution_count": 245, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.mask" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 3])" + ] + }, + "execution_count": 256, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a.filled(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "masked_array(data=[--, --, 2, 3, 4],\n", + " mask=[ True, True, False, False, 
False],\n", + " fill_value=999999)" + ] + }, + "execution_count": 247, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = np.arange(5)\n", + "np.ma.masked_where(a < 2, a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Saving flat binary files\n", + "\n", + "If you want to save numpy arrays for later use in the simplest format with no additional dependencies, write them as either a flat binary file or a .npy/.npz file.\n", + "\n", + "To create a flat binary file from an array:" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "metadata": {}, + "outputs": [], + "source": [ + "a = np.arange(5, dtype=np.uint8)\n", + "a.tofile('test_binary.dat')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "To load data from a flat binary file:" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "metadata": {}, + "outputs": [], + "source": [ + "a = np.fromfile('test_binary.dat', dtype=np.uint8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Be careful. No dtype or shape information is recorded in the binary file. `fromfile` will read the file as a flat array.\n", + "\n", + "An even better option than `fromfile` is to use `memmap`. Instead of loading all of the data into memory, a memory map keeps the data on disk and only reads what it needs (and other fancy OS-level stuff)." + ] + }, + { + "cell_type": "code", + "execution_count": 272, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "a = np.memmap('test_binary.dat', dtype=np.uint8, mode='r')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "A better option than a flat binary file is the `.npy` format, which records the dtype and shape information."
+ ] + }, + { + "cell_type": "code", + "execution_count": 273, + "metadata": {}, + "outputs": [], + "source": [ + "np.save('test.npy', a)" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "memmap([0, 1, 2, 3, 4], dtype=uint8)" + ] + }, + "execution_count": 275, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_a = np.load('test.npy', mmap_mode='r')\n", + "new_a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the `mmap_mode` argument is optional. Without it we would be loading the data into memory instead of being memory-mapped." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Save multiple arrays to disk\n", + "\n", + "Although other file formats provide more features (HDF5, NetCDF4, zarr, etc.), numpy also comes with a `.npz` format for storing multiple arrays at once. There is a `savez` function and a compressing version called `savez_compressed`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 276, + "metadata": {}, + "outputs": [], + "source": [ + "a = np.arange(5, dtype=np.uint8)\n", + "b = np.arange(10, dtype=np.float32)\n", + "np.savez('test_multiple.npz', my_var1=a, my_var2=b)" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<numpy.lib.npyio.NpzFile at 0x11f6bbd68>" + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from_disk = np.load('test_multiple.npz') # same load as before\n", + "from_disk" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['my_var1', 'my_var2']" + ] + }, + "execution_count": 280, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(from_disk.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 3, 4], dtype=uint8)" + ] + }, + "execution_count": 281, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from_disk['my_var1']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Not Covered\n", + "\n", + "* Record arrays (use pandas or xarray)\n", + "* Matrix\n", + "* dask\n", + "* pandas\n", + "* xarray\n", + "* zarr" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + 
"nbformat_minor": 4 +} -- GitLab