# Download this page as: # # - :download:`a commented Python script ` # - :download:`a minimal Python script ` # # .. _numpy_basics: # # .. _`linspace`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.linspace.html # .. _`logspace`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.logspace.html # .. _`arange`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.arange.html#numpy.arange # .. _`meshgrid`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html # .. _`ones`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ones.html # .. _`ones_like`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ones_like.html#numpy.ones_like # .. _`zeros`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.zeros.html # .. _`zeros_like`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.zeros_like.html#numpy.zeros_like # # NumPy basics # ============ # # This page introduces the scientific number-crunching package numpy. It covers # only the basics of the basics. The goal is to justify its use and make the distinction # between arrays and lists. After going through this page, the reader should be # able to understand the concept of numpy arrays, create arrays from scratch and # do calculations with them. We stick to 1D arrays here, the extension to multidimensional # arrays is for a later tutorial. # # .. contents:: # # Why Numpy? # ---------- # # Python's ease-of-use often comes at a price: speed. Let's try to compute the # sine of 20 million (uniform) random floats using Python's standard modules, and # time it. 
# # import time # get access to wall-clock time import math # standard module implementing mathematical operators import random # generate random numbers import numpy as np t0 = time.time() output = [] for i in range(20000000): output.append(math.sin(random.random())) print(time.time()-t0) # in seconds # # We can gain ~50% with some tricks (list comprehension, generator and local variables): # sin,rand = math.sin,random.random t0 = time.time() output = [sin(rand()) for i in xrange(20000000)] # in Python 3, replace xrange with range print(time.time()-t0) # # With numpy, we can gain another 50%, **and** have a much cleaner implementation: # t0 = time.time() output = np.sin(np.random.uniform(size=20000000)) print(time.time()-t0) # # # A pure FORTRAN program is, however, still almost 50% faster than numpy. # # Basically, numpy provides vectorized functions written in C or FORTRAN that # can act on pure Python objects, with a little bit of function-call overhead. # Most of the looping is done in C or FORTRAN, avoiding the expensive ``for`` # loops in pure Python. # # Sometimes doing things in a vectorized way is not possible or just too # confusing. Vectorization is more an art than a science, so the basic answer is that if it runs # fast enough then you are good to go. Otherwise things need to be vectorized # or maybe coded in C or Fortran (see :ref:`Optimization `). # # Making arrays # ------------- # # Arrays can be created in different ways: # a = np.array([10, 20, 30, 40]) # create an array from a list of values a b = np.arange(4) # create an array of 4 integers, from 0 to 3 b np.linspace(-np.pi, np.pi, 5) # create an array of 5 evenly spaced samples from -pi to pi np.logspace(1,3,9) # create a log-spaced array of 9 floats between (and including) 10 and 1000 # # # There is also a submodule ``np.random`` which allows you to create simple random arrays: # np.random.uniform(size=5,low=-5,high=5) np.random.normal(size=6,loc=1,scale=4) # ..
tip:: # # You can set a seed via ``np.random.seed(100)`` which takes an integer # as an argument. Setting the seed guarantees the same set of random variables # when repeating execution. # # # The function ``arange`` is best used only with integer arguments; for non-integer steps, prefer ``linspace``. # # New arrays can be obtained by operating with existing arrays: # a + b**2 # elementwise operations # # # There are shortcuts to fill arrays with ones or zeros, or create arrays just # like another one, but filled with ones or zeros: # f = np.ones(3) # float array of ones g = np.zeros(4, dtype=int) # int array of zeros i = np.ones_like(g) # array of ones with same length/type as g # # # # .. admonition:: Exercise: Create a squared arctan curve # # Create a squared arctan curve between -4*pi and 4*pi sampling 100 points. Note # that the value of `pi` is in the numpy namespace (``np.pi``). # # .. raw:: html # #

Click to Show/Hide Solution

# x = np.linspace(-4*np.pi, 4*np.pi, 100) y = np.arctan(x)**2 # # .. raw:: html # #
# # # # # The difference between lists and arrays: # ---------------------------------------- # # Lists and arrays behave differently! # # * Arithmetic: # mylist = [1,2,3] myarray = np.array([1,2,3]) mylist*2 myarray*2 # * Manipulations: lists are easy to modify. Using ``.append()`` and ``.remove()`` # makes them efficiently longer or shorter. Numpy arrays are not meant to # be resized. There is no real alternative to ``.remove()``, though there is # an alternative to ``.append()``: ``np.append`` (or ``np.hstack``). These are very costly, however, # and should be avoided in loops. If you don't know the length of your array # in advance, it is often better to first create a list with ``.append()``, # and turn that into an array after you're done. # # # # But they are similar in some ways: # # * Indexing and slicing (though numpy is **much** more powerful - but we'll get to that later): # mylist = [1,2,3,4,5,6,7,8] myarray = np.array([1,2,3,4,5,6,7,8]) mylist[1::2] myarray[1::2] # .. admonition:: Exercise: Sorting and reversing # # Create a random array (following a standard Poisson distribution) of size 100, sort and reverse it, # and print the second-to-last element of that array. # # .. raw:: html # #

Click to Show/Hide Solution

# x = np.random.poisson(size=100) x = np.sort(x) x = x[::-1] print(x[-2]) # # .. raw:: html # #
# # # # # # # Go `back to the matplotlib tutorial `_. # # # # Summary # ---------- # # This is a non-exhaustive list of useful commands you might want to consider when creating # arrays: # # =============================== ========================================================================== # `linspace`_ Return evenly spaced numbers over a specified interval. # `arange`_ Return evenly spaced values within a given interval. # `logspace`_ Return numbers spaced evenly on a log scale. # `meshgrid`_ Return coordinate matrices from two or more coordinate vectors. # `zeros`_ Return a new array of given shape and type, filled with zeros. # `zeros_like`_ Return an array of zeros with the same shape and type as a given array. # `ones`_ Return a new array of given shape and type, filled with ones. # `ones_like`_ Return an array of ones with the same shape and type as a given array. # =============================== ==========================================================================