Lecture 15 – NumPy

Data 94, Spring 2021

Review: lists

In [1]:
names = ['bill', 'sarah', 'cal', 'nina', 'joe']
# Indexing starts at 0, so index 1 is the second element
names[1]
Out[1]:
'sarah'
In [2]:
# A single list can hold values of different types (strings and ints here)
me = ['berkeley', 22, 1998, '🇨🇦']
In [3]:
# On lists, + concatenates the two lists; it does not add element by element
[1, 2, 3] + [4, 5, 6]
Out[3]:
[1, 2, 3, 4, 5, 6]
In [4]:
numbers = [5, 4, 9, 12, 18]
# With a plain list, adding 5 to every element takes an explicit loop
# that appends each result to a new list
new_numbers = []
for num in numbers:
    new_numbers.append(num + 5)

new_numbers
Out[4]:
[10, 9, 14, 17, 23]

NumPy

In [5]:
# Necessary to use numpy arrays!
import numpy as np
In [6]:
# Array with 4 numbers
np.array([4, 9, 1, 2])
Out[6]:
array([4, 9, 1, 2])
In [7]:
# Array with 3 strings
np.array(['how', 'are', 'you'])
Out[7]:
array(['how', 'are', 'you'], dtype='<U3')
In [8]:
# Empty array; as the output shows, its dtype comes out as float64
np.array([])
Out[8]:
array([], dtype=float64)
In [9]:
numbers = [5, 4, 9, 12, 18]
numbers
Out[9]:
[5, 4, 9, 12, 18]
In [10]:
type(numbers)
Out[10]:
list
In [11]:
# Build an array from the existing list; the list itself is left as it was
numbers_arr = np.array(numbers)
numbers_arr
Out[11]:
array([ 5,  4,  9, 12, 18])
In [12]:
type(numbers_arr)
Out[12]:
numpy.ndarray

Benefit 1: operations on every element

Basic operations:

In [13]:
numbers_arr
Out[13]:
array([ 5,  4,  9, 12, 18])
In [14]:
numbers_arr * 2
Out[14]:
array([10,  8, 18, 24, 36])
In [15]:
numbers_arr - 5
Out[15]:
array([ 0, -1,  4,  7, 13])
In [16]:
numbers_arr // 2
Out[16]:
array([2, 2, 4, 6, 9])
In [17]:
numbers_arr ** 2 - 1
Out[17]:
array([ 24,  15,  80, 143, 323])

Example: Celsius to Fahrenheit

In [18]:
c_temps = [17, 18, 22, -4.5, 15, 9, 0, 3, 8]

# Using vanilla lists: convert one reading at a time
# and collect the results in a new list
f_temps = []
for celsius in c_temps:
    f_temps.append((9 / 5) * celsius + 32)

f_temps
Out[18]:
[62.6, 64.4, 71.6, 23.9, 59.0, 48.2, 32.0, 37.4, 46.4]
In [19]:
# Using arrays: no for loop!
(9 / 5) * np.array(c_temps) + 32
Out[19]:
array([62.6, 64.4, 71.6, 23.9, 59. , 48.2, 32. , 37.4, 46.4])

More math

In [20]:
numbers_arr
Out[20]:
array([ 5,  4,  9, 12, 18])
In [21]:
np.sqrt(numbers_arr)
Out[21]:
array([2.23606798, 2.        , 3.        , 3.46410162, 4.24264069])
In [22]:
np.log(numbers_arr)
Out[22]:
array([1.60943791, 1.38629436, 2.19722458, 2.48490665, 2.89037176])
In [23]:
np.sin(numbers_arr)
Out[23]:
array([-0.95892427, -0.7568025 ,  0.41211849, -0.53657292, -0.75098725])
In [24]:
# np.sqrt also accepts a single number, not just an array
np.sqrt(144)
Out[24]:
12.0

Quick Check 1

In [25]:
my_tips = [0.15, 0.16, 0.22, 0.39]
your_tips = [0.25, 0.19, 0.08]
tips = np.array(my_tips + your_tips)
tips_pct = 100 * tips
In [ ]:
 

Benefit 2: element-wise operations

In [26]:
a = np.array([1, 2, 3])
b = np.array([-4, 5, 9])
In [27]:
a + b
Out[27]:
array([-3,  7, 12])
In [28]:
a - 2 * b
Out[28]:
array([  9,  -8, -15])
In [29]:
a**2 + b**2
Out[29]:
array([17, 29, 90])
In [30]:
# a has 3 elements and c has 4, so adding them element-wise raises the ValueError below
c = np.array([9, 0, 2, 4])

a + c
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-87488aa0dee2> in <module>
      2 c = np.array([9, 0, 2, 4])
      3 
----> 4 a + c

ValueError: operands could not be broadcast together with shapes (3,) (4,) 

Example: population growth

In [31]:
pop_2019 = np.array([100, 55, 23, 91, 121])
pop_2020 = np.array([101, 45, 23, 93, 118])
In [32]:
# Change from 2019 to 2020
pop_2020 - pop_2019
Out[32]:
array([  1, -10,   0,   2,  -3])
In [33]:
# Percent change
100 * (pop_2020 - pop_2019) / pop_2019
Out[33]:
array([  1.        , -18.18181818,   0.        ,   2.1978022 ,
        -2.47933884])

Quick Check 2

In [34]:
# a = np.array([3, 4, 5])
# b = np.array([7, -4.0, 5])
# c = (a + b) / (a - b)

Other features of arrays

Accessing individual elements

In [35]:
pop_2020 = np.array([101, 45, 23, 93, 118])
In [36]:
pop_2020[0]
Out[36]:
101
In [37]:
# Negative indices count from the end: -2 is the second-to-last element
pop_2020[-2]
Out[37]:
93
In [38]:
pop_2020.item(0)
Out[38]:
101
In [39]:
pop_2020.item(-2)
Out[39]:
93

Common methods

In [40]:
pop_2020
Out[40]:
array([101,  45,  23,  93, 118])
In [41]:
# Sum of all elements
# Equivalent to np.sum(pop_2020)
pop_2020.sum()
Out[41]:
380
In [42]:
# Average of all elements
# Equivalent to np.mean(pop_2020)
pop_2020.mean()
Out[42]:
76.0
In [43]:
# Product of all elements
# Equivalent to np.prod(pop_2020)
pop_2020.prod()
Out[43]:
1147167090

Ranges

In [44]:
# Integers from 0 up to, but not including, 10
np.arange(10)
Out[44]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [45]:
# Start at 3, stop before 13, step by 3
np.arange(3, 13, 3)
Out[45]:
array([ 3,  6,  9, 12])
In [46]:
# Powers of 2
2**np.arange(10)
Out[46]:
array([  1,   2,   4,   8,  16,  32,  64, 128, 256, 512])
In [47]:
# 1^2 + 2^2 + 3^2 + ... + 99^2 + 100^2
# Start the range at 1 so the terms match the series above exactly
# (np.arange(101) would also include a leading 0, which adds nothing).
np.sum(np.arange(1, 101)**2)
Out[47]:
338350

Automatic type conversion

In [48]:
# 2. means 2.0
# The list mixes ints, a float, and a bool; the array stores them all
# as floats, so False shows up as 0.
some_values = [2, 3, 3.5, 4, False]
np.array(some_values)
Out[48]:
array([2. , 3. , 3.5, 4. , 0. ])
In [49]:
np.array(some_values).item(0)
Out[49]:
2.0
In [50]:
# Mixing in a string makes every element a string, the numbers included
other_values = [9, 8, 'hello', -14.5]
np.array(other_values)
Out[50]:
array(['9', '8', 'hello', '-14.5'], dtype='<U21')

Even more functions

In [51]:
pop_2020
Out[51]:
array([101,  45,  23,  93, 118])
In [52]:
# Cumulative sum: for each element,
# add all elements so far
np.cumsum(pop_2020)
Out[52]:
array([101, 146, 169, 262, 380])
In [53]:
# Difference: takes the differences
# of consecutive elements (each element minus the one before it),
# so the result is one element shorter than the input
np.diff(pop_2020)
Out[53]:
array([-56, -22,  70,  25])
In [54]:
# count_nonzero: counts the number of elements
# that are not equal to 0
np.count_nonzero(np.array([1, 2, 3, 0, 0, 4, -5]))
Out[54]:
5