Lecture 15 – NumPy¶

Data 94, Spring 2021¶

Review: lists¶

names = ['bill', 'sarah', 'cal', 'nina', 'joe']
names[1]

'sarah'

me = ['berkeley', 22, 1998, '🇨🇦']

[1, 2, 3] + [4, 5, 6]

[1, 2, 3, 4, 5, 6]

numbers = [5, 4, 9, 12, 18]
new_numbers = []
for num in numbers:
    new_numbers.append(num + 5)

new_numbers

[10, 9, 14, 17, 23]

NumPy¶

# Necessary to use numpy arrays!
import numpy as np

# Array with 4 numbers
np.array([4, 9, 1, 2])

array([4, 9, 1, 2])

# Array with 3 strings
np.array(['how', 'are', 'you'])

array(['how', 'are', 'you'], dtype='<U3')

# Empty array
np.array([])

array([], dtype=float64)

numbers = [5, 4, 9, 12, 18]
numbers

[5, 4, 9, 12, 18]

type(numbers)

list

numbers_arr = np.array(numbers)
numbers_arr

array([ 5,  4,  9, 12, 18])

type(numbers_arr)

numpy.ndarray

Benefit 1: operations on every element¶

Basic operations:¶

numbers_arr

array([ 5,  4,  9, 12, 18])

numbers_arr * 2

array([10,  8, 18, 24, 36])

numbers_arr - 5

array([ 0, -1,  4,  7, 13])

numbers_arr // 2

array([2, 2, 4, 6, 9])

numbers_arr ** 2 - 1

array([ 24,  15,  80, 143, 323])

Example: Celsius to Fahrenheit¶

c_temps = [17, 18, 22, -4.5, 15, 9, 0, 3, 8]

# Using vanilla lists:
f_temps = []
for c in c_temps:
    f = (9 / 5) * c + 32
    f_temps.append(f)
    
f_temps

[62.6, 64.4, 71.6, 23.9, 59.0, 48.2, 32.0, 37.4, 46.4]

# Using arrays: no for loop!
(9 / 5) * np.array(c_temps) + 32

array([62.6, 64.4, 71.6, 23.9, 59. , 48.2, 32. , 37.4, 46.4])

More math¶

numbers_arr

array([ 5,  4,  9, 12, 18])

np.sqrt(numbers_arr)

array([2.23606798, 2.        , 3.        , 3.46410162, 4.24264069])

np.log(numbers_arr)

array([1.60943791, 1.38629436, 2.19722458, 2.48490665, 2.89037176])

np.sin(numbers_arr)

array([-0.95892427, -0.7568025 ,  0.41211849, -0.53657292, -0.75098725])

np.sqrt(144)

12.0

Quick Check 1¶

my_tips = [0.15, 0.16, 0.22, 0.39]
your_tips = [0.25, 0.19, 0.08]
tips = np.array(my_tips + your_tips)
tips_pct = 100 * tips

Benefit 2: element-wise operations¶

a = np.array([1, 2, 3])
b = np.array([-4, 5, 9])

a + b

array([-3,  7, 12])

a - 2 * b

array([  9,  -8, -15])

a**2 + b**2

array([17, 29, 90])

# a and c have different lengths, so you can't add them element-wise
c = np.array([9, 0, 2, 4])

a + c

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-30-87488aa0dee2> in <module>
      2 c = np.array([9, 0, 2, 4])
      3 
----> 4 a + c

ValueError: operands could not be broadcast together with shapes (3,) (4,)

Example: population growth¶

pop_2019 = np.array([100, 55, 23, 91, 121])
pop_2020 = np.array([101, 45, 23, 93, 118])

# Change from 2019 to 2020
pop_2020 - pop_2019

array([  1, -10,   0,   2,  -3])

# Percent change
100 * (pop_2020 - pop_2019) / pop_2019

array([  1.        , -18.18181818,   0.        ,   2.1978022 ,
        -2.47933884])

Quick Check 2¶

# a = np.array([3, 4, 5])
# b = np.array([7, -4.0, 5])
# c = (a + b) / (a - b)

Other features of arrays¶

Accessing individual elements¶

pop_2020 = np.array([101, 45, 23, 93, 118])

pop_2020[0]

101

pop_2020[-2]

93

pop_2020.item(0)

101

pop_2020.item(-2)

93

Common methods¶

pop_2020

array([101,  45,  23,  93, 118])

# Sum of all elements
# Equivalent to np.sum(pop_2020)
pop_2020.sum()

380

# Average of all elements
# Equivalent to np.mean(pop_2020)
pop_2020.mean()

76.0

# Product of all elements
# Equivalent to np.prod(pop_2020)
pop_2020.prod()

1147167090

Ranges¶

np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.arange(3, 13, 3)

array([ 3,  6,  9, 12])

# Powers of 2
2**np.arange(10)

array([  1,   2,   4,   8,  16,  32,  64, 128, 256, 512])

# 1^2 + 2^2 + 3^2 + ... + 99^2 + 100^2
# (np.arange(101) starts at 0, but 0^2 contributes nothing to the sum)
np.sum(np.arange(101)**2)

338350

Automatic type conversion¶

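# In the output, "2." means 2.0: every value in the list
# (including False, which becomes 0.) is converted to a float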
some_values = [2, 3, 3.5, 4, False]
np.array(some_values)

array([2. , 3. , 3.5, 4. , 0. ])

np.array(some_values).item(0)

2.0

other_values = [9, 8, 'hello', -14.5]
np.array(other_values)

array(['9', '8', 'hello', '-14.5'], dtype='<U21')

Even more functions¶

pop_2020

array([101,  45,  23,  93, 118])

# Cumulative sum: for each element,
# add all elements so far
np.cumsum(pop_2020)

array([101, 146, 169, 262, 380])

# Difference: takes the differences between
# consecutive elements (each element minus the one before it)
np.diff(pop_2020)

array([-56, -22,  70,  25])

# count_nonzero: counts the number of elements
# that are not equal to 0
np.count_nonzero(np.array([1, 2, 3, 0, 0, 4, -5]))

5