Lecture 4 – Comparisons and Boolean Operators

Data 94, Spring 2021

Review exercise – growth rates

$$\text{FV} = \text{PV} (1 + i)^t$$
In [1]:
# Known quantities for the growth-rate formula FV = PV * (1 + i)**t shown above.
PV = 1e4            # Scientific notation for 10000 (note: 1e4 is a float, 10000.0)
FV = 47123
t = 10
In [ ]:
 

NoneType and bool

NoneType

In [2]:
my_var = None
In [3]:
type(my_var)
Out[3]:
NoneType
In [4]:
# Nothing is displayed!
my_var
In [5]:
# But it can be printed.
print(my_var)
None

Pay attention to what happens here.

In [6]:
# print displays 15 as a side effect; whatever print *returns* is what gets bound to strange.
strange = print(15)
15
In [7]:
strange
In [8]:
print(strange)
None

bool

In [9]:
# Can display multiple values in one line
True, False
Out[9]:
(True, False)
In [10]:
type(True), type(False)
Out[10]:
(bool, bool)
In [11]:
int(True), int(False)
Out[11]:
(1, 0)
In [12]:
# Equivalent to 3 + 1 - 0
3 + True - False
Out[12]:
4

Keywords

In [13]:
# Doesn't work
3 = 4
  File "<ipython-input-13-27aff25d12d3>", line 2
    3 = 4
    ^
SyntaxError: cannot assign to literal
In [14]:
# Similarly, doesn't work
True = 14
  File "<ipython-input-14-aa701fa4af03>", line 2
    True = 14
    ^
SyntaxError: cannot assign to True
In [15]:
# Works, but don't do it!
# Don't uncomment this, because it will ruin the demos later on.
# bool = "breaking the rules"
# bool

Quick Check 1

In [16]:
# A = str('None') + str('00')
In [17]:
# B = True * 8 - float(False) * 15
In [18]:
# C = int(None) - 1

Comparisons

In [19]:
# is age at least age_limit?
age_limit = 21
age = 17

age >= age_limit
Out[19]:
False
In [20]:
# is password_guess equal to true_password?
true_password = 'qwerty1093x!'
password_guess = 'QWERTY1093x!'

password_guess == true_password
Out[20]:
False
In [21]:
3 == 3
Out[21]:
True
In [22]:
'hello' != 'howdy'
Out[22]:
True
In [23]:
-3 > -2
Out[23]:
False
In [24]:
-3 < -2
Out[24]:
True
In [25]:
# Strings are compared character by character; 'a' sorts before 'b', so this is False.
'alpha' >= 'beta'
Out[25]:
False
In [26]:
x = 5         # set x equal to 5
In [27]:
x == 5        # is x equal to 5?
Out[27]:
True
In [28]:
# The comparison x == 5 is evaluated first; its bool result is then assigned to y.
y = x == 5
y
Out[28]:
True

Comparing different types

In [29]:
17 == '17'
Out[29]:
False
In [30]:
'zebra' != True
Out[30]:
True
In [31]:
# True behaves like the number 1 in comparisons, so it is equal to 1.0.
True == 1.0
Out[31]:
True
In [32]:
# Same idea: True is treated as 1 here, so this asks whether 5 > 1.
5 > True
Out[32]:
True
In [33]:
'alpha' >= 'beta'
Out[33]:
False
In [34]:
'alpha' >= 5
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-34-2b00aa81a369> in <module>
----> 1 'alpha' >= 5

TypeError: '>=' not supported between instances of 'str' and 'int'

Floating point issues, revisited

In [35]:
0.1 * 2 == 0.2
Out[35]:
True
In [36]:
0.1 * 6
Out[36]:
0.6000000000000001
In [37]:
0.1 * 6 == 0.6
Out[37]:
False
In [38]:
# Instead of testing floats with ==, check that they differ by less than a small tolerance.
abs(0.1 * 6 - 0.6) < 0.0001
Out[38]:
True

String containment

In [39]:
'berkeley' in 'uc berkeley'
Out[39]:
True
In [40]:
'stanford' in 'uc berkeley'
Out[40]:
False
In [41]:
# Containment is case-sensitive: lowercase 'berkeley' does not appear in 'UC BERKELEY'.
'berkeley' in 'UC BERKELEY'
Out[41]:
False

Quick Check 2

In [ ]:
 
In [ ]:
 

Boolean operators

In [42]:
year = 'junior'
units = 125
In [43]:
year_check = year == 'senior'
year_check
Out[43]:
False
In [44]:
units_check = units >= 120
units_check
Out[44]:
True
In [45]:
# `and` is True only when both checks are True; `or` is True when at least one is.
ready_to_grad = year_check and units_check
almost_ready = year_check or units_check
In [46]:
ready_to_grad
Out[46]:
False
In [47]:
# Show the result of the `or` check (year_check or units_check) computed two cells earlier.
almost_ready
Out[47]:
True
In [48]:
n = 12
(n % 2 == 0) and (n % 4 == 0)
Out[48]:
True
In [49]:
(n % 2 == 0) and not (n % 5 == 0)
Out[49]:
True
In [50]:
(n % 3 != 0) and (n % 4 != 0)
Out[50]:
False
In [51]:
True and False and True and True
Out[51]:
False
In [52]:
True or False or True or True
Out[52]:
True
In [53]:
# Chained comparison: equivalent to (3 < 4) and (4 <= 5).
3 < 4 <= 5
Out[53]:
True
In [54]:
# Every adjacent pair must hold: 3 < 4, 4 > 2, 2 < 11, and 11 > -1.
3 < 4 > 2 < 11 > -1
Out[54]:
True
In [55]:
# One failing link (4 < 2) makes the whole chain False.
3 < 4 < 2 > 11 > -1
Out[55]:
False

Quick Check 3

In [ ]:
 
In [ ]:
 

Demo

The first cell contains code that's mostly copied from last lecture. Ignore it once again!

In [56]:
from datascience import *
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np

# Read the raw table, then give three verbose column labels shorter names.
data = Table.read_table('data/countries.csv')
data = (
    data
    .relabeled('Country(or dependent territory)', 'Country')
    .relabeled('% of world', '%')
    .relabeled('Source(official or UN)', 'Source')
)

# Re-derive three columns from their string values:
#   'Country'    - lowercased, keeping only the text before the first '[' (if any)
#   'Population' - commas removed, then converted to int
#   '%'          - '%' signs removed, then converted to float
data = data.with_columns(
    'Country', data.apply(lambda label: label.partition('[')[0].lower(), 'Country'),
    'Population', data.apply(lambda digits: int(digits.replace(',', '')), 'Population'),
    '%', data.apply(lambda share: float(share.replace('%', '')), '%'),
)

def first_letter(s):
    """Return the first character of the string `s`, or '' if `s` is empty."""
    # Slicing (rather than s[0]) avoids an IndexError on an empty string.
    return s[:1]

def last_letter(s):
    """Return the last character of the string `s`, or '' if `s` is empty."""
    # Slicing (rather than s[-1]) avoids an IndexError on an empty string.
    return s[-1:]
In [57]:
data
Out[57]:
Rank Country Population % Date Source
1 china 1405936040 17.9 27 Dec 2020 National population clock[3]
2 india 1371366679 17.5 27 Dec 2020 National population clock[4]
3 united states 330888778 4.22 27 Dec 2020 National population clock[5]
4 indonesia 269603400 3.44 1 Jul 2020 National annual projection[6]
5 pakistan 220892331 2.82 1 Jul 2020 UN Projection[2]
6 brazil 212523810 2.71 27 Dec 2020 National population clock[7]
7 nigeria 206139587 2.63 1 Jul 2020 UN Projection[2]
8 bangladesh 169885314 2.17 27 Dec 2020 National population clock[8]
9 russia 146748590 1.87 1 Jan 2020 National annual estimate[9]
10 mexico 127792286 1.63 1 Jul 2020 National annual projection[10]

... (232 rows omitted)

Below, assign first_or_last to a string containing a single lowercase letter (country names were converted to lowercase above). We'll chart the populations of the most populous countries (up to 15) whose names either begin or end with first_or_last.

In [58]:
first_or_last = 'a'
In [59]:
# Keep the rows whose country name begins or ends with first_or_last,
# then order them from largest to smallest population.
relevant_countries = data.where(
    data.apply(
        # Focus on this part!
        lambda country: first_letter(country) == first_or_last or last_letter(country) == first_or_last,
        'Country'
    )
).sort('Population', descending = True)
In [60]:
relevant_countries
Out[60]:
Rank Country Population % Date Source
1 china 1405936040 17.9 27 Dec 2020 National population clock[3]
2 india 1371366679 17.5 27 Dec 2020 National population clock[4]
4 indonesia 269603400 3.44 1 Jul 2020 National annual projection[6]
7 nigeria 206139587 2.63 1 Jul 2020 UN Projection[2]
9 russia 146748590 1.87 1 Jan 2020 National annual estimate[9]
13 ethiopia 109612120 1.4 1 Jul 2020 National annual projection[13]
23 south africa 59622350 0.761 1 Jul 2020 National annual estimate[23]
25 tanzania 57637628 0.735 1 Jul 2020 National annual projection[25]
27 south korea 51834302 0.661 1 Nov 2020 Monthly national estimate[27]
28 colombia 50372424 0.643 30 Jun 2020 National annual projection[28]

... (72 rows omitted)

In [61]:
# Ignore everything except the last line!
plt.figure(figsize = (10, 7))

# relevant_countries is already sorted by population (descending), so the
# first 15 entries are the 15 most populous matches. Slicing a shorter array
# simply returns all of it, so no explicit row-count check is needed.
names = relevant_countries.column('Country')[:15]
pops = relevant_countries.column('Population')[:15]

sns.barplot(x = pops, y = names, orient = 'h')

# Label both axes so the figure can be read on its own.
plt.xlabel('Population')
plt.ylabel('Country')

# Focus on this part!
plt.title('Populations of countries starting or ending with ' + first_or_last);