Skip to content

SENCE 2023 Examples - 3. Semester

Common libraries and parameters

In [1]:
import math

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
In [2]:
# Keep DataFrame previews short: at most 7 rows are shown before Pandas truncates the display
pd.set_option('display.max_rows', 7)
In [3]:
# Larger default figure size for every plot in this notebook
plt.rc('figure', figsize=[16, 8])
In [4]:
# "pip install folium" might be needed first : https://pypi.org/project/folium/
import folium
In [5]:
# Cities to draw as dots on the map, one (lon, lat, name) record per city.
# Every city gets the same value, so that any difference between the drawn circles
# can only come from the map projection.
data = pd.DataFrame(
    [
        (-58.0, -34.0, 'Buenos Aires'),
        (2.0, 49.0, 'Paris'),
        (145.0, -38.0, 'Melbourne'),
        (30.32, 59.93, 'St Petersbourg'),
        (-4.03, 5.33, 'Abidjan'),
        (-73.57, 45.52, 'Montreal'),
        (36.82, -1.29, 'Nairobi'),
        (-38.5, -12.97, 'Salvador'),
    ],
    columns=['lon', 'lat', 'name'],
).assign(value=50.0)

data
Out[5]:
lon lat name value
0 -58.00 -34.00 Buenos Aires 50.0
1 2.00 49.00 Paris 50.0
2 145.00 -38.00 Melbourne 50.0
... ... ... ... ...
5 -73.57 45.52 Montreal 50.0
6 36.82 -1.29 Nairobi 50.0
7 -38.50 -12.97 Salvador 50.0

8 rows × 4 columns

Circles are distorted by Mercator projection

See https://en.wikipedia.org/wiki/Tissot%27s_indicatrix for more information.

In [6]:
# Start from an empty world map
m = folium.Map(
    location=[20,0],
    tiles="OpenStreetMap",
    zoom_start=2,
)

# Appearance shared by all the circles
circle_style = dict(color='crimson', fill=True, fill_color='crimson')

# Draw one circle per city; the radius only depends on the city's value
for lat, lon, name, value in zip(data['lat'], data['lon'], data['name'], data['value']):
    circle = folium.Circle(
        location=[lat, lon],
        popup=name,
        radius=value * 20000.0,
        **circle_style
    )
    circle.add_to(m)

# Put a centred title above the map
title_html = "<h3 align='center'>Map with distorted circles</h3>"
m.get_root().html.add_child(folium.Element(title_html))

# The map is the last expression of the cell, so the notebook displays it
m
Out[6]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Avoiding deformation

In [7]:
# `math` is imported in the common import cell at the top of the notebook.
m = folium.Map(location=[20,0], tiles="OpenStreetMap", zoom_start=2)

# Add the circles one by one on the map, and account for Mercator deformation:
# the projection magnifies lengths by 1 / cos(latitude), so scaling each radius
# by cos(latitude) makes all the circles appear with the same size on the map.
for city in data.itertuples():
    local_deformation = math.cos(math.radians(city.lat))
    folium.Circle(
        location=[city.lat, city.lon],
        # The popup shows the city name together with its value
        popup='%s (%.1f)' % (city.name, city.value),
        radius=city.value * 20000.0 * local_deformation,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

# Put a centred title above the map
m.get_root().html.add_child(folium.Element("<h3 align='center'>Map with circles of correct size</h3>"))

# Export the map to an HTML file
m.save('output/bubble_map.html')

# Show the map
m
Out[7]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Basic example

In [8]:
# Small demo table: seven rows, each column is the previous one shifted by 1.
# The columns are passed straight to the constructor instead of being stored in
# `data` first, so that the cities DataFrame used by the map cells is not
# overwritten and those cells can still be re-run afterwards.
df = pd.DataFrame({
    'A': [0, 1, 2, 3, 4, 5, 6],
    'B': [1, 2, 3, 4, 5, 6, 7],
    'C': [2, 3, 4, 5, 6, 7, 8],
    'D': [3, 4, 5, 6, 7, 8, 9],
    'E': [4, 5, 6, 7, 8, 9, 10],
    'F': [5, 6, 7, 8, 9, 10, 11]
})
In [9]:
# Display the demo DataFrame
df
Out[9]:
A B C D E F
0 0 1 2 3 4 5
1 1 2 3 4 5 6
2 2 3 4 5 6 7
3 3 4 5 6 7 8
4 4 5 6 7 8 9
5 5 6 7 8 9 10
6 6 7 8 9 10 11
In [10]:
# Name of the colormap used for the heatmap.
# See https://matplotlib.org/stable/gallery/color/colormap_reference.html
colors = 'viridis'

# Draw the heatmap and set the title on the axes it returns
ax = sns.heatmap(df, cmap=colors)
ax.set_title("Heatmap from pandas dataframe, with '%s' colormap." % colors)
plt.show()

Heatmap from timeseries

In [11]:
# Parse a whole year of weather data.
# Column names are supplied here through `names`.
# The date and time columns are read as plain text and merged with pd.to_datetime
# afterwards: combining columns through a nested `parse_dates` list is deprecated
# since pandas 2.2 and no longer supported in pandas 3.
weather_raw = pd.read_csv('output/SchPark01.csv',
            sep = ';',
            na_values = ' ',
            names = ['date', 'time', 'ghi', 'ta'],
            dtype = {'date': str, 'time': str}
           )

# NOTE(review): the timestamp format is left to pandas' inference, as it was before.
# Confirm against the file, and pass an explicit `format=` if it is ambiguous.
weather_df = (
    weather_raw
    .assign(date_time=pd.to_datetime(weather_raw['date'] + ' ' + weather_raw['time']))
    .drop(columns=['date', 'time'])
    .set_index('date_time')
)
weather_df
Out[11]:
ghi ta
date_time
2011-01-01 00:00:00 0.0 -0.6
2011-01-01 00:15:00 0.0 -0.4
2011-01-01 00:30:00 0.0 -0.5
... ... ...
2011-12-31 23:15:00 0.0 8.4
2011-12-31 23:30:00 0.0 8.5
2011-12-31 23:45:00 0.0 8.1

35040 rows × 2 columns

In [12]:
# Reshape the 'ta' time series into a table: one row per time of day,
# one column per day of the year.
temperatures = weather_df.pivot_table(
    values='ta',
    index=weather_df.index.time,
    columns=weather_df.index.dayofyear,
)
temperatures
Out[12]:
date_time 1 2 3 4 5 6 7 8 9 10 ... 356 357 358 359 360 361 362 363 364 365
00:00:00 -0.6 1.0 0.7 -3.3 -6.7 -2.0 8.4 9.6 10.9 3.9 ... 2.9 6.9 7.2 4.9 6.0 7.9 5.3 3.5 4.7 4.6
00:15:00 -0.4 1.0 0.5 -3.7 -7.9 -2.5 8.4 9.4 10.7 3.8 ... 2.8 7.0 7.1 4.9 6.0 8.2 5.2 3.6 4.3 4.6
00:30:00 -0.5 1.0 0.5 -3.0 -7.4 -2.1 8.4 9.2 10.8 3.6 ... 2.8 7.0 7.1 4.8 6.0 8.2 4.9 4.4 5.2 4.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23:15:00 1.0 0.6 -3.5 -6.9 -2.2 8.4 9.0 10.5 4.2 3.7 ... 6.7 7.3 5.0 6.0 8.3 5.5 5.0 4.4 4.1 8.4
23:30:00 1.0 0.5 -3.5 -7.3 -2.2 8.4 10.2 10.7 4.2 3.5 ... 6.7 7.3 5.2 6.1 8.5 5.4 4.4 4.5 4.2 8.5
23:45:00 1.0 0.7 -3.3 -6.9 -2.5 8.6 10.4 10.9 4.1 3.5 ... 6.9 7.2 4.9 6.1 8.1 5.3 3.9 5.1 4.6 8.1

96 rows × 357 columns

In [13]:
# Heatmap of the whole year, without writing the value inside each cell
ax = sns.heatmap(temperatures, annot=False)
ax.set_title('Temperatures in Scharnhauser Park, 2011')
plt.show()
In [14]:
# List the example datasets offered by seaborn, as a single comma-separated string
dataset_names = sns.get_dataset_names()
', '.join(dataset_names)
Out[14]:
'anagrams, anscombe, attention, brain_networks, car_crashes, diamonds, dots, dowjones, exercise, flights, fmri, geyser, glue, healthexp, iris, mpg, penguins, planets, seaice, taxis, tips, titanic'
In [15]:
# Load the 'penguins' example dataset and show a preview
penguins_df = sns.load_dataset(name='penguins')
penguins_df
Out[15]:
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 Male
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 Female
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 Female
... ... ... ... ... ... ... ...
341 Gentoo Biscoe 50.4 15.7 222.0 5750.0 Male
342 Gentoo Biscoe 45.2 14.8 212.0 5200.0 Female
343 Gentoo Biscoe 49.9 16.1 213.0 5400.0 Male

344 rows × 7 columns

In [16]:
# Basic correlogram: pairwise plots of the dataset's variables, coloured by species
sns.pairplot(data=penguins_df, hue='species')
plt.show()

FlapPyBird

A slightly modified version of FlapPyBird, which saves the scores to a high score file and can plot them if desired:

https://github.com/EricDuminil/FlapPyBird

In [17]:
# Path of the high score file, read by the cells below
high_score_filename = "output/my_high_score.csv"
In [18]:
# Find the best score, without any library.
# Each record of the high score file is split into two ';'-separated fields;
# the second one is the score.

previous_record = 0

with open(high_score_filename) as high_score_file:
    for line in high_score_file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. an empty line at the end of the file)
            # instead of crashing on the unpacking below.
            continue
        when, old_score = line.split(';')
        # Keep the highest score seen so far
        previous_record = max(previous_record, int(old_score))

print("Current best score is : %d" % previous_record)
Current best score is : 18
In [19]:
# Load the same high score file with Pandas:
# the 'datetime' column is parsed as dates and used as the index.
high_score_df = pd.read_csv(
    high_score_filename,
    names=['datetime', 'score'],
    index_col='datetime',
    parse_dates=True,
    sep=';',
)
high_score_df
Out[19]:
score
datetime
2023-01-26 21:11:23 1
2023-01-26 21:11:32 3
2023-01-26 21:11:41 4
... ...
2023-01-27 15:56:00 1
2023-01-27 15:56:13 8
2023-01-27 15:56:29 9

35 rows × 1 columns

In [20]:
# Plot the scores in the order they were recorded
high_score_df.plot(
    title='My FlapPyBird scores',
    xlabel='Attempt #',
    use_index=False,  # x axis: position of the attempt instead of its timestamp
    ylim=(0, None),   # y axis starts at 0; the upper limit is left automatic
)

plt.show()