SENCE 2024 Examples¶

Sankey with different colors¶

Find color combinations at https://designwizard.com/blog/colour-combination/#gray-ff-and-lime-punch-dedff

In [1]:

Copied!





import matplotlib.pyplot as plt

from matplotlib.sankey import Sankey

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, xticks=[], yticks=[], title="Two Systems")
flows = [0.25, 0.15, 0.60, -0.10, -0.05, -0.25, -0.15, -0.10, -0.35]
sankey = Sankey(ax=ax, unit=None)
sankey.add(flows=flows, label='one',
           orientations=[-1, 1, 0, 1, 1, 1, -1, -1, 0],
           facecolor='#606060FF')
sankey.add(flows=[-0.25, 0.15, 0.1], label='two',
           orientations=[-1, -1, -1], prior=0, connect=(0, 0),
           facecolor='#D6ED17FF')
diagrams = sankey.finish()
diagrams[-1].patch.set_hatch('/')
plt.legend();
import matplotlib.pyplot as plt

from matplotlib.sankey import Sankey

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, xticks=[], yticks=[], title="Two Systems")
flows = [0.25, 0.15, 0.60, -0.10, -0.05, -0.25, -0.15, -0.10, -0.35]
sankey = Sankey(ax=ax, unit=None)
sankey.add(flows=flows, label='one',
           orientations=[-1, 1, 0, 1, 1, 1, -1, -1, 0],
           facecolor='#606060FF')
sankey.add(flows=[-0.25, 0.15, 0.1], label='two',
           orientations=[-1, -1, -1], prior=0, connect=(0, 0),
           facecolor='#D6ED17FF')
diagrams = sankey.finish()
diagrams[-1].patch.set_hatch('/')
plt.legend();

No description has been provided for this image

Read online CSV¶

In [2]:

Copied!

import pandas as pd
import pandas as pd

In [3]:

Copied!

pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv")
pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv")

Out[3]:

	#group	false	false.1	true	true.1	false.2	false.3	true.2	true.3
0	#datatype	string	long	dateTime:RFC3339	dateTime:RFC3339	dateTime:RFC3339	double	string	string
1	#default	mean	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	result	table	_start	_stop	_time	_value	_field	_measurement
3	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-01T07:38:29.058Z	9.200975609756101	value	wetterstation.temperatur
4	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-01T22:42:28.424Z	8.58029850746268	value	wetterstation.temperatur
...	...	...	...	...	...	...	...	...	...
359	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-10T19:18:43.354Z	3.659710144927535	value	wetterstation.temperatur
360	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-11T10:22:42.72Z	1.9895384615384597	value	wetterstation.temperatur
361	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T01:26:42.086Z	5.282580645161291	value	wetterstation.temperatur
362	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T16:30:41.452Z	4.560792079207922	value	wetterstation.temperatur
363	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T21:56:11.652Z	2.4579310344827574	value	wetterstation.temperatur

364 rows × 9 columns

In [4]:

Copied!

pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3)
pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3)

Out[4]:

	Unnamed: 0	result	table	_start	_stop	_time	_value	_field	_measurement
0	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-01T07:38:29.058Z	9.200976	value	wetterstation.temperatur
1	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-01T22:42:28.424Z	8.580299	value	wetterstation.temperatur
2	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-02T13:46:27.79Z	8.436757	value	wetterstation.temperatur
3	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-03T04:50:27.156Z	6.948889	value	wetterstation.temperatur
4	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-04-03T19:54:26.522Z	9.091223	value	wetterstation.temperatur
...	...	...	...	...	...	...	...	...	...
356	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-10T19:18:43.354Z	3.659710	value	wetterstation.temperatur
357	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-11T10:22:42.72Z	1.989538	value	wetterstation.temperatur
358	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T01:26:42.086Z	5.282581	value	wetterstation.temperatur
359	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T16:30:41.452Z	4.560792	value	wetterstation.temperatur
360	NaN	NaN	0	2024-03-31T22:00:00Z	2024-11-12T21:56:11.652Z	2024-11-12T21:56:11.652Z	2.457931	value	wetterstation.temperatur

361 rows × 9 columns

In [5]:

Copied!

pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            parse_dates=[3, 4, 5])
pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            parse_dates=[3, 4, 5])

Out[5]:

	Unnamed: 0	result	table	_start	_stop	_time	_value	_field	_measurement
0	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-04-01 07:38:29.058000+00:00	9.200976	value	wetterstation.temperatur
1	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-04-01 22:42:28.424000+00:00	8.580299	value	wetterstation.temperatur
2	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-04-02 13:46:27.790000+00:00	8.436757	value	wetterstation.temperatur
3	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-04-03 04:50:27.156000+00:00	6.948889	value	wetterstation.temperatur
4	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-04-03 19:54:26.522000+00:00	9.091223	value	wetterstation.temperatur
...	...	...	...	...	...	...	...	...	...
356	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-11-10 19:18:43.354000+00:00	3.659710	value	wetterstation.temperatur
357	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-11-11 10:22:42.720000+00:00	1.989538	value	wetterstation.temperatur
358	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-11-12 01:26:42.086000+00:00	5.282581	value	wetterstation.temperatur
359	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-11-12 16:30:41.452000+00:00	4.560792	value	wetterstation.temperatur
360	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2024-11-12 21:56:11.652000+00:00	2.457931	value	wetterstation.temperatur

361 rows × 9 columns

In [6]:

Copied!





pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            parse_dates=[3, 4, 5],
            index_col='_time'
           )
pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            parse_dates=[3, 4, 5],
            index_col='_time'
           )

Out[6]:

	Unnamed: 0	result	table	_start	_stop	_value	_field	_measurement
_time
2024-04-01 07:38:29.058000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	9.200976	value	wetterstation.temperatur
2024-04-01 22:42:28.424000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	8.580299	value	wetterstation.temperatur
2024-04-02 13:46:27.790000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	8.436757	value	wetterstation.temperatur
2024-04-03 04:50:27.156000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	6.948889	value	wetterstation.temperatur
2024-04-03 19:54:26.522000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	9.091223	value	wetterstation.temperatur
...	...	...	...	...	...	...	...	...
2024-11-10 19:18:43.354000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	3.659710	value	wetterstation.temperatur
2024-11-11 10:22:42.720000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	1.989538	value	wetterstation.temperatur
2024-11-12 01:26:42.086000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	5.282581	value	wetterstation.temperatur
2024-11-12 16:30:41.452000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	4.560792	value	wetterstation.temperatur
2024-11-12 21:56:11.652000+00:00	NaN	NaN	0	2024-03-31 22:00:00+00:00	2024-11-12 21:56:11.652000+00:00	2.457931	value	wetterstation.temperatur

361 rows × 8 columns

In [7]:

Copied!





pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            usecols=['_time', '_value'],
            parse_dates=[0],
            index_col='_time',
           )
pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            usecols=['_time', '_value'],
            parse_dates=[0],
            index_col='_time',
           )

Out[7]:

	_value
_time
2024-04-01 07:38:29.058000+00:00	9.200976
2024-04-01 22:42:28.424000+00:00	8.580299
2024-04-02 13:46:27.790000+00:00	8.436757
2024-04-03 04:50:27.156000+00:00	6.948889
2024-04-03 19:54:26.522000+00:00	9.091223
...	...
2024-11-10 19:18:43.354000+00:00	3.659710
2024-11-11 10:22:42.720000+00:00	1.989538
2024-11-12 01:26:42.086000+00:00	5.282581
2024-11-12 16:30:41.452000+00:00	4.560792
2024-11-12 21:56:11.652000+00:00	2.457931

361 rows × 1 columns

In [8]:

Copied!





df = pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            usecols=['_time', '_value'],
            parse_dates=[0],
            index_col='_time',
           )
df = df.rename(columns={'_value': 'temperature'})
df
df = pd.read_csv("https://python.ericduminil.com/files/wetterstation.temp.csv",
            skiprows=3,
            usecols=['_time', '_value'],
            parse_dates=[0],
            index_col='_time',
           )
df = df.rename(columns={'_value': 'temperature'})
df

Out[8]:

	temperature
_time
2024-04-01 07:38:29.058000+00:00	9.200976
2024-04-01 22:42:28.424000+00:00	8.580299
2024-04-02 13:46:27.790000+00:00	8.436757
2024-04-03 04:50:27.156000+00:00	6.948889
2024-04-03 19:54:26.522000+00:00	9.091223
...	...
2024-11-10 19:18:43.354000+00:00	3.659710
2024-11-11 10:22:42.720000+00:00	1.989538
2024-11-12 01:26:42.086000+00:00	5.282581
2024-11-12 16:30:41.452000+00:00	4.560792
2024-11-12 21:56:11.652000+00:00	2.457931

361 rows × 1 columns

In [9]:

Copied!

df.plot();
df.plot();

In [10]:

Copied!

df.resample('1W').mean().plot();
df.resample('1W').mean().plot();

Include image in Notebook¶

![Minion](http://octodex.github.com/images/daftpunktocat-thomas.gif)

Minion

Create output/ folder if needed¶

In [12]:

Copied!

from pathlib import Path
from pathlib import Path

In [13]:

Copied!

Path('output').mkdir(exist_ok=True)
Path('output').mkdir(exist_ok=True)

2-D Density Plot¶

https://python-graph-gallery.com/2d-density-plot/

In [14]:

Copied!





import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde

# Create data: 200 points
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

# Create a figure with 6 plot areas
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))

# Everything starts with a Scatterplot
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')

# Thus we can cut the plotting window in several hexbins
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)

# 2D Histogram
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)

# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
k = kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

# plot a density
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, zi.reshape(xi.shape), cmap=plt.cm.BuGn_r)

# add shading
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)

# contour
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, zi.reshape(xi.shape) );
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde as kde

# Create data: 200 points
data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 3]], 200)
x, y = data.T

# Create a figure with 6 plot areas
fig, axes = plt.subplots(ncols=6, nrows=1, figsize=(21, 5))

# Everything starts with a Scatterplot
axes[0].set_title('Scatterplot')
axes[0].plot(x, y, 'ko')

# Thus we can cut the plotting window in several hexbins
nbins = 20
axes[1].set_title('Hexbin')
axes[1].hexbin(x, y, gridsize=nbins, cmap=plt.cm.BuGn_r)

# 2D Histogram
axes[2].set_title('2D Histogram')
axes[2].hist2d(x, y, bins=nbins, cmap=plt.cm.BuGn_r)

# Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
k = kde(data.T)
xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
zi = k(np.vstack([xi.flatten(), yi.flatten()]))

# plot a density
axes[3].set_title('Calculate Gaussian KDE')
axes[3].pcolormesh(xi, yi, zi.reshape(xi.shape), cmap=plt.cm.BuGn_r)

# add shading
axes[4].set_title('2D Density with shading')
axes[4].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)

# contour
axes[5].set_title('Contour')
axes[5].pcolormesh(xi, yi, zi.reshape(xi.shape), shading='gouraud', cmap=plt.cm.BuGn_r)
axes[5].contour(xi, yi, zi.reshape(xi.shape) );

Circular Barplot¶

https://python-graph-gallery.com/circular-barplot/

Simple¶

In [15]:

Copied!





# import numpy to get the value of Pi
import numpy as np

# Add a bar in the polar coordinates
plt.subplot(111, polar=True);
plt.bar(x=0, height=10, width=np.pi/2, bottom=5);
# import numpy to get the value of Pi
import numpy as np

# Add a bar in the polar coordinates
plt.subplot(111, polar=True);
plt.bar(x=0, height=10, width=np.pi/2, bottom=5);

In [16]:

Copied!





import pandas as pd

# Build a dataset
df = pd.DataFrame(
        {
            'Name': ['item ' + str(i) for i in list(range(1, 51)) ],
            'Value': np.random.randint(low=10, high=100, size=50)
        })

# Show 3 first rows
df.head(3)
import pandas as pd

# Build a dataset
df = pd.DataFrame(
        {
            'Name': ['item ' + str(i) for i in list(range(1, 51)) ],
            'Value': np.random.randint(low=10, high=100, size=50)
        })

# Show 3 first rows
df.head(3)

Out[16]:

	Name	Value
0	item 1	64
1	item 2	70
2	item 3	12

In [17]:

Copied!





# set figure size
plt.figure(figsize=(20,10))

# plot polar axis
ax = plt.subplot(111, polar=True)

# remove grid
plt.axis('off')

# Set the coordinates limits
upperLimit = 100
lowerLimit = 30

# Compute max and min in the dataset
max = df['Value'].max()

# Let's compute heights: they are a conversion of each item value in those new coordinates
# In our example, 0 in the dataset will be converted to the lowerLimit (10)
# The maximum will be converted to the upperLimit (100)
slope = (max - lowerLimit) / max
heights = slope * df.Value + lowerLimit

# Compute the width of each bar. In total we have 2*Pi = 360°
width = 2*np.pi / len(df.index)

# Compute the angle each bar is centered on:
indexes = list(range(1, len(df.index)+1))
angles = [element * width for element in indexes]
angles

# Draw bars
bars = ax.bar(
    x=angles, 
    height=heights, 
    width=width, 
    bottom=lowerLimit,
    linewidth=2, 
    edgecolor="white")
# set figure size
plt.figure(figsize=(20,10))

# plot polar axis
ax = plt.subplot(111, polar=True)

# remove grid
plt.axis('off')

# Set the coordinates limits
upperLimit = 100
lowerLimit = 30

# Compute max and min in the dataset
max = df['Value'].max()

# Let's compute heights: they are a conversion of each item value in those new coordinates
# In our example, 0 in the dataset will be converted to the lowerLimit (10)
# The maximum will be converted to the upperLimit (100)
slope = (max - lowerLimit) / max
heights = slope * df.Value + lowerLimit

# Compute the width of each bar. In total we have 2*Pi = 360°
width = 2*np.pi / len(df.index)

# Compute the angle each bar is centered on:
indexes = list(range(1, len(df.index)+1))
angles = [element * width for element in indexes]
angles

# Draw bars
bars = ax.bar(
    x=angles, 
    height=heights, 
    width=width, 
    bottom=lowerLimit,
    linewidth=2, 
    edgecolor="white")

Complex - Star Wars¶

https://python-graph-gallery.com/532-customizing-circular-barplot-in-matplotlib/

In [18]:

Copied!





import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib.lines import Line2D
from matplotlib import font_manager

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib.lines import Line2D
from matplotlib import font_manager

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 

In [19]:

Copied!





import tempfile
from pathlib import Path
import urllib

# Create a temporary directory for the font files
path = Path(tempfile.mkdtemp())

# URL and downloaded path of the fonts
url_label_font = "https://github.com/Lisa-Ho/small-data-projects/raw/main/assets/fonts/Ubuntu-R.ttf"
url_title_font = "https://github.com/Lisa-Ho/small-data-projects/raw/main/assets/fonts/Mandalore-K77lD.otf"
path_label_font = path / "Ubuntu-R.ttf"
path_title_font = path / "Mandalore-K77lD.otf"

# Download the fonts to our temporary directory
urllib.request.urlretrieve(url_label_font, path_label_font)
urllib.request.urlretrieve(url_title_font, path_title_font)

# Create a Matplotlib Font object from our `.ttf` files
label_font = font_manager.FontEntry(fname=str(path_label_font), name="Ubuntu-R")
title_font = font_manager.FontEntry(fname=str(path_title_font), name="Mandalore-K77lD")

# Register objects with Matplotlib's ttf list
font_manager.fontManager.ttflist.append(label_font)
font_manager.fontManager.ttflist.append(title_font)
import tempfile
from pathlib import Path
import urllib

# Create a temporary directory for the font files
path = Path(tempfile.mkdtemp())

# URL and downloaded path of the fonts
url_label_font = "https://github.com/Lisa-Ho/small-data-projects/raw/main/assets/fonts/Ubuntu-R.ttf"
url_title_font = "https://github.com/Lisa-Ho/small-data-projects/raw/main/assets/fonts/Mandalore-K77lD.otf"
path_label_font = path / "Ubuntu-R.ttf"
path_title_font = path / "Mandalore-K77lD.otf"

# Download the fonts to our temporary directory
urllib.request.urlretrieve(url_label_font, path_label_font)
urllib.request.urlretrieve(url_title_font, path_title_font)

# Create a Matplotlib Font object from our `.ttf` files
label_font = font_manager.FontEntry(fname=str(path_label_font), name="Ubuntu-R")
title_font = font_manager.FontEntry(fname=str(path_title_font), name="Mandalore-K77lD")

# Register objects with Matplotlib's ttf list
font_manager.fontManager.ttflist.append(label_font)
font_manager.fontManager.ttflist.append(title_font)

In [20]:

Copied!

# load cleaned data set
df = pd.read_csv('https://raw.githubusercontent.com/Lisa-Ho/small-data-projects/main/2023/2308-star-wars-scripts/episode1_each_line_of_anakin_clean.csv')

# print first rows to check it's all looking ok
df.head()
# load cleaned data set
df = pd.read_csv('https://raw.githubusercontent.com/Lisa-Ho/small-data-projects/main/2023/2308-star-wars-scripts/episode1_each_line_of_anakin_clean.csv')

# print first rows to check it's all looking ok
df.head()

Out[20]:

	id	to	text	number	episode
0	271.0	WATTO	Mel tassa cho-passa	3	1
1	274.0	PADME	Are you an angel?	4	1
2	276.0	PADME	An angel. I've heard the deep space pilots tal...	46	1
3	278.0	PADME	I listen to all the traders and star pilots wh...	27	1
4	280.0	PADME	All mylife.	2	1

In [21]:

Copied!

# calculate corect angular position in circular bar plot
x_max = 2*np.pi
df['angular_pos'] = np.linspace(0, x_max, len(df), endpoint=False)
# calculate corect angular position in circular bar plot
x_max = 2*np.pi
df['angular_pos'] = np.linspace(0, x_max, len(df), endpoint=False)

In [22]:

Copied!





# store colors to use in dictionary
chart_colors = {'bg': '#0C081F', 'QUI-GON': '#F271A7', 'PADME': '#40B8E1', 'OBI-WAN':'#75EAB6',
                'R2D2': '#F4E55E', 'other': '#444A68'}

# map colors for bars to the data
df['colors'] = df['to'].map(chart_colors)

# fill with neutral color for secondary characters
df['colors'] = df['colors'].fillna(chart_colors['other'])
# store colors to use in dictionary
chart_colors = {'bg': '#0C081F', 'QUI-GON': '#F271A7', 'PADME': '#40B8E1', 'OBI-WAN':'#75EAB6',
                'R2D2': '#F4E55E', 'other': '#444A68'}

# map colors for bars to the data
df['colors'] = df['to'].map(chart_colors)

# fill with neutral color for secondary characters
df['colors'] = df['colors'].fillna(chart_colors['other'])

In [23]:

Copied!





# layout  -----------------------------------------
# setup figure with polar projection
fig, ax = plt.subplots(figsize=(10, 10), 
                       subplot_kw={'projection': 'polar'})

# set background colors
ax.set_facecolor(chart_colors['bg'])
fig.set_facecolor(chart_colors['bg'])

# plot data  -----------------------------------------
ax.bar(df['angular_pos'], df['number'], alpha=1, color=df['colors'], 
       linewidth=0, width=0.052, zorder=3)

# format axis -----------------------------------------
# start on the top and plot bars clockwise
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)   

# scale y-axis to account for area size of bars 
max_value = 50
r_offset = -10
r2 = max_value - r_offset
alpha = r2 - r_offset
v_offset = r_offset**2 / alpha
forward = lambda value: ((value + v_offset) * alpha)**0.5 + r_offset
reverse = lambda radius: (radius - r_offset) ** 2 / alpha - v_offset
ax.set_rlim(0, max_value)
ax.set_rorigin(r_offset)
ax.set_yscale('function', functions=(
    lambda value: np.where(value >= 0, forward(value), value),
    lambda radius: np.where(radius > 0, reverse(radius), radius)))

# format labels and grid
ax.set_rlabel_position(0)
ax.set_yticks([10,20,30,40])
ax.set_yticklabels([10,20,30,40],fontsize=9, color='white',alpha=0.35)

# format gridlines
ax.set_thetagrids(angles=[])
ax.grid(visible=True, axis='y', zorder=2, color='white',
        linewidth=0.75, alpha=0.2)

# remove spines
ax.spines[:].set_visible(False)

# custom legend  -----------------------------------------
# add axis to hold legend
lgd = fig.add_axes([0.75,0.71, 0.15, 0.25]) 

# define legend elements
kw = dict(marker='o', color=chart_colors['bg'], markersize=8, alpha=1, 
          markeredgecolor='None', linewidth=0)
legend_elements =[Line2D([0],[0], 
                          markerfacecolor=chart_colors['PADME'],
                          label='Padme', 
                          **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['QUI-GON'],
                         label='Qui-Gon', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['R2D2'], 
                         label='R2D2', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['OBI-WAN'], 
                         label='Obi-Wan', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['other'], 
                         label='Other', 
                         **kw)] 

# visualise legend and remove axis around it
L = lgd.legend(frameon=False, handles=legend_elements, loc='center', 
               ncol=1, handletextpad=0.2, labelspacing=1)
plt.setp(L.texts, va='baseline', color='white', size=12, 
         fontfamily=label_font.name)    
lgd.axis('off')

# circular annotation  -----------------------------------------
# draw an inner circle on a new axis
circ = fig.add_axes([0.453, 0.435, 0.12, 0.12],polar=True) 
line_angular_pos = df['angular_pos'][1:-5]
line_r = [5] * len(line_angular_pos)

#plot line and markers for start + end
circ.plot(line_angular_pos, line_r, zorder=5, color='white', 
          linewidth=0.75, alpha=0.4)
circ.plot(line_angular_pos.to_list()[0], line_r[0], zorder=5, color='white', 
          linewidth=0,marker='o', markersize=3,alpha=0.4)
circ.plot(line_angular_pos.to_list()[-1], line_r[-1], zorder=5, color='white', 
          linewidth=0,marker='>', markersize=3,alpha=0.4)

# format axis
circ.set_theta_zero_location('N')
circ.set_theta_direction(-1)  
circ.axis('off')

# text annotations -----------------------------------------
ax.annotate('1 line', xy=(0.1, 48), xycoords='data', xytext=(40, 20), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='left', va='baseline', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='->',edgecolor='#ababab', 
                            connectionstyle='arc3,rad=.5', alpha=0.75))
ax.annotate('Words\nper line', xy=(-0.05, 22), xycoords='data', xytext=(0, 0), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='baseline', 
            annotation_clip=False, 
            color='#ababab')
ax.annotate('', xy=(-0.02, 38), xycoords='data', xytext=(0, -105), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='baseline', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='<->',edgecolor='#ababab', linewidth=0.75,
                            connectionstyle='arc3,rad=0', alpha=0.75 ))
lgd.annotate('Talking to', xy=(0.35, 0.78), xycoords='data', xytext=(-18, 14), 
             textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='center', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='->',edgecolor='#ababab', 
                            connectionstyle='arc3,rad=-.5', alpha=0.75))

# Title + Credits  -----------------------------------------
plt.figtext(0.5,1.03, 'Star Wars Episode I', 
            fontfamily=title_font.name, 
            fontsize=55, color='white', ha='center')
plt.figtext(0.5,0.98, 'Each line of Anakin', 
            fontfamily=label_font.name,
            fontsize=24, color='white', ha='center')
plt.figtext(0.5,0.1, 'Data: jcwieme/data-scripts-star-wars  |  Design: Lisa Hornung', 
            fontfamily=label_font.name,
            fontsize=8, color='white', ha='center', alpha=0.75)

plt.savefig('output/anakin.png')
plt.show()
# layout  -----------------------------------------
# setup figure with polar projection
fig, ax = plt.subplots(figsize=(10, 10), 
                       subplot_kw={'projection': 'polar'})

# set background colors
ax.set_facecolor(chart_colors['bg'])
fig.set_facecolor(chart_colors['bg'])

# plot data  -----------------------------------------
ax.bar(df['angular_pos'], df['number'], alpha=1, color=df['colors'], 
       linewidth=0, width=0.052, zorder=3)

# format axis -----------------------------------------
# start on the top and plot bars clockwise
ax.set_theta_zero_location('N')
ax.set_theta_direction(-1)   

# scale y-axis to account for area size of bars 
max_value = 50
r_offset = -10
r2 = max_value - r_offset
alpha = r2 - r_offset
v_offset = r_offset**2 / alpha
forward = lambda value: ((value + v_offset) * alpha)**0.5 + r_offset
reverse = lambda radius: (radius - r_offset) ** 2 / alpha - v_offset
ax.set_rlim(0, max_value)
ax.set_rorigin(r_offset)
ax.set_yscale('function', functions=(
    lambda value: np.where(value >= 0, forward(value), value),
    lambda radius: np.where(radius > 0, reverse(radius), radius)))

# format labels and grid
ax.set_rlabel_position(0)
ax.set_yticks([10,20,30,40])
ax.set_yticklabels([10,20,30,40],fontsize=9, color='white',alpha=0.35)

# format gridlines
ax.set_thetagrids(angles=[])
ax.grid(visible=True, axis='y', zorder=2, color='white',
        linewidth=0.75, alpha=0.2)

# remove spines
ax.spines[:].set_visible(False)

# custom legend  -----------------------------------------
# add axis to hold legend
lgd = fig.add_axes([0.75,0.71, 0.15, 0.25]) 

# define legend elements
kw = dict(marker='o', color=chart_colors['bg'], markersize=8, alpha=1, 
          markeredgecolor='None', linewidth=0)
legend_elements =[Line2D([0],[0], 
                          markerfacecolor=chart_colors['PADME'],
                          label='Padme', 
                          **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['QUI-GON'],
                         label='Qui-Gon', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['R2D2'], 
                         label='R2D2', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['OBI-WAN'], 
                         label='Obi-Wan', 
                         **kw), 
                  Line2D([0], [0], 
                         markerfacecolor=chart_colors['other'], 
                         label='Other', 
                         **kw)] 

# visualise legend and remove axis around it
L = lgd.legend(frameon=False, handles=legend_elements, loc='center', 
               ncol=1, handletextpad=0.2, labelspacing=1)
plt.setp(L.texts, va='baseline', color='white', size=12, 
         fontfamily=label_font.name)    
lgd.axis('off')

# circular annotation  -----------------------------------------
# draw an inner circle on a new axis
circ = fig.add_axes([0.453, 0.435, 0.12, 0.12],polar=True) 
line_angular_pos = df['angular_pos'][1:-5]
line_r = [5] * len(line_angular_pos)

#plot line and markers for start + end
circ.plot(line_angular_pos, line_r, zorder=5, color='white', 
          linewidth=0.75, alpha=0.4)
circ.plot(line_angular_pos.to_list()[0], line_r[0], zorder=5, color='white', 
          linewidth=0,marker='o', markersize=3,alpha=0.4)
circ.plot(line_angular_pos.to_list()[-1], line_r[-1], zorder=5, color='white', 
          linewidth=0,marker='>', markersize=3,alpha=0.4)

# format axis
circ.set_theta_zero_location('N')
circ.set_theta_direction(-1)  
circ.axis('off')

# text annotations -----------------------------------------
ax.annotate('1 line', xy=(0.1, 48), xycoords='data', xytext=(40, 20), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='left', va='baseline', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='->',edgecolor='#ababab', 
                            connectionstyle='arc3,rad=.5', alpha=0.75))
ax.annotate('Words\nper line', xy=(-0.05, 22), xycoords='data', xytext=(0, 0), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='baseline', 
            annotation_clip=False, 
            color='#ababab')
ax.annotate('', xy=(-0.02, 38), xycoords='data', xytext=(0, -105), 
            textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='baseline', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='<->',edgecolor='#ababab', linewidth=0.75,
                            connectionstyle='arc3,rad=0', alpha=0.75 ))
lgd.annotate('Talking to', xy=(0.35, 0.78), xycoords='data', xytext=(-18, 14), 
             textcoords='offset points', 
            fontsize=10, fontfamily=label_font.name,
            ha='right', va='center', 
            annotation_clip=False, 
            color='#ababab',
            arrowprops=dict(arrowstyle='->',edgecolor='#ababab', 
                            connectionstyle='arc3,rad=-.5', alpha=0.75))

# Title + Credits  -----------------------------------------
plt.figtext(0.5,1.03, 'Star Wars Episode I', 
            fontfamily=title_font.name, 
            fontsize=55, color='white', ha='center')
plt.figtext(0.5,0.98, 'Each line of Anakin', 
            fontfamily=label_font.name,
            fontsize=24, color='white', ha='center')
plt.figtext(0.5,0.1, 'Data: jcwieme/data-scripts-star-wars  |  Design: Lisa Hornung', 
            fontfamily=label_font.name,
            fontsize=8, color='white', ha='center', alpha=0.75)

plt.savefig('output/anakin.png')
plt.show()

Waffle¶

https://python-graph-gallery.com/waffle-chart/

Simple¶

!pip install pywaffle

In [24]:

Copied!





import matplotlib.pyplot as plt
import matplotlib.patches as mpatches # for the legend
from pywaffle import Waffle
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches # for the legend
from pywaffle import Waffle
import pandas as pd

In [25]:

Copied!





data = {
    2018: [3032, 2892, 804],
    2019: [4537, 3379, 1096],
    2020: [8932, 3879, 896],
    2021: [22147, 6678, 2156],
    2022: [32384, 13354, 5245]
}

df = pd.DataFrame(data,
                  index=['car', 'truck', 'motorcycle'])
data = {
    2018: [3032, 2892, 804],
    2019: [4537, 3379, 1096],
    2020: [8932, 3879, 896],
    2021: [22147, 6678, 2156],
    2022: [32384, 13354, 5245]
}

df = pd.DataFrame(data,
                  index=['car', 'truck', 'motorcycle'])

In [26]:

Copied!





number_of_bars = len(df.columns) # one bar per year

# Init the whole figure and axes
fig, axs = plt.subplots(nrows=1,
                        ncols=number_of_bars,
                        figsize=(8,6),)

# Iterate over each bar and create it
for i,ax in enumerate(axs):
    
    col_name = df.columns[i]
    values = df[col_name] # values from the i-th column
    
    Waffle.make_waffle(
        ax=ax,  # pass axis to make_waffle 
        rows=20,
        columns=5,
        values=values,
    )

plt.show()
number_of_bars = len(df.columns) # one bar per year

# Init the whole figure and axes
fig, axs = plt.subplots(nrows=1,
                        ncols=number_of_bars,
                        figsize=(8,6),)

# Iterate over each bar and create it
for i,ax in enumerate(axs):
    
    col_name = df.columns[i]
    values = df[col_name] # values from the i-th column
    
    Waffle.make_waffle(
        ax=ax,  # pass axis to make_waffle 
        rows=20,
        columns=5,
        values=values,
    )

plt.show()

In [27]:

Copied!





number_of_bars = len(df.columns) # one bar per year
colors = ["darkred", "red", "darkorange"]

# Init the whole figure and axes
fig, axs = plt.subplots(nrows=1,
                        ncols=number_of_bars,
                        figsize=(8,6),)

# Iterate over each bar and create it
for i,ax in enumerate(axs):
    
    col_name = df.columns[i]
    values = df[col_name]/1000 # values from the i-th column
    
    Waffle.make_waffle(
        ax=ax,  # pass axis to make_waffle 
        rows=20,
        columns=5,
        values=values,
        title={"label": col_name, "loc": "left"},
        colors=colors,
        vertical=True,
        icons=['car-side', 'truck', 'motorcycle'],
        font_size=12, # size of each point
        icon_legend=True,
        legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
    )
    
# Add a title
fig.suptitle('Vehicle Production by Year and Vehicle Type',
             fontsize=14, fontweight='bold')


# Add a legend
legend_labels = df.index
legend_elements = [mpatches.Patch(color=colors[i],
                                  label=legend_labels[i]) for i in range(len(colors))]
fig.legend(handles=legend_elements,
           loc="upper right",
           title="Vehicle Types",
           bbox_to_anchor=(1.04, 0.9))

plt.subplots_adjust(right=0.85)
plt.show()
number_of_bars = len(df.columns) # one bar per year
colors = ["darkred", "red", "darkorange"]

# Init the whole figure and axes
fig, axs = plt.subplots(nrows=1,
                        ncols=number_of_bars,
                        figsize=(8,6),)

# Iterate over each bar and create it
for i,ax in enumerate(axs):
    
    col_name = df.columns[i]
    values = df[col_name]/1000 # values from the i-th column
    
    Waffle.make_waffle(
        ax=ax,  # pass axis to make_waffle 
        rows=20,
        columns=5,
        values=values,
        title={"label": col_name, "loc": "left"},
        colors=colors,
        vertical=True,
        icons=['car-side', 'truck', 'motorcycle'],
        font_size=12, # size of each point
        icon_legend=True,
        legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
    )
    
# Add a title
fig.suptitle('Vehicle Production by Year and Vehicle Type',
             fontsize=14, fontweight='bold')


# Add a legend
legend_labels = df.index
legend_elements = [mpatches.Patch(color=colors[i],
                                  label=legend_labels[i]) for i in range(len(colors))]
fig.legend(handles=legend_elements,
           loc="upper right",
           title="Vehicle Types",
           bbox_to_anchor=(1.04, 0.9))

plt.subplots_adjust(right=0.85)
plt.show()

More complex¶

https://python-graph-gallery.com/web-waffle-chart-as-share/

NOTE: Example should be updated because pyfonts has been changed

!pip install pyfonts highlight-text

In [28]:

Copied!





# Libraries
import matplotlib.pyplot as plt
import pandas as pd
from pywaffle import Waffle
from highlight_text import fig_text, ax_text
from pyfonts import load_font
# Libraries
import matplotlib.pyplot as plt
import pandas as pd
from pywaffle import Waffle
from highlight_text import fig_text, ax_text
from pyfonts import load_font

In [29]:

Copied!

path = 'https://raw.githubusercontent.com/holtzy/R-graph-gallery/master/DATA/share-cereals.csv'
df = pd.read_csv(path)

def remove_html_tag(s):
    return s.split('</b>')[0][3:]

df['lab'] = df['lab'].apply(remove_html_tag)
df = df[df['type'] == 'feed']
df.reset_index(inplace=True)
df
path = 'https://raw.githubusercontent.com/holtzy/R-graph-gallery/master/DATA/share-cereals.csv'
df = pd.read_csv(path)

def remove_html_tag(s):
    return s.split('')[0][3:]

df['lab'] = df['lab'].apply(remove_html_tag)
df = df[df['type'] == 'feed']
df.reset_index(inplace=True)
df

Out[29]:

	index	lab	type	percent
0	0	Africa	feed	21
1	2	Americas	feed	53
2	4	Asia	feed	32
3	6	Europe	feed	66
4	8	Oceania	feed	59

In [30]:

Copied!





#NOTE: URL has been updated
font_title = load_font("https://github.com/googlefonts/staatliches/raw/refs/heads/main/fonts/Staatliches-Regular.ttf")
font_credit = load_font("https://github.com/impallari/Raleway/raw/master/fonts/v4020/Raleway-v4020-Light.otf")
bold_font_credit = load_font("https://github.com/impallari/Raleway/raw/master/fonts/v4020/Raleway-v4020-Bold.otf")

background_color = "#222725"
pink = "#f72585"
dark_pink = "#7a0325"

number_of_bars = len(df)  # one bar per continent

# Init the whole figure and axes
fig, axs = plt.subplots(
   nrows=number_of_bars,
   ncols=1,
   figsize=(8, 8),
   dpi=300
)
fig.set_facecolor(background_color)
ax.set_facecolor('white')


# Iterate over each bar and create it
for (i, row), ax in zip(df.iterrows(), axs):

    share = row['percent']
    values = [share, 100-share]

    Waffle.make_waffle(
        ax=ax,
        rows=4,
        columns=25,
        values=values,
        colors=[pink, dark_pink],
    )

    text = f"{row['lab']}"
    ax.text(
        x=-0.4, y=0.5, s=text,
        font=bold_font_credit, color='white', rotation=90,
        ha='center', va='center', fontsize=13
    )
    text = f"{share}%"
    ax.text(
        x=-0.2, y=0.5, s=text,
        font=font_credit, color='white', rotation=90,
        ha='center', va='center', fontsize=13
    )

fig_text(
    x=0.05, y=0.95, s="SHARE OF CEREALS USED AS <ANIMAL FEEDS>",
    highlight_textprops=[{'color': pink}], color='white',
    fontsize=22, font=font_title
)
fig_text(
    x=0.05, y=0.05, s="<Data> OWID (year 2021) | <Plot> Benjamin Nowak",
    font=font_credit, color="white", fontsize=10,
    highlight_textprops=[{'font': bold_font_credit}]*2
)

plt.savefig('output/web-waffle-chart-as-share.png', dpi=300)
plt.show()
#NOTE: URL has been updated
font_title = load_font("https://github.com/googlefonts/staatliches/raw/refs/heads/main/fonts/Staatliches-Regular.ttf")
font_credit = load_font("https://github.com/impallari/Raleway/raw/master/fonts/v4020/Raleway-v4020-Light.otf")
bold_font_credit = load_font("https://github.com/impallari/Raleway/raw/master/fonts/v4020/Raleway-v4020-Bold.otf")

background_color = "#222725"
pink = "#f72585"
dark_pink = "#7a0325"

number_of_bars = len(df)  # one bar per continent

# Init the whole figure and axes
fig, axs = plt.subplots(
   nrows=number_of_bars,
   ncols=1,
   figsize=(8, 8),
   dpi=300
)
fig.set_facecolor(background_color)
ax.set_facecolor('white')


# Iterate over each bar and create it
for (i, row), ax in zip(df.iterrows(), axs):

    share = row['percent']
    values = [share, 100-share]

    Waffle.make_waffle(
        ax=ax,
        rows=4,
        columns=25,
        values=values,
        colors=[pink, dark_pink],
    )

    text = f"{row['lab']}"
    ax.text(
        x=-0.4, y=0.5, s=text,
        font=bold_font_credit, color='white', rotation=90,
        ha='center', va='center', fontsize=13
    )
    text = f"{share}%"
    ax.text(
        x=-0.2, y=0.5, s=text,
        font=font_credit, color='white', rotation=90,
        ha='center', va='center', fontsize=13
    )

fig_text(
    x=0.05, y=0.95, s="SHARE OF CEREALS USED AS ",
    highlight_textprops=[{'color': pink}], color='white',
    fontsize=22, font=font_title
)
fig_text(
    x=0.05, y=0.05, s=" OWID (year 2021) |  Benjamin Nowak",
    font=font_credit, color="white", fontsize=10,
    highlight_textprops=[{'font': bold_font_credit}]*2
)

plt.savefig('output/web-waffle-chart-as-share.png', dpi=300)
plt.show()

Multiple line charts¶

https://python-graph-gallery.com/web-line-chart-small-multiple/

In [31]:

Copied!





# Libraries
import matplotlib.pyplot as plt
import pandas as pd
import datetime
# Libraries
import matplotlib.pyplot as plt
import pandas as pd
import datetime

In [32]:

Copied!





# Open the dataset from Github
url = "https://raw.githubusercontent.com/holtzy/the-python-graph-gallery/master/static/data/dataConsumerConfidence.csv"
df = pd.read_csv(url)

# Reshape the DataFrame using pivot longer
df = df.melt(id_vars=['Time'], var_name='country', value_name='value')

# Convert to time format
df['Time'] = pd.to_datetime(df['Time'], format='%b-%Y')

# Remove rows with missing values (only one row)
df = df.dropna()
# Open the dataset from Github
url = "https://raw.githubusercontent.com/holtzy/the-python-graph-gallery/master/static/data/dataConsumerConfidence.csv"
df = pd.read_csv(url)

# Reshape the DataFrame using pivot longer
df = df.melt(id_vars=['Time'], var_name='country', value_name='value')

# Convert to time format
df['Time'] = pd.to_datetime(df['Time'], format='%b-%Y')

# Remove rows with missing values (only one row)
df = df.dropna()

In [33]:

Copied!





# Create a colormap with a color for each country
num_countries = len(df['country'].unique())
colors = plt.get_cmap('tab10', num_countries)

# Init a 3x3 charts
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 12))

# Add a big title on top of the entire chart
fig.suptitle('\nConsumer \nConfidence \nAround the \nWorld\n\n', # Title ('\n' allows you to go to the line),
             fontsize=40,
             fontweight='bold', 
             x=0.05, # Shift the text to the left
             ha='left' # Align the text to the left
            )

# Add a paragraph of text on the right of the title
paragraph_text = (
    "The consumer confidence indicator\n"
    "provided an indication of future\n"
    "developments of households'.\n"
    "consumption and saving. An\n"
    "indicator above 100 signals a boost\n"
    "in the consumers' confidence\n"
    "towards the future economic\n"
    "situation. Values below 100 indicate\n"
    "a pessimistic attitude towards future\n"
    "developments in the economy,\n"
    "possibly resulting in a tendency to\n"
    "save more and consume less. During\n"
    "2022, the consuer confidence\n"
    "indicators have declined in many\n"
    "major economies around the world.\n"
)
fig.text(0.55, 0.9, # Position
         paragraph_text, # Content
         fontsize=12,
         va='top', # Put the paragraph at the top of the chart
         ha='left', # Align the text to the left
        )

# Plot each group in the subplots
for i, (group, ax) in enumerate(zip(df['country'].unique(), ax.flatten())):

    # Filter for the group
    filtered_df = df[df['country'] == group]
    x = filtered_df['Time']
    y = filtered_df['value']

    # Get last value (according to 'Time') for the group
    sorted_df = filtered_df.sort_values(by='Time')
    last_value = sorted_df.iloc[-1]['value']
    last_date = sorted_df.iloc[-1]['Time']

    # Set the background color for each subplot
    ax.set_facecolor('seashell')
    fig.set_facecolor('seashell')
    
    # Plot the line
    ax.plot(x, y, color=colors(i))
    
    # Add the final value
    ax.plot(last_date, # x-axis position
            last_value, # y-axis position
            marker='o', # Style of the point
            markersize=5, # Size of the point
            color=colors(i), # Color
           )
    
    # Add the text of the value
    ax.text(last_date,
             last_value*1.005, # slightly shift up
             f'{round(last_value)}', # round for more lisibility
             fontsize=7,
             color=colors(i), # color
             fontweight='bold',
           )

    # Add the 100 on the left
    ax.text(sorted_df.iloc[0]['Time'] - pd.Timedelta(days=300), # shift the position to the left
             100,
             '100',
             fontsize=10,
             color='black',)

    # Add line 
    sorted_df = df.sort_values(by='Time')
    start_x_position = sorted_df.iloc[0]['Time']
    end_x_position = sorted_df.iloc[-1]['Time']
    ax.plot([start_x_position, end_x_position], # x-axis position
            [100, 100], # y-axis position (constant position)
            color='black', # Color
            alpha=0.8, # Opacity
            linewidth=0.8, # width of the line
           )
    
    # Plot other groups with lighter colors (alpha argument)
    other_groups = df['country'].unique()[df['country'].unique() != group]
    for other_group in other_groups:

        # Filter observations that are not in the group
        other_y = df['value'][df['country'] == other_group]
        other_x = df['Time'][df['country'] == other_group]

        # Display the other observations with less opacity (alpha=0.2)
        ax.plot(other_x, other_y, color=colors(i), alpha=0.2)

    # Removes spines
    ax.spines[['right', 'top', 'left', 'bottom']].set_visible(False)
    
    # Add a bold title to each subplot
    ax.set_title(f'{group}', fontsize=12, fontweight='bold')
    
    # Remove axis labels
    ax.set_yticks([])
    ax.set_xticks([])

# Add a credit section at the bottom of the chart
fig.text(0.0, -0.01, # position
         "Design:", # text
         fontsize=10,
         va='bottom',
         ha='left',
         fontweight='bold',)
fig.text(0.1, -0.01, # position
         "Gilbert Fontana", # text
         fontsize=10,
         va='bottom',
         ha='left')
fig.text(0.0, -0.025, # position
         "Data:", # text
         fontsize=10,
         va='bottom',
         ha='left',
         fontweight='bold',)
fig.text(0.07, -0.025, # position
         "OECD, 2022",
         fontsize=10,
         va='bottom',
         ha='left')

# Adjust layout and spacing
plt.tight_layout()

# Show the plot
plt.show()
# Create a colormap with a color for each country
num_countries = len(df['country'].unique())
colors = plt.get_cmap('tab10', num_countries)

# Init a 3x3 charts
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 12))

# Add a big title on top of the entire chart
fig.suptitle('\nConsumer \nConfidence \nAround the \nWorld\n\n', # Title ('\n' allows you to go to the line),
             fontsize=40,
             fontweight='bold', 
             x=0.05, # Shift the text to the left
             ha='left' # Align the text to the left
            )

# Add a paragraph of text on the right of the title
paragraph_text = (
    "The consumer confidence indicator\n"
    "provided an indication of future\n"
    "developments of households'.\n"
    "consumption and saving. An\n"
    "indicator above 100 signals a boost\n"
    "in the consumers' confidence\n"
    "towards the future economic\n"
    "situation. Values below 100 indicate\n"
    "a pessimistic attitude towards future\n"
    "developments in the economy,\n"
    "possibly resulting in a tendency to\n"
    "save more and consume less. During\n"
    "2022, the consuer confidence\n"
    "indicators have declined in many\n"
    "major economies around the world.\n"
)
fig.text(0.55, 0.9, # Position
         paragraph_text, # Content
         fontsize=12,
         va='top', # Put the paragraph at the top of the chart
         ha='left', # Align the text to the left
        )

# Plot each group in the subplots
for i, (group, ax) in enumerate(zip(df['country'].unique(), ax.flatten())):

    # Filter for the group
    filtered_df = df[df['country'] == group]
    x = filtered_df['Time']
    y = filtered_df['value']

    # Get last value (according to 'Time') for the group
    sorted_df = filtered_df.sort_values(by='Time')
    last_value = sorted_df.iloc[-1]['value']
    last_date = sorted_df.iloc[-1]['Time']

    # Set the background color for each subplot
    ax.set_facecolor('seashell')
    fig.set_facecolor('seashell')
    
    # Plot the line
    ax.plot(x, y, color=colors(i))
    
    # Add the final value
    ax.plot(last_date, # x-axis position
            last_value, # y-axis position
            marker='o', # Style of the point
            markersize=5, # Size of the point
            color=colors(i), # Color
           )
    
    # Add the text of the value
    ax.text(last_date,
             last_value*1.005, # slightly shift up
             f'{round(last_value)}', # round for more lisibility
             fontsize=7,
             color=colors(i), # color
             fontweight='bold',
           )

    # Add the 100 on the left
    ax.text(sorted_df.iloc[0]['Time'] - pd.Timedelta(days=300), # shift the position to the left
             100,
             '100',
             fontsize=10,
             color='black',)

    # Add line 
    sorted_df = df.sort_values(by='Time')
    start_x_position = sorted_df.iloc[0]['Time']
    end_x_position = sorted_df.iloc[-1]['Time']
    ax.plot([start_x_position, end_x_position], # x-axis position
            [100, 100], # y-axis position (constant position)
            color='black', # Color
            alpha=0.8, # Opacity
            linewidth=0.8, # width of the line
           )
    
    # Plot other groups with lighter colors (alpha argument)
    other_groups = df['country'].unique()[df['country'].unique() != group]
    for other_group in other_groups:

        # Filter observations that are not in the group
        other_y = df['value'][df['country'] == other_group]
        other_x = df['Time'][df['country'] == other_group]

        # Display the other observations with less opacity (alpha=0.2)
        ax.plot(other_x, other_y, color=colors(i), alpha=0.2)

    # Removes spines
    ax.spines[['right', 'top', 'left', 'bottom']].set_visible(False)
    
    # Add a bold title to each subplot
    ax.set_title(f'{group}', fontsize=12, fontweight='bold')
    
    # Remove axis labels
    ax.set_yticks([])
    ax.set_xticks([])

# Add a credit section at the bottom of the chart
fig.text(0.0, -0.01, # position
         "Design:", # text
         fontsize=10,
         va='bottom',
         ha='left',
         fontweight='bold',)
fig.text(0.1, -0.01, # position
         "Gilbert Fontana", # text
         fontsize=10,
         va='bottom',
         ha='left')
fig.text(0.0, -0.025, # position
         "Data:", # text
         fontsize=10,
         va='bottom',
         ha='left',
         fontweight='bold',)
fig.text(0.07, -0.025, # position
         "OECD, 2022",
         fontsize=10,
         va='bottom',
         ha='left')

# Adjust layout and spacing
plt.tight_layout()

# Show the plot
plt.show()

Bubble Map¶

https://python-graph-gallery.com/web-bubble-map-with-arrows/

!pip install cartopy geoplot

In [34]:

Copied!





# data manipulation
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib.font_manager import FontProperties
from highlight_text import fig_text, ax_text
from matplotlib.patches import FancyArrowPatch

# geospatial manipulation
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geoplot
import geoplot.crs as gcrs

# Easier way to get fonts
from pyfonts import load_font
# data manipulation
import numpy as np
import pandas as pd
import geopandas as gpd

# visualization
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib.font_manager import FontProperties
from highlight_text import fig_text, ax_text
from matplotlib.patches import FancyArrowPatch

# geospatial manipulation
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import geoplot
import geoplot.crs as gcrs

# Easier way to get fonts
from pyfonts import load_font

In [35]:

Copied!





proj = ccrs.Miller()
# Alternative (see https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html):
# proj = ccrs.Robinson()
# Mercator looks too weird close to the poles
# proj = ccrs.Mercator()


url = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/all_world.geojson"
world = gpd.read_file(url)
world = world[~world['name'].isin(["Antarctica", "Greenland"])]
world = world.to_crs(proj.proj4_init)
world.head()
proj = ccrs.Miller()
# Alternative (see https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html):
# proj = ccrs.Robinson()
# Mercator looks too weird close to the poles
# proj = ccrs.Mercator()


url = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/all_world.geojson"
world = gpd.read_file(url)
world = world[~world['name'].isin(["Antarctica", "Greenland"])]
world = world.to_crs(proj.proj4_init)
world.head()

Out[35]:

	name	geometry
0	Fiji	MULTIPOLYGON (((20037508.343 -1803779.309, 200...
1	Tanzania	POLYGON ((3774143.866 -105756.618, 3792946.708...
2	W. Sahara	POLYGON ((-964649.018 3158195.645, -964597.245...
3	Canada	MULTIPOLYGON (((-13674486.249 5937950.601, -13...
4	United States of America	MULTIPOLYGON (((-13674486.249 5937950.601, -13...

In [36]:

Copied!





#Load data
url = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/earthquakes.csv"
df = pd.read_csv(url)

# Filter dataset: big earth quakes only
df = df[df['Depth (km)']>=0.01] # depth of at least 10 meters

# Sort: big bubbles must be below small bubbles for visibility
df.sort_values(by='Depth (km)', ascending=False, inplace=True)

df.head()
#Load data
url = "https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/earthquakes.csv"
df = pd.read_csv(url)

# Filter dataset: big earth quakes only
df = df[df['Depth (km)']>=0.01] # depth of at least 10 meters

# Sort: big bubbles must be below small bubbles for visibility
df.sort_values(by='Depth (km)', ascending=False, inplace=True)

df.head()

Out[36]:

	Date	Time (utc)	Region	Magnitude	Depth (km)	Latitude	Longitude	Mode	Map	year
7961	20/02/2019	06:50:47	Banda Sea	5.0	2026	-6.89	129.15	A	-	2019.0
6813	07/07/2019	07:50:53	Eastern New Guinea Reg, P.N.G.	5.4	1010	-5.96	147.90	A	-	2019.0
8293	17/01/2019	14:01:50	Fiji Islands	4.7	689	-18.65	179.44	A	-	2019.0
11258	03/01/2018	06:42:58	Fiji Islands Region	5.5	677	-19.93	-178.89	A	-	2018.0
9530	06/09/2018	18:22:24	Fiji Islands Region	5.8	672	-18.88	179.30	A	-	2018.0

Simple¶

In [37]:

Copied!





proj = ccrs.Miller()
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, subplot_kw={'projection':proj})
ax.set_axis_off()

# background map
world.boundary.plot(ax=ax)

# transform the coordinates to the projection's CRS
pc = ccrs.PlateCarree()
new_coords = proj.transform_points(pc, df['Longitude'].values, df['Latitude'].values)

# bubble on top of the map
ax.scatter(
   new_coords[:, 0], new_coords[:, 1],
   s=df['Depth (km)']/3, # size of the bubbles
   zorder=10, # this specifies to put bubbles on top of the map
)

plt.show()
proj = ccrs.Miller()
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, subplot_kw={'projection':proj})
ax.set_axis_off()

# background map
world.boundary.plot(ax=ax)

# transform the coordinates to the projection's CRS
pc = ccrs.PlateCarree()
new_coords = proj.transform_points(pc, df['Longitude'].values, df['Latitude'].values)

# bubble on top of the map
ax.scatter(
   new_coords[:, 0], new_coords[:, 1],
   s=df['Depth (km)']/3, # size of the bubbles
   zorder=10, # this specifies to put bubbles on top of the map
)

plt.show()

More complex¶

In [38]:

Copied!





def draw_arrow(tail_position, head_position, invert=False, radius=0.5, color='black', fig=None):
   if fig is None:
      fig = plt.gcf()
   kw = dict(arrowstyle="Simple, tail_width=0.5, head_width=4, head_length=8", color=color, lw=0.5)
   if invert:
      connectionstyle = f"arc3,rad=-{radius}"
   else:
      connectionstyle = f"arc3,rad={radius}"
   a = FancyArrowPatch(
      tail_position, head_position,
      connectionstyle=connectionstyle,
      transform=fig.transFigure,
      **kw
   )
   fig.patches.append(a)

# TODO: push updated example to graph-gallery
font = load_font('https://github.com/coreyhu/Urbanist/raw/refs/heads/main/fonts/ttf/Urbanist-Medium.ttf')
bold_font =  load_font('https://github.com/coreyhu/Urbanist/raw/refs/heads/main/fonts/ttf/Urbanist-Black.ttf')

# colors
background_color = '#14213d'
map_color = (233/255, 196/255, 106/255, 0.2)
text_color = 'white'
bubble_color = '#fefae0'
alpha_text = 0.7

# initialize the figure
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, subplot_kw={'projection': proj})
fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
ax.set_axis_off()

# background map
world.boundary.plot(ax=ax, linewidth=0, facecolor=map_color)

# transform the coordinates to the projection's CRS
pc = ccrs.PlateCarree()
new_coords = proj.transform_points(pc, df['Longitude'].values, df['Latitude'].values)

# bubble on top of the map
ax.scatter(
   new_coords[:, 0], new_coords[:, 1],
   s=df['Depth (km)'] * np.log(df['Depth (km)']) /10,
   color=bubble_color,
   linewidth=0.4,
   edgecolor='grey',
   alpha=0.6,
   zorder=10,
)

# title
fig_text(
   x=0.5, y=0.98, s='Earthquakes around the world',
   color=text_color, fontsize=30, ha='center', va='top', font=font,
   alpha=alpha_text
)

# subtitle
fig_text(
   x=0.5, y=0.92, s='Earthquakes between 2015 and 2024. Each dot is an earthquake with a size proportionnal to its depth.',
   color=text_color, fontsize=14, ha='center', va='top', font=font, alpha=alpha_text
)

# credit
text = """
<Data>: Pakistan Meteorological Department
<Map>: barbierjoseph.com
"""
fig_text(
   x=0.85, y=0.16, s=text, color=text_color, fontsize=7, ha='right', va='top',
   font=font, highlight_textprops=[{'font': bold_font}, {'font': bold_font}],
   alpha=alpha_text
)

# nazaca plate
highlight_textprops = [
   {"bbox": {"facecolor": "black", "pad": 2, "alpha": 1}, "alpha": alpha_text},
   {"bbox": {"facecolor": "black", "pad": 2, "alpha": 1}, "alpha": alpha_text}
]
draw_arrow((0.23, 0.27), (0.37, 0.35), fig=fig, color=text_color, invert=True, radius=0.2)
fig_text(x=0.16, y=0.265, s='<Collisions between Nazca Plate>\n<and South American plate>', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

# india plate
draw_arrow((0.69, 0.64), (0.64, 0.55), fig=fig, color=text_color, radius=0.4)
fig_text(x=0.7, y=0.66, s='<Collisions between Eurasian plate>\n<and Indian plate>', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

# philippine plate
draw_arrow((0.73, 0.22), (0.8, 0.51), fig=fig, color=text_color, radius=0.6)
fig_text(x=0.54, y=0.22, s='<Collisions between Philippine plate>\n<and Eurasian plate>', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

plt.savefig('output/web-bubble-map-with-arrows.png', dpi=300, bbox_inches="tight")
plt.show()
def draw_arrow(tail_position, head_position, invert=False, radius=0.5, color='black', fig=None):
   if fig is None:
      fig = plt.gcf()
   kw = dict(arrowstyle="Simple, tail_width=0.5, head_width=4, head_length=8", color=color, lw=0.5)
   if invert:
      connectionstyle = f"arc3,rad=-{radius}"
   else:
      connectionstyle = f"arc3,rad={radius}"
   a = FancyArrowPatch(
      tail_position, head_position,
      connectionstyle=connectionstyle,
      transform=fig.transFigure,
      **kw
   )
   fig.patches.append(a)

# TODO: push updated example to graph-gallery
font = load_font('https://github.com/coreyhu/Urbanist/raw/refs/heads/main/fonts/ttf/Urbanist-Medium.ttf')
bold_font =  load_font('https://github.com/coreyhu/Urbanist/raw/refs/heads/main/fonts/ttf/Urbanist-Black.ttf')

# colors
background_color = '#14213d'
map_color = (233/255, 196/255, 106/255, 0.2)
text_color = 'white'
bubble_color = '#fefae0'
alpha_text = 0.7

# initialize the figure
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, subplot_kw={'projection': proj})
fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
ax.set_axis_off()

# background map
world.boundary.plot(ax=ax, linewidth=0, facecolor=map_color)

# transform the coordinates to the projection's CRS
pc = ccrs.PlateCarree()
new_coords = proj.transform_points(pc, df['Longitude'].values, df['Latitude'].values)

# bubble on top of the map
ax.scatter(
   new_coords[:, 0], new_coords[:, 1],
   s=df['Depth (km)'] * np.log(df['Depth (km)']) /10,
   color=bubble_color,
   linewidth=0.4,
   edgecolor='grey',
   alpha=0.6,
   zorder=10,
)

# title
fig_text(
   x=0.5, y=0.98, s='Earthquakes around the world',
   color=text_color, fontsize=30, ha='center', va='top', font=font,
   alpha=alpha_text
)

# subtitle
fig_text(
   x=0.5, y=0.92, s='Earthquakes between 2015 and 2024. Each dot is an earthquake with a size proportionnal to its depth.',
   color=text_color, fontsize=14, ha='center', va='top', font=font, alpha=alpha_text
)

# credit
text = """
: Pakistan Meteorological Department
: barbierjoseph.com
"""
fig_text(
   x=0.85, y=0.16, s=text, color=text_color, fontsize=7, ha='right', va='top',
   font=font, highlight_textprops=[{'font': bold_font}, {'font': bold_font}],
   alpha=alpha_text
)

# nazaca plate
highlight_textprops = [
   {"bbox": {"facecolor": "black", "pad": 2, "alpha": 1}, "alpha": alpha_text},
   {"bbox": {"facecolor": "black", "pad": 2, "alpha": 1}, "alpha": alpha_text}
]
draw_arrow((0.23, 0.27), (0.37, 0.35), fig=fig, color=text_color, invert=True, radius=0.2)
fig_text(x=0.16, y=0.265, s='\n', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

# india plate
draw_arrow((0.69, 0.64), (0.64, 0.55), fig=fig, color=text_color, radius=0.4)
fig_text(x=0.7, y=0.66, s='\n', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

# philippine plate
draw_arrow((0.73, 0.22), (0.8, 0.51), fig=fig, color=text_color, radius=0.6)
fig_text(x=0.54, y=0.22, s='\n', fontsize=10, color=text_color, font=font, highlight_textprops=highlight_textprops, zorder=100)

plt.savefig('output/web-bubble-map-with-arrows.png', dpi=300, bbox_inches="tight")
plt.show()

Animations¶

Simple¶

In [39]:

Copied!

# libraries
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
# libraries
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

In [40]:

Copied!





# initiate figure
fig, ax = plt.subplots(figsize=(10, 8), dpi=120)

def update(frame):
    ax.clear()
    ax.scatter(
      1+frame, 10+frame*10,
      s=600, alpha=0.5,
      edgecolors="black"
    )
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 100)
    return fig, ax

ani = FuncAnimation(fig, update, frames=range(10))
ani.save("output/my_animation.gif", fps=5);
plt.close(fig) # Don't show plot directly.
# initiate figure
fig, ax = plt.subplots(figsize=(10, 8), dpi=120)

def update(frame):
    ax.clear()
    ax.scatter(
      1+frame, 10+frame*10,
      s=600, alpha=0.5,
      edgecolors="black"
    )
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 100)
    return fig, ax

ani = FuncAnimation(fig, update, frames=range(10))
ani.save("output/my_animation.gif", fps=5);
plt.close(fig) # Don't show plot directly.

my_animation.gif:

simple_animation

More Complex¶

In [41]:

Copied!





import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pandas as pd
import numpy as np

data = pd.read_csv('https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/gapminderData.csv')
data['continent'] = pd.Categorical(data['continent'])
data.head()
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import pandas as pd
import numpy as np

data = pd.read_csv('https://raw.githubusercontent.com/holtzy/The-Python-Graph-Gallery/master/static/data/gapminderData.csv')
data['continent'] = pd.Categorical(data['continent'])
data.head()

Out[41]:

	country	year	pop	continent	lifeExp	gdpPercap
0	Afghanistan	1952	8425333.0	Asia	28.801	779.445314
1	Afghanistan	1957	9240934.0	Asia	30.332	820.853030
2	Afghanistan	1962	10267083.0	Asia	31.997	853.100710
3	Afghanistan	1967	11537966.0	Asia	34.020	836.197138
4	Afghanistan	1972	13079460.0	Asia	36.088	739.981106

In [42]:

Copied!





interp_data = pd.DataFrame()

multiple = 10
for country in data['country'].unique():
   
   # prepare a temporary dataframe and subset
   temp_df = pd.DataFrame()
   country_df = data[data['country']==country]

   # interpolate the data
   years = np.linspace(country_df['year'].min(), country_df['year'].max(), len(country_df) * multiple-(multiple-1))
   pops = np.linspace(country_df['pop'].min(), country_df['pop'].max(), len(country_df) * multiple-(multiple-1))
   lifeExps = np.linspace(country_df['lifeExp'].min(), country_df['lifeExp'].max(), len(country_df) * multiple-(multiple-1))
   gdps = np.linspace(country_df['gdpPercap'].min(), country_df['gdpPercap'].max(), len(country_df) * multiple-(multiple-1))
   continents = [country_df['continent'].values[0]] * len(years)

   # add the data to the temporary dataframe
   temp_df['year'] = years
   temp_df['pop'] = pops
   temp_df['lifeExp'] = lifeExps
   temp_df['gdpPercap'] = gdps
   temp_df['continent'] = continents
   temp_df['country'] = country

   # append the temporary dataframe to the final dataframe
   interp_data = pd.concat([interp_data, temp_df])
   interp_data['continent'] = pd.Categorical(interp_data['continent'])

interp_data.head()
interp_data = pd.DataFrame()

multiple = 10
for country in data['country'].unique():
   
   # prepare a temporary dataframe and subset
   temp_df = pd.DataFrame()
   country_df = data[data['country']==country]

   # interpolate the data
   years = np.linspace(country_df['year'].min(), country_df['year'].max(), len(country_df) * multiple-(multiple-1))
   pops = np.linspace(country_df['pop'].min(), country_df['pop'].max(), len(country_df) * multiple-(multiple-1))
   lifeExps = np.linspace(country_df['lifeExp'].min(), country_df['lifeExp'].max(), len(country_df) * multiple-(multiple-1))
   gdps = np.linspace(country_df['gdpPercap'].min(), country_df['gdpPercap'].max(), len(country_df) * multiple-(multiple-1))
   continents = [country_df['continent'].values[0]] * len(years)

   # add the data to the temporary dataframe
   temp_df['year'] = years
   temp_df['pop'] = pops
   temp_df['lifeExp'] = lifeExps
   temp_df['gdpPercap'] = gdps
   temp_df['continent'] = continents
   temp_df['country'] = country

   # append the temporary dataframe to the final dataframe
   interp_data = pd.concat([interp_data, temp_df])
   interp_data['continent'] = pd.Categorical(interp_data['continent'])

interp_data.head()

Out[42]:

	year	pop	lifeExp	gdpPercap	continent	country
0	1952.0	8.425333e+06	28.801000	635.341351	Asia	Afghanistan
1	1952.5	8.638647e+06	28.937609	638.456534	Asia	Afghanistan
2	1953.0	8.851962e+06	29.074218	641.571716	Asia	Afghanistan
3	1953.5	9.065276e+06	29.210827	644.686899	Asia	Afghanistan
4	1954.0	9.278591e+06	29.347436	647.802081	Asia	Afghanistan

In [43]:

Copied!





fig, ax = plt.subplots(figsize=(10, 10), dpi=120)

def update(frame):
    # Clear the current plot to redraw
    ax.clear()
    
    # Filter data for the specific year
    yearly_data = interp_data.loc[interp_data.year == frame, :]

    # Scatter plot for that year
    ax.scatter(
        x=yearly_data['lifeExp'], 
        y=yearly_data['gdpPercap'], 
        s=yearly_data['pop']/100000,
        c=yearly_data['continent'].cat.codes, 
        cmap="Accent", 
        alpha=0.6, 
        edgecolors="white", 
        linewidths=2
    )

    # Updating titles and layout
    ax.set_title(f"Global Development in {round(frame)}")
    ax.set_xlabel("Life Expectancy")
    ax.set_ylabel("GDP per Capita")
    ax.set_yscale('log')
    ax.set_ylim(100, 100000)
    ax.set_xlim(20, 90)

    return ax

ani = FuncAnimation(fig, update, frames=interp_data['year'].unique())
ani.save('output/gapminder-2.gif', fps=10)
plt.close(fig)
fig, ax = plt.subplots(figsize=(10, 10), dpi=120)

def update(frame):
    # Clear the current plot to redraw
    ax.clear()
    
    # Filter data for the specific year
    yearly_data = interp_data.loc[interp_data.year == frame, :]

    # Scatter plot for that year
    ax.scatter(
        x=yearly_data['lifeExp'], 
        y=yearly_data['gdpPercap'], 
        s=yearly_data['pop']/100000,
        c=yearly_data['continent'].cat.codes, 
        cmap="Accent", 
        alpha=0.6, 
        edgecolors="white", 
        linewidths=2
    )

    # Updating titles and layout
    ax.set_title(f"Global Development in {round(frame)}")
    ax.set_xlabel("Life Expectancy")
    ax.set_ylabel("GDP per Capita")
    ax.set_yscale('log')
    ax.set_ylim(100, 100000)
    ax.set_xlim(20, 90)

    return ax

ani = FuncAnimation(fig, update, frames=interp_data['year'].unique())
ani.save('output/gapminder-2.gif', fps=10)
plt.close(fig)

gapminder-2.gif:

animation