In [1]:
Copied!
import pandas as pd
import pandas as pd
Parse CSV, trial & error¶
In [2]:
Copied!
pd.read_csv('output/SchPark01.csv')
pd.read_csv('output/SchPark01.csv')
Out[2]:
2011/01/01;00:00;0.0;-0.6 | |
---|---|
0 | 2011/01/01;00:15;0.0;-0.4 |
1 | 2011/01/01;00:30;0.0;-0.5 |
2 | 2011/01/01;00:45;0.0;-0.5 |
3 | 2011/01/01;01:00;0.0;-0.7 |
4 | 2011/01/01;01:15;0.0;-0.6 |
... | ... |
35034 | 2011/12/31;22:45;0.0;7.9 |
35035 | 2011/12/31;23:00;0.0;7.9 |
35036 | 2011/12/31;23:15;0.0;8.4 |
35037 | 2011/12/31;23:30;0.0;8.5 |
35038 | 2011/12/31;23:45;0.0;8.1 |
35039 rows × 1 columns
In [3]:
Copied!
pd.read_csv('output/SchPark01.csv', sep=';')
pd.read_csv('output/SchPark01.csv', sep=';')
Out[3]:
2011/01/01 | 00:00 | 0.0 | -0.6 | |
---|---|---|---|---|
0 | 2011/01/01 | 00:15 | 0.0 | -0.4 |
1 | 2011/01/01 | 00:30 | 0.0 | -0.5 |
2 | 2011/01/01 | 00:45 | 0.0 | -0.5 |
3 | 2011/01/01 | 01:00 | 0.0 | -0.7 |
4 | 2011/01/01 | 01:15 | 0.0 | -0.6 |
... | ... | ... | ... | ... |
35034 | 2011/12/31 | 22:45 | 0.0 | 7.9 |
35035 | 2011/12/31 | 23:00 | 0.0 | 7.9 |
35036 | 2011/12/31 | 23:15 | 0.0 | 8.4 |
35037 | 2011/12/31 | 23:30 | 0.0 | 8.5 |
35038 | 2011/12/31 | 23:45 | 0.0 | 8.1 |
35039 rows × 4 columns
In [4]:
Copied!
pd.read_csv('output/SchPark01.csv', sep=';',
names = ['date', 'time', 'ghi', 'ta'])
pd.read_csv('output/SchPark01.csv', sep=';',
names = ['date', 'time', 'ghi', 'ta'])
Out[4]:
date | time | ghi | ta | |
---|---|---|---|---|
0 | 2011/01/01 | 00:00 | 0.0 | -0.6 |
1 | 2011/01/01 | 00:15 | 0.0 | -0.4 |
2 | 2011/01/01 | 00:30 | 0.0 | -0.5 |
3 | 2011/01/01 | 00:45 | 0.0 | -0.5 |
4 | 2011/01/01 | 01:00 | 0.0 | -0.7 |
... | ... | ... | ... | ... |
35035 | 2011/12/31 | 22:45 | 0.0 | 7.9 |
35036 | 2011/12/31 | 23:00 | 0.0 | 7.9 |
35037 | 2011/12/31 | 23:15 | 0.0 | 8.4 |
35038 | 2011/12/31 | 23:30 | 0.0 | 8.5 |
35039 | 2011/12/31 | 23:45 | 0.0 | 8.1 |
35040 rows × 4 columns
Parse CSV¶
In [5]:
Copied!
# The file has no header row and separates fields with ';'.
# A field holding a single space is read as a missing value.
# Date and time are merged into one timestamp that becomes the index
# (approach from https://stackoverflow.com/a/77983644/6419007).
df = (
    pd.read_csv(
        'output/SchPark01.csv',
        sep=';',
        na_values=' ',
        names=['date', 'time', 'ghi', 'ta'],
    )
    .assign(
        datetime=lambda raw: pd.to_datetime(raw['date'] + ' ' + raw['time'],
                                            format="%Y/%m/%d %H:%M")
    )
    .drop(columns=['date', 'time'])
    .set_index('datetime')
)
df
# The file has no header row and separates fields with ';'.
# A field holding a single space is read as a missing value.
# Date and time are merged into one timestamp that becomes the index
# (approach from https://stackoverflow.com/a/77983644/6419007).
df = (
    pd.read_csv(
        'output/SchPark01.csv',
        sep=';',
        na_values=' ',
        names=['date', 'time', 'ghi', 'ta'],
    )
    .assign(
        datetime=lambda raw: pd.to_datetime(raw['date'] + ' ' + raw['time'],
                                            format="%Y/%m/%d %H:%M")
    )
    .drop(columns=['date', 'time'])
    .set_index('datetime')
)
df
Out[5]:
ghi | ta | |
---|---|---|
datetime | ||
2011-01-01 00:00:00 | 0.0 | -0.6 |
2011-01-01 00:15:00 | 0.0 | -0.4 |
2011-01-01 00:30:00 | 0.0 | -0.5 |
2011-01-01 00:45:00 | 0.0 | -0.5 |
2011-01-01 01:00:00 | 0.0 | -0.7 |
... | ... | ... |
2011-12-31 22:45:00 | 0.0 | 7.9 |
2011-12-31 23:00:00 | 0.0 | 7.9 |
2011-12-31 23:15:00 | 0.0 | 8.4 |
2011-12-31 23:30:00 | 0.0 | 8.5 |
2011-12-31 23:45:00 | 0.0 | 8.1 |
35040 rows × 2 columns
Plots¶
In [6]:
Copied!
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
In [7]:
Copied!
plt.rcParams['figure.figsize'] = (15, 8)
plt.rcParams['figure.figsize'] = (15, 8)
In [8]:
Copied!
df.plot();
df.plot();
In [9]:
Copied!
df.resample('ME').mean().plot();
df.resample('ME').mean().plot();
In [10]:
Copied!
import seaborn as sns
import seaborn as sns
In [11]:
Copied!
sns.heatmap(
pd.pivot_table(df, values='ghi', index=df.index.time, columns=df.index.dayofyear),
annot=False);
sns.heatmap(
pd.pivot_table(df, values='ghi', index=df.index.time, columns=df.index.dayofyear),
annot=False);
In [12]:
Copied!
sns.heatmap(
pd.pivot_table(df, values='ta', index=df.index.time, columns=df.index.dayofyear),
annot=False);
sns.heatmap(
pd.pivot_table(df, values='ta', index=df.index.time, columns=df.index.dayofyear),
annot=False);
In [13]:
Copied!
# Fill every missing reading with the latest earlier reading taken at the same
# time of day (idea from https://stackoverflow.com/a/16345735/6419007).
# Grouping on the index's time-of-day array gives one group per quarter hour.
df2 = df.groupby(df.index.time).ffill()
# Fill every missing reading with the latest earlier reading taken at the same
# time of day (idea from https://stackoverflow.com/a/16345735/6419007).
# Grouping on the index's time-of-day array gives one group per quarter hour.
df2 = df.groupby(df.index.time).ffill()
In [14]:
Copied!
df2
df2
Out[14]:
ghi | ta | |
---|---|---|
datetime | ||
2011-01-01 00:00:00 | 0.0 | -0.6 |
2011-01-01 00:15:00 | 0.0 | -0.4 |
2011-01-01 00:30:00 | 0.0 | -0.5 |
2011-01-01 00:45:00 | 0.0 | -0.5 |
2011-01-01 01:00:00 | 0.0 | -0.7 |
... | ... | ... |
2011-12-31 22:45:00 | 0.0 | 7.9 |
2011-12-31 23:00:00 | 0.0 | 7.9 |
2011-12-31 23:15:00 | 0.0 | 8.4 |
2011-12-31 23:30:00 | 0.0 | 8.5 |
2011-12-31 23:45:00 | 0.0 | 8.1 |
35040 rows × 2 columns
In [15]:
Copied!
sns.heatmap(
pd.pivot_table(df2, values='ghi', index=df2.index.time, columns=df2.index.dayofyear),
annot=False);
sns.heatmap(
pd.pivot_table(df2, values='ghi', index=df2.index.time, columns=df2.index.dayofyear),
annot=False);
In [16]:
Copied!
sns.heatmap(
pd.pivot_table(df2, values='ta', index=df2.index.time, columns=df2.index.dayofyear),
annot=False);
sns.heatmap(
pd.pivot_table(df2, values='ta', index=df2.index.time, columns=df2.index.dayofyear),
annot=False);
Title, labels, units¶
In [17]:
Copied!
import matplotlib.dates as mdates

# One row per time of day ("HH:MM"), one column per day of the year.
ta_by_time_and_day = pd.pivot_table(
    df2,
    values='ta',
    index=df2.index.strftime("%H:%M"),
    columns=df2.index.dayofyear,
)

ax = sns.heatmap(
    ta_by_time_and_day,
    annot=False,
    cbar_kws={'label': '', 'format': '%.0f °C'},
)
ax.set_title("Temperature in SchPark")
ax.set_xlabel("")
ax.set_ylabel("")

# NOTE(review): the x axis holds day-of-year column positions, not real dates.
# The date locator/formatter presumably work because matplotlib reads those
# positions as day offsets from its date epoch - confirm after upgrades.
month_locator = mdates.MonthLocator(bymonthday=15)
ax.xaxis.set_major_locator(month_locator)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%B"))
plt.show()
import matplotlib.dates as mdates

# One row per time of day ("HH:MM"), one column per day of the year.
ta_by_time_and_day = pd.pivot_table(
    df2,
    values='ta',
    index=df2.index.strftime("%H:%M"),
    columns=df2.index.dayofyear,
)

ax = sns.heatmap(
    ta_by_time_and_day,
    annot=False,
    cbar_kws={'label': '', 'format': '%.0f °C'},
)
ax.set_title("Temperature in SchPark")
ax.set_xlabel("")
ax.set_ylabel("")

# NOTE(review): the x axis holds day-of-year column positions, not real dates.
# The date locator/formatter presumably work because matplotlib reads those
# positions as day offsets from its date epoch - confirm after upgrades.
month_locator = mdates.MonthLocator(bymonthday=15)
ax.xaxis.set_major_locator(month_locator)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%B"))
plt.show()
In [18]:
Copied!
df.ta.mean()
df.ta.mean()
Out[18]:
12.796018797659162
In [19]:
Copied!
df2.ta.mean()
df2.ta.mean()
Out[19]:
12.749155251141554
In [20]:
Copied!
df.sort_values('ghi', ascending=False).ghi.plot(use_index=False, title='Sorted irradiance');
df.sort_values('ghi', ascending=False).ghi.plot(use_index=False, title='Sorted irradiance');
In [21]:
Copied!
df.sort_values('ta', ascending=False).ta.plot(use_index=False, title = 'Sorted temperature');
df.sort_values('ta', ascending=False).ta.plot(use_index=False, title = 'Sorted temperature');
In [22]:
Copied!
df.plot(x='ghi', y='ta', xlabel='GHI [W/m²]', ylabel='Temperature', kind='scatter');
df.plot(x='ghi', y='ta', xlabel='GHI [W/m²]', ylabel='Temperature', kind='scatter');
Warmest day¶
In [23]:
Copied!
max_temp = df2.ta.max()
max_temp
max_temp = df2.ta.max()
max_temp
Out[23]:
38.3
In [24]:
Copied!
# Calendar date of the first reading that reaches the maximum temperature.
warmest_date = df2.ta.idxmax().date()
warmest_date
# Calendar date of the first reading that reaches the maximum temperature.
warmest_date = df2.ta.idxmax().date()
warmest_date
Out[24]:
datetime.date(2011, 8, 23)
In [25]:
Copied!
warmest_day = df2[df2.index.date == warmest_date]
warmest_day
warmest_day = df2[df2.index.date == warmest_date]
warmest_day
Out[25]:
ghi | ta | |
---|---|---|
datetime | ||
2011-08-23 00:00:00 | 0.0 | 27.9 |
2011-08-23 00:15:00 | 0.0 | 27.7 |
2011-08-23 00:30:00 | 0.0 | 27.1 |
2011-08-23 00:45:00 | 0.0 | 26.7 |
2011-08-23 01:00:00 | 0.0 | 27.0 |
... | ... | ... |
2011-08-23 22:45:00 | 0.0 | 28.2 |
2011-08-23 23:00:00 | 0.0 | 28.1 |
2011-08-23 23:15:00 | 0.0 | 28.0 |
2011-08-23 23:30:00 | 0.0 | 28.3 |
2011-08-23 23:45:00 | 0.0 | 28.4 |
96 rows × 2 columns
In [26]:
Copied!
warmest_day.ghi.plot(title='Irradiance during warmest day in SchPark, 2011');
warmest_day.ghi.plot(title='Irradiance during warmest day in SchPark, 2011');
In [27]:
Copied!
warmest_day.ta.plot(title='Temperature during warmest day in SchPark, 2011');
warmest_day.ta.plot(title='Temperature during warmest day in SchPark, 2011');
Monthly temperature ridge lines¶
In [28]:
Copied!
df2.ta[df2.index.month==7].plot.hist(bins=50, title='Temperature distribution in July [°C]');
df2.ta[df2.index.month==7].plot.hist(bins=50, title='Temperature distribution in July [°C]');
In [29]:
Copied!
# getting necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def ridge_lines(weather, column_name, title, xaxis, label_position=(-15, 0.02)):
    """Plot one overlapping density curve ("ridge line") per calendar month.

    Each month present in the data gets its own row, ordered from January to
    December and coloured by the month's mean value (cool to warm).

    Parameters
    ----------
    weather : pandas.DataFrame
        Data whose index exposes ``.month`` (e.g. a ``DatetimeIndex``).
        The frame is not modified.
    column_name : str
        Column whose monthly distribution is drawn.
    title : str
        Figure title.
    xaxis : str
        Label of the shared x axis.
    label_position : tuple of float, optional
        ``(x, y)`` position, in data coordinates, of the month name written
        on each row. The default matches the previously hard-coded values.

    Raises
    ------
    ValueError
        If ``column_name`` holds no non-missing value.

    Notes
    -----
    Calls ``sns.set_theme``, which changes the global plotting style for all
    later figures, and displays the figure with ``plt.show()``.
    """
    # Missing values contribute nothing to the means or the densities.
    # Dropping them first keeps a month made only of gaps from producing an
    # empty row, which would have no line to take the label colour from.
    df = weather.dropna(subset=[column_name]).copy()
    if df.empty:
        raise ValueError(f"no valid values in column {column_name!r}")

    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    month_dict = {1: 'january',
                  2: 'february',
                  3: 'march',
                  4: 'april',
                  5: 'may',
                  6: 'june',
                  7: 'july',
                  8: 'august',
                  9: 'september',
                  10: 'october',
                  11: 'november',
                  12: 'december'}
    df['month'] = df.index.month.map(month_dict)
    # Monthly mean, repeated on every row of that month; it drives the colour.
    df['mean_month'] = df.groupby('month')[column_name].transform('mean')

    # Fix the row order explicitly (calendar order, present months only) so the
    # labels below cannot drift from the facets when the data starts
    # mid-year or skips a month.
    months_present = set(df.index.month)
    row_order = [name for number, name in month_dict.items()
                 if number in months_present]

    # One colour per distinct monthly mean, spanning the whole coolwarm range.
    pal = sns.color_palette(palette='coolwarm',
                            n_colors=df['mean_month'].nunique())

    # 'hue' is the variable that 'palette' turns into colours.
    g = sns.FacetGrid(df, row='month', row_order=row_order, hue='mean_month',
                      aspect=15, height=0.75, palette=pal)

    # Filled density for each month ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          fill=True, alpha=1, linewidth=1.5)
    # ... a white contour on top of it ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          color="w", lw=2)
    # ... and a coloured baseline.
    g.map(plt.axhline, y=0,
          lw=2, clip_on=False)

    # Write the month name on its row, in the colour of the last line drawn
    # there (the baseline).
    label_x, label_y = label_position
    for month_name, ax in zip(row_order, g.axes.flat):
        ax.text(label_x, label_y, month_name,
                fontweight='bold', fontsize=15,
                color=ax.lines[-1].get_color())

    # A negative spacing makes the rows overlap.
    g.figure.subplots_adjust(hspace=-0.3)

    # Remove facet titles, y ticks and spines.
    g.set_titles("")
    g.set_ylabels("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)

    # Only the bottom row carries the shared x axis decorations.
    bottom_ax = g.axes.flat[-1]
    plt.setp(bottom_ax.get_xticklabels(), fontsize=15, fontweight='bold')
    bottom_ax.set_xlabel(xaxis, fontweight='bold', fontsize=15)
    g.figure.suptitle(title,
                      ha='right',
                      fontsize=20,
                      fontweight=20)  # numeric weight, kept as in the original

    plt.show()
# getting necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def ridge_lines(weather, column_name, title, xaxis, label_position=(-15, 0.02)):
    """Plot one overlapping density curve ("ridge line") per calendar month.

    Each month present in the data gets its own row, ordered from January to
    December and coloured by the month's mean value (cool to warm).

    Parameters
    ----------
    weather : pandas.DataFrame
        Data whose index exposes ``.month`` (e.g. a ``DatetimeIndex``).
        The frame is not modified.
    column_name : str
        Column whose monthly distribution is drawn.
    title : str
        Figure title.
    xaxis : str
        Label of the shared x axis.
    label_position : tuple of float, optional
        ``(x, y)`` position, in data coordinates, of the month name written
        on each row. The default matches the previously hard-coded values.

    Raises
    ------
    ValueError
        If ``column_name`` holds no non-missing value.

    Notes
    -----
    Calls ``sns.set_theme``, which changes the global plotting style for all
    later figures, and displays the figure with ``plt.show()``.
    """
    # Missing values contribute nothing to the means or the densities.
    # Dropping them first keeps a month made only of gaps from producing an
    # empty row, which would have no line to take the label colour from.
    df = weather.dropna(subset=[column_name]).copy()
    if df.empty:
        raise ValueError(f"no valid values in column {column_name!r}")

    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    month_dict = {1: 'january',
                  2: 'february',
                  3: 'march',
                  4: 'april',
                  5: 'may',
                  6: 'june',
                  7: 'july',
                  8: 'august',
                  9: 'september',
                  10: 'october',
                  11: 'november',
                  12: 'december'}
    df['month'] = df.index.month.map(month_dict)
    # Monthly mean, repeated on every row of that month; it drives the colour.
    df['mean_month'] = df.groupby('month')[column_name].transform('mean')

    # Fix the row order explicitly (calendar order, present months only) so the
    # labels below cannot drift from the facets when the data starts
    # mid-year or skips a month.
    months_present = set(df.index.month)
    row_order = [name for number, name in month_dict.items()
                 if number in months_present]

    # One colour per distinct monthly mean, spanning the whole coolwarm range.
    pal = sns.color_palette(palette='coolwarm',
                            n_colors=df['mean_month'].nunique())

    # 'hue' is the variable that 'palette' turns into colours.
    g = sns.FacetGrid(df, row='month', row_order=row_order, hue='mean_month',
                      aspect=15, height=0.75, palette=pal)

    # Filled density for each month ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          fill=True, alpha=1, linewidth=1.5)
    # ... a white contour on top of it ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          color="w", lw=2)
    # ... and a coloured baseline.
    g.map(plt.axhline, y=0,
          lw=2, clip_on=False)

    # Write the month name on its row, in the colour of the last line drawn
    # there (the baseline).
    label_x, label_y = label_position
    for month_name, ax in zip(row_order, g.axes.flat):
        ax.text(label_x, label_y, month_name,
                fontweight='bold', fontsize=15,
                color=ax.lines[-1].get_color())

    # A negative spacing makes the rows overlap.
    g.figure.subplots_adjust(hspace=-0.3)

    # Remove facet titles, y ticks and spines.
    g.set_titles("")
    g.set_ylabels("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)

    # Only the bottom row carries the shared x axis decorations.
    bottom_ax = g.axes.flat[-1]
    plt.setp(bottom_ax.get_xticklabels(), fontsize=15, fontweight='bold')
    bottom_ax.set_xlabel(xaxis, fontweight='bold', fontsize=15)
    g.figure.suptitle(title,
                      ha='right',
                      fontsize=20,
                      fontweight=20)  # numeric weight, kept as in the original

    plt.show()
In [30]:
Copied!
ridge_lines(df, 'ta', 'Temperature distribution in Scharnhauser Park (2011)', 'Temperature in degree Celsius')
ridge_lines(df, 'ta', 'Temperature distribution in Scharnhauser Park (2011)', 'Temperature in degree Celsius')