In [1]:

Copied!

import pandas as pd
import pandas as pd

Parse CSV, trial & error¶

In [2]:

Copied!

# First attempt with pandas defaults: the file is ';'-separated, so each record
# lands in a single column and the first record is consumed as the header.
pd.read_csv('output/SchPark01.csv')

Out[2]:

	2011/01/01;00:00;0.0;-0.6
0	2011/01/01;00:15;0.0;-0.4
1	2011/01/01;00:30;0.0;-0.5
2	2011/01/01;00:45;0.0;-0.5
3	2011/01/01;01:00;0.0;-0.7
4	2011/01/01;01:15;0.0;-0.6
...	...
35034	2011/12/31;22:45;0.0;7.9
35035	2011/12/31;23:00;0.0;7.9
35036	2011/12/31;23:15;0.0;8.4
35037	2011/12/31;23:30;0.0;8.5
35038	2011/12/31;23:45;0.0;8.1

35039 rows × 1 columns

In [3]:

Copied!

# Second attempt: the right separator splits the four fields, but the first
# record is still taken as the header row (one data row is lost).
pd.read_csv('output/SchPark01.csv', sep=';')

Out[3]:

	2011/01/01	00:00	0.0	-0.6
0	2011/01/01	00:15	0.0	-0.4
1	2011/01/01	00:30	0.0	-0.5
2	2011/01/01	00:45	0.0	-0.5
3	2011/01/01	01:00	0.0	-0.7
4	2011/01/01	01:15	0.0	-0.6
...	...	...	...	...
35034	2011/12/31	22:45	0.0	7.9
35035	2011/12/31	23:00	0.0	7.9
35036	2011/12/31	23:15	0.0	8.4
35037	2011/12/31	23:30	0.0	8.5
35038	2011/12/31	23:45	0.0	8.1

35039 rows × 4 columns

In [4]:

Copied!

# Third attempt: explicit column names, so the file is read as header-less and
# the first record is kept as data.
pd.read_csv('output/SchPark01.csv', sep=';',
            names = ['date', 'time', 'ghi', 'ta'])

Out[4]:

	date	time	ghi	ta
0	2011/01/01	00:00	0.0	-0.6
1	2011/01/01	00:15	0.0	-0.4
2	2011/01/01	00:30	0.0	-0.5
3	2011/01/01	00:45	0.0	-0.5
4	2011/01/01	01:00	0.0	-0.7
...	...	...	...	...
35035	2011/12/31	22:45	0.0	7.9
35036	2011/12/31	23:00	0.0	7.9
35037	2011/12/31	23:15	0.0	8.4
35038	2011/12/31	23:30	0.0	8.5
35039	2011/12/31	23:45	0.0	8.1

35040 rows × 4 columns

# NOTE: The following will not work because of empty values: `ghi` and `ta` will be parsed as strings, so the mean cannot be computed:
# pd.read_csv('output/SchPark01.csv', sep=';', names = ['date', 'time', 'ghi', 'ta']).ghi.mean()

Parse CSV¶

In [5]:

Copied!





# Read the header-less, ';'-separated file. Fields holding a single blank are
# treated as missing so that 'ghi' and 'ta' are parsed as numbers, not strings.
df = pd.read_csv('output/SchPark01.csv',
            sep = ';',
            na_values = ' ',
            names = ['date', 'time', 'ghi', 'ta'],
)

# Join the 'date' and 'time' text columns into one timestamp column (pop()
# removes the two source columns at the same time) and use it as the index.
# https://stackoverflow.com/a/77983644/6419007
df['datetime'] = pd.to_datetime(df.pop('date')+' '+ df.pop('time'),
                                format="%Y/%m/%d %H:%M")
df = df.set_index('datetime')
df

Out[5]:

	ghi	ta
datetime
2011-01-01 00:00:00	0.0	-0.6
2011-01-01 00:15:00	0.0	-0.4
2011-01-01 00:30:00	0.0	-0.5
2011-01-01 00:45:00	0.0	-0.5
2011-01-01 01:00:00	0.0	-0.7
...	...	...
2011-12-31 22:45:00	0.0	7.9
2011-12-31 23:00:00	0.0	7.9
2011-12-31 23:15:00	0.0	8.4
2011-12-31 23:30:00	0.0	8.5
2011-12-31 23:45:00	0.0	8.1

35040 rows × 2 columns

Plots¶

In [6]:

Copied!

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

In [7]:

Copied!

# Default (width, height) for every figure created after this cell.
plt.rcParams['figure.figsize'] = (15, 8)

In [8]:

Copied!

# Raw time series of both columns; the trailing ';' hides the Axes repr.
df.plot();

No description has been provided for this image

In [9]:

Copied!

# Monthly means of both columns ('ME' = bins ending at each month end).
df.resample('ME').mean().plot();

In [10]:

Copied!

import seaborn as sns
import seaborn as sns

In [11]:

Copied!

# Heatmap of 'ghi': one row per time of day, one column per day of the year.
# pivot_table aggregates values that fall into the same cell (mean by default).
sns.heatmap(
        pd.pivot_table(df, values='ghi', index=df.index.time, columns=df.index.dayofyear),
        annot=False);

In [12]:

Copied!

# Heatmap of 'ta': one row per time of day, one column per day of the year.
# Missing measurements in df show up as empty cells.
sns.heatmap(
        pd.pivot_table(df, values='ta', index=df.index.time, columns=df.index.dayofyear),
        annot=False);

In [13]:

Copied!

# Gap filling: rows are grouped by their clock time and forward-filled within
# each group, so a missing value is replaced by the most recent earlier value
# recorded at the same time of day.
# https://stackoverflow.com/a/16345735/6419007
# NOTE: it used to be x.time, now it's apparently x.time()
df2 = df.groupby(lambda x: x.time()).ffill()

In [14]:

Copied!

# Display the gap-filled frame (same shape and index as df).
df2

Out[14]:

	ghi	ta
datetime
2011-01-01 00:00:00	0.0	-0.6
2011-01-01 00:15:00	0.0	-0.4
2011-01-01 00:30:00	0.0	-0.5
2011-01-01 00:45:00	0.0	-0.5
2011-01-01 01:00:00	0.0	-0.7
...	...	...
2011-12-31 22:45:00	0.0	7.9
2011-12-31 23:00:00	0.0	7.9
2011-12-31 23:15:00	0.0	8.4
2011-12-31 23:30:00	0.0	8.5
2011-12-31 23:45:00	0.0	8.1

35040 rows × 2 columns

In [15]:

Copied!

# Same 'ghi' heatmap as before, now drawn from the gap-filled frame df2.
sns.heatmap(
        pd.pivot_table(df2, values='ghi', index=df2.index.time, columns=df2.index.dayofyear),
        annot=False);

In [16]:

Copied!

# Same 'ta' heatmap as before, now drawn from the gap-filled frame df2.
sns.heatmap(
        pd.pivot_table(df2, values='ta', index=df2.index.time, columns=df2.index.dayofyear),
        annot=False);

Title, labels, units¶

In [17]:

Copied!





import matplotlib.dates as mdates
# One tick per month, placed on the 15th so the label sits mid-month.
month_locator = mdates.MonthLocator(bymonthday=15)

# Rows are labelled "HH:MM" (instead of datetime.time objects), columns are
# the day of the year; the colour bar shows whole degrees with a unit.
ax = sns.heatmap(
    pd.pivot_table(df2, values='ta', index=df2.index.map(lambda x: x.strftime("%H:%M")),
                   columns=df2.index.dayofyear),
    annot=False,
    cbar_kws={'label': '', 'format': '%.0f °C'}
)

plt.title("Temperature in SchPark")
# Clear the axis labels that the heatmap derives from the pivot table.
plt.xlabel("")
plt.ylabel("")

# NOTE(review): the heatmap x axis holds plain column positions (one per day
# of the year), not dates. The date locator/formatter presumably interpret
# those positions as day offsets from matplotlib's date epoch, which is why
# month names appear - confirm against the matplotlib.dates documentation.
ax.xaxis.set_major_locator(month_locator)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%B"))

plt.show()

In [18]:

Copied!

# Mean temperature of the raw series (missing values are skipped).
df.ta.mean()

Out[18]:

12.796018797659162

In [19]:

Copied!

# Mean temperature after gap filling, for comparison with the raw mean.
df2.ta.mean()

Out[19]:

12.749155251141554

In [20]:

Copied!

# Values sorted from highest to lowest and plotted against their rank
# (use_index=False drops the timestamps from the x axis).
df.sort_values('ghi', ascending=False).ghi.plot(use_index=False, title='Sorted irradiance');

In [21]:

Copied!

# Values sorted from highest to lowest and plotted against their rank
# (use_index=False drops the timestamps from the x axis).
df.sort_values('ta', ascending=False).ta.plot(use_index=False, title = 'Sorted temperature');

In [22]:

Copied!

# Temperature against irradiance, one point per record.
df.plot(x='ghi', y='ta', xlabel='GHI [W/m²]', ylabel='Temperature', kind='scatter');

Warmest day¶

In [23]:

Copied!

# Highest temperature in the gap-filled series.
max_temp = df2.ta.max()
max_temp

Out[23]:

38.3

In [24]:

Copied!

# Calendar date of the first timestamp at which the maximum temperature occurs.
# idxmax() returns that timestamp directly, without recomputing the maximum
# and building a boolean mask over the whole frame.
warmest_date = df2.ta.idxmax().date()
warmest_date

Out[24]:

datetime.date(2011, 8, 23)

In [25]:

Copied!

# All records whose calendar date equals the warmest date.
warmest_day = df2[df2.index.date == warmest_date]
warmest_day

Out[25]:

	ghi	ta
datetime
2011-08-23 00:00:00	0.0	27.9
2011-08-23 00:15:00	0.0	27.7
2011-08-23 00:30:00	0.0	27.1
2011-08-23 00:45:00	0.0	26.7
2011-08-23 01:00:00	0.0	27.0
...	...	...
2011-08-23 22:45:00	0.0	28.2
2011-08-23 23:00:00	0.0	28.1
2011-08-23 23:15:00	0.0	28.0
2011-08-23 23:30:00	0.0	28.3
2011-08-23 23:45:00	0.0	28.4

96 rows × 2 columns

In [26]:

Copied!

# Irradiance profile over the warmest day.
warmest_day.ghi.plot(title='Irradiance during warmest day in SchPark, 2011');

In [27]:

Copied!

# Temperature profile over the warmest day.
warmest_day.ta.plot(title='Temperature during warmest day in SchPark, 2011');

Monthly temperature ridge lines¶

In [28]:

Copied!

# Histogram of all July temperatures (month number 7) from the gap-filled frame.
df2.ta[df2.index.month==7].plot.hist(bins=50, title='Temperature distribution in July [°C]');

In [29]:

Copied!





# getting necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def ridge_lines(weather, column_name, title, xaxis, label_x=-15, label_y=0.02):
    """Draw one overlapping density curve ("ridge line") per calendar month.

    Every month present in ``weather`` gets its own row with a kernel density
    estimate of ``column_name``. Rows run in calendar order and are coloured
    along the 'coolwarm' palette according to the month's mean value.

    Parameters
    ----------
    weather : pandas.DataFrame
        Frame whose index exposes ``.month`` (a ``DatetimeIndex``). It is
        copied before helper columns are added, so the caller's frame is
        left untouched.
    column_name : str
        Column whose monthly distribution is plotted.
    title : str
        Figure title.
    xaxis : str
        Label for the x axis of the bottom row.
    label_x, label_y : float, optional
        Position of each month label in the data coordinates of its row.
        The defaults are the values that used to be hard-coded; pass other
        values for columns with a different value range.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    ``sns.set_theme`` is called here, which changes the global plotting theme
    for everything drawn afterwards as well.
    """
    data = weather.copy()
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    # month number -> label shown next to each ridge
    month_dict = {1: 'january',
                  2: 'february',
                  3: 'march',
                  4: 'april',
                  5: 'may',
                  6: 'june',
                  7: 'july',
                  8: 'august',
                  9: 'september',
                  10: 'october',
                  11: 'november',
                  12: 'december'}

    data['month'] = data.index.month.map(month_dict)

    # Months that actually occur, in calendar order. Rows and labels are both
    # driven by this list, so data that starts mid-year or skips months is
    # still labelled correctly.
    month_numbers = sorted(data.index.month.dropna().unique())
    months_present = [month_dict[int(number)] for number in month_numbers]

    # The monthly mean is used as hue so that colder months get cooler colours.
    month_mean_serie = data.groupby('month')[column_name].mean()
    data['mean_month'] = data['month'].map(month_mean_serie)

    # One colour per plotted month, spanning the whole cool-to-warm range.
    pal = sns.color_palette(palette='coolwarm', n_colors=len(months_present))

    # One facet row per month; 'hue' selects the colour taken from 'palette'.
    g = sns.FacetGrid(data, row='month', hue='mean_month',
                      row_order=months_present,
                      aspect=15, height=0.75, palette=pal)

    # Filled density curve for each month ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          fill=True, alpha=1, linewidth=1.5)

    # ... outlined in white so overlapping ridges stay distinguishable ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          color="w", lw=2)

    # ... sitting on a coloured baseline.
    g.map(plt.axhline, y=0,
          lw=2, clip_on=False)

    # Write the month name on its own row, reusing the colour of the line
    # drawn last in that row (the baseline, which carries the hue colour).
    for ax, month_name in zip(g.axes.flat, months_present):
        ax.text(label_x, label_y, month_name,
                fontweight='bold', fontsize=15,
                color=ax.lines[-1].get_color())

    # Negative spacing makes the rows overlap.
    g.fig.subplots_adjust(hspace=-0.3)

    # Strip per-row titles, y ticks and spines.
    g.set_titles("")
    g.set_ylabels("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)

    # Only the bottom row carries the tick labels and the axis label.
    bottom_ax = g.axes.flat[-1]
    plt.setp(bottom_ax.get_xticklabels(), fontsize=15, fontweight='bold')
    bottom_ax.set_xlabel(xaxis, fontweight='bold', fontsize=15)
    # NOTE(review): fontweight=20 looks like a copy of the font size; 'bold'
    # may have been intended. Left unchanged to keep the rendered title as is.
    g.fig.suptitle(title,
                   ha='right',
                   fontsize=20,
                   fontweight=20)

    plt.show()
# getting necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def ridge_lines(weather, column_name, title, xaxis, label_x=-15, label_y=0.02):
    """Draw one overlapping density curve ("ridge line") per calendar month.

    Every month present in ``weather`` gets its own row with a kernel density
    estimate of ``column_name``. Rows run in calendar order and are coloured
    along the 'coolwarm' palette according to the month's mean value.

    Parameters
    ----------
    weather : pandas.DataFrame
        Frame whose index exposes ``.month`` (a ``DatetimeIndex``). It is
        copied before helper columns are added, so the caller's frame is
        left untouched.
    column_name : str
        Column whose monthly distribution is plotted.
    title : str
        Figure title.
    xaxis : str
        Label for the x axis of the bottom row.
    label_x, label_y : float, optional
        Position of each month label in the data coordinates of its row.
        The defaults are the values that used to be hard-coded; pass other
        values for columns with a different value range.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    ``sns.set_theme`` is called here, which changes the global plotting theme
    for everything drawn afterwards as well.
    """
    data = weather.copy()
    sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    # month number -> label shown next to each ridge
    month_dict = {1: 'january',
                  2: 'february',
                  3: 'march',
                  4: 'april',
                  5: 'may',
                  6: 'june',
                  7: 'july',
                  8: 'august',
                  9: 'september',
                  10: 'october',
                  11: 'november',
                  12: 'december'}

    data['month'] = data.index.month.map(month_dict)

    # Months that actually occur, in calendar order. Rows and labels are both
    # driven by this list, so data that starts mid-year or skips months is
    # still labelled correctly.
    month_numbers = sorted(data.index.month.dropna().unique())
    months_present = [month_dict[int(number)] for number in month_numbers]

    # The monthly mean is used as hue so that colder months get cooler colours.
    month_mean_serie = data.groupby('month')[column_name].mean()
    data['mean_month'] = data['month'].map(month_mean_serie)

    # One colour per plotted month, spanning the whole cool-to-warm range.
    pal = sns.color_palette(palette='coolwarm', n_colors=len(months_present))

    # One facet row per month; 'hue' selects the colour taken from 'palette'.
    g = sns.FacetGrid(data, row='month', hue='mean_month',
                      row_order=months_present,
                      aspect=15, height=0.75, palette=pal)

    # Filled density curve for each month ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          fill=True, alpha=1, linewidth=1.5)

    # ... outlined in white so overlapping ridges stay distinguishable ...
    g.map(sns.kdeplot, column_name,
          bw_adjust=1, clip_on=False,
          color="w", lw=2)

    # ... sitting on a coloured baseline.
    g.map(plt.axhline, y=0,
          lw=2, clip_on=False)

    # Write the month name on its own row, reusing the colour of the line
    # drawn last in that row (the baseline, which carries the hue colour).
    for ax, month_name in zip(g.axes.flat, months_present):
        ax.text(label_x, label_y, month_name,
                fontweight='bold', fontsize=15,
                color=ax.lines[-1].get_color())

    # Negative spacing makes the rows overlap.
    g.fig.subplots_adjust(hspace=-0.3)

    # Strip per-row titles, y ticks and spines.
    g.set_titles("")
    g.set_ylabels("")
    g.set(yticks=[])
    g.despine(bottom=True, left=True)

    # Only the bottom row carries the tick labels and the axis label.
    bottom_ax = g.axes.flat[-1]
    plt.setp(bottom_ax.get_xticklabels(), fontsize=15, fontweight='bold')
    bottom_ax.set_xlabel(xaxis, fontweight='bold', fontsize=15)
    # NOTE(review): fontweight=20 looks like a copy of the font size; 'bold'
    # may have been intended. Left unchanged to keep the rendered title as is.
    g.fig.suptitle(title,
                   ha='right',
                   fontsize=20,
                   fontweight=20)

    plt.show()

In [30]:

Copied!

# Monthly temperature distributions, drawn from the raw frame df.
ridge_lines(df, 'ta', 'Temperature distribution in Scharnhauser Park (2011)', 'Temperature in degree Celsius')