Source code for tell.visualization

import os

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from glob import glob
from mpl_toolkits.axes_grid1 import make_axes_locatable


[docs]def plot_ba_service_territory(ba_to_plot: str, year_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot maps of the service territory for a given BA in a given year :param ba_to_plot: Code for the BA you want to plot :type ba_to_plot: str :param year_to_plot: Year you want to plot (valid 2015-2019) :type year_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the input directories based on the 'data_input_dir' variable: shapefile_input_dir = os.path.join(data_input_dir, r'tell_raw_data', r'County_Shapefiles') population_input_dir = os.path.join(data_input_dir, r'tell_raw_data', r'Population') ba_service_territory_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'ba_service_territory') # Read in the county shapefile and reassign the 'FIPS' variable as integers: counties_df = gpd.read_file(os.path.join(shapefile_input_dir, r'tl_2020_us_county.shp')).rename(columns={'GEOID': 'County_FIPS'}) counties_df['County_FIPS'] = counties_df['County_FIPS'].astype(int) # Read in county populations file: population_df = pd.read_csv(os.path.join(population_input_dir, r'county_populations_2000_to_2020.csv')) # Keep only the columns we need: population_df = population_df[['county_FIPS', ('pop_' + year_to_plot)]].copy(deep=False) # Rename the columns: population_df.rename(columns={"county_FIPS": "County_FIPS", ('pop_' + year_to_plot): "Population"}, inplace=True) # Read in the BA mapping file: ba_mapping_df = pd.read_csv((os.path.join(ba_service_territory_input_dir, f'ba_service_territory_{str(year_to_plot)}.csv')), index_col=None, header=0) # Merge the ba_mapping_df and population_df together using county FIPS codes to join them: ba_mapping_df = ba_mapping_df.merge(population_df, on='County_FIPS', how='left') # Merge the ba_mapping_df and counties_df together using county FIPS codes to join them: counties_df = counties_df.merge(ba_mapping_df, on='County_FIPS', how='left') # Subset to only the BA you want to plot: counties_subset_df = counties_df.loc[counties_df['BA_Code'] == ba_to_plot] # Create the figure: fig, ax = plt.subplots(1, 1, figsize=(25, 10)) divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="3%", pad=0.1) ax1 = counties_subset_df.plot(column='Population', cmap='GnBu', ax=ax, cax=cax, edgecolor='grey', linewidth=0.5, legend=True, legend_kwds={'label': ('County Population in ' + year_to_plot), 'orientation': 'vertical'}) ax1.set_title((ba_to_plot + ' Service Territory in ' + year_to_plot)) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: filename = (ba_to_plot + '_Service_Territory_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_mlp_summary_statistics(validation_df, image_output_dir: str, image_resolution: int, save_images=False): """Plot the summary statistics of the MLP evaluation data across BAs :param validation_df: Validation dataframe produced by the batch training of MLP models for all BAs :type validation_dft: df :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Create an x-axis the length of the dataframe to be used in plotting: x_axis = np.arange(len(validation_df)) # Make the plot: plt.figure(figsize=(25, 10)) plt.subplot(221) plt.bar(x_axis, validation_df.sort_values(by=['R2'], ascending=True)['R2'], 0.75) plt.xticks(x_axis, validation_df.sort_values(by=['R2'], ascending=True)['BA'], rotation=90) plt.grid() plt.xlabel('Balancing Authority') plt.ylabel('R2 Score') plt.title('Coefficient of Determination') plt.subplot(222) plt.bar(x_axis, validation_df.sort_values(by=['MAPE'], ascending=True)['MAPE'], 0.75) plt.xticks(x_axis, validation_df.sort_values(by=['MAPE'], ascending=True)['BA'], rotation=90) plt.grid() plt.xlabel('Balancing Authority') plt.ylabel('MAPE') plt.title('Mean Absolute Percentage Error') plt.subplot(223) plt.bar(x_axis, validation_df.sort_values(by=['RMS_ABS'], ascending=True)['RMS_ABS'], 0.75) plt.xticks(x_axis, validation_df.sort_values(by=['RMS_ABS'], ascending=True)['BA'], rotation=90) plt.grid() plt.xlabel('Balancing Authority') plt.ylabel('Absolute RMS Error [MWh]') plt.title('Absolute Root-Mean-Squared Error') plt.subplot(224) plt.bar(x_axis, validation_df.sort_values(by=['RMS_NORM'], ascending=True)['RMS_NORM'], 0.75) plt.xticks(x_axis, validation_df.sort_values(by=['RMS_NORM'], ascending=True)['BA'], rotation=90) plt.grid() plt.xlabel('Balancing Authority') plt.ylabel('Normalized RMS Error') plt.title('Normalized Root-Mean-Squared Error') plt.subplots_adjust(wspace=0.15, hspace=0.4) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images: plt.savefig(os.path.join(image_output_dir, 'MLP_Summary_Statistics.png'), dpi=image_resolution, bbox_inches='tight', facecolor='white')
[docs]def plot_mlp_errors_vs_load(prediction_df, validation_df, image_output_dir: str, image_resolution: int, save_images=False): """Plot the summary statistics of the MLP evaluation data as a function of mean load :param prediction_df: Prediction dataframe produced by the batch training of MLP models for all BAs :type prediction_df: df :param validation_df: Validation dataframe produced by the batch training of MLP models for all BAs :type validation_df: df :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Compute the mean hourly load for each BA: prediction_df['Mean_Load_MWh'] = prediction_df.groupby('region')['predictions'].transform('mean') # Rename the region variable: prediction_df.rename(columns={'region': 'BA'}, inplace=True) # Keep on the variables we need: mean_load_df = prediction_df[['BA', 'Mean_Load_MWh']].copy().drop_duplicates() # Merge the mean load data into the validation dataframe: validation_df = validation_df.merge(mean_load_df, on=['BA']) # Make the plot: plt.figure(figsize=(25, 10)) plt.subplot(221) plt.scatter(validation_df['Mean_Load_MWh'], validation_df['R2'], s=15, c='blue') plt.grid() plt.xlabel('Mean Hourly Load [MWh]') plt.ylabel('R2 Score') plt.title('Coefficient of Determination') plt.subplot(222) plt.scatter(validation_df['Mean_Load_MWh'], validation_df['MAPE'], s=15, c='blue') plt.grid() plt.xlabel('Mean Hourly Load [MWh]') plt.ylabel('MAPE') plt.title('Mean Absolute Percentage Error') plt.subplot(223) plt.scatter(validation_df['Mean_Load_MWh'], validation_df['RMS_ABS'], s=15, c='blue') plt.grid() plt.xlabel('Mean Hourly Load [MWh]') plt.ylabel('Absolute RMS Error [MWh]') plt.title('Absolute Root-Mean-Squared Error') plt.subplot(224) plt.scatter(validation_df['Mean_Load_MWh'], validation_df['RMS_NORM'], s=15, c='blue') plt.grid() plt.xlabel('Mean Hourly Load [MWh]') plt.ylabel('Normalized RMS Error') plt.title('Normalized Root-Mean-Squared Error') plt.subplots_adjust(wspace=0.15, hspace=0.4) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images: plt.savefig(os.path.join(image_output_dir, 'MLP_Summary_Statistics_vs_Load.png'), dpi=image_resolution, bbox_inches='tight', facecolor='white') return validation_df
[docs]def plot_mlp_ba_time_series(prediction_df, ba_to_plot: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the performance metrics for an individual BA :param prediction_df: Prediction dataframe produced by the batch training of MLP models for all BAs :type prediction_df: df :param ba_to_plot: Code for the BA you want to plot :type ba_to_plot: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Rename the region variable: prediction_df.rename(columns={'region': 'BA'}, inplace=True) # Subset to just the data for the BA you want to plot subset_df = prediction_df[prediction_df['BA'].isin([ba_to_plot])] one_to_one = np.arange(0, 200000, 1000) # Make the plot: plt.figure(figsize=(25, 10)) plt.subplot(211) plt.plot(subset_df['datetime'], subset_df['ground_truth'], 'r', linewidth=0.5, label='Observed') plt.plot(subset_df['datetime'], subset_df['predictions'], 'b', linewidth=0.5, label='Predicted') plt.xlim(subset_df['datetime'].dropna().min(), subset_df['datetime'].dropna().max()) plt.legend() plt.xlabel('Time') plt.ylabel('Demand [MWh]') plt.title('Hourly Demand Time Series in ' + ba_to_plot) plt.subplot(223) plt.hist(subset_df['ground_truth'], bins=40, density=True, histtype='step', edgecolor = 'r', label='Observed', linewidth=3) plt.hist(subset_df['predictions'], bins=40, density=True, histtype='step', edgecolor = 'b', label='Predicted', linewidth=3) plt.legend() plt.xlabel('Demand [MWh]') plt.ylabel('Frequency') plt.title('Hourly Demand Distribution in ' + ba_to_plot) plt.subplot(224) plt.scatter(subset_df['ground_truth'], subset_df['predictions'], s=15, c='blue', label='Hourly Sample') plt.plot(one_to_one,one_to_one,'k', linewidth=3, label = '1:1') plt.plot(one_to_one, (one_to_one*1.1), 'k', linewidth=3, linestyle='--', label = '1:1 - 10%') plt.plot(one_to_one, (one_to_one*0.9), 'k', linewidth=3, linestyle='--', label = '1:1 + 10%') plt.legend() plt.xlim(0.98*subset_df[['ground_truth', 'predictions']].min().min(), 1.02*subset_df[['ground_truth', 'predictions']].max().max()) plt.ylim(0.98*subset_df[['ground_truth', 'predictions']].min().min(), 1.02*subset_df[['ground_truth', 'predictions']].max().max()) plt.xlabel('Observed Hourly Demand [MWh]') plt.ylabel('Predicted Hourly Demand [MWh]') plt.title('Hourly Demand Relationship in ' + ba_to_plot) plt.subplots_adjust(wspace=0.15, hspace=0.4) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images: plt.savefig(os.path.join(image_output_dir, ba_to_plot + '_Time_Series.png'), dpi=image_resolution, bbox_inches='tight', facecolor='white')
[docs]def plot_mlp_ba_peak_week(prediction_df, ba_to_plot: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the time-series of load during the peak week of the year for a given BA. :param prediction_df: Prediction dataframe produced by the batch training of MLP models for all BAs :type prediction_df: df :param ba_to_plot: Code for the BA you want to plot :type ba_to_plot: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Rename the region variable: prediction_df.rename(columns={'region': 'BA'}, inplace=True) # Subset to just the data for the BA you want to plot subset_df = prediction_df[prediction_df['BA'].isin([ba_to_plot])].copy() # Smooth the predictions using exponentially-weighted windows: subset_df['Rolling_Mean'] = subset_df['predictions'].ewm(span=168).mean() # Find the index of the maximum value of the rolling mean: index = subset_df['Rolling_Mean'].idxmax(axis=0) if index > 84: start = (index -84) else: start = 0 if index < (len(subset_df)-84): end = (index + 84) else: end = len(subset_df) peak_df = subset_df[start:end] # Make the plot: plt.figure(figsize=(25, 10)) plt.plot(peak_df['datetime'], peak_df['ground_truth'], 'r', linewidth=3, label='Observed') plt.plot(peak_df['datetime'], peak_df['predictions'], 'b', linewidth=3, label='Predicted') plt.xlim(peak_df['datetime'].dropna().min(), peak_df['datetime'].dropna().max()) plt.legend() plt.xlabel('Time') plt.ylabel('Demand [MWh]') plt.title('Peak Demand Week in ' + ba_to_plot) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images: plt.savefig(os.path.join(image_output_dir, ba_to_plot + '_Peak_Week.png'), dpi=image_resolution, bbox_inches='tight', facecolor='white')
[docs]def plot_state_scaling_factors(year_to_plot: str, scenario_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the scaling factor that force TELL annual total state loads to agree with GCAM-USA :param year_to_plot: Year you want to plot (valid 2039, 2059, 2079, 2099) :type year_to_plot: str :param scenario_to_plot: Scenario you want to plot :type scenario_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the data input directories for the various variables you need: tell_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'tell_output', scenario_to_plot, year_to_plot) # Read in the states shapefile and change the geolocation variable name to state FIPS code: states_df = gpd.read_file(os.path.join(data_input_dir, r'tell_raw_data', r'State_Shapefiles', r'tl_2020_us_state.shp')).rename(columns={'GEOID': 'State_FIPS'}) # Convert the state FIPS code to an integer and multiply it by 1000: states_df['State_FIPS'] = states_df['State_FIPS'].astype(int) * 1000 # Read in the 'TELL_State_Summary_Data' .csv file and reassign the 'State_FIPS' code as an integer: state_summary_df = pd.read_csv((tell_data_input_dir + '/' + 'TELL_State_Summary_Data_' + year_to_plot + '.csv'), dtype={'State_FIPS': int}) # Merge the two dataframes together using state FIPS codes to join them: states_df = states_df.merge(state_summary_df, on='State_FIPS', how='left') # Make the plot: fig, ax = plt.subplots(1, 1, figsize=(25, 10)) divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="3%", pad=0.1) ax1 = states_df.plot(column='Scaling_Factor', cmap='RdBu_r', ax=ax, cax=cax, edgecolor='grey', vmin=0.5, vmax=1.5, linewidth=0.5, legend=True, legend_kwds={'label': 'TELL Scaling Factor', 'orientation': 'vertical'}) ax1.set_title(('State-Level Scaling Factors in ' + year_to_plot)) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: filename = ('TELL_State_Scaling_Factors_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_state_annual_total_loads(year_to_plot: str, scenario_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot annual total loads from both GCAM-USA and TELL :param year_to_plot: Year you want to plot (valid 2039, 2059, 2079, 2099) :type year_to_plot: str :param scenario_to_plot: Scenario you want to plot :type scenario_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the data input directories for the various variables you need: tell_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'tell_output', scenario_to_plot, year_to_plot) # Read in the 'TELL_State_Summary_Data' .csv file and reassign the 'State_FIPS' code as an integer: state_summary_df = pd.read_csv((tell_data_input_dir + '/' + 'TELL_State_Summary_Data_' + year_to_plot + '.csv'), dtype={'State_FIPS': int}) # Create an x-axis the length of the dataframe to be used in plotting: x_axis = np.arange(len(state_summary_df)) # Make the plot: plt.figure(figsize=(25, 10)) plt.bar(x_axis - 0.2, state_summary_df['GCAM_USA_Load_TWh'], 0.4, label='GCAM-USA Loads') plt.bar(x_axis + 0.2, state_summary_df['Raw_TELL_Load_TWh'], 0.4, label='Unscaled TELL Loads') plt.xticks(x_axis, state_summary_df['State_Name']) plt.xticks(rotation=90) plt.legend() plt.ylabel("Annual Total Load [TWh]") plt.title(('Annual Total Loads from GCAM-USA and TELL in ' + year_to_plot)) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: filename = ('TELL_State_Annual_Total_Loads_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_state_load_time_series(state_to_plot: str, year_to_plot: str, scenario_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the time series of load for a given state :param state_to_plot: State you want to plot :type state_to_plot: str :param year_to_plot: Year you want to plot (valid 2039, 2059, 2079, 2099) :type year_to_plot: str :param scenario_to_plot: Scenario you want to plot :type scenario_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the data input directories for the various variables you need: tell_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'tell_output', scenario_to_plot, year_to_plot) # Read in the 'TELL_State_Summary_Data' .csv file parse the time variable: state_hourly_load_df = pd.read_csv((tell_data_input_dir + '/' + 'TELL_State_Hourly_Load_Data_' + year_to_plot + '.csv'), parse_dates=["Time_UTC"]) # Subset the dataframe to only the state you want to plot: state_subset_df = state_hourly_load_df.loc[state_hourly_load_df['State_Name'] == state_to_plot] # Make the plot: fig, ax = plt.subplots(2, figsize=(25, 10), sharex=True, sharey=True) ax[0].plot(state_subset_df['Time_UTC'], state_subset_df['Raw_TELL_State_Load_MWh'], 'k-', label='Raw Load', linewidth=0.5) ax[1].plot(state_subset_df['Time_UTC'], state_subset_df['Scaled_TELL_State_Load_MWh'], 'k-', label='Scaled Load', linewidth=0.5) ax[0].set_title((state_subset_df['State_Name'].iloc[0] + ' Raw TELL Loads in ' + year_to_plot)) ax[1].set_title((state_subset_df['State_Name'].iloc[0] + ' Scaled TELL Loads in ' + year_to_plot)) ax[0].set_ylabel('Hourly Load [MWh]') ax[1].set_ylabel('Hourly Load [MWh]') # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: state_name = state_subset_df['State_Name'].iloc[0] state_name = state_name.replace(" ", "_") filename = ('TELL_State_Hourly_Loads_' + state_name + '_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_state_load_duration_curve(state_to_plot: str, year_to_plot: str, scenario_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the load duration curve for a given state :param state_to_plot: State you want to plot :type state_to_plot: str :param year_to_plot: Year you want to plot (valid 2039, 2059, 2079, 2099) :type year_to_plot: str :param scenario_to_plot: Scenario you want to plot :type scenario_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the data input directories for the various variables you need: tell_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'tell_output', scenario_to_plot, year_to_plot) # Read in the 'TELL_State_Summary_Data' .csv file and parse the time variable: state_hourly_load_df = pd.read_csv((tell_data_input_dir + '/' + 'TELL_State_Hourly_Load_Data_' + year_to_plot + '.csv'), parse_dates=["Time_UTC"]) # Subset the dataframe to only the state you want to plot: state_subset_df = state_hourly_load_df.loc[state_hourly_load_df['State_Name'] == state_to_plot] # Sort the hourly load values from largest to smallest and compute the hourly duration for each value: load_df_sorted = state_subset_df.sort_values(by=['Scaled_TELL_State_Load_MWh'], ascending=False) load_df_sorted['Interval'] = 1 load_df_sorted['Duration'] = load_df_sorted['Interval'].cumsum() # Make the plot: plt.figure(figsize=(25, 10)) plt.plot(load_df_sorted['Duration'], load_df_sorted['Raw_TELL_State_Load_MWh'], 'k-', label='Raw Load', linewidth=0.5) plt.xlabel("Duration [h]") plt.ylabel("Scaled State Hourly Load [MWh]") plt.title((state_subset_df['State_Name'].iloc[0] + ' Load Duration Curve in ' + year_to_plot)) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: state_name = state_subset_df['State_Name'].iloc[0] state_name = state_name.replace(" ", "_") filename = ('TELL_State_Load_Duration_Curve_' + state_name + '_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_ba_load_time_series(ba_to_plot: str, year_to_plot: str, scenario_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the time series of load for a given Balancing Authority :param ba_to_plot: Balancing Authority code for the BA you want to plot :type ba_to_plot: str :param year_to_plot: Year you want to plot (valid 2039, 2059, 2079, 2099) :type year_to_plot: str :param scenario_to_plot: Scenario you want to plot :type scenario_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the data input directories for the various variables you need: tell_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'tell_output', scenario_to_plot, year_to_plot) # Read in the 'TELL_Balancing_Authority_Hourly_Load_Data' .csv file and parse the time variable: ba_hourly_load_df = pd.read_csv((tell_data_input_dir + '/' + 'TELL_Balancing_Authority_Hourly_Load_Data_' + year_to_plot + '.csv'), parse_dates=["Time_UTC"]) # Subset the dataframe to only the BA you want to plot: ba_subset_df = ba_hourly_load_df.loc[ba_hourly_load_df['BA_Code'] == ba_to_plot] # Make the plot: fig, ax = plt.subplots(2, figsize=(25, 10), sharex=True, sharey=True) ax[0].plot(ba_subset_df['Time_UTC'], ba_subset_df['Raw_TELL_BA_Load_MWh'], 'k-', label='Raw Load', linewidth=0.5) ax[1].plot(ba_subset_df['Time_UTC'], ba_subset_df['Scaled_TELL_BA_Load_MWh'], 'k-', label='Scaled Load', linewidth=0.5) ax[0].set_title((ba_to_plot + ' Raw TELL Loads in ' + year_to_plot)) ax[1].set_title((ba_to_plot + ' Scaled TELL Loads in ' + year_to_plot)) ax[0].set_ylabel('Hourly Load [MWh]') ax[1].set_ylabel('Hourly Load [MWh]') # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: filename = ('TELL_BA_Hourly_Loads_' + ba_to_plot + '_' + year_to_plot + '.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')
[docs]def plot_ba_variable_correlations(ba_to_plot: str, data_input_dir: str, image_output_dir: str, image_resolution: int, save_images=False): """Plot the correlation matrix between predictive variables and observed demand for individual or all BAs. :param ba_to_plot: BA code for the BA you want to plot. Set to "All" to plot the average correlation across all BAs. :type ba_to_plot: str :param data_input_dir: Top-level data directory for TELL :type data_input_dir: str :param image_output_dir: Directory to store the images :type image_output_dir: str :param image_resolution: Resolution at which you want to save the images in DPI :type image_resolution: int :param save_images: Set to True if you want to save the images after they're generated :type save_images: bool """ # Set the input directory based on the 'data_input_dir' variable: compiled_data_input_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'compiled_historical_data') if ba_to_plot != 'All': # Read in compiled historical data file for the BA you want to plot: df = pd.read_csv(os.path.join(compiled_data_input_dir, f'{ba_to_plot}_historical_data.csv')) # Rename the a few columns for simplicity: df.rename(columns={"Adjusted_Demand_MWh": "Demand"}, inplace=True) df.rename(columns={"Total_Population": "Population"}, inplace=True) # Drop out the columns we don't need anymore: df.drop(['Forecast_Demand_MWh', 'Adjusted_Generation_MWh', 'Adjusted_Interchange_MWh'], axis=1, inplace=True) # Calculate the correlation matrix of the dataframe: corr = df.corr() else: # Loop over the compiled historical data files in the input directory: for idx, file in enumerate(glob(f'{compiled_data_input_dir}/*.csv')): # Read in the .csv file: dfx = pd.read_csv(os.path.join(compiled_data_input_dir, file)) # Rename the a few columns for simplicity: dfx.rename(columns={"Adjusted_Demand_MWh": "Demand"}, inplace=True) dfx.rename(columns={"Total_Population": "Population"}, inplace=True) # Drop out the columns we don't need anymore: dfx.drop(['Forecast_Demand_MWh', 'Adjusted_Generation_MWh', 'Adjusted_Interchange_MWh'], axis=1, inplace=True) # Calculate the correlation matrix of the dataframe: corrx = dfx.corr() # Concatenate the correlation matrix across BAs: if idx == 0: corrall = corrx.copy() else: corrall = np.dstack((corrall, corrx)) del dfx, corrx # Calculate the average correlation matrix across all BAs and convert that value to a pd dataframe for plotting: corr = pd.DataFrame(np.nanmean(corrall, axis=2), columns=['Year', 'Month', 'Day', 'Hour', 'Demand', 'Population', 'T2', 'Q2', 'SWDOWN', 'GLW', 'WSPD']) # Fill diagonal and upper half with NaNs mask = np.zeros_like(corr, dtype=bool) mask[np.triu_indices_from(mask)] = True corr[mask] = np.nan f = plt.figure(figsize=(25, 10)) plt.matshow(corr, fignum=f.number, cmap='RdBu_r', vmin=-1, vmax=1) cb = plt.colorbar() cb.ax.tick_params(labelsize=14) if ba_to_plot != 'All': plt.title('Correlation Matrix in the ' + ba_to_plot + ' Balancing Authority', fontsize=16); else: plt.title('Average Correlation Matrix Across All Balancing Authorities in TELL', fontsize=16); plt.xticks(range(len(corr.columns)), corr.columns, rotation='vertical') plt.yticks(range(len(corr.columns)), corr.columns) # If the "save_images" flag is set to true then save the plot to a .png file: if save_images == True: filename = (ba_to_plot + '_Correlation_Matrix.png') plt.savefig(os.path.join(image_output_dir, filename), dpi=image_resolution, bbox_inches='tight')