Source code for tell.data_spatial_mapping

import os

import numpy as np
import pandas as pd

from pandas import DataFrame


def process_spatial_mapping(target_year: int, fips_file: str, service_area_file: str, sales_ult_file: str,
                            bal_auth_file: str, output_dir: str):
    """Workflow function to execute the mapping of BAs to counties for a given year

    :param target_year:        Year to process; four-digit year (e.g., 1990)
    :type target_year:         int

    :param fips_file:          County FIPS code .csv file
    :type fips_file:           str

    :param service_area_file:  Balancing authority service area Excel file
    :type service_area_file:   str

    :param sales_ult_file:     Balancing authority sales to ultimate customer Excel file
    :type sales_ult_file:      str

    :param bal_auth_file:      Balancing authority and ID codes Excel file
    :type bal_auth_file:       str

    :param output_dir:         Directory to store the output .csv file
    :type output_dir:          str

    """

    # Read in the raw data from the "Service_Territory_YYYY.xlsx" spreadsheet; the sheet name
    # varies between EIA-861 vintages:
    try:
        df_territory = pd.read_excel(service_area_file, sheet_name='Counties')
    except Exception:
        df_territory = pd.read_excel(service_area_file, sheet_name='Counties_States')

    # Only keep the columns that are needed:
    df_territory = df_territory[['Utility Number', 'State', 'County', 'Data Year']].copy()

    # Read in the raw data from the "Sales_Ult_Cust_YYYY.xlsx" spreadsheet:
    df_ultcust = pd.read_excel(sales_ult_file, sheet_name='States', skiprows=2)

    # Rename the "BA_CODE" column used in EIA-861 data before 2017 (a no-op for later years):
    df_ultcust = df_ultcust.rename(columns={'BA_CODE': 'BA Code'})

    # Only keep the columns that are needed:
    df_ultcust = df_ultcust[['Utility Number', 'State', 'BA Code']].copy()

    # Read in the raw data from the "Balancing_Authority_YYYY.xlsx" spreadsheet:
    df_ba = pd.read_excel(bal_auth_file)

    # Rename the "BA_CODE" and "BA ID" columns used in EIA-861 data before 2017 (a no-op for later years):
    df_ba = df_ba.rename(columns={'BA_CODE': 'BA Code', 'BA ID': 'BA_Number'})

    # Only keep the columns that are needed:
    df_ba = df_ba[['State', 'BA Code', 'BA_Number', 'Balancing Authority Name']].copy()

    # Merge the 'df_territory' and 'df_ultcust' dataframes based on common utility numbers and states:
    df_territory_ultcust = pd.merge(df_territory, df_ultcust, how='left',
                                    left_on=['Utility Number', 'State'],
                                    right_on=['Utility Number', 'State'])

    # Merge the 'df_territory_ultcust' and 'df_ba' dataframes based on common BA codes and states:
    df_territory_ultcust_ba = pd.merge(df_territory_ultcust, df_ba, how='left',
                                       left_on=['BA Code', 'State'],
                                       right_on=['BA Code', 'State'])

    # Only keep the columns that are needed:
    df_territory_ultcust_ba = df_territory_ultcust_ba[
        ['Data Year', 'State', 'County', 'BA Code', 'Balancing Authority Name', 'BA_Number']].copy()

    # Drop the duplicates that result from more than one utility-BA combination per county:
    df_territory_ultcust_ba = df_territory_ultcust_ba.drop_duplicates()

    # Drop the rows that have a NaN value for "BA Code":
    df_territory_ultcust_ba = df_territory_ultcust_ba.dropna(subset=['BA Code'])

    # Make the "County" variable consistently lower case:
    df_territory_ultcust_ba['County'] = df_territory_ultcust_ba['County'].apply(lambda x: x.lower())

    # Remove apostrophes from the "County" variable:
    df_territory_ultcust_ba['County'] = df_territory_ultcust_ba['County'].apply(lambda x: x.replace("'", ""))

    # Replace spaces and dashes with underscores in the "County" variable:
    df_territory_ultcust_ba['County'] = df_territory_ultcust_ba['County'].apply(lambda x: x.replace(" ", "_"))
    df_territory_ultcust_ba['County'] = df_territory_ultcust_ba['County'].apply(lambda x: x.replace("-", "_"))

    # Rename one county that has two spaces in the raw EIA-861 data:
    df_territory_ultcust_ba['County'] = df_territory_ultcust_ba['County'].apply(
        lambda x: x.replace("st__helena", "st_helena"))

    # Read in the state and county FIPS code .csv file:
    df_fips = pd.read_csv(fips_file)

    # Make the "county_name" variable consistently lower case and copy it to a new variable:
    df_fips['county_match'] = df_fips['county_name'].apply(lambda x: x.lower())

    # Strip the words "county", "borough", "parish", "municipality", and "census area" from the
    # "county_match" variable:
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.split(' county')[0])
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.split(' borough')[0])
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.split(' parish')[0])
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.split(' municipality')[0])
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.split(' census area')[0])

    # Rename two counties that contain the prefix "De":
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace("de soto", "desoto"))
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace("de witt", "dewitt"))

    # Remove apostrophes and periods from the "county_match" variable:
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace("'", ""))
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace(".", ""))

    # Replace spaces and dashes with underscores in the "county_match" variable:
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace(" ", "_"))
    df_fips['county_match'] = df_fips['county_match'].apply(lambda x: x.replace("-", "_"))

    # Rename a few columns to match what is in the 'df_territory_ultcust_ba' dataframe:
    df_fips = df_fips.rename(columns={'county_match': 'County', 'state_abbreviation': 'State'})

    # Merge the 'df_territory_ultcust_ba' and 'df_fips' dataframes based on common county names
    # and state abbreviations:
    df_merged = pd.merge(df_territory_ultcust_ba, df_fips, how='left',
                         left_on=['County', 'State'],
                         right_on=['County', 'State'])

    # Only keep the columns that are needed:
    df_output = df_merged[
        ['Data Year', 'state_name', 'state_FIPS', 'county_name', 'county_FIPS', 'BA_Number', 'BA Code']].copy()

    # Rename the columns using a consistent format:
    df_output.rename(columns={"Data Year": "Year",
                              "state_FIPS": "State_FIPS",
                              "state_name": "State_Name",
                              "county_FIPS": "County_FIPS",
                              "county_name": "County_Name",
                              "BA Code": "BA_Code"}, inplace=True)

    # Reorder the columns:
    df_output = df_output[
        ['Year', 'State_FIPS', 'State_Name', 'County_FIPS', 'County_Name', 'BA_Number', 'BA_Code']].copy(deep=False)

    # Sort the dataframe by BA number first and then county FIPS code:
    df_output = df_output.sort_values(by=["BA_Number", "County_FIPS"])

    # Write the spatial mapping output to a .csv file:
    output_file = os.path.join(output_dir, f'ba_service_territory_{target_year}.csv')
    df_output.to_csv(output_file, sep=',', index=False)
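
# Illustrative sketch (not called by the workflow above): the county-level merge in
# "process_spatial_mapping" only works because county strings are normalized the same
# way on both sides of the join. The helper below condenses that normalization into one
# place; the function name is hypothetical and is shown only to clarify the matching
# convention used for the FIPS-side county names.
def _normalize_county_name(name: str) -> str:
    """Lower-case a county name, strip common suffixes, and standardize punctuation."""

    # Lower-case the name and drop the descriptive suffixes used in the FIPS file:
    name = name.lower()
    for suffix in (' county', ' borough', ' parish', ' municipality', ' census area'):
        name = name.split(suffix)[0]

    # Collapse the "De " prefixes, drop apostrophes and periods, and standardize separators:
    name = name.replace("de soto", "desoto").replace("de witt", "dewitt")
    name = name.replace("'", "").replace(".", "")
    name = name.replace(" ", "_").replace("-", "_")

    return name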
def map_ba_service_territory(start_year: int, end_year: int, data_input_dir: str):
    """Workflow function to run the "process_spatial_mapping" function to map BAs to counties

    :param start_year:      Year to start the process; four-digit year (e.g., 1990)
    :type start_year:       int

    :param end_year:        Year to end the process; four-digit year (e.g., 1990)
    :type end_year:         int

    :param data_input_dir:  Top-level data directory for TELL
    :type data_input_dir:   str

    """

    # Set the output directory based on the "data_input_dir" variable:
    output_dir = os.path.join(data_input_dir, r'tell_quickstarter_data', r'outputs', r'ba_service_territory')

    # If the output directory doesn't exist then create it:
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Create a vector of years to process:
    years_to_process = range(start_year, end_year + 1)

    # Loop over the range of years to process:
    for target_year in years_to_process:
        # Set the paths to the input files for that year:
        fips_file = os.path.join(data_input_dir, r'tell_raw_data', 'state_and_county_fips_codes.csv')
        service_area_file = os.path.join(data_input_dir, r'tell_raw_data', r'EIA_861', f'{target_year}',
                                         f'Service_Territory_{target_year}.xlsx')
        sales_ult_file = os.path.join(data_input_dir, r'tell_raw_data', r'EIA_861', f'{target_year}',
                                      f'Sales_Ult_Cust_{target_year}.xlsx')
        bal_auth_file = os.path.join(data_input_dir, r'tell_raw_data', r'EIA_861', f'{target_year}',
                                     f'Balancing_Authority_{target_year}.xlsx')

        # Run the "process_spatial_mapping" function for that year:
        process_spatial_mapping(target_year, fips_file, service_area_file, sales_ult_file, bal_auth_file, output_dir)
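
# Minimal usage sketch: the "tell_data" path and the 2015-2020 range below are hypothetical
# and assume the EIA-861 workbooks for those years exist under tell_raw_data/EIA_861/<year>/.
# One .csv file per year is written to tell_quickstarter_data/outputs/ba_service_territory/.
if __name__ == '__main__':
    map_ba_service_territory(start_year=2015,
                             end_year=2020,
                             data_input_dir='tell_data')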