diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 35940e4bda8dc09cd647edc02a0a28840bc5c94e..b1f2c0ed900586546bd29090b98f6f04ecc7a54e 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1,4 +1,6 @@ -"""This file calcuate bill disagregation for multifamily buildings""" +"""This file calcuate bill disagregation for multifamily buildings + As of 2019/07/24, this is the file which lives in BlocLink and interact with BlocPower Dashboard. +""" import warnings from datetime import timedelta @@ -117,7 +119,7 @@ class BillDisaggregation(): """ - if indoor_set_point > 55: + if indoor_set_point > 65: if indoor_set_point < outdoor_temp: return outdoor_temp - indoor_set_point return 0 @@ -938,7 +940,7 @@ class BillDisaggregation(): monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] - monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) + monthly_output = monthly_output.sort_values('Bill From Date').reset_index(drop=True) return monthly_output def non_weahter_related_breakdown(self, end_uses, monthly_output_table): @@ -1089,7 +1091,7 @@ class BillDisaggregation(): Return the index of the row of raw bill contains the bill date from a normalized bill """ for index, bill in self.formatted_bill.iterrows(): - if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: + if bill['Bill From Date'] <= norm_bill_date <= bill['Bill To Date']: return index return None @@ -1110,17 +1112,26 @@ class BillDisaggregation(): """ weights = [] total_days = [] + for ind in range(len(index_numdays)): unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] days_in_that_period = int(index_numdays[ind]['num_days']) weights.append(unit_price * days_in_that_period) total_days.append(days_in_that_period) weighted_unit_price = sum(weights)/sum(total_days) + 
return weighted_unit_price def find_bills_in_raw(self, norm_bill_from, norm_bill_to): """ Return the index / number of days in each raw bill billing period for a normalized billing period + Args: + norm_bill_from: first day of a month (normalized bill) + norm_bill_to: last day of a month (normalized bill) + Returns: + Results(dict): + 'index': the index of the formatted billing period that contains 'norm_bill_from' + 'num_days': number of days of the normalized billing period contained in a specific formatted billing period """ norm_bill_days = (norm_bill_to - norm_bill_from).days @@ -1151,9 +1162,9 @@ class BillDisaggregation(): calculate the unit price for each nomralized billing period """ normalized_unit_price = [] - for m in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[m] - to_date = mbill['Bill To Date'].iloc[m] + for month in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[month] + to_date = mbill['Bill To Date'].iloc[month] index_numdays = self.find_bills_in_raw(from_date, to_date) weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) normalized_unit_price.append(weighted_unit_price_for_this_month) diff --git a/bpeng/mv/__init__.py b/bpeng/mv/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py new file mode 100644 index 0000000000000000000000000000000000000000..afdce6327499049480f4d8d741e98f3213e0b0a0 --- /dev/null +++ b/bpeng/mv/measurementVerification.py @@ -0,0 +1,1149 @@ +''' +This Python class calculates energy costs/savings for implemented energy conservation measures using IPMVP Option C. 
+ +Author: Doris Han +''' + +import matplotlib as mpl +mpl.get_backend() + +import matplotlib.pyplot as plt +import seaborn as sns; sns.set() +import datetime as datetime +import requests +import pandas as pd +import numpy as np +import statsmodels.api as sm +import warnings +warnings.simplefilter('ignore') +import psycopg2 +from matplotlib.pyplot import text +from datetime import timedelta +from sklearn import linear_model +from scipy.optimize import minimize +from scipy import stats +from dateutil import relativedelta +from bpeng.bill.awesome_disaggregate import BillDisaggregation + + +class MeasurementVerification(): + + ''' + The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C + + ''' + + def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ + weather_relate_usage_type, occupancy, utility_type): + + + self.original_bill = bill + self.weather = weather + self.thermal_comf = thermal_comf + self.earlest_retrofit_date = earlest_retrofit_date + self.latest_retrofit_date = latest_retrofit_date + self.pre_weather_related_usage_type = weather_relate_usage_type + self.occupancy = occupancy + self.utility_type = utility_type + self.weather_relate_usage_type = None + self.projection_post_bill = None + self.projection_fig = None + self.pre_bill = None + self.post_bill = None + self.baseline = None + self.reporting = None + self.projection_reporting_bill = None + self.regression_stat = None + self.first_year_saving = None + self.projection_baseline_bill = None + self.setpoints = {} + #self.non_weather_related_end_uses = {'Miscellanous': 1} + + + def split_bill(self, raw_bill): + ''' + split raw bill into two section: + pre - retrofit + post - retrofit + + Args: + raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module + earlest_retrofit_date(str): earlest implementation date of ECMs for this meter + latest_retrofit_date(str): latest implemntation date of ECMs for 
this meter + + Returns: + pre_bill(pd.DataFrame): raw_bill for pre-retrofit period + post_bill(pd.DataFrame): raw_bill for post_retrotit period + + ''' + + bill = raw_bill.copy() + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + + erd = pd.to_datetime(self.earlest_retrofit_date) + lrd = pd.to_datetime(self.latest_retrofit_date) + + bill = bill.sort_values('Bill From Date').reset_index() + + for i in range(len(bill) - 1): + if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): + erd_flag = i + + if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): + lrd_flag = i + + pre_bill = bill.iloc[0: erd_flag] + post_bill = bill.iloc[(lrd_flag+1):] + + return pre_bill, post_bill + + @staticmethod + def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ + weather_related_usage, weather_data): + ''' + Disaggregate bill with all known inputs + + Args: + heating_setpoint(float): + cooling_setpoint(float): + bill(pd.DataFrame): + regression_method(int): + 0 stands for 'both not'; + 1 stands for regression method 1 using multiple variable regression + 2 stands for using summer dhw method to estimate usage + + Returns: + regr(list): + ''' + + bd = BillDisaggregation(bill, weather_data) + #pre-requisit is the bills are longer than 365 days/ + bd.optimize_setpoints() + + bill['temperature'] = [ + bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + if weather_related_usage == 'Both': + regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) + + + if weather_related_usage == 'Cooling': + regr = bd.regression_1(0, cooling_setpoint, bill) + + if weather_related_usage == 'Heating': + if regression_method == 1: + regr = bd.regression_1(heating_setpoint, 0, bill) + + if regression_method ==2: + regr = bd.summer_dhw(heating_setpoint, bill) + + return regr + + @staticmethod 
+ def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): + + ''' + debugged - I think this functioin should be part of BillDisaggregation Class + This function is to calculate the HDD/ CDD with preferred weather period and setpoints + + Args: + heating_setpoint(float) + cooling_setpoint(float) + bill(pd.DataFrame): bills with columns: + 'Bill From Date' + 'Bill To Date' + Returns: + weather_demand(np.array): an array of hddcdd + ''' + + bd = BillDisaggregation(bill, weather_data) + + ahdd = [ + list(bd.hdd(heating_setpoint, xx) for xx in x) + for x in bill['temperature'] + ] + + acdd = [ + list(bd.cdd(cooling_setpoints, xx) for xx in x) + for x in bill['temperature'] + ] + + + # It should be billing period average hdd / days + + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([ + bd.threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + + daily_cdd1 = np.array([ + bd.threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) + + weather_demand = np.array([daily_hdd1, daily_cdd1]).T + + return weather_demand + + def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): + + ''' + the function calls for bill disaggregation module + + ''' + + bd = BillDisaggregation(bill,self.weather) + bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) + + bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ + 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ + 'Heating Setpoint', 'Cooling Setpoint']) + + + output = bd.benchmarking_output() + bill_evaluation = bill_evaluation.append({\ + 'Usage': output[0],\ + 'r squared': output[1],\ + 'regr method': output[2],\ + 'Consumption':output[3],\ + 'Heating': format(output[4], 
'0.0f'),\ + 'Cooling': format(output[5], '0.0f'),\ + 'Non-weather-related-usage': format(output[6], '0.0f'),\ + 'diff':format(output[7],'.2%'),\ + 'hdd': format(output[8], '0.0f'),\ + 'cdd': format(output[9], '0.0f'),\ + 'Days in Bill': output[10],\ + 'Unit Price':bd.avg_unit_price,\ + 'Heating Setpoint': output[11],\ + 'Cooling Setpoint': output[12] + }, ignore_index = True) + + return bd, bill_evaluation + + def usage_not_related_to_weather(self, bill): + + ''' + Return zero heating/cooling coefficients and the daily non-weather-related usage when the usage is not related to weather change + + Args: + bill(pd.DataFrame): a utility bill whose usage does not relate to weather change + + Returns: + bill_metrix(dict): a dictionary with keys 'heating_coef', 'cooling_coef', 'non_weather_related_daily_usage' + ''' + + non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) + heating_coef = 0 + cooling_coef = 0 + + bill_metrix = {'heating_coef': 0, + 'cooling_coef': 0, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def cal_heating_coef(self, regr_heating_coef): + ''' + Calculation for heating coefficient with the consideration of changing factors for projection purpose + - need further development (currently returns the regression coefficient unchanged) + + ''' + heating_coef = regr_heating_coef + return heating_coef + + + def cal_cooling_coef(self, regr_cooling_coef): + ''' + Calculation for cooling coefficient for projection purpose - need further development + ''' + cooling_coef = regr_cooling_coef + return cooling_coef + + def occup(self): + '''occupancy change pre & post retrofit''' + occupancy_change = 1 + return occupancy_change + + + def usage_related_to_weather(self, regr, regression_method): + + ''' + Args: + regr(list): + regr[0] is regression model + regr[1] is r-squared + regr[2] is hdd/cdd through the regression + regr[3] is the bills (optional, only for regression method 2) + regression_method(int): 1 or 2 + + Returns: + bill_metrix(dict): as defined below + + + ???? 
Doris: I want to keep the same input as - non weather related usage matrix + ''' + if regression_method == 1: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = regr_model.coef_[1] + non_weather_related_daily_usage = regr_model.intercept_ + + + if regression_method == 2: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = 0 + non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] + + bill_metrix = {'heating_coef': heating_coef, + 'cooling_coef': cooling_coef, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def annual_bill_pre_retrofit(self, bill): + ''' + Return the latest/nearest annual bill to performance analysis, + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of pre_retrofit period + + Return: + annual_bill(pd.DataFrame) + + ''' + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + + bill_last_date = bill['Bill To Date'].iloc[-1] + proposed_first_date = bill_last_date - timedelta(365) + bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) + + index = bill.flag.lt(-1).idxmin() + + if index == 0: + annual_bill = bill.drop('flag', axis=1) + else: + annual_bill = bill[index-1:].drop('flag', axis=1) + + return annual_bill + + + def annual_bill_post_retrofit(self, bill): + ''' + Return the latest/nearest annual bill for post-retrofit period + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of post_retrofit + + Return: + annual_bill(pd.DataFrame) + ''' + + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill = bill.drop('index', axis=1) + + bill = 
bill.reset_index(drop=True) + bill_first_date = bill['Bill From Date'].iloc[0] + proposed_last_date = bill_first_date + timedelta(365) + + bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) + index = bill.flag.lt(-1).idxmin() + + annual_bill = bill[0:index].drop('flag', axis=1) + if sum(annual_bill['Days In Bill']) < 360: + annual_bill = bill[0:index+1] + + return annual_bill + + + def baseline_bill(self, pre_bill): + + ''' + breakdown the bill of pre_retrofit period if there is long billing period and + return the billing data will be used as 'Baseline Bill' + + ''' + bk = BreakdownLongBill(pre_bill, self.weather) + pre_bill_breakdown = bk.long_bill_breakdown() + baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) + + return baseline_bill + + def reporting_bill(self, post_bill): + bk = BreakdownLongBill(post_bill, self.weather) + bill_breakdown = bk.long_bill_breakdown() + reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) + + return reporting_bill + + def main(self): + + ''' + Function to calcuate the energy usage for post_retrofit period using baseline conditions - + + ''' + # format the bill for later analysis + bill_bd = BillDisaggregation(self.original_bill, self.weather) + bill_bd.optimize_setpoints() + bill = bill_bd.formatted_bill + pre_bill, post_bill = self.split_bill(bill) + + # raw bill of pre-retrofit period + self.pre_bill = pre_bill + assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' 
+ + # baseline bill + self.baseline = self.baseline_bill(pre_bill) + + # post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long + self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() + + # calcuate the all prejected baseline for all the post retrofit billing period + post = projection_baseline(self.baseline, self.post_bill, self.weather) + self.projection_post_bill = post.projection() + + self.setpoints = {'heating': post.hp, + 'cooling': post.cp} + + if self.setpoints['cooling'] is np.NaN: + self.setpoints['cooling'] = None + if self.setpoints['heating'] is np.NaN: + self.setpoints['heating'] = None + + # calculate the projected baseline for baseline period + baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) + self.projection_baseline_bill = baseline_usage_baseline_period.projection() + + # calculate the projected baseline for reporting period + self.reporting = self.reporting_bill(self.post_bill) + report = projection_baseline(self.baseline, self.reporting, self.weather) + self.projection_reporting_bill = report.projection() + self.regression_stat = report.regression_stat + self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) + self.first_year_saving['baseline_metered_energy_usage'] = sum(self.baseline['Usage']) + self.first_year_saving['baseline_from_date'] = self.baseline['Bill From Date'].iloc[0] + self.first_year_saving['baseline_end_date'] = self.baseline['Bill To Date'].iloc[-1] + # xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) + + @staticmethod + def energy_savings(bill_with_baseline): + ''' + Return the energy saving for bills with projected baseline usage + + Args: + bill_with_baseline(pd.DataFrame): + 'Bill From Date' + 'Bill To Date' + 'Usage' + 'Days In Bill' + 'Total Charge' + 'temperature' + 'baseline' + + + Return: + savings(Dict): + + 'Measured Energy Usage for 
Reporting Period'; + 'Baseline Projection for Reporting Period'; + 'Annual Energy Savings'; + 'Energy Reductaion Percentage'; + 'Costs Avoidance'; + 'Annual Energy Costs' + + ''' + + metered_usage = sum(bill_with_baseline['Usage']) + baseline_usage = sum(bill_with_baseline['baseline']) + + energy_savings = baseline_usage - metered_usage + total_costs = sum(bill_with_baseline['Total Charge']) + unit_price = total_costs / metered_usage + saving_percentage = energy_savings / baseline_usage + dollar_savings = energy_savings * unit_price + + savings = { + 'report_metered_energy_usage': round(metered_usage, 0), + 'report_baseline_projection_energy_usage': round(baseline_usage, 0), + 'report_total_energy_costs': round(total_costs, 0), + 'annual_energy_usage_avoidance': round(energy_savings, 0), + 'annual_energy_cost_avoidance': round(dollar_savings, 0), + 'first_year_energy_reduction_percentage': saving_percentage, + 'report_from_date': bill_with_baseline['Bill From Date'].iloc[0], + 'report_end_date': bill_with_baseline['Bill To Date'].iloc[-1] + } + + return savings + + def return_utility_name(self, utility_type): + + if self.utility_type == 1: + name = 'Electricity' + unit = 'kWh' + if self.utility_type == 2: + name = 'Natural Gas' + unit = 'Therms' + if self.utility_type == 3: + name = 'Oil' + unit = 'Gallon' + if self.utility_type == 4: + name = 'Water' + unit = 'CCF' + + return name, unit + + def plot_result(self, pre, projection_post): + ''' + The funtion to plot: + 1. the metered data over baseline and reporting period + 2. 
the baseline data over reporting period + + Args: + pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) + prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline + + Returns: + metered(pd.DataFrame): Metered usage over baseline + reporting period + + + ''' + + sns.set_style("white") + plt.figure(figsize=(10,6)) + ax = plt.gca() + + + name, unit = self.return_utility_name(self.utility_type) + + post = projection_post.copy() + + bill = self.original_bill + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill = bill.sort_values('Bill From Date').reset_index(drop=True) + + pre_y = pre['Usage'] + post_x = post['Bill To Date'].values + post_pred_y = post['baseline'] + post_y = post['Usage'] + + y_max = max(pre_y) * 1.5 + arrow_location = max(pre_y) * 0.10 + text_loccation = max(pre_y) * 0.13 + const_location = max(pre_y) * 1.1 + legend_location = max(pre_y) * 1.35 + legend_text_location = max(pre_y) * 1.45 + + # Baseline Period + baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) + baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) + plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ + xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center',weight='bold') + + # Reporting Period + reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) + reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) + plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ + xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ + 
xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center', weight='bold') + + #ECM construction + index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] + + index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] + construction = bill[index_start:index_end] + + + construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) + ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) + + plt.axvline(construction_median, color='darkgrey', linestyle='-',\ + ymin=(1.1/1.5), + ymax=(1.35/1.5)) + + plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) + + ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ + va='center', weight='bold', color='darkgrey') + + + #Metered Usage Overtime + metered = pre.append(construction).append(post) + metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() + + metered_x = metered['Bill To Date'].values + metered_y = metered['Usage'] + + # Legend - Measured Usage + plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ + ymin=(metered['Usage'].iloc[4]/y_max), + ymax=(1.35/1.5)) + + plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) + + ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='cornflowerblue') + + + # Legend - Adjusted Baseline + plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ + ymin=(post['baseline'].iloc[10]/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To 
Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) + + ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ + va='center',weight='bold', color='brown') + + + + # Legend - Energy Savings + plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ + ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) + + ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='green') + + #plots + plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) + plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) + + ax.fill_between(post_x,\ + post_y, post_pred_y,\ + facecolor='mediumturquoise',\ + alpha=0.1, + edgecolor='b',\ + linewidth=0) + + plt.ylim([0,y_max]) + + for spine in plt.gca().spines.values(): + spine.set_visible(False) + + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + + fig = plt.gcf() + + # get width and height of axes object to compute + # matching arrowhead length and width + dps = fig.dpi_scale_trans.inverted() + bbox = ax.get_window_extent().transformed(dps) + width, height = bbox.width, bbox.height + + # manual arrowhead width and length + hw = 0.5/20.*(ymax-ymin) + hl = 0.5/20.*(xmax-xmin) + lw = 1 # axis line width + ohg = 0.1 # arrow overhang + + # compute matching arrowhead length and width + yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 + yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 + + # y axis + ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, + head_width=yhw, head_length=yhl, overhang = ohg, + length_includes_head= True, clip_on = False) + # x axis + ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, + head_width=hw, head_length=hl, overhang = ohg, + length_includes_head= True, clip_on = False) + + 
plt.ylabel('Consumption ({})'.format(unit)) + ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ + size=14, weight='bold',verticalalignment='bottom', alpha=0.8) + +# from matplotlib import rcParams +# rcParams['axes.titlepad'] = 50 + + return metered + + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() + + +class projection_baseline(): + + def __init__(self, baseline_bill_raw, reporting_bill_raw, weather): + + self.weather = weather + self.baseline_bill = None + self.reporting_bill = None + self.baseline_bill_raw = baseline_bill_raw + self.reporting_bill_raw = reporting_bill_raw + self.base_bd = None + self.regression_stat = None + self.hp = None + self.cp = None + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() + + + def projection(self): + + # only change for the reports + + self.base_bd = BillDisaggregation(self.baseline_bill_raw, self.weather) + self.base_bd.optimize_setpoints(weather_related_usage='Heating') + + + report_bd = BillDisaggregation(self.reporting_bill_raw, self.weather) + report_bd.optimize_setpoints() + + self.hp = self.base_bd.heating_set_point + self.cp = self.base_bd.cooling_set_point + + baseline_bill = self.base_bd.processed_bill + baseline_bill['Unit Price'] = baseline_bill['Total Charge'] / baseline_bill['Usage'] + self.baseline_bill = baseline_bill + + self.reporting_bill = report_bd.processed_bill + #print(self.reporting_bill) + + + regr_method = self.base_bd.regression_method + + if regr_method == 0: + proj_bill = self.r0_proj(self.baseline_bill,self.reporting_bill) + if regr_method == 1: + proj_bill = self.r1_proj(self.baseline_bill,self.reporting_bill) + if regr_method == 2: + proj_bill = 
self.r2_proj(self.baseline_bill,self.reporting_bill) + + + return proj_bill + + def r0_proj(self, baseline_bill, reporting_bill): + + baseline_id = DateIdentifier(baseline_bill).identifier_matrix() + proj_baseline = reporting_bill + + proj_baseline['baseline'] = [\ + DateIdentifier(proj_baseline).match_identifier_for_billing_period(x, y, baseline_id) + for x,y in zip(proj_baseline['Bill From Date'],\ + proj_baseline['Bill To Date'])] + + return proj_baseline + + + def r1_proj(self, baseline_bill, reporting_bill): + + + X = MeasurementVerification.weather_demand(self.hp,self.cp,baseline_bill,self.weather) + y = np.array(baseline_bill['Usage'] / baseline_bill['Days In Bill']) + + self.regression_stat = projection_baseline.ols_regression(X,y) + + proj_baseline = reporting_bill + + regr_model = linear_model.LinearRegression() + regr_model.fit(X, y) + post_retrofit_daily_hddcdd= MeasurementVerification.weather_demand(self.hp,\ + self.cp,\ + proj_baseline,\ + self.weather) + + predict_baseline = regr_model.predict(post_retrofit_daily_hddcdd) * proj_baseline['Days In Bill'] + + #understand the difference of building + model_diff = (sum(regr_model.predict(X) * baseline_bill['Days In Bill']) - sum(baseline_bill['Usage']))/sum(regr_model.predict(X) * baseline_bill['Days In Bill']) + #print('r1', model_diff) + proj_baseline['baseline'] = list(predict_baseline) + + return proj_baseline + + + def r2_proj(self, baseline_bill, reporting_bill): + + + + proj_baseline = reporting_bill + + regr = self.base_bd.summer_dhw(self.hp,baseline_bill) + + baseline_bill_with_baseline = regr[3] + baseline_bill_with_baseline['heating'] = baseline_bill_with_baseline['Usage'] - baseline_bill_with_baseline['dhw'] + daily_baseline = np.average(baseline_bill_with_baseline['dhw']/baseline_bill_with_baseline['Days In Bill']) + + y = baseline_bill_with_baseline['heating'] / baseline_bill_with_baseline['Days In Bill'] + X = regr[2] + + self.regression_stat = projection_baseline.ols_regression(X,y) + + 
regr_model = linear_model.LinearRegression() + regr_model.fit(X, y) + + post_retrofit_daily_hddcdd= MeasurementVerification.weather_demand(self.hp,\ + 0,\ + proj_baseline,\ + self.weather) + + X2 = (post_retrofit_daily_hddcdd[:,0]).reshape(-1,1) + + predict_baseline = regr_model.predict(X2) * proj_baseline['Days In Bill'] + + model_diff = (sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill'])\ + - sum(baseline_bill_with_baseline['Usage']))/sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill']) + #print('r2', model_diff) + + period_baseline = list(proj_baseline['Days In Bill'] * daily_baseline) + proj_baseline['baseline'] = predict_baseline + period_baseline + + return proj_baseline + + +class BreakdownLongBill(): + + ''' + This class breaks down a bill whose billing period is abnormally longer than the others + ''' + + def __init__(self, bill, weather): + self.bill = bill + self.weather = weather + self.bd = None + + + + def days_in_long_bill(self, days): + + interval = days // 30 + reminder = days % 30 + days_in_long_bill = [] + + if reminder > 15: + for intr in range(interval): days_in_long_bill.append(30) + days_in_long_bill.append(reminder) + if reminder <= 15: + for intr in range(interval-1): days_in_long_bill.append(30) + days_in_long_bill.append(reminder+30) + return days_in_long_bill + + def sub_billing_period_dates(self, billing_period_info): + + ''' + Return date ranges for each sub-billing period + + Args: + billing_period_info(series): one row from the formatted bill + Returns: + long_bill_period(pd.DataFrame): same columns as the formatted bill, without temperature + + ''' + long_bill_period = pd.DataFrame(columns=['Bill From Date', 'Bill To Date','Usage', 'Days In Bill', + 'Total Charge', 'Unit Price']) + start_date = billing_period_info['Bill From Date'] + days_in_bill = self.days_in_long_bill(int(billing_period_info['Days In Bill'])) + + bill_from_dates = [] + bill_to_dates = [] + + for xx 
in range(len(days_in_bill)): + days = days_in_bill[xx] + bill_from_dates.append(start_date) + start_date = start_date + timedelta(days) + bill_to_dates.append(start_date) + + long_bill_period['Bill From Date'] = bill_from_dates + long_bill_period['Days In Bill'] = days_in_bill + long_bill_period['Bill To Date'] = bill_to_dates + + return long_bill_period + + def r0_long_bill_breakdown(self, sub_billing_period, billing_period_info): + ''' + breakdown the usage of a long bill when the usage is not related to weather + r0 indicate the regression method is 0 + + Args: + long_bill_period(pd.DataFrame): the output from function - sub_billing_period_dates + billing_period_info(series): one raw from the formatted bill + ''' + + daily_usage = billing_period_info['Usage'] / billing_period_info['Days In Bill'] + unit_price = billing_period_info['Total Charge'] / billing_period_info['Days In Bill'] + + sub_billing_period['Usage'] = sub_billing_period['Days In Bill'] * daily_usage + sub_billing_period['Total Charge'] = sub_billing_period['Days In Bill'] * unit_price + sub_billing_period['Unit Price'] = sub_billing_period['Total Charge']/sub_billing_period['Days In Bill'] + + return sub_billing_period + + + def r1_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 1 + r1 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + cooling_setpoint = self.bd.cooling_set_point + + hddcdd = MeasurementVerification.weather_demand(heating_setpoint,cooling_setpoint,\ + sub_billing_period,self.weather) + sub_billing_period['Usage'] = self.bd.regr_model.predict(hddcdd) * sub_billing_period['Days In Bill'] + unit_price = billing_period_info['Total 
Charge']/billing_period_info['Usage'] + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + return sub_billing_period + + def r2_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 2 + r2 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + bill = self.bd.processed_bill + bill['temperature'] = bill['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + hdd = MeasurementVerification.weather_demand(heating_setpoint,0,\ + sub_billing_period,self.weather)[:,0].reshape(-1,1) + + + regr = self.bd.summer_dhw(heating_setpoint,bill) + + bill_with_baseline = regr[3] + daily_baseline = np.average(bill_with_baseline['dhw']/bill_with_baseline['Days In Bill']) + + sub_billing_period['heating_usage'] = self.bd.regr_model.predict(hdd) * sub_billing_period['Days In Bill'] + sub_billing_period['dhw'] = sub_billing_period['Days In Bill'] * daily_baseline + sub_billing_period['Usage'] = sub_billing_period['heating_usage'] + sub_billing_period['dhw'] + + unit_price = billing_period_info['Total Charge'] / billing_period_info['Usage'] + + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + sub_billing_period = sub_billing_period.drop('dhw',axis=1) + sub_billing_period = sub_billing_period.drop('heating_usage',axis=1) + + return sub_billing_period + + + def long_bill_breakdown(self): + ''' + breakdown the bills with 
irregular long billing period, return a formatted bill with sub-billing peirod for the long bill + + Args: + formatted_bill(pd.DataFrame) + Ruturn: + the broken down bill of bills with long billing peirod + + + ''' + self.bd = BillDisaggregation(self.bill, self.weather) + #TODO: Be careful about the inputs + self.bd.optimize_setpoints() + + formatted_bill = self.bd.formatted_bill + regression_method = self.bd.regression_method + regr_model = self.bd.regr_model + bill_quality = self.bd.bill_quality(self.bill) + #bill = formatted_bill.drop('temperature', axis=1) + new_bill = self.bill.copy().reset_index(drop=True) + + if any(i == 'long' for i in bill_quality.flag): + bill_quality_long = bill_quality[bill_quality['flag'] == 'long'] + + if len(bill_quality_long) > 0: + for x in range(len(bill_quality_long)): + + index_of_raw_bill = bill_quality_long['index'].iloc[x] + long_billing_period_info = formatted_bill.iloc[index_of_raw_bill] + days = long_billing_period_info['Days In Bill'] + total_usage = long_billing_period_info['Usage'] + days_breakdown_list = self.days_in_long_bill(days) + long_bill_breakdown_single_raw = self.sub_billing_period_dates(long_billing_period_info) + new_bill = new_bill.drop(index_of_raw_bill) + + if regression_method == 0: + long_bill_breakdown_single_raw = self.r0_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + + if regression_method == 1: + long_bill_breakdown_single_raw = self.r1_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + if regression_method == 2: + long_bill_breakdown_single_raw = self.r2_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + adjustment_factor = total_usage/sum(long_bill_breakdown_single_raw['Usage']) + long_bill_breakdown_single_raw['Usage'] = long_bill_breakdown_single_raw['Usage'] * adjustment_factor + new_bill = new_bill.append(long_bill_breakdown_single_raw) + + new_bill = new_bill.sort_values('Bill From 
Date').reset_index(drop=True) + + else: + new_bill = bill + + return new_bill + + +class DateIdentifier(): + + ''' + In order to add [Month, date] identifer in addition to YYYY/MM/DD + Since the comparasion is happened between same day of different year in M&V process + ''' + + def __init__(self, bill): + self.bill = bill + + def identifier_for_date(self, bill_from_date, bill_to_date): + ''' + return a list of [month, date] for a date range + Args: + bill_from_date(timestamp): + bill_to_date(timestamp): + + Return: + identifier(list): a list of [month, date] of the dates in bewteem the bill_from_date and bill_to_date + ''' + + + days = (bill_to_date - bill_from_date).days + identifier = [] + + for d in range(days): + date = bill_from_date + timedelta(d) + date_id = [date.month, date.day] + identifier.append(date_id) + + return identifier + + def identifier_matrix(self): + ''' + Args: + bill(pd.DataFrame): + + Return: + matrix(pd.DataFrame): + identifier: [month, date], + unit_price: average unit price for a specific billing period + daily_usage: daily_usage for a specific date range + ''' + bill = self.bill + matrix = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + + for bp in range(len(bill)): + from_date = bill['Bill From Date'].iloc[bp] + to_date = bill['Bill To Date'].iloc[bp] + unit_price = bill['Unit Price'].iloc[bp] + daily_usage = bill['Usage'].iloc[bp]/bill['Days In Bill'].iloc[bp] + matrix_temp = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + matrix_temp['identifier'] = self.identifier_for_date(from_date, to_date) + matrix_temp['unit_price'] = pd.Series([unit_price for x in range(len(matrix_temp))]) + matrix_temp['daily_usage'] = pd.Series([daily_usage for x in range(len(matrix_temp.index))]) + matrix = matrix.append(matrix_temp, ignore_index=True) + + return matrix + + + def match_identifier_for_billing_period(self, from_date, to_date, base_identifier_matrix): + ''' + Given a date range and a identifier matirx, 
+ Return the usage of period by matching the date identifier between the period to the corresponding identifier daily + usage + + Args: + from_date(timestamp) + to_date(timestamp) + base_identifier_matrix(pd.DataFrame): + identifier: month, date + daily_usage + unit_price + + Return: + period_usage + ''' + identifier_list = self.identifier_for_date(from_date, to_date) + + period_usage = 0 + for xx in range(len(identifier_list)): + identi = identifier_list[xx] + temp = base_identifier_matrix[base_identifier_matrix.identifier.apply(lambda x: x == identi)] + unit_price = temp.unit_price.mean() + daily_usage = temp.daily_usage.mean() + period_usage += daily_usage + + return period_usage + + diff --git a/bpeng/mv/query.py b/bpeng/mv/query.py new file mode 100644 index 0000000000000000000000000000000000000000..ff4e6aa18190b8b0bd413f5dc2d47fd15b55dccb --- /dev/null +++ b/bpeng/mv/query.py @@ -0,0 +1,64 @@ +import pandas as pd +import psycopg2 +from influxdb import InfluxDBClient + + +class bill_weather(): + # fetch weather data, and raw utility from database + + def __init__(self, utility_id, building_id): + + self.utility_id = utility_id + self.building_id = building_id + self.weather = None + self.bill = None + self.df_utility = None + self.account_id = None + + def query_weather(self): + + user = 'engineering' + password = 'nPEc9Pz0iV' + dbname = 'weather' + host = '52.206.6.10' + port = 8086 + + influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) + query_string = "SELECT * from temperature WHERE interval='daily'" + daily_temperature = influx_db.query(query_string).get_points('temperature') + self.weather = pd.DataFrame(daily_temperature) + + + def query_utility_bill(self): + hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' + username = 'blocpower' + password = 'Z50Fwgkfi0SsVaBz' + database = 'utility_bill' + + myConnection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database) + df_bill = 
pd.read_sql('SELECT * FROM public.bill', myConnection) + df_account = pd.read_sql('SELECT * FROM public.account', myConnection) + df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) + self.df_utility = df_utility + + df_account_selected = df_account[df_account['account_type'] == self.utility_id] + # return the utility account information based on the 'building id' and 'utility id' + # it may return multiple records if there is the multiple accounts for the same utility for the same building + account = df_account_selected[df_account_selected['building_id'] == self.building_id] + + # current code return the first records of the utility of the building for a specific utility type + if not account.empty: + id = account['id'].iloc[0] + self.account_id = id + new_bill = df_bill[df_bill['account_id'] == id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + \ + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) + + bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] + bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + self.bill = bill + diff --git a/requirements.txt b/requirements.txt index 21af8635752f7cd6d8ab30a03d58bda744de9986..72dcdc9d5cba525ea9311ed1c2c20d431e890fc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,9 @@ oplus==4.6.0 pvlib==0.4.4 requests==2.12.4 xlrd==1.0.0 -python-pptx==0.6.18 \ No newline at end of file +influxdb==5.2.2 +pandas==0.24.2 +psycopg2==2.8.2 +influxdb==5.2.2 +scipy==0.19.0 +python-pptx==0.6.18