From 2ee991a53277176b4d745df1e4dc9b0d53c1f397 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 30 Aug 2018 12:32:22 -0400 Subject: [PATCH 01/97] change the set point of cooling from 55 to 65 --- bpeng/bill/awesome_disaggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 35940e4..29563c5 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -117,7 +117,7 @@ class BillDisaggregation(): """ - if indoor_set_point > 55: + if indoor_set_point > 65: if indoor_set_point < outdoor_temp: return outdoor_temp - indoor_set_point return 0 -- GitLab From 793ae3558833729cd4b6ad954f9eb5c9cd777af9 Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 18 Mar 2019 15:25:25 -0400 Subject: [PATCH 02/97] prep for dashboard integration --- bpeng/bill/get_test_data.py | 73 +++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 bpeng/bill/get_test_data.py diff --git a/bpeng/bill/get_test_data.py b/bpeng/bill/get_test_data.py new file mode 100644 index 0000000..fc8e3c0 --- /dev/null +++ b/bpeng/bill/get_test_data.py @@ -0,0 +1,73 @@ +#%% +import psycopg2 +import requests +import pandas as pd + + +def __init__(): + return + +def get_weather_data(start_date, end_date): + + weather_service = 'https://staging.weatherservice.blocpower.io' + + WEATHER_SERVICE_URL = ( + f'{weather_service}/weather?' + 'measurement=temperature&' + 'interval=daily&' + f'date_start={start_date}&' + f'date_end={end_date}' + ) + + HEADERS = { + 'x-blocpower-app-key': '427d8dd6-c46c-495d-a03e-b6e6aeeb6902', + 'x-blocpower-app-secret': 'Sb/s0Kb5RBL5NIvpsFm5+LHlpcu/YL8gKvb0D8kqlt1B36mNFSF5vTVIrhkEiew7aq7MtVIGt6uFZDEKYBnWRzA3QxpRdw9z', + } + + res = requests.get(WEATHER_SERVICE_URL, headers=HEADERS) + results = res.json() + + pretty_weather_data = [] + for i in results['data']: + pretty_weather_data.append({ + 'time': i['time'], + 'interval': i['tags']['interval'], + 'location': i['tags']['location'], + 'value': i['fields']['value'] + }) + + return pretty_weather_data + +def query_bill(building_id, account_type): + + hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' + username = 'blocpower' + password = 'Z50Fwgkfi0SsVaBz' + database = 'utility_bill' + + myConnection = psycopg2.connect(host=hostname, user=username, \ + password=password, dbname=database ) + df_bill = pd.read_sql('SELECT * FROM public.bill', \ + myConnection) + df_account = pd.read_sql('SELECT * FROM public.account', \ + myConnection) + df_utility = pd.read_sql('SELECT * FROM public.utility_type', \ + myConnection) + df_account_selected = df_account[df_account['account_type'] == account_type] + + account = df_account_selected[df_account_selected['building_id'] == building_id] + acc_id = account['id'].iloc[0] + new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) + bill = new_bill[['bill_from_date','bill_to_date','usage','actual_total']] + bill.columns = ['Bill From Date','Bill To Date','Usage','Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + return bill + + + + + -- GitLab From a8742354512fe14cbe1ab12e3eb9db32e197def1 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 22 Mar 2019 16:12:36 -0400 Subject: [PATCH 03/97] refactor --- bpeng/bill/benchmarking.py | 0 bpeng/bill/bill_cleaner.py | 190 +++++++++++ bpeng/bill/calculater.py | 86 +++++ bpeng/bill/driver.py | 390 +++++++++++++++++++++++ bpeng/bill/get_test_data.py | 42 +-- bpeng/bill/normailize_usage.py | 166 ++++++++++ bpeng/bill/output_natural_usage.py | 2 + bpeng/bill/regr.py | 126 ++++++++ bpeng/bill/test.py | 305 ++++++++++++++++++ bpeng/bill/weather_data_cal.py | 43 +++ bpeng/bill/weather_related_usage_type.py | 35 ++ 11 files changed, 1356 insertions(+), 29 deletions(-) create mode 100644 bpeng/bill/benchmarking.py create mode 100644 bpeng/bill/bill_cleaner.py create mode 100644 bpeng/bill/calculater.py create mode 100644 bpeng/bill/driver.py create mode 100644 bpeng/bill/normailize_usage.py create mode 100644 bpeng/bill/output_natural_usage.py create mode 100644 bpeng/bill/regr.py create mode 100644 bpeng/bill/test.py create mode 100644 bpeng/bill/weather_data_cal.py create mode 100644 bpeng/bill/weather_related_usage_type.py diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py new file mode 100644 index 0000000..e69de29 diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py new file mode 100644 index 0000000..fb54c45 --- /dev/null +++ b/bpeng/bill/bill_cleaner.py @@ -0,0 +1,190 @@ + +#%% +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd +from calculater import (outliers_iqr) + + +def bill_formating(raw_bill): + """ + Bill Cleaning + Step 1: + 1. format each column of the raw bill + 2. drop NAN / duplicates + + Args: + raw_bill (pd.DataFrame): a raw bill with columns of + 'Bill From Date' + 'Bill To Date' + 'Days In Bill' + 'Usage' + 'Total Charge' + Returns: + pd.DataFrame: a formatted bill + boolean: True - Length of the bill has changed during bill cleaning step 1 + """ + bill_copy = raw_bill.copy() + bill_copy['Bill From Date'] = pd.to_datetime( + bill_copy['Bill From Date']) + bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) + bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + bill_copy = bill_copy[[ + 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', + 'Total Charge' + ]] + + bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > + pd.to_datetime(bill_copy['Bill From Date'])] + bill_copy1['Bill From Date'] = pd.to_datetime( + bill_copy1['Bill From Date']) + bill_copy2 = bill_copy1.sort_values('Bill From Date') + bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) + bill_copy2 = bill_copy2.dropna() + bill_copy2 = bill_copy2.drop_duplicates() + bill_copy2 = bill_copy2.reset_index(drop=True) + + if np.array(bill_copy2.shape)[0] == np.array(raw_bill.shape)[0]: + bill_shape_change = 'False' + bill_shape_change = 'True' + bill_formatted = bill_copy2 + return bill_formatted, bill_shape_change + +def bill_quality(bill_formatted): + """ + Bill Cleaning + Step 2: + 1. Check each billing period to find a bill is too short or too long; + Args: + bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + Returns: + pd.DataFrame: a dataframe with columns: + 'index': the index of the billing period which is identified as an outlier + 'flag': to indicate either it is too long or too short + """ + + bill = bill_formatted.copy() + bill = pd.DataFrame(bill) + days_in_bill = np.array(bill['Days In Bill']) + + # abnormal days in bill will return False + days_quality_index_inti = outliers_iqr(list(days_in_bill)) + days_quality_index_recheck = np.array( + [x not in range(25, 35) for x in days_in_bill]) + days_quality_index = list( + np.array(days_quality_index_inti) * + np.array(days_quality_index_recheck)) + + days_abn_index = [] + for x in range(len(days_quality_index)): + if days_quality_index[x]: + days_abn_index.append(x) + + bill_quality = pd.DataFrame(data=days_abn_index, columns=['index']) + + flag = [] + for xx in range(len(days_abn_index)): + point_index = days_abn_index[xx - 1] + if days_in_bill[point_index] < days_in_bill.mean(): + flag.append('short') + elif days_in_bill[point_index] >= days_in_bill.mean(): + flag.append('long') + + bill_quality['flag'] = np.array(flag) + return bill_quality + +def short_bill_consolidate(bill_formatted, bill_quality): + """ + Bill Cleaning + Step 3: + consolidation of the bills that are too short compare to others + NOTE: error + + Args: + bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + bill_quality(pd.DataFrame): bill quality from Step 2 + + Returns: + pd.DataFrame: the cleaned bill and ready for analysis + + """ + bill_quality_short = bill_quality[bill_quality['flag'] == 'short'] + bill_consi = bill_formatted.copy() + # consolidate the billing period that is too short compare to others + + for xxx in range(len(bill_quality_short)): + + if bill_quality['flag'].iloc[xxx] == 'short': + row_index = bill_quality_short['index'].iloc[xxx] + + if (row_index != 0) and (row_index != bill_consi.index[-1]): + + if bill_consi['Days In Bill'][int( + row_index - 1)] <= bill_consi['Days In Bill'][int( + row_index + 1)]: + + bill_consi['Bill To Date'][int( + row_index - 1)] = bill_consi['Bill To Date'][int( + row_index)] + bill_consi['Usage'][int( + row_index - 1)] = bill_consi['Usage'][int( + row_index - 1)] + bill_consi['Usage'][int( + row_index)] + bill_consi['Total Charge'][int( + row_index - 1)] = bill_consi['Total Charge'][int( + row_index - 1)] + bill_consi['Total Charge'][int( + row_index)] + bill_consi['Days In Bill'][int( + row_index - 1)] = bill_consi['Days In Bill'][int( + row_index - 1 + )] + bill_consi['Days In Bill'][int(row_index)] + else: + bill_consi['Bill From Date'][int( + row_index + 1)] = bill_consi['Bill To Date'][int( + row_index)] + bill_consi['Usage'][int( + row_index + 1)] = bill_consi['Usage'][int( + row_index + 1)] + bill_consi['Usage'][int( + row_index)] + bill_consi['Total Charge'][int( + row_index + 1)] = bill_consi['Total Charge'][int( + row_index + 1)] + bill_consi['Total Charge'][int( + row_index)] + bill_consi['Days In Bill'][int( + row_index + 1)] = bill_consi['Days In Bill'][int( + row_index + 1 + )] + bill_consi['Days In Bill'][int(row_index)] + + if row_index == 0: + bill_consi['Bill From Date'][1] = bill_consi[ + 'Bill From Date'][0] + bill_consi['Usage'][ + 1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] + bill_consi['Total Charge'][ + 1] = bill_consi['Total Charge'][0] + bill_consi['Total Charge'][1] + bill_consi['Days In Bill'][ + 1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] + + if row_index == bill_consi.index[-1]: + bill_consi['Bill To Date'].iloc[-2] = bill_consi[ + 'Bill To Date'].iloc[-1] + bill_consi['Usage'].iloc[ + -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] + bill_consi['Total Charge'].iloc[ + -2] = bill_consi['Total Charge'].iloc[-2] + bill_consi['Total Charge'].iloc[-1] + bill_consi['Days In Bill'].iloc[ + -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] + + if len(bill_quality_short) != 0: + bill_consi = bill_consi.drop( + bill_consi.index[list(bill_quality_short['index'])]) + + bill_consi = bill_consi.reset_index(drop=False) + + return bill_consi diff --git a/bpeng/bill/calculater.py b/bpeng/bill/calculater.py new file mode 100644 index 0000000..fbd0a31 --- /dev/null +++ b/bpeng/bill/calculater.py @@ -0,0 +1,86 @@ + +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd + +def cdd(indoor_set_point, outdoor_temp): + """ + CDD + Assumption: + cooling setting point shall always higher than 55 F, + which is the trigger temperature of the heating system + + ?? + set_point is for indoor temperature + + """ + + if indoor_set_point > 65: + if indoor_set_point < outdoor_temp: + return outdoor_temp - indoor_set_point + return 0 + +def hdd(indoor_set_point, outdoor_temp): + """ + HDD + Assumption: + Only if the outdoor temperature drop below 60'F, + then the heating system may be able to be turn on + """ + if (outdoor_temp < 60) and (indoor_set_point > outdoor_temp): + hdd = indoor_set_point - outdoor_temp + else: + hdd = 0 + return hdd + +def threshold(data, set_point): + """If data is less the set_point, return 0""" + if data <= set_point: + data = 0 + return data + +def outliers_iqr(ys): + """ + Find outlier using IQR method + + Args: + ys (list):A list of number needs to be checked for outliners + + Returns: + list: Returns a list of boolean + True: Outliner + False: Not Outliner + + """ + quartile_1, quartile_3 = np.percentile(ys, [25, 75]) + iqr = quartile_3 - quartile_1 + lower_bound = quartile_1 - (iqr * 1.5) + upper_bound = quartile_3 + (iqr * 1.5) + return [(x > upper_bound or x < lower_bound) for x in ys] + +def anomaly_point(alist, thresholds): + """ + Find outlier and return its index + + Args: + + alist (list): A list of number needs to be checked for outliners + thresholds (float): a percentage of the difference between the mean of the whole list and + the mean of the list without the outlier + + Returns: + + list: Returns a list the index of the outliner + + """ + amean = [] + for x in range(len(alist)): + temp = np.hstack((alist[:(x)], alist[(x + 1):])) + amean.append(temp.mean()) + index = [] + for x in range(len(alist)): + temp1 = abs(alist[x] - np.array(alist).mean()) / np.array( + alist).mean() + index.append(temp1 < thresholds) + return index diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py new file mode 100644 index 0000000..cd8bd31 --- /dev/null +++ b/bpeng/bill/driver.py @@ -0,0 +1,390 @@ +#%% +"""This file calcuate bill disagregation for multifamily buildings""" + +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd +from dateutil import relativedelta +from scipy.optimize import minimize +from sklearn import linear_model + +warnings.simplefilter('ignore') + +from get_test_data import (get_weather_data,query_bill) +from bill_cleaner import (bill_formating,bill_quality,short_bill_consolidate) +from weather_data_cal import (weather_cleaning, bill_period_weather) + + +class BillDisaggregation(): + """ + Class for Bill Disaggregation + + Args: + + bill (pd.DataFrame): raw bill (from parsing) (File like object) + First row (besides column name) are the names + 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names + daily_temp (pd.DataFrame): daily temperature (File like object) + + Attributes: + + output: list + total heating load + heating load for the first year (first 12 bill periods) + heating load of each month + (return NaN if R-squared is low) + """ + + def __init__(self, bill, raw_weather_data_daily, account_info): + + # self.account_info = account_info + # self.bill = bill + self.raw_weather_data_daily = raw_weather_data_daily + # self.processed_bill = None + self.weather_data_daily = None + # self.usage = None + # self.regression_method = None + # self.heating_comsuption_pred = None + # self.cooling_comsuption_pred = None + # self.others_comsuption_pred = None + # self.regr_model = None + # self.r_squared_of_fit = None + # self.heating_set_point = None + # self.cooling_set_point = None + # self.days_in_bills = None + # self.output_table = None + # self.output_table_monthly = None + # self.most_recent_monthly_output = None + # self.avg_unit_price = None + # self.bill_breakdown = None + # self.recent_year_bill_breakdown = None + # self.annual_usage = None + # self.formatted_bill = None + + def optimize_setpoints(self, non_weather_related_end_use={'Miscellaneous': 1}, weather_related_usage='Unknown'): + """ + Main function for the optimization and disaggregation + + Args: + + usage (str): + Specify if the weather - related consumption is for heating or cooling + 'Unknown': no prior knowledge + 'Heating': only for heating + 'Cooling': only for cooling + 'Both': for both heating and cooling + 'Both Not': not for heating or cooling + default 'Unknown' + + """ + + + # # get the bill ready + # self.weather_data_daily = weather_cleaning(raw_weather_data_daily) + + # formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable + # quality = bill_quality(formatted_bill) + + + # if any(i == 'short' for i in quality.flag): + # processed_bill = short_bill_consolidate( + # formatted_bill, quality) + # else: + # processed_bill = formatted_bill + + # processed_bill['temperature'] = [ + # bill_period_weather(x, y, weather_data_daily) + # for x, y in zip(processed_bill['Bill From Date'], + # processed_bill['Bill To Date']) + # ] + + # processed_bill = processed_bill.sort_values('Bill From Date') + # formatted_bill = formatted_bill.sort_values('Bill From Date') + # formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] + + + + # self.daily_temp = self.weather_cleaning(self.raw_daily_temp) + # formatted_bill, shape = self.bill_formating(self.bill) # pylint: disable=unused-variable + # quality = self.bill_quality(formatted_bill) + + # if any(i == 'short' for i in quality.flag): + # # any(quality.flag.astype(str) == 'long') + # self.processed_bill = self.short_bill_consolidate( + # formatted_bill, quality) + # else: + # self.processed_bill = formatted_bill + + # self.processed_bill['temperature'] = [ + # self.bill_period_weather(x, y) + # for x, y in zip(self.processed_bill['Bill From Date'], + # self.processed_bill['Bill To Date']) + # ] + + # self.processed_bill = self.processed_bill.sort_values('Bill From Date') + + # formatted_bill = formatted_bill.sort_values('Bill From Date') + # formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] + # self.formatted_bill = formatted_bill + + regression_method = 1 + + if weather_related_usage == 'Unknown': + opt = minimize( + lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + + if -opt.fun > 0.5: + if (heating_coef > 0) and (cooling_coef <= 0): + weather_related_usage = 'Heating' + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + elif (heating_coef >= 0) and (cooling_coef >= 0): + weather_related_usage = 'Both' + else: + weather_related_usage = 'Both Not' + + if weather_related_usage == 'Both': + opt = minimize( + lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + # change accordingly for JOENYC buildings + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + + # changes on Jan 17th 2018 + # please futher check with more bills + + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + # set the range of heating set point or cooling point - + if round(heating_set_point) in range( + 60, 95) and round(cooling_set_point) in range( + 55, 75): + weather_related_usage = 'Both' + heating_coef = heating_coef + cooling_coef = cooling_coef + + else: + # using standard seting points to check the bill + regr = self.regression_1(72, 65, self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + weather_related_usage = 'Both' + + if weather_related_usage == 'Heating': + opt_1 = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + opt_2 = minimize( + lambda x: -self.summer_dhw(x, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): + opt = opt_2 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.summer_dhw(opt.x[0], self.processed_bill) + regr_model = regr[0] + hdd = regr[2] + hdd_transit = [hdd[x][0] for x in range(len(hdd))] + hddcdd = np.array([[hdd_transit[x], 0] + for x in range(len(hdd))]) + regression_method = 2 + else: + if round(opt_1.x[0]) in range(60, 95): + opt = opt_1 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + else: + # legit heating set-point 72'F + heating_set_point = 72 + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) + + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + + if weather_related_usage == 'Cooling': + opt = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + regr = self.regression_1(opt.x[0], 300, self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + cooling_set_point = opt.x[0] + heating_set_point = np.NaN + + self.usage = weather_related_usage + + if self.usage == 'Both Not': + self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + + else: + self.regression_method = regression_method + + if self.regression_method == 1: + + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + if regr[1] > 0.5: + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array( + self.hddcdd[:, 1]) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill[ + 'Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] + + # real_sum = np.array(self.processed_bill['Usage']) + # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # self.others_consumption_pred + + # diff = real_sum - predict_sum + + else: + self.heating_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + self.usage = 'Both Not' + + elif self.regression_method == 2: + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + + regr[3]['dhw'] + + bill_cp = self.processed_bill.copy() + bill_cp = self.processed_bill[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + ]] + bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] + bill_cp['Heating Usage'] = self.heating_consumption_pred + bill_cp['Cooling Usage'] = self.cooling_consumption_pred + bill_cp['Other Usage'] = self.others_consumption_pred + + if self.usage == 'Both Not': + self.r_squared_of_fit = 0 + else: + self.r_squared_of_fit = regr[1] + + self.heating_set_point = heating_set_point + self.cooling_set_point = cooling_set_point + self.output_table = bill_cp + + last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] + first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + + billing_months = self.num_month_dates(last_bill_date, first_bill_date) + output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, + self.cooling_set_point, billing_months) + self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) + self.most_recent_monthly_output = self.output_to_month(last_bill_date, + self.heating_set_point, self.cooling_set_point, 12) + self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, + self.most_recent_monthly_output) + self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) + +# self.avg_unit_price = (sum(bill_formatted['Total Charge'])) / ( +# sum(bill_formatted['Usage'])) diff --git a/bpeng/bill/get_test_data.py b/bpeng/bill/get_test_data.py index fc8e3c0..15d40a1 100644 --- a/bpeng/bill/get_test_data.py +++ b/bpeng/bill/get_test_data.py @@ -1,42 +1,26 @@ #%% import psycopg2 -import requests import pandas as pd +from influxdb import InfluxDBClient + def __init__(): return -def get_weather_data(start_date, end_date): - - weather_service = 'https://staging.weatherservice.blocpower.io' - - WEATHER_SERVICE_URL = ( - f'{weather_service}/weather?' - 'measurement=temperature&' - 'interval=daily&' - f'date_start={start_date}&' - f'date_end={end_date}' - ) - - HEADERS = { - 'x-blocpower-app-key': '427d8dd6-c46c-495d-a03e-b6e6aeeb6902', - 'x-blocpower-app-secret': 'Sb/s0Kb5RBL5NIvpsFm5+LHlpcu/YL8gKvb0D8kqlt1B36mNFSF5vTVIrhkEiew7aq7MtVIGt6uFZDEKYBnWRzA3QxpRdw9z', - } - - res = requests.get(WEATHER_SERVICE_URL, headers=HEADERS) - results = res.json() +def get_weather_data(): + user = 'engineering' + password = 'nPEc9Pz0iV' + dbname = 'weather' + host = '52.206.6.10' + port = 8086 - pretty_weather_data = [] - for i in results['data']: - pretty_weather_data.append({ - 'time': i['time'], - 'interval': i['tags']['interval'], - 'location': i['tags']['location'], - 'value': i['fields']['value'] - }) + influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) - return pretty_weather_data + query_string = "SELECT * from temperature WHERE interval='daily'" + daily_temperature = influx_db.query(query_string).get_points('temperature') + weather = pd.DataFrame(daily_temperature) + return weather def query_bill(building_id, account_type): diff --git a/bpeng/bill/normailize_usage.py b/bpeng/bill/normailize_usage.py new file mode 100644 index 0000000..11d50d2 --- /dev/null +++ b/bpeng/bill/normailize_usage.py @@ -0,0 +1,166 @@ + ''' + This module will create a normailized usage based on a raw bill natural billing periods + should be refactor to a class + ''' + def find_index_in_first_raw_biil(self, norm_bill_date): + """ + Return the index of the row of raw bill contains the bill date from a normalized bill + """ + for index, bill in self.formatted_bill.iterrows(): + if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: + return index + return None + + def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): + """ + Return how many days from a normalized bill within a raw bill billing period + """ + + if flag == 'start': + days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days + if flag == 'end': + days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days + return days + + def weighted_unit_price(self, index_numdays): + """ + Return the weighted average of unit price + """ + weights = [] + total_days = [] + for ind in range(len(index_numdays)): + unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] + days_in_that_period = int(index_numdays[ind]['num_days']) + weights.append(unit_price * days_in_that_period) + total_days.append(days_in_that_period) + weighted_unit_price = sum(weights)/sum(total_days) + return weighted_unit_price + + def find_bills_in_raw(self, norm_bill_from, norm_bill_to): + """ + Return the index / number of days in each raw bill billing period for a normalized billing period + """ + + norm_bill_days = (norm_bill_to - norm_bill_from).days + results = [] + + index_start = self.find_index_in_first_raw_biil(norm_bill_from) + index_end = self.find_index_in_first_raw_biil(norm_bill_to) + + if index_start == index_end: + results.append({'index': index_start, 'num_days': norm_bill_days}) + + elif index_end - index_start >= 1: + days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') + results.append({'index': index_start, 'num_days': days_in_start_period}) + days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') + results.append({'index': index_end, 'num_days': days_in_end_period}) + + if index_end - index_start >= 2: + for p in range(index_end - index_start - 1): + days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] + index_of_this_period = index_start+p+1 + results.append({'index': index_of_this_period, 'num_days': days_in_period}) + + return results + + def normalized_unit_price(self, rawbill, mbill): + """ + calculate the unit price for each nomralized billing period + """ + normalized_unit_price = [] + for m in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[m] + to_date = mbill['Bill To Date'].iloc[m] + index_numdays = self.find_bills_in_raw(from_date, to_date) + weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) + normalized_unit_price.append(weighted_unit_price_for_this_month) + mbill['Unit Price'] = normalized_unit_price + return mbill + + def output_to_month(self, last_date_of_bill, hp, cp, number_of_month): + """ + Transfrom period-wise output to month-wise output + + Args: + + last_day_of_bill(datetime): last day of bill + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + number_of_month(int): number of month that need to be re-format + + Returns: + + pd.DataFrame: result with monthly consumptions + + """ + + last_dates = [] + first_dates = [] + + lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) + + # cosntruct a new dataframe with bills from the first to last day for each month + + for i in range(0, number_of_month): + last_dates.append(lastdate) + first_dates.append(lastdate.replace(day=1)) + lastdate = first_dates[i] - timedelta(1) + + monthly_output_table = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill', + 'Heating Usage', 'Cooling Usage', 'Other Usage']) + + monthly_output_table['Bill From Date'] = first_dates + monthly_output_table['Bill To Date'] = last_dates + monthly_output_table[ + 'Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] + monthly_output_table[ + 'Days In Bill'] = monthly_output_table['Days In Bill'].apply( + lambda x: x.days) + 1 + monthly_output_table['Month'] = monthly_output_table[ + 'Bill From Date'].apply(lambda x: x.month) + monthly_output_table['temperature'] = [ + self.bill_period_weather(x, y) + for x, y in zip(monthly_output_table['Bill From Date'], + monthly_output_table['Bill To Date']) + ] + + hdd = [ + list(BillDisaggregation.hdd(hp, xx) for xx in x) + for x in monthly_output_table['temperature'] + ] + cdd = [ + list(BillDisaggregation.cdd(cp, xx) for xx in x) + for x in monthly_output_table['temperature'] + ] + monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) + monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) + + monthly_output_table['HDD'] = monthly_hdd + monthly_output_table['CDD'] = monthly_cdd + + per_hdd = self.benchmarking_output()[4] / self.benchmarking_output()[8] + if np.isnan(per_hdd): + per_hdd = 0 + per_cdd = self.benchmarking_output()[5] / self.benchmarking_output()[9] + if np.isnan(per_cdd): + per_cdd = 0 + per_day = self.benchmarking_output()[6] / self.benchmarking_output()[ + 10] + if np.isnan(per_day): + per_day = 0 + + monthly_output_table['Heating Usage'] = monthly_output_table[ + 'HDD'] * per_hdd + monthly_output_table['Cooling Usage'] = monthly_output_table[ + 'CDD'] * per_cdd + monthly_output_table['Other Usage'] = monthly_output_table[ + 'Days In Bill'] * per_day + + monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ + + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] + + monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', + 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] + monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) + return monthly_output diff --git a/bpeng/bill/output_natural_usage.py b/bpeng/bill/output_natural_usage.py new file mode 100644 index 0000000..9fda912 --- /dev/null +++ b/bpeng/bill/output_natural_usage.py @@ -0,0 +1,2 @@ +''' +this module should get out put diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py new file mode 100644 index 0000000..45d2b02 --- /dev/null +++ b/bpeng/bill/regr.py @@ -0,0 +1,126 @@ + +import warnings +from datetime import timedelta + +import numpy as np +import pandas as pd +from dateutil import relativedelta +from sklearn import linear_model + +from calculater import (cdd,hdd,threshold,outliers_iqr) +warnings.simplefilter('ignore') + + +def regression_1(hp, cp, processed_bill): + """ + A linear regression model with heating and cooling set fixed + + Args: + + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + processed_bill(pd.DataFrame): cleaned bill with daily temperature + + Returns: + + sklearn.linear_model.LinearRegression: regression model + float: r-squared of the linear regression model + 2d-array: a 2D numpy array of normalized billing period average daily HDDs and CDDs + + """ + + bill = processed_bill.copy() + consumption = np.array(bill['Usage'] / bill['Days In Bill']) + + ahdd = [ + list(hdd(hp, xx) for xx in x) + for x in bill['temperature'] + ] + acdd = [ + list(cdd(cp, xx) for xx in x) + for x in bill['temperature'] + ] + + # it should be billing period average hdd / days + + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([ + threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + daily_cdd1 = np.array([ + threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) + + regression_temp = np.array([daily_hdd1, daily_cdd1]).T + + regr_model = linear_model.LinearRegression() + regr_model.fit(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) + + return regr_model, score, regression_temp + +def regression_2_summer_dhw(hp, processed_bill): + """ + This funcion uses summer month gas usage as base consumption for the year + A linear regression of weather-related consumption and a fixed heating system set point + NOTE: USUALLY ERROR + + Args: + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + processed_bill(pd.DataFrame): cleaned bill with daily temperature + + Returns: + sklearn.linear_model.LinearRegression: regression model + float: r-squared of the linear regression model + 2d-array: a 2D numpy array of normalized billing period HDDs sum + pd.DataFrame + + """ + + bill = processed_bill.copy() + ahdd = [[hdd(hp, xx) for xx in x] + for x in bill['temperature']] + # monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + + # daily dhw usage + bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) + + if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: + dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] + + if len(dhw_only_consumption) > 0: + dhw_quality_index = outliers_iqr(list(dhw_only_consumption)) # list + dhw_only_consumption_checked = [] + + for xx in range(len(dhw_only_consumption)): + if not dhw_quality_index[xx]: + dhw_only_consumption_checked.append( + list(dhw_only_consumption)[xx]) + + daily_dhw = np.mean(dhw_only_consumption_checked) + else: + daily_dhw = 0 + + else: + daily_dhw = 0 + + bill['dhw'] = daily_dhw * bill['Days In Bill'] + + # 2018/01/30 + # Daily hdd makes more sense + + regression_temp = daily_hdd.reshape(-1, 1) + consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] + + regr_model = linear_model.LinearRegression(fit_intercept=False) + regr_model.fit(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) + + return regr_model, score, regression_temp, bill diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py new file mode 100644 index 0000000..1178183 --- /dev/null +++ b/bpeng/bill/test.py @@ -0,0 +1,305 @@ +#%% +import pandas as pd +import numpy as np +import datetime as datetime +from scipy.optimize import minimize +from datetime import timedelta + + +from get_test_data import (get_weather_data, query_bill) +from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) +from weather_data_cal import (weather_cleaning, bill_period_weather) +from regr import (regression_1, regression_2_summer_dhw) +from calculater import(hdd, threshold) +from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) + +########################################################################### inputs +end_uses = {'Miscellanous':1} +raw_bill = query_bill(205232,2) +raw_weather_data_daily = get_weather_data() +weather_related_usage_init = 'Unknown' + +############################################################################ +weather_data_daily = weather_cleaning(raw_weather_data_daily) +formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable +quality = bill_quality(formatted_bill) + + +if any(i == 'short' for i in quality.flag): + processed_bill = short_bill_consolidate( + formatted_bill, quality) +else: + processed_bill = formatted_bill + +processed_bill['temperature'] = [ + bill_period_weather(x, y, weather_data_daily) + for x, y in zip(processed_bill['Bill From Date'], + processed_bill['Bill To Date']) +] + +processed_bill = processed_bill.sort_values('Bill From Date') +formatted_bill = formatted_bill.sort_values('Bill From Date') +formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] + +############################################# above works +regression_method = 1 + +#%% + +if weather_related_usage_init == 'Unknown': + weather_related_usage = \ + determine_weather_usage_type_when_input_is_unknown(processed_bill) +else: + weather_related_usage = weather_related_usage_init + + +if weather_related_usage == 'Both': + opt = minimize( + lambda x: -regression_1(x[0], x[1], processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) + regr_model = regr_temp[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr_temp[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + # change accordingly for JOENYC buildings + print('set_points', heating_set_point, cooling_set_point) + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + # set the range of heating set point or cooling point - + if round(heating_set_point) in range( + 60, 95) and round(cooling_set_point) in range( + 55, 75): + weather_related_usage = 'Both' + heating_coef = heating_coef + cooling_coef = cooling_coef + + else: + # using standard seting points to check the bill + regr = regression_1(72, 65, processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + weather_related_usage = 'Both' + +if weather_related_usage == 'Heating': + opt_1 = minimize( + lambda x: regression_1(x, 300,processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + opt_2 = minimize( + lambda x: -regression_2_summer_dhw(x, processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): + opt = opt_2 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = regression_2_summer_dhw(opt.x[0], processed_bill) + regr_model = regr[0] + hdd = regr[2] + hdd_transit = [hdd[x][0] for x in range(len(hdd))] + hddcdd = np.array([[hdd_transit[x], 0] + for x in range(len(hdd))]) + regression_method = 2 + else: + if round(opt_1.x[0]) in range(60, 95): + opt = opt_1 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr_temp = regression_1(heating_set_point, 300, + processed_bill) + regr_model = regr_temp[0] + hddcdd = regr_temp[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + else: + # legit heating set-point 72'F + heating_set_point = 72 + cooling_set_point = np.NaN + regr_temp = regression_1(heating_set_point, 300, + processed_bill) + + regr_model = regr_temp[0] + hddcdd = regr_temp[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + +if weather_related_usage == 'Cooling': + opt = minimize( + lambda x: regression_1(x, 300, processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + regr_temp = regression_1(opt.x[0], 300, processed_bill) + regr_model = regr_temp[0] + hddcdd = regr_temp[2] + cooling_set_point = opt.x[0] + heating_set_point = np.NaN + + +#%% +if self.usage == 'Both Not': + self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + + else: + self.regression_method = regression_method + + if self.regression_method == 1: + + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + if regr[1] > 0.5: + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array( + self.hddcdd[:, 1]) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill[ + 'Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] + + # real_sum = np.array(self.processed_bill['Usage']) + # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # self.others_consumption_pred + + # diff = real_sum - predict_sum + + else: + self.heating_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + self.usage = 'Both Not' + + elif self.regression_method == 2: + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + + regr[3]['dhw'] + + + + + + + + + +#%% + +# # bill_evaluation = pd.DataFrame(columns=['Building Id','Usage', 'r squared','regr method', 'Consumption', 'Heating',\ +# # 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ +# # 'Heating Setpoint', 'Cooling Setpoint']) + + +# # bd = BillDisaggregation(bill,weather_data) +# # bd.optimize_setpoints(end_uses, weather_related_usage='Heating') +# # output = bd.benchmarking_output() +# # dhw_to_total = output[6]/ (output[3]) +# # bill_evaluation = bill_evaluation.append({\ +# # 'Building Id':205232, \ +# # 'Usage': output[0],\ +# # 'r squared': output[1],\ +# # 'regr method': output[2],\ +# # 'Consumption':output[3],\ +# # 'Heating': format(output[4], '0.0f'),\ +# # 'Cooling': format(output[5], '0.0f'),\ +# # 'Non-weather-related-usage': format(output[6], '0.0f'),\ +# # 'diff':format(output[7],'.2%'),\ +# # 'hdd': format(output[8], '0.0f'),\ +# # 'cdd': format(output[9], '0.0f'),\ +# # 'Days in Bill': output[10],\ +# # 'Unit Price':bd.avg_unit_price,\ +# # 'Heating Setpoint': output[11],\ +# # 'Cooling Setpoint': output[12] +# # }, ignore_index = True) + +# # print(bill_evaluation) +# #%% +# ''' +# results +# Building Id Usage r-squared regr method Consumption Heating Cooling \ +# 205232.0 Heating 0.95385 2.0 10068.0 10239 0 + +# Non-weather-related-usage diff hdd cdd Days in Bill Unit_Price \ +# 0 -1.70% 9969 0 701.0 $1.013706 + +# Heating Setpoint Cooling Setpoint +# 0 70.401855 NaN +# ''' + + +#%% +#%% diff --git a/bpeng/bill/weather_data_cal.py b/bpeng/bill/weather_data_cal.py new file mode 100644 index 0000000..82816be --- /dev/null +++ b/bpeng/bill/weather_data_cal.py @@ -0,0 +1,43 @@ +''' +this python file will deal with cal related to temperature from weather data +''' + +import pandas as pd + +def weather_cleaning(raw_daily_temp): + """ + Format the daily temperature data from influx query + + Args: + raw_daily_temp (influx query): raw temperature data queried from Influx + Returns: + pd.DateFrame: Returns formatted daily temperature + """ + raw_daily_temp.rename( + columns={'time': 'date', + 'value': 'temperature'}, inplace=True) + raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) + raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + daily_temp = raw_daily_temp + daily_temp['date'] = pd.to_datetime(daily_temp['date']) + return daily_temp + +def bill_period_weather(bill_from_date, bill_end_date, weather_data_daily): + """ + get the outdoor temperaturebetween two date, return a list + + Args: + + bill_from_date (Datetime): start date of a period + bill_end_date (Datetime): end date of a period + + Returns: + list: Returns a list of outdoor temperature for a period + """ + end_date_id = weather_data_daily[weather_data_daily.date == + bill_end_date].index[0] + start_date_id = weather_data_daily[weather_data_daily.date == + bill_from_date].index[0] + return list(weather_data_daily['temperature'][start_date_id:end_date_id]) diff --git a/bpeng/bill/weather_related_usage_type.py b/bpeng/bill/weather_related_usage_type.py new file mode 100644 index 0000000..5c33e4a --- /dev/null +++ b/bpeng/bill/weather_related_usage_type.py @@ -0,0 +1,35 @@ + +import pandas as pd +import numpy as np +import datetime as datetime +from scipy.optimize import minimize +from regr import regression_1 + +def determine_weather_usage_type_when_input_is_unknown(processed_bill): + ''' + This function is to determine the weather_related_usage_type when the input is unknown + ''' + opt = minimize( + lambda x: -regression_1(x[0], x[1], processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) + regr_model = regr_temp[0] + heating_coef, cooling_coef = regr_model.coef_ + + if -opt.fun > 0.5: + if (heating_coef > 0) and (cooling_coef <= 0): + weather_related_usage = 'Heating' + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + elif (heating_coef >= 0) and (cooling_coef >= 0): + weather_related_usage = 'Both' + else: + weather_related_usage = 'Both Not' + + return weather_related_usage -- GitLab From ab6a6319e3101d9484712f853312850ffeeab636 Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 14 May 2019 10:58:40 -0400 Subject: [PATCH 04/97] fix test issues --- bpeng/bill/bill_cleaner.py | 27 +- bpeng/bill/calculater.py | 8 +- bpeng/bill/get_test_data.py | 63 ++--- bpeng/bill/task.py | 7 + bpeng/bill/test.py | 536 ++++++++++++++++++------------------ 5 files changed, 321 insertions(+), 320 deletions(-) create mode 100644 bpeng/bill/task.py diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index fb54c45..9e3decc 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -1,7 +1,6 @@ -#%% -import warnings -from datetime import timedelta + +# from datetime import timedelta import numpy as np import pandas as pd from calculater import (outliers_iqr) @@ -41,7 +40,7 @@ def bill_formating(raw_bill): ]] bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > - pd.to_datetime(bill_copy['Bill From Date'])] + pd.to_datetime(bill_copy['Bill From Date'])] bill_copy1['Bill From Date'] = pd.to_datetime( bill_copy1['Bill From Date']) bill_copy2 = bill_copy1.sort_values('Bill From Date') @@ -56,6 +55,7 @@ def bill_formating(raw_bill): bill_formatted = bill_copy2 return bill_formatted, bill_shape_change + def bill_quality(bill_formatted): """ Bill Cleaning @@ -86,20 +86,21 @@ def bill_quality(bill_formatted): if days_quality_index[x]: days_abn_index.append(x) - bill_quality = pd.DataFrame(data=days_abn_index, columns=['index']) + bill_quality_metric = pd.DataFrame(data=days_abn_index, columns=['index']) flag = [] - for xx in range(len(days_abn_index)): - point_index = days_abn_index[xx - 1] + for billing_date_index in range(len(days_abn_index)): + point_index = days_abn_index[billing_date_index - 1] if days_in_bill[point_index] < days_in_bill.mean(): flag.append('short') elif days_in_bill[point_index] >= days_in_bill.mean(): flag.append('long') - bill_quality['flag'] = np.array(flag) - return bill_quality + bill_quality_metric['flag'] = np.array(flag) + return bill_quality_metric + -def short_bill_consolidate(bill_formatted, bill_quality): +def short_bill_consolidate(bill_formatted, bill_quality_metric): """ Bill Cleaning Step 3: @@ -108,19 +109,19 @@ def short_bill_consolidate(bill_formatted, bill_quality): Args: bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 - bill_quality(pd.DataFrame): bill quality from Step 2 + bill_quality_metric (pd.DataFrame): bill quality from Step 2 Returns: pd.DataFrame: the cleaned bill and ready for analysis """ - bill_quality_short = bill_quality[bill_quality['flag'] == 'short'] + bill_quality_short = bill_quality_metric[bill_quality_metric['flag'] == 'short'] bill_consi = bill_formatted.copy() # consolidate the billing period that is too short compare to others for xxx in range(len(bill_quality_short)): - if bill_quality['flag'].iloc[xxx] == 'short': + if bill_quality_metric['flag'].iloc[xxx] == 'short': row_index = bill_quality_short['index'].iloc[xxx] if (row_index != 0) and (row_index != bill_consi.index[-1]): diff --git a/bpeng/bill/calculater.py b/bpeng/bill/calculater.py index fbd0a31..3840b0a 100644 --- a/bpeng/bill/calculater.py +++ b/bpeng/bill/calculater.py @@ -1,9 +1,9 @@ -import warnings -from datetime import timedelta + import numpy as np import pandas as pd + def cdd(indoor_set_point, outdoor_temp): """ CDD @@ -21,6 +21,7 @@ def cdd(indoor_set_point, outdoor_temp): return outdoor_temp - indoor_set_point return 0 + def hdd(indoor_set_point, outdoor_temp): """ HDD @@ -34,12 +35,14 @@ def hdd(indoor_set_point, outdoor_temp): hdd = 0 return hdd + def threshold(data, set_point): """If data is less the set_point, return 0""" if data <= set_point: data = 0 return data + def outliers_iqr(ys): """ Find outlier using IQR method @@ -59,6 +62,7 @@ def outliers_iqr(ys): upper_bound = quartile_3 + (iqr * 1.5) return [(x > upper_bound or x < lower_bound) for x in ys] + def anomaly_point(alist, thresholds): """ Find outlier and return its index diff --git a/bpeng/bill/get_test_data.py b/bpeng/bill/get_test_data.py index 15d40a1..2e254cd 100644 --- a/bpeng/bill/get_test_data.py +++ b/bpeng/bill/get_test_data.py @@ -1,13 +1,14 @@ -#%% + + import psycopg2 import pandas as pd from influxdb import InfluxDBClient - def __init__(): return + def get_weather_data(): user = 'engineering' password = 'nPEc9Pz0iV' @@ -22,36 +23,32 @@ def get_weather_data(): weather = pd.DataFrame(daily_temperature) return weather -def query_bill(building_id, account_type): - - hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' - username = 'blocpower' - password = 'Z50Fwgkfi0SsVaBz' - database = 'utility_bill' - - myConnection = psycopg2.connect(host=hostname, user=username, \ - password=password, dbname=database ) - df_bill = pd.read_sql('SELECT * FROM public.bill', \ - myConnection) - df_account = pd.read_sql('SELECT * FROM public.account', \ - myConnection) - df_utility = pd.read_sql('SELECT * FROM public.utility_type', \ - myConnection) - df_account_selected = df_account[df_account['account_type'] == account_type] - - account = df_account_selected[df_account_selected['building_id'] == building_id] - acc_id = account['id'].iloc[0] - new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) - new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + new_bill['delivery_tax'] - new_bill = new_bill.reset_index(drop=True) - bill = new_bill[['bill_from_date','bill_to_date','usage','actual_total']] - bill.columns = ['Bill From Date','Bill To Date','Usage','Total Charge'] - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days - return bill - - - +def query_bill(building_id, account_type): + hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' + username = 'blocpower' + password = 'Z50Fwgkfi0SsVaBz' + database = 'utility_bill' + + myConnection = psycopg2.connect(host=hostname, user=username, + password=password, dbname=database) + df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) + df_account = pd.read_sql('SELECT * FROM public.account', myConnection) + df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) + df_account_selected = df_account[df_account['account_type'] == account_type] + + account = df_account_selected[df_account_selected['building_id'] == building_id] + acc_id = account['id'].iloc[0] + new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + \ + new_bill['supply_charge'] + \ + new_bill['esco_charge'] + \ + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) + bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] + bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + return bill diff --git a/bpeng/bill/task.py b/bpeng/bill/task.py new file mode 100644 index 0000000..cd034fb --- /dev/null +++ b/bpeng/bill/task.py @@ -0,0 +1,7 @@ +# import pandas as pd + + +def task(): + return 1 + + diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 1178183..488d88a 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -1,25 +1,23 @@ -#%% + + import pandas as pd import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta - - -from get_test_data import (get_weather_data, query_bill) from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) from weather_data_cal import (weather_cleaning, bill_period_weather) from regr import (regression_1, regression_2_summer_dhw) -from calculater import(hdd, threshold) +from get_test_data import (query_bill, get_weather_data) +from calculater import (hdd, threshold) from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) -########################################################################### inputs -end_uses = {'Miscellanous':1} -raw_bill = query_bill(205232,2) +end_uses = {'Miscellanous': 1} +raw_bill = query_bill(205232, 2) raw_weather_data_daily = get_weather_data() weather_related_usage_init = 'Unknown' -############################################################################ + weather_data_daily = weather_cleaning(raw_weather_data_daily) formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable quality = bill_quality(formatted_bill) @@ -31,275 +29,269 @@ if any(i == 'short' for i in quality.flag): else: processed_bill = formatted_bill -processed_bill['temperature'] = [ - bill_period_weather(x, y, weather_data_daily) - for x, y in zip(processed_bill['Bill From Date'], - processed_bill['Bill To Date']) -] +print(formatted_bill) -processed_bill = processed_bill.sort_values('Bill From Date') -formatted_bill = formatted_bill.sort_values('Bill From Date') -formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] +# processed_bill['temperature'] = [ +# bill_period_weather(x, y, weather_data_daily) +# for x, y in zip(processed_bill['Bill From Date'], +# processed_bill['Bill To Date']) +# ] -############################################# above works -regression_method = 1 +# processed_bill = processed_bill.sort_values('Bill From Date') +# formatted_bill = formatted_bill.sort_values('Bill From Date') +# formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] -#%% +# ############################################# above works +# regression_method = 1 -if weather_related_usage_init == 'Unknown': - weather_related_usage = \ - determine_weather_usage_type_when_input_is_unknown(processed_bill) -else: - weather_related_usage = weather_related_usage_init - - -if weather_related_usage == 'Both': - opt = minimize( - lambda x: -regression_1(x[0], x[1], processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) - regr_model = regr_temp[0] - heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr_temp[2] - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - # change accordingly for JOENYC buildings - print('set_points', heating_set_point, cooling_set_point) - - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - # set the range of heating set point or cooling point - - if round(heating_set_point) in range( - 60, 95) and round(cooling_set_point) in range( - 55, 75): - weather_related_usage = 'Both' - heating_coef = heating_coef - cooling_coef = cooling_coef - - else: - # using standard seting points to check the bill - regr = regression_1(72, 65, processed_bill) - regr_model = regr[0] - heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr[2] - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - weather_related_usage = 'Both' - -if weather_related_usage == 'Heating': - opt_1 = minimize( - lambda x: regression_1(x, 300,processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - opt_2 = minimize( - lambda x: -regression_2_summer_dhw(x, processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): - opt = opt_2 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr = regression_2_summer_dhw(opt.x[0], processed_bill) - regr_model = regr[0] - hdd = regr[2] - hdd_transit = [hdd[x][0] for x in range(len(hdd))] - hddcdd = np.array([[hdd_transit[x], 0] - for x in range(len(hdd))]) - regression_method = 2 - else: - if round(opt_1.x[0]) in range(60, 95): - opt = opt_1 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr_temp = regression_1(heating_set_point, 300, - processed_bill) - regr_model = regr_temp[0] - hddcdd = regr_temp[2] - heating_coef = regr_model.coef_ - cooling_coef = 0 - else: - # legit heating set-point 72'F - heating_set_point = 72 - cooling_set_point = np.NaN - regr_temp = regression_1(heating_set_point, 300, - processed_bill) - - regr_model = regr_temp[0] - hddcdd = regr_temp[2] - heating_coef = regr_model.coef_ - cooling_coef = 0 - -if weather_related_usage == 'Cooling': - opt = minimize( - lambda x: regression_1(x, 300, processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - regr_temp = regression_1(opt.x[0], 300, processed_bill) - regr_model = regr_temp[0] - hddcdd = regr_temp[2] - cooling_set_point = opt.x[0] - heating_set_point = np.NaN - - -#%% -if self.usage == 'Both Not': - self.heating_consumption_pred = self.processed_bill['Usage'] * 0 - self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - - else: - self.regression_method = regression_method - - if self.regression_method == 1: - - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - if regr[1] > 0.5: - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * regr_model.coef_[0] - self.cooling_consumption_pred = np.array( - self.hddcdd[:, 1]) * regr_model.coef_[1] - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill[ - 'Days In Bill'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ - 'Days In Bill'] - - # real_sum = np.array(self.processed_bill['Usage']) - # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ - # self.others_consumption_pred - - # diff = real_sum - predict_sum - - else: - self.heating_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - self.usage = 'Both Not' - - elif self.regression_method == 2: - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * self.regr_model.coef_[0] - self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - + regr[3]['dhw'] - - - - - - - - - -#%% - -# # bill_evaluation = pd.DataFrame(columns=['Building Id','Usage', 'r squared','regr method', 'Consumption', 'Heating',\ -# # 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ -# # 'Heating Setpoint', 'Cooling Setpoint']) - - -# # bd = BillDisaggregation(bill,weather_data) -# # bd.optimize_setpoints(end_uses, weather_related_usage='Heating') -# # output = bd.benchmarking_output() -# # dhw_to_total = output[6]/ (output[3]) -# # bill_evaluation = bill_evaluation.append({\ -# # 'Building Id':205232, \ -# # 'Usage': output[0],\ -# # 'r squared': output[1],\ -# # 'regr method': output[2],\ -# # 'Consumption':output[3],\ -# # 'Heating': format(output[4], '0.0f'),\ -# # 'Cooling': format(output[5], '0.0f'),\ -# # 'Non-weather-related-usage': format(output[6], '0.0f'),\ -# # 'diff':format(output[7],'.2%'),\ -# # 'hdd': format(output[8], '0.0f'),\ -# # 'cdd': format(output[9], '0.0f'),\ -# # 'Days in Bill': output[10],\ -# # 'Unit Price':bd.avg_unit_price,\ -# # 'Heating Setpoint': output[11],\ -# # 'Cooling Setpoint': output[12] -# # }, ignore_index = True) - -# # print(bill_evaluation) # #%% -# ''' -# results -# Building Id Usage r-squared regr method Consumption Heating Cooling \ -# 205232.0 Heating 0.95385 2.0 10068.0 10239 0 -# Non-weather-related-usage diff hdd cdd Days in Bill Unit_Price \ -# 0 -1.70% 9969 0 701.0 $1.013706 +# if weather_related_usage_init == 'Unknown': +# weather_related_usage = \ +# determine_weather_usage_type_when_input_is_unknown(processed_bill) +# else: +# weather_related_usage = weather_related_usage_init + + +# if weather_related_usage == 'Both': +# opt = minimize( +# lambda x: -regression_1(x[0], x[1], processed_bill)[1], +# (65, 65), +# method='nelder-mead', +# options={'xtol': 1e-2, +# 'disp': False}) + +# regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) +# regr_model = regr_temp[0] +# heating_coef, cooling_coef = regr_model.coef_ +# hddcdd = regr_temp[2] +# heating_set_point = opt.x[0] +# cooling_set_point = opt.x[1] +# # change accordingly for JOENYC buildings +# print('set_points', heating_set_point, cooling_set_point) + +# if (heating_coef > 0) and (cooling_coef < 0): +# weather_related_usage = 'Heating' +# cooling_coef = 0 +# elif (heating_coef <= 0) and (cooling_coef > 0): +# weather_related_usage = 'Cooling' +# heating_coef = 0 +# elif (heating_coef <= 0) and (cooling_coef <= 0): +# weather_related_usage = 'Both Not' +# heating_coef = 0 +# cooling_coef = 0 +# elif (heating_coef > 0) and (cooling_coef > 0): +# if heating_coef / cooling_coef > 5: +# weather_related_usage = 'Heating' +# cooling_coef = 0 +# else: +# # set the range of heating set point or cooling point - +# if round(heating_set_point) in range( +# 60, 95) and round(cooling_set_point) in range( +# 55, 75): +# weather_related_usage = 'Both' +# heating_coef = heating_coef +# cooling_coef = cooling_coef + +# else: +# # using standard seting points to check the bill +# regr = regression_1(72, 65, processed_bill) +# regr_model = regr[0] +# heating_coef, cooling_coef = regr_model.coef_ +# hddcdd = regr[2] +# heating_set_point = opt.x[0] +# cooling_set_point = opt.x[1] + +# if (heating_coef > 0) and (cooling_coef < 0): +# weather_related_usage = 'Heating' +# cooling_coef = 0 +# elif (heating_coef <= 0) and (cooling_coef > 0): +# weather_related_usage = 'Cooling' +# heating_coef = 0 +# elif (heating_coef <= 0) and (cooling_coef <= 0): +# weather_related_usage = 'Both Not' +# heating_coef = 0 +# cooling_coef = 0 +# elif (heating_coef > 0) and (cooling_coef > 0): +# if heating_coef / cooling_coef > 5: +# weather_related_usage = 'Heating' +# cooling_coef = 0 +# else: +# weather_related_usage = 'Both' + +# if weather_related_usage == 'Heating': +# opt_1 = minimize( +# lambda x: regression_1(x, 300,processed_bill)[1], +# 65, +# method='nelder-mead', +# options={'xtol': 1e-2, +# 'disp': False}) + +# opt_2 = minimize( +# lambda x: -regression_2_summer_dhw(x, processed_bill)[1], +# 65, +# method='nelder-mead', +# options={'xtol': 1e-2, +# 'disp': False}) + +# if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): +# opt = opt_2 +# heating_set_point = opt.x[0] +# cooling_set_point = np.NaN +# regr = regression_2_summer_dhw(opt.x[0], processed_bill) +# regr_model = regr[0] +# hdd = regr[2] +# hdd_transit = [hdd[x][0] for x in range(len(hdd))] +# hddcdd = np.array([[hdd_transit[x], 0] +# for x in range(len(hdd))]) +# regression_method = 2 +# else: +# if round(opt_1.x[0]) in range(60, 95): +# opt = opt_1 +# heating_set_point = opt.x[0] +# cooling_set_point = np.NaN +# regr_temp = regression_1(heating_set_point, 300, +# processed_bill) +# regr_model = regr_temp[0] +# hddcdd = regr_temp[2] +# heating_coef = regr_model.coef_ +# cooling_coef = 0 +# else: +# # legit heating set-point 72'F +# heating_set_point = 72 +# cooling_set_point = np.NaN +# regr_temp = regression_1(heating_set_point, 300, +# processed_bill) + +# regr_model = regr_temp[0] +# hddcdd = regr_temp[2] +# heating_coef = regr_model.coef_ +# cooling_coef = 0 + +# if weather_related_usage == 'Cooling': +# opt = minimize( +# lambda x: regression_1(x, 300, processed_bill)[1], +# 65, +# method='nelder-mead', +# options={'xtol': 1e-2, +# 'disp': False}) +# regr_temp = regression_1(opt.x[0], 300, processed_bill) +# regr_model = regr_temp[0] +# hddcdd = regr_temp[2] +# cooling_set_point = opt.x[0] +# heating_set_point = np.NaN -# Heating Setpoint Cooling Setpoint -# 0 70.401855 NaN -# ''' +# #%% +# if self.usage == 'Both Not': +# self.heating_consumption_pred = self.processed_bill['Usage'] * 0 +# self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 +# self.others_consumption_pred = self.processed_bill['Usage'] +# self.regression_method = 0 +# self.hddcdd = np.zeros((len(self.processed_bill), 2)) +# cooling_set_point = np.NaN +# heating_set_point = np.NaN + +# else: +# self.regression_method = regression_method + +# if self.regression_method == 1: + +# self.hddcdd = np.array( +# pd.DataFrame(hddcdd).mul( +# list(self.processed_bill['Days In Bill']), axis=0)) + +# if regr[1] > 0.5: +# self.regr_model = regr_model +# self.heating_consumption_pred = np.array( +# self.hddcdd[:, 0]) * regr_model.coef_[0] +# self.cooling_consumption_pred = np.array( +# self.hddcdd[:, 1]) * regr_model.coef_[1] + +# if self.regr_model.intercept_ < 0: +# self.others_consumption_pred = 0 * self.processed_bill[ +# 'Days In Bill'] +# else: +# self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ +# 'Days In Bill'] + +# # real_sum = np.array(self.processed_bill['Usage']) +# # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ +# # self.others_consumption_pred + +# # diff = real_sum - predict_sum + +# else: +# self.heating_consumption_pred = self.processed_bill[ +# 'Days In Bill'] * 0 +# self.cooling_consumption_pred = self.processed_bill[ +# 'Days In Bill'] * 0 +# self.others_consumption_pred = self.processed_bill['Usage'] +# self.regression_method = 0 +# self.hddcdd = np.zeros((len(self.processed_bill), 2)) +# cooling_set_point = np.NaN +# heating_set_point = np.NaN +# self.usage = 'Both Not' + +# elif self.regression_method == 2: +# self.hddcdd = np.array( +# pd.DataFrame(hddcdd).mul( +# list(self.processed_bill['Days In Bill']), axis=0)) + +# self.regr_model = regr_model +# self.heating_consumption_pred = np.array( +# self.hddcdd[:, 0]) * self.regr_model.coef_[0] +# self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + +# if self.regr_model.intercept_ < 0: +# self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] +# else: +# self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ +# + regr[3]['dhw'] -#%% -#%% +# #%% + +# # # bill_evaluation = pd.DataFrame(columns=['Building Id','Usage', 'r squared','regr method', 'Consumption', 'Heating',\ +# # # 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ +# # # 'Heating Setpoint', 'Cooling Setpoint']) + + +# # # bd = BillDisaggregation(bill,weather_data) +# # # bd.optimize_setpoints(end_uses, weather_related_usage='Heating') +# # # output = bd.benchmarking_output() +# # # dhw_to_total = output[6]/ (output[3]) +# # # bill_evaluation = bill_evaluation.append({\ +# # # 'Building Id':205232, \ +# # # 'Usage': output[0],\ +# # # 'r squared': output[1],\ +# # # 'regr method': output[2],\ +# # # 'Consumption':output[3],\ +# # # 'Heating': format(output[4], '0.0f'),\ +# # # 'Cooling': format(output[5], '0.0f'),\ +# # # 'Non-weather-related-usage': format(output[6], '0.0f'),\ +# # # 'diff':format(output[7],'.2%'),\ +# # # 'hdd': format(output[8], '0.0f'),\ +# # # 'cdd': format(output[9], '0.0f'),\ +# # # 'Days in Bill': output[10],\ +# # # 'Unit Price':bd.avg_unit_price,\ +# # # 'Heating Setpoint': output[11],\ +# # # 'Cooling Setpoint': output[12] +# # # }, ignore_index = True) + +# # # print(bill_evaluation) +# # #%% +# # ''' +# # results +# # Building Id Usage r-squared regr method Consumption Heating Cooling \ +# # 205232.0 Heating 0.95385 2.0 10068.0 10239 0 + +# # Non-weather-related-usage diff hdd cdd Days in Bill Unit_Price \ +# # 0 -1.70% 9969 0 701.0 $1.013706 + +# # Heating Setpoint Cooling Setpoint +# # 0 70.401855 NaN +# # ''' + + +# #%% +# #%% -- GitLab From d84279866ae40df83d0da02d09cdedeac5a13cce Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 14 May 2019 14:33:24 -0400 Subject: [PATCH 05/97] format weather data calculation --- bpeng/bill/weather_data_cal.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bpeng/bill/weather_data_cal.py b/bpeng/bill/weather_data_cal.py index 82816be..72d7c2b 100644 --- a/bpeng/bill/weather_data_cal.py +++ b/bpeng/bill/weather_data_cal.py @@ -4,6 +4,7 @@ this python file will deal with cal related to temperature from weather data import pandas as pd + def weather_cleaning(raw_daily_temp): """ Format the daily temperature data from influx query @@ -12,18 +13,19 @@ def weather_cleaning(raw_daily_temp): raw_daily_temp (influx query): raw temperature data queried from Influx Returns: pd.DateFrame: Returns formatted daily temperature + """ - raw_daily_temp.rename( - columns={'time': 'date', - 'value': 'temperature'}, inplace=True) + raw_daily_temp.rename(columns={'time': 'date', + 'value': 'temperature'}, inplace=True) raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) + str(x.date().day), + str(x.date().year)])) daily_temp = raw_daily_temp daily_temp['date'] = pd.to_datetime(daily_temp['date']) return daily_temp + def bill_period_weather(bill_from_date, bill_end_date, weather_data_daily): """ get the outdoor temperaturebetween two date, return a list @@ -32,12 +34,13 @@ def bill_period_weather(bill_from_date, bill_end_date, weather_data_daily): bill_from_date (Datetime): start date of a period bill_end_date (Datetime): end date of a period + weather_data_daily(pd.DataFrame): daily temperature Returns: list: Returns a list of outdoor temperature for a period """ end_date_id = weather_data_daily[weather_data_daily.date == - bill_end_date].index[0] + bill_end_date].index[0] start_date_id = weather_data_daily[weather_data_daily.date == - bill_from_date].index[0] + bill_from_date].index[0] return list(weather_data_daily['temperature'][start_date_id:end_date_id]) -- GitLab From f9aed33d4b70b674ec1789aa4cbf2f6511fcd051 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 16:44:23 -0400 Subject: [PATCH 06/97] updates --- bpeng/bill/task.py | 7 ------- bpeng/bill/test-unit.py | 10 ++++++++++ bpeng/bill/test.py | 24 +++++++++++------------- 3 files changed, 21 insertions(+), 20 deletions(-) delete mode 100644 bpeng/bill/task.py create mode 100644 bpeng/bill/test-unit.py diff --git a/bpeng/bill/task.py b/bpeng/bill/task.py deleted file mode 100644 index cd034fb..0000000 --- a/bpeng/bill/task.py +++ /dev/null @@ -1,7 +0,0 @@ -# import pandas as pd - - -def task(): - return 1 - - diff --git a/bpeng/bill/test-unit.py b/bpeng/bill/test-unit.py new file mode 100644 index 0000000..9632121 --- /dev/null +++ b/bpeng/bill/test-unit.py @@ -0,0 +1,10 @@ +# import get_test_data + + +# weather = get_test_data.get_weather_data() + +# print(weather.tail()) + +import os + +print(os.getcwd()) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 488d88a..d914c34 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -5,6 +5,7 @@ import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta + from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) from weather_data_cal import (weather_cleaning, bill_period_weather) from regr import (regression_1, regression_2_summer_dhw) @@ -13,33 +14,30 @@ from calculater import (hdd, threshold) from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) end_uses = {'Miscellanous': 1} -raw_bill = query_bill(205232, 2) +raw_bill = query_bill(181794, 2) raw_weather_data_daily = get_weather_data() weather_related_usage_init = 'Unknown' - weather_data_daily = weather_cleaning(raw_weather_data_daily) formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable quality = bill_quality(formatted_bill) - if any(i == 'short' for i in quality.flag): processed_bill = short_bill_consolidate( formatted_bill, quality) else: processed_bill = formatted_bill -print(formatted_bill) - -# processed_bill['temperature'] = [ -# bill_period_weather(x, y, weather_data_daily) -# for x, y in zip(processed_bill['Bill From Date'], -# processed_bill['Bill To Date']) -# ] +processed_bill['temperature'] = [ + bill_period_weather(x, y, weather_data_daily) + for x, y in zip(processed_bill['Bill From Date'], + processed_bill['Bill To Date']) +] -# processed_bill = processed_bill.sort_values('Bill From Date') -# formatted_bill = formatted_bill.sort_values('Bill From Date') -# formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] +processed_bill = processed_bill.sort_values('Bill From Date') +formatted_bill = formatted_bill.sort_values('Bill From Date') +formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] +print('process bill:', processed_bill) # ############################################# above works # regression_method = 1 -- GitLab From 4d7082fdb64629877c81c48bd27cec58732007cb Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:08:21 -0400 Subject: [PATCH 07/97] regr-1 working --- bpeng/bill/regr.py | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index 45d2b02..b03f045 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -1,17 +1,16 @@ import warnings -from datetime import timedelta - import numpy as np import pandas as pd + +from datetime import timedelta from dateutil import relativedelta from sklearn import linear_model - -from calculater import (cdd,hdd,threshold,outliers_iqr) +from calculater import (cdd, hdd, threshold, outliers_iqr) warnings.simplefilter('ignore') -def regression_1(hp, cp, processed_bill): +def regression_1(hp, cp, bill): """ A linear regression model with heating and cooling set fixed @@ -28,33 +27,15 @@ def regression_1(hp, cp, processed_bill): 2d-array: a 2D numpy array of normalized billing period average daily HDDs and CDDs """ - - bill = processed_bill.copy() consumption = np.array(bill['Usage'] / bill['Days In Bill']) - - ahdd = [ - list(hdd(hp, xx) for xx in x) - for x in bill['temperature'] - ] - acdd = [ - list(cdd(cp, xx) for xx in x) - for x in bill['temperature'] - ] - + ahdd = [list(hdd(hp, xx) for xx in x) for x in bill['temperature']] + acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] # it should be billing period average hdd / days - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([ - threshold(daily_hdd[x], 0.1) - for x in range(len(daily_hdd)) - ]) - daily_cdd1 = np.array([ - threshold(daily_cdd[x], 0.1) - for x in range(len(daily_cdd)) - ]) + daily_hdd1 = np.array([threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) + daily_cdd1 = np.array([threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) regression_temp = np.array([daily_hdd1, daily_cdd1]).T @@ -64,6 +45,7 @@ def regression_1(hp, cp, processed_bill): return regr_model, score, regression_temp + def regression_2_summer_dhw(hp, processed_bill): """ This funcion uses summer month gas usage as base consumption for the year -- GitLab From 90d118f129bd5da2340397bef529ce460701cf7c Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:08:36 -0400 Subject: [PATCH 08/97] testing regr 1 --- bpeng/bill/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index d914c34..c6bc9a0 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -37,7 +37,12 @@ processed_bill['temperature'] = [ processed_bill = processed_bill.sort_values('Bill From Date') formatted_bill = formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] -print('process bill:', processed_bill) + +score = regression_1(72, 72, processed_bill)[1] + +print('r-squared:', score) + + # ############################################# above works # regression_method = 1 -- GitLab From d476ca0a8776ed5b10c3d5cbafdee1c5dd5e56a6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:29:31 -0400 Subject: [PATCH 09/97] update the regressison --- bpeng/bill/regr.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index b03f045..20cf24e 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -45,8 +45,7 @@ def regression_1(hp, cp, bill): return regr_model, score, regression_temp - -def regression_2_summer_dhw(hp, processed_bill): +def regression_2_summer_dhw(hp, bill): """ This funcion uses summer month gas usage as base consumption for the year A linear regression of weather-related consumption and a fixed heating system set point @@ -64,10 +63,7 @@ def regression_2_summer_dhw(hp, processed_bill): pd.DataFrame """ - - bill = processed_bill.copy() - ahdd = [[hdd(hp, xx) for xx in x] - for x in bill['temperature']] + ahdd = [[hdd(hp, xx) for xx in x] for x in bill['temperature']] # monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) @@ -76,31 +72,23 @@ def regression_2_summer_dhw(hp, processed_bill): if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] - if len(dhw_only_consumption) > 0: dhw_quality_index = outliers_iqr(list(dhw_only_consumption)) # list dhw_only_consumption_checked = [] - for xx in range(len(dhw_only_consumption)): if not dhw_quality_index[xx]: dhw_only_consumption_checked.append( list(dhw_only_consumption)[xx]) - daily_dhw = np.mean(dhw_only_consumption_checked) else: daily_dhw = 0 - else: daily_dhw = 0 - bill['dhw'] = daily_dhw * bill['Days In Bill'] - # 2018/01/30 # Daily hdd makes more sense - regression_temp = daily_hdd.reshape(-1, 1) consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] - regr_model = linear_model.LinearRegression(fit_intercept=False) regr_model.fit(regression_temp, consumption) score = regr_model.score(regression_temp, consumption) -- GitLab From 535a59ade666385bbd4147c405a230b2c0df6de9 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:31:25 -0400 Subject: [PATCH 10/97] change regression 2 function name --- bpeng/bill/regr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index 20cf24e..c4d11c4 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -45,7 +45,8 @@ def regression_1(hp, cp, bill): return regr_model, score, regression_temp -def regression_2_summer_dhw(hp, bill): + +def regression_2(hp, bill): """ This funcion uses summer month gas usage as base consumption for the year A linear regression of weather-related consumption and a fixed heating system set point @@ -54,7 +55,7 @@ def regression_2_summer_dhw(hp, bill): Args: hp(float): heating season indoor set point cp(float): cooling season indoor set point - processed_bill(pd.DataFrame): cleaned bill with daily temperature + bill(pd.DataFrame): cleaned bill with daily temperature Returns: sklearn.linear_model.LinearRegression: regression model -- GitLab From 0c7c76ea8cd6bd7bc6d16058349c65ddbd4d79b6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:54:05 -0400 Subject: [PATCH 11/97] work on the module for set point optimization --- bpeng/bill/setpoints_optimization.py | 285 +++++++++++++++++++++++++++ bpeng/bill/test.py | 4 +- 2 files changed, 287 insertions(+), 2 deletions(-) create mode 100644 bpeng/bill/setpoints_optimization.py diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py new file mode 100644 index 0000000..1967d39 --- /dev/null +++ b/bpeng/bill/setpoints_optimization.py @@ -0,0 +1,285 @@ + +import warnings +from datetime import timedelta + +import numpy as np +import pandas as pd +from dateutil import relativedelta +from scipy.optimize import minimize +from sklearn import linear_model + +from regr import (regression_1, regression_2) + + +def optimize_setpoints(weather_related_usage='Unknown', processed_bill): + """ + Main function for the optimization and disaggregation + + Args: + + usage (str): + Specify if the weather - related consumption is for heating or cooling + 'Unknown': no prior knowledge + 'Heating': only for heating + 'Cooling': only for cooling + 'Both': for both heating and cooling + 'Both Not': not for heating or cooling + default value: 'Unknown' + + """ + regression_method = 1 + + if weather_related_usage == 'Unknown': + opt = minimize( + lambda x: -regression_1(x[0], x[1], processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2,'disp': False}) + + regr = regression_1(opt.x[0], opt.x[1], processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + + if -opt.fun > 0.5: + if (heating_coef > 0) and (cooling_coef <= 0): + weather_related_usage = 'Heating' + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + elif (heating_coef >= 0) and (cooling_coef >= 0): + weather_related_usage = 'Both' + else: + weather_related_usage = 'Both Not' + + if weather_related_usage == 'Both': + opt = minimize( + lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + # change accordingly for JOENYC buildings + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + + # changes on Jan 17th 2018 + # please futher check with more bills + + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + # set the range of heating set point or cooling point - + if round(heating_set_point) in range( + 60, 95) and round(cooling_set_point) in range( + 55, 75): + weather_related_usage = 'Both' + heating_coef = heating_coef + cooling_coef = cooling_coef + + else: + # using standard seting points to check the bill + regr = self.regression_1(72, 65, self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + if (heating_coef > 0) and (cooling_coef < 0): + weather_related_usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) and (cooling_coef > 0): + weather_related_usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) and (cooling_coef <= 0): + weather_related_usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + elif (heating_coef > 0) and (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + weather_related_usage = 'Heating' + cooling_coef = 0 + else: + weather_related_usage = 'Both' + + if weather_related_usage == 'Heating': + opt_1 = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + opt_2 = minimize( + lambda x: -self.summer_dhw(x, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): + opt = opt_2 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.summer_dhw(opt.x[0], self.processed_bill) + regr_model = regr[0] + hdd = regr[2] + hdd_transit = [hdd[x][0] for x in range(len(hdd))] + hddcdd = np.array([[hdd_transit[x], 0] + for x in range(len(hdd))]) + regression_method = 2 + else: + if round(opt_1.x[0]) in range(60, 95): + opt = opt_1 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + else: + # legit heating set-point 72'F + heating_set_point = 72 + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) + + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + + if weather_related_usage == 'Cooling': + opt = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + regr = self.regression_1(opt.x[0], 300, self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + cooling_set_point = opt.x[0] + heating_set_point = np.NaN + + self.usage = weather_related_usage + + if self.usage == 'Both Not': + self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + + else: + self.regression_method = regression_method + + if self.regression_method == 1: + + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + if regr[1] > 0.5: + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array( + self.hddcdd[:, 1]) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill[ + 'Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] + + # real_sum = np.array(self.processed_bill['Usage']) + # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # self.others_consumption_pred + + # diff = real_sum - predict_sum + + else: + self.heating_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + self.usage = 'Both Not' + + elif self.regression_method == 2: + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + + regr[3]['dhw'] + + bill_cp = self.processed_bill.copy() + bill_cp = self.processed_bill[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + ]] + bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] + bill_cp['Heating Usage'] = self.heating_consumption_pred + bill_cp['Cooling Usage'] = self.cooling_consumption_pred + bill_cp['Other Usage'] = self.others_consumption_pred + + if self.usage == 'Both Not': + self.r_squared_of_fit = 0 + else: + self.r_squared_of_fit = regr[1] + + self.heating_set_point = heating_set_point + self.cooling_set_point = cooling_set_point + self.output_table = bill_cp + + last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] + first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + + billing_months = self.num_month_dates(last_bill_date, first_bill_date) + output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, + self.cooling_set_point, billing_months) + self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) + self.most_recent_monthly_output = self.output_to_month(last_bill_date, + self.heating_set_point, self.cooling_set_point, 12) + self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, + self.most_recent_monthly_output) + self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index c6bc9a0..be9fc18 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -8,7 +8,7 @@ from datetime import timedelta from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) from weather_data_cal import (weather_cleaning, bill_period_weather) -from regr import (regression_1, regression_2_summer_dhw) +from regr import (regression_1, regression_2) from get_test_data import (query_bill, get_weather_data) from calculater import (hdd, threshold) from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) @@ -38,7 +38,7 @@ processed_bill = processed_bill.sort_values('Bill From Date') formatted_bill = formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] -score = regression_1(72, 72, processed_bill)[1] +score = regression_2(72, processed_bill)[1] print('r-squared:', score) -- GitLab From 478cec3e65a9c51e18133becb3df339c107e758f Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 17:54:31 -0400 Subject: [PATCH 12/97] updates --- bpeng/bill/test.py | 261 --------------------------------------------- 1 file changed, 261 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index be9fc18..73f2bc4 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -37,264 +37,3 @@ processed_bill['temperature'] = [ processed_bill = processed_bill.sort_values('Bill From Date') formatted_bill = formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - -score = regression_2(72, processed_bill)[1] - -print('r-squared:', score) - - - -# ############################################# above works -# regression_method = 1 - -# #%% - -# if weather_related_usage_init == 'Unknown': -# weather_related_usage = \ -# determine_weather_usage_type_when_input_is_unknown(processed_bill) -# else: -# weather_related_usage = weather_related_usage_init - - -# if weather_related_usage == 'Both': -# opt = minimize( -# lambda x: -regression_1(x[0], x[1], processed_bill)[1], -# (65, 65), -# method='nelder-mead', -# options={'xtol': 1e-2, -# 'disp': False}) - -# regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) -# regr_model = regr_temp[0] -# heating_coef, cooling_coef = regr_model.coef_ -# hddcdd = regr_temp[2] -# heating_set_point = opt.x[0] -# cooling_set_point = opt.x[1] -# # change accordingly for JOENYC buildings -# print('set_points', heating_set_point, cooling_set_point) - -# if (heating_coef > 0) and (cooling_coef < 0): -# weather_related_usage = 'Heating' -# cooling_coef = 0 -# elif (heating_coef <= 0) and (cooling_coef > 0): -# weather_related_usage = 'Cooling' -# heating_coef = 0 -# elif (heating_coef <= 0) and (cooling_coef <= 0): -# weather_related_usage = 'Both Not' -# heating_coef = 0 -# cooling_coef = 0 -# elif (heating_coef > 0) and (cooling_coef > 0): -# if heating_coef / cooling_coef > 5: -# weather_related_usage = 'Heating' -# cooling_coef = 0 -# else: -# # set the range of heating set point or cooling point - -# if round(heating_set_point) in range( -# 60, 95) and round(cooling_set_point) in range( -# 55, 75): -# weather_related_usage = 'Both' -# heating_coef = heating_coef -# cooling_coef = cooling_coef - -# else: -# # using standard seting points to check the bill -# regr = regression_1(72, 65, processed_bill) -# regr_model = regr[0] -# heating_coef, cooling_coef = regr_model.coef_ -# hddcdd = regr[2] -# heating_set_point = opt.x[0] -# cooling_set_point = opt.x[1] - -# if (heating_coef > 0) and (cooling_coef < 0): -# weather_related_usage = 'Heating' -# cooling_coef = 0 -# elif (heating_coef <= 0) and (cooling_coef > 0): -# weather_related_usage = 'Cooling' -# heating_coef = 0 -# elif (heating_coef <= 0) and (cooling_coef <= 0): -# weather_related_usage = 'Both Not' -# heating_coef = 0 -# cooling_coef = 0 -# elif (heating_coef > 0) and (cooling_coef > 0): -# if heating_coef / cooling_coef > 5: -# weather_related_usage = 'Heating' -# cooling_coef = 0 -# else: -# weather_related_usage = 'Both' - -# if weather_related_usage == 'Heating': -# opt_1 = minimize( -# lambda x: regression_1(x, 300,processed_bill)[1], -# 65, -# method='nelder-mead', -# options={'xtol': 1e-2, -# 'disp': False}) - -# opt_2 = minimize( -# lambda x: -regression_2_summer_dhw(x, processed_bill)[1], -# 65, -# method='nelder-mead', -# options={'xtol': 1e-2, -# 'disp': False}) - -# if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): -# opt = opt_2 -# heating_set_point = opt.x[0] -# cooling_set_point = np.NaN -# regr = regression_2_summer_dhw(opt.x[0], processed_bill) -# regr_model = regr[0] -# hdd = regr[2] -# hdd_transit = [hdd[x][0] for x in range(len(hdd))] -# hddcdd = np.array([[hdd_transit[x], 0] -# for x in range(len(hdd))]) -# regression_method = 2 -# else: -# if round(opt_1.x[0]) in range(60, 95): -# opt = opt_1 -# heating_set_point = opt.x[0] -# cooling_set_point = np.NaN -# regr_temp = regression_1(heating_set_point, 300, -# processed_bill) -# regr_model = regr_temp[0] -# hddcdd = regr_temp[2] -# heating_coef = regr_model.coef_ -# cooling_coef = 0 -# else: -# # legit heating set-point 72'F -# heating_set_point = 72 -# cooling_set_point = np.NaN -# regr_temp = regression_1(heating_set_point, 300, -# processed_bill) - -# regr_model = regr_temp[0] -# hddcdd = regr_temp[2] -# heating_coef = regr_model.coef_ -# cooling_coef = 0 - -# if weather_related_usage == 'Cooling': -# opt = minimize( -# lambda x: regression_1(x, 300, processed_bill)[1], -# 65, -# method='nelder-mead', -# options={'xtol': 1e-2, -# 'disp': False}) -# regr_temp = regression_1(opt.x[0], 300, processed_bill) -# regr_model = regr_temp[0] -# hddcdd = regr_temp[2] -# cooling_set_point = opt.x[0] -# heating_set_point = np.NaN - - -# #%% -# if self.usage == 'Both Not': -# self.heating_consumption_pred = self.processed_bill['Usage'] * 0 -# self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 -# self.others_consumption_pred = self.processed_bill['Usage'] -# self.regression_method = 0 -# self.hddcdd = np.zeros((len(self.processed_bill), 2)) -# cooling_set_point = np.NaN -# heating_set_point = np.NaN - -# else: -# self.regression_method = regression_method - -# if self.regression_method == 1: - -# self.hddcdd = np.array( -# pd.DataFrame(hddcdd).mul( -# list(self.processed_bill['Days In Bill']), axis=0)) - -# if regr[1] > 0.5: -# self.regr_model = regr_model -# self.heating_consumption_pred = np.array( -# self.hddcdd[:, 0]) * regr_model.coef_[0] -# self.cooling_consumption_pred = np.array( -# self.hddcdd[:, 1]) * regr_model.coef_[1] - -# if self.regr_model.intercept_ < 0: -# self.others_consumption_pred = 0 * self.processed_bill[ -# 'Days In Bill'] -# else: -# self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ -# 'Days In Bill'] - -# # real_sum = np.array(self.processed_bill['Usage']) -# # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ -# # self.others_consumption_pred - -# # diff = real_sum - predict_sum - -# else: -# self.heating_consumption_pred = self.processed_bill[ -# 'Days In Bill'] * 0 -# self.cooling_consumption_pred = self.processed_bill[ -# 'Days In Bill'] * 0 -# self.others_consumption_pred = self.processed_bill['Usage'] -# self.regression_method = 0 -# self.hddcdd = np.zeros((len(self.processed_bill), 2)) -# cooling_set_point = np.NaN -# heating_set_point = np.NaN -# self.usage = 'Both Not' - -# elif self.regression_method == 2: -# self.hddcdd = np.array( -# pd.DataFrame(hddcdd).mul( -# list(self.processed_bill['Days In Bill']), axis=0)) - -# self.regr_model = regr_model -# self.heating_consumption_pred = np.array( -# self.hddcdd[:, 0]) * self.regr_model.coef_[0] -# self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 - -# if self.regr_model.intercept_ < 0: -# self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] -# else: -# self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ -# + regr[3]['dhw'] - -# #%% - -# # # bill_evaluation = pd.DataFrame(columns=['Building Id','Usage', 'r squared','regr method', 'Consumption', 'Heating',\ -# # # 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ -# # # 'Heating Setpoint', 'Cooling Setpoint']) - - -# # # bd = BillDisaggregation(bill,weather_data) -# # # bd.optimize_setpoints(end_uses, weather_related_usage='Heating') -# # # output = bd.benchmarking_output() -# # # dhw_to_total = output[6]/ (output[3]) -# # # bill_evaluation = bill_evaluation.append({\ -# # # 'Building Id':205232, \ -# # # 'Usage': output[0],\ -# # # 'r squared': output[1],\ -# # # 'regr method': output[2],\ -# # # 'Consumption':output[3],\ -# # # 'Heating': format(output[4], '0.0f'),\ -# # # 'Cooling': format(output[5], '0.0f'),\ -# # # 'Non-weather-related-usage': format(output[6], '0.0f'),\ -# # # 'diff':format(output[7],'.2%'),\ -# # # 'hdd': format(output[8], '0.0f'),\ -# # # 'cdd': format(output[9], '0.0f'),\ -# # # 'Days in Bill': output[10],\ -# # # 'Unit Price':bd.avg_unit_price,\ -# # # 'Heating Setpoint': output[11],\ -# # # 'Cooling Setpoint': output[12] -# # # }, ignore_index = True) - -# # # print(bill_evaluation) -# # #%% -# # ''' -# # results -# # Building Id Usage r-squared regr method Consumption Heating Cooling \ -# # 205232.0 Heating 0.95385 2.0 10068.0 10239 0 - -# # Non-weather-related-usage diff hdd cdd Days in Bill Unit_Price \ -# # 0 -1.70% 9969 0 701.0 $1.013706 - -# # Heating Setpoint Cooling Setpoint -# # 0 70.401855 NaN -# # ''' - - -# #%% -# #%% -- GitLab From ba10c92cacfe6d78e00f39c658e10e9b4274898e Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 5 Jun 2019 18:15:12 -0400 Subject: [PATCH 13/97] updates adding set points optimizations --- bpeng/bill/setpoints_optimization.py | 263 +++++++++++++-------------- bpeng/bill/test.py | 7 + 2 files changed, 136 insertions(+), 134 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 1967d39..45a60b8 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -10,8 +10,7 @@ from sklearn import linear_model from regr import (regression_1, regression_2) - -def optimize_setpoints(weather_related_usage='Unknown', processed_bill): +def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): """ Main function for the optimization and disaggregation @@ -27,6 +26,8 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): default value: 'Unknown' """ + designed_heating_temp = 72 + designed_cooling_temp = 65 regression_method = 1 if weather_related_usage == 'Unknown': @@ -36,8 +37,8 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): method='nelder-mead', options={'xtol': 1e-2,'disp': False}) - regr = regression_1(opt.x[0], opt.x[1], processed_bill) - regr_model = regr[0] + regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) + regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ if -opt.fun > 0.5: @@ -54,16 +55,16 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): if weather_related_usage == 'Both': opt = minimize( - lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + lambda x: -regression_1(x[0], x[1],processed_bill)[1], (65, 65), method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) - regr_model = regr[0] + regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) + regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr[2] + hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] @@ -79,10 +80,8 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): weather_related_usage = 'Both Not' heating_coef = 0 cooling_coef = 0 - # changes on Jan 17th 2018 # please futher check with more bills - elif (heating_coef > 0) and (cooling_coef > 0): if heating_coef / cooling_coef > 5: weather_related_usage = 'Heating' @@ -95,13 +94,12 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): weather_related_usage = 'Both' heating_coef = heating_coef cooling_coef = cooling_coef - else: # using standard seting points to check the bill - regr = self.regression_1(72, 65, self.processed_bill) - regr_model = regr[0] + regr_output = regression_1(designed_heating_temp, designed_cooling_temp, processed_bill) + regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr[2] + hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] @@ -124,14 +122,14 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): if weather_related_usage == 'Heating': opt_1 = minimize( - lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + lambda x: -regression_1(x, 300, processed_bill)[1], 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) opt_2 = minimize( - lambda x: -self.summer_dhw(x, self.processed_bill)[1], + lambda x: -regression_2(x, processed_bill)[1], 65, method='nelder-mead', options={'xtol': 1e-2, @@ -141,145 +139,142 @@ def optimize_setpoints(weather_related_usage='Unknown', processed_bill): opt = opt_2 heating_set_point = opt.x[0] cooling_set_point = np.NaN - regr = self.summer_dhw(opt.x[0], self.processed_bill) - regr_model = regr[0] - hdd = regr[2] + regr_output = regression_2(opt.x[0], processed_bill) + regr_model = regr_output[0] + hdd = regr_output[2] hdd_transit = [hdd[x][0] for x in range(len(hdd))] - hddcdd = np.array([[hdd_transit[x], 0] - for x in range(len(hdd))]) + hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) regression_method = 2 else: if round(opt_1.x[0]) in range(60, 95): opt = opt_1 heating_set_point = opt.x[0] cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point, 300, - self.processed_bill) - regr_model = regr[0] - hddcdd = regr[2] + regr_output = regression_1(heating_set_point, 300, processed_bill) + regr_model = regr_output[0] + hddcdd = regr_output[2] heating_coef = regr_model.coef_ cooling_coef = 0 else: # legit heating set-point 72'F - heating_set_point = 72 + heating_set_point = designed_heating_temp cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point, 300, - self.processed_bill) - - regr_model = regr[0] - hddcdd = regr[2] + regr_output = regression_1(heating_set_point, 300, processed_bill) + regr_model = regr_output[0] + hddcdd = regr_output[2] heating_coef = regr_model.coef_ cooling_coef = 0 if weather_related_usage == 'Cooling': opt = minimize( - lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + lambda x: -regression_1(x, 300, processed_bill)[1], 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - regr = self.regression_1(opt.x[0], 300, self.processed_bill) - regr_model = regr[0] - hddcdd = regr[2] + regr_output = regression_1(opt.x[0], 300, processed_bill) + regr_model = regr_output[0] + hddcdd = regr_output[2] cooling_set_point = opt.x[0] heating_set_point = np.NaN - self.usage = weather_related_usage - - if self.usage == 'Both Not': - self.heating_consumption_pred = self.processed_bill['Usage'] * 0 - self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - - else: - self.regression_method = regression_method - - if self.regression_method == 1: - - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - if regr[1] > 0.5: - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * regr_model.coef_[0] - self.cooling_consumption_pred = np.array( - self.hddcdd[:, 1]) * regr_model.coef_[1] - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill[ - 'Days In Bill'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ - 'Days In Bill'] - - # real_sum = np.array(self.processed_bill['Usage']) - # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ - # self.others_consumption_pred - - # diff = real_sum - predict_sum - - else: - self.heating_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - self.usage = 'Both Not' - - elif self.regression_method == 2: - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * self.regr_model.coef_[0] - self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - + regr[3]['dhw'] - - bill_cp = self.processed_bill.copy() - bill_cp = self.processed_bill[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' - ]] - bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] - bill_cp['Heating Usage'] = self.heating_consumption_pred - bill_cp['Cooling Usage'] = self.cooling_consumption_pred - bill_cp['Other Usage'] = self.others_consumption_pred - - if self.usage == 'Both Not': - self.r_squared_of_fit = 0 - else: - self.r_squared_of_fit = regr[1] - - self.heating_set_point = heating_set_point - self.cooling_set_point = cooling_set_point - self.output_table = bill_cp - - last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] - first_bill_date = self.processed_bill['Bill From Date'].iloc[0] - - billing_months = self.num_month_dates(last_bill_date, first_bill_date) - output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, - self.cooling_set_point, billing_months) - self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) - self.most_recent_monthly_output = self.output_to_month(last_bill_date, - self.heating_set_point, self.cooling_set_point, 12) - self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, - self.most_recent_monthly_output) - self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) + optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage] + return optimization_output + + # if self.usage == 'Both Not': + # self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + # self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + # self.others_consumption_pred = self.processed_bill['Usage'] + # self.regression_method = 0 + # self.hddcdd = np.zeros((len(self.processed_bill), 2)) + # cooling_set_point = np.NaN + # heating_set_point = np.NaN + + # else: + # self.regression_method = regression_method + + # if self.regression_method == 1: + + # self.hddcdd = np.array( + # pd.DataFrame(hddcdd).mul( + # list(self.processed_bill['Days In Bill']), axis=0)) + + # if regr[1] > 0.5: + # self.regr_model = regr_model + # self.heating_consumption_pred = np.array( + # self.hddcdd[:, 0]) * regr_model.coef_[0] + # self.cooling_consumption_pred = np.array( + # self.hddcdd[:, 1]) * regr_model.coef_[1] + + # if self.regr_model.intercept_ < 0: + # self.others_consumption_pred = 0 * self.processed_bill[ + # 'Days In Bill'] + # else: + # self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + # 'Days In Bill'] + + # # real_sum = np.array(self.processed_bill['Usage']) + # # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # # self.others_consumption_pred + + # # diff = real_sum - predict_sum + + # else: + # self.heating_consumption_pred = self.processed_bill[ + # 'Days In Bill'] * 0 + # self.cooling_consumption_pred = self.processed_bill[ + # 'Days In Bill'] * 0 + # self.others_consumption_pred = self.processed_bill['Usage'] + # self.regression_method = 0 + # self.hddcdd = np.zeros((len(self.processed_bill), 2)) + # cooling_set_point = np.NaN + # heating_set_point = np.NaN + # self.usage = 'Both Not' + + # elif self.regression_method == 2: + # self.hddcdd = np.array( + # pd.DataFrame(hddcdd).mul( + # list(self.processed_bill['Days In Bill']), axis=0)) + + # self.regr_model = regr_model + # self.heating_consumption_pred = np.array( + # self.hddcdd[:, 0]) * self.regr_model.coef_[0] + # self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + # if self.regr_model.intercept_ < 0: + # self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + # else: + # self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + # + regr[3]['dhw'] + + # bill_cp = self.processed_bill.copy() + # bill_cp = self.processed_bill[[ + # 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + # ]] + # bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] + # bill_cp['Heating Usage'] = self.heating_consumption_pred + # bill_cp['Cooling Usage'] = self.cooling_consumption_pred + # bill_cp['Other Usage'] = self.others_consumption_pred + + # if self.usage == 'Both Not': + # self.r_squared_of_fit = 0 + # else: + # self.r_squared_of_fit = regr[1] + + # self.heating_set_point = heating_set_point + # self.cooling_set_point = cooling_set_point + # self.output_table = bill_cp + + # last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] + # first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + + # billing_months = self.num_month_dates(last_bill_date, first_bill_date) + # output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, + # self.cooling_set_point, billing_months) + # self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) + # self.most_recent_monthly_output = self.output_to_month(last_bill_date, + # self.heating_set_point, self.cooling_set_point, 12) + # self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + # self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, + # self.most_recent_monthly_output) + # self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 73f2bc4..be6ee2c 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -13,6 +13,9 @@ from get_test_data import (query_bill, get_weather_data) from calculater import (hdd, threshold) from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) +from setpoints_optimization import optimize_setpoints + + end_uses = {'Miscellanous': 1} raw_bill = query_bill(181794, 2) raw_weather_data_daily = get_weather_data() @@ -37,3 +40,7 @@ processed_bill['temperature'] = [ processed_bill = processed_bill.sort_values('Bill From Date') formatted_bill = formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] + + +test_results = optimize_setpoints(processed_bill) +print('hi:', test_results) -- GitLab From be7dab5e8f10b0ff019c46ae9a03dfc202bc0d75 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:00:12 -0400 Subject: [PATCH 14/97] adding regr method to the output --- bpeng/bill/setpoints_optimization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 45a60b8..762e8c0 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -64,7 +64,7 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr_output[2] + # hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] @@ -178,7 +178,7 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = opt.x[0] heating_set_point = np.NaN - optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage] + optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage,regression_method] return optimization_output # if self.usage == 'Both Not': -- GitLab From a5856aed3876fb5b9f84a0ac15f3b9cea945fb9a Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:01:33 -0400 Subject: [PATCH 15/97] updates --- bpeng/bill/setpoints_optimization.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 762e8c0..c59d356 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -178,7 +178,8 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = opt.x[0] heating_set_point = np.NaN - optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage,regression_method] + + optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] return optimization_output # if self.usage == 'Both Not': -- GitLab From dc8fb2739c7eac6c3fc5e9ae557b8d77f2f776b0 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:01:50 -0400 Subject: [PATCH 16/97] delete unused file --- bpeng/bill/test-unit.py | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 bpeng/bill/test-unit.py diff --git a/bpeng/bill/test-unit.py b/bpeng/bill/test-unit.py deleted file mode 100644 index 9632121..0000000 --- a/bpeng/bill/test-unit.py +++ /dev/null @@ -1,10 +0,0 @@ -# import get_test_data - - -# weather = get_test_data.get_weather_data() - -# print(weather.tail()) - -import os - -print(os.getcwd()) -- GitLab From 8e649f6df141097e5012a82d6332a9648ca092ba Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:02:07 -0400 Subject: [PATCH 17/97] formatting --- bpeng/bill/normailize_usage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bpeng/bill/normailize_usage.py b/bpeng/bill/normailize_usage.py index 11d50d2..a7f5459 100644 --- a/bpeng/bill/normailize_usage.py +++ b/bpeng/bill/normailize_usage.py @@ -2,6 +2,9 @@ This module will create a normailized usage based on a raw bill natural billing periods should be refactor to a class ''' + + + def find_index_in_first_raw_biil(self, norm_bill_date): """ Return the index of the row of raw bill contains the bill date from a normalized bill -- GitLab From 24427aa5d2d7bef70575118f9e6dec9060ab9068 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:02:21 -0400 Subject: [PATCH 18/97] formatting --- bpeng/bill/calculater.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bpeng/bill/calculater.py b/bpeng/bill/calculater.py index 3840b0a..a36b870 100644 --- a/bpeng/bill/calculater.py +++ b/bpeng/bill/calculater.py @@ -1,5 +1,3 @@ - - import numpy as np import pandas as pd -- GitLab From fe45465f8284081e36d84d52bc405db9f44ebe62 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:37:11 -0400 Subject: [PATCH 19/97] delete unused files --- bpeng/bill/output_natural_usage.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 bpeng/bill/output_natural_usage.py diff --git a/bpeng/bill/output_natural_usage.py b/bpeng/bill/output_natural_usage.py deleted file mode 100644 index 9fda912..0000000 --- a/bpeng/bill/output_natural_usage.py +++ /dev/null @@ -1,2 +0,0 @@ -''' -this module should get out put -- GitLab From 77b7ae1dc3cf7bd675e6bd059826c4084f08e03b Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:37:31 -0400 Subject: [PATCH 20/97] add 'both not' scenario --- bpeng/bill/setpoints_optimization.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index c59d356..36ca496 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -99,7 +99,7 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): regr_output = regression_1(designed_heating_temp, designed_cooling_temp, processed_bill) regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr_output[2] + # hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] @@ -141,9 +141,9 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = np.NaN regr_output = regression_2(opt.x[0], processed_bill) regr_model = regr_output[0] - hdd = regr_output[2] - hdd_transit = [hdd[x][0] for x in range(len(hdd))] - hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) + # hdd = regr_output[2] + # hdd_transit = [hdd[x][0] for x in range(len(hdd))] + # hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) regression_method = 2 else: if round(opt_1.x[0]) in range(60, 95): @@ -152,7 +152,6 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = np.NaN regr_output = regression_1(heating_set_point, 300, processed_bill) regr_model = regr_output[0] - hddcdd = regr_output[2] heating_coef = regr_model.coef_ cooling_coef = 0 else: @@ -161,7 +160,6 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = np.NaN regr_output = regression_1(heating_set_point, 300, processed_bill) regr_model = regr_output[0] - hddcdd = regr_output[2] heating_coef = regr_model.coef_ cooling_coef = 0 @@ -174,10 +172,15 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): 'disp': False}) regr_output = regression_1(opt.x[0], 300, processed_bill) regr_model = regr_output[0] - hddcdd = regr_output[2] cooling_set_point = opt.x[0] heating_set_point = np.NaN + if weather_related_usage == 'Both Not': + regression_method = 0 + cooling_set_point = np.NaN + heating_set_point = np.NaN + regr_output = np.NaN + optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] return optimization_output -- GitLab From beed26810edc533d74046fd7d99c1dc600297703 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:37:45 -0400 Subject: [PATCH 21/97] wip --- bpeng/bill/output_natural_billing_period.py | 122 ++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 bpeng/bill/output_natural_billing_period.py diff --git a/bpeng/bill/output_natural_billing_period.py b/bpeng/bill/output_natural_billing_period.py new file mode 100644 index 0000000..60d87dd --- /dev/null +++ b/bpeng/bill/output_natural_billing_period.py @@ -0,0 +1,122 @@ +''' +This module breaks down the energy usage by its weather-related features and non-weather-related features. +The inputs of this module is a cleaned bill, the best fitted regression model, and the non-weather-related usage info. +The outputs of this module is the disaggragated results based on the input informaiton. + +Author: Doris Han +''' + +class disaggragation_output(): + + def __init__(self, processed_bill, regr_feature, non_weather_usage): + self.processed_bill = processed_bill + self.regr_feature = regr_feature + self.non_weather_usage = non_weather_usage + self.regr_model = self.regr_feature[0][0] + self.heating_set_point = self.regr_feature[1] + self.cooling_set_point = self.regr_feature[2] + self.weather_related_usage = self.regr_feature[3] + self.regression_method = self.regr_feature[4] + #optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] + + + def weather_ralated_breakdown(): + + if self.usage == 'Both Not': + self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + + + else: + self.regression_method = regression_method + + if self.regression_method == 1: + + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + if regr[1] > 0.5: + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array( + self.hddcdd[:, 1]) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill[ + 'Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] + + # real_sum = np.array(self.processed_bill['Usage']) + # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # self.others_consumption_pred + + # diff = real_sum - predict_sum + + else: + self.heating_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + self.usage = 'Both Not' + + elif self.regression_method == 2: + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) + + self.regr_model = regr_model + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + + regr[3]['dhw'] + + bill_cp = self.processed_bill.copy() + bill_cp = self.processed_bill[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + ]] + bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] + bill_cp['Heating Usage'] = self.heating_consumption_pred + bill_cp['Cooling Usage'] = self.cooling_consumption_pred + bill_cp['Other Usage'] = self.others_consumption_pred + + if self.usage == 'Both Not': + self.r_squared_of_fit = 0 + else: + self.r_squared_of_fit = regr[1] + + self.heating_set_point = heating_set_point + self.cooling_set_point = cooling_set_point + self.output_table = bill_cp + + last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] + first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + + billing_months = self.num_month_dates(last_bill_date, first_bill_date) + output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, + self.cooling_set_point, billing_months) + self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) + self.most_recent_monthly_output = self.output_to_month(last_bill_date, + self.heating_set_point, self.cooling_set_point, 12) + self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, + self.most_recent_monthly_output) + self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) + + + -- GitLab From 47bb8fb3b6a5b9ff0027029e006de73a11a6cf3b Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 15:38:07 -0400 Subject: [PATCH 22/97] commented out code not in use --- bpeng/bill/weather_related_usage_type.py | 60 ++++++++++++------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/bpeng/bill/weather_related_usage_type.py b/bpeng/bill/weather_related_usage_type.py index 5c33e4a..1717804 100644 --- a/bpeng/bill/weather_related_usage_type.py +++ b/bpeng/bill/weather_related_usage_type.py @@ -1,35 +1,35 @@ -import pandas as pd -import numpy as np -import datetime as datetime -from scipy.optimize import minimize -from regr import regression_1 +# import pandas as pd +# import numpy as np +# import datetime as datetime +# from scipy.optimize import minimize +# from regr import regression_1 -def determine_weather_usage_type_when_input_is_unknown(processed_bill): - ''' - This function is to determine the weather_related_usage_type when the input is unknown - ''' - opt = minimize( - lambda x: -regression_1(x[0], x[1], processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) +# def determine_weather_usage_type_when_input_is_unknown(processed_bill): +# ''' +# This function is to determine the weather_related_usage_type when the input is unknown +# ''' +# opt = minimize( +# lambda x: -regression_1(x[0], x[1], processed_bill)[1], +# (65, 65), +# method='nelder-mead', +# options={'xtol': 1e-2, +# 'disp': False}) - regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) - regr_model = regr_temp[0] - heating_coef, cooling_coef = regr_model.coef_ +# regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) +# regr_model = regr_temp[0] +# heating_coef, cooling_coef = regr_model.coef_ - if -opt.fun > 0.5: - if (heating_coef > 0) and (cooling_coef <= 0): - weather_related_usage = 'Heating' - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - elif (heating_coef >= 0) and (cooling_coef >= 0): - weather_related_usage = 'Both' - else: - weather_related_usage = 'Both Not' +# if -opt.fun > 0.5: +# if (heating_coef > 0) and (cooling_coef <= 0): +# weather_related_usage = 'Heating' +# elif (heating_coef <= 0) and (cooling_coef > 0): +# weather_related_usage = 'Cooling' +# elif (heating_coef <= 0) and (cooling_coef <= 0): +# weather_related_usage = 'Both Not' +# elif (heating_coef >= 0) and (cooling_coef >= 0): +# weather_related_usage = 'Both' +# else: +# weather_related_usage = 'Both Not' - return weather_related_usage +# return weather_related_usage -- GitLab From e6431cb9090eacb8116bdae7ba5a053e3ee11f7a Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 6 Jun 2019 16:53:15 -0400 Subject: [PATCH 23/97] update the disaggregation class --- bpeng/bill/output_natural_billing_period.py | 164 ++++++++++++-------- 1 file changed, 96 insertions(+), 68 deletions(-) diff --git a/bpeng/bill/output_natural_billing_period.py b/bpeng/bill/output_natural_billing_period.py index 60d87dd..f0a8cc6 100644 --- a/bpeng/bill/output_natural_billing_period.py +++ b/bpeng/bill/output_natural_billing_period.py @@ -5,6 +5,15 @@ The outputs of this module is the disaggragated results based on the input infor Author: Doris Han ''' +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd +from dateutil import relativedelta +from scipy.optimize import minimize +from sklearn import linear_model + + class disaggragation_output(): @@ -17,92 +26,109 @@ class disaggragation_output(): self.cooling_set_point = self.regr_feature[2] self.weather_related_usage = self.regr_feature[3] self.regression_method = self.regr_feature[4] - #optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] + self.disaggragated_bill = None + # optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] + # regr_output = regr_model, score, regression_temp, bill def weather_ralated_breakdown(): - if self.usage == 'Both Not': - self.heating_consumption_pred = self.processed_bill['Usage'] * 0 - self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 + if self.regression_method == 0 : + heating_consump = self.processed_bill['Usage'] * 0 + cooling_consump = self.processed_bill['Usage'] * 0 + non_weather_related_consump = self.processed_bill['Usage'] + if self.regression_method == 1: + r_squared_of_fit = self.regr_feature[0][1] + regression_temp = self.regr_feature[0][2] + heating_coef = self.regr_model.coef_[0] + cooling_coef = self.regr_model.coef_[1] + hddcdd = np.array(pd.DataFrame(regression_temp).mul(list(self.processed_bill['Days In Bill']), axis=0)) - else: - self.regression_method = regression_method + if np.absolute(r_squared_of_fit) > 0.5: + heating_consump = np.array(hddcdd[:, 0]) * heating_coef + cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef - if self.regression_method == 1: - - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( + if self.regr_model.intercept_ < 0: + non_weather_related_consump = 0 * self.processed_bill['Days In Bill'] + else: + non_weather_related_consump = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] + + if self.regression_method == 2: + regression_temp = self.regr_feature[0][2] + dhw_usage = self.regr_feature[0][3] + regression_temp_transit = [regression_temp[x][0] for x in range(len(regression_temp))] + hddcdd = np.array([[regression_temp_transit[x], 0] for x in range(len(regression_temp))]) + hddcdd = np.array(pd.DataFrame(hddcdd).mul( list(self.processed_bill['Days In Bill']), axis=0)) + heating_coef = self.regr_model.coef_[0] + cooling_coef = 0 + heating_consump = np.array(hddcdd[:, 0]) * heating_coef + cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef - if regr[1] > 0.5: - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * regr_model.coef_[0] - self.cooling_consumption_pred = np.array( - self.hddcdd[:, 1]) * regr_model.coef_[1] + if self.regr_model.intercept_ < 0: + non_weather_related_consump = 0 * self.processed_bill['Days In Bill'] + dhw_usage + else: + non_weather_related_consump = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] + dhw_usage - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill[ - 'Days In Bill'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ - 'Days In Bill'] - # real_sum = np.array(self.processed_bill['Usage']) - # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ - # self.others_consumption_pred - # diff = real_sum - predict_sum + disaggragated_bill = self.processed_bill.copy() + disaggragated_bill = self.processed_bill[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + ]] + disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] + disaggragated_bill['Heating Usage'] = heating_consump + disaggragated_bill['Cooling Usage'] = cooling_consump + disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump - else: - self.heating_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - self.usage = 'Both Not' - - elif self.regression_method == 2: - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) + self.disaggragated_bill = disaggragated_bill - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * self.regr_model.coef_[0] - self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + return disaggragated_bill - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - + regr[3]['dhw'] - bill_cp = self.processed_bill.copy() - bill_cp = self.processed_bill[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' - ]] - bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] - bill_cp['Heating Usage'] = self.heating_consumption_pred - bill_cp['Cooling Usage'] = self.cooling_consumption_pred - bill_cp['Other Usage'] = self.others_consumption_pred - if self.usage == 'Both Not': - self.r_squared_of_fit = 0 - else: - self.r_squared_of_fit = regr[1] + ######### trim the bills - self.heating_set_point = heating_set_point - self.cooling_set_point = cooling_set_point - self.output_table = bill_cp + def non_weahter_related_breakdown(self, end_uses, disaggragated_bill): + """ + breakdown the non_weather_related_usage + + Args: + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage + disaggragated_bill (pd.DataFrame): bills have been breakdown to heating, cooling, and non-weather-related-comsump + Returns: + pd.DataFrame: bill breakdown of all end-use + """ + + bill = disaggragated_bill.copy() + eu = pd.DataFrame( + list(end_uses.items()), columns=['end use', 'percentage']) + for i in range(len(eu)): + name_of_the_column = eu['end use'].iloc[i] + value_of_the_column = eu['percentage'].iloc[i] + monthly_usages[name_of_the_column] = monthly_usages[ + 'Other Usage'] * value_of_the_column + + if sum(eu['percentage']) != 1: + monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * ( + 1 - sum(eu['percentage'])) + + return monthly_usages + + + + def disaggragated_bill_monthly(): + + + def output_to_dashboard(): + ''' + to meet the dashboard requirements for utility page + further breakdown the bill to month + + ''' last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] first_bill_date = self.processed_bill['Bill From Date'].iloc[0] @@ -113,7 +139,9 @@ class disaggragation_output(): self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) self.most_recent_monthly_output = self.output_to_month(last_bill_date, self.heating_set_point, self.cooling_set_point, 12) + self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.most_recent_monthly_output) self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) -- GitLab From e0da4dd9b4bfe06a70c21fb95eb09170975813b6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 12:02:41 -0400 Subject: [PATCH 24/97] update the regr 2, with cooling coef_ as 0 --- bpeng/bill/regr.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index c4d11c4..7040e77 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -64,10 +64,13 @@ def regression_2(hp, bill): pd.DataFrame """ + impossible_cooling_temp = 300 + cp = impossible_cooling_temp ahdd = [[hdd(hp, xx) for xx in x] for x in bill['temperature']] + acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] # monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) # daily dhw usage bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) @@ -88,7 +91,7 @@ def regression_2(hp, bill): bill['dhw'] = daily_dhw * bill['Days In Bill'] # 2018/01/30 # Daily hdd makes more sense - regression_temp = daily_hdd.reshape(-1, 1) + regression_temp = np.array([daily_hdd, daily_cdd]).T consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] regr_model = linear_model.LinearRegression(fit_intercept=False) regr_model.fit(regression_temp, consumption) -- GitLab From 5c2dcd72ddc82ca078c86ec27170fb983fbcc035 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 12:02:58 -0400 Subject: [PATCH 25/97] updated the opt regr matrix --- bpeng/bill/setpoints_optimization.py | 147 ++++++--------------------- 1 file changed, 30 insertions(+), 117 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 36ca496..1bcb057 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -1,13 +1,12 @@ +''' +This module is to calculate the most fitted regression model and return the regression model matrix +and the system heating and cooling set points. +''' import warnings -from datetime import timedelta - import numpy as np -import pandas as pd -from dateutil import relativedelta from scipy.optimize import minimize from sklearn import linear_model - from regr import (regression_1, regression_2) def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): @@ -28,6 +27,8 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): """ designed_heating_temp = 72 designed_cooling_temp = 65 + impossible_heating_temp = 0 + impossible_cooling_temp = 300 regression_method = 1 if weather_related_usage == 'Unknown': @@ -64,12 +65,10 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - # hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] # change accordingly for JOENYC buildings - if (heating_coef > 0) and (cooling_coef < 0): weather_related_usage = 'Heating' cooling_coef = 0 @@ -99,7 +98,6 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): regr_output = regression_1(designed_heating_temp, designed_cooling_temp, processed_bill) regr_model = regr_output[0] heating_coef, cooling_coef = regr_model.coef_ - # hddcdd = regr_output[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] @@ -122,7 +120,7 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): if weather_related_usage == 'Heating': opt_1 = minimize( - lambda x: -regression_1(x, 300, processed_bill)[1], + lambda x: -regression_1(x, impossible_cooling_temp, processed_bill)[1], 65, method='nelder-mead', options={'xtol': 1e-2, @@ -141,16 +139,13 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): cooling_set_point = np.NaN regr_output = regression_2(opt.x[0], processed_bill) regr_model = regr_output[0] - # hdd = regr_output[2] - # hdd_transit = [hdd[x][0] for x in range(len(hdd))] - # hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) regression_method = 2 else: if round(opt_1.x[0]) in range(60, 95): opt = opt_1 heating_set_point = opt.x[0] cooling_set_point = np.NaN - regr_output = regression_1(heating_set_point, 300, processed_bill) + regr_output = regression_1(heating_set_point, impossible_cooling_temp, processed_bill) regr_model = regr_output[0] heating_coef = regr_model.coef_ cooling_coef = 0 @@ -158,19 +153,19 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): # legit heating set-point 72'F heating_set_point = designed_heating_temp cooling_set_point = np.NaN - regr_output = regression_1(heating_set_point, 300, processed_bill) + regr_output = regression_1(heating_set_point, impossible_cooling_temp, processed_bill) regr_model = regr_output[0] heating_coef = regr_model.coef_ cooling_coef = 0 if weather_related_usage == 'Cooling': opt = minimize( - lambda x: -regression_1(x, 300, processed_bill)[1], + lambda x: -regression_1(impossible_heating_temp, x, processed_bill)[1], 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - regr_output = regression_1(opt.x[0], 300, processed_bill) + regr_output = regression_1(opt.x[0],impossible_heating_temp, processed_bill) regr_model = regr_output[0] cooling_set_point = opt.x[0] heating_set_point = np.NaN @@ -181,104 +176,22 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): heating_set_point = np.NaN regr_output = np.NaN - - optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] - return optimization_output - - # if self.usage == 'Both Not': - # self.heating_consumption_pred = self.processed_bill['Usage'] * 0 - # self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 - # self.others_consumption_pred = self.processed_bill['Usage'] - # self.regression_method = 0 - # self.hddcdd = np.zeros((len(self.processed_bill), 2)) - # cooling_set_point = np.NaN - # heating_set_point = np.NaN - - # else: - # self.regression_method = regression_method - - # if self.regression_method == 1: - - # self.hddcdd = np.array( - # pd.DataFrame(hddcdd).mul( - # list(self.processed_bill['Days In Bill']), axis=0)) - - # if regr[1] > 0.5: - # self.regr_model = regr_model - # self.heating_consumption_pred = np.array( - # self.hddcdd[:, 0]) * regr_model.coef_[0] - # self.cooling_consumption_pred = np.array( - # self.hddcdd[:, 1]) * regr_model.coef_[1] - - # if self.regr_model.intercept_ < 0: - # self.others_consumption_pred = 0 * self.processed_bill[ - # 'Days In Bill'] - # else: - # self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ - # 'Days In Bill'] - - # # real_sum = np.array(self.processed_bill['Usage']) - # # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ - # # self.others_consumption_pred - - # # diff = real_sum - predict_sum - - # else: - # self.heating_consumption_pred = self.processed_bill[ - # 'Days In Bill'] * 0 - # self.cooling_consumption_pred = self.processed_bill[ - # 'Days In Bill'] * 0 - # self.others_consumption_pred = self.processed_bill['Usage'] - # self.regression_method = 0 - # self.hddcdd = np.zeros((len(self.processed_bill), 2)) - # cooling_set_point = np.NaN - # heating_set_point = np.NaN - # self.usage = 'Both Not' - - # elif self.regression_method == 2: - # self.hddcdd = np.array( - # pd.DataFrame(hddcdd).mul( - # list(self.processed_bill['Days In Bill']), axis=0)) - - # self.regr_model = regr_model - # self.heating_consumption_pred = np.array( - # self.hddcdd[:, 0]) * self.regr_model.coef_[0] - # self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 - - # if self.regr_model.intercept_ < 0: - # self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] - # else: - # self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - # + regr[3]['dhw'] - - # bill_cp = self.processed_bill.copy() - # bill_cp = self.processed_bill[[ - # 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' - # ]] - # bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] - # bill_cp['Heating Usage'] = self.heating_consumption_pred - # bill_cp['Cooling Usage'] = self.cooling_consumption_pred - # bill_cp['Other Usage'] = self.others_consumption_pred - - # if self.usage == 'Both Not': - # self.r_squared_of_fit = 0 - # else: - # self.r_squared_of_fit = regr[1] - - # self.heating_set_point = heating_set_point - # self.cooling_set_point = cooling_set_point - # self.output_table = bill_cp - - # last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] - # first_bill_date = self.processed_bill['Bill From Date'].iloc[0] - - # billing_months = self.num_month_dates(last_bill_date, first_bill_date) - # output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, - # self.cooling_set_point, billing_months) - # self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) - # self.most_recent_monthly_output = self.output_to_month(last_bill_date, - # self.heating_set_point, self.cooling_set_point, 12) - # self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - # self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, - # self.most_recent_monthly_output) - # self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) + #return the regression + if weather_related_usage == 'Both Not': + intercept_ = 0 + heating_coef_ = 0 + cooling_coef_ = 0 + else: + intercept_ = regr_output[0].intercept_ + heating_coef_ = regr_output[0].coef_[0] + cooling_coef_ = regr_output[0].coef_[1] + + optimized_regr_matrix = {'heating_setpoint': heating_set_point, + 'cooling_set_point': cooling_set_point, + 'intercept_':intercept_, + 'heating_coef_' : heating_coef_, + 'cooling_coef_': cooling_coef_, + 'weather_related_usage': weather_related_usage, + 'regression_method':regression_method + } + return optimized_regr_matrix -- GitLab From 192e791909b04c26b1bd16048dbc4479447a4ff7 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 15:41:52 -0400 Subject: [PATCH 26/97] added a regression temp function --- bpeng/bill/regr.py | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index 7040e77..2063f8a 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -1,10 +1,6 @@ import warnings import numpy as np -import pandas as pd - -from datetime import timedelta -from dateutil import relativedelta from sklearn import linear_model from calculater import (cdd, hdd, threshold, outliers_iqr) warnings.simplefilter('ignore') @@ -28,23 +24,11 @@ def regression_1(hp, cp, bill): """ consumption = np.array(bill['Usage'] / bill['Days In Bill']) - ahdd = [list(hdd(hp, xx) for xx in x) for x in bill['temperature']] - acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] - # it should be billing period average hdd / days - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) - daily_cdd1 = np.array([threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) - - regression_temp = np.array([daily_hdd1, daily_cdd1]).T - + regression_temp = regr_temp_hddcdd(hp, cp, bill) regr_model = linear_model.LinearRegression() regr_model.fit(regression_temp, consumption) score = regr_model.score(regression_temp, consumption) - - return regr_model, score, regression_temp - + return regr_model, score def regression_2(hp, bill): """ @@ -65,13 +49,8 @@ def regression_2(hp, bill): """ impossible_cooling_temp = 300 - cp = impossible_cooling_temp - ahdd = [[hdd(hp, xx) for xx in x] for x in bill['temperature']] - acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] - # monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - # daily dhw usage + regression_temp = regr_temp_hddcdd(hp, impossible_cooling_temp, bill) + daily_hdd = regression_temp[:,0] bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: @@ -88,13 +67,38 @@ def regression_2(hp, bill): daily_dhw = 0 else: daily_dhw = 0 + bill['dhw'] = daily_dhw * bill['Days In Bill'] - # 2018/01/30 - # Daily hdd makes more sense - regression_temp = np.array([daily_hdd, daily_cdd]).T consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] - regr_model = linear_model.LinearRegression(fit_intercept=False) + regr_model = linear_model.LinearRegression(fit_intercept=True) regr_model.fit(regression_temp, consumption) score = regr_model.score(regression_temp, consumption) + return regr_model, score, daily_dhw + +def regr_temp_hddcdd(hp, cp, bill): + ''' + Cal for avg hdd/cdd for a bills with any billing period + ''' + + impossible_heating_temp = 0 + impossible_cooling_temp = 300 + + if hp != np.NaN: + ahdd = [list(hdd(hp, xx) for xx in x) for x in bill['temperature']] + else: + ahdd = [list(hdd(impossible_heating_temp, xx) for xx in x) for x in bill['temperature']] + + if cp != np.NaN: + acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] + else: + acdd = [list(cdd(impossible_cooling_temp, xx) for xx in x) for x in bill['temperature']] + + # it should be billing period average hdd / days + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) + daily_cdd1 = np.array([threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) + regression_temp = np.array([daily_hdd1, daily_cdd1]).T - return regr_model, score, regression_temp, bill + return regression_temp -- GitLab From 99f7d32f2ef65463e9e147b4c78da6c70942c6fb Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 15:42:14 -0400 Subject: [PATCH 27/97] updated based on regr.py --- bpeng/bill/setpoints_optimization.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 1bcb057..b513794 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -174,17 +174,19 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): regression_method = 0 cooling_set_point = np.NaN heating_set_point = np.NaN - regr_output = np.NaN + regr_model = np.NaN #return the regression if weather_related_usage == 'Both Not': intercept_ = 0 heating_coef_ = 0 cooling_coef_ = 0 + r_squared = 0 else: intercept_ = regr_output[0].intercept_ heating_coef_ = regr_output[0].coef_[0] cooling_coef_ = regr_output[0].coef_[1] + r_squared = regr_output[1] optimized_regr_matrix = {'heating_setpoint': heating_set_point, 'cooling_set_point': cooling_set_point, @@ -192,6 +194,9 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): 'heating_coef_' : heating_coef_, 'cooling_coef_': cooling_coef_, 'weather_related_usage': weather_related_usage, - 'regression_method':regression_method + 'regression_method':regression_method, + 'regr_model': regr_model, + 'r_squared': r_squared } + return optimized_regr_matrix -- GitLab From 8c698960d429ab5809ee12400a3e7f14961523b5 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 15:43:28 -0400 Subject: [PATCH 28/97] added a fun of daily temperature column for a bill --- bpeng/bill/weather_data_cal.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bpeng/bill/weather_data_cal.py b/bpeng/bill/weather_data_cal.py index 72d7c2b..e450cbe 100644 --- a/bpeng/bill/weather_data_cal.py +++ b/bpeng/bill/weather_data_cal.py @@ -44,3 +44,10 @@ def bill_period_weather(bill_from_date, bill_end_date, weather_data_daily): start_date_id = weather_data_daily[weather_data_daily.date == bill_from_date].index[0] return list(weather_data_daily['temperature'][start_date_id:end_date_id]) + + +def bill_with_daily_temp(cleaned_bill, weather_data_daily): + cleaned_bill['temperature'] = [bill_period_weather(x, y, weather_data_daily) + for x, y in zip(cleaned_bill['Bill From Date'], + cleaned_bill['Bill To Date'])] + return cleaned_bill -- GitLab From c251e204883ee375277e66e3e47c87f96ce15511 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 16:40:29 -0400 Subject: [PATCH 29/97] updated disaggragate model --- bpeng/bill/disaggragate_with_regr_matrix.py | 299 ++++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100644 bpeng/bill/disaggragate_with_regr_matrix.py diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py new file mode 100644 index 0000000..a77e194 --- /dev/null +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -0,0 +1,299 @@ +''' +This module breaks down the energy usage by its weather-related features and non-weather-related features. +The inputs of this module is a cleaned bill, the best fitted regression model, and the non-weather-related usage info. +The outputs of this module is the disaggragated results based on the input informaiton. + +Author: Doris Han +''' +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd +from dateutil import relativedelta +from scipy.optimize import minimize +from sklearn import linear_model +from weather_data_cal import (bill_period_weather) +from calculater import (hdd, cdd) +from regr import (regr_temp_hddcdd) + + +def weather_ralated_breakdown(regr_matrix, processed_bill_any): + + heating_set_point = regr_matrix['heating_set_point'] + cooling_set_point = regr_matrix['cooling_set_point'] + weather_related_usage = regr_matrix['weather_related_usage'] + regression_method = regr_matrix['regression_method'] + cooling_coef_ = regr_matrix['cooling_coef_'] + heating_coef_ = regr_matrix['heating_coef_'] + regr_output = regr_matrix['regr_output'] + intercept_ = regr_matrix['intercept_'] + + if regression_method == 0 : + disaggragated_bill = usage_not_related_to_weather(processed_bill_any) + + if regression_method == 1 : + hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, processed_bill_any) + r_squared_of_fit = regr_output[1] + + if np.absolute(r_squared_of_fit) > 0.5: + heating_consump = np.array(hddcdd[:, 0]) * heating_coef_ * processed_bill_any['Days In Bill'] + cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] + + if intercept_ < 0: + non_weather_related_consump = 0 * processed_bill_any['Days In Bill'] + else: + non_weather_related_consump = intercept_ * processed_bill_any['Days In Bill'] + else: + disaggragated_bill = usage_not_related_to_weather(processed_bill_any) + + if regression_method == 2: + hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, processed_bill_any) + dhw_usage = regr_output[2] + + heating_consump = np.array(hddcdd[:, 0]) * heating_coef_ * processed_bill_any['Days In Bill'] + cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] + non_weather_related_consump = dhw_usage * processed_bill_any['Days In Bill'] + + + + disaggragated_bill = processed_bill_any.copy() + disaggragated_bill = processed_bill_any[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + ]] + disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] + disaggragated_bill['Heating Usage'] = heating_consump + disaggragated_bill['Cooling Usage'] = cooling_consump + disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump + + return disaggragated_bill + +def usage_not_related_to_weather(bill): + ''' + this function return heating, cooling, and non-weather-related-usage for a bill that not related to weather at all. + ''' + bill['Heating Usage'] = bill['Usage'] * 0 + bill['Cooling Usage'] = bill['Usage'] * 0 + bill['Non Weather Related Usage'] = bill['Usage'] + bill['Unit Price'] = bill['Total Charge'] / bill['Usage'] + return bill + +@staticmethod +def non_weahter_related_breakdown(end_uses, weather_disaggragated_bill): + """ + breakdown the non_weather_related_usage + + Args: + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage + weather_disaggragated_bill (pd.DataFrame): bills have been breakdown to heating, cooling, and non-weather-related-comsump + Returns: + pd.DataFrame: bill breakdown of all end-use + """ + eu = pd.DataFrame( + list(end_uses.items()), columns=['end use', 'percentage']) + + for i in range(len(eu)): + name_of_the_column = eu['end use'].iloc[i] + value_of_the_column = eu['percentage'].iloc[i] + weather_disaggragated_bill[name_of_the_column] = weather_disaggragated_bill[ + 'Non Weather Related Usage'] * value_of_the_column + + if sum(eu['percentage']) != 1: + weather_disaggragated_bill['Miscellaneous'] = weather_disaggragated_bill['Non Weather Related Usage'] * ( + 1 - sum(eu['percentage'])) + + fully_disaggragated_bill = weather_disaggragated_bill.copy() + return fully_disaggragated_bill + +# def find_index_in_first_raw_biil(self, norm_bill_date): +# """ +# Return the index of the row of raw bill contains the bill date from a normalized bill +# """ +# for index, bill in self.formatted_bill.iterrows(): +# if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: +# return index +# return None + +# def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): +# """ +# Return how many days from a normalized bill within a raw bill billing period +# """ + +# if flag == 'start': +# days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days +# if flag == 'end': +# days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days +# return days + +# def weighted_unit_price(self, index_numdays): +# """ +# Return the weighted average of unit price +# """ +# weights = [] +# total_days = [] +# for ind in range(len(index_numdays)): +# unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] +# days_in_that_period = int(index_numdays[ind]['num_days']) +# weights.append(unit_price * days_in_that_period) +# total_days.append(days_in_that_period) +# weighted_unit_price = sum(weights)/sum(total_days) +# return weighted_unit_price + +# def find_bills_in_raw(self, norm_bill_from, norm_bill_to): +# """ +# Return the index / number of days in each raw bill billing period for a normalized billing period +# """ + +# norm_bill_days = (norm_bill_to - norm_bill_from).days +# results = [] + +# index_start = self.find_index_in_first_raw_biil(norm_bill_from) +# index_end = self.find_index_in_first_raw_biil(norm_bill_to) + +# if index_start == index_end: +# results.append({'index': index_start, 'num_days': norm_bill_days}) + +# elif index_end - index_start >= 1: +# days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') +# results.append({'index': index_start, 'num_days': days_in_start_period}) +# days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') +# results.append({'index': index_end, 'num_days': days_in_end_period}) + +# if index_end - index_start >= 2: +# for p in range(index_end - index_start - 1): +# days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] +# index_of_this_period = index_start+p+1 +# results.append({'index': index_of_this_period, 'num_days': days_in_period}) + +# return results + +# def normalized_unit_price(self, rawbill, mbill): +# """ +# calculate the unit price for each nomralized billing period +# """ +# normalized_unit_price = [] +# for m in range(len(mbill)): +# from_date = mbill['Bill From Date'].iloc[m] +# to_date = mbill['Bill To Date'].iloc[m] +# index_numdays = self.find_bills_in_raw(from_date, to_date) +# weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) +# normalized_unit_price.append(weighted_unit_price_for_this_month) +# mbill['Unit Price'] = normalized_unit_price +# return mbill + +# def disaggragated_bill_monthly(self, last_date_of_bill, hp, cp, number_of_month): +# """ +# Transfrom period-wise bills to month-wise bills + +# Args: + +# last_day_of_bill(datetime): last day of bill +# hp(float): heating season indoor set point +# cp(float): cooling season indoor set point +# number_of_month(int): number of month that need to be re-format + +# Returns: + +# pd.DataFrame: result with monthly consumptions + +# """ + +# last_dates = [] +# first_dates = [] + +# lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) + +# # cosntruct a new dataframe with bills from the first to last day for each month + +# for i in range(0, number_of_month): +# last_dates.append(lastdate) +# first_dates.append(lastdate.replace(day=1)) +# lastdate = first_dates[i] - timedelta(1) + +# monthly_output_table = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill', +# 'Heating Usage', 'Cooling Usage', 'Non Weather Related Usage']) + +# monthly_output_table['Bill From Date'] = first_dates +# monthly_output_table['Bill To Date'] = last_dates +# monthly_output_table[ +# 'Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] +# monthly_output_table[ +# 'Days In Bill'] = monthly_output_table['Days In Bill'].apply( +# lambda x: x.days) + 1 +# monthly_output_table['Month'] = monthly_output_table[ +# 'Bill From Date'].apply(lambda x: x.month) +# monthly_output_table['temperature'] = [ +# bill_period_weather(x, y) +# for x, y in zip(monthly_output_table['Bill From Date'], +# monthly_output_table['Bill To Date']) +# ] + +# hdd = [ +# list(hdd(hp, xx) for xx in x) +# for x in monthly_output_table['temperature'] +# ] +# cdd = [ +# list(cdd(cp, xx) for xx in x) +# for x in monthly_output_table['temperature'] +# ] +# monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) +# monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) + +# monthly_output_table['HDD'] = monthly_hdd +# monthly_output_table['CDD'] = monthly_cdd + +# # per_hdd = self.benchmarking_output()[4] / self.benchmarking_output()[8] +# # if np.isnan(per_hdd): +# # per_hdd = 0 +# # per_cdd = self.benchmarking_output()[5] / self.benchmarking_output()[9] +# # if np.isnan(per_cdd): +# # per_cdd = 0 +# # per_day = self.benchmarking_output()[6] / self.benchmarking_output()[ +# # 10] +# # if np.isnan(per_day): +# # per_day = 0 + +# # monthly_output_table['Heating Usage'] = monthly_output_table[ +# # 'HDD'] * per_hdd +# # monthly_output_table['Cooling Usage'] = monthly_output_table[ +# # 'CDD'] * per_cdd +# # monthly_output_table['Other Usage'] = monthly_output_table[ +# # 'Days In Bill'] * per_day + + + +# monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ +# + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] + +# monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', +# 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] +# monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) +# return monthly_output + + + +# def output_to_dashboard(): +# ''' +# to meet the dashboard requirements for utility page +# further breakdown the bill to month + +# ''' + +# last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] +# first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + +# billing_months = self.num_month_dates(last_bill_date, first_bill_date) +# output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, +# self.cooling_set_point, billing_months) +# self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) +# self.most_recent_monthly_output = self.output_to_month(last_bill_date, +# self.heating_set_point, self.cooling_set_point, 12) + +# self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + +# self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, +# self.most_recent_monthly_output) +# self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) + + + -- GitLab From f55cef6df3607136ae20ff4ee849a734bdf02206 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 16:40:43 -0400 Subject: [PATCH 30/97] updated based on regr.py --- bpeng/bill/setpoints_optimization.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index b513794..1896878 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -181,14 +181,13 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): intercept_ = 0 heating_coef_ = 0 cooling_coef_ = 0 - r_squared = 0 + regr_output = np.NaN else: intercept_ = regr_output[0].intercept_ heating_coef_ = regr_output[0].coef_[0] cooling_coef_ = regr_output[0].coef_[1] - r_squared = regr_output[1] - optimized_regr_matrix = {'heating_setpoint': heating_set_point, + optimized_regr_matrix = {'heating_set_point': heating_set_point, 'cooling_set_point': cooling_set_point, 'intercept_':intercept_, 'heating_coef_' : heating_coef_, @@ -196,7 +195,7 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): 'weather_related_usage': weather_related_usage, 'regression_method':regression_method, 'regr_model': regr_model, - 'r_squared': r_squared + 'regr_output': regr_output } return optimized_regr_matrix -- GitLab From f2c95ea63ab6343ed4f30bce0a987416c6c438a6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:10:13 -0400 Subject: [PATCH 31/97] remove total usage to accomdate monthly bills --- bpeng/bill/disaggragate_with_regr_matrix.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index a77e194..5a64bab 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -5,15 +5,9 @@ The outputs of this module is the disaggragated results based on the input infor Author: Doris Han ''' -import warnings -from datetime import timedelta + import numpy as np import pandas as pd -from dateutil import relativedelta -from scipy.optimize import minimize -from sklearn import linear_model -from weather_data_cal import (bill_period_weather) -from calculater import (hdd, cdd) from regr import (regr_temp_hddcdd) @@ -58,9 +52,9 @@ def weather_ralated_breakdown(regr_matrix, processed_bill_any): disaggragated_bill = processed_bill_any.copy() disaggragated_bill = processed_bill_any[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' + 'Bill From Date', 'Bill To Date', 'Days In Bill' ]] - disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] + #disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] disaggragated_bill['Heating Usage'] = heating_consump disaggragated_bill['Cooling Usage'] = cooling_consump disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump @@ -73,8 +67,9 @@ def usage_not_related_to_weather(bill): ''' bill['Heating Usage'] = bill['Usage'] * 0 bill['Cooling Usage'] = bill['Usage'] * 0 - bill['Non Weather Related Usage'] = bill['Usage'] - bill['Unit Price'] = bill['Total Charge'] / bill['Usage'] + #attention: if in this case, other usage needs to be re-caculate + bill['Non Weather Related Usage'] = bill['Usage'] * 0 # how to deal with this one if the usage is not given + #bill['Unit Price'] = bill['Total Charge'] / bill['Usage'] return bill @staticmethod -- GitLab From 1c95e48edc8c71cee9d1c79abb1ccf89b104a58e Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:10:30 -0400 Subject: [PATCH 32/97] return a monthly bills --- bpeng/bill/normalized_monthly_bill.py | 147 ++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 bpeng/bill/normalized_monthly_bill.py diff --git a/bpeng/bill/normalized_monthly_bill.py b/bpeng/bill/normalized_monthly_bill.py new file mode 100644 index 0000000..a43fade --- /dev/null +++ b/bpeng/bill/normalized_monthly_bill.py @@ -0,0 +1,147 @@ +""" +This module will create a normailized usage based on a raw bill natural billing periods +should be refactor to a class +""" + +import warnings +from datetime import timedelta +import numpy as np +import pandas as pd +from dateutil import relativedelta +warnings.simplefilter('ignore') + + +class normalized_billing_period(): + + def __init__(self, formatted_bill): + self.formatted_bill = formatted_bill + + + def find_index_in_first_raw_biil(self, norm_bill_date): + """ + Return the index of the row of raw bill contains the bill date from a normalized bill + """ + for index, bill in self.formatted_bill.iterrows(): + if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: + return index + return None + + def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): + """ + Return how many days from a normalized bill within a raw bill billing period + """ + + if flag == 'start': + days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days + if flag == 'end': + days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days + return days + + def weighted_unit_price(self, index_numdays): + """ + Return the weighted average of unit price + """ + weights = [] + total_days = [] + for ind in range(len(index_numdays)): + unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] + days_in_that_period = int(index_numdays[ind]['num_days']) + weights.append(unit_price * days_in_that_period) + total_days.append(days_in_that_period) + weighted_unit_price = sum(weights)/sum(total_days) + return weighted_unit_price + + def find_bills_in_raw(self, norm_bill_from, norm_bill_to): + """ + Return the index / number of days in each raw bill billing period for a normalized billing period + """ + + norm_bill_days = (norm_bill_to - norm_bill_from).days + results = [] + + index_start = self.find_index_in_first_raw_biil(norm_bill_from) + index_end = self.find_index_in_first_raw_biil(norm_bill_to) + + if index_start == index_end: + results.append({'index': index_start, 'num_days': norm_bill_days}) + + elif index_end - index_start >= 1: + days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') + results.append({'index': index_start, 'num_days': days_in_start_period}) + days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') + results.append({'index': index_end, 'num_days': days_in_end_period}) + + if index_end - index_start >= 2: + for p in range(index_end - index_start - 1): + days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] + index_of_this_period = index_start+p+1 + results.append({'index': index_of_this_period, 'num_days': days_in_period}) + + return results + + def normalized_unit_price(self, rawbill, mbill): + """ + calculate the unit price for each nomralized billing period + """ + normalized_unit_price = [] + for m in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[m] + to_date = mbill['Bill To Date'].iloc[m] + index_numdays = self.find_bills_in_raw(from_date, to_date) + weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) + normalized_unit_price.append(weighted_unit_price_for_this_month) + mbill['Unit Price'] = normalized_unit_price + return mbill + + + @staticmethod + def num_month_dates(last_date_bill, first_date_bill): + """Return number of month in between two date """ + lastdate = last_date_bill - timedelta(last_date_bill.day) + firstdate = first_date_bill + timedelta(days=32) + firstdate = firstdate.replace(day=1) + r = relativedelta.relativedelta(lastdate, firstdate) + num_month = r.years * 12 + r.months + 1 + return (num_month) + + + def normailized_monthly_bill(self): + """ + Args: + + last_day_of_bill(datetime): last day of bill + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + number_of_month(int): number of month that need to be re-format + + Returns: + + pd.DataFrame: result with monthly consumptions + + """ + last_date_of_bill = self.formatted_bill['Bill To Date'].iloc[-1] + first_bill_date = self.formatted_bill['Bill From Date'].iloc[0] + + last_dates = [] + first_dates = [] + + lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) + + # cosntruct a new dataframe with bills from the first to last day for each month + number_of_month = normalized_billing_period.num_month_dates(last_date_of_bill,first_bill_date ) + for i in range(0, number_of_month): + last_dates.append(lastdate) + first_dates.append(lastdate.replace(day=1)) + lastdate = first_dates[i] - timedelta(1) + + normalized_monthly_bill = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill']) + normalized_monthly_bill['Bill From Date'] = first_dates + normalized_monthly_bill['Bill To Date'] = last_dates + normalized_monthly_bill[ + 'Days In Bill'] = normalized_monthly_bill['Bill To Date'] - normalized_monthly_bill['Bill From Date'] + normalized_monthly_bill[ + 'Days In Bill'] = normalized_monthly_bill['Days In Bill'].apply( + lambda x: x.days) + 1 + normalized_monthly_bill['Month'] = normalized_monthly_bill[ + 'Bill From Date'].apply(lambda x: x.month) + return normalized_monthly_bill -- GitLab From 90cf7a63be64cc27ef359486bb7b79000e090334 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:10:38 -0400 Subject: [PATCH 33/97] rename --- bpeng/bill/normailize_usage.py | 169 --------------------------------- 1 file changed, 169 deletions(-) delete mode 100644 bpeng/bill/normailize_usage.py diff --git a/bpeng/bill/normailize_usage.py b/bpeng/bill/normailize_usage.py deleted file mode 100644 index a7f5459..0000000 --- a/bpeng/bill/normailize_usage.py +++ /dev/null @@ -1,169 +0,0 @@ - ''' - This module will create a normailized usage based on a raw bill natural billing periods - should be refactor to a class - ''' - - - - def find_index_in_first_raw_biil(self, norm_bill_date): - """ - Return the index of the row of raw bill contains the bill date from a normalized bill - """ - for index, bill in self.formatted_bill.iterrows(): - if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: - return index - return None - - def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): - """ - Return how many days from a normalized bill within a raw bill billing period - """ - - if flag == 'start': - days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days - if flag == 'end': - days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days - return days - - def weighted_unit_price(self, index_numdays): - """ - Return the weighted average of unit price - """ - weights = [] - total_days = [] - for ind in range(len(index_numdays)): - unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] - days_in_that_period = int(index_numdays[ind]['num_days']) - weights.append(unit_price * days_in_that_period) - total_days.append(days_in_that_period) - weighted_unit_price = sum(weights)/sum(total_days) - return weighted_unit_price - - def find_bills_in_raw(self, norm_bill_from, norm_bill_to): - """ - Return the index / number of days in each raw bill billing period for a normalized billing period - """ - - norm_bill_days = (norm_bill_to - norm_bill_from).days - results = [] - - index_start = self.find_index_in_first_raw_biil(norm_bill_from) - index_end = self.find_index_in_first_raw_biil(norm_bill_to) - - if index_start == index_end: - results.append({'index': index_start, 'num_days': norm_bill_days}) - - elif index_end - index_start >= 1: - days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') - results.append({'index': index_start, 'num_days': days_in_start_period}) - days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') - results.append({'index': index_end, 'num_days': days_in_end_period}) - - if index_end - index_start >= 2: - for p in range(index_end - index_start - 1): - days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] - index_of_this_period = index_start+p+1 - results.append({'index': index_of_this_period, 'num_days': days_in_period}) - - return results - - def normalized_unit_price(self, rawbill, mbill): - """ - calculate the unit price for each nomralized billing period - """ - normalized_unit_price = [] - for m in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[m] - to_date = mbill['Bill To Date'].iloc[m] - index_numdays = self.find_bills_in_raw(from_date, to_date) - weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) - normalized_unit_price.append(weighted_unit_price_for_this_month) - mbill['Unit Price'] = normalized_unit_price - return mbill - - def output_to_month(self, last_date_of_bill, hp, cp, number_of_month): - """ - Transfrom period-wise output to month-wise output - - Args: - - last_day_of_bill(datetime): last day of bill - hp(float): heating season indoor set point - cp(float): cooling season indoor set point - number_of_month(int): number of month that need to be re-format - - Returns: - - pd.DataFrame: result with monthly consumptions - - """ - - last_dates = [] - first_dates = [] - - lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) - - # cosntruct a new dataframe with bills from the first to last day for each month - - for i in range(0, number_of_month): - last_dates.append(lastdate) - first_dates.append(lastdate.replace(day=1)) - lastdate = first_dates[i] - timedelta(1) - - monthly_output_table = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill', - 'Heating Usage', 'Cooling Usage', 'Other Usage']) - - monthly_output_table['Bill From Date'] = first_dates - monthly_output_table['Bill To Date'] = last_dates - monthly_output_table[ - 'Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] - monthly_output_table[ - 'Days In Bill'] = monthly_output_table['Days In Bill'].apply( - lambda x: x.days) + 1 - monthly_output_table['Month'] = monthly_output_table[ - 'Bill From Date'].apply(lambda x: x.month) - monthly_output_table['temperature'] = [ - self.bill_period_weather(x, y) - for x, y in zip(monthly_output_table['Bill From Date'], - monthly_output_table['Bill To Date']) - ] - - hdd = [ - list(BillDisaggregation.hdd(hp, xx) for xx in x) - for x in monthly_output_table['temperature'] - ] - cdd = [ - list(BillDisaggregation.cdd(cp, xx) for xx in x) - for x in monthly_output_table['temperature'] - ] - monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) - monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) - - monthly_output_table['HDD'] = monthly_hdd - monthly_output_table['CDD'] = monthly_cdd - - per_hdd = self.benchmarking_output()[4] / self.benchmarking_output()[8] - if np.isnan(per_hdd): - per_hdd = 0 - per_cdd = self.benchmarking_output()[5] / self.benchmarking_output()[9] - if np.isnan(per_cdd): - per_cdd = 0 - per_day = self.benchmarking_output()[6] / self.benchmarking_output()[ - 10] - if np.isnan(per_day): - per_day = 0 - - monthly_output_table['Heating Usage'] = monthly_output_table[ - 'HDD'] * per_hdd - monthly_output_table['Cooling Usage'] = monthly_output_table[ - 'CDD'] * per_cdd - monthly_output_table['Other Usage'] = monthly_output_table[ - 'Days In Bill'] * per_day - - monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ - + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] - - monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', - 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] - monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) - return monthly_output -- GitLab From a9e91a3bbd4c96b823ec433120016dd4aeebf1f1 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:10:44 -0400 Subject: [PATCH 34/97] renamed --- bpeng/bill/output_natural_billing_period.py | 150 -------------------- 1 file changed, 150 deletions(-) delete mode 100644 bpeng/bill/output_natural_billing_period.py diff --git a/bpeng/bill/output_natural_billing_period.py b/bpeng/bill/output_natural_billing_period.py deleted file mode 100644 index f0a8cc6..0000000 --- a/bpeng/bill/output_natural_billing_period.py +++ /dev/null @@ -1,150 +0,0 @@ -''' -This module breaks down the energy usage by its weather-related features and non-weather-related features. -The inputs of this module is a cleaned bill, the best fitted regression model, and the non-weather-related usage info. -The outputs of this module is the disaggragated results based on the input informaiton. - -Author: Doris Han -''' -import warnings -from datetime import timedelta -import numpy as np -import pandas as pd -from dateutil import relativedelta -from scipy.optimize import minimize -from sklearn import linear_model - - - -class disaggragation_output(): - - def __init__(self, processed_bill, regr_feature, non_weather_usage): - self.processed_bill = processed_bill - self.regr_feature = regr_feature - self.non_weather_usage = non_weather_usage - self.regr_model = self.regr_feature[0][0] - self.heating_set_point = self.regr_feature[1] - self.cooling_set_point = self.regr_feature[2] - self.weather_related_usage = self.regr_feature[3] - self.regression_method = self.regr_feature[4] - self.disaggragated_bill = None - - # optimization_output = [regr_output, heating_set_point, cooling_set_point, weather_related_usage, regression_method] - # regr_output = regr_model, score, regression_temp, bill - - def weather_ralated_breakdown(): - - if self.regression_method == 0 : - heating_consump = self.processed_bill['Usage'] * 0 - cooling_consump = self.processed_bill['Usage'] * 0 - non_weather_related_consump = self.processed_bill['Usage'] - - if self.regression_method == 1: - r_squared_of_fit = self.regr_feature[0][1] - regression_temp = self.regr_feature[0][2] - heating_coef = self.regr_model.coef_[0] - cooling_coef = self.regr_model.coef_[1] - hddcdd = np.array(pd.DataFrame(regression_temp).mul(list(self.processed_bill['Days In Bill']), axis=0)) - - if np.absolute(r_squared_of_fit) > 0.5: - heating_consump = np.array(hddcdd[:, 0]) * heating_coef - cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef - - if self.regr_model.intercept_ < 0: - non_weather_related_consump = 0 * self.processed_bill['Days In Bill'] - else: - non_weather_related_consump = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] - - if self.regression_method == 2: - regression_temp = self.regr_feature[0][2] - dhw_usage = self.regr_feature[0][3] - regression_temp_transit = [regression_temp[x][0] for x in range(len(regression_temp))] - hddcdd = np.array([[regression_temp_transit[x], 0] for x in range(len(regression_temp))]) - hddcdd = np.array(pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - heating_coef = self.regr_model.coef_[0] - cooling_coef = 0 - heating_consump = np.array(hddcdd[:, 0]) * heating_coef - cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef - - if self.regr_model.intercept_ < 0: - non_weather_related_consump = 0 * self.processed_bill['Days In Bill'] + dhw_usage - else: - non_weather_related_consump = self.regr_model.intercept_ * self.processed_bill[ - 'Days In Bill'] + dhw_usage - - - - disaggragated_bill = self.processed_bill.copy() - disaggragated_bill = self.processed_bill[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' - ]] - disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] - disaggragated_bill['Heating Usage'] = heating_consump - disaggragated_bill['Cooling Usage'] = cooling_consump - disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump - - self.disaggragated_bill = disaggragated_bill - - return disaggragated_bill - - - - ######### trim the bills - - def non_weahter_related_breakdown(self, end_uses, disaggragated_bill): - """ - breakdown the non_weather_related_usage - - Args: - end_uses(dictionary): key: end use - value: percentage of the end use among non-weather related usage - disaggragated_bill (pd.DataFrame): bills have been breakdown to heating, cooling, and non-weather-related-comsump - Returns: - pd.DataFrame: bill breakdown of all end-use - """ - - bill = disaggragated_bill.copy() - eu = pd.DataFrame( - list(end_uses.items()), columns=['end use', 'percentage']) - for i in range(len(eu)): - name_of_the_column = eu['end use'].iloc[i] - value_of_the_column = eu['percentage'].iloc[i] - monthly_usages[name_of_the_column] = monthly_usages[ - 'Other Usage'] * value_of_the_column - - if sum(eu['percentage']) != 1: - monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * ( - 1 - sum(eu['percentage'])) - - return monthly_usages - - - - def disaggragated_bill_monthly(): - - - def output_to_dashboard(): - ''' - to meet the dashboard requirements for utility page - further breakdown the bill to month - - ''' - - last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] - first_bill_date = self.processed_bill['Bill From Date'].iloc[0] - - billing_months = self.num_month_dates(last_bill_date, first_bill_date) - output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, - self.cooling_set_point, billing_months) - self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) - self.most_recent_monthly_output = self.output_to_month(last_bill_date, - self.heating_set_point, self.cooling_set_point, 12) - - self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - - self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, - self.most_recent_monthly_output) - self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) - - - -- GitLab From 80cc2a4663abad5cb66b11b90ec748833c9a7d76 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:10:58 -0400 Subject: [PATCH 35/97] test - progress --- bpeng/bill/test.py | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index be6ee2c..886f688 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -7,14 +7,13 @@ from scipy.optimize import minimize from datetime import timedelta from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) -from weather_data_cal import (weather_cleaning, bill_period_weather) -from regr import (regression_1, regression_2) +from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) +from regr import (regression_1, regression_2, regr_temp_hddcdd) from get_test_data import (query_bill, get_weather_data) from calculater import (hdd, threshold) -from weather_related_usage_type import (determine_weather_usage_type_when_input_is_unknown) - -from setpoints_optimization import optimize_setpoints - +from setpoints_optimization import (optimize_setpoints) +from disaggragate_with_regr_matrix import(weather_ralated_breakdown, non_weahter_related_breakdown) +from normalized_monthly_bill import normalized_billing_period end_uses = {'Miscellanous': 1} raw_bill = query_bill(181794, 2) @@ -31,16 +30,36 @@ if any(i == 'short' for i in quality.flag): else: processed_bill = formatted_bill -processed_bill['temperature'] = [ - bill_period_weather(x, y, weather_data_daily) - for x, y in zip(processed_bill['Bill From Date'], - processed_bill['Bill To Date']) -] +processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) processed_bill = processed_bill.sort_values('Bill From Date') formatted_bill = formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] -test_results = optimize_setpoints(processed_bill) -print('hi:', test_results) +nb = normalized_billing_period(formatted_bill) +monthly_bill = nb.normailized_monthly_bill() +monthly_bill_temp = bill_with_daily_temp(monthly_bill,weather_data_daily) + + +regr_results = optimize_setpoints(processed_bill) +monthly_breakdown = weather_ralated_breakdown(regr_results, monthly_bill_temp) + +#lalalalalala +print(monthly_breakdown) + +# regr_model = regression_1(72, 300, processed_bill) +# score = regr_model.score +# print('r-squared:', score) + + +#score = regr_model.score(regression_temp, consumption) +# regr_model, score, regression_temp, bill + + +# regression_model = test_results[0][0] +# print('regr model:', regression_model) +# X = np.array([20,21,23,27]) +# prediction = regression_model.predict(X) +# print(prediction) + -- GitLab From 7ac9f5c1eddb084652c585d736a0c71c09defe09 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 7 Jun 2019 17:15:13 -0400 Subject: [PATCH 36/97] added calculated total --- bpeng/bill/disaggragate_with_regr_matrix.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index 5a64bab..f06a4e6 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -58,6 +58,7 @@ def weather_ralated_breakdown(regr_matrix, processed_bill_any): disaggragated_bill['Heating Usage'] = heating_consump disaggragated_bill['Cooling Usage'] = cooling_consump disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump + disaggragated_bill['Calculated Total Usage'] = heating_consump + cooling_consump + non_weather_related_consump return disaggragated_bill @@ -68,8 +69,9 @@ def usage_not_related_to_weather(bill): bill['Heating Usage'] = bill['Usage'] * 0 bill['Cooling Usage'] = bill['Usage'] * 0 #attention: if in this case, other usage needs to be re-caculate - bill['Non Weather Related Usage'] = bill['Usage'] * 0 # how to deal with this one if the usage is not given + bill['Non Weather Related Usage'] = bill['Usage'] * 0 #bill['Unit Price'] = bill['Total Charge'] / bill['Usage'] + bill['Calculated Total Usage'] = bill['Usage'] * 0 return bill @staticmethod -- GitLab From 18041269ee26f378cea22943be36bcd8af37af71 Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 11 Jun 2019 14:55:16 -0400 Subject: [PATCH 37/97] add Argu+return --- bpeng/bill/disaggragate_with_regr_matrix.py | 205 ++------------------ 1 file changed, 12 insertions(+), 193 deletions(-) diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index f06a4e6..a2d3dcf 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -12,6 +12,17 @@ from regr import (regr_temp_hddcdd) def weather_ralated_breakdown(regr_matrix, processed_bill_any): + """ + This function calculates the weather-related usage based on regression model and billing info: + + Argu: + regr_matrix (Dictionary) + processed_bill_any(pd.DataFrame): Bill From Date, Bill To Date, Days In Bills, Unit Price, temperature + + Return: + disaggrated_bill(pd.DataFrame): Bill From Date, Bill To Date, Days In Bills, Unit Price, Heating Usage, Cooling Usage + Non Weather Related Usage, Calculated Total Usage + """ heating_set_point = regr_matrix['heating_set_point'] cooling_set_point = regr_matrix['cooling_set_point'] @@ -52,9 +63,8 @@ def weather_ralated_breakdown(regr_matrix, processed_bill_any): disaggragated_bill = processed_bill_any.copy() disaggragated_bill = processed_bill_any[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill' + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Unit Price' ]] - #disaggragated_bill['Unit Price'] = disaggragated_bill['Total Charge'] / disaggragated_bill['Usage'] disaggragated_bill['Heating Usage'] = heating_consump disaggragated_bill['Cooling Usage'] = cooling_consump disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump @@ -70,7 +80,6 @@ def usage_not_related_to_weather(bill): bill['Cooling Usage'] = bill['Usage'] * 0 #attention: if in this case, other usage needs to be re-caculate bill['Non Weather Related Usage'] = bill['Usage'] * 0 - #bill['Unit Price'] = bill['Total Charge'] / bill['Usage'] bill['Calculated Total Usage'] = bill['Usage'] * 0 return bill @@ -102,195 +111,5 @@ def non_weahter_related_breakdown(end_uses, weather_disaggragated_bill): fully_disaggragated_bill = weather_disaggragated_bill.copy() return fully_disaggragated_bill -# def find_index_in_first_raw_biil(self, norm_bill_date): -# """ -# Return the index of the row of raw bill contains the bill date from a normalized bill -# """ -# for index, bill in self.formatted_bill.iterrows(): -# if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: -# return index -# return None - -# def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): -# """ -# Return how many days from a normalized bill within a raw bill billing period -# """ - -# if flag == 'start': -# days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days -# if flag == 'end': -# days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days -# return days - -# def weighted_unit_price(self, index_numdays): -# """ -# Return the weighted average of unit price -# """ -# weights = [] -# total_days = [] -# for ind in range(len(index_numdays)): -# unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] -# days_in_that_period = int(index_numdays[ind]['num_days']) -# weights.append(unit_price * days_in_that_period) -# total_days.append(days_in_that_period) -# weighted_unit_price = sum(weights)/sum(total_days) -# return weighted_unit_price - -# def find_bills_in_raw(self, norm_bill_from, norm_bill_to): -# """ -# Return the index / number of days in each raw bill billing period for a normalized billing period -# """ - -# norm_bill_days = (norm_bill_to - norm_bill_from).days -# results = [] - -# index_start = self.find_index_in_first_raw_biil(norm_bill_from) -# index_end = self.find_index_in_first_raw_biil(norm_bill_to) - -# if index_start == index_end: -# results.append({'index': index_start, 'num_days': norm_bill_days}) - -# elif index_end - index_start >= 1: -# days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') -# results.append({'index': index_start, 'num_days': days_in_start_period}) -# days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') -# results.append({'index': index_end, 'num_days': days_in_end_period}) - -# if index_end - index_start >= 2: -# for p in range(index_end - index_start - 1): -# days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] -# index_of_this_period = index_start+p+1 -# results.append({'index': index_of_this_period, 'num_days': days_in_period}) - -# return results - -# def normalized_unit_price(self, rawbill, mbill): -# """ -# calculate the unit price for each nomralized billing period -# """ -# normalized_unit_price = [] -# for m in range(len(mbill)): -# from_date = mbill['Bill From Date'].iloc[m] -# to_date = mbill['Bill To Date'].iloc[m] -# index_numdays = self.find_bills_in_raw(from_date, to_date) -# weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) -# normalized_unit_price.append(weighted_unit_price_for_this_month) -# mbill['Unit Price'] = normalized_unit_price -# return mbill - -# def disaggragated_bill_monthly(self, last_date_of_bill, hp, cp, number_of_month): -# """ -# Transfrom period-wise bills to month-wise bills - -# Args: - -# last_day_of_bill(datetime): last day of bill -# hp(float): heating season indoor set point -# cp(float): cooling season indoor set point -# number_of_month(int): number of month that need to be re-format - -# Returns: - -# pd.DataFrame: result with monthly consumptions - -# """ - -# last_dates = [] -# first_dates = [] - -# lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) - -# # cosntruct a new dataframe with bills from the first to last day for each month - -# for i in range(0, number_of_month): -# last_dates.append(lastdate) -# first_dates.append(lastdate.replace(day=1)) -# lastdate = first_dates[i] - timedelta(1) - -# monthly_output_table = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill', -# 'Heating Usage', 'Cooling Usage', 'Non Weather Related Usage']) - -# monthly_output_table['Bill From Date'] = first_dates -# monthly_output_table['Bill To Date'] = last_dates -# monthly_output_table[ -# 'Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] -# monthly_output_table[ -# 'Days In Bill'] = monthly_output_table['Days In Bill'].apply( -# lambda x: x.days) + 1 -# monthly_output_table['Month'] = monthly_output_table[ -# 'Bill From Date'].apply(lambda x: x.month) -# monthly_output_table['temperature'] = [ -# bill_period_weather(x, y) -# for x, y in zip(monthly_output_table['Bill From Date'], -# monthly_output_table['Bill To Date']) -# ] - -# hdd = [ -# list(hdd(hp, xx) for xx in x) -# for x in monthly_output_table['temperature'] -# ] -# cdd = [ -# list(cdd(cp, xx) for xx in x) -# for x in monthly_output_table['temperature'] -# ] -# monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) -# monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) - -# monthly_output_table['HDD'] = monthly_hdd -# monthly_output_table['CDD'] = monthly_cdd - -# # per_hdd = self.benchmarking_output()[4] / self.benchmarking_output()[8] -# # if np.isnan(per_hdd): -# # per_hdd = 0 -# # per_cdd = self.benchmarking_output()[5] / self.benchmarking_output()[9] -# # if np.isnan(per_cdd): -# # per_cdd = 0 -# # per_day = self.benchmarking_output()[6] / self.benchmarking_output()[ -# # 10] -# # if np.isnan(per_day): -# # per_day = 0 - -# # monthly_output_table['Heating Usage'] = monthly_output_table[ -# # 'HDD'] * per_hdd -# # monthly_output_table['Cooling Usage'] = monthly_output_table[ -# # 'CDD'] * per_cdd -# # monthly_output_table['Other Usage'] = monthly_output_table[ -# # 'Days In Bill'] * per_day - - - -# monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ -# + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] - -# monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', -# 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] -# monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) -# return monthly_output - - - -# def output_to_dashboard(): -# ''' -# to meet the dashboard requirements for utility page -# further breakdown the bill to month - -# ''' - -# last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] -# first_bill_date = self.processed_bill['Bill From Date'].iloc[0] - -# billing_months = self.num_month_dates(last_bill_date, first_bill_date) -# output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, -# self.cooling_set_point, billing_months) -# self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) -# self.most_recent_monthly_output = self.output_to_month(last_bill_date, -# self.heating_set_point, self.cooling_set_point, 12) - -# self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - -# self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, -# self.most_recent_monthly_output) -# self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) - -- GitLab From 7e67160e4e46fa01903b7ac0cc9b40f3eea9c9a5 Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 11 Jun 2019 14:55:33 -0400 Subject: [PATCH 38/97] add normalized price --- bpeng/bill/normalized_monthly_bill.py | 59 ++++++++++++++------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/bpeng/bill/normalized_monthly_bill.py b/bpeng/bill/normalized_monthly_bill.py index a43fade..8bc1989 100644 --- a/bpeng/bill/normalized_monthly_bill.py +++ b/bpeng/bill/normalized_monthly_bill.py @@ -37,20 +37,6 @@ class normalized_billing_period(): days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days return days - def weighted_unit_price(self, index_numdays): - """ - Return the weighted average of unit price - """ - weights = [] - total_days = [] - for ind in range(len(index_numdays)): - unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] - days_in_that_period = int(index_numdays[ind]['num_days']) - weights.append(unit_price * days_in_that_period) - total_days.append(days_in_that_period) - weighted_unit_price = sum(weights)/sum(total_days) - return weighted_unit_price - def find_bills_in_raw(self, norm_bill_from, norm_bill_to): """ Return the index / number of days in each raw bill billing period for a normalized billing period @@ -79,20 +65,6 @@ class normalized_billing_period(): return results - def normalized_unit_price(self, rawbill, mbill): - """ - calculate the unit price for each nomralized billing period - """ - normalized_unit_price = [] - for m in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[m] - to_date = mbill['Bill To Date'].iloc[m] - index_numdays = self.find_bills_in_raw(from_date, to_date) - weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) - normalized_unit_price.append(weighted_unit_price_for_this_month) - mbill['Unit Price'] = normalized_unit_price - return mbill - @staticmethod def num_month_dates(last_date_bill, first_date_bill): @@ -144,4 +116,35 @@ class normalized_billing_period(): lambda x: x.days) + 1 normalized_monthly_bill['Month'] = normalized_monthly_bill[ 'Bill From Date'].apply(lambda x: x.month) + return normalized_monthly_bill + + + + def normalized_unit_price(self, rawbill, mbill): + """ + calculate the unit price for each nomralized billing period + """ + normalized_unit_price = [] + for m in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[m] + to_date = mbill['Bill To Date'].iloc[m] + index_numdays = self.find_bills_in_raw(from_date, to_date) + weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) + normalized_unit_price.append(weighted_unit_price_for_this_month) + mbill['Unit Price'] = normalized_unit_price + return mbill + + def weighted_unit_price(self, index_numdays): + """ + Return the weighted average of unit price + """ + weights = [] + total_days = [] + for ind in range(len(index_numdays)): + unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] + days_in_that_period = int(index_numdays[ind]['num_days']) + weights.append(unit_price * days_in_that_period) + total_days.append(days_in_that_period) + weighted_unit_price = sum(weights)/sum(total_days) + return weighted_unit_price -- GitLab From 83361102e9675306ec91d84137bd520f3996479e Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 11 Jun 2019 16:44:37 -0400 Subject: [PATCH 39/97] remove staticmethod --- bpeng/bill/disaggragate_with_regr_matrix.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index a2d3dcf..648f36c 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -83,7 +83,6 @@ def usage_not_related_to_weather(bill): bill['Calculated Total Usage'] = bill['Usage'] * 0 return bill -@staticmethod def non_weahter_related_breakdown(end_uses, weather_disaggragated_bill): """ breakdown the non_weather_related_usage -- GitLab From b01142ea8713a1445df2ebc0a9c51496ca26252e Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 11 Jun 2019 16:44:49 -0400 Subject: [PATCH 40/97] update driver main fucntion --- bpeng/bill/driver.py | 509 +++++++++++-------------------------------- 1 file changed, 131 insertions(+), 378 deletions(-) diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py index cd8bd31..27dfca8 100644 --- a/bpeng/bill/driver.py +++ b/bpeng/bill/driver.py @@ -1,390 +1,143 @@ -#%% -"""This file calcuate bill disagregation for multifamily buildings""" +"""This is the driver file to use this library to calculate the bill disaggragation results for BlocPower Dashboard""" -import warnings -from datetime import timedelta -import numpy as np import pandas as pd -from dateutil import relativedelta +import numpy as np +import datetime as datetime from scipy.optimize import minimize -from sklearn import linear_model - -warnings.simplefilter('ignore') - -from get_test_data import (get_weather_data,query_bill) -from bill_cleaner import (bill_formating,bill_quality,short_bill_consolidate) -from weather_data_cal import (weather_cleaning, bill_period_weather) - - -class BillDisaggregation(): - """ - Class for Bill Disaggregation - - Args: - - bill (pd.DataFrame): raw bill (from parsing) (File like object) - First row (besides column name) are the names - 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names - daily_temp (pd.DataFrame): daily temperature (File like object) - - Attributes: - - output: list - total heating load - heating load for the first year (first 12 bill periods) - heating load of each month - (return NaN if R-squared is low) - """ - - def __init__(self, bill, raw_weather_data_daily, account_info): - - # self.account_info = account_info - # self.bill = bill - self.raw_weather_data_daily = raw_weather_data_daily - # self.processed_bill = None - self.weather_data_daily = None - # self.usage = None - # self.regression_method = None - # self.heating_comsuption_pred = None - # self.cooling_comsuption_pred = None - # self.others_comsuption_pred = None - # self.regr_model = None - # self.r_squared_of_fit = None - # self.heating_set_point = None - # self.cooling_set_point = None - # self.days_in_bills = None - # self.output_table = None - # self.output_table_monthly = None - # self.most_recent_monthly_output = None - # self.avg_unit_price = None - # self.bill_breakdown = None - # self.recent_year_bill_breakdown = None - # self.annual_usage = None - # self.formatted_bill = None - - def optimize_setpoints(self, non_weather_related_end_use={'Miscellaneous': 1}, weather_related_usage='Unknown'): +from datetime import timedelta +from get_test_data import (query_bill, get_weather_data) + +from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) +from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) +from regr import (regression_1, regression_2, regr_temp_hddcdd) +from calculater import (hdd, threshold) +from setpoints_optimization import (optimize_setpoints) +from disaggragate_with_regr_matrix import(weather_ralated_breakdown, non_weahter_related_breakdown) +from normalized_monthly_bill import normalized_billing_period + + +class bill_disaggragate(): + + def __init__(self, bill, daily_temp, end_uses): + self.bill = bill + self.daily_temp = daily_temp + self.end_uses = end_uses + self.output_normalized_monthly_bill = None + self.annual_normalized_monthly_bill = None + self.regr_results = None + self.annual_usage_costs_summary = None + + def main(self): + + weather_data_daily = weather_cleaning(self.daily_temp) + formatted_bill, shape = bill_formating(self.bill) # pylint: disable=unused-variable + quality = bill_quality(formatted_bill) + + if any(i == 'short' for i in quality.flag): + processed_bill = short_bill_consolidate( + formatted_bill, quality) + else: + processed_bill = formatted_bill + + processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) + + processed_bill = processed_bill.sort_values('Bill From Date') + formatted_bill = formatted_bill.sort_values('Bill From Date') + formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] + + nb = normalized_billing_period(formatted_bill) + monthly_bill = nb.normailized_monthly_bill() + monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) + + monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill_temp) + + self.regr_results = optimize_setpoints(processed_bill) + monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) + self.output_normalized_monthly_bill = non_weahter_related_breakdown(self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) + self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index + self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) + + return + + def annual_usage_costs(self, annual_bill_breakdown, end_uses): """ - Main function for the optimization and disaggregation + Calcuate annual usage and costs for each end use Args: + annual_bill_breakdown(pd.DataFrame): the output from non-weather-related usage breakdown + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage - usage (str): - Specify if the weather - related consumption is for heating or cooling - 'Unknown': no prior knowledge - 'Heating': only for heating - 'Cooling': only for cooling - 'Both': for both heating and cooling - 'Both Not': not for heating or cooling - default 'Unknown' + Return: + pd.DataFrame: annual usage, costs for each end uses """ + annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) + avg_price = 0.9 + #sum(annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'])/sum(annual_bill_breakdown['Calculated Total Usage']) - # # get the bill ready - # self.weather_data_daily = weather_cleaning(raw_weather_data_daily) - - # formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable - # quality = bill_quality(formatted_bill) - - - # if any(i == 'short' for i in quality.flag): - # processed_bill = short_bill_consolidate( - # formatted_bill, quality) - # else: - # processed_bill = formatted_bill - - # processed_bill['temperature'] = [ - # bill_period_weather(x, y, weather_data_daily) - # for x, y in zip(processed_bill['Bill From Date'], - # processed_bill['Bill To Date']) - # ] - - # processed_bill = processed_bill.sort_values('Bill From Date') - # formatted_bill = formatted_bill.sort_values('Bill From Date') - # formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] - - - - # self.daily_temp = self.weather_cleaning(self.raw_daily_temp) - # formatted_bill, shape = self.bill_formating(self.bill) # pylint: disable=unused-variable - # quality = self.bill_quality(formatted_bill) - - # if any(i == 'short' for i in quality.flag): - # # any(quality.flag.astype(str) == 'long') - # self.processed_bill = self.short_bill_consolidate( - # formatted_bill, quality) - # else: - # self.processed_bill = formatted_bill - - # self.processed_bill['temperature'] = [ - # self.bill_period_weather(x, y) - # for x, y in zip(self.processed_bill['Bill From Date'], - # self.processed_bill['Bill To Date']) - # ] - - # self.processed_bill = self.processed_bill.sort_values('Bill From Date') - - # formatted_bill = formatted_bill.sort_values('Bill From Date') - # formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] - # self.formatted_bill = formatted_bill - - regression_method = 1 - - if weather_related_usage == 'Unknown': - opt = minimize( - lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) - regr_model = regr[0] - heating_coef, cooling_coef = regr_model.coef_ - - if -opt.fun > 0.5: - if (heating_coef > 0) and (cooling_coef <= 0): - weather_related_usage = 'Heating' - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - elif (heating_coef >= 0) and (cooling_coef >= 0): - weather_related_usage = 'Both' - else: - weather_related_usage = 'Both Not' - - if weather_related_usage == 'Both': - opt = minimize( - lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) - regr_model = regr[0] - heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr[2] - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - - # change accordingly for JOENYC buildings - - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - - # changes on Jan 17th 2018 - # please futher check with more bills - - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - # set the range of heating set point or cooling point - - if round(heating_set_point) in range( - 60, 95) and round(cooling_set_point) in range( - 55, 75): - weather_related_usage = 'Both' - heating_coef = heating_coef - cooling_coef = cooling_coef - - else: - # using standard seting points to check the bill - regr = self.regression_1(72, 65, self.processed_bill) - regr_model = regr[0] - heating_coef, cooling_coef = regr_model.coef_ - hddcdd = regr[2] - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - weather_related_usage = 'Both' - - if weather_related_usage == 'Heating': - opt_1 = minimize( - lambda x: -self.regression_1(x, 300, self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - opt_2 = minimize( - lambda x: -self.summer_dhw(x, self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): - opt = opt_2 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr = self.summer_dhw(opt.x[0], self.processed_bill) - regr_model = regr[0] - hdd = regr[2] - hdd_transit = [hdd[x][0] for x in range(len(hdd))] - hddcdd = np.array([[hdd_transit[x], 0] - for x in range(len(hdd))]) - regression_method = 2 - else: - if round(opt_1.x[0]) in range(60, 95): - opt = opt_1 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point, 300, - self.processed_bill) - regr_model = regr[0] - hddcdd = regr[2] - heating_coef = regr_model.coef_ - cooling_coef = 0 - else: - # legit heating set-point 72'F - heating_set_point = 72 - cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point, 300, - self.processed_bill) - - regr_model = regr[0] - hddcdd = regr[2] - heating_coef = regr_model.coef_ - cooling_coef = 0 - - if weather_related_usage == 'Cooling': - opt = minimize( - lambda x: -self.regression_1(x, 300, self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - regr = self.regression_1(opt.x[0], 300, self.processed_bill) - regr_model = regr[0] - hddcdd = regr[2] - cooling_set_point = opt.x[0] - heating_set_point = np.NaN - - self.usage = weather_related_usage - - if self.usage == 'Both Not': - self.heating_consumption_pred = self.processed_bill['Usage'] * 0 - self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN + end_use = ['Heating Usage', 'Cooling Usage'] - else: - self.regression_method = regression_method - - if self.regression_method == 1: - - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - if regr[1] > 0.5: - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * regr_model.coef_[0] - self.cooling_consumption_pred = np.array( - self.hddcdd[:, 1]) * regr_model.coef_[1] - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill[ - 'Days In Bill'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ - 'Days In Bill'] - - # real_sum = np.array(self.processed_bill['Usage']) - # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ - # self.others_consumption_pred - - # diff = real_sum - predict_sum - - else: - self.heating_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill[ - 'Days In Bill'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] - self.regression_method = 0 - self.hddcdd = np.zeros((len(self.processed_bill), 2)) - cooling_set_point = np.NaN - heating_set_point = np.NaN - self.usage = 'Both Not' - - elif self.regression_method == 2: - self.hddcdd = np.array( - pd.DataFrame(hddcdd).mul( - list(self.processed_bill['Days In Bill']), axis=0)) - - self.regr_model = regr_model - self.heating_consumption_pred = np.array( - self.hddcdd[:, 0]) * self.regr_model.coef_[0] - self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 - - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - + regr[3]['dhw'] - - bill_cp = self.processed_bill.copy() - bill_cp = self.processed_bill[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' - ]] - bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] - bill_cp['Heating Usage'] = self.heating_consumption_pred - bill_cp['Cooling Usage'] = self.cooling_consumption_pred - bill_cp['Other Usage'] = self.others_consumption_pred - - if self.usage == 'Both Not': - self.r_squared_of_fit = 0 - else: - self.r_squared_of_fit = regr[1] - - self.heating_set_point = heating_set_point - self.cooling_set_point = cooling_set_point - self.output_table = bill_cp - - last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] - first_bill_date = self.processed_bill['Bill From Date'].iloc[0] - - billing_months = self.num_month_dates(last_bill_date, first_bill_date) - output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, - self.cooling_set_point, billing_months) - self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) - self.most_recent_monthly_output = self.output_to_month(last_bill_date, - self.heating_set_point, self.cooling_set_point, 12) - self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, - self.most_recent_monthly_output) - self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) - -# self.avg_unit_price = (sum(bill_formatted['Total Charge'])) / ( -# sum(bill_formatted['Usage'])) + eu = pd.DataFrame( + list(end_uses.items()), columns=['end use', 'percentage']) + + if sum(eu['percentage']) != 1: + eu = eu.append({'end use':'Miscellaneous', 'percentage': (1 - sum(eu['percentage']))}) + + for i in range(len(eu)): + name_of_end_use = eu['end use'].iloc[i] + end_use.append(name_of_end_use) + + annual_usage_costs_sum['End Use'] = end_use + + for j in range(len(annual_usage_costs_sum)): + temp = annual_bill_breakdown[annual_usage_costs_sum['End Use'].iloc[j]] + temp_usage = sum(temp) + annual_usage_costs_sum['Usage'].iloc[j] = temp_usage + + annual_usage_costs_sum['Costs'] = annual_usage_costs_sum['Usage'] * avg_price + + return annual_usage_costs_sum + + def to_json(self, period='bill_breakdown'): + """ + Output in json file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + + if period == 'bill_breakdown': + return self.bill_breakdown.to_json(orient="records", date_format="iso") + + return self.output_table_monthly.to_json(orient="records", date_format="iso") + + def to_dict(self, period='bill_breakdown'): + """ + Output in dictionary file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + + if period == 'bill_breakdown': + return self.bill_breakdown.to_dict(orient="records") + + return self.output_table_monthly.to_dict(orient="records") -- GitLab From a0d7dd34d13d401d7edd39ade7ba1f7878563bfa Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 11 Jun 2019 16:45:06 -0400 Subject: [PATCH 41/97] delete unused mehtod --- bpeng/bill/weather_related_usage_type.py | 35 ------------------------ 1 file changed, 35 deletions(-) delete mode 100644 bpeng/bill/weather_related_usage_type.py diff --git a/bpeng/bill/weather_related_usage_type.py b/bpeng/bill/weather_related_usage_type.py deleted file mode 100644 index 1717804..0000000 --- a/bpeng/bill/weather_related_usage_type.py +++ /dev/null @@ -1,35 +0,0 @@ - -# import pandas as pd -# import numpy as np -# import datetime as datetime -# from scipy.optimize import minimize -# from regr import regression_1 - -# def determine_weather_usage_type_when_input_is_unknown(processed_bill): -# ''' -# This function is to determine the weather_related_usage_type when the input is unknown -# ''' -# opt = minimize( -# lambda x: -regression_1(x[0], x[1], processed_bill)[1], -# (65, 65), -# method='nelder-mead', -# options={'xtol': 1e-2, -# 'disp': False}) - -# regr_temp = regression_1(opt.x[0], opt.x[1], processed_bill) -# regr_model = regr_temp[0] -# heating_coef, cooling_coef = regr_model.coef_ - -# if -opt.fun > 0.5: -# if (heating_coef > 0) and (cooling_coef <= 0): -# weather_related_usage = 'Heating' -# elif (heating_coef <= 0) and (cooling_coef > 0): -# weather_related_usage = 'Cooling' -# elif (heating_coef <= 0) and (cooling_coef <= 0): -# weather_related_usage = 'Both Not' -# elif (heating_coef >= 0) and (cooling_coef >= 0): -# weather_related_usage = 'Both' -# else: -# weather_related_usage = 'Both Not' - -# return weather_related_usage -- GitLab From 6f3f1545c4f9b4141f62a630a45fa61e8c07f5ec Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 11:19:30 -0400 Subject: [PATCH 42/97] update --- bpeng/bill/driver.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py index 27dfca8..1cac577 100644 --- a/bpeng/bill/driver.py +++ b/bpeng/bill/driver.py @@ -54,7 +54,7 @@ class bill_disaggragate(): self.regr_results = optimize_setpoints(processed_bill) monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) self.output_normalized_monthly_bill = non_weahter_related_breakdown(self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) - self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index + self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index(drop=True) self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) return @@ -74,20 +74,14 @@ class bill_disaggragate(): """ annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) - avg_price = 0.9 - #sum(annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'])/sum(annual_bill_breakdown['Calculated Total Usage']) - - end_use = ['Heating Usage', 'Cooling Usage'] - - eu = pd.DataFrame( - list(end_uses.items()), columns=['end use', 'percentage']) - - if sum(eu['percentage']) != 1: - eu = eu.append({'end use':'Miscellaneous', 'percentage': (1 - sum(eu['percentage']))}) - - for i in range(len(eu)): - name_of_end_use = eu['end use'].iloc[i] - end_use.append(name_of_end_use) + annual_bill_breakdown['Costs'] = annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] + avg_price = sum(annual_bill_breakdown['Costs']) /sum(annual_bill_breakdown['Calculated Total Usage']) + end_use = list(annual_bill_breakdown.columns) + unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill','Unit Price', 'Non Weather Related Usage', \ + 'Calculated Total Usage','Costs'] + + for elem in unwanted_column: + end_use.remove(elem) annual_usage_costs_sum['End Use'] = end_use -- GitLab From 5594c32d7b51358cf3bad62a6f5c2da3306442d6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 11:19:42 -0400 Subject: [PATCH 43/97] new test using driver --- bpeng/bill/test.py | 94 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 28 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 886f688..c39b52e 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -5,48 +5,86 @@ import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta - -from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) -from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) -from regr import (regression_1, regression_2, regr_temp_hddcdd) from get_test_data import (query_bill, get_weather_data) -from calculater import (hdd, threshold) -from setpoints_optimization import (optimize_setpoints) -from disaggragate_with_regr_matrix import(weather_ralated_breakdown, non_weahter_related_breakdown) -from normalized_monthly_bill import normalized_billing_period +from driver import bill_disaggragate -end_uses = {'Miscellanous': 1} +#Inputs +end_uses = {'DHW': 0.8} raw_bill = query_bill(181794, 2) raw_weather_data_daily = get_weather_data() weather_related_usage_init = 'Unknown' -weather_data_daily = weather_cleaning(raw_weather_data_daily) -formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable -quality = bill_quality(formatted_bill) -if any(i == 'short' for i in quality.flag): - processed_bill = short_bill_consolidate( - formatted_bill, quality) -else: - processed_bill = formatted_bill +bd = bill_disaggragate(raw_bill, raw_weather_data_daily, end_uses) +bd.main() +most_recent_year_bill = bd.annual_normalized_monthly_bill +column_name = most_recent_year_bill.columns +r_squared = bd.regr_results['regr_output'][1] + +print('r-squared-fit:', r_squared) +print(bd.annual_usage_costs_summary) + +# import pandas as pd +# end_uses = {'a': 0.1, 'b': 0.2} +# eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) +# print('sum', sum(eu['percentage'])) +# eu = eu.append({'end use': 'Misc', 'percentage': 0.7}, ignore_index = True) +# print(eu) + +# #Bill Analysis +# weather_data_daily = weather_cleaning(raw_weather_data_daily) +# formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable +# quality = bill_quality(formatted_bill) + +# if any(i == 'short' for i in quality.flag): +# processed_bill = short_bill_consolidate( +# formatted_bill, quality) +# else: +# processed_bill = formatted_bill + +# processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) + +# processed_bill = processed_bill.sort_values('Bill From Date') +# formatted_bill = formatted_bill.sort_values('Bill From Date') +# formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] + + +# nb = normalized_billing_period(formatted_bill) +# monthly_bill = nb.normailized_monthly_bill() +# monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) + +# monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill) + + +# regr_results = optimize_setpoints(processed_bill) +# monthly_breakdown = weather_ralated_breakdown(regr_results, monthly_bill_temp) +# monthly_breakdown_full = non_weahter_related_breakdown(end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) + +# #lalalalalala +# print(monthly_breakdown_full[-12:]) + + + + + + + + + + + + + + + + -processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) -processed_bill = processed_bill.sort_values('Bill From Date') -formatted_bill = formatted_bill.sort_values('Bill From Date') -formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] -nb = normalized_billing_period(formatted_bill) -monthly_bill = nb.normailized_monthly_bill() -monthly_bill_temp = bill_with_daily_temp(monthly_bill,weather_data_daily) -regr_results = optimize_setpoints(processed_bill) -monthly_breakdown = weather_ralated_breakdown(regr_results, monthly_bill_temp) -#lalalalalala -print(monthly_breakdown) # regr_model = regression_1(72, 300, processed_bill) # score = regr_model.score -- GitLab From d3a10b1db76a4bd4116af57418e416c322bfc8ad Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Wed, 12 Jun 2019 13:50:33 -0400 Subject: [PATCH 44/97] Refactor bills code --- bpeng/bill/bill_cleaner.py | 343 +++++++++++++++++-------------------- bpeng/bill/driver.py | 27 +-- bpeng/bill/test.py | 12 +- 3 files changed, 184 insertions(+), 198 deletions(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index 9e3decc..5e66fd4 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -1,191 +1,168 @@ - - # from datetime import timedelta import numpy as np import pandas as pd from calculater import (outliers_iqr) -def bill_formating(raw_bill): - """ - Bill Cleaning - Step 1: - 1. format each column of the raw bill - 2. drop NAN / duplicates - - Args: - raw_bill (pd.DataFrame): a raw bill with columns of - 'Bill From Date' - 'Bill To Date' - 'Days In Bill' - 'Usage' - 'Total Charge' - Returns: - pd.DataFrame: a formatted bill - boolean: True - Length of the bill has changed during bill cleaning step 1 - """ - bill_copy = raw_bill.copy() - bill_copy['Bill From Date'] = pd.to_datetime( - bill_copy['Bill From Date']) - bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), +class Bill: + + def __init__(self, raw_bill): + """ + Args: + self.raw_bill (pd.DataFrame): a raw bill with columns of + 'Bill From Date' + 'Bill To Date' + 'Days In Bill' + 'Usage' + 'Total Charge' + """ + self.raw_bill = raw_bill + self.formatted_bill = None + self.shape_change = False + self.quality_metric = None + self.bill_consi = None + + def formating(self): + """ + Bill Cleaning + Step 1: + 1. format each column of the raw bill + 2. drop NAN / duplicates + Returns: + pd.DataFrame: a formatted bill + boolean: True - Length of the bill has changed during bill cleaning step 1 + """ + bill_copy = self.raw_bill.copy() + bill_copy['Bill From Date'] = pd.to_datetime( + bill_copy['Bill From Date']) + bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) + bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), str(x.date().year)])) - bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - bill_copy = bill_copy[[ - 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', - 'Total Charge' - ]] - - bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > - pd.to_datetime(bill_copy['Bill From Date'])] - bill_copy1['Bill From Date'] = pd.to_datetime( - bill_copy1['Bill From Date']) - bill_copy2 = bill_copy1.sort_values('Bill From Date') - bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) - bill_copy2 = bill_copy2.dropna() - bill_copy2 = bill_copy2.drop_duplicates() - bill_copy2 = bill_copy2.reset_index(drop=True) - - if np.array(bill_copy2.shape)[0] == np.array(raw_bill.shape)[0]: - bill_shape_change = 'False' - bill_shape_change = 'True' - bill_formatted = bill_copy2 - return bill_formatted, bill_shape_change - - -def bill_quality(bill_formatted): - """ - Bill Cleaning - Step 2: - 1. Check each billing period to find a bill is too short or too long; - Args: - bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 - Returns: - pd.DataFrame: a dataframe with columns: - 'index': the index of the billing period which is identified as an outlier - 'flag': to indicate either it is too long or too short - """ - - bill = bill_formatted.copy() - bill = pd.DataFrame(bill) - days_in_bill = np.array(bill['Days In Bill']) - - # abnormal days in bill will return False - days_quality_index_inti = outliers_iqr(list(days_in_bill)) - days_quality_index_recheck = np.array( - [x not in range(25, 35) for x in days_in_bill]) - days_quality_index = list( - np.array(days_quality_index_inti) * - np.array(days_quality_index_recheck)) - - days_abn_index = [] - for x in range(len(days_quality_index)): - if days_quality_index[x]: - days_abn_index.append(x) - - bill_quality_metric = pd.DataFrame(data=days_abn_index, columns=['index']) - - flag = [] - for billing_date_index in range(len(days_abn_index)): - point_index = days_abn_index[billing_date_index - 1] - if days_in_bill[point_index] < days_in_bill.mean(): - flag.append('short') - elif days_in_bill[point_index] >= days_in_bill.mean(): - flag.append('long') - - bill_quality_metric['flag'] = np.array(flag) - return bill_quality_metric - - -def short_bill_consolidate(bill_formatted, bill_quality_metric): - """ - Bill Cleaning - Step 3: - consolidation of the bills that are too short compare to others - NOTE: error - - Args: - bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 - bill_quality_metric (pd.DataFrame): bill quality from Step 2 - - Returns: - pd.DataFrame: the cleaned bill and ready for analysis - - """ - bill_quality_short = bill_quality_metric[bill_quality_metric['flag'] == 'short'] - bill_consi = bill_formatted.copy() - # consolidate the billing period that is too short compare to others - - for xxx in range(len(bill_quality_short)): - - if bill_quality_metric['flag'].iloc[xxx] == 'short': - row_index = bill_quality_short['index'].iloc[xxx] - - if (row_index != 0) and (row_index != bill_consi.index[-1]): - - if bill_consi['Days In Bill'][int( - row_index - 1)] <= bill_consi['Days In Bill'][int( - row_index + 1)]: - - bill_consi['Bill To Date'][int( - row_index - 1)] = bill_consi['Bill To Date'][int( - row_index)] - bill_consi['Usage'][int( - row_index - 1)] = bill_consi['Usage'][int( - row_index - 1)] + bill_consi['Usage'][int( - row_index)] - bill_consi['Total Charge'][int( - row_index - 1)] = bill_consi['Total Charge'][int( - row_index - 1)] + bill_consi['Total Charge'][int( - row_index)] - bill_consi['Days In Bill'][int( - row_index - 1)] = bill_consi['Days In Bill'][int( - row_index - 1 - )] + bill_consi['Days In Bill'][int(row_index)] - else: - bill_consi['Bill From Date'][int( - row_index + 1)] = bill_consi['Bill To Date'][int( - row_index)] - bill_consi['Usage'][int( - row_index + 1)] = bill_consi['Usage'][int( - row_index + 1)] + bill_consi['Usage'][int( - row_index)] - bill_consi['Total Charge'][int( - row_index + 1)] = bill_consi['Total Charge'][int( - row_index + 1)] + bill_consi['Total Charge'][int( - row_index)] - bill_consi['Days In Bill'][int( - row_index + 1)] = bill_consi['Days In Bill'][int( - row_index + 1 - )] + bill_consi['Days In Bill'][int(row_index)] - - if row_index == 0: - bill_consi['Bill From Date'][1] = bill_consi[ - 'Bill From Date'][0] - bill_consi['Usage'][ - 1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] - bill_consi['Total Charge'][ - 1] = bill_consi['Total Charge'][0] + bill_consi['Total Charge'][1] - bill_consi['Days In Bill'][ - 1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] - - if row_index == bill_consi.index[-1]: - bill_consi['Bill To Date'].iloc[-2] = bill_consi[ - 'Bill To Date'].iloc[-1] - bill_consi['Usage'].iloc[ - -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] - bill_consi['Total Charge'].iloc[ - -2] = bill_consi['Total Charge'].iloc[-2] + bill_consi['Total Charge'].iloc[-1] - bill_consi['Days In Bill'].iloc[ - -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] - - if len(bill_quality_short) != 0: - bill_consi = bill_consi.drop( - bill_consi.index[list(bill_quality_short['index'])]) - - bill_consi = bill_consi.reset_index(drop=False) - - return bill_consi + bill_copy = bill_copy[[ + 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', + 'Total Charge' + ]] + + bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > + pd.to_datetime(bill_copy['Bill From Date'])] + bill_copy1['Bill From Date'] = pd.to_datetime( + bill_copy1['Bill From Date']) + bill_copy2 = bill_copy1.sort_values('Bill From Date') + bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) + bill_copy2 = bill_copy2.dropna() + bill_copy2 = bill_copy2.drop_duplicates() + bill_copy2 = bill_copy2.reset_index(drop=True) + + if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0]: + self.shape_change = 'False' + else: + self.shape_change = 'True' + + self.formatted_bill = bill_copy2 + # return bill_formatted, bill_shape_change + + def quality(self): + """ + Bill Cleaning + Step 2: + 1. Check each billing period to find a bill is too short or too long; + Args: + bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + Returns: + pd.DataFrame: a dataframe with columns: + 'index': the index of the billing period which is identified as an outlier + 'flag': to indicate either it is too long or too short + """ + + if self.formatted_bill is None: + exit('The bill is not formatted, please do formatting!') + + bill = self.formatted_bill.copy() + bill = pd.DataFrame(bill) + days_in_bill = np.array(bill['Days In Bill']) + + # abnormal days in bill will return False + days_quality_index_inti = outliers_iqr(list(days_in_bill)) + days_quality_index_recheck = np.array( + [x not in range(25, 35) for x in days_in_bill]) + days_quality_index = list( + np.array(days_quality_index_inti) * + np.array(days_quality_index_recheck)) + + days_abn_index = [] + for x in range(len(days_quality_index)): + if days_quality_index[x]: + days_abn_index.append(x) + + self.quality_metric = pd.DataFrame(data=days_abn_index, columns=['index']) + + flag = [] + for billing_date_index in range(len(days_abn_index)): + point_index = days_abn_index[billing_date_index - 1] + if days_in_bill[point_index] < days_in_bill.mean(): + flag.append('short') + elif days_in_bill[point_index] >= days_in_bill.mean(): + flag.append('long') + + self.quality_metric['flag'] = np.array(flag) + + def consolidate(self): + """ + Bill Cleaning + Step 3: + consolidation of the bills that are too short compare to others + NOTE: error + + Returns: + pd.DataFrame: the cleaned bill and ready for analysis + + """ + if self.formatted_bill is None: + exit('The bill is not formatted, please do formatting!') + + if self.quality_metric.empty: + exit + + bill_quality_short = self.quality_metric[self.quality_metric['flag'] == 'short'] + if bill_quality_short is None: + exit + + bill_consi = self.formatted_bill.copy() + # consolidate the billing period that is too short compare to others + + for seq in range(len(bill_quality_short)): + if self.quality_metric['flag'].iloc[seq] == 'short': + row_index = bill_quality_short['index'].iloc[seq] + + if row_index not in [0, bill_consi.index[-1]]: + if bill_consi['Days In Bill'][int(row_index - 1)] <= bill_consi['Days In Bill'][int(row_index + 1)]: + bill_consi['Bill To Date'][int(row_index - 1)] = bill_consi['Bill To Date'][int(row_index)] + bill_consi['Usage'][int(row_index - 1)] += bill_consi['Usage'][int(row_index)] + bill_consi['Total Charge'][int(row_index - 1)] += bill_consi['Total Charge'][int(row_index)] + bill_consi['Days In Bill'][int(row_index - 1)] += bill_consi['Days In Bill'][int(row_index)] + else: + bill_consi['Bill From Date'][int(row_index + 1)] = bill_consi['Bill To Date'][int(row_index)] + bill_consi['Usage'][int(row_index + 1)] += bill_consi['Usage'][int(row_index)] + bill_consi['Total Charge'][int(row_index + 1)] += bill_consi['Total Charge'][int(row_index)] + bill_consi['Days In Bill'][int(row_index + 1)] += bill_consi['Days In Bill'][int(row_index)] + + if row_index == 0: + bill_consi['Bill From Date'][1] = bill_consi['Bill From Date'][0] + bill_consi['Usage'][1] += bill_consi['Usage'][0] + bill_consi['Total Charge'][1] += bill_consi['Total Charge'][0] + bill_consi['Days In Bill'][1] += bill_consi['Days In Bill'][0] + + if row_index == bill_consi.index[-1]: + bill_consi['Bill To Date'].iloc[-2] = bill_consi['Bill To Date'].iloc[-1] + bill_consi['Usage'].iloc[-2] += bill_consi['Usage'].iloc[-1] + bill_consi['Total Charge'].iloc[-2] += bill_consi['Total Charge'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] += bill_consi['Days In Bill'].iloc[-1] + + if len(bill_quality_short) != 0: + bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) + + self.bill_consi = bill_consi.reset_index(drop=False) diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py index 1cac577..027c4fa 100644 --- a/bpeng/bill/driver.py +++ b/bpeng/bill/driver.py @@ -14,12 +14,13 @@ from calculater import (hdd, threshold) from setpoints_optimization import (optimize_setpoints) from disaggragate_with_regr_matrix import(weather_ralated_breakdown, non_weahter_related_breakdown) from normalized_monthly_bill import normalized_billing_period +from bill_cleaner import Bill class bill_disaggragate(): - def __init__(self, bill, daily_temp, end_uses): - self.bill = bill + def __init__(self, raw_bill, daily_temp, end_uses): + self.bill = Bill(raw_bill) self.daily_temp = daily_temp self.end_uses = end_uses self.output_normalized_monthly_bill = None @@ -30,25 +31,25 @@ class bill_disaggragate(): def main(self): weather_data_daily = weather_cleaning(self.daily_temp) - formatted_bill, shape = bill_formating(self.bill) # pylint: disable=unused-variable - quality = bill_quality(formatted_bill) + self.bill.formating() + self.bill.quality() + self.bill.consolidate() - if any(i == 'short' for i in quality.flag): - processed_bill = short_bill_consolidate( - formatted_bill, quality) - else: - processed_bill = formatted_bill - - processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) + # if any(i == 'short' for i in quality.flag): + # processed_bill = short_bill_consolidate( + # formatted_bill, quality) + # else: + # processed_bill = formatted_bill + processed_bill = bill_with_daily_temp(self.bill.formatted_bill, weather_data_daily) processed_bill = processed_bill.sort_values('Bill From Date') - formatted_bill = formatted_bill.sort_values('Bill From Date') + + formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] nb = normalized_billing_period(formatted_bill) monthly_bill = nb.normailized_monthly_bill() monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill_temp) self.regr_results = optimize_setpoints(processed_bill) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index c39b52e..ec5f3cc 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -7,6 +7,7 @@ from scipy.optimize import minimize from datetime import timedelta from get_test_data import (query_bill, get_weather_data) from driver import bill_disaggragate +from bill_cleaner import Bill #Inputs end_uses = {'DHW': 0.8} @@ -21,8 +22,15 @@ most_recent_year_bill = bd.annual_normalized_monthly_bill column_name = most_recent_year_bill.columns r_squared = bd.regr_results['regr_output'][1] -print('r-squared-fit:', r_squared) -print(bd.annual_usage_costs_summary) + + +# # print('r-squared-fit:', r_squared) +# # print(bd.annual_usage_costs_summary) +# print(raw_bill) +# bill.formating() +# bill.quality() +# bill.consolidate() +# print(bill.bill_consi) # import pandas as pd # end_uses = {'a': 0.1, 'b': 0.2} -- GitLab From 4ec19e43e6aadcb4b5a63b59244bb9860b3e19b2 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 15:39:59 -0400 Subject: [PATCH 45/97] formatting --- bpeng/bill/setpoints_optimization.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 1896878..cf739f5 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -185,15 +185,15 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): else: intercept_ = regr_output[0].intercept_ heating_coef_ = regr_output[0].coef_[0] - cooling_coef_ = regr_output[0].coef_[1] + cooling_coef_ = regr_output[0].coef_[1] optimized_regr_matrix = {'heating_set_point': heating_set_point, 'cooling_set_point': cooling_set_point, - 'intercept_':intercept_, - 'heating_coef_' : heating_coef_, + 'intercept_': intercept_, + 'heating_coef_': heating_coef_, 'cooling_coef_': cooling_coef_, 'weather_related_usage': weather_related_usage, - 'regression_method':regression_method, + 'regression_method': regression_method, 'regr_model': regr_model, 'regr_output': regr_output } -- GitLab From 1779b2ab1d0e2667639d58f9f4409706e4889162 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 15:40:08 -0400 Subject: [PATCH 46/97] update the requirments --- requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/requirements.txt b/requirements.txt index c1bedd9..da3eea1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,8 @@ oplus==4.6.0 pvlib==0.4.4 requests==2.12.4 xlrd==1.0.0 +influxdb==5.2.2 +pandas==0.24.2 +psycopg2==2.8.2 +influxdb==5.2.2 +scipy==0.19.0 -- GitLab From f496c957713c4bb285780db99e40589ee955c142 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 15:40:23 -0400 Subject: [PATCH 47/97] formatting --- bpeng/bill/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py index 1cac577..d6310b2 100644 --- a/bpeng/bill/driver.py +++ b/bpeng/bill/driver.py @@ -12,7 +12,7 @@ from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_d from regr import (regression_1, regression_2, regr_temp_hddcdd) from calculater import (hdd, threshold) from setpoints_optimization import (optimize_setpoints) -from disaggragate_with_regr_matrix import(weather_ralated_breakdown, non_weahter_related_breakdown) +from disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) from normalized_monthly_bill import normalized_billing_period -- GitLab From fb94ac7b9ccd43003fd9ffa7a1c4c677716cda4f Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Wed, 12 Jun 2019 15:57:46 -0400 Subject: [PATCH 48/97] Fix import Bill issue --- bpeng/bill/driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py index 027c4fa..022df3f 100644 --- a/bpeng/bill/driver.py +++ b/bpeng/bill/driver.py @@ -7,7 +7,7 @@ from scipy.optimize import minimize from datetime import timedelta from get_test_data import (query_bill, get_weather_data) -from bill_cleaner import (bill_formating, bill_quality, short_bill_consolidate) +from bill_cleaner import Bill from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) from regr import (regression_1, regression_2, regr_temp_hddcdd) from calculater import (hdd, threshold) -- GitLab From d74eb90bdc50738b6095944588ecb64a2615fb39 Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Wed, 12 Jun 2019 16:09:20 -0400 Subject: [PATCH 49/97] Add date format function --- bpeng/bill/bill_cleaner.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index 5e66fd4..b6970db 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -22,6 +22,12 @@ class Bill: self.quality_metric = None self.bill_consi = None + def formatDate(self, date): + return date.apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + + def formating(self): """ Bill Cleaning @@ -33,35 +39,22 @@ class Bill: boolean: True - Length of the bill has changed during bill cleaning step 1 """ bill_copy = self.raw_bill.copy() - bill_copy['Bill From Date'] = pd.to_datetime( - bill_copy['Bill From Date']) - bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) + bill_copy['Bill From Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill From Date'])) + bill_copy['Bill To Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill To Date'])) bill_copy = bill_copy[[ - 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', - 'Total Charge' + 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', 'Total Charge' ]] bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > pd.to_datetime(bill_copy['Bill From Date'])] - bill_copy1['Bill From Date'] = pd.to_datetime( - bill_copy1['Bill From Date']) + bill_copy1['Bill From Date'] = pd.to_datetime(bill_copy1['Bill From Date']) bill_copy2 = bill_copy1.sort_values('Bill From Date') bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) bill_copy2 = bill_copy2.dropna() bill_copy2 = bill_copy2.drop_duplicates() bill_copy2 = bill_copy2.reset_index(drop=True) - if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0]: - self.shape_change = 'False' - else: - self.shape_change = 'True' - + self.shape_change = 'False' if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0] else 'True' self.formatted_bill = bill_copy2 # return bill_formatted, bill_shape_change -- GitLab From a26626c61a72fd6c6ed5591eacdd246cf0b8d69b Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 16:14:06 -0400 Subject: [PATCH 50/97] get raw file --- bpeng/bill/bill_cleaner.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index 5e66fd4..b6970db 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -22,6 +22,12 @@ class Bill: self.quality_metric = None self.bill_consi = None + def formatDate(self, date): + return date.apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), + str(x.date().year)])) + + def formating(self): """ Bill Cleaning @@ -33,35 +39,22 @@ class Bill: boolean: True - Length of the bill has changed during bill cleaning step 1 """ bill_copy = self.raw_bill.copy() - bill_copy['Bill From Date'] = pd.to_datetime( - bill_copy['Bill From Date']) - bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) + bill_copy['Bill From Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill From Date'])) + bill_copy['Bill To Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill To Date'])) bill_copy = bill_copy[[ - 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', - 'Total Charge' + 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', 'Total Charge' ]] bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > pd.to_datetime(bill_copy['Bill From Date'])] - bill_copy1['Bill From Date'] = pd.to_datetime( - bill_copy1['Bill From Date']) + bill_copy1['Bill From Date'] = pd.to_datetime(bill_copy1['Bill From Date']) bill_copy2 = bill_copy1.sort_values('Bill From Date') bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) bill_copy2 = bill_copy2.dropna() bill_copy2 = bill_copy2.drop_duplicates() bill_copy2 = bill_copy2.reset_index(drop=True) - if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0]: - self.shape_change = 'False' - else: - self.shape_change = 'True' - + self.shape_change = 'False' if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0] else 'True' self.formatted_bill = bill_copy2 # return bill_formatted, bill_shape_change -- GitLab From 7bcf1d87b12103d03ad66fed856bfb03cd7dcb1f Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Wed, 12 Jun 2019 16:22:03 -0400 Subject: [PATCH 51/97] Remove duplicated bill from and to date code --- bpeng/bill/bill_cleaner.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index b6970db..06ce477 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -39,17 +39,18 @@ class Bill: boolean: True - Length of the bill has changed during bill cleaning step 1 """ bill_copy = self.raw_bill.copy() - bill_copy['Bill From Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill From Date'])) - bill_copy['Bill To Date'] = self.formatDate(pd.to_datetime(bill_copy['Bill To Date'])) + billFromDate = pd.to_datetime(bill_copy['Bill From Date']) + billToDate = pd.to_datetime(bill_copy['Bill To Date']) + bill_copy['Bill From Date'] = self.formatDate(billFromDate) + bill_copy['Bill To Date'] = self.formatDate(billToDate) bill_copy = bill_copy[[ 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', 'Total Charge' ]] - bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > - pd.to_datetime(bill_copy['Bill From Date'])] - bill_copy1['Bill From Date'] = pd.to_datetime(bill_copy1['Bill From Date']) + bill_copy1 = bill_copy[billFromDate > billToDate] + bill_copy1['Bill From Date'] = billFromDate bill_copy2 = bill_copy1.sort_values('Bill From Date') - bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) + bill_copy2['Bill To Date'] = billToDate bill_copy2 = bill_copy2.dropna() bill_copy2 = bill_copy2.drop_duplicates() bill_copy2 = bill_copy2.reset_index(drop=True) -- GitLab From 8ff83ebe2c7ed53b437f25bd4aa4b5f117b8fe48 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 18:04:12 -0400 Subject: [PATCH 52/97] debug and it works now --- bpeng/bill/bill_cleaner.py | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index 06ce477..bd85778 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -1,9 +1,7 @@ -# from datetime import timedelta import numpy as np import pandas as pd from calculater import (outliers_iqr) - class Bill: def __init__(self, raw_bill): @@ -24,11 +22,10 @@ class Bill: def formatDate(self, date): return date.apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - + str(x.date().day), + str(x.date().year)])) - def formating(self): + def formatting(self): """ Bill Cleaning Step 1: @@ -47,7 +44,7 @@ class Bill: 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', 'Total Charge' ]] - bill_copy1 = bill_copy[billFromDate > billToDate] + bill_copy1 = bill_copy[billFromDate < billToDate] bill_copy1['Bill From Date'] = billFromDate bill_copy2 = bill_copy1.sort_values('Bill From Date') bill_copy2['Bill To Date'] = billToDate @@ -57,17 +54,14 @@ class Bill: self.shape_change = 'False' if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0] else 'True' self.formatted_bill = bill_copy2 - # return bill_formatted, bill_shape_change def quality(self): """ Bill Cleaning Step 2: 1. Check each billing period to find a bill is too short or too long; - Args: - bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 - Returns: - pd.DataFrame: a dataframe with columns: + 2. formatted_bill (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + 3. quality (pd.DataFrame): a dataframe with columns: 'index': the index of the billing period which is identified as an outlier 'flag': to indicate either it is too long or too short """ @@ -110,20 +104,18 @@ class Bill: Step 3: consolidation of the bills that are too short compare to others NOTE: error - - Returns: - pd.DataFrame: the cleaned bill and ready for analysis - + self.bill_consi (pd.DataFrame): the cleaned bill and ready for analysis """ if self.formatted_bill is None: exit('The bill is not formatted, please do formatting!') + print('quality matric:', self.quality_metric) if self.quality_metric.empty: - exit + exit() bill_quality_short = self.quality_metric[self.quality_metric['flag'] == 'short'] if bill_quality_short is None: - exit + exit() bill_consi = self.formatted_bill.copy() # consolidate the billing period that is too short compare to others @@ -156,7 +148,10 @@ class Bill: bill_consi['Total Charge'].iloc[-2] += bill_consi['Total Charge'].iloc[-1] bill_consi['Days In Bill'].iloc[-2] += bill_consi['Days In Bill'].iloc[-1] - if len(bill_quality_short) != 0: - bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) - + bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) self.bill_consi = bill_consi.reset_index(drop=False) + + def RunAllFunctions(self): + self.formatting() + self.quality() + self.consolidate() -- GitLab From 10f21a9cd089cafad195bcbc5a2dbe6fa46d5b16 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 12 Jun 2019 18:05:12 -0400 Subject: [PATCH 53/97] add a new building for test with abnormal bills --- bpeng/bill/test.py | 71 +++++++--------------------------------------- 1 file changed, 11 insertions(+), 60 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index ec5f3cc..096c584 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -5,75 +5,26 @@ import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta + from get_test_data import (query_bill, get_weather_data) from driver import bill_disaggragate from bill_cleaner import Bill -#Inputs +# inputs: end_uses = {'DHW': 0.8} -raw_bill = query_bill(181794, 2) +raw_bill = query_bill(243106, 2) raw_weather_data_daily = get_weather_data() weather_related_usage_init = 'Unknown' +temp = Bill(raw_bill) +temp.RunAllFunctions() +#print(temp.bill_consi) -bd = bill_disaggragate(raw_bill, raw_weather_data_daily, end_uses) -bd.main() -most_recent_year_bill = bd.annual_normalized_monthly_bill -column_name = most_recent_year_bill.columns -r_squared = bd.regr_results['regr_output'][1] - - - -# # print('r-squared-fit:', r_squared) -# # print(bd.annual_usage_costs_summary) -# print(raw_bill) -# bill.formating() -# bill.quality() -# bill.consolidate() -# print(bill.bill_consi) - -# import pandas as pd -# end_uses = {'a': 0.1, 'b': 0.2} -# eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) -# print('sum', sum(eu['percentage'])) -# eu = eu.append({'end use': 'Misc', 'percentage': 0.7}, ignore_index = True) -# print(eu) - -# #Bill Analysis -# weather_data_daily = weather_cleaning(raw_weather_data_daily) -# formatted_bill, shape = bill_formating(raw_bill) # pylint: disable=unused-variable -# quality = bill_quality(formatted_bill) - -# if any(i == 'short' for i in quality.flag): -# processed_bill = short_bill_consolidate( -# formatted_bill, quality) -# else: -# processed_bill = formatted_bill - -# processed_bill = bill_with_daily_temp(formatted_bill, weather_data_daily) - -# processed_bill = processed_bill.sort_values('Bill From Date') -# formatted_bill = formatted_bill.sort_values('Bill From Date') -# formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - - -# nb = normalized_billing_period(formatted_bill) -# monthly_bill = nb.normailized_monthly_bill() -# monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - -# monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill) - - -# regr_results = optimize_setpoints(processed_bill) -# monthly_breakdown = weather_ralated_breakdown(regr_results, monthly_bill_temp) -# monthly_breakdown_full = non_weahter_related_breakdown(end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) - -# #lalalalalala -# print(monthly_breakdown_full[-12:]) - - - - +# bd = bill_disaggragate(raw_bill, raw_weather_data_daily, end_uses) +# bd.main() +# most_recent_year_bill = bd.annual_normalized_monthly_bill +# column_name = most_recent_year_bill.columns +# r_squared = bd.regr_results['regr_output'][1] -- GitLab From 1b9976256a7cf799ce3248674639c083a21ebc3a Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 14 Jun 2019 16:52:45 -0400 Subject: [PATCH 54/97] updates --- bpeng/bill/bill_cleaner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index bd85778..aacad1f 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd from calculater import (outliers_iqr) + class Bill: def __init__(self, raw_bill): -- GitLab From b4cb58660eee0677b5ea719c7cf7f294c9c0ae90 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 14 Jun 2019 16:52:54 -0400 Subject: [PATCH 55/97] update to class --- bpeng/bill/get_billing_weather_data.py | 63 ++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 bpeng/bill/get_billing_weather_data.py diff --git a/bpeng/bill/get_billing_weather_data.py b/bpeng/bill/get_billing_weather_data.py new file mode 100644 index 0000000..335b50f --- /dev/null +++ b/bpeng/bill/get_billing_weather_data.py @@ -0,0 +1,63 @@ + + +import psycopg2 +import pandas as pd +from influxdb import InfluxDBClient + + +class get_billing_weather_data(): + + def __init__(self, building_id, account_type): + self.building_id = building_id + self.account_type = account_type + self.weather = None + self.bill = None + return + + def get_weather_data(self): + user = 'engineering' + password = 'nPEc9Pz0iV' + dbname = 'weather' + host = '52.206.6.10' + port = 8086 + + influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) + + query_string = "SELECT * from temperature WHERE interval='daily'" + daily_temperature = influx_db.query(query_string).get_points('temperature') + self.weather = pd.DataFrame(daily_temperature) + + def query_bill(self): + + hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' + username = 'blocpower' + password = 'Z50Fwgkfi0SsVaBz' + database = 'utility_bill' + + myConnection = psycopg2.connect(host=hostname, user=username, + password=password, dbname=database) + df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) + df_account = pd.read_sql('SELECT * FROM public.account', myConnection) + df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) + df_account_selected = df_account[df_account['account_type'] == self.account_type] + + account = df_account_selected[df_account_selected['building_id'] == self.building_id] + acc_id = account['id'].iloc[0] + new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + \ + new_bill['supply_charge'] + \ + new_bill['esco_charge'] + \ + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) + bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] + bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + self.bill = bill + + def RunAllFunctions(self): + self.get_weather_data() + self.query_bill() + + -- GitLab From 66b2c3d31c937cb3542971b368ea3c7ff8794b02 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 14 Jun 2019 16:55:21 -0400 Subject: [PATCH 56/97] update the name --- bpeng/bill/get_test_data.py | 54 ------------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 bpeng/bill/get_test_data.py diff --git a/bpeng/bill/get_test_data.py b/bpeng/bill/get_test_data.py deleted file mode 100644 index 2e254cd..0000000 --- a/bpeng/bill/get_test_data.py +++ /dev/null @@ -1,54 +0,0 @@ - - -import psycopg2 -import pandas as pd -from influxdb import InfluxDBClient - - -def __init__(): - return - - -def get_weather_data(): - user = 'engineering' - password = 'nPEc9Pz0iV' - dbname = 'weather' - host = '52.206.6.10' - port = 8086 - - influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) - - query_string = "SELECT * from temperature WHERE interval='daily'" - daily_temperature = influx_db.query(query_string).get_points('temperature') - weather = pd.DataFrame(daily_temperature) - return weather - - -def query_bill(building_id, account_type): - - hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' - username = 'blocpower' - password = 'Z50Fwgkfi0SsVaBz' - database = 'utility_bill' - - myConnection = psycopg2.connect(host=hostname, user=username, - password=password, dbname=database) - df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) - df_account = pd.read_sql('SELECT * FROM public.account', myConnection) - df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) - df_account_selected = df_account[df_account['account_type'] == account_type] - - account = df_account_selected[df_account_selected['building_id'] == building_id] - acc_id = account['id'].iloc[0] - new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) - new_bill['actual_total'] = new_bill['delivery_charge'] + \ - new_bill['supply_charge'] + \ - new_bill['esco_charge'] + \ - new_bill['delivery_tax'] - new_bill = new_bill.reset_index(drop=True) - bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] - bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days - return bill -- GitLab From 80f946a73753eef0123d8cf8feff1bccb28de0fd Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 14 Jun 2019 17:01:28 -0400 Subject: [PATCH 57/97] update the name of the main module --- bpeng/bill/driver.py | 138 ------------------------------------------- 1 file changed, 138 deletions(-) delete mode 100644 bpeng/bill/driver.py diff --git a/bpeng/bill/driver.py b/bpeng/bill/driver.py deleted file mode 100644 index 33edb3f..0000000 --- a/bpeng/bill/driver.py +++ /dev/null @@ -1,138 +0,0 @@ -"""This is the driver file to use this library to calculate the bill disaggragation results for BlocPower Dashboard""" - -import pandas as pd -import numpy as np -import datetime as datetime -from scipy.optimize import minimize -from datetime import timedelta -from get_test_data import (query_bill, get_weather_data) - -from bill_cleaner import Bill -from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) -from regr import (regression_1, regression_2, regr_temp_hddcdd) -from calculater import (hdd, threshold) -from setpoints_optimization import (optimize_setpoints) -from disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) -from normalized_monthly_bill import normalized_billing_period -from bill_cleaner import Bill - - -class bill_disaggragate(): - - def __init__(self, raw_bill, daily_temp, end_uses): - self.bill = Bill(raw_bill) - self.daily_temp = daily_temp - self.end_uses = end_uses - self.output_normalized_monthly_bill = None - self.annual_normalized_monthly_bill = None - self.regr_results = None - self.annual_usage_costs_summary = None - - def main(self): - - weather_data_daily = weather_cleaning(self.daily_temp) - self.bill.formating() - self.bill.quality() - self.bill.consolidate() - - # if any(i == 'short' for i in quality.flag): - # processed_bill = short_bill_consolidate( - # formatted_bill, quality) - # else: - # processed_bill = formatted_bill - - processed_bill = bill_with_daily_temp(self.bill.formatted_bill, weather_data_daily) - processed_bill = processed_bill.sort_values('Bill From Date') - - formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') - formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - - nb = normalized_billing_period(formatted_bill) - monthly_bill = nb.normailized_monthly_bill() - monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill_temp) - - self.regr_results = optimize_setpoints(processed_bill) - monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) - self.output_normalized_monthly_bill = non_weahter_related_breakdown(self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) - self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index(drop=True) - self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) - - return - - def annual_usage_costs(self, annual_bill_breakdown, end_uses): - """ - Calcuate annual usage and costs for each end use - - Args: - annual_bill_breakdown(pd.DataFrame): the output from non-weather-related usage breakdown - end_uses(dictionary): key: end use - value: percentage of the end use among non-weather related usage - - Return: - pd.DataFrame: annual usage, costs for each end uses - - """ - - annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) - annual_bill_breakdown['Costs'] = annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] - avg_price = sum(annual_bill_breakdown['Costs']) /sum(annual_bill_breakdown['Calculated Total Usage']) - end_use = list(annual_bill_breakdown.columns) - unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill','Unit Price', 'Non Weather Related Usage', \ - 'Calculated Total Usage','Costs'] - - for elem in unwanted_column: - end_use.remove(elem) - - annual_usage_costs_sum['End Use'] = end_use - - for j in range(len(annual_usage_costs_sum)): - temp = annual_bill_breakdown[annual_usage_costs_sum['End Use'].iloc[j]] - temp_usage = sum(temp) - annual_usage_costs_sum['Usage'].iloc[j] = temp_usage - - annual_usage_costs_sum['Costs'] = annual_usage_costs_sum['Usage'] * avg_price - - return annual_usage_costs_sum - - def to_json(self, period='bill_breakdown'): - """ - Output in json file - - Args: - - period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses - 'bill' for monthly out put for bill with only weather related breakdown - default 'bill_breakdown' - - Returns: - - json: output in json format - - """ - - if period == 'bill_breakdown': - return self.bill_breakdown.to_json(orient="records", date_format="iso") - - return self.output_table_monthly.to_json(orient="records", date_format="iso") - - def to_dict(self, period='bill_breakdown'): - """ - Output in dictionary file - - Args: - - period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses - 'bill' for monthly out put for bill with only weather related breakdown - default 'bill_breakdown' - - Returns: - - json: output in json format - - """ - - if period == 'bill_breakdown': - return self.bill_breakdown.to_dict(orient="records") - - return self.output_table_monthly.to_dict(orient="records") -- GitLab From fe02c2de70b39b1970203a1d67ff97a33313afb4 Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 25 Jun 2019 11:01:25 -0400 Subject: [PATCH 58/97] updates --- bpeng/bill/bill_cleaner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py index aacad1f..0049bf6 100644 --- a/bpeng/bill/bill_cleaner.py +++ b/bpeng/bill/bill_cleaner.py @@ -110,7 +110,6 @@ class Bill: if self.formatted_bill is None: exit('The bill is not formatted, please do formatting!') - print('quality matric:', self.quality_metric) if self.quality_metric.empty: exit() -- GitLab From c7514e7a88fb52aaf098d84fc0d8131ca7c9eb5b Mon Sep 17 00:00:00 2001 From: Doris H Date: Tue, 25 Jun 2019 11:01:42 -0400 Subject: [PATCH 59/97] updates --- bpeng/bill/benchmarking.py | 16 +++ bpeng/bill/bill_analysis.py | 129 ++++++++++++++++++++ bpeng/bill/disaggragate_with_regr_matrix.py | 2 - bpeng/bill/test.py | 58 +++++++-- 4 files changed, 191 insertions(+), 14 deletions(-) create mode 100644 bpeng/bill/bill_analysis.py diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index e69de29..bcf4fc5 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -0,0 +1,16 @@ +""" +This module takes regr matric and stardized HDD/CDD to calculate the normalized usage for a specific building for benchmarking purpose. +""" + + +import numpy as np +import pandas as pd + + +class benchmarking(): + + def __init__(self, regr_matric): + self.regr_matric = regr_matric + self.monthly_HDD = [1008, 861, 713, 392, 136, 16, 1, 1, 40, 249, 524, 836] + self.monthly_CDD = [0, 0, 1, 6, 54, 209, 377, 336, 141, 17, 1, 0] + self.days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] diff --git a/bpeng/bill/bill_analysis.py b/bpeng/bill/bill_analysis.py new file mode 100644 index 0000000..bbc16d8 --- /dev/null +++ b/bpeng/bill/bill_analysis.py @@ -0,0 +1,129 @@ +"""This is the driver file to use this library to calculate the bill disaggragation results for BlocPower Dashboard""" + +import pandas as pd +import numpy as np +import datetime as datetime +from scipy.optimize import minimize +from datetime import timedelta +from get_billing_weather_data import get_billing_weather_data as gd + +from bill_cleaner import Bill +from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) +from regr import (regression_1, regression_2, regr_temp_hddcdd) +from calculater import (hdd, threshold) +from setpoints_optimization import (optimize_setpoints) +from disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) +from normalized_monthly_bill import normalized_billing_period + + +class bill_analysis(): + + def __init__(self, raw_bill, daily_temp, end_uses): + self.bill = Bill(raw_bill) + self.daily_temp = daily_temp + self.end_uses = end_uses + self.output_normalized_monthly_bill = None + self.annual_normalized_monthly_bill = None + self.regr_results = None + self.annual_usage_costs_summary = None + + def main(self): + + weather_data_daily = weather_cleaning(self.daily_temp) + self.bill.RunAllFunctions() + + processed_bill = bill_with_daily_temp(self.bill.formatted_bill, weather_data_daily) + processed_bill = processed_bill.sort_values('Bill From Date') + + formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') + formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] + + nb = normalized_billing_period(formatted_bill) + monthly_bill = nb.normailized_monthly_bill() + monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) + monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill_temp) + + self.regr_results = optimize_setpoints(processed_bill) + monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) + self.output_normalized_monthly_bill = non_weahter_related_breakdown(self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) + self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index(drop=True) + self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) + + return + + def annual_usage_costs(self, annual_bill_breakdown, end_uses): + """ + Calcuate annual usage and costs for each end use + + Args: + annual_bill_breakdown(pd.DataFrame): the output from non-weather-related usage breakdown + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage + + Return: + pd.DataFrame: annual usage, costs for each end uses + + """ + + annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) + annual_bill_breakdown['Costs'] = annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] + avg_price = sum(annual_bill_breakdown['Costs']) / sum(annual_bill_breakdown['Calculated Total Usage']) + end_use = list(annual_bill_breakdown.columns) + unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill','Unit Price', 'Non Weather Related Usage', \ + 'Calculated Total Usage', 'Costs'] + + for elem in unwanted_column: + end_use.remove(elem) + + annual_usage_costs_sum['End Use'] = end_use + + for j in range(len(annual_usage_costs_sum)): + temp = annual_bill_breakdown[annual_usage_costs_sum['End Use'].iloc[j]] + temp_usage = sum(temp) + annual_usage_costs_sum['Usage'].iloc[j] = temp_usage + + annual_usage_costs_sum['Costs'] = annual_usage_costs_sum['Usage'] * avg_price + + return annual_usage_costs_sum + + def to_json(self, period='bill_breakdown'): + """ + Output in json file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + + if period == 'bill_breakdown': + return self.bill_breakdown.to_json(orient="records", date_format="iso") + + return self.output_table_monthly.to_json(orient="records", date_format="iso") + + def to_dict(self, period='bill_breakdown'): + """ + Output in dictionary file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + + if period == 'bill_breakdown': + return self.bill_breakdown.to_dict(orient="records") + + return self.output_table_monthly.to_dict(orient="records") diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index 648f36c..fcc53d3 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -59,8 +59,6 @@ def weather_ralated_breakdown(regr_matrix, processed_bill_any): cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] non_weather_related_consump = dhw_usage * processed_bill_any['Days In Bill'] - - disaggragated_bill = processed_bill_any.copy() disaggragated_bill = processed_bill_any[[ 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Unit Price' diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 096c584..07ea11b 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -6,25 +6,59 @@ import datetime as datetime from scipy.optimize import minimize from datetime import timedelta -from get_test_data import (query_bill, get_weather_data) -from driver import bill_disaggragate +from get_billing_weather_data import get_billing_weather_data +from bill_analysis import bill_analysis from bill_cleaner import Bill # inputs: end_uses = {'DHW': 0.8} -raw_bill = query_bill(243106, 2) -raw_weather_data_daily = get_weather_data() +data = get_billing_weather_data(243106, 2) +data.RunAllFunctions() +raw_bill = data.bill +raw_weather_data_daily = data.weather weather_related_usage_init = 'Unknown' -temp = Bill(raw_bill) -temp.RunAllFunctions() -#print(temp.bill_consi) +ba = bill_analysis(raw_bill, raw_weather_data_daily, end_uses) +ba.main() +most_recent_year_bill = ba.annual_normalized_monthly_bill +r_squared = ba.regr_results['regr_output'][1] + + +print('r_squared:', r_squared) +print('regr_results:', ba.regr_results) + + +## test for benchmarking and reporting purpose: +### benchmarking here means --> how much energy this site will consumpe if all the weather data is given. + + +# import numpy as np +# import pandas as pd +# monthly_HDD = [1008, 861, 713, 392, 136, 16, 1, 1, 40, 249, 524, 836] +# monthly_CDD = [0, 0, 1, 6, 54, 209, 377, 336, 141, 17, 1, 0] +# days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +# month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +# nbf = pd.DataFrame(columns=['monthly_hdd', 'monthly_cdd', 'days_in_bill', 'month']) + +# nbf['month'] = month +# nbf['days_in_bill'] = days_each_month +# nbf['monthly_hdd'] = monthly_HDD +# nbf['monthly_cdd'] = monthly_CDD + +# nbf['daily_hdd'] = nbf['monthly_hdd']/nbf['days_in_bill'] +# nbf['daily_cdd'] = nbf['monthly_cdd']/nbf['days_in_bill'] + +# print(nbf) + + + + + + + + -# bd = bill_disaggragate(raw_bill, raw_weather_data_daily, end_uses) -# bd.main() -# most_recent_year_bill = bd.annual_normalized_monthly_bill -# column_name = most_recent_year_bill.columns -# r_squared = bd.regr_results['regr_output'][1] -- GitLab From 1c4c6ef8083983c138619cdceb5e7d1c962662a7 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 26 Jun 2019 16:13:56 -0400 Subject: [PATCH 60/97] formatting --- .../bill/1981_2010_NYC_NormalsTemperature.csv | 366 ++++++++++++++++++ bpeng/bill/disaggragate_with_regr_matrix.py | 2 +- bpeng/bill/normalClimate_NYC.csv | 13 + bpeng/bill/test.py | 73 ++-- 4 files changed, 423 insertions(+), 31 deletions(-) create mode 100644 bpeng/bill/1981_2010_NYC_NormalsTemperature.csv create mode 100644 bpeng/bill/normalClimate_NYC.csv diff --git a/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv b/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv new file mode 100644 index 0000000..7885e8f --- /dev/null +++ b/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv @@ -0,0 +1,366 @@ +no,month,day,temperature,hdd ,cdd +1,1,1,34,31,0 +2,1,2,33.8,31,0 +3,1,3,33.7,31,0 +4,1,4,33.5,31,0 +5,1,5,33.4,32,0 +6,1,6,33.3,32,0 +7,1,7,33.2,32,0 +8,1,8,33.1,32,0 +9,1,9,33,32,0 +10,1,10,32.9,32,0 +11,1,11,32.9,32,0 +12,1,12,32.8,32,0 +13,1,13,32.8,32,0 +14,1,14,32.7,32,0 +15,1,15,32.7,32,0 +16,1,16,32.7,32,0 +17,1,17,32.7,32,0 +18,1,18,32.7,32,0 +19,1,19,32.7,32,0 +20,1,20,32.7,32,0 +21,1,21,32.7,32,0 +22,1,22,32.8,32,0 +23,1,23,32.8,32,0 +24,1,24,32.9,32,0 +25,1,25,32.9,32,0 +26,1,26,33,32,0 +27,1,27,33.1,32,0 +28,1,28,33.1,32,0 +29,1,29,33.2,32,0 +30,1,30,33.3,32,0 +31,1,31,33.4,32,0 +32,2,1,33.5,31,0 +33,2,2,33.7,31,0 +34,2,3,33.8,31,0 +35,2,4,33.9,31,0 +36,2,5,34,31,0 +37,2,6,34.2,31,0 +38,2,7,34.3,31,0 +39,2,8,34.5,31,0 +40,2,9,34.6,30,0 +41,2,10,34.8,30,0 +42,2,11,34.9,30,0 +43,2,12,35.1,30,0 +44,2,13,35.3,30,0 +45,2,14,35.4,30,0 +46,2,15,35.6,29,0 +47,2,16,35.8,29,0 +48,2,17,36,29,0 +49,2,18,36.2,29,0 +50,2,19,36.4,29,0 +51,2,20,36.5,28,0 +52,2,21,36.7,28,0 +53,2,22,36.9,28,0 +54,2,23,37.1,28,0 +55,2,24,37.3,28,0 +56,2,25,37.6,27,0 +57,2,26,37.8,27,0 +58,2,27,38,27,0 +59,2,28,38.2,27,0 +60,3,1,38.4,27,0 +61,3,2,38.6,26,0 +62,3,3,38.9,26,0 +63,3,4,39.1,26,0 +64,3,5,39.3,26,0 +65,3,6,39.6,25,0 +66,3,7,39.8,25,0 +67,3,8,40.1,25,0 +68,3,9,40.3,25,0 +69,3,10,40.6,24,0 +70,3,11,40.8,24,0 +71,3,12,41.1,24,0 +72,3,13,41.4,24,0 +73,3,14,41.7,23,0 +74,3,15,41.9,23,0 +75,3,16,42.2,23,0 +76,3,17,42.5,22,0 +77,3,18,42.8,22,0 +78,3,19,43.1,22,0 +79,3,20,43.4,22,0 +80,3,21,43.7,21,0 +81,3,22,44,21,0 +82,3,23,44.3,21,0 +83,3,24,44.7,20,0 +84,3,25,45,20,0 +85,3,26,45.3,20,0 +86,3,27,45.6,19,0 +87,3,28,46,19,0 +88,3,29,46.3,19,0 +89,3,30,46.7,18,0 +90,3,31,47,18,0 +91,4,1,47.4,18,0 +92,4,2,47.7,17,0 +93,4,3,48.1,17,0 +94,4,4,48.4,17,0 +95,4,5,48.8,16,0 +96,4,6,49.2,16,0 +97,4,7,49.5,16,0 +98,4,8,49.9,15,0 +99,4,9,50.3,15,0 +100,4,10,50.6,15,0 +101,4,11,51,14,0 +102,4,12,51.4,14,0 +103,4,13,51.7,14,0 +104,4,14,52.1,13,0 +105,4,15,52.4,13,0 +106,4,16,52.8,12,0 +107,4,17,53.2,12,0 +108,4,18,53.5,12,0 +109,4,19,53.9,11,0 +110,4,20,54.2,11,0 +111,4,21,54.5,11,0 +112,4,22,54.9,10,0 +113,4,23,55.2,10,0 +114,4,24,55.6,10,0 +115,4,25,55.9,9,0 +116,4,26,56.2,9,0 +117,4,27,56.6,9,0 +118,4,28,56.9,8,0 +119,4,29,57.2,8,0 +120,4,30,57.5,8,0 +121,5,1,57.8,8,0 +122,5,2,58.1,7,0 +123,5,3,58.4,7,0 +124,5,4,58.7,7,1 +125,5,5,59,7,1 +126,5,6,59.3,6,1 +127,5,7,59.6,6,1 +128,5,8,59.9,6,1 +129,5,9,60.2,6,1 +130,5,10,60.5,5,1 +131,5,11,60.8,5,1 +132,5,12,61.1,5,1 +133,5,13,61.4,5,1 +134,5,14,61.7,5,1 +135,5,15,62,4,1 +136,5,16,62.3,4,1 +137,5,17,62.6,4,2 +138,5,18,62.9,4,2 +139,5,19,63.1,4,2 +140,5,20,63.4,3,2 +141,5,21,63.7,3,2 +142,5,22,64.1,3,2 +143,5,23,64.4,3,2 +144,5,24,64.7,3,2 +145,5,25,65,3,3 +146,5,26,65.3,3,3 +147,5,27,65.6,2,3 +148,5,28,65.9,2,3 +149,5,29,66.2,2,3 +150,5,30,66.6,2,3 +151,5,31,66.9,2,4 +152,6,1,67.2,2,4 +153,6,2,67.6,2,4 +154,6,3,67.9,1,4 +155,6,4,68.2,1,4 +156,6,5,68.5,1,5 +157,6,6,68.9,1,5 +158,6,7,69.2,1,5 +159,6,8,69.5,1,5 +160,6,9,69.9,1,6 +161,6,10,70.2,1,6 +162,6,11,70.5,1,6 +163,6,12,70.9,1,6 +164,6,13,71.2,0,7 +165,6,14,71.5,0,7 +166,6,15,71.8,0,7 +167,6,16,72.1,0,7 +168,6,17,72.4,0,8 +169,6,18,72.7,0,8 +170,6,19,73,0,8 +171,6,20,73.3,0,9 +172,6,21,73.6,0,9 +173,6,22,73.9,0,9 +174,6,23,74.1,0,9 +175,6,24,74.4,0,9 +176,6,25,74.6,0,10 +177,6,26,74.9,0,10 +178,6,27,75.1,0,10 +179,6,28,75.3,0,10 +180,6,29,75.5,0,11 +181,6,30,75.7,0,11 +182,7,1,75.9,0,11 +183,7,2,76.1,0,11 +184,7,3,76.3,0,11 +185,7,4,76.4,0,11 +186,7,5,76.5,0,12 +187,7,6,76.7,0,12 +188,7,7,76.8,0,12 +189,7,8,76.9,0,12 +190,7,9,77,0,12 +191,7,10,77.1,0,12 +192,7,11,77.2,0,12 +193,7,12,77.3,0,12 +194,7,13,77.3,0,12 +195,7,14,77.4,0,12 +196,7,15,77.4,0,12 +197,7,16,77.5,0,12 +198,7,17,77.5,0,12 +199,7,18,77.5,0,13 +200,7,19,77.5,0,13 +201,7,20,77.5,0,13 +202,7,21,77.5,0,13 +203,7,22,77.5,0,13 +204,7,23,77.5,0,13 +205,7,24,77.5,0,13 +206,7,25,77.5,0,12 +207,7,26,77.5,0,12 +208,7,27,77.4,0,12 +209,7,28,77.4,0,12 +210,7,29,77.4,0,12 +211,7,30,77.3,0,12 +212,7,31,77.3,0,12 +213,8,1,77.2,0,12 +214,8,2,77.2,0,12 +215,8,3,77.1,0,12 +216,8,4,77.1,0,12 +217,8,5,77,0,12 +218,8,6,77,0,12 +219,8,7,76.9,0,12 +220,8,8,76.8,0,12 +221,8,9,76.8,0,12 +222,8,10,76.7,0,12 +223,8,11,76.6,0,12 +224,8,12,76.5,0,12 +225,8,13,76.5,0,11 +226,8,14,76.4,0,11 +227,8,15,76.3,0,11 +228,8,16,76.2,0,11 +229,8,17,76.1,0,11 +230,8,18,76,0,11 +231,8,19,75.8,0,11 +232,8,20,75.7,0,11 +233,8,21,75.6,0,11 +234,8,22,75.5,0,10 +235,8,23,75.3,0,10 +236,8,24,75.2,0,10 +237,8,25,75,0,10 +238,8,26,74.8,0,10 +239,8,27,74.6,0,10 +240,8,28,74.4,0,9 +241,8,29,74.2,0,9 +242,8,30,74,0,9 +243,8,31,73.8,0,9 +244,9,1,73.6,0,9 +245,9,2,73.3,0,8 +246,9,3,73.1,0,8 +247,9,4,72.8,0,8 +248,9,5,72.5,0,8 +249,9,6,72.3,0,7 +250,9,7,72,0,7 +251,9,8,71.7,0,7 +252,9,9,71.4,0,7 +253,9,10,71,0,6 +254,9,11,70.7,0,6 +255,9,12,70.4,0,6 +256,9,13,70,1,6 +257,9,14,69.7,1,5 +258,9,15,69.3,1,5 +259,9,16,68.9,1,5 +260,9,17,68.6,1,5 +261,9,18,68.2,1,4 +262,9,19,67.8,1,4 +263,9,20,67.4,1,4 +264,9,21,67,1,3 +265,9,22,66.6,2,3 +266,9,23,66.2,2,3 +267,9,24,65.8,2,3 +268,9,25,65.4,2,3 +269,9,26,65,2,2 +270,9,27,64.6,3,2 +271,9,28,64.2,3,2 +272,9,29,63.8,3,2 +273,9,30,63.4,3,2 +274,10,1,63,4,2 +275,10,2,62.6,4,1 +276,10,3,62.2,4,1 +277,10,4,61.8,4,1 +278,10,5,61.4,5,1 +279,10,6,61.1,5,1 +280,10,7,60.7,5,1 +281,10,8,60.3,6,1 +282,10,9,59.9,6,1 +283,10,10,59.6,6,1 +284,10,11,59.2,7,1 +285,10,12,58.9,7,1 +286,10,13,58.5,7,1 +287,10,14,58.2,7,0 +288,10,15,57.8,8,0 +289,10,16,57.5,8,0 +290,10,17,57.1,8,0 +291,10,18,56.8,8,0 +292,10,19,56.5,9,0 +293,10,20,56.2,9,0 +294,10,21,55.9,9,0 +295,10,22,55.6,10,0 +296,10,23,55.2,10,0 +297,10,24,54.9,10,0 +298,10,25,54.6,10,0 +299,10,26,54.3,11,0 +300,10,27,54,11,0 +301,10,28,53.7,11,0 +302,10,29,53.4,12,0 +303,10,30,53.1,12,0 +304,10,31,52.8,12,0 +305,11,1,52.5,13,0 +306,11,2,52.2,13,0 +307,11,3,51.9,13,0 +308,11,4,51.6,13,0 +309,11,5,51.3,14,0 +310,11,6,51,14,0 +311,11,7,50.7,14,0 +312,11,8,50.4,15,0 +313,11,9,50.1,15,0 +314,11,10,49.8,15,0 +315,11,11,49.5,16,0 +316,11,12,49.2,16,0 +317,11,13,48.9,16,0 +318,11,14,48.5,16,0 +319,11,15,48.2,17,0 +320,11,16,47.9,17,0 +321,11,17,47.6,17,0 +322,11,18,47.2,18,0 +323,11,19,46.9,18,0 +324,11,20,46.5,18,0 +325,11,21,46.2,19,0 +326,11,22,45.9,19,0 +327,11,23,45.5,20,0 +328,11,24,45.2,20,0 +329,11,25,44.8,20,0 +330,11,26,44.5,21,0 +331,11,27,44.1,21,0 +332,11,28,43.7,21,0 +333,11,29,43.4,22,0 +334,11,30,43,22,0 +335,12,1,42.7,22,0 +336,12,2,42.3,23,0 +337,12,3,42,23,0 +338,12,4,41.6,23,0 +339,12,5,41.3,24,0 +340,12,6,40.9,24,0 +341,12,7,40.6,24,0 +342,12,8,40.2,25,0 +343,12,9,39.9,25,0 +344,12,10,39.6,25,0 +345,12,11,39.3,26,0 +346,12,12,38.9,26,0 +347,12,13,38.6,26,0 +348,12,14,38.3,27,0 +349,12,15,38,27,0 +350,12,16,37.7,27,0 +351,12,17,37.4,28,0 +352,12,18,37.1,28,0 +353,12,19,36.8,28,0 +354,12,20,36.6,28,0 +355,12,21,36.3,29,0 +356,12,22,36,29,0 +357,12,23,35.8,29,0 +358,12,24,35.6,29,0 +359,12,25,35.3,30,0 +360,12,26,35.1,30,0 +361,12,27,34.9,30,0 +362,12,28,34.7,30,0 +363,12,29,34.5,31,0 +364,12,30,34.3,31,0 +365,12,31,34.1,31,0 diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index fcc53d3..34f7ebe 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -57,7 +57,7 @@ def weather_ralated_breakdown(regr_matrix, processed_bill_any): heating_consump = np.array(hddcdd[:, 0]) * heating_coef_ * processed_bill_any['Days In Bill'] cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] - non_weather_related_consump = dhw_usage * processed_bill_any['Days In Bill'] + non_weather_related_consump = dhw_usage * processed_bill_any['Days In Bill'] disaggragated_bill = processed_bill_any.copy() disaggragated_bill = processed_bill_any[[ diff --git a/bpeng/bill/normalClimate_NYC.csv b/bpeng/bill/normalClimate_NYC.csv new file mode 100644 index 0000000..ea6fe7b --- /dev/null +++ b/bpeng/bill/normalClimate_NYC.csv @@ -0,0 +1,13 @@ +,temperature,days_in_bill,month +0,"[34.0, 33.799999999999997, 33.700000000000003, 33.5, 33.399999999999999, 33.299999999999997, 33.200000000000003, 33.100000000000001, 33.0, 32.899999999999999, 32.899999999999999, 32.799999999999997, 32.799999999999997, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.799999999999997, 32.799999999999997, 32.899999999999999, 32.899999999999999, 33.0, 33.100000000000001, 33.100000000000001, 33.200000000000003, 33.299999999999997, 33.399999999999999]",31,Jan +1,"[33.5, 33.700000000000003, 33.799999999999997, 33.899999999999999, 34.0, 34.200000000000003, 34.299999999999997, 34.5, 34.600000000000001, 34.799999999999997, 34.899999999999999, 35.100000000000001, 35.299999999999997, 35.399999999999999, 35.600000000000001, 35.799999999999997, 36.0, 36.200000000000003, 36.399999999999999, 36.5, 36.700000000000003, 36.899999999999999, 37.100000000000001, 37.299999999999997, 37.600000000000001, 37.799999999999997, 38.0, 38.200000000000003]",28,Feb +2,"[38.399999999999999, 38.600000000000001, 38.899999999999999, 39.100000000000001, 39.299999999999997, 39.600000000000001, 39.799999999999997, 40.100000000000001, 40.299999999999997, 40.600000000000001, 40.799999999999997, 41.100000000000001, 41.399999999999999, 41.700000000000003, 41.899999999999999, 42.200000000000003, 42.5, 42.799999999999997, 43.100000000000001, 43.399999999999999, 43.700000000000003, 44.0, 44.299999999999997, 44.700000000000003, 45.0, 45.299999999999997, 45.600000000000001, 46.0, 46.299999999999997, 46.700000000000003, 47.0]",31,Mar +3,"[47.399999999999999, 47.700000000000003, 48.100000000000001, 48.399999999999999, 48.799999999999997, 49.200000000000003, 49.5, 49.899999999999999, 50.299999999999997, 50.600000000000001, 51.0, 51.399999999999999, 51.700000000000003, 52.100000000000001, 52.399999999999999, 52.799999999999997, 53.200000000000003, 53.5, 53.899999999999999, 54.200000000000003, 54.5, 54.899999999999999, 55.200000000000003, 55.600000000000001, 55.899999999999999, 56.200000000000003, 56.600000000000001, 56.899999999999999, 57.200000000000003, 57.5]",30,Apr +4,"[57.799999999999997, 58.100000000000001, 58.399999999999999, 58.700000000000003, 59.0, 59.299999999999997, 59.600000000000001, 59.899999999999999, 60.200000000000003, 60.5, 60.799999999999997, 61.100000000000001, 61.399999999999999, 61.700000000000003, 62.0, 62.299999999999997, 62.600000000000001, 62.899999999999999, 63.100000000000001, 63.399999999999999, 63.700000000000003, 64.099999999999994, 64.400000000000006, 64.700000000000003, 65.0, 65.299999999999997, 65.599999999999994, 65.900000000000006, 66.200000000000003, 66.599999999999994, 66.900000000000006]",31,May +5,"[67.200000000000003, 67.599999999999994, 67.900000000000006, 68.200000000000003, 68.5, 68.900000000000006, 69.200000000000003, 69.5, 69.900000000000006, 70.200000000000003, 70.5, 70.900000000000006, 71.200000000000003, 71.5, 71.799999999999997, 72.099999999999994, 72.400000000000006, 72.700000000000003, 73.0, 73.299999999999997, 73.599999999999994, 73.900000000000006, 74.099999999999994, 74.400000000000006, 74.599999999999994, 74.900000000000006, 75.099999999999994, 75.299999999999997, 75.5, 75.700000000000003]",30,Jun +6,"[75.900000000000006, 76.099999999999994, 76.299999999999997, 76.400000000000006, 76.5, 76.700000000000003, 76.799999999999997, 76.900000000000006, 77.0, 77.099999999999994, 77.200000000000003, 77.299999999999997, 77.299999999999997, 77.400000000000006, 77.400000000000006, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.400000000000006, 77.400000000000006, 77.400000000000006, 77.299999999999997, 77.299999999999997]",31,Jul +7,"[77.200000000000003, 77.200000000000003, 77.099999999999994, 77.099999999999994, 77.0, 77.0, 76.900000000000006, 76.799999999999997, 76.799999999999997, 76.700000000000003, 76.599999999999994, 76.5, 76.5, 76.400000000000006, 76.299999999999997, 76.200000000000003, 76.099999999999994, 76.0, 75.799999999999997, 75.700000000000003, 75.599999999999994, 75.5, 75.299999999999997, 75.200000000000003, 75.0, 74.799999999999997, 74.599999999999994, 74.400000000000006, 74.200000000000003, 74.0, 73.799999999999997]",31,Aug +8,"[73.599999999999994, 73.299999999999997, 73.099999999999994, 72.799999999999997, 72.5, 72.299999999999997, 72.0, 71.700000000000003, 71.400000000000006, 71.0, 70.700000000000003, 70.400000000000006, 70.0, 69.700000000000003, 69.299999999999997, 68.900000000000006, 68.599999999999994, 68.200000000000003, 67.799999999999997, 67.400000000000006, 67.0, 66.599999999999994, 66.200000000000003, 65.799999999999997, 65.400000000000006, 65.0, 64.599999999999994, 64.200000000000003, 63.799999999999997, 63.399999999999999]",30,Sep +9,"[63.0, 62.600000000000001, 62.200000000000003, 61.799999999999997, 61.399999999999999, 61.100000000000001, 60.700000000000003, 60.299999999999997, 59.899999999999999, 59.600000000000001, 59.200000000000003, 58.899999999999999, 58.5, 58.200000000000003, 57.799999999999997, 57.5, 57.100000000000001, 56.799999999999997, 56.5, 56.200000000000003, 55.899999999999999, 55.600000000000001, 55.200000000000003, 54.899999999999999, 54.600000000000001, 54.299999999999997, 54.0, 53.700000000000003, 53.399999999999999, 53.100000000000001, 52.799999999999997]",31,Oct +10,"[52.5, 52.200000000000003, 51.899999999999999, 51.600000000000001, 51.299999999999997, 51.0, 50.700000000000003, 50.399999999999999, 50.100000000000001, 49.799999999999997, 49.5, 49.200000000000003, 48.899999999999999, 48.5, 48.200000000000003, 47.899999999999999, 47.600000000000001, 47.200000000000003, 46.899999999999999, 46.5, 46.200000000000003, 45.899999999999999, 45.5, 45.200000000000003, 44.799999999999997, 44.5, 44.100000000000001, 43.700000000000003, 43.399999999999999, 43.0]",30,Nov +11,"[42.700000000000003, 42.299999999999997, 42.0, 41.600000000000001, 41.299999999999997, 40.899999999999999, 40.600000000000001, 40.200000000000003, 39.899999999999999, 39.600000000000001, 39.299999999999997, 38.899999999999999, 38.600000000000001, 38.299999999999997, 38.0, 37.700000000000003, 37.399999999999999, 37.100000000000001, 36.799999999999997, 36.600000000000001, 36.299999999999997, 36.0, 35.799999999999997, 35.600000000000001, 35.299999999999997, 35.100000000000001, 34.899999999999999, 34.700000000000003, 34.5, 34.299999999999997, 34.100000000000001]",31,Dec diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 07ea11b..c067621 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -9,7 +9,7 @@ from datetime import timedelta from get_billing_weather_data import get_billing_weather_data from bill_analysis import bill_analysis from bill_cleaner import Bill - +from regr import regr_temp_hddcdd # inputs: end_uses = {'DHW': 0.8} data = get_billing_weather_data(243106, 2) @@ -28,37 +28,47 @@ print('r_squared:', r_squared) print('regr_results:', ba.regr_results) -## test for benchmarking and reporting purpose: -### benchmarking here means --> how much energy this site will consumpe if all the weather data is given. - - -# import numpy as np -# import pandas as pd -# monthly_HDD = [1008, 861, 713, 392, 136, 16, 1, 1, 40, 249, 524, 836] -# monthly_CDD = [0, 0, 1, 6, 54, 209, 377, 336, 141, 17, 1, 0] -# days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] -# month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -# nbf = pd.DataFrame(columns=['monthly_hdd', 'monthly_cdd', 'days_in_bill', 'month']) - -# nbf['month'] = month -# nbf['days_in_bill'] = days_each_month -# nbf['monthly_hdd'] = monthly_HDD -# nbf['monthly_cdd'] = monthly_CDD +import pandas as pd +import numpy as np +import os -# nbf['daily_hdd'] = nbf['monthly_hdd']/nbf['days_in_bill'] -# nbf['daily_cdd'] = nbf['monthly_cdd']/nbf['days_in_bill'] +origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) +data_path = os.path.join(origin_path, 'bpeng/bill/') +df = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) +df['temperature'] = list(np.float(temperature) for temperature in list(df['temperature'])) -# print(nbf) +nbf = pd.DataFrame(columns=['temperature', 'days_in_bill', 'month']) +days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] +nbf['month'] = month +nbf['days_in_bill'] = days_each_month +temp = [] +month_keys = list(df.groupby(df['month']).groups.keys()) +for month in month_keys: + temp.append(list(df[df['month'] == month]['temperature'])) +nbf['temperature'] = temp +print('type:', nbf['temperature'][0][0]) +regr_matrix = ba.regr_results +heating_set_point = regr_matrix['heating_set_point'] +cooling_set_point = regr_matrix['cooling_set_point'] +weather_related_usage = regr_matrix['weather_related_usage'] +regression_method = regr_matrix['regression_method'] +cooling_coef_ = regr_matrix['cooling_coef_'] +heating_coef_ = regr_matrix['heating_coef_'] +regr_output = regr_matrix['regr_output'] +intercept_ = regr_matrix['intercept_'] +normalized_hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, nbf) +print(normalized_hddcdd) @@ -66,7 +76,20 @@ print('regr_results:', ba.regr_results) +# inner = os.path.dirname("__file__") +# print('inner of the path:', inner) +# joined_inner = os.path.join(os.path.dirname("__file__")) +# print('joined_inner:', joined_inner) +# origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) +# print('origin_path', origin_path) +# data_path = os.path.join(origin_path, 'bpeng/bill/') +# print('data path:', data_path) +# print(df.head()) +# dirpath = os.getcwd() +# print("current directory is : " + dirpath) +# foldername = os.path.basename(dirpath) +# print("Directory name is : " + foldername) @@ -79,18 +102,8 @@ print('regr_results:', ba.regr_results) -# regr_model = regression_1(72, 300, processed_bill) -# score = regr_model.score -# print('r-squared:', score) -#score = regr_model.score(regression_temp, consumption) -# regr_model, score, regression_temp, bill -# regression_model = test_results[0][0] -# print('regr model:', regression_model) -# X = np.array([20,21,23,27]) -# prediction = regression_model.predict(X) -# print(prediction) -- GitLab From 71f963f5f4396cb0cb991dcc3b2b676b0edb0682 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 26 Jun 2019 18:18:32 -0400 Subject: [PATCH 61/97] formatting --- bpeng/bill/disaggragate_with_regr_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py index 34f7ebe..387c435 100644 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ b/bpeng/bill/disaggragate_with_regr_matrix.py @@ -6,6 +6,7 @@ The outputs of this module is the disaggragated results based on the input infor Author: Doris Han ''' + import numpy as np import pandas as pd from regr import (regr_temp_hddcdd) -- GitLab From b678d8f08f81d7a157e67fe5ed6637b1f18852a2 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 26 Jun 2019 18:18:48 -0400 Subject: [PATCH 62/97] finish the benchmarking code --- bpeng/bill/benchmarking.py | 122 +++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 5 deletions(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index bcf4fc5..1587024 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -1,16 +1,128 @@ """ -This module takes regr matric and stardized HDD/CDD to calculate the normalized usage for a specific building for benchmarking purpose. +This module calcuate energy usage of the building using its """ import numpy as np import pandas as pd +import os +from disaggragate_with_regr_matrix import weather_ralated_breakdown class benchmarking(): - def __init__(self, regr_matric): + def __init__(self, regr_matric, utility_type): self.regr_matric = regr_matric - self.monthly_HDD = [1008, 861, 713, 392, 136, 16, 1, 1, 40, 249, 524, 836] - self.monthly_CDD = [0, 0, 1, 6, 54, 209, 377, 336, 141, 17, 1, 0] - self.days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + self.utility_type = utility_type + self.normalized_monthly_bill = None + + def assumble_normal_bills(self): + ''' + This function is to construct a bill with climate normal weather data for each month, and use the calculated regression metric to calculate weather normalized usage for this specific building. + ''' + origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) + data_path = os.path.join(origin_path, 'bpeng/bill/') + df = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) + df['temperature'] = list(np.float(temperature) for temperature in list(df['temperature'])) + + nbf = pd.DataFrame(columns=['temperature', 'Days In Bill', 'Bill From Date']) + days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + nbf['Bill From Date'] = month + nbf['Days In Bill'] = days_each_month + + temp = [] + month_keys = list(df.groupby(df['month']).groups.keys()) + for month in month_keys: + temp.append(list(df[df['month'] == month]['temperature'])) + + nbf['temperature'] = temp + nbf['Bill To Date'] = nbf['Bill From Date'] + + # pre-set the unit price to 0, and add the actual blended rate later + nbf['Unit Price'] = pd.Series((0 for x in range(len(nbf.index)))) + self.normalized_monthly_bill = weather_ralated_breakdown(self.regr_matric, nbf) + self.normalized_monthly_bill['Month'] = self.normalized_monthly_bill['Bill From Date'] + self.normalized_monthly_bill = self.normalized_monthly_bill.drop(['Bill From Date', 'Bill To Date'], axis=1) + + def energyAndEmmision(self, energy_usage): + ''' + this function is to: + 1. convert different kind of energy to mmBtu + 2. calculate source energy usage based on utility type and using EPA conversion ratio + 3. calcuate corresponding CO2 emission metric tons + + Args: + energy_usage: (numpy.float) + + returns: + benchmark_indicators: (dictionary) + ''' + + if self.utility_type == 1: + utility_name = 'Electricity' + utility_unit = 'kWh' + converstion_ratio = 0.003412 #energy conversion ratio to MMbtu + source_site_ratio = 2.8 + co2_emission_factor = 0 + + if self.utility_type == 2: + utility_name = 'Natural Gas' + utility_unit = 'Therms' + converstion_ratio = 0.1 # 0.1 mmBtu/therm + source_site_ratio = 1.05 + co2_emission_factor = 53.06 * 0.001 # metric tons co2 generated per mmbtu gas + + if self.utility_type == 3: + utility_name = 'Oil' + utility_unit = 'Gallon' + # converstion factor for 'heating oil' + converstion_ratio = 0.139 + source_site_ratio = 1.01 + co2_emission_factor = 73.96 * 0.001 + + if self.utility_type == 4: + utility_name = 'Water' + utility_unit = 'CCF' + + converstion_ratio = 0 + source_site_ratio = 0 + co2_emission_factor = 0 + + energy_usage_mmbtu = energy_usage * converstion_ratio + site_co2_tonnes = energy_usage_mmbtu + source_energy_usage = energy_usage_mmbtu * source_site_ratio + source_co2_tonnes = source_energy_usage * co2_emission_factor + + benchmark_indicators = { + 'energy_usage_mmbtu': energy_usage_mmbtu, + 'site_co2_tonnes': site_co2_tonnes, + 'source_energy_usage': source_energy_usage, + 'source_co2_tonnes': source_co2_tonnes + + } + + return benchmark_indicators + + def normalized_monthly_bill_wIndicators(self): + site_energy = [] + source_energy = [] + site_co2 = [] + source_co2 = [] + + for monthly_usage in list(self.normalized_monthly_bill['Calculated Total Usage']): + indicators = self.energyAndEmmision(monthly_usage) + site_energy.append(indicators['energy_usage_mmbtu']) + source_energy.append(indicators['source_energy_usage']) + site_co2.append(indicators['site_co2_tonnes']) + source_co2.append(indicators['source_co2_tonnes']) + + normalized_monthly_billwIndicators = self.normalized_monthly_bill.copy() + normalized_monthly_billwIndicators['Usage mmBTU'] = site_energy + normalized_monthly_billwIndicators['Site CO2 Tonnes'] = site_co2 + normalized_monthly_billwIndicators['Source Energy Usage mmBTU'] = source_energy + normalized_monthly_billwIndicators['Source CO2 Tonnes'] = source_co2 + + return normalized_monthly_billwIndicators + -- GitLab From a2328ccae9a89c56cfd5b0a1e7c0ee6d4572299f Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 26 Jun 2019 18:19:06 -0400 Subject: [PATCH 63/97] corresponding test notebook --- bpeng/bill/test.py | 54 ++++++++-------------------------------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index c067621..3ae481b 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -10,6 +10,9 @@ from get_billing_weather_data import get_billing_weather_data from bill_analysis import bill_analysis from bill_cleaner import Bill from regr import regr_temp_hddcdd +from disaggragate_with_regr_matrix import weather_ralated_breakdown +from benchmarking import benchmarking + # inputs: end_uses = {'DHW': 0.8} data = get_billing_weather_data(243106, 2) @@ -18,59 +21,20 @@ raw_bill = data.bill raw_weather_data_daily = data.weather weather_related_usage_init = 'Unknown' +print(raw_bill.columns) + ba = bill_analysis(raw_bill, raw_weather_data_daily, end_uses) ba.main() most_recent_year_bill = ba.annual_normalized_monthly_bill r_squared = ba.regr_results['regr_output'][1] - - print('r_squared:', r_squared) -print('regr_results:', ba.regr_results) - - - -import pandas as pd -import numpy as np -import os - -origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) -data_path = os.path.join(origin_path, 'bpeng/bill/') -df = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) -df['temperature'] = list(np.float(temperature) for temperature in list(df['temperature'])) - - - -nbf = pd.DataFrame(columns=['temperature', 'days_in_bill', 'month']) -days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] -month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - -nbf['month'] = month -nbf['days_in_bill'] = days_each_month -temp = [] -month_keys = list(df.groupby(df['month']).groups.keys()) - -for month in month_keys: - temp.append(list(df[df['month'] == month]['temperature'])) -nbf['temperature'] = temp - -print('type:', nbf['temperature'][0][0]) regr_matrix = ba.regr_results +temp = benchmarking(regr_matrix, 2) +temp.assumble_normal_bills() +weather_normalized_bill = temp.normalized_monthly_bill -heating_set_point = regr_matrix['heating_set_point'] -cooling_set_point = regr_matrix['cooling_set_point'] -weather_related_usage = regr_matrix['weather_related_usage'] -regression_method = regr_matrix['regression_method'] -cooling_coef_ = regr_matrix['cooling_coef_'] -heating_coef_ = regr_matrix['heating_coef_'] -regr_output = regr_matrix['regr_output'] -intercept_ = regr_matrix['intercept_'] - - -normalized_hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, nbf) -print(normalized_hddcdd) - - +print(weather_normalized_bill) -- GitLab From 323afd5bc0b697fca1a5772f7bc1af69546df708 Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 8 Jul 2019 16:48:22 -0400 Subject: [PATCH 64/97] add descriptions + formatting --- bpeng/bill/benchmarking.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index 1587024..b8378c6 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -15,6 +15,7 @@ class benchmarking(): self.regr_matric = regr_matric self.utility_type = utility_type self.normalized_monthly_bill = None + self.normalized_monthly_bill_wIndicators = None def assumble_normal_bills(self): ''' @@ -49,7 +50,7 @@ class benchmarking(): def energyAndEmmision(self, energy_usage): ''' this function is to: - 1. convert different kind of energy to mmBtu + 1. convert different energy units to mmBtu 2. calculate source energy usage based on utility type and using EPA conversion ratio 3. calcuate corresponding CO2 emission metric tons @@ -63,7 +64,7 @@ class benchmarking(): if self.utility_type == 1: utility_name = 'Electricity' utility_unit = 'kWh' - converstion_ratio = 0.003412 #energy conversion ratio to MMbtu + converstion_ratio = 0.003412 # energy conversion ratio to MMbtu source_site_ratio = 2.8 co2_emission_factor = 0 @@ -123,6 +124,4 @@ class benchmarking(): normalized_monthly_billwIndicators['Site CO2 Tonnes'] = site_co2 normalized_monthly_billwIndicators['Source Energy Usage mmBTU'] = source_energy normalized_monthly_billwIndicators['Source CO2 Tonnes'] = source_co2 - - return normalized_monthly_billwIndicators - + self.normalized_monthly_bill_wIndicators = normalized_monthly_billwIndicators -- GitLab From 373937effc5ded3c36c2cc4b692da39a9b57648a Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 8 Jul 2019 17:44:48 -0400 Subject: [PATCH 65/97] solve problems from vs codes -formating --- bpeng/bill/normalized_monthly_bill.py | 42 +++++++++------------------ 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/bpeng/bill/normalized_monthly_bill.py b/bpeng/bill/normalized_monthly_bill.py index 8bc1989..22eb301 100644 --- a/bpeng/bill/normalized_monthly_bill.py +++ b/bpeng/bill/normalized_monthly_bill.py @@ -5,18 +5,16 @@ should be refactor to a class import warnings from datetime import timedelta -import numpy as np import pandas as pd from dateutil import relativedelta warnings.simplefilter('ignore') -class normalized_billing_period(): +class NormalizedBillingPeriod(): def __init__(self, formatted_bill): self.formatted_bill = formatted_bill - def find_index_in_first_raw_biil(self, norm_bill_date): """ Return the index of the row of raw bill contains the bill date from a normalized bill @@ -58,38 +56,27 @@ class normalized_billing_period(): results.append({'index': index_end, 'num_days': days_in_end_period}) if index_end - index_start >= 2: - for p in range(index_end - index_start - 1): - days_in_period = self.formatted_bill['Days In Bill'][index_start+p+1] - index_of_this_period = index_start+p+1 + for period in range(index_end - index_start - 1): + days_in_period = self.formatted_bill['Days In Bill'][index_start + period + 1] + index_of_this_period = index_start + period + 1 results.append({'index': index_of_this_period, 'num_days': days_in_period}) return results - @staticmethod def num_month_dates(last_date_bill, first_date_bill): """Return number of month in between two date """ lastdate = last_date_bill - timedelta(last_date_bill.day) firstdate = first_date_bill + timedelta(days=32) firstdate = firstdate.replace(day=1) - r = relativedelta.relativedelta(lastdate, firstdate) - num_month = r.years * 12 + r.months + 1 - return (num_month) - + relative_date_range = relativedelta.relativedelta(lastdate, firstdate) + num_month = relative_date_range.years * 12 + relative_date_range.months + 1 + return num_month def normailized_monthly_bill(self): """ - Args: - - last_day_of_bill(datetime): last day of bill - hp(float): heating season indoor set point - cp(float): cooling season indoor set point - number_of_month(int): number of month that need to be re-format - Returns: - pd.DataFrame: result with monthly consumptions - """ last_date_of_bill = self.formatted_bill['Bill To Date'].iloc[-1] first_bill_date = self.formatted_bill['Bill From Date'].iloc[0] @@ -100,7 +87,7 @@ class normalized_billing_period(): lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) # cosntruct a new dataframe with bills from the first to last day for each month - number_of_month = normalized_billing_period.num_month_dates(last_date_of_bill,first_bill_date ) + number_of_month = NormalizedBillingPeriod.num_month_dates(last_date_of_bill, first_bill_date) for i in range(0, number_of_month): last_dates.append(lastdate) first_dates.append(lastdate.replace(day=1)) @@ -119,19 +106,16 @@ class normalized_billing_period(): return normalized_monthly_bill - - - def normalized_unit_price(self, rawbill, mbill): + def normalized_unit_price(self, mbill): """ calculate the unit price for each nomralized billing period """ normalized_unit_price = [] - for m in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[m] - to_date = mbill['Bill To Date'].iloc[m] + for month in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[month] + to_date = mbill['Bill To Date'].iloc[month] index_numdays = self.find_bills_in_raw(from_date, to_date) - weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) - normalized_unit_price.append(weighted_unit_price_for_this_month) + normalized_unit_price.append(self.weighted_unit_price(index_numdays)) mbill['Unit Price'] = normalized_unit_price return mbill -- GitLab From 7179b3d4a8cd04b64dabfd3197c7778020c99e3e Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 8 Jul 2019 17:45:07 -0400 Subject: [PATCH 66/97] formatting the vs code problems --- bpeng/bill/benchmarking.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index b8378c6..45fe491 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -3,13 +3,13 @@ This module calcuate energy usage of the building using its """ +import os import numpy as np import pandas as pd -import os from disaggragate_with_regr_matrix import weather_ralated_breakdown -class benchmarking(): +class Benchmarking(): def __init__(self, regr_matric, utility_type): self.regr_matric = regr_matric @@ -19,12 +19,14 @@ class benchmarking(): def assumble_normal_bills(self): ''' - This function is to construct a bill with climate normal weather data for each month, and use the calculated regression metric to calculate weather normalized usage for this specific building. + This function is to construct a bill with climate normal weather data for each month, and use the calculated + regression metric to calculate weather normalized usage for this specific building. ''' origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) data_path = os.path.join(origin_path, 'bpeng/bill/') - df = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) - df['temperature'] = list(np.float(temperature) for temperature in list(df['temperature'])) + climate_normal_weather = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) + climate_normal_weather['temperature'] = list(np.float(temperature) for temperature in + list(climate_normal_weather['temperature'])) nbf = pd.DataFrame(columns=['temperature', 'Days In Bill', 'Bill From Date']) days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] @@ -34,9 +36,9 @@ class benchmarking(): nbf['Days In Bill'] = days_each_month temp = [] - month_keys = list(df.groupby(df['month']).groups.keys()) + month_keys = list(climate_normal_weather.groupby(climate_normal_weather['month']).groups.keys()) for month in month_keys: - temp.append(list(df[df['month'] == month]['temperature'])) + temp.append(list(climate_normal_weather[climate_normal_weather['month'] == month]['temperature'])) nbf['temperature'] = temp nbf['Bill To Date'] = nbf['Bill From Date'] @@ -55,10 +57,11 @@ class benchmarking(): 3. calcuate corresponding CO2 emission metric tons Args: - energy_usage: (numpy.float) + energy_usage (numpy.float): energy usage per period, a number can be daily, monthly, yearly usage - returns: - benchmark_indicators: (dictionary) + Returns: + benchmark_indicators (dictionary): A disctionary contains co2 emission, + site and source energy usage based on utility type and energy uscage ''' if self.utility_type == 1: @@ -96,17 +99,16 @@ class benchmarking(): source_energy_usage = energy_usage_mmbtu * source_site_ratio source_co2_tonnes = source_energy_usage * co2_emission_factor - benchmark_indicators = { - 'energy_usage_mmbtu': energy_usage_mmbtu, - 'site_co2_tonnes': site_co2_tonnes, - 'source_energy_usage': source_energy_usage, - 'source_co2_tonnes': source_co2_tonnes - + benchmark_indicators ={ + 'energy_usage_mmbtu': energy_usage_mmbtu, + 'site_co2_tonnes': site_co2_tonnes, + 'source_energy_usage': source_energy_usage, + 'source_co2_tonnes': source_co2_tonnes } return benchmark_indicators - def normalized_monthly_bill_wIndicators(self): + def normalize_monthly_bill_wIndicators(self): site_energy = [] source_energy = [] site_co2 = [] -- GitLab From 28e31abe93538012310b55ba3d613d9d6ed968a0 Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 8 Jul 2019 17:45:38 -0400 Subject: [PATCH 67/97] formatting according vs instructions --- bpeng/bill/regr.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py index 2063f8a..40a078d 100644 --- a/bpeng/bill/regr.py +++ b/bpeng/bill/regr.py @@ -6,15 +6,15 @@ from calculater import (cdd, hdd, threshold, outliers_iqr) warnings.simplefilter('ignore') -def regression_1(hp, cp, bill): +def regression_1(HeatingSetpoint, CoolingSetpoint, bill): """ A linear regression model with heating and cooling set fixed Args: - hp(float): heating season indoor set point - cp(float): cooling season indoor set point - processed_bill(pd.DataFrame): cleaned bill with daily temperature + HeatingSetpoint(float): heating season indoor set point + CoolingSetpoint(float): cooling season indoor set point + bill(pd.DataFrame): cleaned bill with daily temperature Returns: @@ -24,7 +24,7 @@ def regression_1(hp, cp, bill): """ consumption = np.array(bill['Usage'] / bill['Days In Bill']) - regression_temp = regr_temp_hddcdd(hp, cp, bill) + regression_temp = regr_temp_hddcdd(HeatingSetpoint, CoolingSetpoint, bill) regr_model = linear_model.LinearRegression() regr_model.fit(regression_temp, consumption) score = regr_model.score(regression_temp, consumption) @@ -38,7 +38,6 @@ def regression_2(hp, bill): Args: hp(float): heating season indoor set point - cp(float): cooling season indoor set point bill(pd.DataFrame): cleaned bill with daily temperature Returns: @@ -50,7 +49,7 @@ def regression_2(hp, bill): """ impossible_cooling_temp = 300 regression_temp = regr_temp_hddcdd(hp, impossible_cooling_temp, bill) - daily_hdd = regression_temp[:,0] + daily_hdd = regression_temp[:, 0] bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: @@ -75,7 +74,7 @@ def regression_2(hp, bill): score = regr_model.score(regression_temp, consumption) return regr_model, score, daily_dhw -def regr_temp_hddcdd(hp, cp, bill): +def regr_temp_hddcdd(heating_Setpoint, cooling_Setpoint, bill): ''' Cal for avg hdd/cdd for a bills with any billing period ''' @@ -83,13 +82,13 @@ def regr_temp_hddcdd(hp, cp, bill): impossible_heating_temp = 0 impossible_cooling_temp = 300 - if hp != np.NaN: - ahdd = [list(hdd(hp, xx) for xx in x) for x in bill['temperature']] + if heating_Setpoint != np.NaN: + ahdd = [list(hdd(heating_Setpoint, xx) for xx in x) for x in bill['temperature']] else: ahdd = [list(hdd(impossible_heating_temp, xx) for xx in x) for x in bill['temperature']] - if cp != np.NaN: - acdd = [list(cdd(cp, xx) for xx in x) for x in bill['temperature']] + if cooling_Setpoint != np.NaN: + acdd = [list(cdd(cooling_Setpoint, xx) for xx in x) for x in bill['temperature']] else: acdd = [list(cdd(impossible_cooling_temp, xx) for xx in x) for x in bill['temperature']] -- GitLab From e70f3c1da17a11dae4e41748afd22c0972de510c Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 11:57:16 -0400 Subject: [PATCH 68/97] add run all --- bpeng/bill/benchmarking.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index 45fe491..9f7bf64 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -1,5 +1,6 @@ """ -This module calcuate energy usage of the building using its +This module calcuate energy usage of the building under climate normal weather data +which returns a monthly """ @@ -9,13 +10,14 @@ import pandas as pd from disaggragate_with_regr_matrix import weather_ralated_breakdown -class Benchmarking(): +class ClimateNormal_Benchmarking(): def __init__(self, regr_matric, utility_type): self.regr_matric = regr_matric self.utility_type = utility_type self.normalized_monthly_bill = None self.normalized_monthly_bill_wIndicators = None + self.normalized_billTotal = None def assumble_normal_bills(self): ''' @@ -99,7 +101,7 @@ class Benchmarking(): source_energy_usage = energy_usage_mmbtu * source_site_ratio source_co2_tonnes = source_energy_usage * co2_emission_factor - benchmark_indicators ={ + benchmark_indicators = { 'energy_usage_mmbtu': energy_usage_mmbtu, 'site_co2_tonnes': site_co2_tonnes, 'source_energy_usage': source_energy_usage, @@ -127,3 +129,18 @@ class Benchmarking(): normalized_monthly_billwIndicators['Source Energy Usage mmBTU'] = source_energy normalized_monthly_billwIndicators['Source CO2 Tonnes'] = source_co2 self.normalized_monthly_bill_wIndicators = normalized_monthly_billwIndicators + + self.normalized_billTotal = { + 'total_usage': sum(self.normalized_monthly_billwIndicators['Calculated Total Usage']), + 'total_usage_mmbtu': sum(self.normalized_monthly_billwIndicators['energy_usage_mmbtu']), + 'source_energy_usage': sum(self.normalized_monthly_billwIndicators['Source Energy Usage mmBTU']), + 'site_co2_tonnes': sum(self.normalized_monthly_billwIndicators['Site CO2 Tonnes']), + 'source_co2_tonnes': sum(self.normalized_monthly_billwIndicators['Source CO2 Tonnes']), + # 'utiltiy_name': utility_name, + # 'utility_unit': utility_unit + } + + def RunAllFunctions(self): + self.assumble_normal_bills() + self.energyAndEmmision() + self.normalize_monthly_bill_wIndicators() -- GitLab From cf42165b42d3e13131b745b16f9f52168f95c30d Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 11:57:27 -0400 Subject: [PATCH 69/97] formatting --- bpeng/bill/bill_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bpeng/bill/bill_analysis.py b/bpeng/bill/bill_analysis.py index bbc16d8..03460ce 100644 --- a/bpeng/bill/bill_analysis.py +++ b/bpeng/bill/bill_analysis.py @@ -13,7 +13,7 @@ from regr import (regression_1, regression_2, regr_temp_hddcdd) from calculater import (hdd, threshold) from setpoints_optimization import (optimize_setpoints) from disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) -from normalized_monthly_bill import normalized_billing_period +from normalized_monthly_bill import NormalizedBillingPeriod class bill_analysis(): @@ -38,10 +38,10 @@ class bill_analysis(): formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - nb = normalized_billing_period(formatted_bill) + nb = NormalizedBillingPeriod(formatted_bill) monthly_bill = nb.normailized_monthly_bill() monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - monthly_bill_with_price = nb.normalized_unit_price(formatted_bill, monthly_bill_temp) + monthly_bill_with_price = nb.normalized_unit_price(monthly_bill_temp) self.regr_results = optimize_setpoints(processed_bill) monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) -- GitLab From 41839ec3686cdeaa505267c11a4e530b4cfb89cb Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 15:49:35 -0400 Subject: [PATCH 70/97] add . before each module --- bpeng/bill/bill_analysis.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/bpeng/bill/bill_analysis.py b/bpeng/bill/bill_analysis.py index 03460ce..3a7796b 100644 --- a/bpeng/bill/bill_analysis.py +++ b/bpeng/bill/bill_analysis.py @@ -5,15 +5,14 @@ import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta -from get_billing_weather_data import get_billing_weather_data as gd - -from bill_cleaner import Bill -from weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) -from regr import (regression_1, regression_2, regr_temp_hddcdd) -from calculater import (hdd, threshold) -from setpoints_optimization import (optimize_setpoints) -from disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) -from normalized_monthly_bill import NormalizedBillingPeriod +from .bill_cleaner import Bill +from .weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) +from .regr import (regression_1, regression_2, regr_temp_hddcdd) +from .calculater import (hdd, threshold) +from .setpoints_optimization import (optimize_setpoints) +from .disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) +from .normalized_monthly_bill import NormalizedBillingPeriod +from .get_billing_weather_data import get_billing_weather_data as gd class bill_analysis(): -- GitLab From a16cfb8ebb38194fd5ca892b45fe8d878e802f8f Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 15:49:49 -0400 Subject: [PATCH 71/97] add '' . '' --- bpeng/bill/benchmarking.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index 9f7bf64..11cc7e1 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -7,7 +7,7 @@ which returns a monthly import os import numpy as np import pandas as pd -from disaggragate_with_regr_matrix import weather_ralated_breakdown +from .disaggragate_with_regr_matrix import weather_ralated_breakdown class ClimateNormal_Benchmarking(): @@ -97,7 +97,7 @@ class ClimateNormal_Benchmarking(): co2_emission_factor = 0 energy_usage_mmbtu = energy_usage * converstion_ratio - site_co2_tonnes = energy_usage_mmbtu + site_co2_tonnes = energy_usage_mmbtu * co2_emission_factor source_energy_usage = energy_usage_mmbtu * source_site_ratio source_co2_tonnes = source_energy_usage * co2_emission_factor @@ -128,19 +128,19 @@ class ClimateNormal_Benchmarking(): normalized_monthly_billwIndicators['Site CO2 Tonnes'] = site_co2 normalized_monthly_billwIndicators['Source Energy Usage mmBTU'] = source_energy normalized_monthly_billwIndicators['Source CO2 Tonnes'] = source_co2 + self.normalized_monthly_bill_wIndicators = normalized_monthly_billwIndicators self.normalized_billTotal = { - 'total_usage': sum(self.normalized_monthly_billwIndicators['Calculated Total Usage']), - 'total_usage_mmbtu': sum(self.normalized_monthly_billwIndicators['energy_usage_mmbtu']), - 'source_energy_usage': sum(self.normalized_monthly_billwIndicators['Source Energy Usage mmBTU']), - 'site_co2_tonnes': sum(self.normalized_monthly_billwIndicators['Site CO2 Tonnes']), - 'source_co2_tonnes': sum(self.normalized_monthly_billwIndicators['Source CO2 Tonnes']), + 'total_usage': sum(self.normalized_monthly_bill_wIndicators['Calculated Total Usage']), + 'total_usage_mmbtu': sum(self.normalized_monthly_bill_wIndicators['Usage mmBTU']), + 'source_energy_usage': sum(self.normalized_monthly_bill_wIndicators['Source Energy Usage mmBTU']), + 'site_co2_tonnes': sum(self.normalized_monthly_bill_wIndicators['Site CO2 Tonnes']), + 'source_co2_tonnes': sum(self.normalized_monthly_bill_wIndicators['Source CO2 Tonnes']), # 'utiltiy_name': utility_name, # 'utility_unit': utility_unit } - def RunAllFunctions(self): - self.assumble_normal_bills() - self.energyAndEmmision() - self.normalize_monthly_bill_wIndicators() + def RunAllFunctions(self): + self.assumble_normal_bills() + self.normalize_monthly_bill_wIndicators() -- GitLab From bc7b6efb651fe72895df3fa4b1d8b401a8913a0e Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 15:50:03 -0400 Subject: [PATCH 72/97] update correspondingly --- bpeng/bill/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/__init__.py b/bpeng/bill/__init__.py index fca9e1b..890b382 100644 --- a/bpeng/bill/__init__.py +++ b/bpeng/bill/__init__.py @@ -1 +1 @@ -from .disaggregate import BillDisaggregation +from .test import BillDisaggregationDriver -- GitLab From 9bf93333833ba451c7c03cdef43642ade3dfcdae Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 16:03:47 -0400 Subject: [PATCH 73/97] update --- bpeng/bill/setpoints_optimization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index cf739f5..195d531 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -3,17 +3,17 @@ This module is to calculate the most fitted regression model and return the regr and the system heating and cooling set points. ''' -import warnings + import numpy as np from scipy.optimize import minimize -from sklearn import linear_model -from regr import (regression_1, regression_2) +from .regr import (regression_1, regression_2) def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): """ Main function for the optimization and disaggregation Args: + processed_bill(pd.DataFrame): utility bills has been cleaned usage (str): Specify if the weather - related consumption is for heating or cooling -- GitLab From eb024942b5d6f80a6d6fee67b20c6207aa1ae32f Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 16:03:58 -0400 Subject: [PATCH 74/97] update --- bpeng/bill/test.py | 81 ++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py index 3ae481b..7743c51 100644 --- a/bpeng/bill/test.py +++ b/bpeng/bill/test.py @@ -1,60 +1,41 @@ +''' +This file is to disaggregate and normalize the utility bills based on weather data. +Author: Doris Han +''' import pandas as pd import numpy as np import datetime as datetime from scipy.optimize import minimize from datetime import timedelta - -from get_billing_weather_data import get_billing_weather_data -from bill_analysis import bill_analysis -from bill_cleaner import Bill -from regr import regr_temp_hddcdd -from disaggragate_with_regr_matrix import weather_ralated_breakdown -from benchmarking import benchmarking - -# inputs: -end_uses = {'DHW': 0.8} -data = get_billing_weather_data(243106, 2) -data.RunAllFunctions() -raw_bill = data.bill -raw_weather_data_daily = data.weather -weather_related_usage_init = 'Unknown' - -print(raw_bill.columns) - -ba = bill_analysis(raw_bill, raw_weather_data_daily, end_uses) -ba.main() -most_recent_year_bill = ba.annual_normalized_monthly_bill -r_squared = ba.regr_results['regr_output'][1] -print('r_squared:', r_squared) - -regr_matrix = ba.regr_results -temp = benchmarking(regr_matrix, 2) -temp.assumble_normal_bills() -weather_normalized_bill = temp.normalized_monthly_bill - -print(weather_normalized_bill) - - - - - -# inner = os.path.dirname("__file__") -# print('inner of the path:', inner) -# joined_inner = os.path.join(os.path.dirname("__file__")) -# print('joined_inner:', joined_inner) -# origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) -# print('origin_path', origin_path) -# data_path = os.path.join(origin_path, 'bpeng/bill/') -# print('data path:', data_path) -# print(df.head()) - -# dirpath = os.getcwd() -# print("current directory is : " + dirpath) -# foldername = os.path.basename(dirpath) -# print("Directory name is : " + foldername) - +from .get_billing_weather_data import get_billing_weather_data +from .bill_analysis import bill_analysis +from .bill_cleaner import Bill +from .regr import regr_temp_hddcdd +from .disaggragate_with_regr_matrix import weather_ralated_breakdown +from .benchmarking import ClimateNormal_Benchmarking + +class BillDisaggregationDriver(): + + def __init__(self, end_uses, weather_daily, raw_utility_bill, weathter_related_usage_input, utility_type): + self.end_uses = end_uses + self.weather_daily = weather_daily + self.raw_bill = raw_utility_bill + self.weahter_related_usage_input = weathter_related_usage_input + self.utility_type = utility_type + self.most_recent_year_bill = None + self.regr_matrix = None + + def main(self): + + billAnalyzed_Obj = bill_analysis(self.raw_bill, self.weather_daily, self.end_uses) + billAnalyzed_Obj.main() + self.most_recent_year_bill = billAnalyzed_Obj.annual_normalized_monthly_bill + self.regr_matrix = billAnalyzed_Obj.regr_results + temp = ClimateNormal_Benchmarking(self.regr_matrix, self.utility_type) + temp.RunAllFunctions() + self.weather_normalized_bill = temp.normalized_monthly_bill -- GitLab From 15c98f66c6f4cd8c24229fd462d9667fb4e71e41 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 10 Jul 2019 16:19:22 -0400 Subject: [PATCH 75/97] solving vs problems --- bpeng/bill/bill_analysis.py | 57 ++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/bpeng/bill/bill_analysis.py b/bpeng/bill/bill_analysis.py index 3a7796b..ea6a587 100644 --- a/bpeng/bill/bill_analysis.py +++ b/bpeng/bill/bill_analysis.py @@ -1,18 +1,12 @@ """This is the driver file to use this library to calculate the bill disaggragation results for BlocPower Dashboard""" + import pandas as pd -import numpy as np -import datetime as datetime -from scipy.optimize import minimize -from datetime import timedelta from .bill_cleaner import Bill -from .weather_data_cal import (weather_cleaning, bill_period_weather, bill_with_daily_temp) -from .regr import (regression_1, regression_2, regr_temp_hddcdd) -from .calculater import (hdd, threshold) +from .weather_data_cal import (weather_cleaning, bill_with_daily_temp) from .setpoints_optimization import (optimize_setpoints) from .disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) from .normalized_monthly_bill import NormalizedBillingPeriod -from .get_billing_weather_data import get_billing_weather_data as gd class bill_analysis(): @@ -37,14 +31,15 @@ class bill_analysis(): formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - nb = NormalizedBillingPeriod(formatted_bill) - monthly_bill = nb.normailized_monthly_bill() + NormalzedBill_Obj = NormalizedBillingPeriod(formatted_bill) + monthly_bill = NormalzedBill_Obj.normailized_monthly_bill() monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - monthly_bill_with_price = nb.normalized_unit_price(monthly_bill_temp) + monthly_bill_with_price = NormalzedBill_Obj.normalized_unit_price(monthly_bill_temp) self.regr_results = optimize_setpoints(processed_bill) monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) - self.output_normalized_monthly_bill = non_weahter_related_breakdown(self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) + self.output_normalized_monthly_bill = non_weahter_related_breakdown( + self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index(drop=True) self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) @@ -65,11 +60,12 @@ class bill_analysis(): """ annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) - annual_bill_breakdown['Costs'] = annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] + annual_bill_breakdown['Costs'] = \ + annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] avg_price = sum(annual_bill_breakdown['Costs']) / sum(annual_bill_breakdown['Calculated Total Usage']) end_use = list(annual_bill_breakdown.columns) - unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill','Unit Price', 'Non Weather Related Usage', \ - 'Calculated Total Usage', 'Costs'] + unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill', 'Unit Price', 'Non Weather Related Usage', + 'Calculated Total Usage', 'Costs'] for elem in unwanted_column: end_use.remove(elem) @@ -102,27 +98,24 @@ class bill_analysis(): """ if period == 'bill_breakdown': - return self.bill_breakdown.to_json(orient="records", date_format="iso") + return self.bill_breakdown.to_json(orient="records", date_format="iso") - return self.output_table_monthly.to_json(orient="records", date_format="iso") + return self.output_table_monthly.to_json(orient="records", date_format="iso") def to_dict(self, period='bill_breakdown'): - """ - Output in dictionary file - - Args: - - period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses - 'bill' for monthly out put for bill with only weather related breakdown - default 'bill_breakdown' - - Returns: + """ + Output in dictionary file - json: output in json format + Args: + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' - """ + Returns: + json: output in json format + """ - if period == 'bill_breakdown': - return self.bill_breakdown.to_dict(orient="records") + if period == 'bill_breakdown': + return self.bill_breakdown.to_dict(orient="records") - return self.output_table_monthly.to_dict(orient="records") + return self.output_table_monthly.to_dict(orient="records") -- GitLab From aef67635ccf5619df03d787c054286c2f4daf2ab Mon Sep 17 00:00:00 2001 From: Doris H Date: Mon, 15 Jul 2019 15:14:07 -0400 Subject: [PATCH 76/97] updates --- bpeng/bill/setpoints_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py index 195d531..f14a91c 100644 --- a/bpeng/bill/setpoints_optimization.py +++ b/bpeng/bill/setpoints_optimization.py @@ -196,6 +196,6 @@ def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): 'regression_method': regression_method, 'regr_model': regr_model, 'regr_output': regr_output - } + } return optimized_regr_matrix -- GitLab From 951e1ce61da11d43f9322e4d0af58454e426dc22 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:46:09 -0400 Subject: [PATCH 77/97] updates --- bpeng/bill/benchmarking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py index 11cc7e1..51d748c 100644 --- a/bpeng/bill/benchmarking.py +++ b/bpeng/bill/benchmarking.py @@ -28,7 +28,7 @@ class ClimateNormal_Benchmarking(): data_path = os.path.join(origin_path, 'bpeng/bill/') climate_normal_weather = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) climate_normal_weather['temperature'] = list(np.float(temperature) for temperature in - list(climate_normal_weather['temperature'])) + list(climate_normal_weather['temperature'])) nbf = pd.DataFrame(columns=['temperature', 'Days In Bill', 'Bill From Date']) days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] -- GitLab From 78400d6c2cfddbfbe00bd7897a04ebfddc164b06 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:46:25 -0400 Subject: [PATCH 78/97] orig mv code 1 --- bpeng/mv/BreakdownLongBill.py | 211 ++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 bpeng/mv/BreakdownLongBill.py diff --git a/bpeng/mv/BreakdownLongBill.py b/bpeng/mv/BreakdownLongBill.py new file mode 100644 index 0000000..54cf82e --- /dev/null +++ b/bpeng/mv/BreakdownLongBill.py @@ -0,0 +1,211 @@ +class BreakdownLongBill(): + + ''' + This class is to breakdown a bill which is abnormally long than the others + ''' + + def __init__(self, bill, weather): + self.bill = bill + self.weather = weather + self.bd = None + + + + def days_in_long_bill(self, days): + + interval = days // 30 + reminder = days % 30 + days_in_long_bill = [] + + if reminder > 15: + for intr in range(interval): days_in_long_bill.append(30) + days_in_long_bill.append(reminder) + if reminder <= 15: + for intr in range(interval-1): days_in_long_bill.append(30) + days_in_long_bill.append(reminder+30) + return days_in_long_bill + + def sub_billing_period_dates(self, billing_period_info): + + ''' + Return date ranges for each sub-billing period + + Args: + billing_period_info(series): one raw from the formatted bill + return: + long_bill_period(pd.DateFrame): same columns with formatted bill and drop temperature + + ''' + long_bill_period = pd.DataFrame(columns=['Bill From Date', 'Bill To Date','Usage', 'Days In Bill', + 'Total Charge', 'Unit Price']) + start_date = billing_period_info['Bill From Date'] + days_in_bill = self.days_in_long_bill(int(billing_period_info['Days In Bill'])) + + bill_from_dates = [] + bill_to_dates = [] + + for xx in range(len(days_in_bill)): + days = days_in_bill[xx] + bill_from_dates.append(start_date) + start_date = start_date + timedelta(days) + bill_to_dates.append(start_date) + + long_bill_period['Bill From Date'] = bill_from_dates + long_bill_period['Days In Bill'] = days_in_bill + long_bill_period['Bill To Date'] = bill_to_dates + + return long_bill_period + + def r0_long_bill_breakdown(self, sub_billing_period, billing_period_info): + ''' + breakdown the usage of a long bill when the usage is not related to weather + r0 indicate the regression method is 0 + + Args: + long_bill_period(pd.DataFrame): the output from function - sub_billing_period_dates + billing_period_info(series): one raw from the formatted bill + ''' + + daily_usage = billing_period_info['Usage'] / billing_period_info['Days In Bill'] + unit_price = billing_period_info['Total Charge'] / billing_period_info['Days In Bill'] + + sub_billing_period['Usage'] = sub_billing_period['Days In Bill'] * daily_usage + sub_billing_period['Total Charge'] = sub_billing_period['Days In Bill'] * unit_price + sub_billing_period['Unit Price'] = sub_billing_period['Total Charge']/sub_billing_period['Days In Bill'] + + return sub_billing_period + + + def r1_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 1 + r1 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + cooling_setpoint = self.bd.cooling_set_point + + hddcdd = MeasurementVerification.weather_demand(heating_setpoint,cooling_setpoint,\ + sub_billing_period,self.weather) + sub_billing_period['Usage'] = self.bd.regr_model.predict(hddcdd) * sub_billing_period['Days In Bill'] + unit_price = billing_period_info['Total Charge']/billing_period_info['Usage'] + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + return sub_billing_period + + def r2_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 2 + r2 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + bill = self.bd.processed_bill + bill['temperature'] = bill['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + hdd = MeasurementVerification.weather_demand(heating_setpoint,0,\ + sub_billing_period,self.weather)[:,0].reshape(-1,1) + + + regr = self.bd.summer_dhw(heating_setpoint,bill) + + bill_with_baseline = regr[3] + daily_baseline = np.average(bill_with_baseline['dhw']/bill_with_baseline['Days In Bill']) + + sub_billing_period['heating_usage'] = self.bd.regr_model.predict(hdd) * sub_billing_period['Days In Bill'] + sub_billing_period['dhw'] = sub_billing_period['Days In Bill'] * daily_baseline + sub_billing_period['Usage'] = sub_billing_period['heating_usage'] + sub_billing_period['dhw'] + + unit_price = billing_period_info['Total Charge'] / billing_period_info['Usage'] + + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + sub_billing_period = sub_billing_period.drop('dhw',axis=1) + sub_billing_period = sub_billing_period.drop('heating_usage',axis=1) + + return sub_billing_period + + + def long_bill_breakdown(self): + ''' + breakdown the bills with irregular long billing period, return a formatted bill with sub-billing peirod for the long bill + + Args: + formatted_bill(pd.DataFrame) + Ruturn: + the broken down bill of bills with long billing peirod + + + ''' + self.bd = BillDisaggregation(self.bill, self.weather) + #TODO: Be careful about the inputs + self.bd.optimize_setpoints() + + formatted_bill = self.bd.formatted_bill + regression_method = self.bd.regression_method + regr_model = self.bd.regr_model + bill_quality = self.bd.bill_quality(self.bill) + #bill = formatted_bill.drop('temperature', axis=1) + new_bill = self.bill.copy().reset_index(drop=True) + + if any(i == 'long' for i in bill_quality.flag): + bill_quality_long = bill_quality[bill_quality['flag'] == 'long'] + + if len(bill_quality_long) > 0: + for x in range(len(bill_quality_long)): + + index_of_raw_bill = bill_quality_long['index'].iloc[x] + long_billing_period_info = formatted_bill.iloc[index_of_raw_bill] + days = long_billing_period_info['Days In Bill'] + total_usage = long_billing_period_info['Usage'] + days_breakdown_list = self.days_in_long_bill(days) + long_bill_breakdown_single_raw = self.sub_billing_period_dates(long_billing_period_info) + new_bill = new_bill.drop(index_of_raw_bill) + + if regression_method == 0: + long_bill_breakdown_single_raw = self.r0_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + + if regression_method == 1: + long_bill_breakdown_single_raw = self.r1_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + if regression_method == 2: + long_bill_breakdown_single_raw = self.r2_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + adjustment_factor = total_usage/sum(long_bill_breakdown_single_raw['Usage']) + long_bill_breakdown_single_raw['Usage'] = long_bill_breakdown_single_raw['Usage'] * adjustment_factor + new_bill = new_bill.append(long_bill_breakdown_single_raw) + + new_bill = new_bill.sort('Bill From Date').reset_index(drop=True) + + else: + new_bill = bill + + return new_bill -- GitLab From c048f00efa4bd8fd4183dd599dd1c506604296ac Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:46:33 -0400 Subject: [PATCH 79/97] create init py --- bpeng/mv/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 bpeng/mv/__init__.py diff --git a/bpeng/mv/__init__.py b/bpeng/mv/__init__.py new file mode 100644 index 0000000..e69de29 -- GitLab From 714aecee1b6ae4bdb220c2d423236e38a6738a65 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:46:43 -0400 Subject: [PATCH 80/97] orig mv code 2 --- bpeng/mv/identifier.py | 88 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 bpeng/mv/identifier.py diff --git a/bpeng/mv/identifier.py b/bpeng/mv/identifier.py new file mode 100644 index 0000000..3986ac9 --- /dev/null +++ b/bpeng/mv/identifier.py @@ -0,0 +1,88 @@ +class DateIdentifier(): + + ''' + In order to add [Month, date] identifer in addition to YYYY/MM/DD + Since the comparasion is happened between same day of different year in M&V process + ''' + + def __init__(self, bill): + self.bill = bill + + def identifier_for_date(self, bill_from_date, bill_to_date): + ''' + return a list of [month, date] for a date range + Args: + bill_from_date(timestamp): + bill_to_date(timestamp): + + Return: + identifier(list): a list of [month, date] of the dates in bewteem the bill_from_date and bill_to_date + ''' + + + days = (bill_to_date - bill_from_date).days + identifier = [] + + for d in range(days): + date = bill_from_date + timedelta(d) + date_id = [date.month, date.day] + identifier.append(date_id) + + return identifier + + def identifier_matrix(self): + ''' + Args: + bill(pd.DataFrame): + + Return: + matrix(pd.DataFrame): + identifier: [month, date], + unit_price: average unit price for a specific billing period + daily_usage: daily_usage for a specific date range + ''' + bill = self.bill + matrix = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + + for bp in range(len(bill)): + from_date = bill['Bill From Date'].iloc[bp] + to_date = bill['Bill To Date'].iloc[bp] + unit_price = bill['Unit Price'].iloc[bp] + daily_usage = bill['Usage'].iloc[bp]/bill['Days In Bill'].iloc[bp] + matrix_temp = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + matrix_temp['identifier'] = self.identifier_for_date(from_date, to_date) + matrix_temp['unit_price'] = pd.Series([unit_price for x in range(len(matrix_temp))]) + matrix_temp['daily_usage'] = pd.Series([daily_usage for x in range(len(matrix_temp.index))]) + matrix = matrix.append(matrix_temp, ignore_index=True) + + return matrix + + + def match_identifier_for_billing_period(self, from_date, to_date, base_identifier_matrix): + ''' + Given a date range and a identifier matirx, + Return the usage of period by matching the date identifier between the period to the corresponding identifier daily + usage + + Args: + from_date(timestamp) + to_date(timestamp) + base_identifier_matrix(pd.DataFrame): + identifier: month, date + daily_usage + unit_price + + Return: + period_usage + ''' + identifier_list = self.identifier_for_date(from_date, to_date) + + period_usage = 0 + for xx in range(len(identifier_list)): + identi = identifier_list[xx] + temp = base_identifier_matrix[base_identifier_matrix.identifier.apply(lambda x: x == identi)] + unit_price = temp.unit_price.mean() + daily_usage = temp.daily_usage.mean() + period_usage += daily_usage + + return period_usage -- GitLab From 0a2880e0edc1e644b677f0f3cab57c57fbf61587 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:46:51 -0400 Subject: [PATCH 81/97] orig mv code 3 --- bpeng/mv/mandv_orig.py | 717 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 717 insertions(+) create mode 100644 bpeng/mv/mandv_orig.py diff --git a/bpeng/mv/mandv_orig.py b/bpeng/mv/mandv_orig.py new file mode 100644 index 0000000..dce9a35 --- /dev/null +++ b/bpeng/mv/mandv_orig.py @@ -0,0 +1,717 @@ +%matplotlib notebook +import matplotlib as mpl +mpl.get_backend() + +import matplotlib.pyplot as plt +from matplotlib.pyplot import text + +from datetime import timedelta +import datetime as datetime +import requests + +import pandas as pd +import numpy as np + +from sklearn import linear_model +from scipy.optimize import minimize +import statsmodels.api as sm +from scipy import stats + +import warnings +warnings.simplefilter('ignore') +from dateutil import relativedelta + + +import psycopg2 +import seaborn as sns; sns.set() + +%run bill_disaggregation.ipynb + + + +class MeasurementVerification(): + + ''' + The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C + + + ''' + + + def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ + weather_relate_usage_type, occupancy, utility_type): + + + self.original_bill = bill + self.weather = weather + self.thermal_comf = thermal_comf + self.earlest_retrofit_date = earlest_retrofit_date + self.latest_retrofit_date = latest_retrofit_date + self.pre_weather_related_usage_type = weather_relate_usage_type + self.occupancy = occupancy + self.utility_type = utility_type + self.weather_relate_usage_type = None + self.projection_post_bill = None + self.projection_fig = None + self.pre_bill = None + self.post_bill = None + self.baseline = None + self.reporting = None + self.projection_reporting_bill = None + self.regression_stat = None + self.first_year_saving = None + self.projection_baseline_bill = None + self.setpoints = {} + + #self.non_weather_related_end_uses = {'Miscellanous': 1} + + + + def split_bill(self, raw_bill): + ''' + split raw bill into two section: + pre - retrofit + post - retrofit + + Args: + raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module + earlest_retrofit_date(str): earlest implementation date of ECMs for this meter + latest_retrofit_date(str): latest implemntation date of ECMs for this meter + + Returns: + pre_bill(pd.DataFrame): raw_bill for pre-retrofit period + post_bill(pd.DataFrame): raw_bill for post_retrotit period + + ''' + + bill = raw_bill.copy() + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + + erd = pd.to_datetime(self.earlest_retrofit_date) + lrd = pd.to_datetime(self.latest_retrofit_date) + + bill = bill.sort_values('Bill From Date').reset_index() + + for i in range(len(bill) - 1): + if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): + erd_flag = i + #print('erd_flag:', i) + + if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): + lrd_flag = i + #print('lrd_flag:', i) + + pre_bill = bill.iloc[0: erd_flag] + post_bill = bill.iloc[(lrd_flag+1):] + + return pre_bill, post_bill + + @staticmethod + def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ + weather_related_usage, weather_data): + ''' + Disaggregate bill with all known inputs + + Args: + heating_setpoint(float): + cooling_setpoint(float): + bill(pd.DataFrame): + regression_method(int): + 0 stands for 'both not'; + 1 stands for regression method 1 using multiple variable regression + 2 stands for using summer dhw method to estimate usage + + Returns: + regr(list): + ''' + + bd = BillDisaggregation(bill, weather_data) + bd.optimize_setpoints() + + bill['temperature'] = [ + bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + if weather_related_usage == 'Both': + regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) + + + if weather_related_usage == 'Cooling': + regr = bd.regression_1(0, cooling_setpoint, bill) + + if weather_related_usage == 'Heating': + if regression_method == 1: + regr = bd.regression_1(heating_setpoint, 0, bill) + + if regression_method ==2: + regr = bd.summer_dhw(heating_setpoint, bill) + + return regr + + @staticmethod + def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): + + ''' + debugged - I think this functioin should be part of BillDisaggregation Class + This function is to calculate the HDD/ CDD with preferred weather period and setpoints + + Args: + heating_setpoint(float) + cooling_setpoint(float) + bill(pd.DataFrame): bills with columns: + 'Bill From Date' + 'Bill To Date' + Returns: + weather_demand(np.array): an array of hddcdd + ''' + + bd = BillDisaggregation(bill, weather_data) + + ahdd = [ + list(bd.hdd(heating_setpoint, xx) for xx in x) + for x in bill['temperature'] + ] + + acdd = [ + list(bd.cdd(cooling_setpoints, xx) for xx in x) + for x in bill['temperature'] + ] + + + # It should be billing period average hdd / days + + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([ + bd.threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + + daily_cdd1 = np.array([ + bd.threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) + + weather_demand = np.array([daily_hdd1, daily_cdd1]).T + + return weather_demand + + def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): + + ''' + the function calls for bill disaggregation module + + ''' + + bd = BillDisaggregation(bill,self.weather) + bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) + + bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ + 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ + 'Heating Setpoint', 'Cooling Setpoint']) + + + output = bd.benchmarking_output() + bill_evaluation = bill_evaluation.append({\ + 'Usage': output[0],\ + 'r squared': output[1],\ + 'regr method': output[2],\ + 'Consumption':output[3],\ + 'Heating': format(output[4], '0.0f'),\ + 'Cooling': format(output[5], '0.0f'),\ + 'Non-weather-related-usage': format(output[6], '0.0f'),\ + 'diff':format(output[7],'.2%'),\ + 'hdd': format(output[8], '0.0f'),\ + 'cdd': format(output[9], '0.0f'),\ + 'Days in Bill': output[10],\ + 'Unit Price':bd.avg_unit_price,\ + 'Heating Setpoint': output[11],\ + 'Cooling Setpoint': output[12] + }, ignore_index = True) + + return bd, bill_evaluation + + def usage_not_related_to_weather(self, bill): + + ''' + To return heating/cooling coef and daily non-weather-related-usage when the usage is not related to weather change + + Args: + bill(pd.DataFrame): a utility bill whose usage dose not relate to weather change + + Return: + bill_metrix(dictionary): a dictionary of heating coef, cooling coef, intercept + ''' + + non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) + heating_coef = 0 + cooling_coef = 0 + + bill_metrix = {'heating_coef': 0, + 'cooling_coef': 0, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def cal_heating_coef(self, regr_heating_coef): + ''' + Calculation for cooling coefficiency with the consideration of changing factors for projection purpose + - need further development + + ''' + heating_coef = regr_heating_coef + return heating_coef + + + def cal_cooling_coef(self, regr_cooling_coef): + ''' + Calculation for cooling coefficiency for projection purpose - need further development + ''' + cooling_coef = regr_cooling_coef + return cooling_coef + + def occup(self): + '''occupancy change pre & post retrofit''' + occupancy_change = 1 + return occupancy_change + + + def usage_related_to_weather(self, regr, regression_method): + + ''' + Args: + regr(list): + regr[0] is regression model + regr[1] is r-squared + regr[2] is hdd/cdd through the regression + regr[3] is the bills (optional, only for regression method 2) + regression_method(int): 1 or 2 + + Returns: + bill_metrix(Dictionary): as defined below + + + ???? Doris: I want to keep the same input as - non weather related usage matrix + ''' + if regression_method == 1: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = regr_model.coef_[1] + non_weather_related_daily_usage = regr_model.intercept_ + + + if regression_method == 2: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = 0 + non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] + + bill_metrix = {'heating_coef': heating_coef, + 'cooling_coef': cooling_coef, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def annual_bill_pre_retrofit(self, bill): + ''' + Return the latest/nearest annual bill to performance analysis, + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of pre_retrofit period + + Return: + annual_bill(pd.DataFrame) + + ''' + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + + bill_last_date = bill['Bill To Date'].iloc[-1] + proposed_first_date = bill_last_date - timedelta(365) + bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) + + index = bill.flag.lt(-1).idxmin() + + if index == 0: + annual_bill = bill.drop('flag', axis=1) + else: + annual_bill = bill[index-1:].drop('flag', axis=1) + + return annual_bill + + + def annual_bill_post_retrofit(self, bill): + ''' + Return the latest/nearest annual bill for post-retrofit period + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of post_retrofit + + Return: + annual_bill(pd.DataFrame) + ''' + + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill = bill.drop('index', axis=1) + + bill = bill.reset_index(drop=True) + bill_first_date = bill['Bill From Date'].iloc[0] + proposed_last_date = bill_first_date + timedelta(365) + + bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) + index = bill.flag.lt(-1).idxmin() + + annual_bill = bill[0:index].drop('flag', axis=1) + if sum(annual_bill['Days In Bill']) < 360: + annual_bill = bill[0:index+1] + + return annual_bill + + + def baseline_bill(self, pre_bill): + + ''' + breakdown the bill of pre_retrofit period if there is long billing period and + return the billing data will be used as 'Baseline Bill' + + ''' + bk = BreakdownLongBill(pre_bill, self.weather) + pre_bill_breakdown = bk.long_bill_breakdown() + baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) + + return baseline_bill + + def reporting_bill(self, post_bill): + bk = BreakdownLongBill(post_bill, self.weather) + bill_breakdown = bk.long_bill_breakdown() + reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) + + return reporting_bill + + def main(self): + + ''' + Function to calcuate the energy usage for post_retrofit period using baseline conditions - + + ''' + # format the bill for later analysis + bill_bd = BillDisaggregation(self.original_bill, self.weather) + bill_bd.optimize_setpoints() + bill = bill_bd.formatted_bill + + + pre_bill, post_bill = self.split_bill(bill) + + #raw bill of pre-retrofit period + self.pre_bill = pre_bill + + assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' + + #baseline bill + self.baseline = self.baseline_bill(pre_bill) + + #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long + self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() + + #calcuate the all prejected baseline for all the post retrofit billing period + post = projection_baseline(self.baseline, self.post_bill, self.weather) + self.projection_post_bill = post.projection() + self.setpoints = {'heating': post.hp, + 'cooling': post.cp} + + #calculate the projected baseline for baseline period + + baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) + self.projection_baseline_bill = baseline_usage_baseline_period.projection() + + + #calculate the projected baseline for reporting period + self.reporting = self.reporting_bill(self.post_bill) + report = projection_baseline(self.baseline, self.reporting, self.weather) + self.projection_reporting_bill = report.projection() + self.regression_stat = report.regression_stat + + + + self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) + xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) +# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) +# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) + + @staticmethod + def energy_savings(bill_with_baseline): + ''' + Return the energy saving for bills with baseline usage + + Args: + bill_with_baseline(pd.DataFrame): + 'Bill From Date' + 'Bill To Date' + 'Usage' + 'Days In Bill' + 'Total Charge' + 'temperature' + 'baseline' + + + Return: + savings(Dict): + + 'Measured Energy Usage for Reporting Period'; + 'Baseline Projection for Reporting Period'; + 'Annual Energy Savings'; + 'Energy Reductaion Percentage'; + 'Costs Avoidance'; + 'Annual Energy Costs' + + ''' + + metered_usage = sum(bill_with_baseline['Usage']) + baseline_usage = sum(bill_with_baseline['baseline']) + + energy_savings = baseline_usage - metered_usage + total_costs = sum(bill_with_baseline['Total Charge']) + unit_price = total_costs / metered_usage + saving_percentage = energy_savings/ baseline_usage + dollar_savings = energy_savings * unit_price + + savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), + 'Baseline Projection for Reporting Period': round(baseline_usage,0), + 'Annual Energy Savings': round(energy_savings,0), + 'Energy Reductaion Percentage': saving_percentage, + 'Costs Avoidance': round(dollar_savings,0), + 'Annual Energy Costs': round(total_costs,0)} + + return savings + + + def return_utility_name(self, utility_type): + + if self.utility_type == 1: + name = 'Electricity' + unit = 'kWh' + if self.utility_type == 2: + name = 'Natural Gas' + unit = 'Therms' + if self.utility_type == 3: + name = 'Oil' + unit = 'Gallon' + if self.utility_type == 4: + name = 'Water' + unit = 'CCF' + + return name, unit + + def plot_result(self, pre, projection_post): + ''' + The funtion to plot: + 1. the metered data over baseline and reporting period + 2. the baseline data over reporting period + + Args: + pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) + prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline + + Returns: + metered(pd.DataFrame): Metered usage over baseline + reporting period + + + ''' + + sns.set_style("white") + plt.figure(figsize=(10,6)) + ax = plt.gca() + + + name, unit = self.return_utility_name(self.utility_type) + + post = projection_post.copy() + + bill = self.original_bill + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill = bill.sort_values('Bill From Date').reset_index(drop=True) + + pre_y = pre['Usage'] + post_x = post['Bill To Date'].values + post_pred_y = post['baseline'] + post_y=post['Usage'] + + y_max = max(pre_y) * 1.5 + arrow_location = max(pre_y) * 0.10 + text_loccation = max(pre_y) * 0.13 + const_location = max(pre_y) * 1.1 + legend_location = max(pre_y) * 1.35 + legend_text_location = max(pre_y) * 1.45 + + # Baseline Period + baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) + baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) + plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ + xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center',weight='bold') + + # Reporting Period + print('post retrofit', post) + reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) + + reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) + plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ + xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center', weight='bold') + + #ECM construction + index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] + + index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] + construction = bill[index_start:index_end] + + + construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) + ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) + + plt.axvline(construction_median, color='darkgrey', linestyle='-',\ + ymin=(1.1/1.5), + ymax=(1.35/1.5)) + + plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) + + ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ + va='center', weight='bold', color='darkgrey') + + + #Metered Usage Overtime + metered = pre.append(construction).append(post) + metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() + + metered_x = metered['Bill To Date'].values + metered_y = metered['Usage'] + + # Legend - Measured Usage + plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ + ymin=(metered['Usage'].iloc[4]/y_max), + ymax=(1.35/1.5)) + + plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) + + ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='cornflowerblue') + + + # Legend - Adjusted Baseline + plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ + ymin=(post['baseline'].iloc[10]/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) + + ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ + va='center',weight='bold', color='brown') + + + + # Legend - Energy Savings + plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ + ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) + + ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='green') + + + #plots + plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) + plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) + + ax.fill_between(post_x,\ + post_y, post_pred_y,\ + facecolor='mediumturquoise',\ + alpha=0.1, + edgecolor='b',\ + linewidth=0) + + plt.ylim([0,y_max]) + + + for spine in plt.gca().spines.values(): + spine.set_visible(False) + + + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + + fig = plt.gcf() + + # get width and height of axes object to compute + # matching arrowhead length and width + dps = fig.dpi_scale_trans.inverted() + bbox = ax.get_window_extent().transformed(dps) + width, height = bbox.width, bbox.height + + # manual arrowhead width and length + hw = 0.5/20.*(ymax-ymin) + hl = 0.5/20.*(xmax-xmin) + lw = 1 # axis line width + ohg = 0.1 # arrow overhang + + # compute matching arrowhead length and width + yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 + yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 + + # y axis + ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, + head_width=yhw, head_length=yhl, overhang = ohg, + length_includes_head= True, clip_on = False) + # x axis + ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, + head_width=hw, head_length=hl, overhang = ohg, + length_includes_head= True, clip_on = False) + + plt.ylabel('Consumption ({})'.format(unit)) + ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ + size=14, weight='bold',verticalalignment='bottom', alpha=0.8) + +# from matplotlib import rcParams +# rcParams['axes.titlepad'] = 50 + + return metered + + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() -- GitLab From cc989fd660f559743c5062f6157d12dd2a2bc868 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 11:47:06 -0400 Subject: [PATCH 82/97] orig mv code 3 --- bpeng/mv/prediction.py | 717 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 717 insertions(+) create mode 100644 bpeng/mv/prediction.py diff --git a/bpeng/mv/prediction.py b/bpeng/mv/prediction.py new file mode 100644 index 0000000..dce9a35 --- /dev/null +++ b/bpeng/mv/prediction.py @@ -0,0 +1,717 @@ +%matplotlib notebook +import matplotlib as mpl +mpl.get_backend() + +import matplotlib.pyplot as plt +from matplotlib.pyplot import text + +from datetime import timedelta +import datetime as datetime +import requests + +import pandas as pd +import numpy as np + +from sklearn import linear_model +from scipy.optimize import minimize +import statsmodels.api as sm +from scipy import stats + +import warnings +warnings.simplefilter('ignore') +from dateutil import relativedelta + + +import psycopg2 +import seaborn as sns; sns.set() + +%run bill_disaggregation.ipynb + + + +class MeasurementVerification(): + + ''' + The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C + + + ''' + + + def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ + weather_relate_usage_type, occupancy, utility_type): + + + self.original_bill = bill + self.weather = weather + self.thermal_comf = thermal_comf + self.earlest_retrofit_date = earlest_retrofit_date + self.latest_retrofit_date = latest_retrofit_date + self.pre_weather_related_usage_type = weather_relate_usage_type + self.occupancy = occupancy + self.utility_type = utility_type + self.weather_relate_usage_type = None + self.projection_post_bill = None + self.projection_fig = None + self.pre_bill = None + self.post_bill = None + self.baseline = None + self.reporting = None + self.projection_reporting_bill = None + self.regression_stat = None + self.first_year_saving = None + self.projection_baseline_bill = None + self.setpoints = {} + + #self.non_weather_related_end_uses = {'Miscellanous': 1} + + + + def split_bill(self, raw_bill): + ''' + split raw bill into two section: + pre - retrofit + post - retrofit + + Args: + raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module + earlest_retrofit_date(str): earlest implementation date of ECMs for this meter + latest_retrofit_date(str): latest implemntation date of ECMs for this meter + + Returns: + pre_bill(pd.DataFrame): raw_bill for pre-retrofit period + post_bill(pd.DataFrame): raw_bill for post_retrotit period + + ''' + + bill = raw_bill.copy() + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + + erd = pd.to_datetime(self.earlest_retrofit_date) + lrd = pd.to_datetime(self.latest_retrofit_date) + + bill = bill.sort_values('Bill From Date').reset_index() + + for i in range(len(bill) - 1): + if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): + erd_flag = i + #print('erd_flag:', i) + + if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): + lrd_flag = i + #print('lrd_flag:', i) + + pre_bill = bill.iloc[0: erd_flag] + post_bill = bill.iloc[(lrd_flag+1):] + + return pre_bill, post_bill + + @staticmethod + def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ + weather_related_usage, weather_data): + ''' + Disaggregate bill with all known inputs + + Args: + heating_setpoint(float): + cooling_setpoint(float): + bill(pd.DataFrame): + regression_method(int): + 0 stands for 'both not'; + 1 stands for regression method 1 using multiple variable regression + 2 stands for using summer dhw method to estimate usage + + Returns: + regr(list): + ''' + + bd = BillDisaggregation(bill, weather_data) + bd.optimize_setpoints() + + bill['temperature'] = [ + bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + if weather_related_usage == 'Both': + regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) + + + if weather_related_usage == 'Cooling': + regr = bd.regression_1(0, cooling_setpoint, bill) + + if weather_related_usage == 'Heating': + if regression_method == 1: + regr = bd.regression_1(heating_setpoint, 0, bill) + + if regression_method ==2: + regr = bd.summer_dhw(heating_setpoint, bill) + + return regr + + @staticmethod + def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): + + ''' + debugged - I think this functioin should be part of BillDisaggregation Class + This function is to calculate the HDD/ CDD with preferred weather period and setpoints + + Args: + heating_setpoint(float) + cooling_setpoint(float) + bill(pd.DataFrame): bills with columns: + 'Bill From Date' + 'Bill To Date' + Returns: + weather_demand(np.array): an array of hddcdd + ''' + + bd = BillDisaggregation(bill, weather_data) + + ahdd = [ + list(bd.hdd(heating_setpoint, xx) for xx in x) + for x in bill['temperature'] + ] + + acdd = [ + list(bd.cdd(cooling_setpoints, xx) for xx in x) + for x in bill['temperature'] + ] + + + # It should be billing period average hdd / days + + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([ + bd.threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + + daily_cdd1 = np.array([ + bd.threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) + + weather_demand = np.array([daily_hdd1, daily_cdd1]).T + + return weather_demand + + def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): + + ''' + the function calls for bill disaggregation module + + ''' + + bd = BillDisaggregation(bill,self.weather) + bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) + + bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ + 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ + 'Heating Setpoint', 'Cooling Setpoint']) + + + output = bd.benchmarking_output() + bill_evaluation = bill_evaluation.append({\ + 'Usage': output[0],\ + 'r squared': output[1],\ + 'regr method': output[2],\ + 'Consumption':output[3],\ + 'Heating': format(output[4], '0.0f'),\ + 'Cooling': format(output[5], '0.0f'),\ + 'Non-weather-related-usage': format(output[6], '0.0f'),\ + 'diff':format(output[7],'.2%'),\ + 'hdd': format(output[8], '0.0f'),\ + 'cdd': format(output[9], '0.0f'),\ + 'Days in Bill': output[10],\ + 'Unit Price':bd.avg_unit_price,\ + 'Heating Setpoint': output[11],\ + 'Cooling Setpoint': output[12] + }, ignore_index = True) + + return bd, bill_evaluation + + def usage_not_related_to_weather(self, bill): + + ''' + To return heating/cooling coef and daily non-weather-related-usage when the usage is not related to weather change + + Args: + bill(pd.DataFrame): a utility bill whose usage dose not relate to weather change + + Return: + bill_metrix(dictionary): a dictionary of heating coef, cooling coef, intercept + ''' + + non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) + heating_coef = 0 + cooling_coef = 0 + + bill_metrix = {'heating_coef': 0, + 'cooling_coef': 0, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def cal_heating_coef(self, regr_heating_coef): + ''' + Calculation for cooling coefficiency with the consideration of changing factors for projection purpose + - need further development + + ''' + heating_coef = regr_heating_coef + return heating_coef + + + def cal_cooling_coef(self, regr_cooling_coef): + ''' + Calculation for cooling coefficiency for projection purpose - need further development + ''' + cooling_coef = regr_cooling_coef + return cooling_coef + + def occup(self): + '''occupancy change pre & post retrofit''' + occupancy_change = 1 + return occupancy_change + + + def usage_related_to_weather(self, regr, regression_method): + + ''' + Args: + regr(list): + regr[0] is regression model + regr[1] is r-squared + regr[2] is hdd/cdd through the regression + regr[3] is the bills (optional, only for regression method 2) + regression_method(int): 1 or 2 + + Returns: + bill_metrix(Dictionary): as defined below + + + ???? Doris: I want to keep the same input as - non weather related usage matrix + ''' + if regression_method == 1: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = regr_model.coef_[1] + non_weather_related_daily_usage = regr_model.intercept_ + + + if regression_method == 2: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = 0 + non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] + + bill_metrix = {'heating_coef': heating_coef, + 'cooling_coef': cooling_coef, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def annual_bill_pre_retrofit(self, bill): + ''' + Return the latest/nearest annual bill to performance analysis, + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of pre_retrofit period + + Return: + annual_bill(pd.DataFrame) + + ''' + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + + bill_last_date = bill['Bill To Date'].iloc[-1] + proposed_first_date = bill_last_date - timedelta(365) + bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) + + index = bill.flag.lt(-1).idxmin() + + if index == 0: + annual_bill = bill.drop('flag', axis=1) + else: + annual_bill = bill[index-1:].drop('flag', axis=1) + + return annual_bill + + + def annual_bill_post_retrofit(self, bill): + ''' + Return the latest/nearest annual bill for post-retrofit period + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of post_retrofit + + Return: + annual_bill(pd.DataFrame) + ''' + + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill = bill.drop('index', axis=1) + + bill = bill.reset_index(drop=True) + bill_first_date = bill['Bill From Date'].iloc[0] + proposed_last_date = bill_first_date + timedelta(365) + + bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) + index = bill.flag.lt(-1).idxmin() + + annual_bill = bill[0:index].drop('flag', axis=1) + if sum(annual_bill['Days In Bill']) < 360: + annual_bill = bill[0:index+1] + + return annual_bill + + + def baseline_bill(self, pre_bill): + + ''' + breakdown the bill of pre_retrofit period if there is long billing period and + return the billing data will be used as 'Baseline Bill' + + ''' + bk = BreakdownLongBill(pre_bill, self.weather) + pre_bill_breakdown = bk.long_bill_breakdown() + baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) + + return baseline_bill + + def reporting_bill(self, post_bill): + bk = BreakdownLongBill(post_bill, self.weather) + bill_breakdown = bk.long_bill_breakdown() + reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) + + return reporting_bill + + def main(self): + + ''' + Function to calcuate the energy usage for post_retrofit period using baseline conditions - + + ''' + # format the bill for later analysis + bill_bd = BillDisaggregation(self.original_bill, self.weather) + bill_bd.optimize_setpoints() + bill = bill_bd.formatted_bill + + + pre_bill, post_bill = self.split_bill(bill) + + #raw bill of pre-retrofit period + self.pre_bill = pre_bill + + assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' + + #baseline bill + self.baseline = self.baseline_bill(pre_bill) + + #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long + self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() + + #calcuate the all prejected baseline for all the post retrofit billing period + post = projection_baseline(self.baseline, self.post_bill, self.weather) + self.projection_post_bill = post.projection() + self.setpoints = {'heating': post.hp, + 'cooling': post.cp} + + #calculate the projected baseline for baseline period + + baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) + self.projection_baseline_bill = baseline_usage_baseline_period.projection() + + + #calculate the projected baseline for reporting period + self.reporting = self.reporting_bill(self.post_bill) + report = projection_baseline(self.baseline, self.reporting, self.weather) + self.projection_reporting_bill = report.projection() + self.regression_stat = report.regression_stat + + + + self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) + xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) +# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) +# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) + + @staticmethod + def energy_savings(bill_with_baseline): + ''' + Return the energy saving for bills with baseline usage + + Args: + bill_with_baseline(pd.DataFrame): + 'Bill From Date' + 'Bill To Date' + 'Usage' + 'Days In Bill' + 'Total Charge' + 'temperature' + 'baseline' + + + Return: + savings(Dict): + + 'Measured Energy Usage for Reporting Period'; + 'Baseline Projection for Reporting Period'; + 'Annual Energy Savings'; + 'Energy Reductaion Percentage'; + 'Costs Avoidance'; + 'Annual Energy Costs' + + ''' + + metered_usage = sum(bill_with_baseline['Usage']) + baseline_usage = sum(bill_with_baseline['baseline']) + + energy_savings = baseline_usage - metered_usage + total_costs = sum(bill_with_baseline['Total Charge']) + unit_price = total_costs / metered_usage + saving_percentage = energy_savings/ baseline_usage + dollar_savings = energy_savings * unit_price + + savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), + 'Baseline Projection for Reporting Period': round(baseline_usage,0), + 'Annual Energy Savings': round(energy_savings,0), + 'Energy Reductaion Percentage': saving_percentage, + 'Costs Avoidance': round(dollar_savings,0), + 'Annual Energy Costs': round(total_costs,0)} + + return savings + + + def return_utility_name(self, utility_type): + + if self.utility_type == 1: + name = 'Electricity' + unit = 'kWh' + if self.utility_type == 2: + name = 'Natural Gas' + unit = 'Therms' + if self.utility_type == 3: + name = 'Oil' + unit = 'Gallon' + if self.utility_type == 4: + name = 'Water' + unit = 'CCF' + + return name, unit + + def plot_result(self, pre, projection_post): + ''' + The funtion to plot: + 1. the metered data over baseline and reporting period + 2. the baseline data over reporting period + + Args: + pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) + prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline + + Returns: + metered(pd.DataFrame): Metered usage over baseline + reporting period + + + ''' + + sns.set_style("white") + plt.figure(figsize=(10,6)) + ax = plt.gca() + + + name, unit = self.return_utility_name(self.utility_type) + + post = projection_post.copy() + + bill = self.original_bill + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill = bill.sort_values('Bill From Date').reset_index(drop=True) + + pre_y = pre['Usage'] + post_x = post['Bill To Date'].values + post_pred_y = post['baseline'] + post_y=post['Usage'] + + y_max = max(pre_y) * 1.5 + arrow_location = max(pre_y) * 0.10 + text_loccation = max(pre_y) * 0.13 + const_location = max(pre_y) * 1.1 + legend_location = max(pre_y) * 1.35 + legend_text_location = max(pre_y) * 1.45 + + # Baseline Period + baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) + baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) + plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ + xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center',weight='bold') + + # Reporting Period + print('post retrofit', post) + reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) + + reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) + plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ + xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center', weight='bold') + + #ECM construction + index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] + + index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] + construction = bill[index_start:index_end] + + + construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) + ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) + + plt.axvline(construction_median, color='darkgrey', linestyle='-',\ + ymin=(1.1/1.5), + ymax=(1.35/1.5)) + + plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) + + ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ + va='center', weight='bold', color='darkgrey') + + + #Metered Usage Overtime + metered = pre.append(construction).append(post) + metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() + + metered_x = metered['Bill To Date'].values + metered_y = metered['Usage'] + + # Legend - Measured Usage + plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ + ymin=(metered['Usage'].iloc[4]/y_max), + ymax=(1.35/1.5)) + + plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) + + ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='cornflowerblue') + + + # Legend - Adjusted Baseline + plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ + ymin=(post['baseline'].iloc[10]/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) + + ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ + va='center',weight='bold', color='brown') + + + + # Legend - Energy Savings + plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ + ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) + + ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='green') + + + #plots + plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) + plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) + + ax.fill_between(post_x,\ + post_y, post_pred_y,\ + facecolor='mediumturquoise',\ + alpha=0.1, + edgecolor='b',\ + linewidth=0) + + plt.ylim([0,y_max]) + + + for spine in plt.gca().spines.values(): + spine.set_visible(False) + + + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + + fig = plt.gcf() + + # get width and height of axes object to compute + # matching arrowhead length and width + dps = fig.dpi_scale_trans.inverted() + bbox = ax.get_window_extent().transformed(dps) + width, height = bbox.width, bbox.height + + # manual arrowhead width and length + hw = 0.5/20.*(ymax-ymin) + hl = 0.5/20.*(xmax-xmin) + lw = 1 # axis line width + ohg = 0.1 # arrow overhang + + # compute matching arrowhead length and width + yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 + yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 + + # y axis + ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, + head_width=yhw, head_length=yhl, overhang = ohg, + length_includes_head= True, clip_on = False) + # x axis + ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, + head_width=hw, head_length=hl, overhang = ohg, + length_includes_head= True, clip_on = False) + + plt.ylabel('Consumption ({})'.format(unit)) + ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ + size=14, weight='bold',verticalalignment='bottom', alpha=0.8) + +# from matplotlib import rcParams +# rcParams['axes.titlepad'] = 50 + + return metered + + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() -- GitLab From e39a9c9e137a49c45c410013b2a3fae2c743f9ae Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 12:33:25 -0400 Subject: [PATCH 83/97] update description of the file --- bpeng/bill/awesome_disaggregate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 29563c5..a1bccf2 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1,4 +1,6 @@ -"""This file calcuate bill disagregation for multifamily buildings""" +"""This file calcuate bill disagregation for multifamily buildings + As of 2019/07/24, this is the file which lives in BlocLink and interact with BlocPower Dashboard. +""" import warnings from datetime import timedelta -- GitLab From 607b29b856490194e7e429bf9a781139f37f9514 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 24 Jul 2019 12:33:37 -0400 Subject: [PATCH 84/97] update the imports --- bpeng/mv/mandv_orig.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/bpeng/mv/mandv_orig.py b/bpeng/mv/mandv_orig.py index dce9a35..3245449 100644 --- a/bpeng/mv/mandv_orig.py +++ b/bpeng/mv/mandv_orig.py @@ -1,32 +1,24 @@ -%matplotlib notebook + + import matplotlib as mpl mpl.get_backend() import matplotlib.pyplot as plt -from matplotlib.pyplot import text - -from datetime import timedelta import datetime as datetime import requests - import pandas as pd import numpy as np - -from sklearn import linear_model -from scipy.optimize import minimize import statsmodels.api as sm -from scipy import stats - import warnings warnings.simplefilter('ignore') -from dateutil import relativedelta - - import psycopg2 import seaborn as sns; sns.set() - -%run bill_disaggregation.ipynb - +from matplotlib.pyplot import text +from datetime import timedelta +from sklearn import linear_model +from scipy.optimize import minimize +from scipy import stats +from dateutil import relativedelta class MeasurementVerification(): -- GitLab From 1987e3666b4255109d2997258cf4ff4916428c85 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 15:55:12 -0400 Subject: [PATCH 85/97] orig code --- bpeng/mv/mandv_orig.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bpeng/mv/mandv_orig.py b/bpeng/mv/mandv_orig.py index 3245449..3ab95e3 100644 --- a/bpeng/mv/mandv_orig.py +++ b/bpeng/mv/mandv_orig.py @@ -4,6 +4,7 @@ import matplotlib as mpl mpl.get_backend() import matplotlib.pyplot as plt +import seaborn as sns; sns.set() import datetime as datetime import requests import pandas as pd @@ -12,7 +13,6 @@ import statsmodels.api as sm import warnings warnings.simplefilter('ignore') import psycopg2 -import seaborn as sns; sns.set() from matplotlib.pyplot import text from datetime import timedelta from sklearn import linear_model @@ -54,11 +54,9 @@ class MeasurementVerification(): self.first_year_saving = None self.projection_baseline_bill = None self.setpoints = {} - #self.non_weather_related_end_uses = {'Miscellanous': 1} - def split_bill(self, raw_bill): ''' split raw bill into two section: @@ -88,11 +86,9 @@ class MeasurementVerification(): for i in range(len(bill) - 1): if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): erd_flag = i - #print('erd_flag:', i) if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): lrd_flag = i - #print('lrd_flag:', i) pre_bill = bill.iloc[0: erd_flag] post_bill = bill.iloc[(lrd_flag+1):] @@ -119,6 +115,7 @@ class MeasurementVerification(): ''' bd = BillDisaggregation(bill, weather_data) + #pre-requisit is the bills are longer than 365 days/ bd.optimize_setpoints() bill['temperature'] = [ -- GitLab From 9dd07a179acdc8e73adc46ff98d1f591b0848f0d Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 15:55:23 -0400 Subject: [PATCH 86/97] random python --- bpeng/mv/measurementVerification.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 bpeng/mv/measurementVerification.py diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py new file mode 100644 index 0000000..e69de29 -- GitLab From 8d3672a5b41303605fa77f0041fd281e1fb1b956 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 15:55:34 -0400 Subject: [PATCH 87/97] forgot what's this --- bpeng/mv/mv_test.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 bpeng/mv/mv_test.py diff --git a/bpeng/mv/mv_test.py b/bpeng/mv/mv_test.py new file mode 100644 index 0000000..e69de29 -- GitLab From 4d5131e5ec7af18eedf61c04a5f35e34ea3e7f08 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 15:59:00 -0400 Subject: [PATCH 88/97] remove bill module to match master branch --- .../bill/1981_2010_NYC_NormalsTemperature.csv | 366 --------- bpeng/bill/benchmarking.py | 146 ---- bpeng/bill/bill_analysis.py | 121 --- bpeng/bill/bill_cleaner.py | 157 ---- bpeng/bill/calculater.py | 88 --- bpeng/bill/disaggragate_with_regr_matrix.py | 113 --- bpeng/bill/get_billing_weather_data.py | 63 -- bpeng/bill/normalClimate_NYC.csv | 13 - bpeng/bill/normalized_monthly_bill.py | 134 ---- bpeng/bill/regr.py | 103 --- bpeng/bill/setpoints_optimization.py | 201 ----- bpeng/bill/test.py | 54 -- bpeng/bill/weather_data_cal.py | 53 -- bpeng/mv/BreakdownLongBill.py | 211 ------ bpeng/mv/identifier.py | 88 --- bpeng/mv/mandv_orig.py | 706 ----------------- bpeng/mv/measurementVerification.py | 706 +++++++++++++++++ bpeng/mv/mv_test.py | 0 bpeng/mv/prediction.py | 717 ------------------ 19 files changed, 706 insertions(+), 3334 deletions(-) delete mode 100644 bpeng/bill/1981_2010_NYC_NormalsTemperature.csv delete mode 100644 bpeng/bill/benchmarking.py delete mode 100644 bpeng/bill/bill_analysis.py delete mode 100644 bpeng/bill/bill_cleaner.py delete mode 100644 bpeng/bill/calculater.py delete mode 100644 bpeng/bill/disaggragate_with_regr_matrix.py delete mode 100644 bpeng/bill/get_billing_weather_data.py delete mode 100644 bpeng/bill/normalClimate_NYC.csv delete mode 100644 bpeng/bill/normalized_monthly_bill.py delete mode 100644 bpeng/bill/regr.py delete mode 100644 bpeng/bill/setpoints_optimization.py delete mode 100644 bpeng/bill/test.py delete mode 100644 bpeng/bill/weather_data_cal.py delete mode 100644 bpeng/mv/BreakdownLongBill.py delete mode 100644 bpeng/mv/identifier.py delete mode 100644 bpeng/mv/mandv_orig.py delete mode 100644 bpeng/mv/mv_test.py delete mode 100644 bpeng/mv/prediction.py diff --git a/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv b/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv deleted file mode 100644 index 7885e8f..0000000 --- a/bpeng/bill/1981_2010_NYC_NormalsTemperature.csv +++ /dev/null @@ -1,366 +0,0 @@ -no,month,day,temperature,hdd ,cdd -1,1,1,34,31,0 -2,1,2,33.8,31,0 -3,1,3,33.7,31,0 -4,1,4,33.5,31,0 -5,1,5,33.4,32,0 -6,1,6,33.3,32,0 -7,1,7,33.2,32,0 -8,1,8,33.1,32,0 -9,1,9,33,32,0 -10,1,10,32.9,32,0 -11,1,11,32.9,32,0 -12,1,12,32.8,32,0 -13,1,13,32.8,32,0 -14,1,14,32.7,32,0 -15,1,15,32.7,32,0 -16,1,16,32.7,32,0 -17,1,17,32.7,32,0 -18,1,18,32.7,32,0 -19,1,19,32.7,32,0 -20,1,20,32.7,32,0 -21,1,21,32.7,32,0 -22,1,22,32.8,32,0 -23,1,23,32.8,32,0 -24,1,24,32.9,32,0 -25,1,25,32.9,32,0 -26,1,26,33,32,0 -27,1,27,33.1,32,0 -28,1,28,33.1,32,0 -29,1,29,33.2,32,0 -30,1,30,33.3,32,0 -31,1,31,33.4,32,0 -32,2,1,33.5,31,0 -33,2,2,33.7,31,0 -34,2,3,33.8,31,0 -35,2,4,33.9,31,0 -36,2,5,34,31,0 -37,2,6,34.2,31,0 -38,2,7,34.3,31,0 -39,2,8,34.5,31,0 -40,2,9,34.6,30,0 -41,2,10,34.8,30,0 -42,2,11,34.9,30,0 -43,2,12,35.1,30,0 -44,2,13,35.3,30,0 -45,2,14,35.4,30,0 -46,2,15,35.6,29,0 -47,2,16,35.8,29,0 -48,2,17,36,29,0 -49,2,18,36.2,29,0 -50,2,19,36.4,29,0 -51,2,20,36.5,28,0 -52,2,21,36.7,28,0 -53,2,22,36.9,28,0 -54,2,23,37.1,28,0 -55,2,24,37.3,28,0 -56,2,25,37.6,27,0 -57,2,26,37.8,27,0 -58,2,27,38,27,0 -59,2,28,38.2,27,0 -60,3,1,38.4,27,0 -61,3,2,38.6,26,0 -62,3,3,38.9,26,0 -63,3,4,39.1,26,0 -64,3,5,39.3,26,0 -65,3,6,39.6,25,0 -66,3,7,39.8,25,0 -67,3,8,40.1,25,0 -68,3,9,40.3,25,0 -69,3,10,40.6,24,0 -70,3,11,40.8,24,0 -71,3,12,41.1,24,0 -72,3,13,41.4,24,0 -73,3,14,41.7,23,0 -74,3,15,41.9,23,0 -75,3,16,42.2,23,0 -76,3,17,42.5,22,0 -77,3,18,42.8,22,0 -78,3,19,43.1,22,0 -79,3,20,43.4,22,0 -80,3,21,43.7,21,0 -81,3,22,44,21,0 -82,3,23,44.3,21,0 -83,3,24,44.7,20,0 -84,3,25,45,20,0 -85,3,26,45.3,20,0 -86,3,27,45.6,19,0 -87,3,28,46,19,0 -88,3,29,46.3,19,0 -89,3,30,46.7,18,0 -90,3,31,47,18,0 -91,4,1,47.4,18,0 -92,4,2,47.7,17,0 -93,4,3,48.1,17,0 -94,4,4,48.4,17,0 -95,4,5,48.8,16,0 -96,4,6,49.2,16,0 -97,4,7,49.5,16,0 -98,4,8,49.9,15,0 -99,4,9,50.3,15,0 -100,4,10,50.6,15,0 -101,4,11,51,14,0 -102,4,12,51.4,14,0 -103,4,13,51.7,14,0 -104,4,14,52.1,13,0 -105,4,15,52.4,13,0 -106,4,16,52.8,12,0 -107,4,17,53.2,12,0 -108,4,18,53.5,12,0 -109,4,19,53.9,11,0 -110,4,20,54.2,11,0 -111,4,21,54.5,11,0 -112,4,22,54.9,10,0 -113,4,23,55.2,10,0 -114,4,24,55.6,10,0 -115,4,25,55.9,9,0 -116,4,26,56.2,9,0 -117,4,27,56.6,9,0 -118,4,28,56.9,8,0 -119,4,29,57.2,8,0 -120,4,30,57.5,8,0 -121,5,1,57.8,8,0 -122,5,2,58.1,7,0 -123,5,3,58.4,7,0 -124,5,4,58.7,7,1 -125,5,5,59,7,1 -126,5,6,59.3,6,1 -127,5,7,59.6,6,1 -128,5,8,59.9,6,1 -129,5,9,60.2,6,1 -130,5,10,60.5,5,1 -131,5,11,60.8,5,1 -132,5,12,61.1,5,1 -133,5,13,61.4,5,1 -134,5,14,61.7,5,1 -135,5,15,62,4,1 -136,5,16,62.3,4,1 -137,5,17,62.6,4,2 -138,5,18,62.9,4,2 -139,5,19,63.1,4,2 -140,5,20,63.4,3,2 -141,5,21,63.7,3,2 -142,5,22,64.1,3,2 -143,5,23,64.4,3,2 -144,5,24,64.7,3,2 -145,5,25,65,3,3 -146,5,26,65.3,3,3 -147,5,27,65.6,2,3 -148,5,28,65.9,2,3 -149,5,29,66.2,2,3 -150,5,30,66.6,2,3 -151,5,31,66.9,2,4 -152,6,1,67.2,2,4 -153,6,2,67.6,2,4 -154,6,3,67.9,1,4 -155,6,4,68.2,1,4 -156,6,5,68.5,1,5 -157,6,6,68.9,1,5 -158,6,7,69.2,1,5 -159,6,8,69.5,1,5 -160,6,9,69.9,1,6 -161,6,10,70.2,1,6 -162,6,11,70.5,1,6 -163,6,12,70.9,1,6 -164,6,13,71.2,0,7 -165,6,14,71.5,0,7 -166,6,15,71.8,0,7 -167,6,16,72.1,0,7 -168,6,17,72.4,0,8 -169,6,18,72.7,0,8 -170,6,19,73,0,8 -171,6,20,73.3,0,9 -172,6,21,73.6,0,9 -173,6,22,73.9,0,9 -174,6,23,74.1,0,9 -175,6,24,74.4,0,9 -176,6,25,74.6,0,10 -177,6,26,74.9,0,10 -178,6,27,75.1,0,10 -179,6,28,75.3,0,10 -180,6,29,75.5,0,11 -181,6,30,75.7,0,11 -182,7,1,75.9,0,11 -183,7,2,76.1,0,11 -184,7,3,76.3,0,11 -185,7,4,76.4,0,11 -186,7,5,76.5,0,12 -187,7,6,76.7,0,12 -188,7,7,76.8,0,12 -189,7,8,76.9,0,12 -190,7,9,77,0,12 -191,7,10,77.1,0,12 -192,7,11,77.2,0,12 -193,7,12,77.3,0,12 -194,7,13,77.3,0,12 -195,7,14,77.4,0,12 -196,7,15,77.4,0,12 -197,7,16,77.5,0,12 -198,7,17,77.5,0,12 -199,7,18,77.5,0,13 -200,7,19,77.5,0,13 -201,7,20,77.5,0,13 -202,7,21,77.5,0,13 -203,7,22,77.5,0,13 -204,7,23,77.5,0,13 -205,7,24,77.5,0,13 -206,7,25,77.5,0,12 -207,7,26,77.5,0,12 -208,7,27,77.4,0,12 -209,7,28,77.4,0,12 -210,7,29,77.4,0,12 -211,7,30,77.3,0,12 -212,7,31,77.3,0,12 -213,8,1,77.2,0,12 -214,8,2,77.2,0,12 -215,8,3,77.1,0,12 -216,8,4,77.1,0,12 -217,8,5,77,0,12 -218,8,6,77,0,12 -219,8,7,76.9,0,12 -220,8,8,76.8,0,12 -221,8,9,76.8,0,12 -222,8,10,76.7,0,12 -223,8,11,76.6,0,12 -224,8,12,76.5,0,12 -225,8,13,76.5,0,11 -226,8,14,76.4,0,11 -227,8,15,76.3,0,11 -228,8,16,76.2,0,11 -229,8,17,76.1,0,11 -230,8,18,76,0,11 -231,8,19,75.8,0,11 -232,8,20,75.7,0,11 -233,8,21,75.6,0,11 -234,8,22,75.5,0,10 -235,8,23,75.3,0,10 -236,8,24,75.2,0,10 -237,8,25,75,0,10 -238,8,26,74.8,0,10 -239,8,27,74.6,0,10 -240,8,28,74.4,0,9 -241,8,29,74.2,0,9 -242,8,30,74,0,9 -243,8,31,73.8,0,9 -244,9,1,73.6,0,9 -245,9,2,73.3,0,8 -246,9,3,73.1,0,8 -247,9,4,72.8,0,8 -248,9,5,72.5,0,8 -249,9,6,72.3,0,7 -250,9,7,72,0,7 -251,9,8,71.7,0,7 -252,9,9,71.4,0,7 -253,9,10,71,0,6 -254,9,11,70.7,0,6 -255,9,12,70.4,0,6 -256,9,13,70,1,6 -257,9,14,69.7,1,5 -258,9,15,69.3,1,5 -259,9,16,68.9,1,5 -260,9,17,68.6,1,5 -261,9,18,68.2,1,4 -262,9,19,67.8,1,4 -263,9,20,67.4,1,4 -264,9,21,67,1,3 -265,9,22,66.6,2,3 -266,9,23,66.2,2,3 -267,9,24,65.8,2,3 -268,9,25,65.4,2,3 -269,9,26,65,2,2 -270,9,27,64.6,3,2 -271,9,28,64.2,3,2 -272,9,29,63.8,3,2 -273,9,30,63.4,3,2 -274,10,1,63,4,2 -275,10,2,62.6,4,1 -276,10,3,62.2,4,1 -277,10,4,61.8,4,1 -278,10,5,61.4,5,1 -279,10,6,61.1,5,1 -280,10,7,60.7,5,1 -281,10,8,60.3,6,1 -282,10,9,59.9,6,1 -283,10,10,59.6,6,1 -284,10,11,59.2,7,1 -285,10,12,58.9,7,1 -286,10,13,58.5,7,1 -287,10,14,58.2,7,0 -288,10,15,57.8,8,0 -289,10,16,57.5,8,0 -290,10,17,57.1,8,0 -291,10,18,56.8,8,0 -292,10,19,56.5,9,0 -293,10,20,56.2,9,0 -294,10,21,55.9,9,0 -295,10,22,55.6,10,0 -296,10,23,55.2,10,0 -297,10,24,54.9,10,0 -298,10,25,54.6,10,0 -299,10,26,54.3,11,0 -300,10,27,54,11,0 -301,10,28,53.7,11,0 -302,10,29,53.4,12,0 -303,10,30,53.1,12,0 -304,10,31,52.8,12,0 -305,11,1,52.5,13,0 -306,11,2,52.2,13,0 -307,11,3,51.9,13,0 -308,11,4,51.6,13,0 -309,11,5,51.3,14,0 -310,11,6,51,14,0 -311,11,7,50.7,14,0 -312,11,8,50.4,15,0 -313,11,9,50.1,15,0 -314,11,10,49.8,15,0 -315,11,11,49.5,16,0 -316,11,12,49.2,16,0 -317,11,13,48.9,16,0 -318,11,14,48.5,16,0 -319,11,15,48.2,17,0 -320,11,16,47.9,17,0 -321,11,17,47.6,17,0 -322,11,18,47.2,18,0 -323,11,19,46.9,18,0 -324,11,20,46.5,18,0 -325,11,21,46.2,19,0 -326,11,22,45.9,19,0 -327,11,23,45.5,20,0 -328,11,24,45.2,20,0 -329,11,25,44.8,20,0 -330,11,26,44.5,21,0 -331,11,27,44.1,21,0 -332,11,28,43.7,21,0 -333,11,29,43.4,22,0 -334,11,30,43,22,0 -335,12,1,42.7,22,0 -336,12,2,42.3,23,0 -337,12,3,42,23,0 -338,12,4,41.6,23,0 -339,12,5,41.3,24,0 -340,12,6,40.9,24,0 -341,12,7,40.6,24,0 -342,12,8,40.2,25,0 -343,12,9,39.9,25,0 -344,12,10,39.6,25,0 -345,12,11,39.3,26,0 -346,12,12,38.9,26,0 -347,12,13,38.6,26,0 -348,12,14,38.3,27,0 -349,12,15,38,27,0 -350,12,16,37.7,27,0 -351,12,17,37.4,28,0 -352,12,18,37.1,28,0 -353,12,19,36.8,28,0 -354,12,20,36.6,28,0 -355,12,21,36.3,29,0 -356,12,22,36,29,0 -357,12,23,35.8,29,0 -358,12,24,35.6,29,0 -359,12,25,35.3,30,0 -360,12,26,35.1,30,0 -361,12,27,34.9,30,0 -362,12,28,34.7,30,0 -363,12,29,34.5,31,0 -364,12,30,34.3,31,0 -365,12,31,34.1,31,0 diff --git a/bpeng/bill/benchmarking.py b/bpeng/bill/benchmarking.py deleted file mode 100644 index 51d748c..0000000 --- a/bpeng/bill/benchmarking.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -This module calcuate energy usage of the building under climate normal weather data -which returns a monthly -""" - - -import os -import numpy as np -import pandas as pd -from .disaggragate_with_regr_matrix import weather_ralated_breakdown - - -class ClimateNormal_Benchmarking(): - - def __init__(self, regr_matric, utility_type): - self.regr_matric = regr_matric - self.utility_type = utility_type - self.normalized_monthly_bill = None - self.normalized_monthly_bill_wIndicators = None - self.normalized_billTotal = None - - def assumble_normal_bills(self): - ''' - This function is to construct a bill with climate normal weather data for each month, and use the calculated - regression metric to calculate weather normalized usage for this specific building. - ''' - origin_path = os.path.abspath(os.path.join(os.path.dirname("__file__"))) - data_path = os.path.join(origin_path, 'bpeng/bill/') - climate_normal_weather = pd.read_csv(data_path + '1981_2010_NYC_NormalsTemperature.csv', error_bad_lines=False) - climate_normal_weather['temperature'] = list(np.float(temperature) for temperature in - list(climate_normal_weather['temperature'])) - - nbf = pd.DataFrame(columns=['temperature', 'Days In Bill', 'Bill From Date']) - days_each_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] - month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - nbf['Bill From Date'] = month - nbf['Days In Bill'] = days_each_month - - temp = [] - month_keys = list(climate_normal_weather.groupby(climate_normal_weather['month']).groups.keys()) - for month in month_keys: - temp.append(list(climate_normal_weather[climate_normal_weather['month'] == month]['temperature'])) - - nbf['temperature'] = temp - nbf['Bill To Date'] = nbf['Bill From Date'] - - # pre-set the unit price to 0, and add the actual blended rate later - nbf['Unit Price'] = pd.Series((0 for x in range(len(nbf.index)))) - self.normalized_monthly_bill = weather_ralated_breakdown(self.regr_matric, nbf) - self.normalized_monthly_bill['Month'] = self.normalized_monthly_bill['Bill From Date'] - self.normalized_monthly_bill = self.normalized_monthly_bill.drop(['Bill From Date', 'Bill To Date'], axis=1) - - def energyAndEmmision(self, energy_usage): - ''' - this function is to: - 1. convert different energy units to mmBtu - 2. calculate source energy usage based on utility type and using EPA conversion ratio - 3. calcuate corresponding CO2 emission metric tons - - Args: - energy_usage (numpy.float): energy usage per period, a number can be daily, monthly, yearly usage - - Returns: - benchmark_indicators (dictionary): A disctionary contains co2 emission, - site and source energy usage based on utility type and energy uscage - ''' - - if self.utility_type == 1: - utility_name = 'Electricity' - utility_unit = 'kWh' - converstion_ratio = 0.003412 # energy conversion ratio to MMbtu - source_site_ratio = 2.8 - co2_emission_factor = 0 - - if self.utility_type == 2: - utility_name = 'Natural Gas' - utility_unit = 'Therms' - converstion_ratio = 0.1 # 0.1 mmBtu/therm - source_site_ratio = 1.05 - co2_emission_factor = 53.06 * 0.001 # metric tons co2 generated per mmbtu gas - - if self.utility_type == 3: - utility_name = 'Oil' - utility_unit = 'Gallon' - # converstion factor for 'heating oil' - converstion_ratio = 0.139 - source_site_ratio = 1.01 - co2_emission_factor = 73.96 * 0.001 - - if self.utility_type == 4: - utility_name = 'Water' - utility_unit = 'CCF' - - converstion_ratio = 0 - source_site_ratio = 0 - co2_emission_factor = 0 - - energy_usage_mmbtu = energy_usage * converstion_ratio - site_co2_tonnes = energy_usage_mmbtu * co2_emission_factor - source_energy_usage = energy_usage_mmbtu * source_site_ratio - source_co2_tonnes = source_energy_usage * co2_emission_factor - - benchmark_indicators = { - 'energy_usage_mmbtu': energy_usage_mmbtu, - 'site_co2_tonnes': site_co2_tonnes, - 'source_energy_usage': source_energy_usage, - 'source_co2_tonnes': source_co2_tonnes - } - - return benchmark_indicators - - def normalize_monthly_bill_wIndicators(self): - site_energy = [] - source_energy = [] - site_co2 = [] - source_co2 = [] - - for monthly_usage in list(self.normalized_monthly_bill['Calculated Total Usage']): - indicators = self.energyAndEmmision(monthly_usage) - site_energy.append(indicators['energy_usage_mmbtu']) - source_energy.append(indicators['source_energy_usage']) - site_co2.append(indicators['site_co2_tonnes']) - source_co2.append(indicators['source_co2_tonnes']) - - normalized_monthly_billwIndicators = self.normalized_monthly_bill.copy() - normalized_monthly_billwIndicators['Usage mmBTU'] = site_energy - normalized_monthly_billwIndicators['Site CO2 Tonnes'] = site_co2 - normalized_monthly_billwIndicators['Source Energy Usage mmBTU'] = source_energy - normalized_monthly_billwIndicators['Source CO2 Tonnes'] = source_co2 - - self.normalized_monthly_bill_wIndicators = normalized_monthly_billwIndicators - - self.normalized_billTotal = { - 'total_usage': sum(self.normalized_monthly_bill_wIndicators['Calculated Total Usage']), - 'total_usage_mmbtu': sum(self.normalized_monthly_bill_wIndicators['Usage mmBTU']), - 'source_energy_usage': sum(self.normalized_monthly_bill_wIndicators['Source Energy Usage mmBTU']), - 'site_co2_tonnes': sum(self.normalized_monthly_bill_wIndicators['Site CO2 Tonnes']), - 'source_co2_tonnes': sum(self.normalized_monthly_bill_wIndicators['Source CO2 Tonnes']), - # 'utiltiy_name': utility_name, - # 'utility_unit': utility_unit - } - - def RunAllFunctions(self): - self.assumble_normal_bills() - self.normalize_monthly_bill_wIndicators() diff --git a/bpeng/bill/bill_analysis.py b/bpeng/bill/bill_analysis.py deleted file mode 100644 index ea6a587..0000000 --- a/bpeng/bill/bill_analysis.py +++ /dev/null @@ -1,121 +0,0 @@ -"""This is the driver file to use this library to calculate the bill disaggragation results for BlocPower Dashboard""" - - -import pandas as pd -from .bill_cleaner import Bill -from .weather_data_cal import (weather_cleaning, bill_with_daily_temp) -from .setpoints_optimization import (optimize_setpoints) -from .disaggragate_with_regr_matrix import (weather_ralated_breakdown, non_weahter_related_breakdown) -from .normalized_monthly_bill import NormalizedBillingPeriod - - -class bill_analysis(): - - def __init__(self, raw_bill, daily_temp, end_uses): - self.bill = Bill(raw_bill) - self.daily_temp = daily_temp - self.end_uses = end_uses - self.output_normalized_monthly_bill = None - self.annual_normalized_monthly_bill = None - self.regr_results = None - self.annual_usage_costs_summary = None - - def main(self): - - weather_data_daily = weather_cleaning(self.daily_temp) - self.bill.RunAllFunctions() - - processed_bill = bill_with_daily_temp(self.bill.formatted_bill, weather_data_daily) - processed_bill = processed_bill.sort_values('Bill From Date') - - formatted_bill = self.bill.formatted_bill.sort_values('Bill From Date') - formatted_bill['Unit Price'] = formatted_bill['Total Charge']/formatted_bill['Usage'] - - NormalzedBill_Obj = NormalizedBillingPeriod(formatted_bill) - monthly_bill = NormalzedBill_Obj.normailized_monthly_bill() - monthly_bill_temp = bill_with_daily_temp(monthly_bill, weather_data_daily) - monthly_bill_with_price = NormalzedBill_Obj.normalized_unit_price(monthly_bill_temp) - - self.regr_results = optimize_setpoints(processed_bill) - monthly_breakdown = weather_ralated_breakdown(self.regr_results, monthly_bill_with_price) - self.output_normalized_monthly_bill = non_weahter_related_breakdown( - self.end_uses, monthly_breakdown).sort_values('Bill From Date').reset_index(drop=True) - self.annual_normalized_monthly_bill = self.output_normalized_monthly_bill[-12:].reset_index(drop=True) - self.annual_usage_costs_summary = self.annual_usage_costs(self.annual_normalized_monthly_bill, self.end_uses) - - return - - def annual_usage_costs(self, annual_bill_breakdown, end_uses): - """ - Calcuate annual usage and costs for each end use - - Args: - annual_bill_breakdown(pd.DataFrame): the output from non-weather-related usage breakdown - end_uses(dictionary): key: end use - value: percentage of the end use among non-weather related usage - - Return: - pd.DataFrame: annual usage, costs for each end uses - - """ - - annual_usage_costs_sum = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) - annual_bill_breakdown['Costs'] = \ - annual_bill_breakdown['Calculated Total Usage'] * annual_bill_breakdown['Unit Price'] - avg_price = sum(annual_bill_breakdown['Costs']) / sum(annual_bill_breakdown['Calculated Total Usage']) - end_use = list(annual_bill_breakdown.columns) - unwanted_column = ['Bill From Date', 'Bill To Date', 'Days In Bill', 'Unit Price', 'Non Weather Related Usage', - 'Calculated Total Usage', 'Costs'] - - for elem in unwanted_column: - end_use.remove(elem) - - annual_usage_costs_sum['End Use'] = end_use - - for j in range(len(annual_usage_costs_sum)): - temp = annual_bill_breakdown[annual_usage_costs_sum['End Use'].iloc[j]] - temp_usage = sum(temp) - annual_usage_costs_sum['Usage'].iloc[j] = temp_usage - - annual_usage_costs_sum['Costs'] = annual_usage_costs_sum['Usage'] * avg_price - - return annual_usage_costs_sum - - def to_json(self, period='bill_breakdown'): - """ - Output in json file - - Args: - - period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses - 'bill' for monthly out put for bill with only weather related breakdown - default 'bill_breakdown' - - Returns: - - json: output in json format - - """ - - if period == 'bill_breakdown': - return self.bill_breakdown.to_json(orient="records", date_format="iso") - - return self.output_table_monthly.to_json(orient="records", date_format="iso") - - def to_dict(self, period='bill_breakdown'): - """ - Output in dictionary file - - Args: - period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses - 'bill' for monthly out put for bill with only weather related breakdown - default 'bill_breakdown' - - Returns: - json: output in json format - """ - - if period == 'bill_breakdown': - return self.bill_breakdown.to_dict(orient="records") - - return self.output_table_monthly.to_dict(orient="records") diff --git a/bpeng/bill/bill_cleaner.py b/bpeng/bill/bill_cleaner.py deleted file mode 100644 index 0049bf6..0000000 --- a/bpeng/bill/bill_cleaner.py +++ /dev/null @@ -1,157 +0,0 @@ -import numpy as np -import pandas as pd -from calculater import (outliers_iqr) - - -class Bill: - - def __init__(self, raw_bill): - """ - Args: - self.raw_bill (pd.DataFrame): a raw bill with columns of - 'Bill From Date' - 'Bill To Date' - 'Days In Bill' - 'Usage' - 'Total Charge' - """ - self.raw_bill = raw_bill - self.formatted_bill = None - self.shape_change = False - self.quality_metric = None - self.bill_consi = None - - def formatDate(self, date): - return date.apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - - def formatting(self): - """ - Bill Cleaning - Step 1: - 1. format each column of the raw bill - 2. drop NAN / duplicates - Returns: - pd.DataFrame: a formatted bill - boolean: True - Length of the bill has changed during bill cleaning step 1 - """ - bill_copy = self.raw_bill.copy() - billFromDate = pd.to_datetime(bill_copy['Bill From Date']) - billToDate = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill From Date'] = self.formatDate(billFromDate) - bill_copy['Bill To Date'] = self.formatDate(billToDate) - bill_copy = bill_copy[[ - 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', 'Total Charge' - ]] - - bill_copy1 = bill_copy[billFromDate < billToDate] - bill_copy1['Bill From Date'] = billFromDate - bill_copy2 = bill_copy1.sort_values('Bill From Date') - bill_copy2['Bill To Date'] = billToDate - bill_copy2 = bill_copy2.dropna() - bill_copy2 = bill_copy2.drop_duplicates() - bill_copy2 = bill_copy2.reset_index(drop=True) - - self.shape_change = 'False' if np.array(bill_copy2.shape)[0] == np.array(self.raw_bill.shape)[0] else 'True' - self.formatted_bill = bill_copy2 - - def quality(self): - """ - Bill Cleaning - Step 2: - 1. Check each billing period to find a bill is too short or too long; - 2. formatted_bill (pd.DataFrame): formatted bill from Bill Cleaning Step 1 - 3. quality (pd.DataFrame): a dataframe with columns: - 'index': the index of the billing period which is identified as an outlier - 'flag': to indicate either it is too long or too short - """ - - if self.formatted_bill is None: - exit('The bill is not formatted, please do formatting!') - - bill = self.formatted_bill.copy() - bill = pd.DataFrame(bill) - days_in_bill = np.array(bill['Days In Bill']) - - # abnormal days in bill will return False - days_quality_index_inti = outliers_iqr(list(days_in_bill)) - days_quality_index_recheck = np.array( - [x not in range(25, 35) for x in days_in_bill]) - days_quality_index = list( - np.array(days_quality_index_inti) * - np.array(days_quality_index_recheck)) - - days_abn_index = [] - for x in range(len(days_quality_index)): - if days_quality_index[x]: - days_abn_index.append(x) - - self.quality_metric = pd.DataFrame(data=days_abn_index, columns=['index']) - - flag = [] - for billing_date_index in range(len(days_abn_index)): - point_index = days_abn_index[billing_date_index - 1] - if days_in_bill[point_index] < days_in_bill.mean(): - flag.append('short') - elif days_in_bill[point_index] >= days_in_bill.mean(): - flag.append('long') - - self.quality_metric['flag'] = np.array(flag) - - def consolidate(self): - """ - Bill Cleaning - Step 3: - consolidation of the bills that are too short compare to others - NOTE: error - self.bill_consi (pd.DataFrame): the cleaned bill and ready for analysis - """ - if self.formatted_bill is None: - exit('The bill is not formatted, please do formatting!') - - if self.quality_metric.empty: - exit() - - bill_quality_short = self.quality_metric[self.quality_metric['flag'] == 'short'] - if bill_quality_short is None: - exit() - - bill_consi = self.formatted_bill.copy() - # consolidate the billing period that is too short compare to others - - for seq in range(len(bill_quality_short)): - if self.quality_metric['flag'].iloc[seq] == 'short': - row_index = bill_quality_short['index'].iloc[seq] - - if row_index not in [0, bill_consi.index[-1]]: - if bill_consi['Days In Bill'][int(row_index - 1)] <= bill_consi['Days In Bill'][int(row_index + 1)]: - bill_consi['Bill To Date'][int(row_index - 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index - 1)] += bill_consi['Usage'][int(row_index)] - bill_consi['Total Charge'][int(row_index - 1)] += bill_consi['Total Charge'][int(row_index)] - bill_consi['Days In Bill'][int(row_index - 1)] += bill_consi['Days In Bill'][int(row_index)] - else: - bill_consi['Bill From Date'][int(row_index + 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index + 1)] += bill_consi['Usage'][int(row_index)] - bill_consi['Total Charge'][int(row_index + 1)] += bill_consi['Total Charge'][int(row_index)] - bill_consi['Days In Bill'][int(row_index + 1)] += bill_consi['Days In Bill'][int(row_index)] - - if row_index == 0: - bill_consi['Bill From Date'][1] = bill_consi['Bill From Date'][0] - bill_consi['Usage'][1] += bill_consi['Usage'][0] - bill_consi['Total Charge'][1] += bill_consi['Total Charge'][0] - bill_consi['Days In Bill'][1] += bill_consi['Days In Bill'][0] - - if row_index == bill_consi.index[-1]: - bill_consi['Bill To Date'].iloc[-2] = bill_consi['Bill To Date'].iloc[-1] - bill_consi['Usage'].iloc[-2] += bill_consi['Usage'].iloc[-1] - bill_consi['Total Charge'].iloc[-2] += bill_consi['Total Charge'].iloc[-1] - bill_consi['Days In Bill'].iloc[-2] += bill_consi['Days In Bill'].iloc[-1] - - bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) - self.bill_consi = bill_consi.reset_index(drop=False) - - def RunAllFunctions(self): - self.formatting() - self.quality() - self.consolidate() diff --git a/bpeng/bill/calculater.py b/bpeng/bill/calculater.py deleted file mode 100644 index a36b870..0000000 --- a/bpeng/bill/calculater.py +++ /dev/null @@ -1,88 +0,0 @@ -import numpy as np -import pandas as pd - - -def cdd(indoor_set_point, outdoor_temp): - """ - CDD - Assumption: - cooling setting point shall always higher than 55 F, - which is the trigger temperature of the heating system - - ?? - set_point is for indoor temperature - - """ - - if indoor_set_point > 65: - if indoor_set_point < outdoor_temp: - return outdoor_temp - indoor_set_point - return 0 - - -def hdd(indoor_set_point, outdoor_temp): - """ - HDD - Assumption: - Only if the outdoor temperature drop below 60'F, - then the heating system may be able to be turn on - """ - if (outdoor_temp < 60) and (indoor_set_point > outdoor_temp): - hdd = indoor_set_point - outdoor_temp - else: - hdd = 0 - return hdd - - -def threshold(data, set_point): - """If data is less the set_point, return 0""" - if data <= set_point: - data = 0 - return data - - -def outliers_iqr(ys): - """ - Find outlier using IQR method - - Args: - ys (list):A list of number needs to be checked for outliners - - Returns: - list: Returns a list of boolean - True: Outliner - False: Not Outliner - - """ - quartile_1, quartile_3 = np.percentile(ys, [25, 75]) - iqr = quartile_3 - quartile_1 - lower_bound = quartile_1 - (iqr * 1.5) - upper_bound = quartile_3 + (iqr * 1.5) - return [(x > upper_bound or x < lower_bound) for x in ys] - - -def anomaly_point(alist, thresholds): - """ - Find outlier and return its index - - Args: - - alist (list): A list of number needs to be checked for outliners - thresholds (float): a percentage of the difference between the mean of the whole list and - the mean of the list without the outlier - - Returns: - - list: Returns a list the index of the outliner - - """ - amean = [] - for x in range(len(alist)): - temp = np.hstack((alist[:(x)], alist[(x + 1):])) - amean.append(temp.mean()) - index = [] - for x in range(len(alist)): - temp1 = abs(alist[x] - np.array(alist).mean()) / np.array( - alist).mean() - index.append(temp1 < thresholds) - return index diff --git a/bpeng/bill/disaggragate_with_regr_matrix.py b/bpeng/bill/disaggragate_with_regr_matrix.py deleted file mode 100644 index 387c435..0000000 --- a/bpeng/bill/disaggragate_with_regr_matrix.py +++ /dev/null @@ -1,113 +0,0 @@ -''' -This module breaks down the energy usage by its weather-related features and non-weather-related features. -The inputs of this module is a cleaned bill, the best fitted regression model, and the non-weather-related usage info. -The outputs of this module is the disaggragated results based on the input informaiton. - -Author: Doris Han -''' - - -import numpy as np -import pandas as pd -from regr import (regr_temp_hddcdd) - - -def weather_ralated_breakdown(regr_matrix, processed_bill_any): - """ - This function calculates the weather-related usage based on regression model and billing info: - - Argu: - regr_matrix (Dictionary) - processed_bill_any(pd.DataFrame): Bill From Date, Bill To Date, Days In Bills, Unit Price, temperature - - Return: - disaggrated_bill(pd.DataFrame): Bill From Date, Bill To Date, Days In Bills, Unit Price, Heating Usage, Cooling Usage - Non Weather Related Usage, Calculated Total Usage - """ - - heating_set_point = regr_matrix['heating_set_point'] - cooling_set_point = regr_matrix['cooling_set_point'] - weather_related_usage = regr_matrix['weather_related_usage'] - regression_method = regr_matrix['regression_method'] - cooling_coef_ = regr_matrix['cooling_coef_'] - heating_coef_ = regr_matrix['heating_coef_'] - regr_output = regr_matrix['regr_output'] - intercept_ = regr_matrix['intercept_'] - - if regression_method == 0 : - disaggragated_bill = usage_not_related_to_weather(processed_bill_any) - - if regression_method == 1 : - hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, processed_bill_any) - r_squared_of_fit = regr_output[1] - - if np.absolute(r_squared_of_fit) > 0.5: - heating_consump = np.array(hddcdd[:, 0]) * heating_coef_ * processed_bill_any['Days In Bill'] - cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] - - if intercept_ < 0: - non_weather_related_consump = 0 * processed_bill_any['Days In Bill'] - else: - non_weather_related_consump = intercept_ * processed_bill_any['Days In Bill'] - else: - disaggragated_bill = usage_not_related_to_weather(processed_bill_any) - - if regression_method == 2: - hddcdd = regr_temp_hddcdd(heating_set_point, cooling_set_point, processed_bill_any) - dhw_usage = regr_output[2] - - heating_consump = np.array(hddcdd[:, 0]) * heating_coef_ * processed_bill_any['Days In Bill'] - cooling_consump = np.array(hddcdd[:, 1]) * cooling_coef_ * processed_bill_any['Days In Bill'] - non_weather_related_consump = dhw_usage * processed_bill_any['Days In Bill'] - - disaggragated_bill = processed_bill_any.copy() - disaggragated_bill = processed_bill_any[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Unit Price' - ]] - disaggragated_bill['Heating Usage'] = heating_consump - disaggragated_bill['Cooling Usage'] = cooling_consump - disaggragated_bill['Non Weather Related Usage'] = non_weather_related_consump - disaggragated_bill['Calculated Total Usage'] = heating_consump + cooling_consump + non_weather_related_consump - - return disaggragated_bill - -def usage_not_related_to_weather(bill): - ''' - this function return heating, cooling, and non-weather-related-usage for a bill that not related to weather at all. - ''' - bill['Heating Usage'] = bill['Usage'] * 0 - bill['Cooling Usage'] = bill['Usage'] * 0 - #attention: if in this case, other usage needs to be re-caculate - bill['Non Weather Related Usage'] = bill['Usage'] * 0 - bill['Calculated Total Usage'] = bill['Usage'] * 0 - return bill - -def non_weahter_related_breakdown(end_uses, weather_disaggragated_bill): - """ - breakdown the non_weather_related_usage - - Args: - end_uses(dictionary): key: end use - value: percentage of the end use among non-weather related usage - weather_disaggragated_bill (pd.DataFrame): bills have been breakdown to heating, cooling, and non-weather-related-comsump - Returns: - pd.DataFrame: bill breakdown of all end-use - """ - eu = pd.DataFrame( - list(end_uses.items()), columns=['end use', 'percentage']) - - for i in range(len(eu)): - name_of_the_column = eu['end use'].iloc[i] - value_of_the_column = eu['percentage'].iloc[i] - weather_disaggragated_bill[name_of_the_column] = weather_disaggragated_bill[ - 'Non Weather Related Usage'] * value_of_the_column - - if sum(eu['percentage']) != 1: - weather_disaggragated_bill['Miscellaneous'] = weather_disaggragated_bill['Non Weather Related Usage'] * ( - 1 - sum(eu['percentage'])) - - fully_disaggragated_bill = weather_disaggragated_bill.copy() - return fully_disaggragated_bill - - - diff --git a/bpeng/bill/get_billing_weather_data.py b/bpeng/bill/get_billing_weather_data.py deleted file mode 100644 index 335b50f..0000000 --- a/bpeng/bill/get_billing_weather_data.py +++ /dev/null @@ -1,63 +0,0 @@ - - -import psycopg2 -import pandas as pd -from influxdb import InfluxDBClient - - -class get_billing_weather_data(): - - def __init__(self, building_id, account_type): - self.building_id = building_id - self.account_type = account_type - self.weather = None - self.bill = None - return - - def get_weather_data(self): - user = 'engineering' - password = 'nPEc9Pz0iV' - dbname = 'weather' - host = '52.206.6.10' - port = 8086 - - influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) - - query_string = "SELECT * from temperature WHERE interval='daily'" - daily_temperature = influx_db.query(query_string).get_points('temperature') - self.weather = pd.DataFrame(daily_temperature) - - def query_bill(self): - - hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' - username = 'blocpower' - password = 'Z50Fwgkfi0SsVaBz' - database = 'utility_bill' - - myConnection = psycopg2.connect(host=hostname, user=username, - password=password, dbname=database) - df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) - df_account = pd.read_sql('SELECT * FROM public.account', myConnection) - df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) - df_account_selected = df_account[df_account['account_type'] == self.account_type] - - account = df_account_selected[df_account_selected['building_id'] == self.building_id] - acc_id = account['id'].iloc[0] - new_bill = df_bill[df_bill['account_id'] == acc_id].fillna(0) - new_bill['actual_total'] = new_bill['delivery_charge'] + \ - new_bill['supply_charge'] + \ - new_bill['esco_charge'] + \ - new_bill['delivery_tax'] - new_bill = new_bill.reset_index(drop=True) - bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] - bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days - self.bill = bill - - def RunAllFunctions(self): - self.get_weather_data() - self.query_bill() - - diff --git a/bpeng/bill/normalClimate_NYC.csv b/bpeng/bill/normalClimate_NYC.csv deleted file mode 100644 index ea6fe7b..0000000 --- a/bpeng/bill/normalClimate_NYC.csv +++ /dev/null @@ -1,13 +0,0 @@ -,temperature,days_in_bill,month -0,"[34.0, 33.799999999999997, 33.700000000000003, 33.5, 33.399999999999999, 33.299999999999997, 33.200000000000003, 33.100000000000001, 33.0, 32.899999999999999, 32.899999999999999, 32.799999999999997, 32.799999999999997, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.700000000000003, 32.799999999999997, 32.799999999999997, 32.899999999999999, 32.899999999999999, 33.0, 33.100000000000001, 33.100000000000001, 33.200000000000003, 33.299999999999997, 33.399999999999999]",31,Jan -1,"[33.5, 33.700000000000003, 33.799999999999997, 33.899999999999999, 34.0, 34.200000000000003, 34.299999999999997, 34.5, 34.600000000000001, 34.799999999999997, 34.899999999999999, 35.100000000000001, 35.299999999999997, 35.399999999999999, 35.600000000000001, 35.799999999999997, 36.0, 36.200000000000003, 36.399999999999999, 36.5, 36.700000000000003, 36.899999999999999, 37.100000000000001, 37.299999999999997, 37.600000000000001, 37.799999999999997, 38.0, 38.200000000000003]",28,Feb -2,"[38.399999999999999, 38.600000000000001, 38.899999999999999, 39.100000000000001, 39.299999999999997, 39.600000000000001, 39.799999999999997, 40.100000000000001, 40.299999999999997, 40.600000000000001, 40.799999999999997, 41.100000000000001, 41.399999999999999, 41.700000000000003, 41.899999999999999, 42.200000000000003, 42.5, 42.799999999999997, 43.100000000000001, 43.399999999999999, 43.700000000000003, 44.0, 44.299999999999997, 44.700000000000003, 45.0, 45.299999999999997, 45.600000000000001, 46.0, 46.299999999999997, 46.700000000000003, 47.0]",31,Mar -3,"[47.399999999999999, 47.700000000000003, 48.100000000000001, 48.399999999999999, 48.799999999999997, 49.200000000000003, 49.5, 49.899999999999999, 50.299999999999997, 50.600000000000001, 51.0, 51.399999999999999, 51.700000000000003, 52.100000000000001, 52.399999999999999, 52.799999999999997, 53.200000000000003, 53.5, 53.899999999999999, 54.200000000000003, 54.5, 54.899999999999999, 55.200000000000003, 55.600000000000001, 55.899999999999999, 56.200000000000003, 56.600000000000001, 56.899999999999999, 57.200000000000003, 57.5]",30,Apr -4,"[57.799999999999997, 58.100000000000001, 58.399999999999999, 58.700000000000003, 59.0, 59.299999999999997, 59.600000000000001, 59.899999999999999, 60.200000000000003, 60.5, 60.799999999999997, 61.100000000000001, 61.399999999999999, 61.700000000000003, 62.0, 62.299999999999997, 62.600000000000001, 62.899999999999999, 63.100000000000001, 63.399999999999999, 63.700000000000003, 64.099999999999994, 64.400000000000006, 64.700000000000003, 65.0, 65.299999999999997, 65.599999999999994, 65.900000000000006, 66.200000000000003, 66.599999999999994, 66.900000000000006]",31,May -5,"[67.200000000000003, 67.599999999999994, 67.900000000000006, 68.200000000000003, 68.5, 68.900000000000006, 69.200000000000003, 69.5, 69.900000000000006, 70.200000000000003, 70.5, 70.900000000000006, 71.200000000000003, 71.5, 71.799999999999997, 72.099999999999994, 72.400000000000006, 72.700000000000003, 73.0, 73.299999999999997, 73.599999999999994, 73.900000000000006, 74.099999999999994, 74.400000000000006, 74.599999999999994, 74.900000000000006, 75.099999999999994, 75.299999999999997, 75.5, 75.700000000000003]",30,Jun -6,"[75.900000000000006, 76.099999999999994, 76.299999999999997, 76.400000000000006, 76.5, 76.700000000000003, 76.799999999999997, 76.900000000000006, 77.0, 77.099999999999994, 77.200000000000003, 77.299999999999997, 77.299999999999997, 77.400000000000006, 77.400000000000006, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.5, 77.400000000000006, 77.400000000000006, 77.400000000000006, 77.299999999999997, 77.299999999999997]",31,Jul -7,"[77.200000000000003, 77.200000000000003, 77.099999999999994, 77.099999999999994, 77.0, 77.0, 76.900000000000006, 76.799999999999997, 76.799999999999997, 76.700000000000003, 76.599999999999994, 76.5, 76.5, 76.400000000000006, 76.299999999999997, 76.200000000000003, 76.099999999999994, 76.0, 75.799999999999997, 75.700000000000003, 75.599999999999994, 75.5, 75.299999999999997, 75.200000000000003, 75.0, 74.799999999999997, 74.599999999999994, 74.400000000000006, 74.200000000000003, 74.0, 73.799999999999997]",31,Aug -8,"[73.599999999999994, 73.299999999999997, 73.099999999999994, 72.799999999999997, 72.5, 72.299999999999997, 72.0, 71.700000000000003, 71.400000000000006, 71.0, 70.700000000000003, 70.400000000000006, 70.0, 69.700000000000003, 69.299999999999997, 68.900000000000006, 68.599999999999994, 68.200000000000003, 67.799999999999997, 67.400000000000006, 67.0, 66.599999999999994, 66.200000000000003, 65.799999999999997, 65.400000000000006, 65.0, 64.599999999999994, 64.200000000000003, 63.799999999999997, 63.399999999999999]",30,Sep -9,"[63.0, 62.600000000000001, 62.200000000000003, 61.799999999999997, 61.399999999999999, 61.100000000000001, 60.700000000000003, 60.299999999999997, 59.899999999999999, 59.600000000000001, 59.200000000000003, 58.899999999999999, 58.5, 58.200000000000003, 57.799999999999997, 57.5, 57.100000000000001, 56.799999999999997, 56.5, 56.200000000000003, 55.899999999999999, 55.600000000000001, 55.200000000000003, 54.899999999999999, 54.600000000000001, 54.299999999999997, 54.0, 53.700000000000003, 53.399999999999999, 53.100000000000001, 52.799999999999997]",31,Oct -10,"[52.5, 52.200000000000003, 51.899999999999999, 51.600000000000001, 51.299999999999997, 51.0, 50.700000000000003, 50.399999999999999, 50.100000000000001, 49.799999999999997, 49.5, 49.200000000000003, 48.899999999999999, 48.5, 48.200000000000003, 47.899999999999999, 47.600000000000001, 47.200000000000003, 46.899999999999999, 46.5, 46.200000000000003, 45.899999999999999, 45.5, 45.200000000000003, 44.799999999999997, 44.5, 44.100000000000001, 43.700000000000003, 43.399999999999999, 43.0]",30,Nov -11,"[42.700000000000003, 42.299999999999997, 42.0, 41.600000000000001, 41.299999999999997, 40.899999999999999, 40.600000000000001, 40.200000000000003, 39.899999999999999, 39.600000000000001, 39.299999999999997, 38.899999999999999, 38.600000000000001, 38.299999999999997, 38.0, 37.700000000000003, 37.399999999999999, 37.100000000000001, 36.799999999999997, 36.600000000000001, 36.299999999999997, 36.0, 35.799999999999997, 35.600000000000001, 35.299999999999997, 35.100000000000001, 34.899999999999999, 34.700000000000003, 34.5, 34.299999999999997, 34.100000000000001]",31,Dec diff --git a/bpeng/bill/normalized_monthly_bill.py b/bpeng/bill/normalized_monthly_bill.py deleted file mode 100644 index 22eb301..0000000 --- a/bpeng/bill/normalized_monthly_bill.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -This module will create a normailized usage based on a raw bill natural billing periods -should be refactor to a class -""" - -import warnings -from datetime import timedelta -import pandas as pd -from dateutil import relativedelta -warnings.simplefilter('ignore') - - -class NormalizedBillingPeriod(): - - def __init__(self, formatted_bill): - self.formatted_bill = formatted_bill - - def find_index_in_first_raw_biil(self, norm_bill_date): - """ - Return the index of the row of raw bill contains the bill date from a normalized bill - """ - for index, bill in self.formatted_bill.iterrows(): - if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: - return index - return None - - def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag): - """ - Return how many days from a normalized bill within a raw bill billing period - """ - - if flag == 'start': - days = (self.formatted_bill['Bill To Date'][norm_bill_date_respected_index] - norm_bill_date).days - if flag == 'end': - days = (norm_bill_date - self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]).days - return days - - def find_bills_in_raw(self, norm_bill_from, norm_bill_to): - """ - Return the index / number of days in each raw bill billing period for a normalized billing period - """ - - norm_bill_days = (norm_bill_to - norm_bill_from).days - results = [] - - index_start = self.find_index_in_first_raw_biil(norm_bill_from) - index_end = self.find_index_in_first_raw_biil(norm_bill_to) - - if index_start == index_end: - results.append({'index': index_start, 'num_days': norm_bill_days}) - - elif index_end - index_start >= 1: - days_in_start_period = self.days_in_raw_bill_period(norm_bill_from, index_start, 'start') - results.append({'index': index_start, 'num_days': days_in_start_period}) - days_in_end_period = self.days_in_raw_bill_period(norm_bill_to, index_end, 'end') - results.append({'index': index_end, 'num_days': days_in_end_period}) - - if index_end - index_start >= 2: - for period in range(index_end - index_start - 1): - days_in_period = self.formatted_bill['Days In Bill'][index_start + period + 1] - index_of_this_period = index_start + period + 1 - results.append({'index': index_of_this_period, 'num_days': days_in_period}) - - return results - - @staticmethod - def num_month_dates(last_date_bill, first_date_bill): - """Return number of month in between two date """ - lastdate = last_date_bill - timedelta(last_date_bill.day) - firstdate = first_date_bill + timedelta(days=32) - firstdate = firstdate.replace(day=1) - relative_date_range = relativedelta.relativedelta(lastdate, firstdate) - num_month = relative_date_range.years * 12 + relative_date_range.months + 1 - return num_month - - def normailized_monthly_bill(self): - """ - Returns: - pd.DataFrame: result with monthly consumptions - """ - last_date_of_bill = self.formatted_bill['Bill To Date'].iloc[-1] - first_bill_date = self.formatted_bill['Bill From Date'].iloc[0] - - last_dates = [] - first_dates = [] - - lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) - - # cosntruct a new dataframe with bills from the first to last day for each month - number_of_month = NormalizedBillingPeriod.num_month_dates(last_date_of_bill, first_bill_date) - for i in range(0, number_of_month): - last_dates.append(lastdate) - first_dates.append(lastdate.replace(day=1)) - lastdate = first_dates[i] - timedelta(1) - - normalized_monthly_bill = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill']) - normalized_monthly_bill['Bill From Date'] = first_dates - normalized_monthly_bill['Bill To Date'] = last_dates - normalized_monthly_bill[ - 'Days In Bill'] = normalized_monthly_bill['Bill To Date'] - normalized_monthly_bill['Bill From Date'] - normalized_monthly_bill[ - 'Days In Bill'] = normalized_monthly_bill['Days In Bill'].apply( - lambda x: x.days) + 1 - normalized_monthly_bill['Month'] = normalized_monthly_bill[ - 'Bill From Date'].apply(lambda x: x.month) - - return normalized_monthly_bill - - def normalized_unit_price(self, mbill): - """ - calculate the unit price for each nomralized billing period - """ - normalized_unit_price = [] - for month in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[month] - to_date = mbill['Bill To Date'].iloc[month] - index_numdays = self.find_bills_in_raw(from_date, to_date) - normalized_unit_price.append(self.weighted_unit_price(index_numdays)) - mbill['Unit Price'] = normalized_unit_price - return mbill - - def weighted_unit_price(self, index_numdays): - """ - Return the weighted average of unit price - """ - weights = [] - total_days = [] - for ind in range(len(index_numdays)): - unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] - days_in_that_period = int(index_numdays[ind]['num_days']) - weights.append(unit_price * days_in_that_period) - total_days.append(days_in_that_period) - weighted_unit_price = sum(weights)/sum(total_days) - return weighted_unit_price diff --git a/bpeng/bill/regr.py b/bpeng/bill/regr.py deleted file mode 100644 index 40a078d..0000000 --- a/bpeng/bill/regr.py +++ /dev/null @@ -1,103 +0,0 @@ - -import warnings -import numpy as np -from sklearn import linear_model -from calculater import (cdd, hdd, threshold, outliers_iqr) -warnings.simplefilter('ignore') - - -def regression_1(HeatingSetpoint, CoolingSetpoint, bill): - """ - A linear regression model with heating and cooling set fixed - - Args: - - HeatingSetpoint(float): heating season indoor set point - CoolingSetpoint(float): cooling season indoor set point - bill(pd.DataFrame): cleaned bill with daily temperature - - Returns: - - sklearn.linear_model.LinearRegression: regression model - float: r-squared of the linear regression model - 2d-array: a 2D numpy array of normalized billing period average daily HDDs and CDDs - - """ - consumption = np.array(bill['Usage'] / bill['Days In Bill']) - regression_temp = regr_temp_hddcdd(HeatingSetpoint, CoolingSetpoint, bill) - regr_model = linear_model.LinearRegression() - regr_model.fit(regression_temp, consumption) - score = regr_model.score(regression_temp, consumption) - return regr_model, score - -def regression_2(hp, bill): - """ - This funcion uses summer month gas usage as base consumption for the year - A linear regression of weather-related consumption and a fixed heating system set point - NOTE: USUALLY ERROR - - Args: - hp(float): heating season indoor set point - bill(pd.DataFrame): cleaned bill with daily temperature - - Returns: - sklearn.linear_model.LinearRegression: regression model - float: r-squared of the linear regression model - 2d-array: a 2D numpy array of normalized billing period HDDs sum - pd.DataFrame - - """ - impossible_cooling_temp = 300 - regression_temp = regr_temp_hddcdd(hp, impossible_cooling_temp, bill) - daily_hdd = regression_temp[:, 0] - bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) - - if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: - dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] - if len(dhw_only_consumption) > 0: - dhw_quality_index = outliers_iqr(list(dhw_only_consumption)) # list - dhw_only_consumption_checked = [] - for xx in range(len(dhw_only_consumption)): - if not dhw_quality_index[xx]: - dhw_only_consumption_checked.append( - list(dhw_only_consumption)[xx]) - daily_dhw = np.mean(dhw_only_consumption_checked) - else: - daily_dhw = 0 - else: - daily_dhw = 0 - - bill['dhw'] = daily_dhw * bill['Days In Bill'] - consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] - regr_model = linear_model.LinearRegression(fit_intercept=True) - regr_model.fit(regression_temp, consumption) - score = regr_model.score(regression_temp, consumption) - return regr_model, score, daily_dhw - -def regr_temp_hddcdd(heating_Setpoint, cooling_Setpoint, bill): - ''' - Cal for avg hdd/cdd for a bills with any billing period - ''' - - impossible_heating_temp = 0 - impossible_cooling_temp = 300 - - if heating_Setpoint != np.NaN: - ahdd = [list(hdd(heating_Setpoint, xx) for xx in x) for x in bill['temperature']] - else: - ahdd = [list(hdd(impossible_heating_temp, xx) for xx in x) for x in bill['temperature']] - - if cooling_Setpoint != np.NaN: - acdd = [list(cdd(cooling_Setpoint, xx) for xx in x) for x in bill['temperature']] - else: - acdd = [list(cdd(impossible_cooling_temp, xx) for xx in x) for x in bill['temperature']] - - # it should be billing period average hdd / days - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) - daily_cdd1 = np.array([threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) - regression_temp = np.array([daily_hdd1, daily_cdd1]).T - - return regression_temp diff --git a/bpeng/bill/setpoints_optimization.py b/bpeng/bill/setpoints_optimization.py deleted file mode 100644 index f14a91c..0000000 --- a/bpeng/bill/setpoints_optimization.py +++ /dev/null @@ -1,201 +0,0 @@ -''' -This module is to calculate the most fitted regression model and return the regression model matrix -and the system heating and cooling set points. -''' - - -import numpy as np -from scipy.optimize import minimize -from .regr import (regression_1, regression_2) - -def optimize_setpoints(processed_bill, weather_related_usage='Unknown'): - """ - Main function for the optimization and disaggregation - - Args: - processed_bill(pd.DataFrame): utility bills has been cleaned - - usage (str): - Specify if the weather - related consumption is for heating or cooling - 'Unknown': no prior knowledge - 'Heating': only for heating - 'Cooling': only for cooling - 'Both': for both heating and cooling - 'Both Not': not for heating or cooling - default value: 'Unknown' - - """ - designed_heating_temp = 72 - designed_cooling_temp = 65 - impossible_heating_temp = 0 - impossible_cooling_temp = 300 - regression_method = 1 - - if weather_related_usage == 'Unknown': - opt = minimize( - lambda x: -regression_1(x[0], x[1], processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2,'disp': False}) - - regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) - regr_model = regr_output[0] - heating_coef, cooling_coef = regr_model.coef_ - - if -opt.fun > 0.5: - if (heating_coef > 0) and (cooling_coef <= 0): - weather_related_usage = 'Heating' - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - elif (heating_coef >= 0) and (cooling_coef >= 0): - weather_related_usage = 'Both' - else: - weather_related_usage = 'Both Not' - - if weather_related_usage == 'Both': - opt = minimize( - lambda x: -regression_1(x[0], x[1],processed_bill)[1], - (65, 65), - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - regr_output = regression_1(opt.x[0], opt.x[1], processed_bill) - regr_model = regr_output[0] - heating_coef, cooling_coef = regr_model.coef_ - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - - # change accordingly for JOENYC buildings - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - # changes on Jan 17th 2018 - # please futher check with more bills - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - # set the range of heating set point or cooling point - - if round(heating_set_point) in range( - 60, 95) and round(cooling_set_point) in range( - 55, 75): - weather_related_usage = 'Both' - heating_coef = heating_coef - cooling_coef = cooling_coef - else: - # using standard seting points to check the bill - regr_output = regression_1(designed_heating_temp, designed_cooling_temp, processed_bill) - regr_model = regr_output[0] - heating_coef, cooling_coef = regr_model.coef_ - heating_set_point = opt.x[0] - cooling_set_point = opt.x[1] - - if (heating_coef > 0) and (cooling_coef < 0): - weather_related_usage = 'Heating' - cooling_coef = 0 - elif (heating_coef <= 0) and (cooling_coef > 0): - weather_related_usage = 'Cooling' - heating_coef = 0 - elif (heating_coef <= 0) and (cooling_coef <= 0): - weather_related_usage = 'Both Not' - heating_coef = 0 - cooling_coef = 0 - elif (heating_coef > 0) and (cooling_coef > 0): - if heating_coef / cooling_coef > 5: - weather_related_usage = 'Heating' - cooling_coef = 0 - else: - weather_related_usage = 'Both' - - if weather_related_usage == 'Heating': - opt_1 = minimize( - lambda x: -regression_1(x, impossible_cooling_temp, processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - opt_2 = minimize( - lambda x: -regression_2(x, processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - - if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): - opt = opt_2 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr_output = regression_2(opt.x[0], processed_bill) - regr_model = regr_output[0] - regression_method = 2 - else: - if round(opt_1.x[0]) in range(60, 95): - opt = opt_1 - heating_set_point = opt.x[0] - cooling_set_point = np.NaN - regr_output = regression_1(heating_set_point, impossible_cooling_temp, processed_bill) - regr_model = regr_output[0] - heating_coef = regr_model.coef_ - cooling_coef = 0 - else: - # legit heating set-point 72'F - heating_set_point = designed_heating_temp - cooling_set_point = np.NaN - regr_output = regression_1(heating_set_point, impossible_cooling_temp, processed_bill) - regr_model = regr_output[0] - heating_coef = regr_model.coef_ - cooling_coef = 0 - - if weather_related_usage == 'Cooling': - opt = minimize( - lambda x: -regression_1(impossible_heating_temp, x, processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, - 'disp': False}) - regr_output = regression_1(opt.x[0],impossible_heating_temp, processed_bill) - regr_model = regr_output[0] - cooling_set_point = opt.x[0] - heating_set_point = np.NaN - - if weather_related_usage == 'Both Not': - regression_method = 0 - cooling_set_point = np.NaN - heating_set_point = np.NaN - regr_model = np.NaN - - #return the regression - if weather_related_usage == 'Both Not': - intercept_ = 0 - heating_coef_ = 0 - cooling_coef_ = 0 - regr_output = np.NaN - else: - intercept_ = regr_output[0].intercept_ - heating_coef_ = regr_output[0].coef_[0] - cooling_coef_ = regr_output[0].coef_[1] - - optimized_regr_matrix = {'heating_set_point': heating_set_point, - 'cooling_set_point': cooling_set_point, - 'intercept_': intercept_, - 'heating_coef_': heating_coef_, - 'cooling_coef_': cooling_coef_, - 'weather_related_usage': weather_related_usage, - 'regression_method': regression_method, - 'regr_model': regr_model, - 'regr_output': regr_output - } - - return optimized_regr_matrix diff --git a/bpeng/bill/test.py b/bpeng/bill/test.py deleted file mode 100644 index 7743c51..0000000 --- a/bpeng/bill/test.py +++ /dev/null @@ -1,54 +0,0 @@ -''' -This file is to disaggregate and normalize the utility bills based on weather data. - -Author: Doris Han -''' - -import pandas as pd -import numpy as np -import datetime as datetime -from scipy.optimize import minimize -from datetime import timedelta -from .get_billing_weather_data import get_billing_weather_data -from .bill_analysis import bill_analysis -from .bill_cleaner import Bill -from .regr import regr_temp_hddcdd -from .disaggragate_with_regr_matrix import weather_ralated_breakdown -from .benchmarking import ClimateNormal_Benchmarking - -class BillDisaggregationDriver(): - - def __init__(self, end_uses, weather_daily, raw_utility_bill, weathter_related_usage_input, utility_type): - self.end_uses = end_uses - self.weather_daily = weather_daily - self.raw_bill = raw_utility_bill - self.weahter_related_usage_input = weathter_related_usage_input - self.utility_type = utility_type - self.most_recent_year_bill = None - self.regr_matrix = None - - def main(self): - - billAnalyzed_Obj = bill_analysis(self.raw_bill, self.weather_daily, self.end_uses) - billAnalyzed_Obj.main() - self.most_recent_year_bill = billAnalyzed_Obj.annual_normalized_monthly_bill - self.regr_matrix = billAnalyzed_Obj.regr_results - temp = ClimateNormal_Benchmarking(self.regr_matrix, self.utility_type) - temp.RunAllFunctions() - self.weather_normalized_bill = temp.normalized_monthly_bill - - - - - - - - - - - - - - - - diff --git a/bpeng/bill/weather_data_cal.py b/bpeng/bill/weather_data_cal.py deleted file mode 100644 index e450cbe..0000000 --- a/bpeng/bill/weather_data_cal.py +++ /dev/null @@ -1,53 +0,0 @@ -''' -this python file will deal with cal related to temperature from weather data -''' - -import pandas as pd - - -def weather_cleaning(raw_daily_temp): - """ - Format the daily temperature data from influx query - - Args: - raw_daily_temp (influx query): raw temperature data queried from Influx - Returns: - pd.DateFrame: Returns formatted daily temperature - - """ - raw_daily_temp.rename(columns={'time': 'date', - 'value': 'temperature'}, inplace=True) - raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) - raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), - str(x.date().day), - str(x.date().year)])) - daily_temp = raw_daily_temp - daily_temp['date'] = pd.to_datetime(daily_temp['date']) - return daily_temp - - -def bill_period_weather(bill_from_date, bill_end_date, weather_data_daily): - """ - get the outdoor temperaturebetween two date, return a list - - Args: - - bill_from_date (Datetime): start date of a period - bill_end_date (Datetime): end date of a period - weather_data_daily(pd.DataFrame): daily temperature - - Returns: - list: Returns a list of outdoor temperature for a period - """ - end_date_id = weather_data_daily[weather_data_daily.date == - bill_end_date].index[0] - start_date_id = weather_data_daily[weather_data_daily.date == - bill_from_date].index[0] - return list(weather_data_daily['temperature'][start_date_id:end_date_id]) - - -def bill_with_daily_temp(cleaned_bill, weather_data_daily): - cleaned_bill['temperature'] = [bill_period_weather(x, y, weather_data_daily) - for x, y in zip(cleaned_bill['Bill From Date'], - cleaned_bill['Bill To Date'])] - return cleaned_bill diff --git a/bpeng/mv/BreakdownLongBill.py b/bpeng/mv/BreakdownLongBill.py deleted file mode 100644 index 54cf82e..0000000 --- a/bpeng/mv/BreakdownLongBill.py +++ /dev/null @@ -1,211 +0,0 @@ -class BreakdownLongBill(): - - ''' - This class is to breakdown a bill which is abnormally long than the others - ''' - - def __init__(self, bill, weather): - self.bill = bill - self.weather = weather - self.bd = None - - - - def days_in_long_bill(self, days): - - interval = days // 30 - reminder = days % 30 - days_in_long_bill = [] - - if reminder > 15: - for intr in range(interval): days_in_long_bill.append(30) - days_in_long_bill.append(reminder) - if reminder <= 15: - for intr in range(interval-1): days_in_long_bill.append(30) - days_in_long_bill.append(reminder+30) - return days_in_long_bill - - def sub_billing_period_dates(self, billing_period_info): - - ''' - Return date ranges for each sub-billing period - - Args: - billing_period_info(series): one raw from the formatted bill - return: - long_bill_period(pd.DateFrame): same columns with formatted bill and drop temperature - - ''' - long_bill_period = pd.DataFrame(columns=['Bill From Date', 'Bill To Date','Usage', 'Days In Bill', - 'Total Charge', 'Unit Price']) - start_date = billing_period_info['Bill From Date'] - days_in_bill = self.days_in_long_bill(int(billing_period_info['Days In Bill'])) - - bill_from_dates = [] - bill_to_dates = [] - - for xx in range(len(days_in_bill)): - days = days_in_bill[xx] - bill_from_dates.append(start_date) - start_date = start_date + timedelta(days) - bill_to_dates.append(start_date) - - long_bill_period['Bill From Date'] = bill_from_dates - long_bill_period['Days In Bill'] = days_in_bill - long_bill_period['Bill To Date'] = bill_to_dates - - return long_bill_period - - def r0_long_bill_breakdown(self, sub_billing_period, billing_period_info): - ''' - breakdown the usage of a long bill when the usage is not related to weather - r0 indicate the regression method is 0 - - Args: - long_bill_period(pd.DataFrame): the output from function - sub_billing_period_dates - billing_period_info(series): one raw from the formatted bill - ''' - - daily_usage = billing_period_info['Usage'] / billing_period_info['Days In Bill'] - unit_price = billing_period_info['Total Charge'] / billing_period_info['Days In Bill'] - - sub_billing_period['Usage'] = sub_billing_period['Days In Bill'] * daily_usage - sub_billing_period['Total Charge'] = sub_billing_period['Days In Bill'] * unit_price - sub_billing_period['Unit Price'] = sub_billing_period['Total Charge']/sub_billing_period['Days In Bill'] - - return sub_billing_period - - - def r1_long_bill_breakdown(self, sub_billing_period, billing_period_info): - - ''' - breakdown the usage of a long bill when the regression method = 1 - r1 indicates the regression method is 1 - - ''' - - - sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ - self.bd.bill_period_weather(x, y) - for x, y in zip(sub_billing_period['Bill From Date'], - sub_billing_period['Bill To Date']) - ] - - heating_setpoint = self.bd.heating_set_point - cooling_setpoint = self.bd.cooling_set_point - - hddcdd = MeasurementVerification.weather_demand(heating_setpoint,cooling_setpoint,\ - sub_billing_period,self.weather) - sub_billing_period['Usage'] = self.bd.regr_model.predict(hddcdd) * sub_billing_period['Days In Bill'] - unit_price = billing_period_info['Total Charge']/billing_period_info['Usage'] - sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price - sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) - - return sub_billing_period - - def r2_long_bill_breakdown(self, sub_billing_period, billing_period_info): - - ''' - breakdown the usage of a long bill when the regression method = 2 - r2 indicates the regression method is 1 - - ''' - - - sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ - self.bd.bill_period_weather(x, y) - for x, y in zip(sub_billing_period['Bill From Date'], - sub_billing_period['Bill To Date']) - ] - - bill = self.bd.processed_bill - bill['temperature'] = bill['temperature'] = [ - self.bd.bill_period_weather(x, y) - for x, y in zip(bill['Bill From Date'], - bill['Bill To Date']) - ] - - heating_setpoint = self.bd.heating_set_point - hdd = MeasurementVerification.weather_demand(heating_setpoint,0,\ - sub_billing_period,self.weather)[:,0].reshape(-1,1) - - - regr = self.bd.summer_dhw(heating_setpoint,bill) - - bill_with_baseline = regr[3] - daily_baseline = np.average(bill_with_baseline['dhw']/bill_with_baseline['Days In Bill']) - - sub_billing_period['heating_usage'] = self.bd.regr_model.predict(hdd) * sub_billing_period['Days In Bill'] - sub_billing_period['dhw'] = sub_billing_period['Days In Bill'] * daily_baseline - sub_billing_period['Usage'] = sub_billing_period['heating_usage'] + sub_billing_period['dhw'] - - unit_price = billing_period_info['Total Charge'] / billing_period_info['Usage'] - - sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price - sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) - - sub_billing_period = sub_billing_period.drop('dhw',axis=1) - sub_billing_period = sub_billing_period.drop('heating_usage',axis=1) - - return sub_billing_period - - - def long_bill_breakdown(self): - ''' - breakdown the bills with irregular long billing period, return a formatted bill with sub-billing peirod for the long bill - - Args: - formatted_bill(pd.DataFrame) - Ruturn: - the broken down bill of bills with long billing peirod - - - ''' - self.bd = BillDisaggregation(self.bill, self.weather) - #TODO: Be careful about the inputs - self.bd.optimize_setpoints() - - formatted_bill = self.bd.formatted_bill - regression_method = self.bd.regression_method - regr_model = self.bd.regr_model - bill_quality = self.bd.bill_quality(self.bill) - #bill = formatted_bill.drop('temperature', axis=1) - new_bill = self.bill.copy().reset_index(drop=True) - - if any(i == 'long' for i in bill_quality.flag): - bill_quality_long = bill_quality[bill_quality['flag'] == 'long'] - - if len(bill_quality_long) > 0: - for x in range(len(bill_quality_long)): - - index_of_raw_bill = bill_quality_long['index'].iloc[x] - long_billing_period_info = formatted_bill.iloc[index_of_raw_bill] - days = long_billing_period_info['Days In Bill'] - total_usage = long_billing_period_info['Usage'] - days_breakdown_list = self.days_in_long_bill(days) - long_bill_breakdown_single_raw = self.sub_billing_period_dates(long_billing_period_info) - new_bill = new_bill.drop(index_of_raw_bill) - - if regression_method == 0: - long_bill_breakdown_single_raw = self.r0_long_bill_breakdown(long_bill_breakdown_single_raw,\ - long_billing_period_info) - - - if regression_method == 1: - long_bill_breakdown_single_raw = self.r1_long_bill_breakdown(long_bill_breakdown_single_raw,\ - long_billing_period_info) - - if regression_method == 2: - long_bill_breakdown_single_raw = self.r2_long_bill_breakdown(long_bill_breakdown_single_raw,\ - long_billing_period_info) - - adjustment_factor = total_usage/sum(long_bill_breakdown_single_raw['Usage']) - long_bill_breakdown_single_raw['Usage'] = long_bill_breakdown_single_raw['Usage'] * adjustment_factor - new_bill = new_bill.append(long_bill_breakdown_single_raw) - - new_bill = new_bill.sort('Bill From Date').reset_index(drop=True) - - else: - new_bill = bill - - return new_bill diff --git a/bpeng/mv/identifier.py b/bpeng/mv/identifier.py deleted file mode 100644 index 3986ac9..0000000 --- a/bpeng/mv/identifier.py +++ /dev/null @@ -1,88 +0,0 @@ -class DateIdentifier(): - - ''' - In order to add [Month, date] identifer in addition to YYYY/MM/DD - Since the comparasion is happened between same day of different year in M&V process - ''' - - def __init__(self, bill): - self.bill = bill - - def identifier_for_date(self, bill_from_date, bill_to_date): - ''' - return a list of [month, date] for a date range - Args: - bill_from_date(timestamp): - bill_to_date(timestamp): - - Return: - identifier(list): a list of [month, date] of the dates in bewteem the bill_from_date and bill_to_date - ''' - - - days = (bill_to_date - bill_from_date).days - identifier = [] - - for d in range(days): - date = bill_from_date + timedelta(d) - date_id = [date.month, date.day] - identifier.append(date_id) - - return identifier - - def identifier_matrix(self): - ''' - Args: - bill(pd.DataFrame): - - Return: - matrix(pd.DataFrame): - identifier: [month, date], - unit_price: average unit price for a specific billing period - daily_usage: daily_usage for a specific date range - ''' - bill = self.bill - matrix = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) - - for bp in range(len(bill)): - from_date = bill['Bill From Date'].iloc[bp] - to_date = bill['Bill To Date'].iloc[bp] - unit_price = bill['Unit Price'].iloc[bp] - daily_usage = bill['Usage'].iloc[bp]/bill['Days In Bill'].iloc[bp] - matrix_temp = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) - matrix_temp['identifier'] = self.identifier_for_date(from_date, to_date) - matrix_temp['unit_price'] = pd.Series([unit_price for x in range(len(matrix_temp))]) - matrix_temp['daily_usage'] = pd.Series([daily_usage for x in range(len(matrix_temp.index))]) - matrix = matrix.append(matrix_temp, ignore_index=True) - - return matrix - - - def match_identifier_for_billing_period(self, from_date, to_date, base_identifier_matrix): - ''' - Given a date range and a identifier matirx, - Return the usage of period by matching the date identifier between the period to the corresponding identifier daily - usage - - Args: - from_date(timestamp) - to_date(timestamp) - base_identifier_matrix(pd.DataFrame): - identifier: month, date - daily_usage - unit_price - - Return: - period_usage - ''' - identifier_list = self.identifier_for_date(from_date, to_date) - - period_usage = 0 - for xx in range(len(identifier_list)): - identi = identifier_list[xx] - temp = base_identifier_matrix[base_identifier_matrix.identifier.apply(lambda x: x == identi)] - unit_price = temp.unit_price.mean() - daily_usage = temp.daily_usage.mean() - period_usage += daily_usage - - return period_usage diff --git a/bpeng/mv/mandv_orig.py b/bpeng/mv/mandv_orig.py deleted file mode 100644 index 3ab95e3..0000000 --- a/bpeng/mv/mandv_orig.py +++ /dev/null @@ -1,706 +0,0 @@ - - -import matplotlib as mpl -mpl.get_backend() - -import matplotlib.pyplot as plt -import seaborn as sns; sns.set() -import datetime as datetime -import requests -import pandas as pd -import numpy as np -import statsmodels.api as sm -import warnings -warnings.simplefilter('ignore') -import psycopg2 -from matplotlib.pyplot import text -from datetime import timedelta -from sklearn import linear_model -from scipy.optimize import minimize -from scipy import stats -from dateutil import relativedelta - - -class MeasurementVerification(): - - ''' - The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C - - - ''' - - - def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ - weather_relate_usage_type, occupancy, utility_type): - - - self.original_bill = bill - self.weather = weather - self.thermal_comf = thermal_comf - self.earlest_retrofit_date = earlest_retrofit_date - self.latest_retrofit_date = latest_retrofit_date - self.pre_weather_related_usage_type = weather_relate_usage_type - self.occupancy = occupancy - self.utility_type = utility_type - self.weather_relate_usage_type = None - self.projection_post_bill = None - self.projection_fig = None - self.pre_bill = None - self.post_bill = None - self.baseline = None - self.reporting = None - self.projection_reporting_bill = None - self.regression_stat = None - self.first_year_saving = None - self.projection_baseline_bill = None - self.setpoints = {} - #self.non_weather_related_end_uses = {'Miscellanous': 1} - - - def split_bill(self, raw_bill): - ''' - split raw bill into two section: - pre - retrofit - post - retrofit - - Args: - raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module - earlest_retrofit_date(str): earlest implementation date of ECMs for this meter - latest_retrofit_date(str): latest implemntation date of ECMs for this meter - - Returns: - pre_bill(pd.DataFrame): raw_bill for pre-retrofit period - post_bill(pd.DataFrame): raw_bill for post_retrotit period - - ''' - - bill = raw_bill.copy() - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - - erd = pd.to_datetime(self.earlest_retrofit_date) - lrd = pd.to_datetime(self.latest_retrofit_date) - - bill = bill.sort_values('Bill From Date').reset_index() - - for i in range(len(bill) - 1): - if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): - erd_flag = i - - if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): - lrd_flag = i - - pre_bill = bill.iloc[0: erd_flag] - post_bill = bill.iloc[(lrd_flag+1):] - - return pre_bill, post_bill - - @staticmethod - def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ - weather_related_usage, weather_data): - ''' - Disaggregate bill with all known inputs - - Args: - heating_setpoint(float): - cooling_setpoint(float): - bill(pd.DataFrame): - regression_method(int): - 0 stands for 'both not'; - 1 stands for regression method 1 using multiple variable regression - 2 stands for using summer dhw method to estimate usage - - Returns: - regr(list): - ''' - - bd = BillDisaggregation(bill, weather_data) - #pre-requisit is the bills are longer than 365 days/ - bd.optimize_setpoints() - - bill['temperature'] = [ - bd.bill_period_weather(x, y) - for x, y in zip(bill['Bill From Date'], - bill['Bill To Date']) - ] - - if weather_related_usage == 'Both': - regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) - - - if weather_related_usage == 'Cooling': - regr = bd.regression_1(0, cooling_setpoint, bill) - - if weather_related_usage == 'Heating': - if regression_method == 1: - regr = bd.regression_1(heating_setpoint, 0, bill) - - if regression_method ==2: - regr = bd.summer_dhw(heating_setpoint, bill) - - return regr - - @staticmethod - def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): - - ''' - debugged - I think this functioin should be part of BillDisaggregation Class - This function is to calculate the HDD/ CDD with preferred weather period and setpoints - - Args: - heating_setpoint(float) - cooling_setpoint(float) - bill(pd.DataFrame): bills with columns: - 'Bill From Date' - 'Bill To Date' - Returns: - weather_demand(np.array): an array of hddcdd - ''' - - bd = BillDisaggregation(bill, weather_data) - - ahdd = [ - list(bd.hdd(heating_setpoint, xx) for xx in x) - for x in bill['temperature'] - ] - - acdd = [ - list(bd.cdd(cooling_setpoints, xx) for xx in x) - for x in bill['temperature'] - ] - - - # It should be billing period average hdd / days - - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - - # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([ - bd.threshold(daily_hdd[x], 0.1) - for x in range(len(daily_hdd)) - ]) - - daily_cdd1 = np.array([ - bd.threshold(daily_cdd[x], 0.1) - for x in range(len(daily_cdd)) - ]) - - weather_demand = np.array([daily_hdd1, daily_cdd1]).T - - return weather_demand - - def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): - - ''' - the function calls for bill disaggregation module - - ''' - - bd = BillDisaggregation(bill,self.weather) - bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) - - bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ - 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ - 'Heating Setpoint', 'Cooling Setpoint']) - - - output = bd.benchmarking_output() - bill_evaluation = bill_evaluation.append({\ - 'Usage': output[0],\ - 'r squared': output[1],\ - 'regr method': output[2],\ - 'Consumption':output[3],\ - 'Heating': format(output[4], '0.0f'),\ - 'Cooling': format(output[5], '0.0f'),\ - 'Non-weather-related-usage': format(output[6], '0.0f'),\ - 'diff':format(output[7],'.2%'),\ - 'hdd': format(output[8], '0.0f'),\ - 'cdd': format(output[9], '0.0f'),\ - 'Days in Bill': output[10],\ - 'Unit Price':bd.avg_unit_price,\ - 'Heating Setpoint': output[11],\ - 'Cooling Setpoint': output[12] - }, ignore_index = True) - - return bd, bill_evaluation - - def usage_not_related_to_weather(self, bill): - - ''' - To return heating/cooling coef and daily non-weather-related-usage when the usage is not related to weather change - - Args: - bill(pd.DataFrame): a utility bill whose usage dose not relate to weather change - - Return: - bill_metrix(dictionary): a dictionary of heating coef, cooling coef, intercept - ''' - - non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) - heating_coef = 0 - cooling_coef = 0 - - bill_metrix = {'heating_coef': 0, - 'cooling_coef': 0, - 'non_weather_related_daily_usage': non_weather_related_daily_usage} - - return bill_metrix - - - def cal_heating_coef(self, regr_heating_coef): - ''' - Calculation for cooling coefficiency with the consideration of changing factors for projection purpose - - need further development - - ''' - heating_coef = regr_heating_coef - return heating_coef - - - def cal_cooling_coef(self, regr_cooling_coef): - ''' - Calculation for cooling coefficiency for projection purpose - need further development - ''' - cooling_coef = regr_cooling_coef - return cooling_coef - - def occup(self): - '''occupancy change pre & post retrofit''' - occupancy_change = 1 - return occupancy_change - - - def usage_related_to_weather(self, regr, regression_method): - - ''' - Args: - regr(list): - regr[0] is regression model - regr[1] is r-squared - regr[2] is hdd/cdd through the regression - regr[3] is the bills (optional, only for regression method 2) - regression_method(int): 1 or 2 - - Returns: - bill_metrix(Dictionary): as defined below - - - ???? Doris: I want to keep the same input as - non weather related usage matrix - ''' - if regression_method == 1: - regr_model = regr[0] - heating_coef = regr_model.coef_[0] - cooling_coef = regr_model.coef_[1] - non_weather_related_daily_usage = regr_model.intercept_ - - - if regression_method == 2: - regr_model = regr[0] - heating_coef = regr_model.coef_[0] - cooling_coef = 0 - non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] - - bill_metrix = {'heating_coef': heating_coef, - 'cooling_coef': cooling_coef, - 'non_weather_related_daily_usage': non_weather_related_daily_usage} - - return bill_metrix - - - def annual_bill_pre_retrofit(self, bill): - ''' - Return the latest/nearest annual bill to performance analysis, - however, it will only return natural billing period, which means it could be around 365 but not exact 365 days - - Args: - bill(pd.DataFrame): bill of pre_retrofit period - - Return: - annual_bill(pd.DataFrame) - - ''' - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - - bill_last_date = bill['Bill To Date'].iloc[-1] - proposed_first_date = bill_last_date - timedelta(365) - bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) - - index = bill.flag.lt(-1).idxmin() - - if index == 0: - annual_bill = bill.drop('flag', axis=1) - else: - annual_bill = bill[index-1:].drop('flag', axis=1) - - return annual_bill - - - def annual_bill_post_retrofit(self, bill): - ''' - Return the latest/nearest annual bill for post-retrofit period - however, it will only return natural billing period, which means it could be around 365 but not exact 365 days - - Args: - bill(pd.DataFrame): bill of post_retrofit - - Return: - annual_bill(pd.DataFrame) - ''' - - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill = bill.drop('index', axis=1) - - bill = bill.reset_index(drop=True) - bill_first_date = bill['Bill From Date'].iloc[0] - proposed_last_date = bill_first_date + timedelta(365) - - bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) - index = bill.flag.lt(-1).idxmin() - - annual_bill = bill[0:index].drop('flag', axis=1) - if sum(annual_bill['Days In Bill']) < 360: - annual_bill = bill[0:index+1] - - return annual_bill - - - def baseline_bill(self, pre_bill): - - ''' - breakdown the bill of pre_retrofit period if there is long billing period and - return the billing data will be used as 'Baseline Bill' - - ''' - bk = BreakdownLongBill(pre_bill, self.weather) - pre_bill_breakdown = bk.long_bill_breakdown() - baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) - - return baseline_bill - - def reporting_bill(self, post_bill): - bk = BreakdownLongBill(post_bill, self.weather) - bill_breakdown = bk.long_bill_breakdown() - reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) - - return reporting_bill - - def main(self): - - ''' - Function to calcuate the energy usage for post_retrofit period using baseline conditions - - - ''' - # format the bill for later analysis - bill_bd = BillDisaggregation(self.original_bill, self.weather) - bill_bd.optimize_setpoints() - bill = bill_bd.formatted_bill - - - pre_bill, post_bill = self.split_bill(bill) - - #raw bill of pre-retrofit period - self.pre_bill = pre_bill - - assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' - - #baseline bill - self.baseline = self.baseline_bill(pre_bill) - - #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long - self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() - - #calcuate the all prejected baseline for all the post retrofit billing period - post = projection_baseline(self.baseline, self.post_bill, self.weather) - self.projection_post_bill = post.projection() - self.setpoints = {'heating': post.hp, - 'cooling': post.cp} - - #calculate the projected baseline for baseline period - - baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) - self.projection_baseline_bill = baseline_usage_baseline_period.projection() - - - #calculate the projected baseline for reporting period - self.reporting = self.reporting_bill(self.post_bill) - report = projection_baseline(self.baseline, self.reporting, self.weather) - self.projection_reporting_bill = report.projection() - self.regression_stat = report.regression_stat - - - - self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) - xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) -# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) -# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) - - @staticmethod - def energy_savings(bill_with_baseline): - ''' - Return the energy saving for bills with baseline usage - - Args: - bill_with_baseline(pd.DataFrame): - 'Bill From Date' - 'Bill To Date' - 'Usage' - 'Days In Bill' - 'Total Charge' - 'temperature' - 'baseline' - - - Return: - savings(Dict): - - 'Measured Energy Usage for Reporting Period'; - 'Baseline Projection for Reporting Period'; - 'Annual Energy Savings'; - 'Energy Reductaion Percentage'; - 'Costs Avoidance'; - 'Annual Energy Costs' - - ''' - - metered_usage = sum(bill_with_baseline['Usage']) - baseline_usage = sum(bill_with_baseline['baseline']) - - energy_savings = baseline_usage - metered_usage - total_costs = sum(bill_with_baseline['Total Charge']) - unit_price = total_costs / metered_usage - saving_percentage = energy_savings/ baseline_usage - dollar_savings = energy_savings * unit_price - - savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), - 'Baseline Projection for Reporting Period': round(baseline_usage,0), - 'Annual Energy Savings': round(energy_savings,0), - 'Energy Reductaion Percentage': saving_percentage, - 'Costs Avoidance': round(dollar_savings,0), - 'Annual Energy Costs': round(total_costs,0)} - - return savings - - - def return_utility_name(self, utility_type): - - if self.utility_type == 1: - name = 'Electricity' - unit = 'kWh' - if self.utility_type == 2: - name = 'Natural Gas' - unit = 'Therms' - if self.utility_type == 3: - name = 'Oil' - unit = 'Gallon' - if self.utility_type == 4: - name = 'Water' - unit = 'CCF' - - return name, unit - - def plot_result(self, pre, projection_post): - ''' - The funtion to plot: - 1. the metered data over baseline and reporting period - 2. the baseline data over reporting period - - Args: - pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) - prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline - - Returns: - metered(pd.DataFrame): Metered usage over baseline + reporting period - - - ''' - - sns.set_style("white") - plt.figure(figsize=(10,6)) - ax = plt.gca() - - - name, unit = self.return_utility_name(self.utility_type) - - post = projection_post.copy() - - bill = self.original_bill - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill = bill.sort_values('Bill From Date').reset_index(drop=True) - - pre_y = pre['Usage'] - post_x = post['Bill To Date'].values - post_pred_y = post['baseline'] - post_y=post['Usage'] - - y_max = max(pre_y) * 1.5 - arrow_location = max(pre_y) * 0.10 - text_loccation = max(pre_y) * 0.13 - const_location = max(pre_y) * 1.1 - legend_location = max(pre_y) * 1.35 - legend_text_location = max(pre_y) * 1.45 - - # Baseline Period - baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) - baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) - plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') - - ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ - xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) - - ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ - ha='center',\ - va='center',weight='bold') - - # Reporting Period - print('post retrofit', post) - reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) - - reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) - plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') - - ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ - xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) - - ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ - ha='center',\ - va='center', weight='bold') - - #ECM construction - index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] - - index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] - construction = bill[index_start:index_end] - - - construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) - ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) - - plt.axvline(construction_median, color='darkgrey', linestyle='-',\ - ymin=(1.1/1.5), - ymax=(1.35/1.5)) - - plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) - - ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ - va='center', weight='bold', color='darkgrey') - - - #Metered Usage Overtime - metered = pre.append(construction).append(post) - metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() - - metered_x = metered['Bill To Date'].values - metered_y = metered['Usage'] - - # Legend - Measured Usage - plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ - ymin=(metered['Usage'].iloc[4]/y_max), - ymax=(1.35/1.5)) - - plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) - - ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ - va='center',weight='bold', color='cornflowerblue') - - - # Legend - Adjusted Baseline - plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ - ymin=(post['baseline'].iloc[10]/y_max), - ymax=(1.35/1.5)) - - plt.plot(post['Bill To Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) - - ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ - va='center',weight='bold', color='brown') - - - - # Legend - Energy Savings - plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ - ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), - ymax=(1.35/1.5)) - - plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) - - ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ - va='center',weight='bold', color='green') - - - #plots - plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) - plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) - - ax.fill_between(post_x,\ - post_y, post_pred_y,\ - facecolor='mediumturquoise',\ - alpha=0.1, - edgecolor='b',\ - linewidth=0) - - plt.ylim([0,y_max]) - - - for spine in plt.gca().spines.values(): - spine.set_visible(False) - - - xmin, xmax = ax.get_xlim() - ymin, ymax = ax.get_ylim() - - fig = plt.gcf() - - # get width and height of axes object to compute - # matching arrowhead length and width - dps = fig.dpi_scale_trans.inverted() - bbox = ax.get_window_extent().transformed(dps) - width, height = bbox.width, bbox.height - - # manual arrowhead width and length - hw = 0.5/20.*(ymax-ymin) - hl = 0.5/20.*(xmax-xmin) - lw = 1 # axis line width - ohg = 0.1 # arrow overhang - - # compute matching arrowhead length and width - yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 - yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 - - # y axis - ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, - head_width=yhw, head_length=yhl, overhang = ohg, - length_includes_head= True, clip_on = False) - # x axis - ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, - head_width=hw, head_length=hl, overhang = ohg, - length_includes_head= True, clip_on = False) - - plt.ylabel('Consumption ({})'.format(unit)) - ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ - size=14, weight='bold',verticalalignment='bottom', alpha=0.8) - -# from matplotlib import rcParams -# rcParams['axes.titlepad'] = 50 - - return metered - - - @staticmethod - def ols_regression(X,y): - ''' - Return the summary stats for ordinary linear regression - ''' - - X2 = sm.add_constant(X) - est = sm.OLS(y, X2) - est2 = est.fit() - return est2.summary() diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py index e69de29..3ab95e3 100644 --- a/bpeng/mv/measurementVerification.py +++ b/bpeng/mv/measurementVerification.py @@ -0,0 +1,706 @@ + + +import matplotlib as mpl +mpl.get_backend() + +import matplotlib.pyplot as plt +import seaborn as sns; sns.set() +import datetime as datetime +import requests +import pandas as pd +import numpy as np +import statsmodels.api as sm +import warnings +warnings.simplefilter('ignore') +import psycopg2 +from matplotlib.pyplot import text +from datetime import timedelta +from sklearn import linear_model +from scipy.optimize import minimize +from scipy import stats +from dateutil import relativedelta + + +class MeasurementVerification(): + + ''' + The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C + + + ''' + + + def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ + weather_relate_usage_type, occupancy, utility_type): + + + self.original_bill = bill + self.weather = weather + self.thermal_comf = thermal_comf + self.earlest_retrofit_date = earlest_retrofit_date + self.latest_retrofit_date = latest_retrofit_date + self.pre_weather_related_usage_type = weather_relate_usage_type + self.occupancy = occupancy + self.utility_type = utility_type + self.weather_relate_usage_type = None + self.projection_post_bill = None + self.projection_fig = None + self.pre_bill = None + self.post_bill = None + self.baseline = None + self.reporting = None + self.projection_reporting_bill = None + self.regression_stat = None + self.first_year_saving = None + self.projection_baseline_bill = None + self.setpoints = {} + #self.non_weather_related_end_uses = {'Miscellanous': 1} + + + def split_bill(self, raw_bill): + ''' + split raw bill into two section: + pre - retrofit + post - retrofit + + Args: + raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module + earlest_retrofit_date(str): earlest implementation date of ECMs for this meter + latest_retrofit_date(str): latest implemntation date of ECMs for this meter + + Returns: + pre_bill(pd.DataFrame): raw_bill for pre-retrofit period + post_bill(pd.DataFrame): raw_bill for post_retrotit period + + ''' + + bill = raw_bill.copy() + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + + erd = pd.to_datetime(self.earlest_retrofit_date) + lrd = pd.to_datetime(self.latest_retrofit_date) + + bill = bill.sort_values('Bill From Date').reset_index() + + for i in range(len(bill) - 1): + if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): + erd_flag = i + + if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): + lrd_flag = i + + pre_bill = bill.iloc[0: erd_flag] + post_bill = bill.iloc[(lrd_flag+1):] + + return pre_bill, post_bill + + @staticmethod + def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ + weather_related_usage, weather_data): + ''' + Disaggregate bill with all known inputs + + Args: + heating_setpoint(float): + cooling_setpoint(float): + bill(pd.DataFrame): + regression_method(int): + 0 stands for 'both not'; + 1 stands for regression method 1 using multiple variable regression + 2 stands for using summer dhw method to estimate usage + + Returns: + regr(list): + ''' + + bd = BillDisaggregation(bill, weather_data) + #pre-requisit is the bills are longer than 365 days/ + bd.optimize_setpoints() + + bill['temperature'] = [ + bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + if weather_related_usage == 'Both': + regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) + + + if weather_related_usage == 'Cooling': + regr = bd.regression_1(0, cooling_setpoint, bill) + + if weather_related_usage == 'Heating': + if regression_method == 1: + regr = bd.regression_1(heating_setpoint, 0, bill) + + if regression_method ==2: + regr = bd.summer_dhw(heating_setpoint, bill) + + return regr + + @staticmethod + def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): + + ''' + debugged - I think this functioin should be part of BillDisaggregation Class + This function is to calculate the HDD/ CDD with preferred weather period and setpoints + + Args: + heating_setpoint(float) + cooling_setpoint(float) + bill(pd.DataFrame): bills with columns: + 'Bill From Date' + 'Bill To Date' + Returns: + weather_demand(np.array): an array of hddcdd + ''' + + bd = BillDisaggregation(bill, weather_data) + + ahdd = [ + list(bd.hdd(heating_setpoint, xx) for xx in x) + for x in bill['temperature'] + ] + + acdd = [ + list(bd.cdd(cooling_setpoints, xx) for xx in x) + for x in bill['temperature'] + ] + + + # It should be billing period average hdd / days + + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([ + bd.threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + + daily_cdd1 = np.array([ + bd.threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) + + weather_demand = np.array([daily_hdd1, daily_cdd1]).T + + return weather_demand + + def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): + + ''' + the function calls for bill disaggregation module + + ''' + + bd = BillDisaggregation(bill,self.weather) + bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) + + bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ + 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ + 'Heating Setpoint', 'Cooling Setpoint']) + + + output = bd.benchmarking_output() + bill_evaluation = bill_evaluation.append({\ + 'Usage': output[0],\ + 'r squared': output[1],\ + 'regr method': output[2],\ + 'Consumption':output[3],\ + 'Heating': format(output[4], '0.0f'),\ + 'Cooling': format(output[5], '0.0f'),\ + 'Non-weather-related-usage': format(output[6], '0.0f'),\ + 'diff':format(output[7],'.2%'),\ + 'hdd': format(output[8], '0.0f'),\ + 'cdd': format(output[9], '0.0f'),\ + 'Days in Bill': output[10],\ + 'Unit Price':bd.avg_unit_price,\ + 'Heating Setpoint': output[11],\ + 'Cooling Setpoint': output[12] + }, ignore_index = True) + + return bd, bill_evaluation + + def usage_not_related_to_weather(self, bill): + + ''' + To return heating/cooling coef and daily non-weather-related-usage when the usage is not related to weather change + + Args: + bill(pd.DataFrame): a utility bill whose usage dose not relate to weather change + + Return: + bill_metrix(dictionary): a dictionary of heating coef, cooling coef, intercept + ''' + + non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) + heating_coef = 0 + cooling_coef = 0 + + bill_metrix = {'heating_coef': 0, + 'cooling_coef': 0, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def cal_heating_coef(self, regr_heating_coef): + ''' + Calculation for cooling coefficiency with the consideration of changing factors for projection purpose + - need further development + + ''' + heating_coef = regr_heating_coef + return heating_coef + + + def cal_cooling_coef(self, regr_cooling_coef): + ''' + Calculation for cooling coefficiency for projection purpose - need further development + ''' + cooling_coef = regr_cooling_coef + return cooling_coef + + def occup(self): + '''occupancy change pre & post retrofit''' + occupancy_change = 1 + return occupancy_change + + + def usage_related_to_weather(self, regr, regression_method): + + ''' + Args: + regr(list): + regr[0] is regression model + regr[1] is r-squared + regr[2] is hdd/cdd through the regression + regr[3] is the bills (optional, only for regression method 2) + regression_method(int): 1 or 2 + + Returns: + bill_metrix(Dictionary): as defined below + + + ???? Doris: I want to keep the same input as - non weather related usage matrix + ''' + if regression_method == 1: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = regr_model.coef_[1] + non_weather_related_daily_usage = regr_model.intercept_ + + + if regression_method == 2: + regr_model = regr[0] + heating_coef = regr_model.coef_[0] + cooling_coef = 0 + non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] + + bill_metrix = {'heating_coef': heating_coef, + 'cooling_coef': cooling_coef, + 'non_weather_related_daily_usage': non_weather_related_daily_usage} + + return bill_metrix + + + def annual_bill_pre_retrofit(self, bill): + ''' + Return the latest/nearest annual bill to performance analysis, + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of pre_retrofit period + + Return: + annual_bill(pd.DataFrame) + + ''' + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + + bill_last_date = bill['Bill To Date'].iloc[-1] + proposed_first_date = bill_last_date - timedelta(365) + bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) + + index = bill.flag.lt(-1).idxmin() + + if index == 0: + annual_bill = bill.drop('flag', axis=1) + else: + annual_bill = bill[index-1:].drop('flag', axis=1) + + return annual_bill + + + def annual_bill_post_retrofit(self, bill): + ''' + Return the latest/nearest annual bill for post-retrofit period + however, it will only return natural billing period, which means it could be around 365 but not exact 365 days + + Args: + bill(pd.DataFrame): bill of post_retrofit + + Return: + annual_bill(pd.DataFrame) + ''' + + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill = bill.drop('index', axis=1) + + bill = bill.reset_index(drop=True) + bill_first_date = bill['Bill From Date'].iloc[0] + proposed_last_date = bill_first_date + timedelta(365) + + bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) + index = bill.flag.lt(-1).idxmin() + + annual_bill = bill[0:index].drop('flag', axis=1) + if sum(annual_bill['Days In Bill']) < 360: + annual_bill = bill[0:index+1] + + return annual_bill + + + def baseline_bill(self, pre_bill): + + ''' + breakdown the bill of pre_retrofit period if there is long billing period and + return the billing data will be used as 'Baseline Bill' + + ''' + bk = BreakdownLongBill(pre_bill, self.weather) + pre_bill_breakdown = bk.long_bill_breakdown() + baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) + + return baseline_bill + + def reporting_bill(self, post_bill): + bk = BreakdownLongBill(post_bill, self.weather) + bill_breakdown = bk.long_bill_breakdown() + reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) + + return reporting_bill + + def main(self): + + ''' + Function to calcuate the energy usage for post_retrofit period using baseline conditions - + + ''' + # format the bill for later analysis + bill_bd = BillDisaggregation(self.original_bill, self.weather) + bill_bd.optimize_setpoints() + bill = bill_bd.formatted_bill + + + pre_bill, post_bill = self.split_bill(bill) + + #raw bill of pre-retrofit period + self.pre_bill = pre_bill + + assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' + + #baseline bill + self.baseline = self.baseline_bill(pre_bill) + + #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long + self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() + + #calcuate the all prejected baseline for all the post retrofit billing period + post = projection_baseline(self.baseline, self.post_bill, self.weather) + self.projection_post_bill = post.projection() + self.setpoints = {'heating': post.hp, + 'cooling': post.cp} + + #calculate the projected baseline for baseline period + + baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) + self.projection_baseline_bill = baseline_usage_baseline_period.projection() + + + #calculate the projected baseline for reporting period + self.reporting = self.reporting_bill(self.post_bill) + report = projection_baseline(self.baseline, self.reporting, self.weather) + self.projection_reporting_bill = report.projection() + self.regression_stat = report.regression_stat + + + + self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) + xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) +# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) +# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) + + @staticmethod + def energy_savings(bill_with_baseline): + ''' + Return the energy saving for bills with baseline usage + + Args: + bill_with_baseline(pd.DataFrame): + 'Bill From Date' + 'Bill To Date' + 'Usage' + 'Days In Bill' + 'Total Charge' + 'temperature' + 'baseline' + + + Return: + savings(Dict): + + 'Measured Energy Usage for Reporting Period'; + 'Baseline Projection for Reporting Period'; + 'Annual Energy Savings'; + 'Energy Reductaion Percentage'; + 'Costs Avoidance'; + 'Annual Energy Costs' + + ''' + + metered_usage = sum(bill_with_baseline['Usage']) + baseline_usage = sum(bill_with_baseline['baseline']) + + energy_savings = baseline_usage - metered_usage + total_costs = sum(bill_with_baseline['Total Charge']) + unit_price = total_costs / metered_usage + saving_percentage = energy_savings/ baseline_usage + dollar_savings = energy_savings * unit_price + + savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), + 'Baseline Projection for Reporting Period': round(baseline_usage,0), + 'Annual Energy Savings': round(energy_savings,0), + 'Energy Reductaion Percentage': saving_percentage, + 'Costs Avoidance': round(dollar_savings,0), + 'Annual Energy Costs': round(total_costs,0)} + + return savings + + + def return_utility_name(self, utility_type): + + if self.utility_type == 1: + name = 'Electricity' + unit = 'kWh' + if self.utility_type == 2: + name = 'Natural Gas' + unit = 'Therms' + if self.utility_type == 3: + name = 'Oil' + unit = 'Gallon' + if self.utility_type == 4: + name = 'Water' + unit = 'CCF' + + return name, unit + + def plot_result(self, pre, projection_post): + ''' + The funtion to plot: + 1. the metered data over baseline and reporting period + 2. the baseline data over reporting period + + Args: + pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) + prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline + + Returns: + metered(pd.DataFrame): Metered usage over baseline + reporting period + + + ''' + + sns.set_style("white") + plt.figure(figsize=(10,6)) + ax = plt.gca() + + + name, unit = self.return_utility_name(self.utility_type) + + post = projection_post.copy() + + bill = self.original_bill + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill = bill.sort_values('Bill From Date').reset_index(drop=True) + + pre_y = pre['Usage'] + post_x = post['Bill To Date'].values + post_pred_y = post['baseline'] + post_y=post['Usage'] + + y_max = max(pre_y) * 1.5 + arrow_location = max(pre_y) * 0.10 + text_loccation = max(pre_y) * 0.13 + const_location = max(pre_y) * 1.1 + legend_location = max(pre_y) * 1.35 + legend_text_location = max(pre_y) * 1.45 + + # Baseline Period + baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) + baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) + plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ + xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center',weight='bold') + + # Reporting Period + print('post retrofit', post) + reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) + + reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) + plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') + + ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ + xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) + + ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ + ha='center',\ + va='center', weight='bold') + + #ECM construction + index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] + + index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] + construction = bill[index_start:index_end] + + + construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) + ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ + xycoords='data', textcoords='data', + arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) + + plt.axvline(construction_median, color='darkgrey', linestyle='-',\ + ymin=(1.1/1.5), + ymax=(1.35/1.5)) + + plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) + + ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ + va='center', weight='bold', color='darkgrey') + + + #Metered Usage Overtime + metered = pre.append(construction).append(post) + metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() + + metered_x = metered['Bill To Date'].values + metered_y = metered['Usage'] + + # Legend - Measured Usage + plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ + ymin=(metered['Usage'].iloc[4]/y_max), + ymax=(1.35/1.5)) + + plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) + + ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='cornflowerblue') + + + # Legend - Adjusted Baseline + plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ + ymin=(post['baseline'].iloc[10]/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) + + ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ + va='center',weight='bold', color='brown') + + + + # Legend - Energy Savings + plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ + ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), + ymax=(1.35/1.5)) + + plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) + + ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ + va='center',weight='bold', color='green') + + + #plots + plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) + plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) + + ax.fill_between(post_x,\ + post_y, post_pred_y,\ + facecolor='mediumturquoise',\ + alpha=0.1, + edgecolor='b',\ + linewidth=0) + + plt.ylim([0,y_max]) + + + for spine in plt.gca().spines.values(): + spine.set_visible(False) + + + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + + fig = plt.gcf() + + # get width and height of axes object to compute + # matching arrowhead length and width + dps = fig.dpi_scale_trans.inverted() + bbox = ax.get_window_extent().transformed(dps) + width, height = bbox.width, bbox.height + + # manual arrowhead width and length + hw = 0.5/20.*(ymax-ymin) + hl = 0.5/20.*(xmax-xmin) + lw = 1 # axis line width + ohg = 0.1 # arrow overhang + + # compute matching arrowhead length and width + yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 + yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 + + # y axis + ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, + head_width=yhw, head_length=yhl, overhang = ohg, + length_includes_head= True, clip_on = False) + # x axis + ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, + head_width=hw, head_length=hl, overhang = ohg, + length_includes_head= True, clip_on = False) + + plt.ylabel('Consumption ({})'.format(unit)) + ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ + size=14, weight='bold',verticalalignment='bottom', alpha=0.8) + +# from matplotlib import rcParams +# rcParams['axes.titlepad'] = 50 + + return metered + + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() diff --git a/bpeng/mv/mv_test.py b/bpeng/mv/mv_test.py deleted file mode 100644 index e69de29..0000000 diff --git a/bpeng/mv/prediction.py b/bpeng/mv/prediction.py deleted file mode 100644 index dce9a35..0000000 --- a/bpeng/mv/prediction.py +++ /dev/null @@ -1,717 +0,0 @@ -%matplotlib notebook -import matplotlib as mpl -mpl.get_backend() - -import matplotlib.pyplot as plt -from matplotlib.pyplot import text - -from datetime import timedelta -import datetime as datetime -import requests - -import pandas as pd -import numpy as np - -from sklearn import linear_model -from scipy.optimize import minimize -import statsmodels.api as sm -from scipy import stats - -import warnings -warnings.simplefilter('ignore') -from dateutil import relativedelta - - -import psycopg2 -import seaborn as sns; sns.set() - -%run bill_disaggregation.ipynb - - - -class MeasurementVerification(): - - ''' - The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C - - - ''' - - - def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ - weather_relate_usage_type, occupancy, utility_type): - - - self.original_bill = bill - self.weather = weather - self.thermal_comf = thermal_comf - self.earlest_retrofit_date = earlest_retrofit_date - self.latest_retrofit_date = latest_retrofit_date - self.pre_weather_related_usage_type = weather_relate_usage_type - self.occupancy = occupancy - self.utility_type = utility_type - self.weather_relate_usage_type = None - self.projection_post_bill = None - self.projection_fig = None - self.pre_bill = None - self.post_bill = None - self.baseline = None - self.reporting = None - self.projection_reporting_bill = None - self.regression_stat = None - self.first_year_saving = None - self.projection_baseline_bill = None - self.setpoints = {} - - #self.non_weather_related_end_uses = {'Miscellanous': 1} - - - - def split_bill(self, raw_bill): - ''' - split raw bill into two section: - pre - retrofit - post - retrofit - - Args: - raw_bill(pd.DataFrame): formatted bill from BillDisaggregation Module - earlest_retrofit_date(str): earlest implementation date of ECMs for this meter - latest_retrofit_date(str): latest implemntation date of ECMs for this meter - - Returns: - pre_bill(pd.DataFrame): raw_bill for pre-retrofit period - post_bill(pd.DataFrame): raw_bill for post_retrotit period - - ''' - - bill = raw_bill.copy() - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - - erd = pd.to_datetime(self.earlest_retrofit_date) - lrd = pd.to_datetime(self.latest_retrofit_date) - - bill = bill.sort_values('Bill From Date').reset_index() - - for i in range(len(bill) - 1): - if (erd>= bill['Bill From Date'].iloc[i] ) & (erd<= bill['Bill To Date'].iloc[i]): - erd_flag = i - #print('erd_flag:', i) - - if (lrd >= bill['Bill From Date'].iloc[i] ) & (lrd <= bill['Bill To Date'].iloc[i]): - lrd_flag = i - #print('lrd_flag:', i) - - pre_bill = bill.iloc[0: erd_flag] - post_bill = bill.iloc[(lrd_flag+1):] - - return pre_bill, post_bill - - @staticmethod - def disaggregate(heating_setpoint, cooling_setpoint, bill, regression_method,\ - weather_related_usage, weather_data): - ''' - Disaggregate bill with all known inputs - - Args: - heating_setpoint(float): - cooling_setpoint(float): - bill(pd.DataFrame): - regression_method(int): - 0 stands for 'both not'; - 1 stands for regression method 1 using multiple variable regression - 2 stands for using summer dhw method to estimate usage - - Returns: - regr(list): - ''' - - bd = BillDisaggregation(bill, weather_data) - bd.optimize_setpoints() - - bill['temperature'] = [ - bd.bill_period_weather(x, y) - for x, y in zip(bill['Bill From Date'], - bill['Bill To Date']) - ] - - if weather_related_usage == 'Both': - regr = bd.regression_1(heating_setpoint, cooling_setpoint, bill) - - - if weather_related_usage == 'Cooling': - regr = bd.regression_1(0, cooling_setpoint, bill) - - if weather_related_usage == 'Heating': - if regression_method == 1: - regr = bd.regression_1(heating_setpoint, 0, bill) - - if regression_method ==2: - regr = bd.summer_dhw(heating_setpoint, bill) - - return regr - - @staticmethod - def weather_demand(heating_setpoint, cooling_setpoints, bill, weather_data): - - ''' - debugged - I think this functioin should be part of BillDisaggregation Class - This function is to calculate the HDD/ CDD with preferred weather period and setpoints - - Args: - heating_setpoint(float) - cooling_setpoint(float) - bill(pd.DataFrame): bills with columns: - 'Bill From Date' - 'Bill To Date' - Returns: - weather_demand(np.array): an array of hddcdd - ''' - - bd = BillDisaggregation(bill, weather_data) - - ahdd = [ - list(bd.hdd(heating_setpoint, xx) for xx in x) - for x in bill['temperature'] - ] - - acdd = [ - list(bd.cdd(cooling_setpoints, xx) for xx in x) - for x in bill['temperature'] - ] - - - # It should be billing period average hdd / days - - daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) - - # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([ - bd.threshold(daily_hdd[x], 0.1) - for x in range(len(daily_hdd)) - ]) - - daily_cdd1 = np.array([ - bd.threshold(daily_cdd[x], 0.1) - for x in range(len(daily_cdd)) - ]) - - weather_demand = np.array([daily_hdd1, daily_cdd1]).T - - return weather_demand - - def bill_disaggregation(self, bill, weather_related_usage_type='Unknown'): - - ''' - the function calls for bill disaggregation module - - ''' - - bd = BillDisaggregation(bill,self.weather) - bd.optimize_setpoints(weather_related_usage = weather_related_usage_type) - - bill_evaluation = pd.DataFrame(columns=['Usage', 'r squared','regr method', 'Consumption', 'Heating',\ - 'Cooling','Non-weather-related-usage','diff','hdd', 'cdd', 'Days in Bill','Unit Price',\ - 'Heating Setpoint', 'Cooling Setpoint']) - - - output = bd.benchmarking_output() - bill_evaluation = bill_evaluation.append({\ - 'Usage': output[0],\ - 'r squared': output[1],\ - 'regr method': output[2],\ - 'Consumption':output[3],\ - 'Heating': format(output[4], '0.0f'),\ - 'Cooling': format(output[5], '0.0f'),\ - 'Non-weather-related-usage': format(output[6], '0.0f'),\ - 'diff':format(output[7],'.2%'),\ - 'hdd': format(output[8], '0.0f'),\ - 'cdd': format(output[9], '0.0f'),\ - 'Days in Bill': output[10],\ - 'Unit Price':bd.avg_unit_price,\ - 'Heating Setpoint': output[11],\ - 'Cooling Setpoint': output[12] - }, ignore_index = True) - - return bd, bill_evaluation - - def usage_not_related_to_weather(self, bill): - - ''' - To return heating/cooling coef and daily non-weather-related-usage when the usage is not related to weather change - - Args: - bill(pd.DataFrame): a utility bill whose usage dose not relate to weather change - - Return: - bill_metrix(dictionary): a dictionary of heating coef, cooling coef, intercept - ''' - - non_weather_related_daily_usage = sum(bill['Usage'])/ sum(bill['Days In Bill']) - heating_coef = 0 - cooling_coef = 0 - - bill_metrix = {'heating_coef': 0, - 'cooling_coef': 0, - 'non_weather_related_daily_usage': non_weather_related_daily_usage} - - return bill_metrix - - - def cal_heating_coef(self, regr_heating_coef): - ''' - Calculation for cooling coefficiency with the consideration of changing factors for projection purpose - - need further development - - ''' - heating_coef = regr_heating_coef - return heating_coef - - - def cal_cooling_coef(self, regr_cooling_coef): - ''' - Calculation for cooling coefficiency for projection purpose - need further development - ''' - cooling_coef = regr_cooling_coef - return cooling_coef - - def occup(self): - '''occupancy change pre & post retrofit''' - occupancy_change = 1 - return occupancy_change - - - def usage_related_to_weather(self, regr, regression_method): - - ''' - Args: - regr(list): - regr[0] is regression model - regr[1] is r-squared - regr[2] is hdd/cdd through the regression - regr[3] is the bills (optional, only for regression method 2) - regression_method(int): 1 or 2 - - Returns: - bill_metrix(Dictionary): as defined below - - - ???? Doris: I want to keep the same input as - non weather related usage matrix - ''' - if regression_method == 1: - regr_model = regr[0] - heating_coef = regr_model.coef_[0] - cooling_coef = regr_model.coef_[1] - non_weather_related_daily_usage = regr_model.intercept_ - - - if regression_method == 2: - regr_model = regr[0] - heating_coef = regr_model.coef_[0] - cooling_coef = 0 - non_weather_related_daily_usage = regr[3]['dhw']/regr[3]['Days In Bill'] - - bill_metrix = {'heating_coef': heating_coef, - 'cooling_coef': cooling_coef, - 'non_weather_related_daily_usage': non_weather_related_daily_usage} - - return bill_metrix - - - def annual_bill_pre_retrofit(self, bill): - ''' - Return the latest/nearest annual bill to performance analysis, - however, it will only return natural billing period, which means it could be around 365 but not exact 365 days - - Args: - bill(pd.DataFrame): bill of pre_retrofit period - - Return: - annual_bill(pd.DataFrame) - - ''' - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - - bill_last_date = bill['Bill To Date'].iloc[-1] - proposed_first_date = bill_last_date - timedelta(365) - bill['flag'] = bill['Bill From Date'].apply(lambda x: (x- proposed_first_date).days) - - index = bill.flag.lt(-1).idxmin() - - if index == 0: - annual_bill = bill.drop('flag', axis=1) - else: - annual_bill = bill[index-1:].drop('flag', axis=1) - - return annual_bill - - - def annual_bill_post_retrofit(self, bill): - ''' - Return the latest/nearest annual bill for post-retrofit period - however, it will only return natural billing period, which means it could be around 365 but not exact 365 days - - Args: - bill(pd.DataFrame): bill of post_retrofit - - Return: - annual_bill(pd.DataFrame) - ''' - - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill = bill.drop('index', axis=1) - - bill = bill.reset_index(drop=True) - bill_first_date = bill['Bill From Date'].iloc[0] - proposed_last_date = bill_first_date + timedelta(365) - - bill['flag'] = bill['Bill To Date'].apply(lambda x: (x- proposed_last_date).days) - index = bill.flag.lt(-1).idxmin() - - annual_bill = bill[0:index].drop('flag', axis=1) - if sum(annual_bill['Days In Bill']) < 360: - annual_bill = bill[0:index+1] - - return annual_bill - - - def baseline_bill(self, pre_bill): - - ''' - breakdown the bill of pre_retrofit period if there is long billing period and - return the billing data will be used as 'Baseline Bill' - - ''' - bk = BreakdownLongBill(pre_bill, self.weather) - pre_bill_breakdown = bk.long_bill_breakdown() - baseline_bill = self.annual_bill_pre_retrofit(pre_bill_breakdown).reset_index(drop=True) - - return baseline_bill - - def reporting_bill(self, post_bill): - bk = BreakdownLongBill(post_bill, self.weather) - bill_breakdown = bk.long_bill_breakdown() - reporting_bill = self.annual_bill_post_retrofit(bill_breakdown).reset_index(drop=True) - - return reporting_bill - - def main(self): - - ''' - Function to calcuate the energy usage for post_retrofit period using baseline conditions - - - ''' - # format the bill for later analysis - bill_bd = BillDisaggregation(self.original_bill, self.weather) - bill_bd.optimize_setpoints() - bill = bill_bd.formatted_bill - - - pre_bill, post_bill = self.split_bill(bill) - - #raw bill of pre-retrofit period - self.pre_bill = pre_bill - - assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' - - #baseline bill - self.baseline = self.baseline_bill(pre_bill) - - #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long - self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() - - #calcuate the all prejected baseline for all the post retrofit billing period - post = projection_baseline(self.baseline, self.post_bill, self.weather) - self.projection_post_bill = post.projection() - self.setpoints = {'heating': post.hp, - 'cooling': post.cp} - - #calculate the projected baseline for baseline period - - baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) - self.projection_baseline_bill = baseline_usage_baseline_period.projection() - - - #calculate the projected baseline for reporting period - self.reporting = self.reporting_bill(self.post_bill) - report = projection_baseline(self.baseline, self.reporting, self.weather) - self.projection_reporting_bill = report.projection() - self.regression_stat = report.regression_stat - - - - self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) - xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) -# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) -# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) - - @staticmethod - def energy_savings(bill_with_baseline): - ''' - Return the energy saving for bills with baseline usage - - Args: - bill_with_baseline(pd.DataFrame): - 'Bill From Date' - 'Bill To Date' - 'Usage' - 'Days In Bill' - 'Total Charge' - 'temperature' - 'baseline' - - - Return: - savings(Dict): - - 'Measured Energy Usage for Reporting Period'; - 'Baseline Projection for Reporting Period'; - 'Annual Energy Savings'; - 'Energy Reductaion Percentage'; - 'Costs Avoidance'; - 'Annual Energy Costs' - - ''' - - metered_usage = sum(bill_with_baseline['Usage']) - baseline_usage = sum(bill_with_baseline['baseline']) - - energy_savings = baseline_usage - metered_usage - total_costs = sum(bill_with_baseline['Total Charge']) - unit_price = total_costs / metered_usage - saving_percentage = energy_savings/ baseline_usage - dollar_savings = energy_savings * unit_price - - savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), - 'Baseline Projection for Reporting Period': round(baseline_usage,0), - 'Annual Energy Savings': round(energy_savings,0), - 'Energy Reductaion Percentage': saving_percentage, - 'Costs Avoidance': round(dollar_savings,0), - 'Annual Energy Costs': round(total_costs,0)} - - return savings - - - def return_utility_name(self, utility_type): - - if self.utility_type == 1: - name = 'Electricity' - unit = 'kWh' - if self.utility_type == 2: - name = 'Natural Gas' - unit = 'Therms' - if self.utility_type == 3: - name = 'Oil' - unit = 'Gallon' - if self.utility_type == 4: - name = 'Water' - unit = 'CCF' - - return name, unit - - def plot_result(self, pre, projection_post): - ''' - The funtion to plot: - 1. the metered data over baseline and reporting period - 2. the baseline data over reporting period - - Args: - pre(pd.DataFrame): utility bills of pre retrofit (not the baseline bills) - prejection_post(pd.DataFrame): utility bills of post retrofit with prejected baseline - - Returns: - metered(pd.DataFrame): Metered usage over baseline + reporting period - - - ''' - - sns.set_style("white") - plt.figure(figsize=(10,6)) - ax = plt.gca() - - - name, unit = self.return_utility_name(self.utility_type) - - post = projection_post.copy() - - bill = self.original_bill - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill = bill.sort_values('Bill From Date').reset_index(drop=True) - - pre_y = pre['Usage'] - post_x = post['Bill To Date'].values - post_pred_y = post['baseline'] - post_y=post['Usage'] - - y_max = max(pre_y) * 1.5 - arrow_location = max(pre_y) * 0.10 - text_loccation = max(pre_y) * 0.13 - const_location = max(pre_y) * 1.1 - legend_location = max(pre_y) * 1.35 - legend_text_location = max(pre_y) * 1.45 - - # Baseline Period - baseline_start_date = pd.to_datetime(pre['Bill From Date'].iloc[1]) - baseline_end_date = pd.to_datetime(pre['Bill To Date'].iloc[-1]) - plt.axvline(baseline_end_date, color='darkgrey', linestyle='--') - - ax.annotate('', xy=(pre['Bill From Date'].iloc[1],arrow_location),\ - xytext=(pre['Bill To Date'].iloc[-1],arrow_location), \ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) - - ax.annotate('Baseline Period', xy=(pre['Bill From Date'].iloc[6],text_loccation),\ - ha='center',\ - va='center',weight='bold') - - # Reporting Period - print('post retrofit', post) - reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) - - reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) - plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') - - ax.annotate('', xy=(post['Bill From Date'].iloc[1],arrow_location),\ - xytext=(post['Bill To Date'].iloc[-1],arrow_location), \ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='grey')) - - ax.annotate('Reporting Period', xy=(post['Bill From Date'].iloc[6],text_loccation),\ - ha='center',\ - va='center', weight='bold') - - #ECM construction - index_start = bill[bill['Bill To Date'] == baseline_end_date].index[0] - - index_end = bill[bill['Bill From Date'] == reporting_start_date].index[0] - construction = bill[index_start:index_end] - - - construction_median = pd.to_datetime((reporting_start_date-baseline_end_date)/2 + baseline_end_date) - ax.annotate('', xy=(baseline_end_date,const_location), xytext=(reporting_start_date,const_location),\ - xycoords='data', textcoords='data', - arrowprops=dict(arrowstyle='<->',lw=1.5,color='darkgrey')) - - plt.axvline(construction_median, color='darkgrey', linestyle='-',\ - ymin=(1.1/1.5), - ymax=(1.35/1.5)) - - plt.plot(construction_median,legend_location,'o',color='darkgrey',linewidth=10) - - ax.annotate('ECM\nConstruction', xy=(construction_median,legend_text_location), ha='center', \ - va='center', weight='bold', color='darkgrey') - - - #Metered Usage Overtime - metered = pre.append(construction).append(post) - metered = metered.drop(['baseline','temperature'], axis=1).drop_duplicates() - - metered_x = metered['Bill To Date'].values - metered_y = metered['Usage'] - - # Legend - Measured Usage - plt.axvline(metered['Bill To Date'].iloc[4], color='cornflowerblue', linestyle='-',\ - ymin=(metered['Usage'].iloc[4]/y_max), - ymax=(1.35/1.5)) - - plt.plot(metered['Bill To Date'].iloc[4],legend_location,'o',color='cornflowerblue',linewidth=10) - - ax.annotate('Measured\nUsage', xy=(metered['Bill To Date'].iloc[4],legend_text_location), ha='center', \ - va='center',weight='bold', color='cornflowerblue') - - - # Legend - Adjusted Baseline - plt.axvline(post['Bill To Date'].iloc[10], color='brown', linestyle='-',\ - ymin=(post['baseline'].iloc[10]/y_max), - ymax=(1.35/1.5)) - - plt.plot(post['Bill To Date'].iloc[10],legend_location,'o',color='brown',linewidth=10) - - ax.annotate('Adjusted Baseline\nUsage', xy=(post['Bill To Date'].iloc[10],legend_text_location), ha='center', \ - va='center',weight='bold', color='brown') - - - - # Legend - Energy Savings - plt.axvline(post['Bill To Date'].iloc[4], color='green', linestyle='-',\ - ymin=((post['baseline'].iloc[4]+post['Usage'].iloc[4])/2/y_max), - ymax=(1.35/1.5)) - - plt.plot(post['Bill To Date'].iloc[4],legend_location,'o',color='green',linewidth=10) - - ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ - va='center',weight='bold', color='green') - - - #plots - plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) - plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) - - ax.fill_between(post_x,\ - post_y, post_pred_y,\ - facecolor='mediumturquoise',\ - alpha=0.1, - edgecolor='b',\ - linewidth=0) - - plt.ylim([0,y_max]) - - - for spine in plt.gca().spines.values(): - spine.set_visible(False) - - - xmin, xmax = ax.get_xlim() - ymin, ymax = ax.get_ylim() - - fig = plt.gcf() - - # get width and height of axes object to compute - # matching arrowhead length and width - dps = fig.dpi_scale_trans.inverted() - bbox = ax.get_window_extent().transformed(dps) - width, height = bbox.width, bbox.height - - # manual arrowhead width and length - hw = 0.5/20.*(ymax-ymin) - hl = 0.5/20.*(xmax-xmin) - lw = 1 # axis line width - ohg = 0.1 # arrow overhang - - # compute matching arrowhead length and width - yhw = hw/(ymax-ymin)*(xmax-xmin)* height/width * 1.2 - yhl = hl/(xmax-xmin)*(ymax-ymin)* width/height *1.2 - - # y axis - ax.arrow(xmin, 0, 0, ymax-ymin, fc='k', ec='k', lw = lw, - head_width=yhw, head_length=yhl, overhang = ohg, - length_includes_head= True, clip_on = False) - # x axis - ax.arrow(xmin, 0., (xmax-xmin), 0., fc='k', ec='k', lw = lw, - head_width=hw, head_length=hl, overhang = ohg, - length_includes_head= True, clip_on = False) - - plt.ylabel('Consumption ({})'.format(unit)) - ax.set_title('{} Consumption of Baseline and Reporting Period'.format(name),\ - size=14, weight='bold',verticalalignment='bottom', alpha=0.8) - -# from matplotlib import rcParams -# rcParams['axes.titlepad'] = 50 - - return metered - - - @staticmethod - def ols_regression(X,y): - ''' - Return the summary stats for ordinary linear regression - ''' - - X2 = sm.add_constant(X) - est = sm.OLS(y, X2) - est2 = est.fit() - return est2.summary() -- GitLab From 9fe89f8cde7347adc6971726578f54255fdc87f3 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 16:02:58 -0400 Subject: [PATCH 89/97] added m&v from jupyter which runs code locally now --- bpeng/mv/measurementVerification.py | 444 ++++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py index 3ab95e3..b127799 100644 --- a/bpeng/mv/measurementVerification.py +++ b/bpeng/mv/measurementVerification.py @@ -704,3 +704,447 @@ class MeasurementVerification(): est = sm.OLS(y, X2) est2 = est.fit() return est2.summary() + + +class projection_baseline(): + + def __init__(self, baseline_bill_raw, reporting_bill_raw, weather): + + self.weather = weather + self.baseline_bill = None + self.reporting_bill = None + self.baseline_bill_raw = baseline_bill_raw + self.reporting_bill_raw = reporting_bill_raw + self.base_bd = None + self.regression_stat = None + self.hp = None + self.cp = None + + @staticmethod + def ols_regression(X,y): + ''' + Return the summary stats for ordinary linear regression + ''' + + X2 = sm.add_constant(X) + est = sm.OLS(y, X2) + est2 = est.fit() + return est2.summary() + + + def projection(self): + + # only change for the reports + + self.base_bd = BillDisaggregation(self.baseline_bill_raw, self.weather) + self.base_bd.optimize_setpoints(weather_related_usage='Heating') + + + report_bd = BillDisaggregation(self.reporting_bill_raw, self.weather) + report_bd.optimize_setpoints() + + self.hp = self.base_bd.heating_set_point + self.cp = self.base_bd.cooling_set_point + + baseline_bill = self.base_bd.processed_bill + baseline_bill['Unit Price'] = baseline_bill['Total Charge'] / baseline_bill['Usage'] + self.baseline_bill = baseline_bill + + self.reporting_bill = report_bd.processed_bill + #print(self.reporting_bill) + + + regr_method = self.base_bd.regression_method + + if regr_method == 0: + proj_bill = self.r0_proj(self.baseline_bill,self.reporting_bill) + if regr_method == 1: + proj_bill = self.r1_proj(self.baseline_bill,self.reporting_bill) + if regr_method == 2: + proj_bill = self.r2_proj(self.baseline_bill,self.reporting_bill) + + + return proj_bill + + def r0_proj(self, baseline_bill, reporting_bill): + + baseline_id = DateIdentifier(baseline_bill).identifier_matrix() + proj_baseline = reporting_bill + + proj_baseline['baseline'] = [\ + DateIdentifier(proj_baseline).match_identifier_for_billing_period(x, y, baseline_id) + for x,y in zip(proj_baseline['Bill From Date'],\ + proj_baseline['Bill To Date'])] + + return proj_baseline + + + def r1_proj(self, baseline_bill, reporting_bill): + + + X = MeasurementVerification.weather_demand(self.hp,self.cp,baseline_bill,self.weather) + y = np.array(baseline_bill['Usage'] / baseline_bill['Days In Bill']) + + self.regression_stat = projection_baseline.ols_regression(X,y) + + proj_baseline = reporting_bill + + regr_model = linear_model.LinearRegression() + regr_model.fit(X, y) + post_retrofit_daily_hddcdd= MeasurementVerification.weather_demand(self.hp,\ + self.cp,\ + proj_baseline,\ + self.weather) + + predict_baseline = regr_model.predict(post_retrofit_daily_hddcdd) * proj_baseline['Days In Bill'] + + #understand the difference of building + model_diff = (sum(regr_model.predict(X) * baseline_bill['Days In Bill']) - sum(baseline_bill['Usage']))/sum(regr_model.predict(X) * baseline_bill['Days In Bill']) + print('r1',model_diff) + proj_baseline['baseline'] = list(predict_baseline) + + return proj_baseline + + + def r2_proj(self, baseline_bill, reporting_bill): + + + + proj_baseline = reporting_bill + + regr = self.base_bd.summer_dhw(self.hp,baseline_bill) + + baseline_bill_with_baseline = regr[3] + baseline_bill_with_baseline['heating'] = baseline_bill_with_baseline['Usage'] - baseline_bill_with_baseline['dhw'] + daily_baseline = np.average(baseline_bill_with_baseline['dhw']/baseline_bill_with_baseline['Days In Bill']) + + y = baseline_bill_with_baseline['heating'] / baseline_bill_with_baseline['Days In Bill'] + X = regr[2] + + self.regression_stat = projection_baseline.ols_regression(X,y) + + regr_model = linear_model.LinearRegression() + regr_model.fit(X, y) + + post_retrofit_daily_hddcdd= MeasurementVerification.weather_demand(self.hp,\ + 0,\ + proj_baseline,\ + self.weather) + + X2 = (post_retrofit_daily_hddcdd[:,0]).reshape(-1,1) + + predict_baseline = regr_model.predict(X2) * proj_baseline['Days In Bill'] + + model_diff = (sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill'])\ + - sum(baseline_bill_with_baseline['Usage']))/sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill']) + print('r2', model_diff) + + period_baseline = list(proj_baseline['Days In Bill'] * daily_baseline) + proj_baseline['baseline'] = predict_baseline + period_baseline + + return proj_baseline + + +class BreakdownLongBill(): + + ''' + This class is to breakdown a bill which is abnormally long than the others + ''' + + def __init__(self, bill, weather): + self.bill = bill + self.weather = weather + self.bd = None + + + + def days_in_long_bill(self, days): + + interval = days // 30 + reminder = days % 30 + days_in_long_bill = [] + + if reminder > 15: + for intr in range(interval): days_in_long_bill.append(30) + days_in_long_bill.append(reminder) + if reminder <= 15: + for intr in range(interval-1): days_in_long_bill.append(30) + days_in_long_bill.append(reminder+30) + return days_in_long_bill + + def sub_billing_period_dates(self, billing_period_info): + + ''' + Return date ranges for each sub-billing period + + Args: + billing_period_info(series): one raw from the formatted bill + return: + long_bill_period(pd.DateFrame): same columns with formatted bill and drop temperature + + ''' + long_bill_period = pd.DataFrame(columns=['Bill From Date', 'Bill To Date','Usage', 'Days In Bill', + 'Total Charge', 'Unit Price']) + start_date = billing_period_info['Bill From Date'] + days_in_bill = self.days_in_long_bill(int(billing_period_info['Days In Bill'])) + + bill_from_dates = [] + bill_to_dates = [] + + for xx in range(len(days_in_bill)): + days = days_in_bill[xx] + bill_from_dates.append(start_date) + start_date = start_date + timedelta(days) + bill_to_dates.append(start_date) + + long_bill_period['Bill From Date'] = bill_from_dates + long_bill_period['Days In Bill'] = days_in_bill + long_bill_period['Bill To Date'] = bill_to_dates + + return long_bill_period + + def r0_long_bill_breakdown(self, sub_billing_period, billing_period_info): + ''' + breakdown the usage of a long bill when the usage is not related to weather + r0 indicate the regression method is 0 + + Args: + long_bill_period(pd.DataFrame): the output from function - sub_billing_period_dates + billing_period_info(series): one raw from the formatted bill + ''' + + daily_usage = billing_period_info['Usage'] / billing_period_info['Days In Bill'] + unit_price = billing_period_info['Total Charge'] / billing_period_info['Days In Bill'] + + sub_billing_period['Usage'] = sub_billing_period['Days In Bill'] * daily_usage + sub_billing_period['Total Charge'] = sub_billing_period['Days In Bill'] * unit_price + sub_billing_period['Unit Price'] = sub_billing_period['Total Charge']/sub_billing_period['Days In Bill'] + + return sub_billing_period + + + def r1_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 1 + r1 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + cooling_setpoint = self.bd.cooling_set_point + + hddcdd = MeasurementVerification.weather_demand(heating_setpoint,cooling_setpoint,\ + sub_billing_period,self.weather) + sub_billing_period['Usage'] = self.bd.regr_model.predict(hddcdd) * sub_billing_period['Days In Bill'] + unit_price = billing_period_info['Total Charge']/billing_period_info['Usage'] + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + return sub_billing_period + + def r2_long_bill_breakdown(self, sub_billing_period, billing_period_info): + + ''' + breakdown the usage of a long bill when the regression method = 2 + r2 indicates the regression method is 1 + + ''' + + + sub_billing_period['temperature'] = sub_billing_period['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(sub_billing_period['Bill From Date'], + sub_billing_period['Bill To Date']) + ] + + bill = self.bd.processed_bill + bill['temperature'] = bill['temperature'] = [ + self.bd.bill_period_weather(x, y) + for x, y in zip(bill['Bill From Date'], + bill['Bill To Date']) + ] + + heating_setpoint = self.bd.heating_set_point + hdd = MeasurementVerification.weather_demand(heating_setpoint,0,\ + sub_billing_period,self.weather)[:,0].reshape(-1,1) + + + regr = self.bd.summer_dhw(heating_setpoint,bill) + + bill_with_baseline = regr[3] + daily_baseline = np.average(bill_with_baseline['dhw']/bill_with_baseline['Days In Bill']) + + sub_billing_period['heating_usage'] = self.bd.regr_model.predict(hdd) * sub_billing_period['Days In Bill'] + sub_billing_period['dhw'] = sub_billing_period['Days In Bill'] * daily_baseline + sub_billing_period['Usage'] = sub_billing_period['heating_usage'] + sub_billing_period['dhw'] + + unit_price = billing_period_info['Total Charge'] / billing_period_info['Usage'] + + sub_billing_period['Total Charge'] = sub_billing_period['Usage'] * unit_price + sub_billing_period['Unit Price'] = pd.Series([unit_price for x in range(len(sub_billing_period))]) + + sub_billing_period = sub_billing_period.drop('dhw',axis=1) + sub_billing_period = sub_billing_period.drop('heating_usage',axis=1) + + return sub_billing_period + + + def long_bill_breakdown(self): + ''' + breakdown the bills with irregular long billing period, return a formatted bill with sub-billing peirod for the long bill + + Args: + formatted_bill(pd.DataFrame) + Ruturn: + the broken down bill of bills with long billing peirod + + + ''' + self.bd = BillDisaggregation(self.bill, self.weather) + #TODO: Be careful about the inputs + self.bd.optimize_setpoints() + + formatted_bill = self.bd.formatted_bill + regression_method = self.bd.regression_method + regr_model = self.bd.regr_model + bill_quality = self.bd.bill_quality(self.bill) + #bill = formatted_bill.drop('temperature', axis=1) + new_bill = self.bill.copy().reset_index(drop=True) + + if any(i == 'long' for i in bill_quality.flag): + bill_quality_long = bill_quality[bill_quality['flag'] == 'long'] + + if len(bill_quality_long) > 0: + for x in range(len(bill_quality_long)): + + index_of_raw_bill = bill_quality_long['index'].iloc[x] + long_billing_period_info = formatted_bill.iloc[index_of_raw_bill] + days = long_billing_period_info['Days In Bill'] + total_usage = long_billing_period_info['Usage'] + days_breakdown_list = self.days_in_long_bill(days) + long_bill_breakdown_single_raw = self.sub_billing_period_dates(long_billing_period_info) + new_bill = new_bill.drop(index_of_raw_bill) + + if regression_method == 0: + long_bill_breakdown_single_raw = self.r0_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + + if regression_method == 1: + long_bill_breakdown_single_raw = self.r1_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + if regression_method == 2: + long_bill_breakdown_single_raw = self.r2_long_bill_breakdown(long_bill_breakdown_single_raw,\ + long_billing_period_info) + + adjustment_factor = total_usage/sum(long_bill_breakdown_single_raw['Usage']) + long_bill_breakdown_single_raw['Usage'] = long_bill_breakdown_single_raw['Usage'] * adjustment_factor + new_bill = new_bill.append(long_bill_breakdown_single_raw) + + new_bill = new_bill.sort('Bill From Date').reset_index(drop=True) + + else: + new_bill = bill + + return new_bill + + +class DateIdentifier(): + + ''' + In order to add [Month, date] identifer in addition to YYYY/MM/DD + Since the comparasion is happened between same day of different year in M&V process + ''' + + def __init__(self, bill): + self.bill = bill + + def identifier_for_date(self, bill_from_date, bill_to_date): + ''' + return a list of [month, date] for a date range + Args: + bill_from_date(timestamp): + bill_to_date(timestamp): + + Return: + identifier(list): a list of [month, date] of the dates in bewteem the bill_from_date and bill_to_date + ''' + + + days = (bill_to_date - bill_from_date).days + identifier = [] + + for d in range(days): + date = bill_from_date + timedelta(d) + date_id = [date.month, date.day] + identifier.append(date_id) + + return identifier + + def identifier_matrix(self): + ''' + Args: + bill(pd.DataFrame): + + Return: + matrix(pd.DataFrame): + identifier: [month, date], + unit_price: average unit price for a specific billing period + daily_usage: daily_usage for a specific date range + ''' + bill = self.bill + matrix = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + + for bp in range(len(bill)): + from_date = bill['Bill From Date'].iloc[bp] + to_date = bill['Bill To Date'].iloc[bp] + unit_price = bill['Unit Price'].iloc[bp] + daily_usage = bill['Usage'].iloc[bp]/bill['Days In Bill'].iloc[bp] + matrix_temp = pd.DataFrame(columns = ['identifier','unit_price', 'daily_usage']) + matrix_temp['identifier'] = self.identifier_for_date(from_date, to_date) + matrix_temp['unit_price'] = pd.Series([unit_price for x in range(len(matrix_temp))]) + matrix_temp['daily_usage'] = pd.Series([daily_usage for x in range(len(matrix_temp.index))]) + matrix = matrix.append(matrix_temp, ignore_index=True) + + return matrix + + + def match_identifier_for_billing_period(self, from_date, to_date, base_identifier_matrix): + ''' + Given a date range and a identifier matirx, + Return the usage of period by matching the date identifier between the period to the corresponding identifier daily + usage + + Args: + from_date(timestamp) + to_date(timestamp) + base_identifier_matrix(pd.DataFrame): + identifier: month, date + daily_usage + unit_price + + Return: + period_usage + ''' + identifier_list = self.identifier_for_date(from_date, to_date) + + period_usage = 0 + for xx in range(len(identifier_list)): + identi = identifier_list[xx] + temp = base_identifier_matrix[base_identifier_matrix.identifier.apply(lambda x: x == identi)] + unit_price = temp.unit_price.mean() + daily_usage = temp.daily_usage.mean() + period_usage += daily_usage + + return period_usage + + -- GitLab From 993f6299edb81cf518dd3210dc87443027399828 Mon Sep 17 00:00:00 2001 From: Doris H Date: Wed, 31 Jul 2019 17:44:58 -0400 Subject: [PATCH 90/97] class to query bill&weather based on bldgID/Utili --- bpeng/mv/query.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 bpeng/mv/query.py diff --git a/bpeng/mv/query.py b/bpeng/mv/query.py new file mode 100644 index 0000000..194aea8 --- /dev/null +++ b/bpeng/mv/query.py @@ -0,0 +1,63 @@ +from influxdb import InfluxDBClient +import datetime +import pandas as pd +import numpy as np +import psycopg2 + + +class bill_weather(): + + + def __init__(self, utility_id, building_id): + + self.utility_id = utility_id + self.building_id = building_id + self.weather = None + self.bill = None + self.df_utility = None + + def query_weather(self): + #import weather data, and raw bills + + user = 'engineering' + password = 'nPEc9Pz0iV' + dbname = 'weather' + host = '52.206.6.10' + port = 8086 + + influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) + + query_string = "SELECT * from temperature WHERE interval='daily'" + daily_temperature = influx_db.query(query_string).get_points('temperature') + self.weather = pd.DataFrame(daily_temperature) + + def query_utility_bill(self): + hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' + username = 'blocpower' + password = 'Z50Fwgkfi0SsVaBz' + database = 'utility_bill' + + myConnection = psycopg2.connect( host=hostname, user=username, password=password, dbname=database ) + df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) + df_account = pd.read_sql('SELECT * FROM public.account', myConnection) + + df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) + self.df_utility = df_utility + df_account_selected = df_account[df_account['account_type'] == self.utility_id] + + + + end_uses = {'Miscellanous':1} + account = df_account_selected[df_account_selected['building_id'] == self.building_id] + id = account['id'].iloc[0] + new_bill = df_bill[df_bill['account_id'] == id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) + + bill = new_bill[['bill_from_date','bill_to_date','usage','actual_total']] + bill.columns = ['Bill From Date','Bill To Date','Usage','Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + + self.bill = bill -- GitLab From a1f1183c6a05b60b43f1414c5f86fc66508d06b2 Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 8 Aug 2019 15:10:34 -0400 Subject: [PATCH 91/97] return the 'account id' of the queried bill --- bpeng/mv/query.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/bpeng/mv/query.py b/bpeng/mv/query.py index 194aea8..04e6d59 100644 --- a/bpeng/mv/query.py +++ b/bpeng/mv/query.py @@ -1,12 +1,10 @@ -from influxdb import InfluxDBClient -import datetime import pandas as pd -import numpy as np import psycopg2 +from influxdb import InfluxDBClient class bill_weather(): - + # fetch weather data, and raw utility from database def __init__(self, utility_id, building_id): @@ -15,9 +13,9 @@ class bill_weather(): self.weather = None self.bill = None self.df_utility = None + self.account_id = None def query_weather(self): - #import weather data, and raw bills user = 'engineering' password = 'nPEc9Pz0iV' @@ -26,38 +24,41 @@ class bill_weather(): port = 8086 influx_db = InfluxDBClient(host, port, user, password, dbname, ssl=True) - query_string = "SELECT * from temperature WHERE interval='daily'" daily_temperature = influx_db.query(query_string).get_points('temperature') self.weather = pd.DataFrame(daily_temperature) + def query_utility_bill(self): hostname = 'utilitybillprod-rr.czgvwxaefxfj.us-east-1.rds.amazonaws.com' username = 'blocpower' password = 'Z50Fwgkfi0SsVaBz' database = 'utility_bill' - myConnection = psycopg2.connect( host=hostname, user=username, password=password, dbname=database ) + myConnection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database) df_bill = pd.read_sql('SELECT * FROM public.bill', myConnection) df_account = pd.read_sql('SELECT * FROM public.account', myConnection) - df_utility = pd.read_sql('SELECT * FROM public.utility_type', myConnection) self.df_utility = df_utility - df_account_selected = df_account[df_account['account_type'] == self.utility_id] - - - end_uses = {'Miscellanous':1} + df_account_selected = df_account[df_account['account_type'] == self.utility_id] + # return the utility account information based on the 'building id' and 'utility id' + # it may return multiple records if there is the multiple accounts for the same utility for the same building account = df_account_selected[df_account_selected['building_id'] == self.building_id] + + # current code return the first records of the utility of the building for a specific utility type id = account['id'].iloc[0] + self.account_id = id new_bill = df_bill[df_bill['account_id'] == id].fillna(0) - new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + new_bill['delivery_tax'] + new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + \ + new_bill['delivery_tax'] new_bill = new_bill.reset_index(drop=True) - bill = new_bill[['bill_from_date','bill_to_date','usage','actual_total']] - bill.columns = ['Bill From Date','Bill To Date','Usage','Total Charge'] + bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] + bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days self.bill = bill + -- GitLab From 4fe7a1b9fa9b6c41af02801522f038dff2290bae Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 8 Aug 2019 16:23:44 -0400 Subject: [PATCH 92/97] based on master - run the code --- bpeng/bill/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/__init__.py b/bpeng/bill/__init__.py index 890b382..fca9e1b 100644 --- a/bpeng/bill/__init__.py +++ b/bpeng/bill/__init__.py @@ -1 +1 @@ -from .test import BillDisaggregationDriver +from .disaggregate import BillDisaggregation -- GitLab From 78ae964f0523508ae00dab9c8405d42cbe09370a Mon Sep 17 00:00:00 2001 From: Doris H Date: Thu, 8 Aug 2019 16:24:09 -0400 Subject: [PATCH 93/97] return addtional outputs to database --- bpeng/mv/measurementVerification.py | 71 +++++++++++++---------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py index b127799..78cee12 100644 --- a/bpeng/mv/measurementVerification.py +++ b/bpeng/mv/measurementVerification.py @@ -1,4 +1,8 @@ +''' +This python class is to calculate energy costs/savings for implemented energy conservasion measures using IPMVP Option C. +Author: Doris Han +''' import matplotlib as mpl mpl.get_backend() @@ -19,6 +23,7 @@ from sklearn import linear_model from scipy.optimize import minimize from scipy import stats from dateutil import relativedelta +from bpeng.bill.awesome_disaggregate import BillDisaggregation class MeasurementVerification(): @@ -26,10 +31,8 @@ class MeasurementVerification(): ''' The class to calcuate energy savings through utility bill and operational information adopts IPMVP option C - ''' - def __init__(self, bill, weather, thermal_comf, earlest_retrofit_date, latest_retrofit_date,\ weather_relate_usage_type, occupancy, utility_type): @@ -397,50 +400,43 @@ class MeasurementVerification(): bill_bd = BillDisaggregation(self.original_bill, self.weather) bill_bd.optimize_setpoints() bill = bill_bd.formatted_bill - - pre_bill, post_bill = self.split_bill(bill) - #raw bill of pre-retrofit period + # raw bill of pre-retrofit period self.pre_bill = pre_bill - assert sum(pre_bill['Days In Bill']) > 365, 'Utility Data is less than 12 months pre-retrofit.' - #baseline bill + # baseline bill self.baseline = self.baseline_bill(pre_bill) - #post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long + # post_retrofit_bill - simple breakdown the post retrofit bill incase it is too long self.post_bill = BreakdownLongBill(post_bill, self.weather).long_bill_breakdown() - #calcuate the all prejected baseline for all the post retrofit billing period + # calcuate the all prejected baseline for all the post retrofit billing period post = projection_baseline(self.baseline, self.post_bill, self.weather) self.projection_post_bill = post.projection() self.setpoints = {'heating': post.hp, 'cooling': post.cp} - #calculate the projected baseline for baseline period - + # calculate the projected baseline for baseline period baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) self.projection_baseline_bill = baseline_usage_baseline_period.projection() - - #calculate the projected baseline for reporting period + # calculate the projected baseline for reporting period self.reporting = self.reporting_bill(self.post_bill) report = projection_baseline(self.baseline, self.reporting, self.weather) self.projection_reporting_bill = report.projection() self.regression_stat = report.regression_stat - - - self.first_year_saving = MeasurementVerification.energy_savings(self.projection_reporting_bill) - xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) -# print('Baseline Period Regression Stats:'+'\n', self.regression_stat) -# print('Frist Year Savings:', pd.DataFrame.from_dict(self., orient='index')) + self.first_year_saving['baseline_metered_energy_usage'] = sum(self.baseline['Usage']) + self.first_year_saving['baseline_from_date'] = self.baseline['Bill From Date'].iloc[0] + self.first_year_saving['baseline_end_date'] = self.baseline['Bill To Date'].iloc[-1] + # xx = self.plot_result(self.projection_baseline_bill, self.projection_reporting_bill) @staticmethod def energy_savings(bill_with_baseline): ''' - Return the energy saving for bills with baseline usage + Return the energy saving for bills with projected baseline usage Args: bill_with_baseline(pd.DataFrame): @@ -471,19 +467,22 @@ class MeasurementVerification(): energy_savings = baseline_usage - metered_usage total_costs = sum(bill_with_baseline['Total Charge']) unit_price = total_costs / metered_usage - saving_percentage = energy_savings/ baseline_usage - dollar_savings = energy_savings * unit_price - - savings = {'Measured Energy Usage for Reporting Period': round(metered_usage,0), - 'Baseline Projection for Reporting Period': round(baseline_usage,0), - 'Annual Energy Savings': round(energy_savings,0), - 'Energy Reductaion Percentage': saving_percentage, - 'Costs Avoidance': round(dollar_savings,0), - 'Annual Energy Costs': round(total_costs,0)} + saving_percentage = energy_savings / baseline_usage + dollar_savings = energy_savings * unit_price + + savings = { + 'report_metered_energy_usage': round(metered_usage, 0), + 'report_baseline_projection_energy_usage': round(baseline_usage, 0), + 'report_total_energy_costs': round(total_costs, 0), + 'annual_energy_usage_avoidance': round(energy_savings, 0), + 'annual_energy_cost_avoidance': round(dollar_savings, 0), + 'first_year_energy_reduction_percentage': saving_percentage, + 'report_from_date': bill_with_baseline['Bill From Date'].iloc[0], + 'report_end_date': bill_with_baseline['Bill To Date'].iloc[-1] + } return savings - def return_utility_name(self, utility_type): if self.utility_type == 1: @@ -534,7 +533,7 @@ class MeasurementVerification(): pre_y = pre['Usage'] post_x = post['Bill To Date'].values post_pred_y = post['baseline'] - post_y=post['Usage'] + post_y = post['Usage'] y_max = max(pre_y) * 1.5 arrow_location = max(pre_y) * 0.10 @@ -558,9 +557,7 @@ class MeasurementVerification(): va='center',weight='bold') # Reporting Period - print('post retrofit', post) reporting_start_date = pd.to_datetime(post['Bill From Date'].iloc[1]) - reporting_end_date = pd.to_datetime(post['Bill To Date'].iloc[-1]) plt.axvline(reporting_start_date, color='darkgrey', linestyle='--') @@ -635,7 +632,6 @@ class MeasurementVerification(): ax.annotate('Energy\nSavings', xy=(post['Bill To Date'].iloc[4],legend_text_location), ha='center', \ va='center',weight='bold', color='green') - #plots plt.plot(metered_x, metered_y, '-o',color='cornflowerblue',linewidth=3.5) plt.plot(post_x, post_pred_y, '--', color='brown', alpha=0.8) @@ -649,11 +645,9 @@ class MeasurementVerification(): plt.ylim([0,y_max]) - for spine in plt.gca().spines.values(): spine.set_visible(False) - xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() @@ -699,7 +693,6 @@ class MeasurementVerification(): ''' Return the summary stats for ordinary linear regression ''' - X2 = sm.add_constant(X) est = sm.OLS(y, X2) est2 = est.fit() @@ -800,7 +793,7 @@ class projection_baseline(): #understand the difference of building model_diff = (sum(regr_model.predict(X) * baseline_bill['Days In Bill']) - sum(baseline_bill['Usage']))/sum(regr_model.predict(X) * baseline_bill['Days In Bill']) - print('r1',model_diff) + #print('r1', model_diff) proj_baseline['baseline'] = list(predict_baseline) return proj_baseline @@ -837,7 +830,7 @@ class projection_baseline(): model_diff = (sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill'])\ - sum(baseline_bill_with_baseline['Usage']))/sum((regr_model.predict(X) + daily_baseline) * baseline_bill_with_baseline['Days In Bill']) - print('r2', model_diff) + #print('r2', model_diff) period_baseline = list(proj_baseline['Days In Bill'] * daily_baseline) proj_baseline['baseline'] = predict_baseline + period_baseline -- GitLab From 811b5f48660bea90deb386eb2f02fd9d7627bc6b Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Thu, 8 Aug 2019 17:56:48 -0400 Subject: [PATCH 94/97] Updates after testing --- bpeng/bill/awesome_disaggregate.py | 2 +- bpeng/mv/measurementVerification.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index a1bccf2..144e0dd 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -940,7 +940,7 @@ class BillDisaggregation(): monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] - monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) + monthly_output = monthly_output.sort_values('Bill From Date').reset_index(drop=True) return monthly_output def non_weahter_related_breakdown(self, end_uses, monthly_output_table): diff --git a/bpeng/mv/measurementVerification.py b/bpeng/mv/measurementVerification.py index 78cee12..afdce63 100644 --- a/bpeng/mv/measurementVerification.py +++ b/bpeng/mv/measurementVerification.py @@ -415,9 +415,15 @@ class MeasurementVerification(): # calcuate the all prejected baseline for all the post retrofit billing period post = projection_baseline(self.baseline, self.post_bill, self.weather) self.projection_post_bill = post.projection() + self.setpoints = {'heating': post.hp, 'cooling': post.cp} + if self.setpoints['cooling'] is np.NaN: + self.setpoints['cooling'] = None + if self.setpoints['heating'] is np.NaN: + self.setpoints['heating'] = None + # calculate the projected baseline for baseline period baseline_usage_baseline_period = projection_baseline(self.baseline, self.baseline, self.weather) self.projection_baseline_bill = baseline_usage_baseline_period.projection() @@ -1043,7 +1049,7 @@ class BreakdownLongBill(): long_bill_breakdown_single_raw['Usage'] = long_bill_breakdown_single_raw['Usage'] * adjustment_factor new_bill = new_bill.append(long_bill_breakdown_single_raw) - new_bill = new_bill.sort('Bill From Date').reset_index(drop=True) + new_bill = new_bill.sort_values('Bill From Date').reset_index(drop=True) else: new_bill = bill -- GitLab From 8d5f8fa25b998572522280bbb3fb09a4516b2eb6 Mon Sep 17 00:00:00 2001 From: Doris H Date: Fri, 9 Aug 2019 14:04:15 -0400 Subject: [PATCH 95/97] resolve billing period issue --- bpeng/bill/awesome_disaggregate.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 144e0dd..b1f2c0e 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1091,7 +1091,7 @@ class BillDisaggregation(): Return the index of the row of raw bill contains the bill date from a normalized bill """ for index, bill in self.formatted_bill.iterrows(): - if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']: + if bill['Bill From Date'] <= norm_bill_date <= bill['Bill To Date']: return index return None @@ -1112,17 +1112,26 @@ class BillDisaggregation(): """ weights = [] total_days = [] + for ind in range(len(index_numdays)): unit_price = self.formatted_bill['Unit Price'][int(index_numdays[ind]['index'])] days_in_that_period = int(index_numdays[ind]['num_days']) weights.append(unit_price * days_in_that_period) total_days.append(days_in_that_period) weighted_unit_price = sum(weights)/sum(total_days) + return weighted_unit_price def find_bills_in_raw(self, norm_bill_from, norm_bill_to): """ Return the index / number of days in each raw bill billing period for a normalized billing period + Arg: + norm_bill_from: first day of a month (normalized bill) + norm_bill_to: last day of a month (normalized bill) + return: + Results(Dic): + 'index': the index of the formatted billing period contains 'norm_bill_from' + 'num_days': # of days of the normalized billing period contains in a specific formatted billing period """ norm_bill_days = (norm_bill_to - norm_bill_from).days @@ -1153,9 +1162,9 @@ class BillDisaggregation(): calculate the unit price for each nomralized billing period """ normalized_unit_price = [] - for m in range(len(mbill)): - from_date = mbill['Bill From Date'].iloc[m] - to_date = mbill['Bill To Date'].iloc[m] + for month in range(len(mbill)): + from_date = mbill['Bill From Date'].iloc[month] + to_date = mbill['Bill To Date'].iloc[month] index_numdays = self.find_bills_in_raw(from_date, to_date) weighted_unit_price_for_this_month = self.weighted_unit_price(index_numdays) normalized_unit_price.append(weighted_unit_price_for_this_month) -- GitLab From f369e05be50a4fe445d5713d930879048bc19712 Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Mon, 26 Aug 2019 17:29:46 -0400 Subject: [PATCH 96/97] change __init__ --- bpeng/bill/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/__init__.py b/bpeng/bill/__init__.py index 890b382..fca9e1b 100644 --- a/bpeng/bill/__init__.py +++ b/bpeng/bill/__init__.py @@ -1 +1 @@ -from .test import BillDisaggregationDriver +from .disaggregate import BillDisaggregation -- GitLab From e935774278195de6e4b87d8924252909ee4e2cb1 Mon Sep 17 00:00:00 2001 From: Aizizi Yigaimu Date: Thu, 29 Aug 2019 15:08:07 -0400 Subject: [PATCH 97/97] Fix empty bill scenario --- bpeng/mv/query.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/bpeng/mv/query.py b/bpeng/mv/query.py index 04e6d59..ff4e6aa 100644 --- a/bpeng/mv/query.py +++ b/bpeng/mv/query.py @@ -47,18 +47,18 @@ class bill_weather(): account = df_account_selected[df_account_selected['building_id'] == self.building_id] # current code return the first records of the utility of the building for a specific utility type - id = account['id'].iloc[0] - self.account_id = id - new_bill = df_bill[df_bill['account_id'] == id].fillna(0) - new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + \ - new_bill['delivery_tax'] - new_bill = new_bill.reset_index(drop=True) + if not account.empty: + id = account['id'].iloc[0] + self.account_id = id + new_bill = df_bill[df_bill['account_id'] == id].fillna(0) + new_bill['actual_total'] = new_bill['delivery_charge'] + new_bill['supply_charge'] + new_bill['esco_charge'] + \ + new_bill['delivery_tax'] + new_bill = new_bill.reset_index(drop=True) - bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] - bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] - bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) - bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) - bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days - - self.bill = bill + bill = new_bill[['bill_from_date', 'bill_to_date', 'usage', 'actual_total']] + bill.columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Total Charge'] + bill['Bill To Date'] = pd.to_datetime(bill['Bill To Date']) + bill['Bill From Date'] = pd.to_datetime(bill['Bill From Date']) + bill['Days In Bill'] = (bill['Bill To Date'] - bill['Bill From Date']).dt.days + self.bill = bill -- GitLab