From f7f15b8bb3a94f913d549845128de385cb8305a5 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 3 Nov 2017 14:44:22 -0400 Subject: [PATCH 01/19] Add awesome disaggregation --- bpeng/bill/awesome_disaggregate.py | 603 +++++++++++++++++++++++++++++ bpeng/bill/disaggregate.py | 2 +- 2 files changed, 604 insertions(+), 1 deletion(-) create mode 100644 bpeng/bill/awesome_disaggregate.py diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py new file mode 100644 index 0000000..95a047b --- /dev/null +++ b/bpeng/bill/awesome_disaggregate.py @@ -0,0 +1,603 @@ +# ''' this file calcuate bill disagregation for multifamily buildings''' + +from datetime import timedelta +import pandas as pd +import numpy as np + +from sklearn import linear_model +from scipy.optimize import minimize + +import matplotlib.pyplot as plt +import warnings +warnings.simplefilter('ignore') + +class BillDisaggregation(): + """ + Class for Bill Disaggregation + + Args: + + bill (pd.DataFrame): raw bill (from parsing) (File like object) + First row (besides column name) are the names + 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names + daily_temp (pd.DataFrame): daily temperature (File like object) + + Attributes: + + output: list + total heating load + heating load for the first year (first 12 bill periods) + heating load of each month + (return NaN if R-squared is low) + """ + # pylint: disable=too-many-instance-attributes + def __init__(self, bill, raw_daily_temp): + + #self.account_info = account_info + self.bill = bill + self.raw_daily_temp = raw_daily_temp + self.processed_bill = None + self.daily_temp = None + self.usage = None + self.regression_method = None + self.heating_comsuption_pred = None + self.cooling_comsuption_pred = None + self.others_comsuption_pred = None + self.regr_model = None + self.r_squared_of_fit = None +# self.heating_load_proportion = None +# self.cooling_load_proportion = None + self.set_points = None + self.days_in_12_bills = None + self.output_table = None + self.output_monthly = None + + def weather_cleaning(self, raw_daily_temp): + ''' + format the daily temperature data from influx query + + ''' + raw_daily_temp.rename(columns={'time':'date','value':'temperature'}, inplace = True) + raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) + raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day), + str(x.date().year)])) + daily_temp = raw_daily_temp + self.daily_temp = daily_temp + + return self.daily_temp + +# def main(self,raw_daily_temp): +# self.daily_temp = self.weather_cleaning(raw_daily_temp) +# print(self.daily_temp.head()) + + def bill_period_weather(self, bill_from_date, bill_end_date): + ''' + get the temperature date between two data, return a list + ''' + #print(self.daily_temp.head()) + end_date_id = self.daily_temp[self.daily_temp.date == bill_end_date].index[0] + start_date_id = self.daily_temp[self.daily_temp.date == bill_from_date].index[0] + return list(self.daily_temp['temperature'][start_date_id:end_date_id]) + + @staticmethod + def cdd(set_point, weather): + ''' + cooling degree day: + Assumption: + cooling setting point shall always higher than 55 F, which is the trigger temperature of the heating system + 存疑 + ''' + + if set_point > 50: + if set_point < weather: + return weather - set_point + return 0 + + @staticmethod + + def hdd(set_point, weather): + ''' + HDD for everyday: + Assumption: + only if the outdoor temperature drop below 60'F, then the heating system may be able to be turn on + + ''' + if (weather < 60) & (set_point > weather): + hdd = set_point - weather + else: + hdd = 0 + + + return hdd + + @staticmethod + def threshold(data, set_point): + '''threshold, if the data is less the set_point, return 0''' + if data <= set_point: + data = 0 + return data + + + @staticmethod + def anomaly_point(alist,thresholds): + + ''' + if there is a certain number in a list is too large or small, return its index location + if false, the number at that location is normal + if true, the number is an outliner should be carefully taken care + ''' + amean = [] + for x in range(len(alist)): + temp = np.hstack((alist[:(x)],alist[(x+1):])) + amean.append(temp.mean()) + index = [] + for x in range(len(alist)): + temp1 = abs(alist[x] - np.array(alist).mean())/np.array(alist).mean() + index.append(temp1 < thresholds) + return index + + # the following functions aim to clean the bill and understand if the bill is in a good shape for analysis + + def bill_formating(self, bill): + + ''' + step 1 to clean the bill: + 1. format the raw bills + 2. drop NAN / duplicates + + ''' + bill_copy = bill.copy() + bill_copy['Bill From Date'] = pd.to_datetime(bill_copy['Bill From Date']) + bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ + str(x.date().year)])) + bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) + bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ + str(x.date().year)])) + + bill_copy = bill_copy[['Bill From Date','Bill To Date','Usage','Days In Bill']] + bill_copy = bill_copy.dropna() + bill_copy = bill_copy.drop_duplicates() + bill_copy = bill_copy.reset_index(drop = True) + + if np.array(bill_copy.shape)[0] == np.array(bill.shape)[0]: + bill_shape_change = 'False' + bill_shape_change = 'True' + + bill_formatted = bill_copy + + return bill_formatted, bill_shape_change + + + def bill_quality(self, bill_formatted): + + ''' + step 2: + check bill quality to see if there is billing period that is too short or too long; + + return a table of the index of abnormal billing period + + ''' + + bill = bill_formatted + bill = pd.DataFrame(bill) + + total_rows = np.array(bill.shape)[0] + timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ + - pd.to_datetime(bill['Bill From Date'].iloc[0]) + total_days_in_bill = timescale.days + days_in_bill = np.array(bill['Days In Bill']) + + #abnormal days in bill will return False + days_quality_index = BillDisaggregation.anomaly_point(list(days_in_bill), 0.5) + days_abn_index = [] + for x in range(len(days_quality_index)): + if days_quality_index[x] == False: + days_abn_index.append(x) + + bill_quality = pd.DataFrame(data = days_abn_index, columns = ['index'] ) + + flag = [] + for xx in range(len(days_abn_index)): + point_index = days_abn_index[xx-1] + if days_in_bill[point_index] < days_in_bill.mean(): + flag.append('short') + elif days_in_bill[point_index] >= days_in_bill.mean(): + flag.append('long') + + bill_quality['flag'] = np.array(flag) + + #length check - ensure the bill are covering more than a year - also need to check the regularity of the bills + if total_rows >= 12 or total_days_in_bill>= 365: + bill_length_flag = 1 + else: + bill_length_flag = 0 + print('Billing period did not cover a full year' ) + bill_quality['bill_length_flag'] = pd.Series(bill_length_flag for x in range(len(bill_quality.index))) + + return bill_quality + + def short_bill_consolidate(self, bill_formatted, bill_quality): + ''' + step 3: consolidation of the bills that are too short compare to others + bill_formatted: df. drop nan/ dupicates + bill quality: df + NOTE: error + ''' + bill_quality_short = bill_quality[bill_quality['flag'] == 'short'] + bill_consi = bill_formatted + #consolidate the billing period that is too short compare to others +# print(bill_formatted) + +# print(bill_quality) + + for xxx in range(len(bill_quality_short)): + + if bill_quality['flag'].iloc[xxx] == 'short': + row_index = bill_quality_short['index'].iloc[xxx] + + if row_index != 0 & row_index != bill_consi.index[-1]: + + if bill_consi['Days In Bill'][int(row_index -1)] <= bill_consi['Days In Bill'][int(row_index +1)]: + + bill_consi['Bill To Date'][int(row_index - 1)] = bill_consi['Bill To Date'][int(row_index)] + bill_consi['Usage'][int(row_index - 1)] = bill_consi['Usage'][int(row_index - 1)] + bill_consi['Usage'][row_index] + bill_consi['Days In Bill'][int(row_index - 1)] = bill_consi['Days In Bill'][int(row_index - 1)] + bill_consi['Days In Bill'][row_index] + else: + bill_consi['Bill From Date'][int(row_index + 1)] = bill_consi['Bill To Date'][int(row_index)] + bill_consi['Usage'][int(row_index + 1)] = bill_consi['Usage'][int(row_index + 1)] + bill_consi['Usage'][row_index] + bill_consi['Days In Bill'][int(row_index + 1)] = bill_consi['Days In Bill'][int(row_index + 1)] + bill_consi['Days In Bill'][row_index] + + if row_index == 0: + bill_consi['Bill From Date'][1] = bill_consi['Bill From Date'][0] + bill_consi['Usage'][1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] + bill_consi['Days In Bill'][1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] + + if row_index == bill_consi.index[-1]: + bill_consi['Bill To Date'][-2] = bill_consi['Bill To Date'][-1] + bill_consi['Usage'][-2] = bill_consi['Usage'][-2] + bill_consi['Usage'][-1] + bill_consi['Days In Bill'][-2] = bill_consi['Days In Bill'][-1] + bill_consi['Days In Bill'][-2] + + #drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should + #taken care with solutions of 'the billing period that is toooo long + # or apply bill quality check again to identify the new index of the peroiod that is too long + + if len(bill_quality_short) != 0: + bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) + + #bill_consi = bill_consi.reset_index(inplace = True) + bill_consi = bill_consi.reset_index(drop = False) + + return bill_consi + + def regression_1(self, hp, cp, processed_bill): + + bill = processed_bill.copy() + consumption = np.array(bill['Usage']/ bill['Days In Bill']) + ahdd = [\ + list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']\ + ] + acdd = [list(BillDisaggregation.cdd(cp, xx) for xx in x) for x in bill['temperature']] + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) + + # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 + daily_hdd1 = np.array([BillDisaggregation.threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) + daily_cdd1 = np.array([BillDisaggregation.threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) + + regression_temp = np.array([daily_hdd1,daily_cdd1]).T + + regr_model = linear_model.LinearRegression() + regr_model.fit(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) + + return regr_model, score, regression_temp + + + def summer_dhw(self, hp, abill): + + ''' + this function shall count the summer month gas usage as dhw usage, and use the dhw usage as the baseline: + this function does not count for cooling consumption + NOTE: USUALLY ERROR + ''' + + bill = abill.copy() + ahdd = np.array((BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']) + + print(bill['temperature']) + + monthly_hdd = [np.sum(ahdd[x]) for x in range(len(ahdd))] + + print('monthly', monthly_hdd) + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) + print('daily_hdd', daily_hdd) + + #daily dhw usage + bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) + dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] #array + dhw_quality_index = BillDisaggregation.anomaly_point(list(dhw_only_consumption), 0.5) #list + + #calculated the average consumption if the monthly is not outliner + + dhw_only_consumption_checked = [] + + for xx in range(len(dhw_only_consumption)): + if dhw_quality_index[xx] == True: + dhw_only_consumption_checked.append(list(dhw_only_consumption)[xx]) + + daily_dhw = np.mean(dhw_only_consumption_checked) + #print('daily_dhw',daily_dhw) + + #print('daily_dhw', daily_dhw) + + bill['dhw'] = daily_dhw * bill['Days In Bill'] + #print(bill['dhw']) + + regression_temp = monthly_hdd.reshape(-1,1) + consumption = bill['Usage'] - bill['dhw'] + + #print( bill['Usage']) + + regr_model = linear_model.LinearRegression() + regr_model.fit(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) + + return regr_model, score, regression_temp, bill + + + def main(self, usage = 'Unknown'): + + self.daily_temp = self.weather_cleaning(self.raw_daily_temp) + quality = self.bill_quality(self.bill) + formatted_bill, shape = self.bill_formating(self.bill) + + if any(i == 'short' for i in quality.flag): + #any(quality.flag.astype(str) == 'long') + self.processed_bill = self.short_bill_consolidate(formatted_bill,quality) + else: + self.processed_bill = formatted_bill + + self.processed_bill['temperature'] = [\ + self.bill_period_weather(x,y) \ + for x, y in zip(self.processed_bill['Bill From Date'], \ + self.processed_bill['Bill To Date']) + ] + #account_type = 2 + regression_method = 1 + + if usage == 'Unknown': + opt = minimize(lambda x: -self.regression_1(x[0],x[1], self.processed_bill)[1], + (65,65), + method='nelder-mead', + options={'xtol': 1e-2, 'disp': False}) + + regr = self.regression_1(opt.x[0],opt.x[1],self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + #print('test') + + if (heating_coef > 0) & (cooling_coef <= 0): + usage = 'Heating' + elif (heating_coef <= 0) & (cooling_coef > 0): + usage = 'Cooling' + elif (heating_coef <= 0) & (cooling_coef <= 0): + usage = 'Both not' + elif (heating_coef >= 0) & (cooling_coef >= 0): + usage = 'Both' + + if usage == 'Both': + opt = minimize(lambda x: -self.regression_1(x[0],x[1], self.processed_bill)[1], + (65,65), + method='nelder-mead', + options={'xtol': 1e-2, 'disp': False}) + + regr = self.regression_1(opt.x[0],opt.x[1],self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + + if (heating_coef > 0) & (cooling_coef <= 0): + usage = 'heating' + cooling_coef = 0 + elif (heating_coef <= 0) & (cooling_coef > 0): + usage = 'cooling' + heating_coef = 0 + elif (heating_coef <= 0) & (cooling_coef <= 0): + usage = 'both not' + heating_coef = 0 + cooling_coef = 0 + + if usage == 'Heating': + opt_1 = minimize(lambda x: -self.regression_1(x,300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, 'disp': False}) + + opt_2 = minimize(lambda x: -self.summer_dhw(x,self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, 'disp': False}) + print(opt_2.fun) + + if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(65, 90)): + opt = opt_2 + regr = self.summer_dhw(opt_2.x[0],self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + regression_method = 2 + #print(opt_2.fun) + else: + opt = opt_1 + regr = self.regression_1(opt.x[0],300,self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + + heating_coef = regr_model.coef_ + cooling_coef = 0 + + + if usage == 'Cooling': + opt = minimize(lambda x: -self.regression_1(x,300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, 'disp': False}) + regr = self.regression_1(opt.x[0],300, self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + + self.usage = usage + self.regression_method = regression_method + + if self.regression_method == 1: + + if regr[1] > 0.4: + self.regr_model = regr_model + self.heating_consumption_pred= np.multiply(hddcdd[:, 0], self.processed_bill['Days In Bill']) * regr_model.coef_[0] + self.cooling_consumption_pred = np.multiply(hddcdd[:, 1], self.processed_bill['Days In Bill']) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] + + real_sum = np.array(self.processed_bill['Usage']) + predict_sum = self.heating_consumption_pred + self.heating_consumption_pred + self.others_consumption_pred + + diff = real_sum - predict_sum + + if self.regr_model.intercept_ < 0: + for i in range(len(diff)): + if diff[i] > 0: + self.others_consumption_pred[i] = diff[i] + else: + #print('Low R-squrared') + self.heating_consumption_pred = self.processed_bill['Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill['Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + + + elif self.regression_method ==2: + self.regr_model = regr_model + #print(regr_model.coef_) + self.heating_consumption_pred= np.array(hddcdd[:, 0]) * self.regr_model.coef_[0] + #print(self.heating_consumption_pred) + self.cooling_consumption_pred = np.array(hddcdd[:, 0]) * 0 + self.others_consumption_pred = self.regr_model.intercept_ + regr[3]['dhw'] + + #print('regression method', test.regression_method) + + bill_cp = self.processed_bill.copy() + #print('lenth of processsed bill', len(self.processed_bill)) + #print('lenth of hdddd', len(hddcdd)) + bill_cp = self.processed_bill[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']] + + bill_cp['Heating Usage'] = self.heating_consumption_pred + bill_cp['Cooling Usage'] = self.cooling_consumption_pred + bill_cp['Other Usage'] = self.others_consumption_pred + + + self.r_squared_of_fit = regr[1] + self.set_points = opt.x + self.output_table = bill_cp + self.output_monthly = BillDisaggregation.output_to_monthly(self.output_table) + + @staticmethod + def output_to_monthly(output): + """ + Transfrom period-wise output to month-wise output + + Args: + + output (pd.DataFrame): a pandas dataframe like `self.output_table` + columns of the dataframe must be (in order): + 'Bill From Date', 'Bill To Date', + 'Days In Bill', 'Usage', 'Heating Usage', + 'Cooling Usage', 'Other Usage' + + Returns: + + pd.DataFrame: result with monthly consumptions + + """ + last_date = pd.to_datetime(output['Bill To Date']).iloc[0] + days_in_recent_year = 365 + if (last_date - timedelta(365)).day != last_date.day: + days_in_recent_year = 366 + days_cumsum = np.array(output['Days In Bill']).cumsum() + try: + periods_in_recent_year = \ + next(i for i, v in enumerate(days_cumsum) if v >= days_in_recent_year) + except StopIteration: + raise ArithmeticError('Days in bill less than one whole year.') + bill_in_recent_year = output.iloc[:(periods_in_recent_year + 1)] + values_in_recent_year = bill_in_recent_year.iloc[:, 2:].values + values_in_recent_year[-1] *= \ + 1 - (days_cumsum[periods_in_recent_year] - days_in_recent_year) \ + / values_in_recent_year[-1][0] + daily_usage = \ + np.concatenate([np.tile(i[1:] / i[0], (int(i[0]), 1)) + for i in values_in_recent_year])[::-1] + day_of_year = last_date.timetuple().tm_yday - 1 + daily_usage = np.roll(daily_usage, day_of_year, axis=0) + month_days = [31, 28, 31, 30, 31, 30, + 31, 31, 30, 31, 30, 31] + if days_in_recent_year == 366: + month_days[1] = 29 + month_cumsum = np.insert(np.cumsum(month_days), 0, 0) + output_monthly = \ + pd.DataFrame([daily_usage[month_cumsum[i]:month_cumsum[i+1]] + .sum(axis=0) for i in range(12)]) + output_monthly.columns = [output.columns[3:]] + output_monthly['Month'] = range(1, 13) + output_monthly = output_monthly.iloc[:, [4, 0, 1, 2, 3]] + return output_monthly + + + def to_json(self, period='bill'): + """ + Output in json file + + Args: + + period (str): 'month' for monthly + 'bill' for each bill period + default 'bill' + + Returns: + + json: output in json format + + """ + if period == 'bill': + return self.output_table.to_json(orient="records") + return self.output_monthly.to_json(orient="records") + + def print_all_features(self): + """ + print the features heating load percentage, cooling load percentage, + r-squared of fit and type of usage. + + """ +# print('Heating load percentage is {:.1%}'.format(self.heating_load_proportion)) +# print('Cooling load percentage is {:.1%}'.format(self.cooling_load_proportion)) + print('R-squared of fit is {}'.format(self.r_squared_of_fit)) + print('Usage is {}'.format(self.usage)) + + +# @staticmethod +# def projection_figure(bill): +# plt.figure(figsize=(10,5)) +# x = pd.to_datetime(bill['Bill From Date']) +# y = bill['Usage'] +# plt.plot(x,y) +# plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + bill['Other Usage'])) +# plt.plot(x, bill['Heating Usage']) +# plt.plot(x, bill['Cooling Usage']) +# plt.legend(['real consumption', 'prejected consumption','prejected heating', 'prejected cooling']) +# #plt.plot(x,(cleaned_bill['total_hdd']/cleaned_bill['Days In Bill'])) +# plt.show() + + + + + diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py index 29b414b..0ee2d8f 100644 --- a/bpeng/bill/disaggregate.py +++ b/bpeng/bill/disaggregate.py @@ -132,7 +132,7 @@ class BillDisaggregation(): [np.mean([BillDisaggregation.cooling(xx, set_cooling) for xx in x]) for x in temperature] regr_model = linear_model.LinearRegression() hddcdd = np.array([daily_hdd, daily_cdd]).T - regr_model.fit(hddcdd, consumption) + regr_model. fit(hddcdd, consumption) return regr_model, regr_model.score(hddcdd, consumption), hddcdd def optimize(self, usage='Unknown'): -- GitLab From b18d0f9be74fdbc126ebe57705c7a8067a074fe8 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 3 Nov 2017 14:48:57 -0400 Subject: [PATCH 02/19] delete the space --- bpeng/bill/disaggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py index 0ee2d8f..29b414b 100644 --- a/bpeng/bill/disaggregate.py +++ b/bpeng/bill/disaggregate.py @@ -132,7 +132,7 @@ class BillDisaggregation(): [np.mean([BillDisaggregation.cooling(xx, set_cooling) for xx in x]) for x in temperature] regr_model = linear_model.LinearRegression() hddcdd = np.array([daily_hdd, daily_cdd]).T - regr_model. fit(hddcdd, consumption) + regr_model.fit(hddcdd, consumption) return regr_model, regr_model.score(hddcdd, consumption), hddcdd def optimize(self, usage='Unknown'): -- GitLab From 548104514b2ac47c69a80e6d7a6011382bc0b8e7 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 17:16:08 -0500 Subject: [PATCH 03/19] improve accuracy of the results --- bpeng/bill/awesome_disaggregate.py | 837 ++++++++++++++++++++--------- 1 file changed, 592 insertions(+), 245 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 95a047b..43b83ff 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -10,6 +10,8 @@ from scipy.optimize import minimize import matplotlib.pyplot as plt import warnings warnings.simplefilter('ignore') +from dateutil import relativedelta + class BillDisaggregation(): """ @@ -24,11 +26,11 @@ class BillDisaggregation(): Attributes: - output: list - total heating load - heating load for the first year (first 12 bill periods) - heating load of each month - (return NaN if R-squared is low) + output: list + total heating load + heating load for the first year (first 12 bill periods) + heating load of each month + (return NaN if R-squared is low) """ # pylint: disable=too-many-instance-attributes def __init__(self, bill, raw_daily_temp): @@ -45,86 +47,132 @@ class BillDisaggregation(): self.others_comsuption_pred = None self.regr_model = None self.r_squared_of_fit = None -# self.heating_load_proportion = None -# self.cooling_load_proportion = None - self.set_points = None + self.heating_set_point = None + self.cooling_set_point = None + self.days_in_bills = None self.days_in_12_bills = None self.output_table = None - self.output_monthly = None + self.most_recent_monthly_output = None + self.unit_price = None def weather_cleaning(self, raw_daily_temp): ''' - format the daily temperature data from influx query + Format the daily temperature data from influx query + + Args: + + raw_daily_temp (influx query): raw temperature data queried from Influx + Returns: + + pd.DateFrame: Returns formatted daily temperature ''' raw_daily_temp.rename(columns={'time':'date','value':'temperature'}, inplace = True) raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day), str(x.date().year)])) daily_temp = raw_daily_temp + daily_temp['date'] = pd.to_datetime(daily_temp['date']) self.daily_temp = daily_temp return self.daily_temp -# def main(self,raw_daily_temp): -# self.daily_temp = self.weather_cleaning(raw_daily_temp) -# print(self.daily_temp.head()) def bill_period_weather(self, bill_from_date, bill_end_date): ''' - get the temperature date between two data, return a list + get the outdoor temperature date between two date, return a list + + Args: + + bill_from_date (Datetime): start date of a period + bill_end_date (Datetime): end date of a period + + Returns: + + list: Returns a list of outdoor temperature for a period ''' - #print(self.daily_temp.head()) end_date_id = self.daily_temp[self.daily_temp.date == bill_end_date].index[0] start_date_id = self.daily_temp[self.daily_temp.date == bill_from_date].index[0] return list(self.daily_temp['temperature'][start_date_id:end_date_id]) @staticmethod - def cdd(set_point, weather): + def cdd(indoor_set_point, outdoor_temp): ''' - cooling degree day: + CDD Assumption: - cooling setting point shall always higher than 55 F, which is the trigger temperature of the heating system - 存疑 + cooling setting point shall always higher than 55 F, + which is the trigger temperature of the heating system + + ?? + set_point is for indoor temperature + ''' - if set_point > 50: - if set_point < weather: - return weather - set_point + if indoor_set_point > 55: + if indoor_set_point < outdoor_temp: + return outdoor_temp - indoor_set_point return 0 @staticmethod - - def hdd(set_point, weather): + def hdd(indoor_set_point, outdoor_temp): ''' - HDD for everyday: + HDD Assumption: - only if the outdoor temperature drop below 60'F, then the heating system may be able to be turn on - + Only if the outdoor temperature drop below 60'F, + then the heating system may be able to be turn on ''' - if (weather < 60) & (set_point > weather): - hdd = set_point - weather + if (outdoor_temp < 60) & (indoor_set_point > outdoor_temp): + hdd = indoor_set_point - outdoor_temp else: hdd = 0 - - return hdd @staticmethod def threshold(data, set_point): - '''threshold, if the data is less the set_point, return 0''' + '''If data is less the set_point, return 0''' if data <= set_point: data = 0 return data + @staticmethod + def outliers_iqr(ys): + ''' + Find outlier using IQR method + + Args: + + ys (list):A list of number needs to be checked for outliners + + Returns: + + list: Returns a list of boolean + True: Outliner + False: Not Outliner + + ''' + quartile_1, quartile_3 = np.percentile(ys, [25, 75]) + iqr = quartile_3 - quartile_1 + lower_bound = quartile_1 - (iqr * 1.5) + upper_bound = quartile_3 + (iqr * 1.5) + return [(x > upper_bound or x < lower_bound) for x in ys] + @staticmethod def anomaly_point(alist,thresholds): ''' - if there is a certain number in a list is too large or small, return its index location - if false, the number at that location is normal - if true, the number is an outliner should be carefully taken care + Find outlier and return its index + + Args: + + alist (list): A list of number needs to be checked for outliners + thresholds (float): a percentage of the difference between the mean of the whole list and + the mean of the list without the outlier + + Returns: + + list: Returns a list the index of the outliner + ''' amean = [] for x in range(len(alist)): @@ -136,17 +184,42 @@ class BillDisaggregation(): index.append(temp1 < thresholds) return index - # the following functions aim to clean the bill and understand if the bill is in a good shape for analysis + @staticmethod + def num_month_dates(last_date_bill, first_date_bill): + '''Return number of month in between two date ''' + lastdate = last_date_bill - timedelta(last_date_bill.day) + firstdate = first_date_bill + timedelta(days=32) + firstdate = firstdate.replace(day = 1) + r = relativedelta.relativedelta(lastdate, firstdate) + num_month = r.years * 12 + r.months +1 + return(num_month) + - def bill_formating(self, bill): + def bill_formating(self, raw_bill): ''' - step 1 to clean the bill: - 1. format the raw bills - 2. drop NAN / duplicates + Bill Cleaning + Step 1: + 1. format each column of the raw bill + 2. drop NAN / duplicates + + Args: + + raw_bill (pd.DataFrame): a raw bill with columns of + 'Bill From Date' + 'Bill To Date' + 'Days In Bill' + 'Usage' + 'Delivery Charge' + 'Supply Charge' + 'Total Charge' + Returns: + pd.DataFrame: a formatted raw_bill + boolean: True - Length of the bill has changed during bill cleaning step 1 + ''' - bill_copy = bill.copy() + bill_copy = raw_bill.copy() bill_copy['Bill From Date'] = pd.to_datetime(bill_copy['Bill From Date']) bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ str(x.date().year)])) @@ -154,16 +227,22 @@ class BillDisaggregation(): bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ str(x.date().year)])) - bill_copy = bill_copy[['Bill From Date','Bill To Date','Usage','Days In Bill']] - bill_copy = bill_copy.dropna() - bill_copy = bill_copy.drop_duplicates() - bill_copy = bill_copy.reset_index(drop = True) + bill_copy = bill_copy[['Bill From Date','Bill To Date','Usage','Days In Bill','Total Charge']] + bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > pd.to_datetime(bill_copy['Bill From Date'])] + bill_copy1['Bill From Date'] = pd.to_datetime(bill_copy1['Bill From Date']) + bill_copy2 = bill_copy1.sort_values('Bill From Date') + bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) + bill_copy2 = bill_copy2.dropna() + bill_copy2 = bill_copy2.drop_duplicates() + bill_copy2 = bill_copy2.reset_index(drop = True) - if np.array(bill_copy.shape)[0] == np.array(bill.shape)[0]: + if np.array(bill_copy2.shape)[0] == np.array(raw_bill.shape)[0]: bill_shape_change = 'False' bill_shape_change = 'True' - bill_formatted = bill_copy + bill_formatted = bill_copy2 + + self.unit_price = (sum(bill_formatted['Total Charge'])) / (sum(bill_formatted['Usage'])) return bill_formatted, bill_shape_change @@ -171,27 +250,36 @@ class BillDisaggregation(): def bill_quality(self, bill_formatted): ''' - step 2: - check bill quality to see if there is billing period that is too short or too long; + Bill Cleaning + Step 2: + 1. Check each billing period to find a bill is too short or too long; - return a table of the index of abnormal billing period + Args: + bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + Returns: + pd.DataFrame: a dataframe with columns: + 'index': the index of the billing period which is identified as an outlier + 'flag': to indicate either it is too long or too short ''' - bill = bill_formatted + bill = bill_formatted.copy() bill = pd.DataFrame(bill) total_rows = np.array(bill.shape)[0] - timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ - - pd.to_datetime(bill['Bill From Date'].iloc[0]) - total_days_in_bill = timescale.days +# timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ +# - pd.to_datetime(bill['Bill From Date'].iloc[0]) +# total_days_in_bill = timescale.days days_in_bill = np.array(bill['Days In Bill']) #abnormal days in bill will return False - days_quality_index = BillDisaggregation.anomaly_point(list(days_in_bill), 0.5) + days_quality_index_inti = BillDisaggregation.outliers_iqr(list(days_in_bill)) + days_quality_index_recheck = np.array([x not in range(25,35) for x in days_in_bill]) + days_quality_index = list(np.array(days_quality_index_inti) * np.array(days_quality_index_recheck)) + days_abn_index = [] for x in range(len(days_quality_index)): - if days_quality_index[x] == False: + if days_quality_index[x] == True: days_abn_index.append(x) bill_quality = pd.DataFrame(data = days_abn_index, columns = ['index'] ) @@ -205,47 +293,43 @@ class BillDisaggregation(): flag.append('long') bill_quality['flag'] = np.array(flag) - - #length check - ensure the bill are covering more than a year - also need to check the regularity of the bills - if total_rows >= 12 or total_days_in_bill>= 365: - bill_length_flag = 1 - else: - bill_length_flag = 0 - print('Billing period did not cover a full year' ) - bill_quality['bill_length_flag'] = pd.Series(bill_length_flag for x in range(len(bill_quality.index))) - return bill_quality def short_bill_consolidate(self, bill_formatted, bill_quality): ''' - step 3: consolidation of the bills that are too short compare to others - bill_formatted: df. drop nan/ dupicates - bill quality: df + Bill Cleaning + Step 3: + consolidation of the bills that are too short compare to others NOTE: error + + Args: + bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1 + bill_quality(pd.DataFrame): bill quality from Step 2 + + Returns: + pd.DataFrame: the cleaned bill and ready for analysis + ''' bill_quality_short = bill_quality[bill_quality['flag'] == 'short'] - bill_consi = bill_formatted + bill_consi = bill_formatted.copy() #consolidate the billing period that is too short compare to others -# print(bill_formatted) - -# print(bill_quality) for xxx in range(len(bill_quality_short)): if bill_quality['flag'].iloc[xxx] == 'short': row_index = bill_quality_short['index'].iloc[xxx] - if row_index != 0 & row_index != bill_consi.index[-1]: + if (row_index != 0) & (row_index != bill_consi.index[-1]): if bill_consi['Days In Bill'][int(row_index -1)] <= bill_consi['Days In Bill'][int(row_index +1)]: - + #print('row index', row_index) bill_consi['Bill To Date'][int(row_index - 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index - 1)] = bill_consi['Usage'][int(row_index - 1)] + bill_consi['Usage'][row_index] - bill_consi['Days In Bill'][int(row_index - 1)] = bill_consi['Days In Bill'][int(row_index - 1)] + bill_consi['Days In Bill'][row_index] + bill_consi['Usage'][int(row_index - 1)] = bill_consi['Usage'][int(row_index - 1)] + bill_consi['Usage'][int(row_index)] + bill_consi['Days In Bill'][int(row_index - 1)] = bill_consi['Days In Bill'][int(row_index - 1)] + bill_consi['Days In Bill'][int(row_index)] else: bill_consi['Bill From Date'][int(row_index + 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index + 1)] = bill_consi['Usage'][int(row_index + 1)] + bill_consi['Usage'][row_index] - bill_consi['Days In Bill'][int(row_index + 1)] = bill_consi['Days In Bill'][int(row_index + 1)] + bill_consi['Days In Bill'][row_index] + bill_consi['Usage'][int(row_index + 1)] = bill_consi['Usage'][int(row_index + 1)] + bill_consi['Usage'][int(row_index)] + bill_consi['Days In Bill'][int(row_index + 1)] = bill_consi['Days In Bill'][int(row_index + 1)] + bill_consi['Days In Bill'][int(row_index)] if row_index == 0: bill_consi['Bill From Date'][1] = bill_consi['Bill From Date'][0] @@ -253,10 +337,9 @@ class BillDisaggregation(): bill_consi['Days In Bill'][1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] if row_index == bill_consi.index[-1]: - bill_consi['Bill To Date'][-2] = bill_consi['Bill To Date'][-1] - bill_consi['Usage'][-2] = bill_consi['Usage'][-2] + bill_consi['Usage'][-1] - bill_consi['Days In Bill'][-2] = bill_consi['Days In Bill'][-1] + bill_consi['Days In Bill'][-2] - + bill_consi['Bill To Date'].iloc[-2] = bill_consi['Bill To Date'].iloc[-1] + bill_consi['Usage'].iloc[-2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] #drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should #taken care with solutions of 'the billing period that is toooo long # or apply bill quality check again to identify the new index of the peroiod that is too long @@ -271,12 +354,33 @@ class BillDisaggregation(): def regression_1(self, hp, cp, processed_bill): + ''' + A linear regression model with heating and cooling set fixed + + Args: + + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + processed_bill(pd.DataFrame): cleaned bill with daily temperature + + Returns: + + sklearn.linear_model.LinearRegression: regression model + float: r-squared of the linear regression model + 2d-array: a 2D numpy array of normalized billing period average daily HDDs and CDDs + + ''' + bill = processed_bill.copy() + + #changed 2/2/2018 + consumption = np.array(bill['Usage']/ bill['Days In Bill']) - ahdd = [\ - list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']\ - ] + ahdd = [list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']] acdd = [list(BillDisaggregation.cdd(cp, xx) for xx in x) for x in bill['temperature']] + + #it should be billing period average hdd / days + daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) @@ -294,49 +398,59 @@ class BillDisaggregation(): def summer_dhw(self, hp, abill): - ''' - this function shall count the summer month gas usage as dhw usage, and use the dhw usage as the baseline: - this function does not count for cooling consumption + This funcion uses summer month gas usage as base consumption for the year + A linear regression of weather-related consumption and a fixed heating system set point NOTE: USUALLY ERROR - ''' - bill = abill.copy() - ahdd = np.array((BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']) + Args: + + hp(float): heating season indoor set point + cp(float): cooling season indoor set point + processed_bill(pd.DataFrame): cleaned bill with daily temperature - print(bill['temperature']) + Returns: - monthly_hdd = [np.sum(ahdd[x]) for x in range(len(ahdd))] + sklearn.linear_model.LinearRegression: regression model + float: r-squared of the linear regression model + 2d-array: a 2D numpy array of normalized billing period HDDs sum + pd.DataFrame - print('monthly', monthly_hdd) + ''' + + bill = abill.copy() + ahdd = [[BillDisaggregation.hdd(hp, xx) for xx in x] for x in bill['temperature']] + monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - print('daily_hdd', daily_hdd) #daily dhw usage bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) - dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] #array - dhw_quality_index = BillDisaggregation.anomaly_point(list(dhw_only_consumption), 0.5) #list - #calculated the average consumption if the monthly is not outliner + if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: + dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] - dhw_only_consumption_checked = [] + if len(dhw_only_consumption) > 0: + dhw_quality_index = BillDisaggregation.outliers_iqr(list(dhw_only_consumption)) #list + dhw_only_consumption_checked = [] - for xx in range(len(dhw_only_consumption)): - if dhw_quality_index[xx] == True: - dhw_only_consumption_checked.append(list(dhw_only_consumption)[xx]) + for xx in range(len(dhw_only_consumption)): + if dhw_quality_index[xx] == False: + dhw_only_consumption_checked.append(list(dhw_only_consumption)[xx]) - daily_dhw = np.mean(dhw_only_consumption_checked) - #print('daily_dhw',daily_dhw) + daily_dhw = np.mean(dhw_only_consumption_checked) + else: + daily_dhw = 0 - #print('daily_dhw', daily_dhw) + else: + daily_dhw = 0 bill['dhw'] = daily_dhw * bill['Days In Bill'] - #print(bill['dhw']) - regression_temp = monthly_hdd.reshape(-1,1) - consumption = bill['Usage'] - bill['dhw'] + # 2018/01/30 + # Daily hdd makes more sense - #print( bill['Usage']) + regression_temp = daily_hdd.reshape(-1,1) + consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] regr_model = linear_model.LinearRegression() regr_model.fit(regression_temp, consumption) @@ -347,9 +461,26 @@ class BillDisaggregation(): def main(self, usage = 'Unknown'): + + """ + Main function for the optimization and disaggregation + + Args: + + usage (str): + Specify if the weather - related consumption is for heating or cooling + 'Unknown': no prior knowledge + 'Heating': only for heating + 'Cooling': only for cooling + 'Both': for both heating and cooling + 'Both Not': not for heating or cooling + default 'Unknown' + + """ + self.daily_temp = self.weather_cleaning(self.raw_daily_temp) - quality = self.bill_quality(self.bill) formatted_bill, shape = self.bill_formating(self.bill) + quality = self.bill_quality(formatted_bill) if any(i == 'short' for i in quality.flag): #any(quality.flag.astype(str) == 'long') @@ -362,7 +493,9 @@ class BillDisaggregation(): for x, y in zip(self.processed_bill['Bill From Date'], \ self.processed_bill['Bill To Date']) ] - #account_type = 2 + + self.processed_bill = self.processed_bill.sort_values('Bill From Date') + regression_method = 1 if usage == 'Unknown': @@ -374,16 +507,18 @@ class BillDisaggregation(): regr = self.regression_1(opt.x[0],opt.x[1],self.processed_bill) regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ - #print('test') - if (heating_coef > 0) & (cooling_coef <= 0): - usage = 'Heating' - elif (heating_coef <= 0) & (cooling_coef > 0): - usage = 'Cooling' - elif (heating_coef <= 0) & (cooling_coef <= 0): - usage = 'Both not' - elif (heating_coef >= 0) & (cooling_coef >= 0): - usage = 'Both' + if -opt.fun > 0.5: + if (heating_coef > 0) & (cooling_coef <= 0): + usage = 'Heating' + elif (heating_coef <= 0) & (cooling_coef > 0): + usage = 'Cooling' + elif (heating_coef <= 0) & (cooling_coef <= 0): + usage = 'Both Not' + elif (heating_coef >= 0) & (cooling_coef >= 0): + usage = 'Both' + else: + usage = 'Both Not' if usage == 'Both': opt = minimize(lambda x: -self.regression_1(x[0],x[1], self.processed_bill)[1], @@ -395,18 +530,67 @@ class BillDisaggregation(): regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + + #change accordingly for JOENYC buildings - if (heating_coef > 0) & (cooling_coef <= 0): - usage = 'heating' + if (heating_coef >0 ) & (cooling_coef <0): + usage = 'Heating' cooling_coef = 0 elif (heating_coef <= 0) & (cooling_coef > 0): - usage = 'cooling' + usage = 'Cooling' heating_coef = 0 elif (heating_coef <= 0) & (cooling_coef <= 0): - usage = 'both not' + usage = 'Both Not' heating_coef = 0 cooling_coef = 0 + #changes on Jan 17th 2018 + # please futher check with more bills + + elif (heating_coef > 0) & (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + usage = 'Heating' + cooling_coef = 0 + else: + #set the range of heating set point or cooling point - + if round(heating_set_point) in range(60, 95) and round(cooling_set_point) in range(55, 75): + usage = 'Both' + heating_coef = heating_coef + cooling_coef = cooling_coef + + else: + #using standard seting points to check the bill + regr = self.regression_1(72,65,self.processed_bill) + regr_model = regr[0] + heating_coef, cooling_coef = regr_model.coef_ + hddcdd = regr[2] + heating_set_point = opt.x[0] + cooling_set_point = opt.x[1] + + if (heating_coef >0 ) & (cooling_coef <0): + usage = 'Heating' + cooling_coef = 0 + elif (heating_coef <= 0) & (cooling_coef > 0): + usage = 'Cooling' + heating_coef = 0 + elif (heating_coef <= 0) & (cooling_coef <= 0): + usage = 'Both Not' + heating_coef = 0 + cooling_coef = 0 + elif (heating_coef > 0) & (cooling_coef > 0): + if heating_coef / cooling_coef > 5: + usage = 'Heating' + cooling_coef = 0 + else: + usage = 'Both' + + + + + if usage == 'Heating': opt_1 = minimize(lambda x: -self.regression_1(x,300, self.processed_bill)[1], 65, @@ -417,23 +601,37 @@ class BillDisaggregation(): 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - print(opt_2.fun) - if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(65, 90)): + if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): opt = opt_2 - regr = self.summer_dhw(opt_2.x[0],self.processed_bill) + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.summer_dhw(opt.x[0],self.processed_bill) regr_model = regr[0] - hddcdd = regr[2] + hdd = regr[2] + hdd_transit = [hdd[x][0] for x in range(len(hdd))] + hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) regression_method = 2 - #print(opt_2.fun) else: - opt = opt_1 - regr = self.regression_1(opt.x[0],300,self.processed_bill) - regr_model = regr[0] - hddcdd = regr[2] + if round(opt_1.x[0]) in range(60, 95): + opt = opt_1 + heating_set_point = opt.x[0] + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point,300,self.processed_bill) + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 + else: + #legit heating set-point 72'F + heating_set_point = 72 + cooling_set_point = np.NaN + regr = self.regression_1(heating_set_point,300,self.processed_bill) - heating_coef = regr_model.coef_ - cooling_coef = 0 + regr_model = regr[0] + hddcdd = regr[2] + heating_coef = regr_model.coef_ + cooling_coef = 0 if usage == 'Cooling': @@ -444,51 +642,76 @@ class BillDisaggregation(): regr = self.regression_1(opt.x[0],300, self.processed_bill) regr_model = regr[0] hddcdd = regr[2] + cooling_set_point = opt.x[0] + heating_set_point = np.NaN self.usage = usage - self.regression_method = regression_method - if self.regression_method == 1: - if regr[1] > 0.4: - self.regr_model = regr_model - self.heating_consumption_pred= np.multiply(hddcdd[:, 0], self.processed_bill['Days In Bill']) * regr_model.coef_[0] - self.cooling_consumption_pred = np.multiply(hddcdd[:, 1], self.processed_bill['Days In Bill']) * regr_model.coef_[1] + if self.usage == 'Both Not': + self.heating_consumption_pred = self.processed_bill['Usage'] * 0 + self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN - if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] - else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] + else: + self.regression_method = regression_method - real_sum = np.array(self.processed_bill['Usage']) - predict_sum = self.heating_consumption_pred + self.heating_consumption_pred + self.others_consumption_pred - diff = real_sum - predict_sum + if self.regression_method == 1: - if self.regr_model.intercept_ < 0: - for i in range(len(diff)): - if diff[i] > 0: - self.others_consumption_pred[i] = diff[i] - else: - #print('Low R-squrared') - self.heating_consumption_pred = self.processed_bill['Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill['Days In Bill'] * 0 - self.others_consumption_pred = self.processed_bill['Usage'] + self.hddcdd = np.array(pd.DataFrame(hddcdd).mul(list(self.processed_bill['Days In Bill']), axis = 0)) + + if regr[1] > 0.5: + self.regr_model = regr_model + self.heating_consumption_pred= np.array(self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 1]) * regr_model.coef_[1] + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] + + real_sum = np.array(self.processed_bill['Usage']) + predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + self.others_consumption_pred + + diff = real_sum - predict_sum +# if self.regr_model.intercept_ < 0: +# for i in range(len(diff)): +# if diff[i] > 0: +# self.others_consumption_pred[i] = diff[i] + else: + self.heating_consumption_pred = self.processed_bill['Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill['Days In Bill'] * 0 + self.others_consumption_pred = self.processed_bill['Usage'] + self.regression_method = 0 + self.hddcdd = np.zeros((len(self.processed_bill), 2)) + cooling_set_point = np.NaN + heating_set_point = np.NaN + self.usage = 'Both Not' + + + elif self.regression_method == 2: + self.hddcdd = np.array(pd.DataFrame(hddcdd).mul(list(self.processed_bill['Days In Bill']), axis = 0)) + #self.hddcdd = hddcdd + self.regr_model = regr_model + self.heating_consumption_pred = np.array(self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 + + if self.regr_model.intercept_ < 0: + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + else: + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + + regr[3]['dhw'] + #print('dhw', regr[3]) - elif self.regression_method ==2: - self.regr_model = regr_model - #print(regr_model.coef_) - self.heating_consumption_pred= np.array(hddcdd[:, 0]) * self.regr_model.coef_[0] - #print(self.heating_consumption_pred) - self.cooling_consumption_pred = np.array(hddcdd[:, 0]) * 0 - self.others_consumption_pred = self.regr_model.intercept_ + regr[3]['dhw'] - #print('regression method', test.regression_method) bill_cp = self.processed_bill.copy() - #print('lenth of processsed bill', len(self.processed_bill)) - #print('lenth of hdddd', len(hddcdd)) bill_cp = self.processed_bill[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']] bill_cp['Heating Usage'] = self.heating_consumption_pred @@ -496,81 +719,209 @@ class BillDisaggregation(): bill_cp['Other Usage'] = self.others_consumption_pred - self.r_squared_of_fit = regr[1] - self.set_points = opt.x + if self.usage == 'Both Not': + self.r_squared_of_fit = np.NaN + #self.h = np.NaN + else: + self.r_squared_of_fit = regr[1] + #self.set_points = opt.x + + #update 2018/01/17 + self.heating_set_point = heating_set_point + self.cooling_set_point = cooling_set_point self.output_table = bill_cp - self.output_monthly = BillDisaggregation.output_to_monthly(self.output_table) - @staticmethod - def output_to_monthly(output): - """ - Transfrom period-wise output to month-wise output + def benchmarking_output(self): + + ''' + output perimeters that related with evaluating the bills + ''' - Args: + test = self.output_table.copy() + bill_start_date = pd.to_datetime(test['Bill From Date']).iloc[0] + bill_last_date = pd.to_datetime(test['Bill To Date']).iloc[-1] + days_in_bill = (bill_last_date - bill_start_date).days - output (pd.DataFrame): a pandas dataframe like `self.output_table` - columns of the dataframe must be (in order): - 'Bill From Date', 'Bill To Date', - 'Days In Bill', 'Usage', 'Heating Usage', - 'Cooling Usage', 'Other Usage' - Returns: + if days_in_bill >=350: + usage = self.usage + r = self.r_squared_of_fit + r_method = self.regression_method + consumption = sum(test['Usage']) + heating = sum(test['Heating Usage']) + cooling = sum(test['Cooling Usage']) + others = sum(test['Other Usage']) + diff = (consumption - heating - cooling - others) / consumption + hdd = sum(self.hddcdd[:,0]) + cdd = sum(self.hddcdd[:,1]) - pd.DataFrame: result with monthly consumptions + + else: + consumption = np.NaN + r_method = np.NaN + heating = np.NaN + cooling = np.NaN + others = np.NaN + hdd = np.NaN + cdd = np.NaN + diff = np.NaN + usage = np.NaN + r = np.NaN + + return usage, r, r_method, consumption, heating, cooling, others, diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point + + + + def output_to_month(self, last_day_of_bill, hp, cp, number_of_month): """ - last_date = pd.to_datetime(output['Bill To Date']).iloc[0] - days_in_recent_year = 365 - if (last_date - timedelta(365)).day != last_date.day: - days_in_recent_year = 366 - days_cumsum = np.array(output['Days In Bill']).cumsum() - try: - periods_in_recent_year = \ - next(i for i, v in enumerate(days_cumsum) if v >= days_in_recent_year) - except StopIteration: - raise ArithmeticError('Days in bill less than one whole year.') - bill_in_recent_year = output.iloc[:(periods_in_recent_year + 1)] - values_in_recent_year = bill_in_recent_year.iloc[:, 2:].values - values_in_recent_year[-1] *= \ - 1 - (days_cumsum[periods_in_recent_year] - days_in_recent_year) \ - / values_in_recent_year[-1][0] - daily_usage = \ - np.concatenate([np.tile(i[1:] / i[0], (int(i[0]), 1)) - for i in values_in_recent_year])[::-1] - day_of_year = last_date.timetuple().tm_yday - 1 - daily_usage = np.roll(daily_usage, day_of_year, axis=0) - month_days = [31, 28, 31, 30, 31, 30, - 31, 31, 30, 31, 30, 31] - if days_in_recent_year == 366: - month_days[1] = 29 - month_cumsum = np.insert(np.cumsum(month_days), 0, 0) - output_monthly = \ - pd.DataFrame([daily_usage[month_cumsum[i]:month_cumsum[i+1]] - .sum(axis=0) for i in range(12)]) - output_monthly.columns = [output.columns[3:]] - output_monthly['Month'] = range(1, 13) - output_monthly = output_monthly.iloc[:, [4, 0, 1, 2, 3]] - return output_monthly - - - def to_json(self, period='bill'): - """ - Output in json file + Transfrom period-wise output to month-wise output Args: - period (str): 'month' for monthly - 'bill' for each bill period - default 'bill' + last_day_of_bill(datetime): last day of bill + hp(float): heating season indoor set point + cp(float): cooling season indoor set point Returns: - json: output in json format + pd.DataFrame: result with monthly consumptions """ - if period == 'bill': - return self.output_table.to_json(orient="records") - return self.output_monthly.to_json(orient="records") + + + last_dates = [] + first_dates = [] + + lastdate = last_day_of_bill-timedelta(last_date_of_bill.day) + + #cosntruct a new dataframe with bills from the first to last day for each month + + for i in range(0,number_of_month): + last_dates.append(lastdate) + first_dates.append(lastdate.replace(day = 1)) + lastdate = first_dates[i] -timedelta(1) + + monthly_output_table = pd.DataFrame(columns=['Bill From Date','Bill To Date', 'Days In Bill',\ + 'Heating Usage','Cooling Usage','Other Usage']) + + monthly_output_table['Bill From Date'] = first_dates + monthly_output_table['Bill To Date'] = last_dates + monthly_output_table['Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] + monthly_output_table['Days In Bill'] = monthly_output_table['Days In Bill'].apply(lambda x: x.days) + 1 + monthly_output_table['Month'] = monthly_output_table['Bill From Date'].apply(lambda x: x.month) + monthly_output_table['temperature'] = [\ + self.bill_period_weather(x,y) \ + for x, y in zip(monthly_output_table['Bill From Date'], \ + monthly_output_table['Bill To Date']) + ] + + hdd = [list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in monthly_output_table['temperature']] + cdd = [list(BillDisaggregation.cdd(cp, xx) for xx in x) for x in monthly_output_table['temperature']] + monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) + monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) + + monthly_output_table['HDD'] = monthly_hdd + monthly_output_table['CDD'] = monthly_cdd + + + per_hdd = self.benchmarking_output()[4]/self.benchmarking_output()[8] + if np.isnan(per_hdd): + per_hdd = 0 + per_cdd = self.benchmarking_output()[5]/self.benchmarking_output()[9] + if np.isnan(per_cdd): + per_cdd = 0 + per_day = self.benchmarking_output()[6]/self.benchmarking_output()[10] + if np.isnan(per_day): + per_day = 0 + + monthly_output_table['Heating Usage'] = monthly_output_table['HDD'] * per_hdd + monthly_output_table['Cooling Usage'] = monthly_output_table['CDD'] * per_cdd + monthly_output_table['Other Usage'] = monthly_output_table['Days In Bill'] * per_day + + monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ + + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] + + + monthly_output = monthly_output_table[['Month','Bill From Date','Bill To Date', 'Days In Bill',\ + 'Heating Usage','Cooling Usage','Other Usage','HDD','CDD']] + + return monthly_output + +# this_lastdate = self.output_table['Bill To Date'].iloc[-1] - timedelta(self.output_table['Bill To Date'].iloc[-1].day) +# self.most_recent_monthly_output = self.output_to_month(this_lastdate,self.heating_set_point,self.cooling_set_point) + + def non_weahter_related_breakdown(self,end_uses, monthly_output_table): + + ''' + breakdown the non_weather_related_usage + + Args: + + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage + monthly_output_table (pd.DataFrame): monthly bill breakdown starts with the first date of the month, ends with the + last date + Returns: + pd.DataFrame: bill breakdown of all end-use + + ''' + + monthly_usages = monthly_output_table.copy() + eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) + for i in range(len(eu)): + name_of_the_column = eu['end use'].iloc[i] + value_of_the_column = eu['percentage'].iloc[i] + monthly_usages[name_of_the_column] = monthly_usages['Other Usage'] * value_of_the_column + + if sum(eu['percentage']) !=1: + monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * (1 - sum(eu['percentage'])) + + return monthly_usages + + @staticmethod + def annual_usage_costs(annual_bill_breakdown, end_uses): + ''' + Calcuate annual usage and costs for each end use + + Args: + annual_bill_breakdown(pd.DataFrame): the output from non-weather-related usage breakdown + end_uses(dictionary): key: end use + value: percentage of the end use among non-weather related usage + + Return: + pd.DataFrame: annual usage, costs for each end uses + + ''' + + annual_usage = pd.DataFrame(columns = ['End Use', 'Usage', 'Costs']) + + end_use = ['Heating Usage', 'Cooling Usage'] + + x = bill_breakdown + number_of_columns = len(x.columns) + + eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) + + for i in range(len(eu)): + name_of_end_use = eu['end use'].iloc[i] + end_use.append(name_of_end_use) + + if number_of_columns - 9 != len(eu): + end_use.append('Miscellaneous') + + annual_usage['End Use'] = end_use + + + for j in range(len(annual_usage)): + temp = x[annual_usage['End Use'].iloc[j]] + temp_usage = sum(temp) + annual_usage['Usage'].iloc[j] = temp_usage + + annual_usage['Costs'] = annual_usage['Usage'] * (unit_price) + + return annual_usage + def print_all_features(self): """ @@ -578,26 +929,22 @@ class BillDisaggregation(): r-squared of fit and type of usage. """ -# print('Heating load percentage is {:.1%}'.format(self.heating_load_proportion)) -# print('Cooling load percentage is {:.1%}'.format(self.cooling_load_proportion)) + print('Heating set point is {}'.format(self.heating_set_point)) + print('Cooling set point is {}'.format(self.cooling_set_point)) print('R-squared of fit is {}'.format(self.r_squared_of_fit)) print('Usage is {}'.format(self.usage)) -# @staticmethod -# def projection_figure(bill): -# plt.figure(figsize=(10,5)) -# x = pd.to_datetime(bill['Bill From Date']) -# y = bill['Usage'] -# plt.plot(x,y) -# plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + bill['Other Usage'])) -# plt.plot(x, bill['Heating Usage']) -# plt.plot(x, bill['Cooling Usage']) -# plt.legend(['real consumption', 'prejected consumption','prejected heating', 'prejected cooling']) -# #plt.plot(x,(cleaned_bill['total_hdd']/cleaned_bill['Days In Bill'])) -# plt.show() - - - - - + @staticmethod + def projection_figure(bill): + '''ploat the disaggregated bill''' + + plt.figure(figsize=(10,5)) + x = pd.to_datetime(bill['Bill From Date']) + y = bill['Usage'] + plt.plot(x,y) + plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + bill['Other Usage'])) + plt.plot(x, bill['Heating Usage']) + plt.plot(x, bill['Cooling Usage']) + plt.legend(['real consumption', 'prejected consumption','prejected heating', 'prejected cooling']) + plt.show() -- GitLab From 490178fab52ad3d298299e776ed74fa67a586115 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 17:18:32 -0500 Subject: [PATCH 04/19] sort imports --- bpeng/bill/awesome_disaggregate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 43b83ff..c7bb4db 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1,16 +1,16 @@ # ''' this file calcuate bill disagregation for multifamily buildings''' +import warnings from datetime import timedelta -import pandas as pd -import numpy as np -from sklearn import linear_model +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from dateutil import relativedelta from scipy.optimize import minimize +from sklearn import linear_model -import matplotlib.pyplot as plt -import warnings warnings.simplefilter('ignore') -from dateutil import relativedelta class BillDisaggregation(): -- GitLab From c419d9bd269359bb18732d5b619493efde279dad Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 17:24:58 -0500 Subject: [PATCH 05/19] format the code --- bpeng/bill/awesome_disaggregate.py | 398 +++++++++++++++++------------ 1 file changed, 235 insertions(+), 163 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index c7bb4db..bc9da0a 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -32,6 +32,7 @@ class BillDisaggregation(): heating load of each month (return NaN if R-squared is low) """ + # pylint: disable=too-many-instance-attributes def __init__(self, bill, raw_daily_temp): @@ -67,7 +68,9 @@ class BillDisaggregation(): pd.DateFrame: Returns formatted daily temperature ''' - raw_daily_temp.rename(columns={'time':'date','value':'temperature'}, inplace = True) + raw_daily_temp.rename( + columns={'time': 'date', + 'value': 'temperature'}, inplace=True) raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day), str(x.date().year)])) @@ -77,7 +80,6 @@ class BillDisaggregation(): return self.daily_temp - def bill_period_weather(self, bill_from_date, bill_end_date): ''' get the outdoor temperature date between two date, return a list @@ -91,8 +93,10 @@ class BillDisaggregation(): list: Returns a list of outdoor temperature for a period ''' - end_date_id = self.daily_temp[self.daily_temp.date == bill_end_date].index[0] - start_date_id = self.daily_temp[self.daily_temp.date == bill_from_date].index[0] + end_date_id = self.daily_temp[self.daily_temp.date == + bill_end_date].index[0] + start_date_id = self.daily_temp[self.daily_temp.date == + bill_from_date].index[0] return list(self.daily_temp['temperature'][start_date_id:end_date_id]) @staticmethod @@ -156,10 +160,8 @@ class BillDisaggregation(): upper_bound = quartile_3 + (iqr * 1.5) return [(x > upper_bound or x < lower_bound) for x in ys] - @staticmethod - def anomaly_point(alist,thresholds): - + def anomaly_point(alist, thresholds): ''' Find outlier and return its index @@ -176,11 +178,12 @@ class BillDisaggregation(): ''' amean = [] for x in range(len(alist)): - temp = np.hstack((alist[:(x)],alist[(x+1):])) + temp = np.hstack((alist[:(x)], alist[(x + 1):])) amean.append(temp.mean()) index = [] for x in range(len(alist)): - temp1 = abs(alist[x] - np.array(alist).mean())/np.array(alist).mean() + temp1 = abs(alist[x] - np.array(alist).mean()) / np.array( + alist).mean() index.append(temp1 < thresholds) return index @@ -189,14 +192,12 @@ class BillDisaggregation(): '''Return number of month in between two date ''' lastdate = last_date_bill - timedelta(last_date_bill.day) firstdate = first_date_bill + timedelta(days=32) - firstdate = firstdate.replace(day = 1) + firstdate = firstdate.replace(day=1) r = relativedelta.relativedelta(lastdate, firstdate) - num_month = r.years * 12 + r.months +1 - return(num_month) - + num_month = r.years * 12 + r.months + 1 + return (num_month) def bill_formating(self, raw_bill): - ''' Bill Cleaning Step 1: @@ -220,21 +221,27 @@ class BillDisaggregation(): ''' bill_copy = raw_bill.copy() - bill_copy['Bill From Date'] = pd.to_datetime(bill_copy['Bill From Date']) + bill_copy['Bill From Date'] = pd.to_datetime( + bill_copy['Bill From Date']) bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ str(x.date().year)])) bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ str(x.date().year)])) - bill_copy = bill_copy[['Bill From Date','Bill To Date','Usage','Days In Bill','Total Charge']] - bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > pd.to_datetime(bill_copy['Bill From Date'])] - bill_copy1['Bill From Date'] = pd.to_datetime(bill_copy1['Bill From Date']) + bill_copy = bill_copy[[ + 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', + 'Total Charge' + ]] + bill_copy1 = bill_copy[pd.to_datetime(bill_copy['Bill To Date']) > + pd.to_datetime(bill_copy['Bill From Date'])] + bill_copy1['Bill From Date'] = pd.to_datetime( + bill_copy1['Bill From Date']) bill_copy2 = bill_copy1.sort_values('Bill From Date') bill_copy2['Bill To Date'] = pd.to_datetime(bill_copy2['Bill To Date']) bill_copy2 = bill_copy2.dropna() bill_copy2 = bill_copy2.drop_duplicates() - bill_copy2 = bill_copy2.reset_index(drop = True) + bill_copy2 = bill_copy2.reset_index(drop=True) if np.array(bill_copy2.shape)[0] == np.array(raw_bill.shape)[0]: bill_shape_change = 'False' @@ -242,13 +249,12 @@ class BillDisaggregation(): bill_formatted = bill_copy2 - self.unit_price = (sum(bill_formatted['Total Charge'])) / (sum(bill_formatted['Usage'])) + self.unit_price = (sum(bill_formatted['Total Charge'])) / ( + sum(bill_formatted['Usage'])) return bill_formatted, bill_shape_change - def bill_quality(self, bill_formatted): - ''' Bill Cleaning Step 2: @@ -267,26 +273,30 @@ class BillDisaggregation(): bill = pd.DataFrame(bill) total_rows = np.array(bill.shape)[0] -# timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ -# - pd.to_datetime(bill['Bill From Date'].iloc[0]) -# total_days_in_bill = timescale.days + # timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ + # - pd.to_datetime(bill['Bill From Date'].iloc[0]) + # total_days_in_bill = timescale.days days_in_bill = np.array(bill['Days In Bill']) #abnormal days in bill will return False - days_quality_index_inti = BillDisaggregation.outliers_iqr(list(days_in_bill)) - days_quality_index_recheck = np.array([x not in range(25,35) for x in days_in_bill]) - days_quality_index = list(np.array(days_quality_index_inti) * np.array(days_quality_index_recheck)) + days_quality_index_inti = BillDisaggregation.outliers_iqr( + list(days_in_bill)) + days_quality_index_recheck = np.array( + [x not in range(25, 35) for x in days_in_bill]) + days_quality_index = list( + np.array(days_quality_index_inti) * + np.array(days_quality_index_recheck)) days_abn_index = [] for x in range(len(days_quality_index)): if days_quality_index[x] == True: days_abn_index.append(x) - bill_quality = pd.DataFrame(data = days_abn_index, columns = ['index'] ) + bill_quality = pd.DataFrame(data=days_abn_index, columns=['index']) flag = [] for xx in range(len(days_abn_index)): - point_index = days_abn_index[xx-1] + point_index = days_abn_index[xx - 1] if days_in_bill[point_index] < days_in_bill.mean(): flag.append('short') elif days_in_bill[point_index] >= days_in_bill.mean(): @@ -321,39 +331,63 @@ class BillDisaggregation(): if (row_index != 0) & (row_index != bill_consi.index[-1]): - if bill_consi['Days In Bill'][int(row_index -1)] <= bill_consi['Days In Bill'][int(row_index +1)]: + if bill_consi['Days In Bill'][int( + row_index - 1)] <= bill_consi['Days In Bill'][int( + row_index + 1)]: #print('row index', row_index) - bill_consi['Bill To Date'][int(row_index - 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index - 1)] = bill_consi['Usage'][int(row_index - 1)] + bill_consi['Usage'][int(row_index)] - bill_consi['Days In Bill'][int(row_index - 1)] = bill_consi['Days In Bill'][int(row_index - 1)] + bill_consi['Days In Bill'][int(row_index)] + bill_consi['Bill To Date'][int( + row_index - 1)] = bill_consi['Bill To Date'][int( + row_index)] + bill_consi['Usage'][int( + row_index - 1)] = bill_consi['Usage'][int( + row_index - 1)] + bill_consi['Usage'][int( + row_index)] + bill_consi['Days In Bill'][int( + row_index - 1)] = bill_consi['Days In Bill'][int( + row_index - 1 + )] + bill_consi['Days In Bill'][int(row_index)] else: - bill_consi['Bill From Date'][int(row_index + 1)] = bill_consi['Bill To Date'][int(row_index)] - bill_consi['Usage'][int(row_index + 1)] = bill_consi['Usage'][int(row_index + 1)] + bill_consi['Usage'][int(row_index)] - bill_consi['Days In Bill'][int(row_index + 1)] = bill_consi['Days In Bill'][int(row_index + 1)] + bill_consi['Days In Bill'][int(row_index)] + bill_consi['Bill From Date'][int( + row_index + 1)] = bill_consi['Bill To Date'][int( + row_index)] + bill_consi['Usage'][int( + row_index + 1)] = bill_consi['Usage'][int( + row_index + 1)] + bill_consi['Usage'][int( + row_index)] + bill_consi['Days In Bill'][int( + row_index + 1)] = bill_consi['Days In Bill'][int( + row_index + 1 + )] + bill_consi['Days In Bill'][int(row_index)] if row_index == 0: - bill_consi['Bill From Date'][1] = bill_consi['Bill From Date'][0] - bill_consi['Usage'][1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] - bill_consi['Days In Bill'][1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] + bill_consi['Bill From Date'][1] = bill_consi[ + 'Bill From Date'][0] + bill_consi['Usage'][ + 1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] + bill_consi['Days In Bill'][ + 1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] if row_index == bill_consi.index[-1]: - bill_consi['Bill To Date'].iloc[-2] = bill_consi['Bill To Date'].iloc[-1] - bill_consi['Usage'].iloc[-2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] - bill_consi['Days In Bill'].iloc[-2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] + bill_consi['Bill To Date'].iloc[-2] = bill_consi[ + 'Bill To Date'].iloc[-1] + bill_consi['Usage'].iloc[ + -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] + bill_consi['Days In Bill'].iloc[ + -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] #drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should #taken care with solutions of 'the billing period that is toooo long # or apply bill quality check again to identify the new index of the peroiod that is too long if len(bill_quality_short) != 0: - bill_consi = bill_consi.drop(bill_consi.index[list(bill_quality_short['index'])]) + bill_consi = bill_consi.drop( + bill_consi.index[list(bill_quality_short['index'])]) #bill_consi = bill_consi.reset_index(inplace = True) - bill_consi = bill_consi.reset_index(drop = False) + bill_consi = bill_consi.reset_index(drop=False) return bill_consi def regression_1(self, hp, cp, processed_bill): - ''' A linear regression model with heating and cooling set fixed @@ -375,9 +409,15 @@ class BillDisaggregation(): #changed 2/2/2018 - consumption = np.array(bill['Usage']/ bill['Days In Bill']) - ahdd = [list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in bill['temperature']] - acdd = [list(BillDisaggregation.cdd(cp, xx) for xx in x) for x in bill['temperature']] + consumption = np.array(bill['Usage'] / bill['Days In Bill']) + ahdd = [ + list(BillDisaggregation.hdd(hp, xx) for xx in x) + for x in bill['temperature'] + ] + acdd = [ + list(BillDisaggregation.cdd(cp, xx) for xx in x) + for x in bill['temperature'] + ] #it should be billing period average hdd / days @@ -385,18 +425,23 @@ class BillDisaggregation(): daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) # set threshold that if the HDD/CDD is lower than a certain value, we set it to 0 - daily_hdd1 = np.array([BillDisaggregation.threshold(daily_hdd[x], 0.1) for x in range(len(daily_hdd))]) - daily_cdd1 = np.array([BillDisaggregation.threshold(daily_cdd[x], 0.1) for x in range(len(daily_cdd))]) + daily_hdd1 = np.array([ + BillDisaggregation.threshold(daily_hdd[x], 0.1) + for x in range(len(daily_hdd)) + ]) + daily_cdd1 = np.array([ + BillDisaggregation.threshold(daily_cdd[x], 0.1) + for x in range(len(daily_cdd)) + ]) - regression_temp = np.array([daily_hdd1,daily_cdd1]).T + regression_temp = np.array([daily_hdd1, daily_cdd1]).T regr_model = linear_model.LinearRegression() regr_model.fit(regression_temp, consumption) - score = regr_model.score(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) return regr_model, score, regression_temp - def summer_dhw(self, hp, abill): ''' This funcion uses summer month gas usage as base consumption for the year @@ -419,7 +464,8 @@ class BillDisaggregation(): ''' bill = abill.copy() - ahdd = [[BillDisaggregation.hdd(hp, xx) for xx in x] for x in bill['temperature']] + ahdd = [[BillDisaggregation.hdd(hp, xx) for xx in x] + for x in bill['temperature']] monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) @@ -430,16 +476,18 @@ class BillDisaggregation(): dhw_only_consumption = bill[bill['dhw'] != 0]['dhw'] if len(dhw_only_consumption) > 0: - dhw_quality_index = BillDisaggregation.outliers_iqr(list(dhw_only_consumption)) #list + dhw_quality_index = BillDisaggregation.outliers_iqr( + list(dhw_only_consumption)) #list dhw_only_consumption_checked = [] for xx in range(len(dhw_only_consumption)): if dhw_quality_index[xx] == False: - dhw_only_consumption_checked.append(list(dhw_only_consumption)[xx]) + dhw_only_consumption_checked.append( + list(dhw_only_consumption)[xx]) daily_dhw = np.mean(dhw_only_consumption_checked) else: - daily_dhw = 0 + daily_dhw = 0 else: daily_dhw = 0 @@ -449,19 +497,16 @@ class BillDisaggregation(): # 2018/01/30 # Daily hdd makes more sense - regression_temp = daily_hdd.reshape(-1,1) + regression_temp = daily_hdd.reshape(-1, 1) consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill'] regr_model = linear_model.LinearRegression() regr_model.fit(regression_temp, consumption) - score = regr_model.score(regression_temp, consumption) + score = regr_model.score(regression_temp, consumption) return regr_model, score, regression_temp, bill - - def main(self, usage = 'Unknown'): - - + def main(self, usage='Unknown'): """ Main function for the optimization and disaggregation @@ -484,7 +529,8 @@ class BillDisaggregation(): if any(i == 'short' for i in quality.flag): #any(quality.flag.astype(str) == 'long') - self.processed_bill = self.short_bill_consolidate(formatted_bill,quality) + self.processed_bill = self.short_bill_consolidate( + formatted_bill, quality) else: self.processed_bill = formatted_bill @@ -499,12 +545,14 @@ class BillDisaggregation(): regression_method = 1 if usage == 'Unknown': - opt = minimize(lambda x: -self.regression_1(x[0],x[1], self.processed_bill)[1], - (65,65), - method='nelder-mead', - options={'xtol': 1e-2, 'disp': False}) - - regr = self.regression_1(opt.x[0],opt.x[1],self.processed_bill) + opt = minimize( + lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ @@ -521,22 +569,23 @@ class BillDisaggregation(): usage = 'Both Not' if usage == 'Both': - opt = minimize(lambda x: -self.regression_1(x[0],x[1], self.processed_bill)[1], - (65,65), - method='nelder-mead', - options={'xtol': 1e-2, 'disp': False}) - - regr = self.regression_1(opt.x[0],opt.x[1],self.processed_bill) + opt = minimize( + lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], + (65, 65), + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + regr = self.regression_1(opt.x[0], opt.x[1], self.processed_bill) regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ hddcdd = regr[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] - #change accordingly for JOENYC buildings - if (heating_coef >0 ) & (cooling_coef <0): + if (heating_coef > 0) & (cooling_coef < 0): usage = 'Heating' cooling_coef = 0 elif (heating_coef <= 0) & (cooling_coef > 0): @@ -556,21 +605,23 @@ class BillDisaggregation(): cooling_coef = 0 else: #set the range of heating set point or cooling point - - if round(heating_set_point) in range(60, 95) and round(cooling_set_point) in range(55, 75): + if round(heating_set_point) in range( + 60, 95) and round(cooling_set_point) in range( + 55, 75): usage = 'Both' heating_coef = heating_coef cooling_coef = cooling_coef else: #using standard seting points to check the bill - regr = self.regression_1(72,65,self.processed_bill) + regr = self.regression_1(72, 65, self.processed_bill) regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ hddcdd = regr[2] heating_set_point = opt.x[0] cooling_set_point = opt.x[1] - if (heating_coef >0 ) & (cooling_coef <0): + if (heating_coef > 0) & (cooling_coef < 0): usage = 'Heating' cooling_coef = 0 elif (heating_coef <= 0) & (cooling_coef > 0): @@ -587,37 +638,39 @@ class BillDisaggregation(): else: usage = 'Both' - - - - if usage == 'Heating': - opt_1 = minimize(lambda x: -self.regression_1(x,300, self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, 'disp': False}) - - opt_2 = minimize(lambda x: -self.summer_dhw(x,self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, 'disp': False}) + opt_1 = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + + opt_2 = minimize( + lambda x: -self.summer_dhw(x, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) if (-opt_2.fun > 0.9) and (round(opt_2.x[0]) in range(64, 85)): opt = opt_2 heating_set_point = opt.x[0] cooling_set_point = np.NaN - regr = self.summer_dhw(opt.x[0],self.processed_bill) + regr = self.summer_dhw(opt.x[0], self.processed_bill) regr_model = regr[0] hdd = regr[2] hdd_transit = [hdd[x][0] for x in range(len(hdd))] - hddcdd = np.array([[hdd_transit[x], 0] for x in range(len(hdd))]) + hddcdd = np.array([[hdd_transit[x], 0] + for x in range(len(hdd))]) regression_method = 2 else: if round(opt_1.x[0]) in range(60, 95): opt = opt_1 heating_set_point = opt.x[0] cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point,300,self.processed_bill) + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) regr_model = regr[0] hddcdd = regr[2] heating_coef = regr_model.coef_ @@ -626,20 +679,22 @@ class BillDisaggregation(): #legit heating set-point 72'F heating_set_point = 72 cooling_set_point = np.NaN - regr = self.regression_1(heating_set_point,300,self.processed_bill) + regr = self.regression_1(heating_set_point, 300, + self.processed_bill) regr_model = regr[0] hddcdd = regr[2] heating_coef = regr_model.coef_ cooling_coef = 0 - if usage == 'Cooling': - opt = minimize(lambda x: -self.regression_1(x,300, self.processed_bill)[1], - 65, - method='nelder-mead', - options={'xtol': 1e-2, 'disp': False}) - regr = self.regression_1(opt.x[0],300, self.processed_bill) + opt = minimize( + lambda x: -self.regression_1(x, 300, self.processed_bill)[1], + 65, + method='nelder-mead', + options={'xtol': 1e-2, + 'disp': False}) + regr = self.regression_1(opt.x[0], 300, self.processed_bill) regr_model = regr[0] hddcdd = regr[2] cooling_set_point = opt.x[0] @@ -647,7 +702,6 @@ class BillDisaggregation(): self.usage = usage - if self.usage == 'Both Not': self.heating_consumption_pred = self.processed_bill['Usage'] * 0 self.cooling_consumption_pred = self.processed_bill['Usage'] * 0 @@ -660,20 +714,25 @@ class BillDisaggregation(): else: self.regression_method = regression_method - if self.regression_method == 1: - self.hddcdd = np.array(pd.DataFrame(hddcdd).mul(list(self.processed_bill['Days In Bill']), axis = 0)) + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) if regr[1] > 0.5: self.regr_model = regr_model - self.heating_consumption_pred= np.array(self.hddcdd[:, 0]) * regr_model.coef_[0] - self.cooling_consumption_pred = np.array(self.hddcdd[:, 1]) * regr_model.coef_[1] + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * regr_model.coef_[0] + self.cooling_consumption_pred = np.array( + self.hddcdd[:, 1]) * regr_model.coef_[1] if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + self.others_consumption_pred = 0 * self.processed_bill[ + 'Days In Bill'] else: - self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill'] + self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ + 'Days In Bill'] real_sum = np.array(self.processed_bill['Usage']) predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + self.others_consumption_pred @@ -685,8 +744,10 @@ class BillDisaggregation(): # if diff[i] > 0: # self.others_consumption_pred[i] = diff[i] else: - self.heating_consumption_pred = self.processed_bill['Days In Bill'] * 0 - self.cooling_consumption_pred = self.processed_bill['Days In Bill'] * 0 + self.heating_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 + self.cooling_consumption_pred = self.processed_bill[ + 'Days In Bill'] * 0 self.others_consumption_pred = self.processed_bill['Usage'] self.regression_method = 0 self.hddcdd = np.zeros((len(self.processed_bill), 2)) @@ -694,31 +755,32 @@ class BillDisaggregation(): heating_set_point = np.NaN self.usage = 'Both Not' - elif self.regression_method == 2: - self.hddcdd = np.array(pd.DataFrame(hddcdd).mul(list(self.processed_bill['Days In Bill']), axis = 0)) + self.hddcdd = np.array( + pd.DataFrame(hddcdd).mul( + list(self.processed_bill['Days In Bill']), axis=0)) #self.hddcdd = hddcdd self.regr_model = regr_model - self.heating_consumption_pred = np.array(self.hddcdd[:, 0]) * self.regr_model.coef_[0] + self.heating_consumption_pred = np.array( + self.hddcdd[:, 0]) * self.regr_model.coef_[0] self.cooling_consumption_pred = np.array(self.hddcdd[:, 0]) * 0 if self.regr_model.intercept_ < 0: - self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] + self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] else: self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + regr[3]['dhw'] #print('dhw', regr[3]) - - bill_cp = self.processed_bill.copy() - bill_cp = self.processed_bill[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']] + bill_cp = self.processed_bill[[ + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage' + ]] bill_cp['Heating Usage'] = self.heating_consumption_pred bill_cp['Cooling Usage'] = self.cooling_consumption_pred bill_cp['Other Usage'] = self.others_consumption_pred - if self.usage == 'Both Not': self.r_squared_of_fit = np.NaN #self.h = np.NaN @@ -732,7 +794,6 @@ class BillDisaggregation(): self.output_table = bill_cp def benchmarking_output(self): - ''' output perimeters that related with evaluating the bills ''' @@ -742,8 +803,7 @@ class BillDisaggregation(): bill_last_date = pd.to_datetime(test['Bill To Date']).iloc[-1] days_in_bill = (bill_last_date - bill_start_date).days - - if days_in_bill >=350: + if days_in_bill >= 350: usage = self.usage r = self.r_squared_of_fit r_method = self.regression_method @@ -752,9 +812,8 @@ class BillDisaggregation(): cooling = sum(test['Cooling Usage']) others = sum(test['Other Usage']) diff = (consumption - heating - cooling - others) / consumption - hdd = sum(self.hddcdd[:,0]) - cdd = sum(self.hddcdd[:,1]) - + hdd = sum(self.hddcdd[:, 0]) + cdd = sum(self.hddcdd[:, 1]) else: consumption = np.NaN @@ -770,10 +829,7 @@ class BillDisaggregation(): return usage, r, r_method, consumption, heating, cooling, others, diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point - - def output_to_month(self, last_day_of_bill, hp, cp, number_of_month): - """ Transfrom period-wise output to month-wise output @@ -789,55 +845,67 @@ class BillDisaggregation(): """ - last_dates = [] first_dates = [] - lastdate = last_day_of_bill-timedelta(last_date_of_bill.day) + lastdate = last_day_of_bill - timedelta(last_date_of_bill.day) #cosntruct a new dataframe with bills from the first to last day for each month - for i in range(0,number_of_month): + for i in range(0, number_of_month): last_dates.append(lastdate) - first_dates.append(lastdate.replace(day = 1)) - lastdate = first_dates[i] -timedelta(1) + first_dates.append(lastdate.replace(day=1)) + lastdate = first_dates[i] - timedelta(1) monthly_output_table = pd.DataFrame(columns=['Bill From Date','Bill To Date', 'Days In Bill',\ 'Heating Usage','Cooling Usage','Other Usage']) monthly_output_table['Bill From Date'] = first_dates monthly_output_table['Bill To Date'] = last_dates - monthly_output_table['Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] - monthly_output_table['Days In Bill'] = monthly_output_table['Days In Bill'].apply(lambda x: x.days) + 1 - monthly_output_table['Month'] = monthly_output_table['Bill From Date'].apply(lambda x: x.month) + monthly_output_table[ + 'Days In Bill'] = monthly_output_table['Bill To Date'] - monthly_output_table['Bill From Date'] + monthly_output_table[ + 'Days In Bill'] = monthly_output_table['Days In Bill'].apply( + lambda x: x.days) + 1 + monthly_output_table['Month'] = monthly_output_table[ + 'Bill From Date'].apply(lambda x: x.month) monthly_output_table['temperature'] = [\ self.bill_period_weather(x,y) \ for x, y in zip(monthly_output_table['Bill From Date'], \ monthly_output_table['Bill To Date']) ] - hdd = [list(BillDisaggregation.hdd(hp, xx) for xx in x) for x in monthly_output_table['temperature']] - cdd = [list(BillDisaggregation.cdd(cp, xx) for xx in x) for x in monthly_output_table['temperature']] + hdd = [ + list(BillDisaggregation.hdd(hp, xx) for xx in x) + for x in monthly_output_table['temperature'] + ] + cdd = [ + list(BillDisaggregation.cdd(cp, xx) for xx in x) + for x in monthly_output_table['temperature'] + ] monthly_hdd = np.array([np.sum(hdd[x]) for x in range(len(hdd))]) monthly_cdd = np.array([np.sum(cdd[x]) for x in range(len(cdd))]) monthly_output_table['HDD'] = monthly_hdd monthly_output_table['CDD'] = monthly_cdd - - per_hdd = self.benchmarking_output()[4]/self.benchmarking_output()[8] + per_hdd = self.benchmarking_output()[4] / self.benchmarking_output()[8] if np.isnan(per_hdd): per_hdd = 0 - per_cdd = self.benchmarking_output()[5]/self.benchmarking_output()[9] + per_cdd = self.benchmarking_output()[5] / self.benchmarking_output()[9] if np.isnan(per_cdd): per_cdd = 0 - per_day = self.benchmarking_output()[6]/self.benchmarking_output()[10] + per_day = self.benchmarking_output()[6] / self.benchmarking_output()[ + 10] if np.isnan(per_day): per_day = 0 - monthly_output_table['Heating Usage'] = monthly_output_table['HDD'] * per_hdd - monthly_output_table['Cooling Usage'] = monthly_output_table['CDD'] * per_cdd - monthly_output_table['Other Usage'] = monthly_output_table['Days In Bill'] * per_day + monthly_output_table['Heating Usage'] = monthly_output_table[ + 'HDD'] * per_hdd + monthly_output_table['Cooling Usage'] = monthly_output_table[ + 'CDD'] * per_cdd + monthly_output_table['Other Usage'] = monthly_output_table[ + 'Days In Bill'] * per_day monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] @@ -851,8 +919,7 @@ class BillDisaggregation(): # this_lastdate = self.output_table['Bill To Date'].iloc[-1] - timedelta(self.output_table['Bill To Date'].iloc[-1].day) # self.most_recent_monthly_output = self.output_to_month(this_lastdate,self.heating_set_point,self.cooling_set_point) - def non_weahter_related_breakdown(self,end_uses, monthly_output_table): - + def non_weahter_related_breakdown(self, end_uses, monthly_output_table): ''' breakdown the non_weather_related_usage @@ -868,14 +935,17 @@ class BillDisaggregation(): ''' monthly_usages = monthly_output_table.copy() - eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) + eu = pd.DataFrame( + list(end_uses.items()), columns=['end use', 'percentage']) for i in range(len(eu)): name_of_the_column = eu['end use'].iloc[i] value_of_the_column = eu['percentage'].iloc[i] - monthly_usages[name_of_the_column] = monthly_usages['Other Usage'] * value_of_the_column + monthly_usages[name_of_the_column] = monthly_usages[ + 'Other Usage'] * value_of_the_column - if sum(eu['percentage']) !=1: - monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * (1 - sum(eu['percentage'])) + if sum(eu['percentage']) != 1: + monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * ( + 1 - sum(eu['percentage'])) return monthly_usages @@ -894,14 +964,15 @@ class BillDisaggregation(): ''' - annual_usage = pd.DataFrame(columns = ['End Use', 'Usage', 'Costs']) + annual_usage = pd.DataFrame(columns=['End Use', 'Usage', 'Costs']) end_use = ['Heating Usage', 'Cooling Usage'] x = bill_breakdown number_of_columns = len(x.columns) - eu = pd.DataFrame(list(end_uses.items()), columns=['end use', 'percentage']) + eu = pd.DataFrame( + list(end_uses.items()), columns=['end use', 'percentage']) for i in range(len(eu)): name_of_end_use = eu['end use'].iloc[i] @@ -912,7 +983,6 @@ class BillDisaggregation(): annual_usage['End Use'] = end_use - for j in range(len(annual_usage)): temp = x[annual_usage['End Use'].iloc[j]] temp_usage = sum(temp) @@ -922,7 +992,6 @@ class BillDisaggregation(): return annual_usage - def print_all_features(self): """ print the features heating load percentage, cooling load percentage, @@ -934,17 +1003,20 @@ class BillDisaggregation(): print('R-squared of fit is {}'.format(self.r_squared_of_fit)) print('Usage is {}'.format(self.usage)) - @staticmethod def projection_figure(bill): '''ploat the disaggregated bill''' - plt.figure(figsize=(10,5)) + plt.figure(figsize=(10, 5)) x = pd.to_datetime(bill['Bill From Date']) y = bill['Usage'] - plt.plot(x,y) - plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + bill['Other Usage'])) + plt.plot(x, y) + plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + + bill['Other Usage'])) plt.plot(x, bill['Heating Usage']) plt.plot(x, bill['Cooling Usage']) - plt.legend(['real consumption', 'prejected consumption','prejected heating', 'prejected cooling']) + plt.legend([ + 'real consumption', 'prejected consumption', 'prejected heating', + 'prejected cooling' + ]) plt.show() -- GitLab From f14989bbe06bdee9ee8d47c5f6bc1b9e4d4a3ccf Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Tue, 6 Feb 2018 17:40:14 -0500 Subject: [PATCH 06/19] Fix commented out code in bill --- bpeng/bill/awesome_disaggregate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index bc9da0a..03fdc8b 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -739,10 +739,10 @@ class BillDisaggregation(): diff = real_sum - predict_sum -# if self.regr_model.intercept_ < 0: -# for i in range(len(diff)): -# if diff[i] > 0: -# self.others_consumption_pred[i] = diff[i] + # if self.regr_model.intercept_ < 0: + # for i in range(len(diff)): + # if diff[i] > 0: + # self.others_consumption_pred[i] = diff[i] else: self.heating_consumption_pred = self.processed_bill[ 'Days In Bill'] * 0 -- GitLab From e6d0d0929bf9955d782493d97ae88ff7b555c5b4 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 17:55:38 -0500 Subject: [PATCH 07/19] fix bugs --- bpeng/bill/awesome_disaggregate.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 03fdc8b..45c6e75 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -829,7 +829,7 @@ class BillDisaggregation(): return usage, r, r_method, consumption, heating, cooling, others, diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point - def output_to_month(self, last_day_of_bill, hp, cp, number_of_month): + def output_to_month(self, last_date_of_bill, hp, cp, number_of_month): """ Transfrom period-wise output to month-wise output @@ -848,7 +848,7 @@ class BillDisaggregation(): last_dates = [] first_dates = [] - lastdate = last_day_of_bill - timedelta(last_date_of_bill.day) + lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) #cosntruct a new dataframe with bills from the first to last day for each month @@ -910,12 +910,12 @@ class BillDisaggregation(): monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] - monthly_output = monthly_output_table[['Month','Bill From Date','Bill To Date', 'Days In Bill',\ 'Heating Usage','Cooling Usage','Other Usage','HDD','CDD']] return monthly_output + # this_lastdate = self.output_table['Bill To Date'].iloc[-1] - timedelta(self.output_table['Bill To Date'].iloc[-1].day) # self.most_recent_monthly_output = self.output_to_month(this_lastdate,self.heating_set_point,self.cooling_set_point) @@ -949,8 +949,7 @@ class BillDisaggregation(): return monthly_usages - @staticmethod - def annual_usage_costs(annual_bill_breakdown, end_uses): + def annual_usage_costs(self, annual_bill_breakdown, end_uses): ''' Calcuate annual usage and costs for each end use @@ -968,7 +967,7 @@ class BillDisaggregation(): end_use = ['Heating Usage', 'Cooling Usage'] - x = bill_breakdown + x = annual_bill_breakdown number_of_columns = len(x.columns) eu = pd.DataFrame( @@ -988,7 +987,7 @@ class BillDisaggregation(): temp_usage = sum(temp) annual_usage['Usage'].iloc[j] = temp_usage - annual_usage['Costs'] = annual_usage['Usage'] * (unit_price) + annual_usage['Costs'] = annual_usage['Usage'] * (self.unit_price) return annual_usage -- GitLab From af1ed50ee37b8eef3bd4db0fdac763ce623031ba Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 18:00:38 -0500 Subject: [PATCH 08/19] Reformat code --- bpeng/bill/awesome_disaggregate.py | 76 +++++++++++++++--------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 45c6e75..45a151a 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1,4 +1,4 @@ -# ''' this file calcuate bill disagregation for multifamily buildings''' +"""This file calcuate bill disagregation for multifamily buildings""" import warnings from datetime import timedelta @@ -72,8 +72,8 @@ class BillDisaggregation(): columns={'time': 'date', 'value': 'temperature'}, inplace=True) raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) - raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day), - str(x.date().year)])) + raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + str(x.date().year)])) daily_temp = raw_daily_temp daily_temp['date'] = pd.to_datetime(daily_temp['date']) self.daily_temp = daily_temp @@ -223,11 +223,11 @@ class BillDisaggregation(): bill_copy = raw_bill.copy() bill_copy['Bill From Date'] = pd.to_datetime( bill_copy['Bill From Date']) - bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ - str(x.date().year)])) + bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + str(x.date().year)])) bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month),str(x.date().day),\ - str(x.date().year)])) + bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + str(x.date().year)])) bill_copy = bill_copy[[ 'Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill', @@ -278,7 +278,7 @@ class BillDisaggregation(): # total_days_in_bill = timescale.days days_in_bill = np.array(bill['Days In Bill']) - #abnormal days in bill will return False + # abnormal days in bill will return False days_quality_index_inti = BillDisaggregation.outliers_iqr( list(days_in_bill)) days_quality_index_recheck = np.array( @@ -322,7 +322,7 @@ class BillDisaggregation(): ''' bill_quality_short = bill_quality[bill_quality['flag'] == 'short'] bill_consi = bill_formatted.copy() - #consolidate the billing period that is too short compare to others + # consolidate the billing period that is too short compare to others for xxx in range(len(bill_quality_short)): @@ -374,8 +374,8 @@ class BillDisaggregation(): -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] bill_consi['Days In Bill'].iloc[ -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] - #drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should - #taken care with solutions of 'the billing period that is toooo long + # drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should + # taken care with solutions of 'the billing period that is toooo long # or apply bill quality check again to identify the new index of the peroiod that is too long if len(bill_quality_short) != 0: @@ -407,7 +407,7 @@ class BillDisaggregation(): bill = processed_bill.copy() - #changed 2/2/2018 + # changed 2/2/2018 consumption = np.array(bill['Usage'] / bill['Days In Bill']) ahdd = [ @@ -419,7 +419,7 @@ class BillDisaggregation(): for x in bill['temperature'] ] - #it should be billing period average hdd / days + # it should be billing period average hdd / days daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) daily_cdd = np.array([np.mean(acdd[x]) for x in range(len(acdd))]) @@ -469,7 +469,7 @@ class BillDisaggregation(): monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) - #daily dhw usage + # daily dhw usage bill['dhw'] = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1) if len([*filter(lambda x: x >= 18, list(bill['Days In Bill']))]) > 0: @@ -477,7 +477,7 @@ class BillDisaggregation(): if len(dhw_only_consumption) > 0: dhw_quality_index = BillDisaggregation.outliers_iqr( - list(dhw_only_consumption)) #list + list(dhw_only_consumption)) # list dhw_only_consumption_checked = [] for xx in range(len(dhw_only_consumption)): @@ -534,11 +534,11 @@ class BillDisaggregation(): else: self.processed_bill = formatted_bill - self.processed_bill['temperature'] = [\ - self.bill_period_weather(x,y) \ - for x, y in zip(self.processed_bill['Bill From Date'], \ - self.processed_bill['Bill To Date']) - ] + self.processed_bill['temperature'] = [ + self.bill_period_weather(x, y) + for x, y in zip(self.processed_bill['Bill From Date'], + self.processed_bill['Bill To Date']) + ] self.processed_bill = self.processed_bill.sort_values('Bill From Date') @@ -583,7 +583,7 @@ class BillDisaggregation(): heating_set_point = opt.x[0] cooling_set_point = opt.x[1] - #change accordingly for JOENYC buildings + # change accordingly for JOENYC buildings if (heating_coef > 0) & (cooling_coef < 0): usage = 'Heating' @@ -596,7 +596,7 @@ class BillDisaggregation(): heating_coef = 0 cooling_coef = 0 - #changes on Jan 17th 2018 + # changes on Jan 17th 2018 # please futher check with more bills elif (heating_coef > 0) & (cooling_coef > 0): @@ -604,7 +604,7 @@ class BillDisaggregation(): usage = 'Heating' cooling_coef = 0 else: - #set the range of heating set point or cooling point - + # set the range of heating set point or cooling point - if round(heating_set_point) in range( 60, 95) and round(cooling_set_point) in range( 55, 75): @@ -613,7 +613,7 @@ class BillDisaggregation(): cooling_coef = cooling_coef else: - #using standard seting points to check the bill + # using standard seting points to check the bill regr = self.regression_1(72, 65, self.processed_bill) regr_model = regr[0] heating_coef, cooling_coef = regr_model.coef_ @@ -676,7 +676,7 @@ class BillDisaggregation(): heating_coef = regr_model.coef_ cooling_coef = 0 else: - #legit heating set-point 72'F + # legit heating set-point 72'F heating_set_point = 72 cooling_set_point = np.NaN regr = self.regression_1(heating_set_point, 300, @@ -769,7 +769,7 @@ class BillDisaggregation(): self.others_consumption_pred = 0 * self.processed_bill['Days In Bill'] + regr[3]['dhw'] else: self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ - + regr[3]['dhw'] + + regr[3]['dhw'] #print('dhw', regr[3]) bill_cp = self.processed_bill.copy() @@ -788,7 +788,7 @@ class BillDisaggregation(): self.r_squared_of_fit = regr[1] #self.set_points = opt.x - #update 2018/01/17 + # update 2018/01/17 self.heating_set_point = heating_set_point self.cooling_set_point = cooling_set_point self.output_table = bill_cp @@ -850,15 +850,15 @@ class BillDisaggregation(): lastdate = last_date_of_bill - timedelta(last_date_of_bill.day) - #cosntruct a new dataframe with bills from the first to last day for each month + # cosntruct a new dataframe with bills from the first to last day for each month for i in range(0, number_of_month): last_dates.append(lastdate) first_dates.append(lastdate.replace(day=1)) lastdate = first_dates[i] - timedelta(1) - monthly_output_table = pd.DataFrame(columns=['Bill From Date','Bill To Date', 'Days In Bill',\ - 'Heating Usage','Cooling Usage','Other Usage']) + monthly_output_table = pd.DataFrame(columns=['Bill From Date', 'Bill To Date', 'Days In Bill', + 'Heating Usage', 'Cooling Usage', 'Other Usage']) monthly_output_table['Bill From Date'] = first_dates monthly_output_table['Bill To Date'] = last_dates @@ -869,11 +869,11 @@ class BillDisaggregation(): lambda x: x.days) + 1 monthly_output_table['Month'] = monthly_output_table[ 'Bill From Date'].apply(lambda x: x.month) - monthly_output_table['temperature'] = [\ - self.bill_period_weather(x,y) \ - for x, y in zip(monthly_output_table['Bill From Date'], \ - monthly_output_table['Bill To Date']) - ] + monthly_output_table['temperature'] = [ + self.bill_period_weather(x, y) + for x, y in zip(monthly_output_table['Bill From Date'], + monthly_output_table['Bill To Date']) + ] hdd = [ list(BillDisaggregation.hdd(hp, xx) for xx in x) @@ -908,10 +908,10 @@ class BillDisaggregation(): 'Days In Bill'] * per_day monthly_output_table['Usage'] = monthly_output_table['Heating Usage']\ - + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] + + monthly_output_table['Cooling Usage'] + monthly_output_table['Other Usage'] - monthly_output = monthly_output_table[['Month','Bill From Date','Bill To Date', 'Days In Bill',\ - 'Heating Usage','Cooling Usage','Other Usage','HDD','CDD']] + monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', + 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] return monthly_output -- GitLab From 3c05cb6982ce97f2fcfe388579d3cb39441a5097 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 18:03:17 -0500 Subject: [PATCH 09/19] Add spacing --- bpeng/bill/awesome_disaggregate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 45a151a..264c271 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -36,7 +36,7 @@ class BillDisaggregation(): # pylint: disable=too-many-instance-attributes def __init__(self, bill, raw_daily_temp): - #self.account_info = account_info + # self.account_info = account_info self.bill = bill self.raw_daily_temp = raw_daily_temp self.processed_bill = None @@ -223,10 +223,12 @@ class BillDisaggregation(): bill_copy = raw_bill.copy() bill_copy['Bill From Date'] = pd.to_datetime( bill_copy['Bill From Date']) - bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + bill_copy['Bill From Date'] = bill_copy['Bill From Date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), str(x.date().year)])) bill_copy['Bill To Date'] = pd.to_datetime(bill_copy['Bill To Date']) - bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + bill_copy['Bill To Date'] = bill_copy['Bill To Date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), str(x.date().year)])) bill_copy = bill_copy[[ -- GitLab From 1e61416da336fa074c9b8dda78e10c490131328b Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 6 Feb 2018 18:18:17 -0500 Subject: [PATCH 10/19] fix formatting problems --- bpeng/bill/awesome_disaggregate.py | 55 +++++++++++++----------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 264c271..2dd8873 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -72,7 +72,8 @@ class BillDisaggregation(): columns={'time': 'date', 'value': 'temperature'}, inplace=True) raw_daily_temp['date'] = pd.to_datetime(raw_daily_temp['date']) - raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), str(x.date().day), + raw_daily_temp['date'] = raw_daily_temp['date'].apply(lambda x: '/'.join([str(x.date().month), + str(x.date().day), str(x.date().year)])) daily_temp = raw_daily_temp daily_temp['date'] = pd.to_datetime(daily_temp['date']) @@ -274,10 +275,10 @@ class BillDisaggregation(): bill = bill_formatted.copy() bill = pd.DataFrame(bill) - total_rows = np.array(bill.shape)[0] - # timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ + # total_rows = np.array(bill.shape)[0] + # timescale = pd.to_datetime(bill['Bill To Date'].iloc[total_rows-1])\ # - pd.to_datetime(bill['Bill From Date'].iloc[0]) - # total_days_in_bill = timescale.days + # total_days_in_bill = timescale.days days_in_bill = np.array(bill['Days In Bill']) # abnormal days in bill will return False @@ -291,7 +292,7 @@ class BillDisaggregation(): days_abn_index = [] for x in range(len(days_quality_index)): - if days_quality_index[x] == True: + if days_quality_index[x]: days_abn_index.append(x) bill_quality = pd.DataFrame(data=days_abn_index, columns=['index']) @@ -336,7 +337,7 @@ class BillDisaggregation(): if bill_consi['Days In Bill'][int( row_index - 1)] <= bill_consi['Days In Bill'][int( row_index + 1)]: - #print('row index', row_index) + bill_consi['Bill To Date'][int( row_index - 1)] = bill_consi['Bill To Date'][int( row_index)] @@ -376,15 +377,12 @@ class BillDisaggregation(): -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] bill_consi['Days In Bill'].iloc[ -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] - # drop the bills that with a billing period that is too short - which will change the index of the dataframe,I think it should - # taken care with solutions of 'the billing period that is toooo long - # or apply bill quality check again to identify the new index of the peroiod that is too long if len(bill_quality_short) != 0: bill_consi = bill_consi.drop( bill_consi.index[list(bill_quality_short['index'])]) - #bill_consi = bill_consi.reset_index(inplace = True) + # bill_consi = bill_consi.reset_index(inplace = True) bill_consi = bill_consi.reset_index(drop=False) return bill_consi @@ -468,7 +466,7 @@ class BillDisaggregation(): bill = abill.copy() ahdd = [[BillDisaggregation.hdd(hp, xx) for xx in x] for x in bill['temperature']] - monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) + # monthly_hdd = np.array([np.sum(ahdd[x]) for x in range(len(ahdd))]) daily_hdd = np.array([np.mean(ahdd[x]) for x in range(len(ahdd))]) # daily dhw usage @@ -483,7 +481,7 @@ class BillDisaggregation(): dhw_only_consumption_checked = [] for xx in range(len(dhw_only_consumption)): - if dhw_quality_index[xx] == False: + if not dhw_quality_index[xx]: dhw_only_consumption_checked.append( list(dhw_only_consumption)[xx]) @@ -526,11 +524,11 @@ class BillDisaggregation(): """ self.daily_temp = self.weather_cleaning(self.raw_daily_temp) - formatted_bill, shape = self.bill_formating(self.bill) + formatted_bill, shape = self.bill_formating(self.bill) # pylint: disable=unused-variable quality = self.bill_quality(formatted_bill) if any(i == 'short' for i in quality.flag): - #any(quality.flag.astype(str) == 'long') + # any(quality.flag.astype(str) == 'long') self.processed_bill = self.short_bill_consolidate( formatted_bill, quality) else: @@ -736,15 +734,12 @@ class BillDisaggregation(): self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill[ 'Days In Bill'] - real_sum = np.array(self.processed_bill['Usage']) - predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + self.others_consumption_pred + # real_sum = np.array(self.processed_bill['Usage']) + # predict_sum = self.heating_consumption_pred + self.cooling_consumption_pred + \ + # self.others_consumption_pred - diff = real_sum - predict_sum + # diff = real_sum - predict_sum - # if self.regr_model.intercept_ < 0: - # for i in range(len(diff)): - # if diff[i] > 0: - # self.others_consumption_pred[i] = diff[i] else: self.heating_consumption_pred = self.processed_bill[ 'Days In Bill'] * 0 @@ -761,7 +756,7 @@ class BillDisaggregation(): self.hddcdd = np.array( pd.DataFrame(hddcdd).mul( list(self.processed_bill['Days In Bill']), axis=0)) - #self.hddcdd = hddcdd + self.regr_model = regr_model self.heating_consumption_pred = np.array( self.hddcdd[:, 0]) * self.regr_model.coef_[0] @@ -772,7 +767,6 @@ class BillDisaggregation(): else: self.others_consumption_pred = self.regr_model.intercept_ * self.processed_bill['Days In Bill']\ + regr[3]['dhw'] - #print('dhw', regr[3]) bill_cp = self.processed_bill.copy() bill_cp = self.processed_bill[[ @@ -785,10 +779,10 @@ class BillDisaggregation(): if self.usage == 'Both Not': self.r_squared_of_fit = np.NaN - #self.h = np.NaN + # self.h = np.NaN else: self.r_squared_of_fit = regr[1] - #self.set_points = opt.x + # self.set_points = opt.x # update 2018/01/17 self.heating_set_point = heating_set_point @@ -829,7 +823,8 @@ class BillDisaggregation(): usage = np.NaN r = np.NaN - return usage, r, r_method, consumption, heating, cooling, others, diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point + return usage, r, r_method, consumption, heating, cooling, others, \ + diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point def output_to_month(self, last_date_of_bill, hp, cp, number_of_month): """ @@ -917,10 +912,6 @@ class BillDisaggregation(): return monthly_output - -# this_lastdate = self.output_table['Bill To Date'].iloc[-1] - timedelta(self.output_table['Bill To Date'].iloc[-1].day) -# self.most_recent_monthly_output = self.output_to_month(this_lastdate,self.heating_set_point,self.cooling_set_point) - def non_weahter_related_breakdown(self, end_uses, monthly_output_table): ''' breakdown the non_weather_related_usage @@ -929,8 +920,8 @@ class BillDisaggregation(): end_uses(dictionary): key: end use value: percentage of the end use among non-weather related usage - monthly_output_table (pd.DataFrame): monthly bill breakdown starts with the first date of the month, ends with the - last date + monthly_output_table (pd.DataFrame): monthly bill breakdown starts with the first date of the month, + ends with the last date Returns: pd.DataFrame: bill breakdown of all end-use -- GitLab From 558ee917b957f1b2941f66f97b820ee294e45a67 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 7 Feb 2018 18:03:42 -0500 Subject: [PATCH 11/19] add to_json funtion/ add new output table --- bpeng/bill/awesome_disaggregate.py | 40 ++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 2dd8873..4284f4f 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -51,10 +51,13 @@ class BillDisaggregation(): self.heating_set_point = None self.cooling_set_point = None self.days_in_bills = None - self.days_in_12_bills = None self.output_table = None + self.output_table_monthly = None self.most_recent_monthly_output = None self.unit_price = None + self.bill_breakdown = None + self.recent_year_bill_breakdown = None + self.annual_usage = None def weather_cleaning(self, raw_daily_temp): ''' @@ -506,7 +509,7 @@ class BillDisaggregation(): return regr_model, score, regression_temp, bill - def main(self, usage='Unknown'): + def main(self, end_uses, usage='Unknown'): """ Main function for the optimization and disaggregation @@ -789,6 +792,18 @@ class BillDisaggregation(): self.cooling_set_point = cooling_set_point self.output_table = bill_cp + last_bill_date = self.processed_bill['Bill To Date'].iloc[-1] + first_bill_date = self.processed_bill['Bill From Date'].iloc[0] + + billing_months = self.num_month_dates(last_bill_date, first_bill_date) + self.output_table_monthly = self.output_to_month(last_bill_date, self.heating_set_point, + self.cooling_set_point, billing_months) + self.most_recent_monthly_output = self.output_to_month(last_bill_date, + self.heating_set_point, self.cooling_set_point, 12) + self.bill_breakdown = self.non_weahter_related_breakdown(end_uses, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(end_uses, self.most_recent_monthly_output) + self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, end_uses) + def benchmarking_output(self): ''' output perimeters that related with evaluating the bills @@ -835,6 +850,7 @@ class BillDisaggregation(): last_day_of_bill(datetime): last day of bill hp(float): heating season indoor set point cp(float): cooling season indoor set point + number_of_month(int): number of month that need to be re-format Returns: @@ -1012,3 +1028,23 @@ class BillDisaggregation(): 'prejected cooling' ]) plt.show() + + def to_json(self, period='bill_breakdown'): + """ + Output in json file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + if period == 'bill_breakdown': + return self.bill_breakdown.to_json(orient="records") + + return self.output_table_monthly.to_json(orient="records") -- GitLab From ee556c9d8a843d6e5a35b93c48309eacd823de67 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Feb 2018 10:28:28 -0500 Subject: [PATCH 12/19] add default params to main --- bpeng/bill/awesome_disaggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 4284f4f..ef0d24f 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -509,7 +509,7 @@ class BillDisaggregation(): return regr_model, score, regression_temp, bill - def main(self, end_uses, usage='Unknown'): + def main(self, end_uses={'Miscellaneous': 1}, usage='Unknown'): """ Main function for the optimization and disaggregation -- GitLab From a87960af7de5b7292e206142e2e4a60e9b8b19a1 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Feb 2018 18:15:34 -0500 Subject: [PATCH 13/19] add to_dict function --- bpeng/bill/awesome_disaggregate.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index ef0d24f..2686494 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1044,7 +1044,29 @@ class BillDisaggregation(): json: output in json format """ + if period == 'bill_breakdown': return self.bill_breakdown.to_json(orient="records") return self.output_table_monthly.to_json(orient="records") + + def to_dict(self, period='bill_breakdown'): + """ + Output in dictionary file + + Args: + + period (str): 'bill_breakdown' for bill breakdown with non-weather realted end uses + 'bill' for monthly out put for bill with only weather related breakdown + default 'bill_breakdown' + + Returns: + + json: output in json format + + """ + + if period == 'bill_breakdown': + return self.bill_breakdown.to_dict(orient="records") + + return self.output_table_monthly.to_dict(orient="records") -- GitLab From c41fd7e75a2b64878b26fd6b3e6771d3156967a1 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Feb 2018 18:17:59 -0500 Subject: [PATCH 14/19] comment matplot --- bpeng/bill/awesome_disaggregate.py | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 2686494..277ce66 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -3,7 +3,7 @@ import warnings from datetime import timedelta -import matplotlib.pyplot as plt +# import matplotlib.pyplot as plt import numpy as np import pandas as pd from dateutil import relativedelta @@ -1011,23 +1011,23 @@ class BillDisaggregation(): print('R-squared of fit is {}'.format(self.r_squared_of_fit)) print('Usage is {}'.format(self.usage)) - @staticmethod - def projection_figure(bill): - '''ploat the disaggregated bill''' - - plt.figure(figsize=(10, 5)) - x = pd.to_datetime(bill['Bill From Date']) - y = bill['Usage'] - plt.plot(x, y) - plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + - bill['Other Usage'])) - plt.plot(x, bill['Heating Usage']) - plt.plot(x, bill['Cooling Usage']) - plt.legend([ - 'real consumption', 'prejected consumption', 'prejected heating', - 'prejected cooling' - ]) - plt.show() + # @staticmethod + # def projection_figure(bill): + # '''ploat the disaggregated bill''' + + # plt.figure(figsize=(10, 5)) + # x = pd.to_datetime(bill['Bill From Date']) + # y = bill['Usage'] + # plt.plot(x, y) + # plt.plot(x, (bill['Heating Usage'] + bill['Cooling Usage'] + + # bill['Other Usage'])) + # plt.plot(x, bill['Heating Usage']) + # plt.plot(x, bill['Cooling Usage']) + # plt.legend([ + # 'real consumption', 'prejected consumption', 'prejected heating', + # 'prejected cooling' + # ]) + # plt.show() def to_json(self, period='bill_breakdown'): """ -- GitLab From d0966b00ba294fe6163046efffadc49ff342ae50 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Feb 2018 18:44:31 -0500 Subject: [PATCH 15/19] to_json date formate change --- bpeng/bill/awesome_disaggregate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 277ce66..2fa7159 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1046,9 +1046,9 @@ class BillDisaggregation(): """ if period == 'bill_breakdown': - return self.bill_breakdown.to_json(orient="records") + return self.bill_breakdown.to_json(orient="records", date_formate="iso") - return self.output_table_monthly.to_json(orient="records") + return self.output_table_monthly.to_json(orient="records", date_formate="iso") def to_dict(self, period='bill_breakdown'): """ @@ -1070,3 +1070,4 @@ class BillDisaggregation(): return self.bill_breakdown.to_dict(orient="records") return self.output_table_monthly.to_dict(orient="records") + -- GitLab From 16c6d33c5d0605a6baffc036e8cfa01cd94a4956 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 8 Feb 2018 18:47:51 -0500 Subject: [PATCH 16/19] format --- bpeng/bill/awesome_disaggregate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 2fa7159..a1d862c 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -1046,9 +1046,9 @@ class BillDisaggregation(): """ if period == 'bill_breakdown': - return self.bill_breakdown.to_json(orient="records", date_formate="iso") + return self.bill_breakdown.to_json(orient="records", date_format="iso") - return self.output_table_monthly.to_json(orient="records", date_formate="iso") + return self.output_table_monthly.to_json(orient="records", date_format="iso") def to_dict(self, period='bill_breakdown'): """ -- GitLab From 98ceb26852ba2afef3137456e63f51d179ea9ef0 Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Thu, 8 Feb 2018 20:10:57 -0500 Subject: [PATCH 17/19] Rename end_use to non weather realated end use --- bpeng/bill/awesome_disaggregate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index a1d862c..8f9958e 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -509,7 +509,7 @@ class BillDisaggregation(): return regr_model, score, regression_temp, bill - def main(self, end_uses={'Miscellaneous': 1}, usage='Unknown'): + def main(self, non_weather_related_end_use={'Miscellaneous': 1}, usage='Unknown'): """ Main function for the optimization and disaggregation @@ -800,9 +800,9 @@ class BillDisaggregation(): self.cooling_set_point, billing_months) self.most_recent_monthly_output = self.output_to_month(last_bill_date, self.heating_set_point, self.cooling_set_point, 12) - self.bill_breakdown = self.non_weahter_related_breakdown(end_uses, self.output_table_monthly) - self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(end_uses, self.most_recent_monthly_output) - self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, end_uses) + self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.most_recent_monthly_output) + self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) def benchmarking_output(self): ''' -- GitLab From 6e766db2b146763400de389b3ab4496cdb3dfd6e Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Thu, 8 Feb 2018 20:13:57 -0500 Subject: [PATCH 18/19] Rename usage to weather related usage --- bpeng/bill/awesome_disaggregate.py | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 8f9958e..f2a60ba 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -509,7 +509,7 @@ class BillDisaggregation(): return regr_model, score, regression_temp, bill - def main(self, non_weather_related_end_use={'Miscellaneous': 1}, usage='Unknown'): + def main(self, non_weather_related_end_use={'Miscellaneous': 1}, weather_related_usage='Unknown'): """ Main function for the optimization and disaggregation @@ -547,7 +547,7 @@ class BillDisaggregation(): regression_method = 1 - if usage == 'Unknown': + if weather_related_usage == 'Unknown': opt = minimize( lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], (65, 65), @@ -561,17 +561,17 @@ class BillDisaggregation(): if -opt.fun > 0.5: if (heating_coef > 0) & (cooling_coef <= 0): - usage = 'Heating' + weather_related_usage = 'Heating' elif (heating_coef <= 0) & (cooling_coef > 0): - usage = 'Cooling' + weather_related_usage = 'Cooling' elif (heating_coef <= 0) & (cooling_coef <= 0): - usage = 'Both Not' + weather_related_usage = 'Both Not' elif (heating_coef >= 0) & (cooling_coef >= 0): - usage = 'Both' + weather_related_usage = 'Both' else: - usage = 'Both Not' + weather_related_usage = 'Both Not' - if usage == 'Both': + if weather_related_usage == 'Both': opt = minimize( lambda x: -self.regression_1(x[0], x[1], self.processed_bill)[1], (65, 65), @@ -589,13 +589,13 @@ class BillDisaggregation(): # change accordingly for JOENYC buildings if (heating_coef > 0) & (cooling_coef < 0): - usage = 'Heating' + weather_related_usage = 'Heating' cooling_coef = 0 elif (heating_coef <= 0) & (cooling_coef > 0): - usage = 'Cooling' + weather_related_usage = 'Cooling' heating_coef = 0 elif (heating_coef <= 0) & (cooling_coef <= 0): - usage = 'Both Not' + weather_related_usage = 'Both Not' heating_coef = 0 cooling_coef = 0 @@ -604,14 +604,14 @@ class BillDisaggregation(): elif (heating_coef > 0) & (cooling_coef > 0): if heating_coef / cooling_coef > 5: - usage = 'Heating' + weather_related_usage = 'Heating' cooling_coef = 0 else: # set the range of heating set point or cooling point - if round(heating_set_point) in range( 60, 95) and round(cooling_set_point) in range( 55, 75): - usage = 'Both' + weather_related_usage = 'Both' heating_coef = heating_coef cooling_coef = cooling_coef @@ -625,23 +625,23 @@ class BillDisaggregation(): cooling_set_point = opt.x[1] if (heating_coef > 0) & (cooling_coef < 0): - usage = 'Heating' + weather_related_usage = 'Heating' cooling_coef = 0 elif (heating_coef <= 0) & (cooling_coef > 0): - usage = 'Cooling' + weather_related_usage = 'Cooling' heating_coef = 0 elif (heating_coef <= 0) & (cooling_coef <= 0): - usage = 'Both Not' + weather_related_usage = 'Both Not' heating_coef = 0 cooling_coef = 0 elif (heating_coef > 0) & (cooling_coef > 0): if heating_coef / cooling_coef > 5: - usage = 'Heating' + weather_related_usage = 'Heating' cooling_coef = 0 else: - usage = 'Both' + weather_related_usage = 'Both' - if usage == 'Heating': + if weather_related_usage == 'Heating': opt_1 = minimize( lambda x: -self.regression_1(x, 300, self.processed_bill)[1], 65, @@ -690,7 +690,7 @@ class BillDisaggregation(): heating_coef = regr_model.coef_ cooling_coef = 0 - if usage == 'Cooling': + if weather_related_usage == 'Cooling': opt = minimize( lambda x: -self.regression_1(x, 300, self.processed_bill)[1], 65, @@ -703,7 +703,7 @@ class BillDisaggregation(): cooling_set_point = opt.x[0] heating_set_point = np.NaN - self.usage = usage + self.usage = weather_related_usage if self.usage == 'Both Not': self.heating_consumption_pred = self.processed_bill['Usage'] * 0 -- GitLab From c8f277f9d2ef2be39c2dd630261181d9ede94776 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 12 Feb 2018 18:18:20 -0500 Subject: [PATCH 19/19] change main function name to optimize setpoints --- bpeng/bill/awesome_disaggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index f2a60ba..1f070c0 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -509,7 +509,7 @@ class BillDisaggregation(): return regr_model, score, regression_temp, bill - def main(self, non_weather_related_end_use={'Miscellaneous': 1}, weather_related_usage='Unknown'): + def optimize_setpoints(self, non_weather_related_end_use={'Miscellaneous': 1}, weather_related_usage='Unknown'): """ Main function for the optimization and disaggregation -- GitLab