"""Bill disaggregation for multifamily buildings.

Splits metered utility usage into heating, cooling and non-weather-related
components by regressing average daily usage of each billing period on the
period's heating / cooling degree days.
"""

import warnings
from datetime import timedelta

import numpy as np
import pandas as pd
from dateutil import relativedelta
from scipy.optimize import minimize

warnings.simplefilter('ignore')


class BillDisaggregation():
    """
    Class for Bill Disaggregation

    Args:

        bill (pd.DataFrame): raw bill (from parsing). Must contain the columns
            'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage' and
            'Total Charge'
        raw_daily_temp (pd.DataFrame): daily outdoor temperature with the
            columns 'time' and 'value' (or already 'date' and 'temperature')

    Attributes (populated by ``optimize_setpoints``):

        usage (str): 'Heating', 'Cooling', 'Both' or 'Both Not'
        regression_method (int): 0 - no weather dependence,
            1 - HDD/CDD regression, 2 - summer base load + HDD regression
        r_squared_of_fit (float): R-squared of the chosen regression
            (NaN when usage is 'Both Not')
        heating_set_point, cooling_set_point (float): set points used (or NaN)
        output_table (pd.DataFrame): per-billing-period disaggregation
        output_table_monthly (pd.DataFrame): calendar-month disaggregation
        most_recent_monthly_output (pd.DataFrame): last 12 calendar months
        bill_breakdown, recent_year_bill_breakdown (pd.DataFrame): monthly
            tables with the non-weather usage split into end uses
        annual_usage (pd.DataFrame): usage and cost per end use
    """

    # pylint: disable=too-many-instance-attributes

    _USAGE_OPTIONS = ('Unknown', 'Heating', 'Cooling', 'Both', 'Both Not')
    # Set points no outdoor temperature can reach; passing one of them to
    # ``regression_1`` switches the corresponding degree-day term off.
    _COOLING_OFF_SET_POINT = 300
    _HEATING_OFF_SET_POINT = -300
    # Minimum R-squared for a degree-day regression to be trusted.
    _MIN_R_SQUARED = 0.5

    def __init__(self, bill, raw_daily_temp):
        self.bill = bill
        self.raw_daily_temp = raw_daily_temp
        self.processed_bill = None
        self.daily_temp = None
        self.usage = None
        self.regression_method = None
        self.heating_consumption_pred = None
        self.cooling_consumption_pred = None
        self.others_consumption_pred = None
        # Legacy misspelled names; the class never populated them but they
        # are kept so existing attribute look-ups do not break.
        self.heating_comsuption_pred = None
        self.cooling_comsuption_pred = None
        self.others_comsuption_pred = None
        self.hddcdd = None
        self.regr_model = None
        self.r_squared_of_fit = None
        self.heating_set_point = None
        self.cooling_set_point = None
        self.days_in_bills = None
        self.output_table = None
        self.output_table_monthly = None
        self.most_recent_monthly_output = None
        self.unit_price = None
        self.bill_breakdown = None
        self.recent_year_bill_breakdown = None
        self.annual_usage = None

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_dates(values):
        """Return ``values`` as timezone-naive timestamps at midnight."""
        stamps = pd.to_datetime(values)
        if stamps.dt.tz is not None:
            # keep the wall-clock date, as the old m/d/Y round trip did
            stamps = stamps.dt.tz_localize(None)
        return stamps.dt.normalize()

    @staticmethod
    def _date_position(dates, day):
        """Return the position of the first row of ``dates`` equal to ``day``."""
        hits = np.flatnonzero(np.asarray(dates == day))
        if hits.size == 0:
            raise IndexError('no temperature record for {}'.format(day))
        return int(hits[0])

    @staticmethod
    def _hdd_array(set_point, temperatures):
        """Vectorised ``hdd``: heating degree days for each temperature."""
        temps = np.asarray(temperatures, dtype=float)
        active = (temps < 60) & (set_point > temps)
        return np.where(active, set_point - temps, 0.0)

    @staticmethod
    def _cdd_array(set_point, temperatures):
        """Vectorised ``cdd``: cooling degree days for each temperature."""
        temps = np.asarray(temperatures, dtype=float)
        if not set_point > 55:
            return np.zeros(temps.shape)
        return np.where(set_point < temps, temps - set_point, 0.0)

    @staticmethod
    def _fit_linear_model(features, target):
        """Fit an ordinary least squares model and return it with its R-squared."""
        # Imported here so that the pure pandas/NumPy helpers of this class
        # stay usable when scikit-learn is not installed.
        from sklearn import linear_model  # pylint: disable=import-outside-toplevel

        model = linear_model.LinearRegression()
        model.fit(features, target)
        return model, model.score(features, target)

    @staticmethod
    def _maximize_r_squared(score, start):
        """Run Nelder-Mead on ``-score(x)`` starting from ``start``."""
        return minimize(
            lambda x: -score(x),
            start,
            method='nelder-mead',
            # ``xatol`` is the Nelder-Mead name of the former ``xtol`` option
            options={'xatol': 1e-2, 'disp': False})

    @staticmethod
    def _in_range(value, low, high):
        """True when ``value`` rounded to an integer lies in ``range(low, high)``."""
        return low <= round(float(value)) < high

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Return numerator / denominator, or 0 when that is undefined."""
        if denominator == 0:
            return 0
        ratio = numerator / denominator
        if np.isnan(ratio):
            return 0
        return ratio

    # ------------------------------------------------------------------
    # weather
    # ------------------------------------------------------------------

    def weather_cleaning(self, raw_daily_temp):
        '''
        Format the daily temperature data from influx query

        The input frame is not modified; a renamed, date-normalised copy is
        stored in ``self.daily_temp`` and returned.

        Args:

            raw_daily_temp (pd.DataFrame): raw temperature data with the
                columns 'time' and 'value'

        Returns:

            pd.DataFrame: daily temperature with the columns 'date'
            (midnight timestamps) and 'temperature'
        '''
        daily_temp = raw_daily_temp.rename(
            columns={'time': 'date', 'value': 'temperature'})
        daily_temp['date'] = self._normalize_dates(daily_temp['date'])
        self.daily_temp = daily_temp
        return self.daily_temp

    def bill_period_weather(self, bill_from_date, bill_end_date, include_end=False):
        '''
        Get the outdoor temperatures between two dates

        Args:

            bill_from_date (Datetime): start date of a period (included)
            bill_end_date (Datetime): end date of a period
            include_end (bool): also return the temperature of
                ``bill_end_date`` itself; default False

        Returns:

            list: outdoor temperatures for the period

        Raises:

            IndexError: when either date has no row in ``self.daily_temp``
        '''
        dates = self.daily_temp['date']
        start = self._date_position(dates, bill_from_date)
        end = self._date_position(dates, bill_end_date)
        if include_end:
            end += 1
        # positional slice, so a non-default index cannot shift the window
        return list(self.daily_temp['temperature'].iloc[start:end])

    @staticmethod
    def cdd(indoor_set_point, outdoor_temp):
        '''
        Cooling degree days of a single day

        Returns ``outdoor_temp - indoor_set_point`` when the set point is
        above 55 F and below the outdoor temperature, otherwise 0.
        '''
        if indoor_set_point > 55 and indoor_set_point < outdoor_temp:
            return outdoor_temp - indoor_set_point
        return 0

    @staticmethod
    def hdd(indoor_set_point, outdoor_temp):
        '''
        Heating degree days of a single day

        Returns ``indoor_set_point - outdoor_temp`` when the outdoor
        temperature is below 60 F and below the set point, otherwise 0.
        '''
        if outdoor_temp < 60 and indoor_set_point > outdoor_temp:
            return indoor_set_point - outdoor_temp
        return 0

    @staticmethod
    def threshold(data, set_point):
        '''Return 0 when data is less than or equal to set_point, else data'''
        if data <= set_point:
            return 0
        return data

    @staticmethod
    def outliers_iqr(ys):
        '''
        Find outliers using the IQR method

        Args:

            ys (list): numbers to be checked for outliers

        Returns:

            list: one boolean per value, True when the value lies more than
            1.5 IQR outside the first / third quartile. Empty input gives
            an empty list.
        '''
        values = list(ys)
        if not values:
            return []
        quartile_1, quartile_3 = np.percentile(values, [25, 75])
        iqr = quartile_3 - quartile_1
        lower_bound = quartile_1 - (iqr * 1.5)
        upper_bound = quartile_3 + (iqr * 1.5)
        return [bool(x > upper_bound or x < lower_bound) for x in values]

    @staticmethod
    def anomaly_point(alist, thresholds):
        '''
        Compare every value with the mean of the list

        Args:

            alist (list): numbers to be checked
            thresholds (float): largest accepted value of
                ``abs(value - mean) / mean``

        Returns:

            list: one boolean per value, True when the value is within the
            threshold (i.e. NOT an anomaly)
        '''
        values = np.array(alist)
        mean = values.mean()
        return [abs(value - mean) / mean < thresholds for value in values]

    @staticmethod
    def num_month_dates(last_date_bill, first_date_bill):
        '''
        Return the number of calendar months between two dates

        Months are counted from the first day of the month after
        ``first_date_bill`` to the last day of the month before
        ``last_date_bill``.
        '''
        lastdate = last_date_bill - timedelta(last_date_bill.day)
        # Go to day 1 first: adding 32 days to a late-month date (e.g. Jan 31)
        # would otherwise jump over the following month.
        firstdate = (first_date_bill.replace(day=1) + timedelta(days=32)).replace(day=1)
        r = relativedelta.relativedelta(lastdate, firstdate)
        return r.years * 12 + r.months + 1

    # ------------------------------------------------------------------
    # bill cleaning
    # ------------------------------------------------------------------

    def bill_formating(self, raw_bill):
        '''
        Bill Cleaning
        Step 1:
            1. format each column of the raw bill
            2. drop periods that do not end after they start
            3. drop NaN / duplicates and sort by start date

        Also stores the average unit price (total charge / total usage, NaN
        when the usage sums to zero) in ``self.unit_price``.

        Args:

            raw_bill (pd.DataFrame): a raw bill with the columns
                'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage'
                and 'Total Charge'

        Returns:
            pd.DataFrame: the formatted bill
            str: 'True' when rows were removed during cleaning, else 'False'
                (a string, kept for backward compatibility - do not test it
                for truthiness)
        '''
        columns = ['Bill From Date', 'Bill To Date', 'Usage', 'Days In Bill',
                   'Total Charge']
        bill = raw_bill[columns].copy()
        bill['Bill From Date'] = self._normalize_dates(bill['Bill From Date'])
        bill['Bill To Date'] = self._normalize_dates(bill['Bill To Date'])

        bill = bill[bill['Bill To Date'] > bill['Bill From Date']]
        bill_formatted = (bill.sort_values('Bill From Date')
                          .dropna()
                          .drop_duplicates()
                          .reset_index(drop=True))

        if len(bill_formatted) == len(raw_bill):
            bill_shape_change = 'False'
        else:
            bill_shape_change = 'True'

        total_usage = bill_formatted['Usage'].sum()
        if total_usage:
            self.unit_price = bill_formatted['Total Charge'].sum() / total_usage
        else:
            self.unit_price = np.nan

        return bill_formatted, bill_shape_change

    def bill_quality(self, bill_formatted):
        '''
        Bill Cleaning
        Step 2:
            Find billing periods that are too short or too long. A period is
            abnormal when its length is an IQR outlier AND lies outside the
            25-34 day range.

        Args:
            bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1

        Returns:
            pd.DataFrame: a dataframe with columns:
                'index': position of the abnormal billing period
                'flag': 'short' when the period is shorter than the mean
                        period length, otherwise 'long'
        '''
        days_in_bill = np.array(bill_formatted['Days In Bill'])

        is_outlier = np.array(
            BillDisaggregation.outliers_iqr(list(days_in_bill)), dtype=bool)
        is_typical = (days_in_bill >= 25) & (days_in_bill <= 34)
        abnormal = np.flatnonzero(is_outlier & ~is_typical)

        mean_days = days_in_bill.mean()
        # the flag of each row is derived from that same row
        flags = ['short' if days_in_bill[position] < mean_days else 'long'
                 for position in abnormal]

        return pd.DataFrame({'index': abnormal, 'flag': flags},
                            columns=['index', 'flag'])

    def short_bill_consolidate(self, bill_formatted, bill_quality):
        '''
        Bill Cleaning
        Step 3:
            Merge every billing period flagged 'short' into a neighbouring
            period and drop it. The neighbour is the previous period unless
            the next one has fewer days (first / last periods only have one
            neighbour). Dates, usage, days and total charge of the short
            period are carried over, so totals are preserved.

        Args:
            bill_formatted (pd.DataFrame): formatted bill from Bill Cleaning Step 1
            bill_quality (pd.DataFrame): bill quality from Step 2

        Returns:
            pd.DataFrame: the consolidated bill; the former row labels are
            kept in an 'index' column
        '''
        bill_consi = bill_formatted.copy()
        short_rows = sorted(
            int(row) for row in
            bill_quality.loc[bill_quality['flag'] == 'short', 'index'])

        column = bill_consi.columns.get_loc
        days_col = column('Days In Bill')
        summed_cols = [column(name)
                       for name in ('Usage', 'Days In Bill', 'Total Charge')
                       if name in bill_consi.columns]
        row_count = len(bill_consi)
        absorbed = set()

        for row in short_rows:
            # nearest neighbours that still exist (not merged away earlier)
            previous_row = next(
                (i for i in range(row - 1, -1, -1) if i not in absorbed), None)
            next_row = next(
                (i for i in range(row + 1, row_count) if i not in absorbed), None)

            if previous_row is None and next_row is None:
                # nothing left to merge into: keep the period as it is
                continue

            merge_backwards = next_row is None or (
                previous_row is not None and
                bill_consi.iloc[previous_row, days_col] <=
                bill_consi.iloc[next_row, days_col])

            if merge_backwards:
                target = previous_row
                bill_consi.iloc[target, column('Bill To Date')] = \
                    bill_consi.iloc[row, column('Bill To Date')]
            else:
                target = next_row
                bill_consi.iloc[target, column('Bill From Date')] = \
                    bill_consi.iloc[row, column('Bill From Date')]

            for col in summed_cols:
                bill_consi.iloc[target, col] = (
                    bill_consi.iloc[target, col] + bill_consi.iloc[row, col])
            absorbed.add(row)

        if absorbed:
            bill_consi = bill_consi.drop(bill_consi.index[sorted(absorbed)])

        return bill_consi.reset_index(drop=False)

    # ------------------------------------------------------------------
    # regressions
    # ------------------------------------------------------------------

    def regression_1(self, hp, cp, processed_bill):
        '''
        A linear regression model with heating and cooling set points fixed

        Average daily usage of every billing period is regressed on the
        period's average daily HDD and CDD (averages of 0.1 or less are
        treated as 0).

        Args:

            hp (float): heating season indoor set point
            cp (float): cooling season indoor set point
            processed_bill (pd.DataFrame): cleaned bill with a 'temperature'
                column holding the daily temperatures of each period

        Returns:

            sklearn.linear_model.LinearRegression: regression model
            float: r-squared of the linear regression model
            2d-array: average daily HDD (column 0) and CDD (column 1) of
                each billing period
        '''
        consumption = np.array(
            processed_bill['Usage'] / processed_bill['Days In Bill'])
        temperatures = list(processed_bill['temperature'])

        daily_hdd = np.array([self._hdd_array(hp, t).mean() for t in temperatures])
        daily_cdd = np.array([self._cdd_array(cp, t).mean() for t in temperatures])

        # negligible degree days are noise, not weather load
        daily_hdd = np.where(daily_hdd <= 0.1, 0, daily_hdd)
        daily_cdd = np.where(daily_cdd <= 0.1, 0, daily_cdd)

        regression_temp = np.array([daily_hdd, daily_cdd]).T
        regr_model, score = self._fit_linear_model(regression_temp, consumption)

        return regr_model, score, regression_temp

    def summer_dhw(self, hp, abill):
        '''
        Regression that uses summer usage as the base load for the year

        The average daily usage of periods without heating demand (average
        daily HDD of 0.1 or less, IQR outliers removed) is taken as domestic
        hot water usage; the remaining daily usage is regressed on the
        average daily HDD.

        Args:

            hp (float): heating season indoor set point
            abill (pd.DataFrame): cleaned bill with daily temperature

        Returns:

            sklearn.linear_model.LinearRegression: regression model
            float: r-squared of the linear regression model
            2d-array: average daily HDD of each billing period, shape (n, 1)
            pd.DataFrame: copy of the bill with the base load of each period
                in a 'dhw' column
        '''
        bill = abill.copy()
        daily_hdd = np.array(
            [self._hdd_array(hp, t).mean() for t in bill['temperature']])

        # daily usage of the periods without heating demand, 0 elsewhere
        summer_usage = bill['Usage'] / bill['Days In Bill'] * (daily_hdd <= 0.1)
        summer_usage = np.asarray(summer_usage[summer_usage != 0])

        daily_dhw = 0
        if (bill['Days In Bill'] >= 18).any() and len(summer_usage) > 0:
            is_outlier = np.array(
                BillDisaggregation.outliers_iqr(list(summer_usage)), dtype=bool)
            daily_dhw = summer_usage[~is_outlier].mean()

        bill['dhw'] = daily_dhw * bill['Days In Bill']

        regression_temp = daily_hdd.reshape(-1, 1)
        consumption = (bill['Usage'] - bill['dhw']) / bill['Days In Bill']
        regr_model, score = self._fit_linear_model(regression_temp, consumption)

        return regr_model, score, regression_temp, bill

    # ------------------------------------------------------------------
    # main entry point and its helpers
    # ------------------------------------------------------------------

    def _prepare_processed_bill(self):
        """Clean weather and bill, attach daily temperatures to each period."""
        self.daily_temp = self.weather_cleaning(self.raw_daily_temp)
        formatted_bill, _ = self.bill_formating(self.bill)
        quality = self.bill_quality(formatted_bill)

        if (quality['flag'] == 'short').any():
            bill = self.short_bill_consolidate(formatted_bill, quality)
        else:
            bill = formatted_bill

        if len(bill) == 0:
            raise ValueError('the bill has no valid billing period')

        bill['temperature'] = [
            self.bill_period_weather(start, end)
            for start, end in zip(bill['Bill From Date'], bill['Bill To Date'])
        ]
        self.processed_bill = bill.sort_values('Bill From Date')

    def _optimize_both_set_points(self):
        """Optimise heating and cooling set points together."""
        bill = self.processed_bill
        return self._maximize_r_squared(
            lambda x: self.regression_1(x[0], x[1], bill)[1], (65, 65))

    def _classify_unknown_usage(self, opt):
        """Guess the usage type from the signs of the optimised coefficients."""
        if not -opt.fun > self._MIN_R_SQUARED:
            return 'Both Not'
        model = self.regression_1(opt.x[0], opt.x[1], self.processed_bill)[0]
        heating_coef, cooling_coef = model.coef_
        if heating_coef > 0 and cooling_coef <= 0:
            return 'Heating'
        if heating_coef <= 0 and cooling_coef > 0:
            return 'Cooling'
        if heating_coef <= 0 and cooling_coef <= 0:
            return 'Both Not'
        return 'Both'

    @staticmethod
    def _usage_from_coefficients(heating_coef, cooling_coef):
        """Refine a 'Both' assumption from the fitted coefficients."""
        if heating_coef > 0 and cooling_coef < 0:
            return 'Heating'
        if heating_coef <= 0 and cooling_coef > 0:
            return 'Cooling'
        if heating_coef <= 0 and cooling_coef <= 0:
            return 'Both Not'
        if heating_coef > 0 and cooling_coef > 0 and heating_coef / cooling_coef > 5:
            # cooling is negligible next to heating
            return 'Heating'
        return 'Both'

    def _fit_both(self, opt):
        """Fit heating + cooling; returns (usage, regr, heating_sp, cooling_sp)."""
        bill = self.processed_bill
        heating_set_point, cooling_set_point = opt.x[0], opt.x[1]
        regr = self.regression_1(heating_set_point, cooling_set_point, bill)
        heating_coef, cooling_coef = regr[0].coef_
        usage = self._usage_from_coefficients(heating_coef, cooling_coef)

        plausible = (self._in_range(heating_set_point, 60, 95) and
                     self._in_range(cooling_set_point, 55, 75))
        if usage == 'Both' and heating_coef > 0 and cooling_coef > 0 and not plausible:
            # implausible optimum: fall back to standard set points and
            # report the set points the model was actually fitted with
            heating_set_point, cooling_set_point = 72, 65
            regr = self.regression_1(heating_set_point, cooling_set_point, bill)
            usage = self._usage_from_coefficients(*regr[0].coef_)

        return usage, regr, heating_set_point, cooling_set_point

    def _fit_heating(self):
        """Fit heating only; returns (regr, hddcdd, heating_sp, method)."""
        bill = self.processed_bill
        cooling_off = self._COOLING_OFF_SET_POINT

        opt_dhw = self._maximize_r_squared(
            lambda x: self.summer_dhw(x[0], bill)[1], 65)
        if -opt_dhw.fun > 0.9 and self._in_range(opt_dhw.x[0], 64, 85):
            heating_set_point = opt_dhw.x[0]
            regr = self.summer_dhw(heating_set_point, bill)
            daily_hdd = regr[2][:, 0]
            hddcdd = np.column_stack([daily_hdd, np.zeros(len(daily_hdd))])
            return regr, hddcdd, heating_set_point, 2

        opt_hdd = self._maximize_r_squared(
            lambda x: self.regression_1(x[0], cooling_off, bill)[1], 65)
        if self._in_range(opt_hdd.x[0], 60, 95):
            heating_set_point = opt_hdd.x[0]
        else:
            # legit heating set-point 72'F
            heating_set_point = 72
        regr = self.regression_1(heating_set_point, cooling_off, bill)
        return regr, regr[2], heating_set_point, 1

    def _fit_cooling(self):
        """Fit cooling only; returns (regr, cooling_sp)."""
        bill = self.processed_bill
        heating_off = self._HEATING_OFF_SET_POINT
        opt = self._maximize_r_squared(
            lambda x: self.regression_1(heating_off, x[0], bill)[1], 65)
        cooling_set_point = opt.x[0]
        return self.regression_1(heating_off, cooling_set_point, bill), cooling_set_point

    def _store_no_weather_dependence(self):
        """Record a result in which all usage is non-weather related."""
        usage = self.processed_bill['Usage']
        self.regression_method = 0
        self.heating_consumption_pred = usage * 0
        self.cooling_consumption_pred = usage * 0
        self.others_consumption_pred = usage
        self.hddcdd = np.zeros((len(self.processed_bill), 2))

    def _store_fit(self, regr, hddcdd, regression_method):
        """Turn a fitted model into per-period heating / cooling / other usage."""
        days = self.processed_bill['Days In Bill']
        model = regr[0]
        self.regression_method = regression_method
        self.regr_model = model
        # average daily degree days -> degree days of the whole period
        self.hddcdd = (np.asarray(hddcdd, dtype=float) *
                       np.asarray(days, dtype=float).reshape(-1, 1))

        # a negative intercept cannot be a base load
        base_load = max(model.intercept_, 0) * days
        self.heating_consumption_pred = self.hddcdd[:, 0] * model.coef_[0]
        if regression_method == 2:
            self.cooling_consumption_pred = self.hddcdd[:, 0] * 0
            base_load = base_load + regr[3]['dhw']
        else:
            self.cooling_consumption_pred = self.hddcdd[:, 1] * model.coef_[1]
        self.others_consumption_pred = base_load

    def optimize_setpoints(self, non_weather_related_end_use=None,
                           weather_related_usage='Unknown'):
        """
        Main function for the optimization and disaggregation

        Cleans bill and weather, finds the set points that maximise the
        R-squared of the degree-day regression and fills the output
        attributes of the instance (see the class docstring).

        Args:

            non_weather_related_end_use (dict): end use name -> share of the
                non-weather-related usage; default {'Miscellaneous': 1}
            weather_related_usage (str):
                Specify if the weather-related consumption is for heating or cooling
                    'Unknown': no prior knowledge
                    'Heating': only for heating
                    'Cooling': only for cooling
                    'Both': for both heating and cooling
                    'Both Not': not for heating or cooling
                default 'Unknown'

        Raises:

            ValueError: for an unsupported ``weather_related_usage`` or a
                bill without any valid billing period
        """
        if non_weather_related_end_use is None:
            non_weather_related_end_use = {'Miscellaneous': 1}
        if weather_related_usage not in self._USAGE_OPTIONS:
            raise ValueError(
                'weather_related_usage must be one of {}, got {!r}'.format(
                    self._USAGE_OPTIONS, weather_related_usage))

        self._prepare_processed_bill()

        usage = weather_related_usage
        regr = None
        hddcdd = None
        heating_set_point = np.nan
        cooling_set_point = np.nan
        regression_method = 1
        both_opt = None

        if usage == 'Unknown':
            both_opt = self._optimize_both_set_points()
            usage = self._classify_unknown_usage(both_opt)

        if usage == 'Both':
            if both_opt is None:
                both_opt = self._optimize_both_set_points()
            usage, regr, heating_set_point, cooling_set_point = self._fit_both(both_opt)
            hddcdd = regr[2]

        if usage == 'Heating':
            regr, hddcdd, heating_set_point, regression_method = self._fit_heating()
            cooling_set_point = np.nan

        if usage == 'Cooling':
            regr, cooling_set_point = self._fit_cooling()
            hddcdd = regr[2]
            heating_set_point = np.nan

        # a poor degree-day fit means the usage is not weather driven
        if (usage != 'Both Not' and regression_method == 1 and
                not regr[1] > self._MIN_R_SQUARED):
            usage = 'Both Not'

        self.usage = usage
        if usage == 'Both Not':
            self._store_no_weather_dependence()
            heating_set_point = np.nan
            cooling_set_point = np.nan
            self.r_squared_of_fit = np.nan
        else:
            self._store_fit(regr, hddcdd, regression_method)
            self.r_squared_of_fit = regr[1]

        bill_cp = self.processed_bill[[
            'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage'
        ]].copy()
        bill_cp['Heating Usage'] = self.heating_consumption_pred
        bill_cp['Cooling Usage'] = self.cooling_consumption_pred
        bill_cp['Other Usage'] = self.others_consumption_pred

        self.heating_set_point = heating_set_point
        self.cooling_set_point = cooling_set_point
        self.output_table = bill_cp

        last_bill_date = self.processed_bill['Bill To Date'].iloc[-1]
        first_bill_date = self.processed_bill['Bill From Date'].iloc[0]

        billing_months = self.num_month_dates(last_bill_date, first_bill_date)
        self.output_table_monthly = self.output_to_month(
            last_bill_date, self.heating_set_point, self.cooling_set_point,
            billing_months)
        self.most_recent_monthly_output = self.output_to_month(
            last_bill_date, self.heating_set_point, self.cooling_set_point, 12)
        self.bill_breakdown = self.non_weahter_related_breakdown(
            non_weather_related_end_use, self.output_table_monthly)
        self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(
            non_weather_related_end_use, self.most_recent_monthly_output)
        self.annual_usage = self.annual_usage_costs(
            self.recent_year_bill_breakdown, non_weather_related_end_use)

    # ------------------------------------------------------------------
    # outputs
    # ------------------------------------------------------------------

    def benchmarking_output(self):
        '''
        Output the parameters used to evaluate the bills

        All values except the number of days and the set points are NaN when
        the bill covers fewer than 350 days.

        Returns:

            tuple: usage, r-squared, regression method, total consumption,
            heating, cooling, others, relative unexplained difference,
            total HDD, total CDD, days covered by the bill,
            heating set point, cooling set point
        '''
        table = self.output_table
        bill_start_date = pd.to_datetime(table['Bill From Date']).iloc[0]
        bill_last_date = pd.to_datetime(table['Bill To Date']).iloc[-1]
        days_in_bill = (bill_last_date - bill_start_date).days

        if days_in_bill >= 350:
            usage = self.usage
            r = self.r_squared_of_fit
            r_method = self.regression_method
            consumption = np.asarray(table['Usage']).sum()
            heating = np.asarray(table['Heating Usage']).sum()
            cooling = np.asarray(table['Cooling Usage']).sum()
            others = np.asarray(table['Other Usage']).sum()
            if consumption:
                diff = (consumption - heating - cooling - others) / consumption
            else:
                diff = np.nan
            hdd = self.hddcdd[:, 0].sum()
            cdd = self.hddcdd[:, 1].sum()
        else:
            usage = r = r_method = consumption = np.nan
            heating = cooling = others = diff = hdd = cdd = np.nan

        return usage, r, r_method, consumption, heating, cooling, others, \
            diff, hdd, cdd, days_in_bill, self.heating_set_point, self.cooling_set_point

    def output_to_month(self, last_date_of_bill, hp, cp, number_of_month):
        """
        Transform period-wise output to calendar-month output

        The usage per HDD, per CDD and per day found for the billing periods
        is applied to the ``number_of_month`` full calendar months that
        precede the month of ``last_date_of_bill``.

        Args:

            last_date_of_bill (datetime): last day of bill
            hp (float): heating season indoor set point
            cp (float): cooling season indoor set point
            number_of_month (int): number of months to produce

        Returns:

            pd.DataFrame: monthly consumptions, most recent month first

        """
        first_dates = []
        last_dates = []
        # last day of the month before the final bill date
        lastdate = last_date_of_bill - timedelta(last_date_of_bill.day)
        for _ in range(number_of_month):
            firstdate = lastdate.replace(day=1)
            last_dates.append(lastdate)
            first_dates.append(firstdate)
            lastdate = firstdate - timedelta(1)

        monthly = pd.DataFrame({
            'Month': [date.month for date in first_dates],
            'Bill From Date': first_dates,
            'Bill To Date': last_dates,
            'Days In Bill': [(last - first).days + 1
                             for first, last in zip(first_dates, last_dates)],
        })

        # 'Days In Bill' counts the last day of the month, so the degree
        # days have to include it as well
        temperatures = [
            self.bill_period_weather(first, last, include_end=True)
            for first, last in zip(first_dates, last_dates)
        ]
        monthly_hdd = np.array([self._hdd_array(hp, t).sum() for t in temperatures])
        monthly_cdd = np.array([self._cdd_array(cp, t).sum() for t in temperatures])

        benchmark = self.benchmarking_output()
        per_hdd = self._safe_ratio(benchmark[4], benchmark[8])
        per_cdd = self._safe_ratio(benchmark[5], benchmark[9])
        per_day = self._safe_ratio(benchmark[6], benchmark[10])

        monthly['Heating Usage'] = monthly_hdd * per_hdd
        monthly['Cooling Usage'] = monthly_cdd * per_cdd
        monthly['Other Usage'] = monthly['Days In Bill'] * per_day
        monthly['HDD'] = monthly_hdd
        monthly['CDD'] = monthly_cdd

        return monthly[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill',
                        'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']]

    def non_weahter_related_breakdown(self, end_uses, monthly_output_table):
        '''
        Break down the non-weather-related usage into end uses

        Args:

            end_uses (dict): key: end use
                             value: share of the end use among non-weather related usage
            monthly_output_table (pd.DataFrame): monthly bill breakdown with
                an 'Other Usage' column

        Returns:
            pd.DataFrame: copy of the table with one column per end use and,
            when the shares do not add up to 1, a 'Miscellaneous' column
            holding the remainder
        '''
        monthly_usages = monthly_output_table.copy()
        for name, share in end_uses.items():
            monthly_usages[name] = monthly_usages['Other Usage'] * share

        total_share = sum(end_uses.values())
        # tolerate float rounding such as 0.7 + 0.2 + 0.1
        if not np.isclose(total_share, 1):
            monthly_usages['Miscellaneous'] = monthly_usages['Other Usage'] * (
                1 - total_share)

        return monthly_usages

    def annual_usage_costs(self, annual_bill_breakdown, end_uses):
        '''
        Calculate annual usage and costs for each end use

        Args:
            annual_bill_breakdown (pd.DataFrame): the output from
                non-weather-related usage breakdown
            end_uses (dict): key: end use
                             value: share of the end use among non-weather related usage

        Return:
            pd.DataFrame: 'End Use', 'Usage' and 'Costs'
            (usage * ``self.unit_price``) for each end use
        '''
        end_use = ['Heating Usage', 'Cooling Usage'] + list(end_uses)
        # the breakdown adds this column when the shares do not sum to 1
        if ('Miscellaneous' in annual_bill_breakdown.columns and
                'Miscellaneous' not in end_use):
            end_use.append('Miscellaneous')

        annual_usage = pd.DataFrame({
            'End Use': end_use,
            'Usage': [annual_bill_breakdown[name].sum() for name in end_use],
        }, columns=['End Use', 'Usage'])
        annual_usage['Costs'] = annual_usage['Usage'] * self.unit_price

        return annual_usage

    def print_all_features(self):
        """Print the set points, the r-squared of fit and the type of usage."""
        print('Heating set point is {}'.format(self.heating_set_point))
        print('Cooling set point is {}'.format(self.cooling_set_point))
        print('R-squared of fit is {}'.format(self.r_squared_of_fit))
        print('Usage is {}'.format(self.usage))

    def to_json(self, period='bill_breakdown'):
        """
        Output as a JSON string

        Args:

            period (str): 'bill_breakdown' for the monthly breakdown including
                              the non-weather-related end uses,
                          any other value for the monthly output with only
                              the weather-related breakdown
                          default 'bill_breakdown'

        Returns:

            str: records-oriented JSON with ISO formatted dates

        """
        if period == 'bill_breakdown':
            return self.bill_breakdown.to_json(orient="records", date_format="iso")

        return self.output_table_monthly.to_json(orient="records", date_format="iso")

    def to_dict(self, period='bill_breakdown'):
        """
        Output as a list of dictionaries

        Args:

            period (str): 'bill_breakdown' for the monthly breakdown including
                              the non-weather-related end uses,
                          any other value for the monthly output with only
                              the weather-related breakdown
                          default 'bill_breakdown'

        Returns:

            list: one dictionary per month

        """
        if period == 'bill_breakdown':
            return self.bill_breakdown.to_dict(orient="records")

        return self.output_table_monthly.to_dict(orient="records")