diff --git a/.pylintrc b/.pylintrc index 9b452a68e7674e712a4dfcf47cb7f8df693a092e..25bcb14a7191cc137dc7a80aabb6a071dcbd5dbc 100644 --- a/.pylintrc +++ b/.pylintrc @@ -65,7 +65,7 @@ confidence= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=zip-builtin-not-iterating,dict-iter-method,buffer-builtin,raw_input-builtin,print-statement,unpacking-in-except,execfile-builtin,useless-suppression,suppressed-message,map-builtin-not-iterating,using-cmp-argument,dict-view-method,parameter-unpacking,coerce-builtin,input-builtin,unichr-builtin,hex-method,setslice-method,old-division,nonzero-method,cmp-builtin,old-raise-syntax,basestring-builtin,reload-builtin,intern-builtin,getslice-method,cmp-method,long-builtin,apply-builtin,file-builtin,indexing-exception,old-ne-operator,no-absolute-import,round-builtin,metaclass-assignment,range-builtin-not-iterating,standarderror-builtin,delslice-method,backtick,unicode-builtin,xrange-builtin,import-star-module-level,raising-string,long-suffix,oct-method,next-method-called,coerce-method,reduce-builtin,old-octal-literal,filter-builtin-not-iterating 
""" This file will be used for calculate bill disaggregation """

import pandas as pd
import numpy as np

from scipy.optimize import minimize

# NOTE(review): the change set this module was extracted from also appended
# ``no-member`` to the ``disable=`` list in .pylintrc and pinned
# numpy==1.11.2, pandas==0.18.0, scikit-learn==0.17.1, scipy==0.16.0 and
# xlrd==1.0.0 in requirements.txt.  The code below therefore avoids syntax
# and library features newer than those versions (no f-strings, ``xtol``
# rather than ``xatol`` for Nelder-Mead).


class BillDisaggregation():
    """
    Split the usage of utility bills into heating, cooling and other loads.

    The split comes from a linear regression of daily usage against the mean
    daily heating and cooling degree days of each bill period; the set
    point(s) are chosen to maximise the R-squared of that regression.

    Args:

        bill (pd.DataFrame): raw bill (from parsing) with the columns
            'Bill From Date', 'Bill To Date', 'Days In Bill' and 'Usage'
        daily_temp (pd.DataFrame): daily temperature with the columns
            'date' and 'temperature'

    Attributes (all None until ``optimize`` has run):

        usage: usage type that was finally fitted
        heating_load_m / cooling_load_m / others_m: load of each bill period
        r_squared_of_fit: R-squared of the final regression
        heating_load_proportion / cooling_load_proportion: share of total usage
        set_point: first optimised set point (the heating one for 'Both')
        days_in_12_bills: number of days covered by the first 12 bill periods
        output_table: the bill rows with the three load columns appended
    """
    # pylint: disable=too-many-instance-attributes

    # Set points far outside any real temperature.  ``heating`` returns 0
    # whenever the temperature is above the set point and ``cooling`` returns
    # 0 whenever it is not, so these switch the corresponding load off.
    NO_COOLING_SET_POINT = 200
    NO_HEATING_SET_POINT = -100

    _USAGE_TYPES = ('Unknown', 'Heating', 'Cooling', 'Both')
    _FIT_MESSAGE = 'For this bill, R-squared is {}, with set point optimized at {}'

    def __init__(self, bill, daily_temp):
        self.bill = bill
        self.daily_temp = daily_temp
        self.usage = None
        self.heating_load_m = None
        self.cooling_load_m = None
        self.others_m = None
        self.r_squared_of_fit = None
        self.heating_load_proportion = None
        self.cooling_load_proportion = None
        self.set_point = None
        self.days_in_12_bills = None
        self.output_table = None

    def weather_period(self, period_end_date, days_in_period):
        """
        Get the daily temperatures of one bill period

        The period is the ``days_in_period`` rows of ``daily_temp`` that
        directly precede the row dated ``period_end_date``; the end date's own
        row is not included.

        Args:

            period_end_date (Datetime): end date of a period
            days_in_period (int or str): number of days in a period

        Returns:

            list: Returns a list of weather data for a period

        Raises:

            IndexError: ``daily_temp`` has no row for the end date
            ValueError: ``daily_temp`` starts after the beginning of the period

        """
        end_date = period_end_date.date()
        # The 'date' column is matched as an un-padded 'M/D/YYYY' string.
        end_datetime = '/'.join([str(end_date.month), str(end_date.day), str(end_date.year)])
        # Work with row positions throughout so the lookup and the slice agree
        # even when daily_temp does not carry a default 0..n-1 index.
        positions = np.flatnonzero((self.daily_temp['date'] == end_datetime).values)
        if positions.size == 0:
            raise IndexError('No temperature record dated {}'.format(end_datetime))
        end_pos = int(positions[0])
        start_pos = end_pos - int(days_in_period)
        if start_pos < 0:
            # A negative slice start would silently count from the end of the
            # table and return temperatures of the wrong days (or none).
            raise ValueError(
                'Temperature data does not cover the {} days before {}'.format(
                    int(days_in_period), end_datetime))
        return list(self.daily_temp['temperature'].iloc[start_pos:end_pos])

    def table_cleaning(self, bill):
        """
        Clean the table

        Args:

            bill (pd.DataFrame): raw bill with columns
                'Bill To Date', 'Bill From Date',
                'Days In Bill', 'Usage'

        Returns:

            pd.DataFrame: Returns a cleaned dataframe with the dates parsed,
                a 'Temperature' column (list of daily temperatures of the
                period) and a 'Daily Usage' column; rows containing missing
                values are dropped and the original index is kept

        """
        bill_cp = bill.copy()
        bill_cp['Bill To Date'] = pd.to_datetime(bill_cp['Bill To Date'])
        bill_cp['Bill From Date'] = pd.to_datetime(bill_cp['Bill From Date'])
        # Add new columns for 'weather in period' and 'daily usage'
        bill_cp['Temperature'] = [
            self.weather_period(end_date, days)
            for (end_date, days) in zip(bill_cp['Bill To Date'], bill_cp['Days In Bill'])
        ]
        # Copy after selecting so the column added next is written to an
        # independent frame instead of a slice of the previous one.
        bill_cp = bill_cp[
            ['Bill From Date', 'Bill To Date', 'Temperature', 'Usage', 'Days In Bill']
        ].copy()
        bill_cp['Daily Usage'] = [
            used / days for (used, days) in zip(bill_cp['Usage'], bill_cp['Days In Bill'])
        ]
        return bill_cp.dropna()

    @staticmethod
    def heating(curr_temp, set_temp):
        """HDD (for each day): degrees below the set point, 0 when above it"""
        return 0 if curr_temp > set_temp else set_temp - curr_temp

    @staticmethod
    def cooling(curr_temp, set_temp):
        """ CDD (for each day): degrees above the set point, 0 otherwise """
        return curr_temp - set_temp if curr_temp > set_temp else 0

    @staticmethod
    def regression_r2_op(set_heating, set_cooling, temperature, consumption):
        """
        A linear regression model with heating and cooling set fixed

        Args:

            set_heating (int): a temperature setting for heating
            set_cooling (int): a temperature setting for cooling
            temperature (array): an array of lists of daily temperature [array-like]
            consumption (array): an array of monthly consumption (normalized) [array-like]

        Returns:

            sklearn.linear_model.LinearRegression: regression model
            float: r-squared of the linear regression model
            2d-array: a 2D numpy array of normalized monthly HDDs and CDDs

        """
        # Imported here because this is the only place scikit-learn is used;
        # the rest of the class stays importable without it.
        from sklearn import linear_model

        daily_hdd = [
            np.mean([BillDisaggregation.heating(temp, set_heating) for temp in period])
            for period in temperature
        ]
        daily_cdd = [
            np.mean([BillDisaggregation.cooling(temp, set_cooling) for temp in period])
            for period in temperature
        ]
        regr_model = linear_model.LinearRegression()
        hddcdd = np.array([daily_hdd, daily_cdd]).T
        regr_model.fit(hddcdd, consumption)
        return regr_model, regr_model.score(hddcdd, consumption), hddcdd

    def _fit_set_points(self, temperature, consumption, start, as_set_points):
        """Optimise the set point(s) for maximum R-squared and refit at the optimum.

        ``as_set_points`` maps the optimiser's parameter vector to the
        ``(set_heating, set_cooling)`` pair handed to ``regression_r2_op``.
        Returns the scipy optimisation result and the refitted regression tuple.
        """
        def negative_r_squared(point):
            set_heating, set_cooling = as_set_points(point)
            return -self.regression_r2_op(set_heating, set_cooling,
                                          temperature, consumption)[1]

        opt = minimize(negative_r_squared, start, method='nelder-mead',
                       options={'xtol': 1e-2, 'disp': False})
        set_heating, set_cooling = as_set_points(opt.x)
        regr = self.regression_r2_op(set_heating, set_cooling, temperature, consumption)
        return opt, regr

    def _record_disaggregation(self, bill, regr, set_point):
        """Turn a fitted regression into per-period loads and store them on self."""
        regr_model, hddcdd = regr[0], regr[2]
        days = np.array([int(ii) for ii in bill['Days In Bill'].tolist()])
        heating_load = np.multiply(hddcdd[:, 0], days) * regr_model.coef_[0]
        cooling_load = np.multiply(hddcdd[:, 1], days) * regr_model.coef_[1]
        other_load = regr_model.intercept_ * days

        # Only the raw rows that survived cleaning have a prediction.  Taking
        # them by index keeps every array the same length and leaves the date
        # columns exactly as they were supplied.
        # NOTE(review): assumes the bill index has no duplicate labels.
        kept_rows = self.bill.loc[bill.index]

        # Rescale so the three loads of each period add up to the billed usage.
        real_sum = np.array(kept_rows['Usage'])
        sum_ratio = real_sum / (heating_load + cooling_load + other_load)
        self.heating_load_m = heating_load * sum_ratio
        self.cooling_load_m = cooling_load * sum_ratio
        self.others_m = other_load * sum_ratio

        # For printing output
        bill_cp = kept_rows[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']].copy()
        bill_cp['Heating Usage'] = self.heating_load_m
        bill_cp['Cooling Usage'] = self.cooling_load_m
        bill_cp['Other Usage'] = self.others_m

        self.heating_load_proportion = self.heating_load_m.sum() / real_sum.sum()
        self.cooling_load_proportion = self.cooling_load_m.sum() / real_sum.sum()
        self.set_point = set_point
        self.days_in_12_bills = sum(days[:12])
        self.output_table = bill_cp

    def optimize(self, usage='Unknown', min_r_squared=None):
        """
        Main function for the optimization

        Args:

            usage (str):
                Specify if the consumption is for heating or cooling
                'Unknown': no prior knowledge
                'Heating': only for heating
                'Cooling': only for cooling
                'Both': for both heating and cooling
                default 'Unknown'
            min_r_squared (float):
                When given, the loads are only stored if the R-squared of
                the fit is above this value; otherwise 'Low R-squared' is
                printed and no loads are stored.
                default None (always store the loads)

        Raises:

            AssertionError: the bill has 5 rows or fewer
            ValueError: ``usage`` is not one of the values listed above

        """
        # Raised explicitly (not via ``assert``) so the check still runs
        # under ``python -O`` while callers see the same exception type.
        if len(self.bill) <= 5:
            raise AssertionError('No sufficient months for regression.')
        if usage not in self._USAGE_TYPES:
            raise ValueError(
                'usage must be one of {}, got {!r}'.format(self._USAGE_TYPES, usage))

        bill = self.table_cleaning(self.bill)
        temperature = bill['Temperature'].tolist()
        daily_usage = bill['Daily Usage']

        if usage in ('Unknown', 'Both'):
            opt, regr = self._fit_set_points(
                temperature, daily_usage, (65, 65),
                lambda point: (point[0], point[1]))

            if usage == 'Unknown':
                # A coefficient more than 20 times smaller than the other one
                # is treated as "this load does not exist".
                heating_coef, cooling_coef = regr[0].coef_
                if cooling_coef < heating_coef / 20:
                    usage = 'Heating'
                elif heating_coef < cooling_coef / 20:
                    usage = 'Cooling'
                else:
                    usage = 'Both'

            if usage == 'Both':
                print(self._FIT_MESSAGE.format(-opt.fun, opt.x))

        # Single-load fits: only one set point is optimised, the other is
        # pinned to a value that makes its degree days zero.
        if usage == 'Heating':
            opt, regr = self._fit_set_points(
                temperature, daily_usage, 65,
                lambda point: (point[0], self.NO_COOLING_SET_POINT))
            print(self._FIT_MESSAGE.format(-opt.fun, opt.x[0]))
        elif usage == 'Cooling':
            opt, regr = self._fit_set_points(
                temperature, daily_usage, 65,
                lambda point: (self.NO_HEATING_SET_POINT, point[0]))
            print(self._FIT_MESSAGE.format(-opt.fun, opt.x[0]))

        self.usage = usage
        self.r_squared_of_fit = regr[1]

        # ``not >`` rather than ``<=`` so a NaN score counts as a low fit.
        if min_r_squared is not None and not regr[1] > min_r_squared:
            print('Low R-squared')
            return

        self._record_disaggregation(bill, regr, opt.x[0])

    def output(self):
        """
        Output in csv format

        Returns the output table as a csv string; ``optimize`` must have
        stored a table first (``output_table`` is None until then).

        """
        return self.output_table.to_csv(None)

    def to_json(self):
        """
        Output in json format

        Returns the output table as a json string with one record per bill
        row; ``optimize`` must have stored a table first.

        """
        return self.output_table.to_json(orient="records")

    def print_all_features(self):
        """
        print the features heating load percentage, cooling load percentage,
        r-squared of fit and type of usage.

        """
        print('Heating load percentage is {:.1%}'.format(self.heating_load_proportion))
        print('Cooling load percentage is {:.1%}'.format(self.cooling_load_proportion))
        print('R-squared of fit is {}'.format(self.r_squared_of_fit))
        print('Usage is {}'.format(self.usage))