From c44fbe500ad572184a53f615bea970cd1809e199 Mon Sep 17 00:00:00 2001 From: Boya Yu Date: Fri, 7 Apr 2017 14:49:18 -0400 Subject: [PATCH 1/5] Add initial bill disaggregate --- bpeng/bill/disaggregate.py | 181 +++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 bpeng/bill/disaggregate.py diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py new file mode 100644 index 0000000..573ff28 --- /dev/null +++ b/bpeng/bill/disaggregate.py @@ -0,0 +1,181 @@ +""" This file will be used for calc """ +import os +import pandas as pd +import numpy as np +import math + +from sklearn import linear_model +from scipy.optimize import minimize + + +class BillDisaggregation(): + """ + Class for Bill Disaggregation + Parameters + ---------- + bill: raw bill (from parsing) (File like object) + First row (besides column name) are the names + 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names + dt: daily temperature (File like object) + Attributes + ---------- + output: list + total heating load + heating load for the first year (first 12 bill periods) + heating load of each month + (return NaN if R-squared is low) + """ + def __init__(self, bill, dt): + self.bill = bill + self.dt = dt + + def weather_period(self, pe, dp): + """ + Get the dates within the period + + Args: + + pe (Datetime): pe is something + dp (str): string of something + + Returns: + + list: Returns a list of weather data for a period + + """ + # pedt = pe.date().strftime("%Y/%m/%d") + pedt = str(pe.date().month)+'/'+str(pe.date().day)+'/'+str(pe.date().year) + peid = self.dt[self.dt.date == pedt].index[0] + return list(self.dt['temperature'][peid-int(dp):peid]) + + def table_cleaning(self, tt): + # Let the first row be the column names + t0 = tt.copy() + t0['Bill To Date'] = pd.to_datetime(t0['Bill To Date']) + t0['Bill From Date'] = pd.to_datetime(t0['Bill From Date']) + # Add new columns for 'weather in period' and 'daily usage' + t0['wp'] = [self.weather_period(x,y) for (x,y) in zip(t0['Bill To Date'], t0['Days In Bill'])] + t0 = t0[['Bill From Date', 'Bill To Date','wp', 'Usage', 'Days In Bill']] + t0['Daily Usage'] = [x / y for (x,y) in zip(t0['Usage'], t0['Days In Bill'])] + t0 = t0.dropna() + return t0 + + @staticmethod + def Heating(curr_temp, set_temp): + """HDD (for each day)""" + if curr_temp > set_temp: + return 0 + else: + return set_temp - curr_temp + + @staticmethod + def Cooling(t, tb): + # CDD (for each day) + if t > tb: + return t - tb + else: + return 0 + + @staticmethod + def Regression_R2_op(tbh, tbc, wp, b): + # function for regression + wph = [np.mean([BillDisaggregation.Heating(xx, tbh) for xx in x]) for x in wp] + wpc = [np.mean([BillDisaggregation.Cooling(xx, tbc) for xx in x]) for x in wp] + lm = linear_model.LinearRegression() + Y = b + X = np.array([wph, wpc]).T + #print X, Y + lm.fit(X, Y) + return lm, lm.score(X, Y), X + + def optimize(self, usage='Unknown'): + assert len(self.bill) > 5, 'No sufficient months for regression.' + tt = self.table_cleaning(self.bill) + days = [int(ii) for ii in tt['Days In Bill'].tolist()] + if (usage == 'Unknown') | (usage == 'Both'): + function = lambda tb: -self.Regression_R2_op(tb[0], tb[1], np.array(tt['wp']), tt['Daily Usage'])[1] + opt = minimize(function, (65, 65), method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) + regr = self.Regression_R2_op(opt.x[0], opt.x[1], tt['wp'], tt['Daily Usage']) + lm = regr[0] + + heating_coef, cooling_coef = lm.coef_ + if usage == 'Unknown': + if cooling_coef < heating_coef / 20: + usage = 'Heating' + elif heating_coef < cooling_coef / 20: + usage = 'Cooling' + else: + usage = 'Both' + + if usage == 'Both': + print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x)) + X = regr[2] + + if usage == 'Heating': + function = lambda tbh: -self.Regression_R2_op(tbh, 200, np.array(tt['wp']), tt['Daily Usage'])[1] + # Note here in the function the cooling set point is fixed at 200 + # Indicating that there is no cooling load + opt = minimize(function, 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) + print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) + regr = self.Regression_R2_op(opt.x[0], 200, tt['wp'], tt['Daily Usage']) + lm = regr[0] + X = regr[2] + + + if usage == 'Cooling': + function = lambda tbc: -self.Regression_R2_op(-100, tbc, np.array(tt['wp']), tt['Daily Usage'])[1] + opt = minimize(function, 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) + print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) + regr = self.Regression_R2_op(-100, opt.x[0], tt['wp'], tt['Daily Usage']) + lm = regr[0] + X = regr[2] + + + # Now we regress with the optimized set point + self.Usage = usage + + #print(X) + + if True | (regr[1] > 0.5): + #print lm.coef_[0], lm.intercept_ + days = np.array([int(ii) for ii in tt['Days In Bill'].tolist()]) + self.heating_load_m = np.multiply(X[:,0], days) * lm.coef_[0] + self.cooling_load_m = np.multiply(X[:,1], days) * lm.coef_[1] + self.others_m = lm.intercept_ * days + + real_sum = np.array(self.bill['Usage']) + predict_sum = self.heating_load_m + self.cooling_load_m + self.others_m + sum_ratio = real_sum / predict_sum + self.heating_load_m = self.heating_load_m * sum_ratio + self.cooling_load_m = self.cooling_load_m * sum_ratio + self.others_m = self.others_m * sum_ratio + + #For printing output + P = self.bill.copy() + P = P[['Bill From Date','Bill To Date','Days In Bill','Usage']] + self.R_squared_of_fit = regr[1] + + P['Heating Usage'] = self.heating_load_m + P['Cooling Usage'] = self.cooling_load_m + P['Other Usage'] = self.others_m + + self.Heating_load_proportion = self.heating_load_m.sum()/real_sum.sum() + self.Cooling_load_proportion = self.cooling_load_m.sum()/real_sum.sum() + self.Set_point=opt.x[0] + self.Days_in_12_bills=sum(days[:12]) + self.Output_Table = P + + else: + print('Low R-squared') + + def output(self): + return self.Output_Table.to_csv(None) + + def to_json(self): + return self.Output_Table.to_json(orient="records") + + def print_all_features(self): + print('Heating load percentage is {:.1%}'.format(self.Heating_load_proportion)) + print('Cooling load percentage is {:.1%}'.format(self.Cooling_load_proportion)) + print('R-squared of fit is {}'.format(self.R_squared_of_fit)) + print('Usage is {}'.format(self.Usage)) -- GitLab From 8b886d9079200bc9f7cb1d6285cbc5293541ea44 Mon Sep 17 00:00:00 2001 From: Boya Yu Date: Fri, 7 Apr 2017 14:52:14 -0400 Subject: [PATCH 2/5] Fix line too long for disaggregate --- bpeng/bill/disaggregate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py index 573ff28..b551175 100644 --- a/bpeng/bill/disaggregate.py +++ b/bpeng/bill/disaggregate.py @@ -108,7 +108,10 @@ class BillDisaggregation(): usage = 'Both' if usage == 'Both': - print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x)) + print( + 'For this bill, R-squared is {}, ' \ + 'with set point optimized at {}'.format(-opt.fun, opt.x) + ) X = regr[2] if usage == 'Heating': -- GitLab From 79fde9c3ab48da16575f5fbe57c89465554ee7ae Mon Sep 17 00:00:00 2001 From: Boya Yu Date: Mon, 10 Apr 2017 14:14:37 -0400 Subject: [PATCH 3/5] Code style improvement --- bpeng/bill/disaggregate.py | 233 ++++++++++++++++++++++++------------- 1 file changed, 154 insertions(+), 79 deletions(-) diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py index b551175..6318a54 100644 --- a/bpeng/bill/disaggregate.py +++ b/bpeng/bill/disaggregate.py @@ -1,8 +1,7 @@ -""" This file will be used for calc """ -import os +""" This file will be used for calculate bill disaggregation """ + import pandas as pd import numpy as np -import math from sklearn import linear_model from scipy.optimize import minimize @@ -16,7 +15,7 @@ class BillDisaggregation(): bill: raw bill (from parsing) (File like object) First row (besides column name) are the names 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names - dt: daily temperature (File like object) + daily_temp: daily temperature (File like object) Attributes ---------- output: list @@ -25,43 +24,70 @@ class BillDisaggregation(): heating load of each month (return NaN if R-squared is low) """ - def __init__(self, bill, dt): + def __init__(self, bill, daily_temp): self.bill = bill - self.dt = dt + self.daily_temp = daily_temp + self.usage = None + self.heating_load_m = None + self.cooling_load_m = None + self.others_m = None + self.r_squared_of_fit = None + self.heating_load_proportion = None + self.cooling_load_proportion = None + self.set_point = None + self.days_in_12_bills = None + self.output_table = None - def weather_period(self, pe, dp): + def weather_period(self, period_end_date, days_in_period): """ Get the dates within the period Args: - pe (Datetime): pe is something - dp (str): string of something + period_end_date (Datetime): end date of a period + days_in_period (str or int): number of days in a period Returns: list: Returns a list of weather data for a period """ - # pedt = pe.date().strftime("%Y/%m/%d") - pedt = str(pe.date().month)+'/'+str(pe.date().day)+'/'+str(pe.date().year) - peid = self.dt[self.dt.date == pedt].index[0] - return list(self.dt['temperature'][peid-int(dp):peid]) - - def table_cleaning(self, tt): - # Let the first row be the column names - t0 = tt.copy() - t0['Bill To Date'] = pd.to_datetime(t0['Bill To Date']) - t0['Bill From Date'] = pd.to_datetime(t0['Bill From Date']) + # end_datetime = period_end_date.date().strftime("%/%m/%d") + end_datetime = '/'.join([str(period_end_date.date().month), + str(period_end_date.date().day), + str(period_end_date.date().year)]) + end_date_id = self.daily_temp[self.daily_temp.date == end_datetime].index[0] + start_date_id = end_date_id - int(days_in_period) + return list(self.daily_temp['temperature'][start_date_id:end_date_id]) + + def table_cleaning(self, bill): + """ + Clean the table + + Args: + + bill (pd.DataFrame): raw bill with columns + 'Bill To Date', 'Bill From Date', + 'Days In Bill', 'Usage' + + Returns: + + pd.DataFrame: Returns a cleaned dataframe with temperature data + + """ + bill_cp = bill.copy() + bill_cp['Bill To Date'] = pd.to_datetime(bill_cp['Bill To Date']) + bill_cp['Bill From Date'] = pd.to_datetime(bill_cp['Bill From Date']) # Add new columns for 'weather in period' and 'daily usage' - t0['wp'] = [self.weather_period(x,y) for (x,y) in zip(t0['Bill To Date'], t0['Days In Bill'])] - t0 = t0[['Bill From Date', 'Bill To Date','wp', 'Usage', 'Days In Bill']] - t0['Daily Usage'] = [x / y for (x,y) in zip(t0['Usage'], t0['Days In Bill'])] - t0 = t0.dropna() - return t0 + bill_cp['Temperature'] = [self.weather_period(x, y) for (x, y) in \ + zip(bill_cp['Bill To Date'], bill_cp['Days In Bill'])] + bill_cp = bill_cp[['Bill From Date', 'Bill To Date', 'Temperature', 'Usage', 'Days In Bill']] + bill_cp['Daily Usage'] = [x / y for (x, y) in zip(bill_cp['Usage'], bill_cp['Days In Bill'])] + bill_cp = bill_cp.dropna() + return bill_cp @staticmethod - def Heating(curr_temp, set_temp): + def heating(curr_temp, set_temp): """HDD (for each day)""" if curr_temp > set_temp: return 0 @@ -69,36 +95,72 @@ class BillDisaggregation(): return set_temp - curr_temp @staticmethod - def Cooling(t, tb): - # CDD (for each day) - if t > tb: - return t - tb + def cooling(curr_temp, set_temp): + """ CDD (for each day) """ + if curr_temp > set_temp: + return curr_temp - set_temp else: return 0 @staticmethod - def Regression_R2_op(tbh, tbc, wp, b): + def regression_r2_op(set_heating, set_cooling, temperature, consumption): + """ + A linear regression model with heating and cooling set fixed + + Args: + + set_heating (int): a temperature setting for heating + set_cooling (int): a temperature setting for cooling + temperature (array or array-like): an array of lists of daily temperature + consumption (array or array-like): an array of monthly consumption (normalized) + + Returns: + + tuple (len = 3): Returns a tuple of three elements + First element (sklearn.linear_model.LinearRegression): regression model + Second element (float): r-squared of the linear regression model + Third element (2d-array): a 2D numpy array of normalized monthly HDDs and CDDs + + """ # function for regression - wph = [np.mean([BillDisaggregation.Heating(xx, tbh) for xx in x]) for x in wp] - wpc = [np.mean([BillDisaggregation.Cooling(xx, tbc) for xx in x]) for x in wp] - lm = linear_model.LinearRegression() - Y = b - X = np.array([wph, wpc]).T - #print X, Y - lm.fit(X, Y) - return lm, lm.score(X, Y), X + daily_hdd = \ + [np.mean([BillDisaggregation.heating(xx, set_heating) for xx in x]) for x in temperature] + daily_cdd = \ + [np.mean([BillDisaggregation.cooling(xx, set_cooling) for xx in x]) for x in temperature] + regr_model = linear_model.LinearRegression() + hddcdd = np.array([daily_hdd, daily_cdd]).T + #print hddcdd, consumption + regr_model.fit(hddcdd, consumption) + return regr_model, regr_model.score(hddcdd, consumption), hddcdd def optimize(self, usage='Unknown'): + """ + Main function for the optimization + + Args: + + usage (str): + Specify if the consumption is for heating or cooling + 'Unknown': no prior knowledge + 'Heating': only for heating + 'Cooling': only for cooling + 'Both': for both heating and cooling + default 'Unknown' + + """ + assert len(self.bill) > 5, 'No sufficient months for regression.' - tt = self.table_cleaning(self.bill) - days = [int(ii) for ii in tt['Days In Bill'].tolist()] + bill = self.table_cleaning(self.bill) + days = [int(ii) for ii in bill['Days In Bill'].tolist()] if (usage == 'Unknown') | (usage == 'Both'): - function = lambda tb: -self.Regression_R2_op(tb[0], tb[1], np.array(tt['wp']), tt['Daily Usage'])[1] + function = lambda x: -self.regression_r2_op(x[0], x[1], + np.array(bill['Temperature']), + bill['Daily Usage'])[1] opt = minimize(function, (65, 65), method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - regr = self.Regression_R2_op(opt.x[0], opt.x[1], tt['wp'], tt['Daily Usage']) - lm = regr[0] + regr = self.regression_r2_op(opt.x[0], opt.x[1], bill['Temperature'], bill['Daily Usage']) + regr_model = regr[0] - heating_coef, cooling_coef = lm.coef_ + heating_coef, cooling_coef = regr_model.coef_ if usage == 'Unknown': if cooling_coef < heating_coef / 20: usage = 'Heating' @@ -112,39 +174,39 @@ class BillDisaggregation(): 'For this bill, R-squared is {}, ' \ 'with set point optimized at {}'.format(-opt.fun, opt.x) ) - X = regr[2] + hddcdd = regr[2] if usage == 'Heating': - function = lambda tbh: -self.Regression_R2_op(tbh, 200, np.array(tt['wp']), tt['Daily Usage'])[1] + function = lambda x: -self.regression_r2_op(x, 200, np.array(bill['Temperature']), bill['Daily Usage'])[1] # Note here in the function the cooling set point is fixed at 200 # Indicating that there is no cooling load opt = minimize(function, 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) - regr = self.Regression_R2_op(opt.x[0], 200, tt['wp'], tt['Daily Usage']) - lm = regr[0] - X = regr[2] + print('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) + regr = self.regression_r2_op(opt.x[0], 200, bill['Temperature'], bill['Daily Usage']) + regr_model = regr[0] + hddcdd = regr[2] if usage == 'Cooling': - function = lambda tbc: -self.Regression_R2_op(-100, tbc, np.array(tt['wp']), tt['Daily Usage'])[1] + function = lambda x: -self.regression_r2_op(-100, x, np.array(bill['Temperature']), bill['Daily Usage'])[1] opt = minimize(function, 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) - print ('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) - regr = self.Regression_R2_op(-100, opt.x[0], tt['wp'], tt['Daily Usage']) - lm = regr[0] - X = regr[2] + print('For this bill, R-squared is {}, with set point optimized at {}'.format(-opt.fun, opt.x[0])) + regr = self.regression_r2_op(-100, opt.x[0], bill['Temperature'], bill['Daily Usage']) + regr_model = regr[0] + hddcdd = regr[2] # Now we regress with the optimized set point - self.Usage = usage + self.usage = usage - #print(X) + #print(hddcdd) if True | (regr[1] > 0.5): - #print lm.coef_[0], lm.intercept_ - days = np.array([int(ii) for ii in tt['Days In Bill'].tolist()]) - self.heating_load_m = np.multiply(X[:,0], days) * lm.coef_[0] - self.cooling_load_m = np.multiply(X[:,1], days) * lm.coef_[1] - self.others_m = lm.intercept_ * days + #print regr_model.coef_[0], regr_model.intercept_ + days = np.array([int(ii) for ii in bill['Days In Bill'].tolist()]) + self.heating_load_m = np.multiply(hddcdd[:, 0], days) * regr_model.coef_[0] + self.cooling_load_m = np.multiply(hddcdd[:, 1], days) * regr_model.coef_[1] + self.others_m = regr_model.intercept_ * days real_sum = np.array(self.bill['Usage']) predict_sum = self.heating_load_m + self.cooling_load_m + self.others_m @@ -154,31 +216,44 @@ class BillDisaggregation(): self.others_m = self.others_m * sum_ratio #For printing output - P = self.bill.copy() - P = P[['Bill From Date','Bill To Date','Days In Bill','Usage']] - self.R_squared_of_fit = regr[1] + bill_cp = self.bill.copy() + bill_cp = bill_cp[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']] + self.r_squared_of_fit = regr[1] - P['Heating Usage'] = self.heating_load_m - P['Cooling Usage'] = self.cooling_load_m - P['Other Usage'] = self.others_m + bill_cp['Heating Usage'] = self.heating_load_m + bill_cp['Cooling Usage'] = self.cooling_load_m + bill_cp['Other Usage'] = self.others_m - self.Heating_load_proportion = self.heating_load_m.sum()/real_sum.sum() - self.Cooling_load_proportion = self.cooling_load_m.sum()/real_sum.sum() - self.Set_point=opt.x[0] - self.Days_in_12_bills=sum(days[:12]) - self.Output_Table = P + self.heating_load_proportion = self.heating_load_m.sum()/real_sum.sum() + self.cooling_load_proportion = self.cooling_load_m.sum()/real_sum.sum() + self.set_point = opt.x[0] + self.days_in_12_bills = sum(days[:12]) + self.output_table = bill_cp else: print('Low R-squared') def output(self): - return self.Output_Table.to_csv(None) + """ + Output in csv file + + """ + return self.output_table.to_csv(None) def to_json(self): - return self.Output_Table.to_json(orient="records") + """ + Output in json file + + """ + return self.output_table.to_json(orient="records") def print_all_features(self): - print('Heating load percentage is {:.1%}'.format(self.Heating_load_proportion)) - print('Cooling load percentage is {:.1%}'.format(self.Cooling_load_proportion)) - print('R-squared of fit is {}'.format(self.R_squared_of_fit)) - print('Usage is {}'.format(self.Usage)) + """ + print the features heating load percentage, cooling load percentage, + r-squared of fit and type of usage. + + """ + print('Heating load percentage is {:.1%}'.format(self.heating_load_proportion)) + print('Cooling load percentage is {:.1%}'.format(self.cooling_load_proportion)) + print('R-squared of fit is {}'.format(self.r_squared_of_fit)) + print('Usage is {}'.format(self.usage)) -- GitLab From db74aaa0750c19c3b56c993a1b07a76a9e1706ae Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Wed, 12 Apr 2017 12:13:06 -0400 Subject: [PATCH 4/5] Add requirements and ignore warning in pylint --- .pylintrc | 2 +- requirements.txt | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index 9b452a6..25bcb14 100644 --- a/.pylintrc +++ b/.pylintrc @@ -65,7 +65,7 @@ confidence= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=zip-builtin-not-iterating,dict-iter-method,buffer-builtin,raw_input-builtin,print-statement,unpacking-in-except,execfile-builtin,useless-suppression,suppressed-message,map-builtin-not-iterating,using-cmp-argument,dict-view-method,parameter-unpacking,coerce-builtin,input-builtin,unichr-builtin,hex-method,setslice-method,old-division,nonzero-method,cmp-builtin,old-raise-syntax,basestring-builtin,reload-builtin,intern-builtin,getslice-method,cmp-method,long-builtin,apply-builtin,file-builtin,indexing-exception,old-ne-operator,no-absolute-import,round-builtin,metaclass-assignment,range-builtin-not-iterating,standarderror-builtin,delslice-method,backtick,unicode-builtin,xrange-builtin,import-star-module-level,raising-string,long-suffix,oct-method,next-method-called,coerce-method,reduce-builtin,old-octal-literal,filter-builtin-not-iterating +disable=zip-builtin-not-iterating,dict-iter-method,buffer-builtin,raw_input-builtin,print-statement,unpacking-in-except,execfile-builtin,useless-suppression,suppressed-message,map-builtin-not-iterating,using-cmp-argument,dict-view-method,parameter-unpacking,coerce-builtin,input-builtin,unichr-builtin,hex-method,setslice-method,old-division,nonzero-method,cmp-builtin,old-raise-syntax,basestring-builtin,reload-builtin,intern-builtin,getslice-method,cmp-method,long-builtin,apply-builtin,file-builtin,indexing-exception,old-ne-operator,no-absolute-import,round-builtin,metaclass-assignment,range-builtin-not-iterating,standarderror-builtin,delslice-method,backtick,unicode-builtin,xrange-builtin,import-star-module-level,raising-string,long-suffix,oct-method,next-method-called,coerce-method,reduce-builtin,old-octal-literal,filter-builtin-not-iterating,no-member [REPORTS] diff --git a/requirements.txt b/requirements.txt index 0b8b968..2669509 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ -numpy==1.12.0 -pandas==0.19.2 +numpy==1.11.2 +pandas==0.18.0 +scikit-learn==0.17.1 +scipy==0.16.0 xlrd==1.0.0 -- GitLab From dde1320dde4d3f5f40d680443d040158d4a3b9e4 Mon Sep 17 00:00:00 2001 From: Boya Yu Date: Wed, 12 Apr 2017 12:28:02 -0400 Subject: [PATCH 5/5] Fix pylint warnings --- bpeng/bill/disaggregate.py | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/bpeng/bill/disaggregate.py b/bpeng/bill/disaggregate.py index 6318a54..d0587a4 100644 --- a/bpeng/bill/disaggregate.py +++ b/bpeng/bill/disaggregate.py @@ -10,20 +10,23 @@ from scipy.optimize import minimize class BillDisaggregation(): """ Class for Bill Disaggregation - Parameters - ---------- - bill: raw bill (from parsing) (File like object) - First row (besides column name) are the names - 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names - daily_temp: daily temperature (File like object) - Attributes - ---------- + + Args: + + bill (pd.DataFrame): raw bill (from parsing) (File like object) + First row (besides column name) are the names + 'Bill From Date', 'Bill To Date', 'Days in Bill', 'Usage' must be in names + daily_temp (pd.DataFrame): daily temperature (File like object) + + Attributes: + output: list total heating load heating load for the first year (first 12 bill periods) heating load of each month (return NaN if R-squared is low) """ + # pylint: disable=too-many-instance-attributes def __init__(self, bill, daily_temp): self.bill = bill self.daily_temp = daily_temp @@ -45,7 +48,7 @@ class BillDisaggregation(): Args: period_end_date (Datetime): end date of a period - days_in_period (str or int): number of days in a period + days_in_period (str): number of days in a period Returns: @@ -79,8 +82,9 @@ class BillDisaggregation(): bill_cp['Bill To Date'] = pd.to_datetime(bill_cp['Bill To Date']) bill_cp['Bill From Date'] = pd.to_datetime(bill_cp['Bill From Date']) # Add new columns for 'weather in period' and 'daily usage' - bill_cp['Temperature'] = [self.weather_period(x, y) for (x, y) in \ - zip(bill_cp['Bill To Date'], bill_cp['Days In Bill'])] + bill_cp['Temperature'] = [ + self.weather_period(x, y) for (x, y) in zip(bill_cp['Bill To Date'], bill_cp['Days In Bill']) + ] bill_cp = bill_cp[['Bill From Date', 'Bill To Date', 'Temperature', 'Usage', 'Days In Bill']] bill_cp['Daily Usage'] = [x / y for (x, y) in zip(bill_cp['Usage'], bill_cp['Days In Bill'])] bill_cp = bill_cp.dropna() @@ -111,15 +115,14 @@ class BillDisaggregation(): set_heating (int): a temperature setting for heating set_cooling (int): a temperature setting for cooling - temperature (array or array-like): an array of lists of daily temperature - consumption (array or array-like): an array of monthly consumption (normalized) + temperature (array): an array of lists of daily temperature [array-like] + consumption (array): an array of monthly consumption (normalized) [array-like] Returns: - tuple (len = 3): Returns a tuple of three elements - First element (sklearn.linear_model.LinearRegression): regression model - Second element (float): r-squared of the linear regression model - Third element (2d-array): a 2D numpy array of normalized monthly HDDs and CDDs + sklearn.linear_model.LinearRegression: regression model + float: r-squared of the linear regression model + 2d-array: a 2D numpy array of normalized monthly HDDs and CDDs """ # function for regression @@ -129,7 +132,6 @@ class BillDisaggregation(): [np.mean([BillDisaggregation.cooling(xx, set_cooling) for xx in x]) for x in temperature] regr_model = linear_model.LinearRegression() hddcdd = np.array([daily_hdd, daily_cdd]).T - #print hddcdd, consumption regr_model.fit(hddcdd, consumption) return regr_model, regr_model.score(hddcdd, consumption), hddcdd @@ -148,7 +150,7 @@ class BillDisaggregation(): default 'Unknown' """ - + # pylint: disable=too-many-statements assert len(self.bill) > 5, 'No sufficient months for regression.' bill = self.table_cleaning(self.bill) days = [int(ii) for ii in bill['Days In Bill'].tolist()] @@ -171,7 +173,7 @@ class BillDisaggregation(): if usage == 'Both': print( - 'For this bill, R-squared is {}, ' \ + 'For this bill, R-squared is {}, ' 'with set point optimized at {}'.format(-opt.fun, opt.x) ) hddcdd = regr[2] @@ -186,7 +188,6 @@ class BillDisaggregation(): regr_model = regr[0] hddcdd = regr[2] - if usage == 'Cooling': function = lambda x: -self.regression_r2_op(-100, x, np.array(bill['Temperature']), bill['Daily Usage'])[1] opt = minimize(function, 65, method='nelder-mead', options={'xtol': 1e-2, 'disp': False}) @@ -195,14 +196,13 @@ class BillDisaggregation(): regr_model = regr[0] hddcdd = regr[2] - # Now we regress with the optimized set point self.usage = usage - #print(hddcdd) + # print(hddcdd) if True | (regr[1] > 0.5): - #print regr_model.coef_[0], regr_model.intercept_ + # print regr_model.coef_[0], regr_model.intercept_ days = np.array([int(ii) for ii in bill['Days In Bill'].tolist()]) self.heating_load_m = np.multiply(hddcdd[:, 0], days) * regr_model.coef_[0] self.cooling_load_m = np.multiply(hddcdd[:, 1], days) * regr_model.coef_[1] @@ -215,7 +215,7 @@ class BillDisaggregation(): self.cooling_load_m = self.cooling_load_m * sum_ratio self.others_m = self.others_m * sum_ratio - #For printing output + # For printing output bill_cp = self.bill.copy() bill_cp = bill_cp[['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']] self.r_squared_of_fit = regr[1] -- GitLab