"""Disaggregate utility bills into heating, cooling and other loads.

The daily usage of every bill period is regressed on the mean heating and
cooling degree days of that period.  The fitted heating, cooling and base
terms are then rescaled so that they add up to the billed usage.
"""

# Provenance (kept from the patch envelope this module was extracted from):
#   [PATCH 1/2] "Add bill disaggregation" -- Boya Yu, Thu, 15 Jun 2017
#     bpeng/bill/__init__.py     : from .disaggregate import BillDisaggregation
#     bpeng/bill/disaggregate.py : this module
#   [PATCH 2/2] "Add requirements to Jenkinsfile" -- Alessandro DiMarco,
#     Thu, 15 Jun 2017; adds to the Jenkinsfile, ahead of the requirements
#     files:
#       pip install numpy==1.11.2
#       pip install pandas==0.18.0
#       pip install scipy==0.16.0
#       pip install scikit-learn==0.17.1
# NOTE(review): the Jenkinsfile context shown in patch 2/2 embeds an OAuth
# token in a `pip install git+https://<token>:x-oauth-basic@github.com/...`
# URL; it should be revoked and supplied through a CI secret instead.

from datetime import timedelta
import warnings

import numpy as np
import pandas as pd
from scipy.optimize import minimize

_USAGE_TYPES = ('Unknown', 'Heating', 'Cooling', 'Both')
_BILL_COLUMNS = ['Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage']


class BillDisaggregation:
    """
    Class for Bill Disaggregation

    Args:

        bill (pd.DataFrame): raw bill (from parsing)
            'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage'
            must be among the column names
        daily_temp (pd.DataFrame): daily temperature with the columns
            'date' (compared against 'M/D/YYYY' strings without zero
            padding) and 'temperature'

    Attributes:

        usage (str): usage type that was used for the final fit
        heating_load_m, cooling_load_m, others_m (np.ndarray):
            heating / cooling / other load of each bill period
        r_squared_of_fit (float): R-squared of the final regression
        heating_load_proportion, cooling_load_proportion (float):
            share of the total usage attributed to heating / cooling
        set_point (float): first optimised set point
        days_in_12_bills (int): number of days in the first 12 bill periods
        output_table (pd.DataFrame): per-bill-period result
        output_monthly (pd.DataFrame): per-calendar-month result, or None
            when the bills cover less than one year

        All of them are None until `optimize` has run successfully.
    """
    # pylint: disable=too-many-instance-attributes
    def __init__(self, bill, daily_temp):
        self.bill = bill
        self.daily_temp = daily_temp
        self.usage = None
        self.heating_load_m = None
        self.cooling_load_m = None
        self.others_m = None
        self.r_squared_of_fit = None
        self.heating_load_proportion = None
        self.cooling_load_proportion = None
        self.set_point = None
        self.days_in_12_bills = None
        self.output_table = None
        self.output_monthly = None

    def weather_period(self, period_end_date, days_in_period):
        """
        Get the daily temperatures of the period that ends on a given date

        The window covers the `days_in_period` rows of `daily_temp` that
        precede the row of `period_end_date`; the end date itself is
        excluded.

        Args:

            period_end_date (Datetime): end date of a period
            days_in_period (int or str): number of days in a period

        Returns:

            list: Returns a list of weather data for a period

        Raises:

            IndexError: `daily_temp` has no row for `period_end_date`
            ValueError: `daily_temp` does not reach back far enough

        """
        end = period_end_date.date()
        # the 'date' column is matched on unpadded 'M/D/YYYY' strings
        end_datetime = '{}/{}/{}'.format(end.month, end.day, end.year)
        matches = np.flatnonzero(
            (self.daily_temp['date'] == end_datetime).values)
        if matches.size == 0:
            raise IndexError(
                'No daily temperature record for {}'.format(end_datetime))
        # work with row positions throughout, so the result does not depend
        # on the labels of the daily_temp index
        end_pos = int(matches[0])
        start_pos = end_pos - int(days_in_period)
        if start_pos < 0:
            # a negative start would silently slice from the end of the table
            raise ValueError(
                'Daily temperature data does not cover the {} days '
                'before {}'.format(days_in_period, end_datetime))
        return self.daily_temp['temperature'].iloc[start_pos:end_pos].tolist()

    def table_cleaning(self, bill):
        """
        Clean the table

        Args:

            bill (pd.DataFrame): raw bill with columns
                'Bill To Date', 'Bill From Date',
                'Days In Bill', 'Usage'

        Returns:

            pd.DataFrame: Returns a cleaned dataframe with parsed dates, the
                list of daily temperatures of every period ('Temperature')
                and the usage per day ('Daily Usage'); rows with missing
                values are dropped, the index labels of `bill` are kept

        """
        bill_cp = bill.copy()
        bill_cp['Bill To Date'] = pd.to_datetime(bill_cp['Bill To Date'])
        bill_cp['Bill From Date'] = pd.to_datetime(bill_cp['Bill From Date'])
        bill_cp['Temperature'] = [
            self.weather_period(end, days)
            for end, days in zip(bill_cp['Bill To Date'],
                                 bill_cp['Days In Bill'])
        ]
        # explicit copy: the new column must not be written to a view
        bill_cp = bill_cp[['Bill From Date', 'Bill To Date', 'Temperature',
                           'Usage', 'Days In Bill']].copy()
        bill_cp['Daily Usage'] = bill_cp['Usage'] / bill_cp['Days In Bill']
        return bill_cp.dropna()

    @staticmethod
    def heating(curr_temp, set_temp):
        """HDD (for each day)"""
        if curr_temp > set_temp:
            return 0
        return set_temp - curr_temp

    @staticmethod
    def cooling(curr_temp, set_temp):
        """CDD (for each day)"""
        if curr_temp > set_temp:
            return curr_temp - set_temp
        return 0

    @staticmethod
    def regression_r2_op(set_heating, set_cooling, temperature, consumption):
        """
        A linear regression model with heating and cooling set fixed

        Args:

            set_heating (float): a temperature setting for heating
            set_cooling (float): a temperature setting for cooling
            temperature (array): one list of daily temperatures per period
            consumption (array): daily usage of each period

        Returns:

            sklearn.linear_model.LinearRegression: regression model
            float: r-squared of the linear regression model
            2d-array: mean daily HDD (column 0) and CDD (column 1) of
                each period

        """
        # imported here so that the helpers which do not fit a model can be
        # used without scikit-learn being installed
        from sklearn import linear_model

        daily_hdd = [
            np.mean([BillDisaggregation.heating(temp, set_heating)
                     for temp in period])
            for period in temperature
        ]
        daily_cdd = [
            np.mean([BillDisaggregation.cooling(temp, set_cooling)
                     for temp in period])
            for period in temperature
        ]
        hddcdd = np.column_stack([daily_hdd, daily_cdd])
        regr_model = linear_model.LinearRegression()
        regr_model.fit(hddcdd, consumption)
        return regr_model, regr_model.score(hddcdd, consumption), hddcdd

    def optimize(self, usage='Unknown', min_r_squared=None):
        """
        Main function for the optimization

        Searches the set point(s) that maximise the R-squared of the
        degree-day regression, then stores the disaggregated loads on the
        instance.

        Args:

            usage (str):
                Specify if the consumption is for heating or cooling
                'Unknown': no prior knowledge
                'Heating': only for heating
                'Cooling': only for cooling
                'Both': for both heating and cooling
                default 'Unknown'
            min_r_squared (float or None):
                when given, no result is stored if the R-squared of the
                fit is below this value ('Low R-squared' is printed);
                default None accepts every fit

        Raises:

            ValueError: `usage` is not one of the values listed above
            AssertionError: the bill has 5 or fewer periods

        """
        # pylint: disable=too-many-statements,too-many-locals
        if usage not in _USAGE_TYPES:
            raise ValueError(
                'usage must be one of {}, got {!r}'.format(_USAGE_TYPES, usage))
        if len(self.bill) <= 5:
            # explicit raise: a plain `assert` disappears under `python -O`
            raise AssertionError('No sufficient months for regression.')

        bill = self.table_cleaning(self.bill)
        temperature = bill['Temperature'].tolist()
        consumption = np.asarray(bill['Daily Usage'], dtype=float)
        # NOTE(review): newer SciPy releases call this option `xatol`; `xtol`
        # is kept because the Jenkinsfile patch pins scipy==0.16.0 -- confirm
        # before upgrading SciPy.
        options = {'xtol': 1e-2, 'disp': False}
        regr = None
        opt = None

        if usage in ('Unknown', 'Both'):
            opt = minimize(
                lambda x: -self.regression_r2_op(
                    x[0], x[1], temperature, consumption)[1],
                (65, 65),
                method='nelder-mead',
                options=options)
            regr = self.regression_r2_op(
                opt.x[0], opt.x[1], temperature, consumption)
            heating_coef, cooling_coef = regr[0].coef_
            if usage == 'Unknown':
                # one coefficient below 1/20 of the other: treat the bill as
                # single-purpose and refit below with only that term
                if cooling_coef < heating_coef / 20:
                    usage = 'Heating'
                elif heating_coef < cooling_coef / 20:
                    usage = 'Cooling'
                else:
                    usage = 'Both'
            if usage == 'Both':
                print(
                    'For this bill, R-squared is {}, '
                    'with set point optimized at {}'.format(-opt.fun, opt.x)
                )

        if usage == 'Heating':
            # The cooling set point is fixed at 200, i.e. no cooling load.
            # The optimizer hands over a 1-element array, hence `x[0]`.
            opt = minimize(
                lambda x: -self.regression_r2_op(
                    x[0], 200, temperature, consumption)[1],
                65,
                method='nelder-mead',
                options=options)
            print('For this bill, R-squared is {}, with set point optimized at {}'
                  .format(-opt.fun, opt.x[0]))
            regr = self.regression_r2_op(
                opt.x[0], 200, temperature, consumption)

        if usage == 'Cooling':
            # The heating set point is fixed at -100, i.e. no heating load.
            opt = minimize(
                lambda x: -self.regression_r2_op(
                    -100, x[0], temperature, consumption)[1],
                65,
                method='nelder-mead',
                options=options)
            print('For this bill, R-squared is {}, with set point optimized at {}'
                  .format(-opt.fun, opt.x[0]))
            regr = self.regression_r2_op(
                -100, opt.x[0], temperature, consumption)

        self.usage = usage
        regr_model, r_squared, hddcdd = regr

        if min_r_squared is not None and r_squared < min_r_squared:
            print('Low R-squared')
            return

        days = np.array([int(ii) for ii in bill['Days In Bill'].tolist()])
        heating_load = hddcdd[:, 0] * days * regr_model.coef_[0]
        cooling_load = hddcdd[:, 1] * days * regr_model.coef_[1]
        others = regr_model.intercept_ * days

        # Scale the fitted parts so that they add up to the billed usage.
        # The usage comes from the cleaned bill so that it stays aligned with
        # the loads when table_cleaning dropped rows.
        real_sum = np.asarray(bill['Usage'], dtype=float)
        sum_ratio = real_sum / (heating_load + cooling_load + others)
        heating_load = heating_load * sum_ratio
        cooling_load = cooling_load * sum_ratio
        others = others * sum_ratio

        if (others < 0).any():
            # a negative base load is not meaningful: spread the whole usage
            # over heating and cooling only
            heating_load, cooling_load = (
                np.array([heating_load, cooling_load])
                * real_sum / (cooling_load + heating_load))
            heating_load = np.nan_to_num(heating_load)
            cooling_load = np.nan_to_num(cooling_load)
            others = np.zeros(len(others))

        self.heating_load_m = heating_load
        self.cooling_load_m = cooling_load
        self.others_m = others
        self.r_squared_of_fit = r_squared

        # Raw (unparsed) rows that survived cleaning, so the date columns are
        # emitted exactly as they were supplied.
        bill_cp = self.bill.loc[bill.index, _BILL_COLUMNS].copy()
        bill_cp['Heating Usage'] = heating_load
        bill_cp['Cooling Usage'] = cooling_load
        bill_cp['Other Usage'] = others

        self.heating_load_proportion = heating_load.sum() / real_sum.sum()
        self.cooling_load_proportion = cooling_load.sum() / real_sum.sum()
        self.set_point = opt.x[0]
        self.days_in_12_bills = days[:12].sum()
        self.output_table = bill_cp
        try:
            self.output_monthly = \
                BillDisaggregation.output_to_monthly(self.output_table)
        except ValueError as err:
            # less than one year of bills: the per-period result is still valid
            warnings.warn('Monthly output is not available: {}'.format(err))
            self.output_monthly = None

    @staticmethod
    def output_to_monthly(output):
        """
        Transform period-wise output to month-wise output

        The bill periods are expected newest first.  The most recent year of
        bills is expanded to one row per day (every day of a period gets the
        period's average), aligned to the calendar by the day of year of
        the first 'Bill To Date', and summed per calendar month.

        Args:

            output (pd.DataFrame): a pandas dataframe like `self.output_table`
                columns of the dataframe must be (in order):
                'Bill From Date', 'Bill To Date',
                'Days In Bill', 'Usage', 'Heating Usage',
                'Cooling Usage', 'Other Usage'

        Returns:

            pd.DataFrame: result with monthly consumptions; column 'Month'
                (1-12) followed by the value columns of `output`

        Raises:

            ValueError: the bill periods cover less than one year

        """
        last_date = pd.to_datetime(output['Bill To Date']).iloc[0]
        days_in_recent_year = 365
        # going back 365 days lands on another day of the month exactly when
        # a 29th of February lies in between
        if (last_date - timedelta(365)).day != last_date.day:
            days_in_recent_year = 366

        # float copy: `output` itself is never modified
        values = np.array(output.iloc[:, 2:].values, dtype=float)
        days = np.rint(values[:, 0]).astype(int)
        days_cumsum = days.cumsum()
        reached = np.flatnonzero(days_cumsum >= days_in_recent_year)
        if reached.size == 0:
            raise ValueError(
                'bill periods cover {} days, at least {} are required'.format(
                    int(days_cumsum[-1]) if days_cumsum.size else 0,
                    days_in_recent_year))
        n_periods = int(reached[0]) + 1

        # the oldest period used is cut so that exactly one year remains
        kept_days = days[:n_periods].copy()
        kept_days[-1] -= days_cumsum[n_periods - 1] - days_in_recent_year
        daily_rates = values[:n_periods, 1:] / days[:n_periods, np.newaxis]
        # one row per day; reversed because the periods are newest first
        daily_usage = np.repeat(daily_rates, kept_days, axis=0)[::-1]

        # rotate so that row 0 corresponds to the 1st of January
        day_of_year = last_date.timetuple().tm_yday - 1
        daily_usage = np.roll(daily_usage, day_of_year, axis=0)

        month_days = [31, 28, 31, 30, 31, 30,
                      31, 31, 30, 31, 30, 31]
        if days_in_recent_year == 366:
            month_days[1] = 29
        month_cumsum = np.insert(np.cumsum(month_days), 0, 0)
        output_monthly = pd.DataFrame(
            [daily_usage[start:stop].sum(axis=0)
             for start, stop in zip(month_cumsum[:-1], month_cumsum[1:])],
            columns=list(output.columns[3:]))
        output_monthly.insert(0, 'Month', list(range(1, 13)))
        return output_monthly

    def output(self):
        """
        Output in csv format

        Returns:

            str: `self.output_table` as csv text

        """
        return self.output_table.to_csv(None)

    def to_json(self, period='bill'):
        """
        Output in json format

        Args:

            period (str): 'bill' for each bill period,
                any other value (e.g. 'month') for monthly
                default 'bill'

        Returns:

            json: output in json format

        """
        if period == 'bill':
            return self.output_table.to_json(orient="records")
        return self.output_monthly.to_json(orient="records")

    def print_all_features(self):
        """
        print the features heating load percentage, cooling load percentage,
        r-squared of fit and type of usage.

        """
        print('Heating load percentage is {:.1%}'.format(self.heating_load_proportion))
        print('Cooling load percentage is {:.1%}'.format(self.cooling_load_proportion))
        print('R-squared of fit is {}'.format(self.r_squared_of_fit))
        print('Usage is {}'.format(self.usage))