diff --git a/bpeng/bill/awesome_disaggregate.py b/bpeng/bill/awesome_disaggregate.py index 85118395fc05bef35731ed9e503cf15826c579b5..35940e4bda8dc09cd647edc02a0a28840bc5c94e 100644 --- a/bpeng/bill/awesome_disaggregate.py +++ b/bpeng/bill/awesome_disaggregate.py @@ -54,10 +54,11 @@ class BillDisaggregation(): self.output_table = None self.output_table_monthly = None self.most_recent_monthly_output = None - self.unit_price = None + self.avg_unit_price = None self.bill_breakdown = None self.recent_year_bill_breakdown = None self.annual_usage = None + self.formatted_bill = None def weather_cleaning(self, raw_daily_temp): """ @@ -210,16 +211,14 @@ class BillDisaggregation(): Args: - raw_bill (pd.DataFrame): a raw bill with columns of + self.output_table (pd.DataFrame): a raw bill with columns of 'Bill From Date' 'Bill To Date' 'Days In Bill' 'Usage' - 'Delivery Charge' - 'Supply Charge' 'Total Charge' Returns: - pd.DataFrame: a formatted raw_bill + pd.DataFrame: a formatted self.output_table boolean: True - Length of the bill has changed during bill cleaning step 1 @@ -255,7 +254,7 @@ class BillDisaggregation(): bill_formatted = bill_copy2 - self.unit_price = (sum(bill_formatted['Total Charge'])) / ( + self.avg_unit_price = (sum(bill_formatted['Total Charge'])) / ( sum(bill_formatted['Usage'])) return bill_formatted, bill_shape_change @@ -348,6 +347,10 @@ class BillDisaggregation(): row_index - 1)] = bill_consi['Usage'][int( row_index - 1)] + bill_consi['Usage'][int( row_index)] + bill_consi['Total Charge'][int( + row_index - 1)] = bill_consi['Total Charge'][int( + row_index - 1)] + bill_consi['Total Charge'][int( + row_index)] bill_consi['Days In Bill'][int( row_index - 1)] = bill_consi['Days In Bill'][int( row_index - 1 @@ -360,6 +363,10 @@ class BillDisaggregation(): row_index + 1)] = bill_consi['Usage'][int( row_index + 1)] + bill_consi['Usage'][int( row_index)] + bill_consi['Total Charge'][int( + row_index + 1)] = bill_consi['Total Charge'][int( + row_index + 1)] 
+ bill_consi['Total Charge'][int( + row_index)] bill_consi['Days In Bill'][int( row_index + 1)] = bill_consi['Days In Bill'][int( row_index + 1 @@ -370,6 +377,8 @@ class BillDisaggregation(): 'Bill From Date'][0] bill_consi['Usage'][ 1] = bill_consi['Usage'][0] + bill_consi['Usage'][1] + bill_consi['Total Charge'][ + 1] = bill_consi['Total Charge'][0] + bill_consi['Total Charge'][1] bill_consi['Days In Bill'][ 1] = bill_consi['Days In Bill'][0] + bill_consi['Days In Bill'][1] @@ -378,6 +387,8 @@ class BillDisaggregation(): 'Bill To Date'].iloc[-1] bill_consi['Usage'].iloc[ -2] = bill_consi['Usage'].iloc[-2] + bill_consi['Usage'].iloc[-1] + bill_consi['Total Charge'].iloc[ + -2] = bill_consi['Total Charge'].iloc[-2] + bill_consi['Total Charge'].iloc[-1] bill_consi['Days In Bill'].iloc[ -2] = bill_consi['Days In Bill'].iloc[-1] + bill_consi['Days In Bill'].iloc[-2] @@ -385,7 +396,6 @@ class BillDisaggregation(): bill_consi = bill_consi.drop( bill_consi.index[list(bill_quality_short['index'])]) - # bill_consi = bill_consi.reset_index(inplace = True) bill_consi = bill_consi.reset_index(drop=False) return bill_consi @@ -545,6 +555,10 @@ class BillDisaggregation(): self.processed_bill = self.processed_bill.sort_values('Bill From Date') + formatted_bill = formatted_bill.sort_values('Bill From Date') + formatted_bill['Unit Price'] = formatted_bill['Total Charge'] / formatted_bill['Usage'] + self.formatted_bill = formatted_bill + regression_method = 1 if weather_related_usage == 'Unknown': @@ -773,21 +787,18 @@ class BillDisaggregation(): bill_cp = self.processed_bill.copy() bill_cp = self.processed_bill[[ - 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage' + 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Usage', 'Total Charge' ]] - + bill_cp['Unit Price'] = bill_cp['Total Charge'] / bill_cp['Usage'] bill_cp['Heating Usage'] = self.heating_consumption_pred bill_cp['Cooling Usage'] = self.cooling_consumption_pred bill_cp['Other Usage'] = 
self.others_consumption_pred if self.usage == 'Both Not': self.r_squared_of_fit = 0 - # self.h = np.NaN else: self.r_squared_of_fit = regr[1] - # self.set_points = opt.x - # update 2018/01/17 self.heating_set_point = heating_set_point self.cooling_set_point = cooling_set_point self.output_table = bill_cp @@ -796,12 +807,14 @@ class BillDisaggregation(): first_bill_date = self.processed_bill['Bill From Date'].iloc[0] billing_months = self.num_month_dates(last_bill_date, first_bill_date) - self.output_table_monthly = self.output_to_month(last_bill_date, self.heating_set_point, + output_monthly_initial = self.output_to_month(last_bill_date, self.heating_set_point, self.cooling_set_point, billing_months) + self.output_table_monthly = self.normalized_unit_price(self.output_table, output_monthly_initial) self.most_recent_monthly_output = self.output_to_month(last_bill_date, self.heating_set_point, self.cooling_set_point, 12) self.bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.output_table_monthly) - self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, self.most_recent_monthly_output) + self.recent_year_bill_breakdown = self.non_weahter_related_breakdown(non_weather_related_end_use, + self.most_recent_monthly_output) self.annual_usage = self.annual_usage_costs(self.recent_year_bill_breakdown, non_weather_related_end_use) def benchmarking_output(self): @@ -925,7 +938,7 @@ class BillDisaggregation(): monthly_output = monthly_output_table[['Month', 'Bill From Date', 'Bill To Date', 'Days In Bill', 'Heating Usage', 'Cooling Usage', 'Other Usage', 'HDD', 'CDD']] - + monthly_output = monthly_output.sort('Bill From Date').reset_index(drop=True) return monthly_output def non_weahter_related_breakdown(self, end_uses, monthly_output_table): @@ -996,7 +1009,7 @@ class BillDisaggregation(): temp_usage = sum(temp) annual_usage['Usage'].iloc[j] = temp_usage - annual_usage['Costs'] = 
def find_index_in_first_raw_biil(self, norm_bill_date):
    """
    Return the index label of the raw bill whose period contains a date.

    A raw billing period is treated as the half-open interval
    ['Bill From Date', 'Bill To Date'), so a date equal to a period's
    'Bill To Date' is NOT inside that period.

    Args:

        norm_bill_date (datetime-like): a date taken from a normalized bill

    Returns:

        the index label of the first row of self.formatted_bill whose
        period contains the date, or None when no period contains it
    """
    for index, bill in self.formatted_bill.iterrows():
        if bill['Bill From Date'] <= norm_bill_date < bill['Bill To Date']:
            return index
    return None


def days_in_raw_bill_period(self, norm_bill_date, norm_bill_date_respected_index, flag):
    """
    Return how many days of a normalized bill fall within one raw billing period.

    Args:

        norm_bill_date (datetime-like): start or end date of the normalized bill
        norm_bill_date_respected_index: index label of the raw bill row in
            self.formatted_bill that the date belongs to
        flag (str): 'start' - norm_bill_date is the start of the normalized
                    bill; days are counted up to the raw 'Bill To Date'
                    'end' - norm_bill_date is the end of the normalized
                    bill; days are counted from the raw 'Bill From Date'

    Returns:

        int: number of days

    Raises:

        ValueError: flag is neither 'start' nor 'end'
    """
    if flag == 'start':
        period_end = self.formatted_bill['Bill To Date'][norm_bill_date_respected_index]
        return (period_end - norm_bill_date).days
    if flag == 'end':
        period_start = self.formatted_bill['Bill From Date'][norm_bill_date_respected_index]
        return (norm_bill_date - period_start).days
    # Fail loudly instead of hitting an unbound local variable.
    raise ValueError("flag must be 'start' or 'end', got {!r}".format(flag))


def weighted_unit_price(self, index_numdays):
    """
    Return the day-weighted average of the raw bills' unit prices.

    Args:

        index_numdays (list of dict): one entry per raw billing period with
            'index'    - index label of the row in self.formatted_bill
            'num_days' - number of days used as the weight of that row

    Returns:

        float: sum(unit price * days) / sum(days), or NaN when the total
        number of days is zero (e.g. an empty list), because no price can
        be derived in that case
    """
    unit_prices = self.formatted_bill['Unit Price']
    weighted_sum = 0
    total_days = 0
    for entry in index_numdays:
        days_in_that_period = int(entry['num_days'])
        weighted_sum += unit_prices.loc[int(entry['index'])] * days_in_that_period
        total_days += days_in_that_period

    if total_days == 0:
        # No overlapping raw bill: avoid ZeroDivisionError, report "unknown".
        return float('nan')
    return weighted_sum / total_days


def find_bills_in_raw(self, norm_bill_from, norm_bill_to):
    """
    Return the raw billing periods overlapped by a normalized billing period.

    Both the normalized period and every raw period are treated as
    half-open intervals [from, to). The overlap is computed from the dates
    of each raw bill directly, so the result does not depend on the index
    labels of self.formatted_bill being consecutive or date-ordered, and a
    normalized period that ends on (or after) the last raw 'Bill To Date'
    is handled like any other.

    Args:

        norm_bill_from (datetime-like): start of the normalized period
        norm_bill_to (datetime-like): end of the normalized period

    Returns:

        list of dict: one {'index': label, 'num_days': days} entry for every
        raw bill that shares at least one day with the normalized period,
        in the row order of self.formatted_bill; empty if nothing overlaps
    """
    results = []
    for index, bill in self.formatted_bill.iterrows():
        overlap_start = max(bill['Bill From Date'], norm_bill_from)
        overlap_end = min(bill['Bill To Date'], norm_bill_to)
        overlap_days = (overlap_end - overlap_start).days
        if overlap_days > 0:
            results.append({'index': index, 'num_days': overlap_days})
    return results


def normalized_unit_price(self, rawbill, mbill):
    """
    Calculate the unit price for each normalized billing period.

    Args:

        rawbill (pd.DataFrame): not used; the raw bills are read from
            self.formatted_bill. Kept so existing callers keep working.
        mbill (pd.DataFrame): normalized bills with columns of
                              'Bill From Date'
                              'Bill To Date'

    Returns:

        pd.DataFrame: the same mbill object with a 'Unit Price' column added
        (day-weighted average of the overlapping raw bills' unit prices,
        NaN where no raw bill overlaps the period)
    """
    mbill['Unit Price'] = [
        self.weighted_unit_price(self.find_bills_in_raw(from_date, to_date))
        for from_date, to_date in zip(mbill['Bill From Date'], mbill['Bill To Date'])
    ]
    return mbill