import datetime
import calendar


def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar,
                        x_values):
    """Sum the regressor (x) values that fall inside each billing period.

    Args:
        bill_start_utility (list): list of datetimes (start-date) from bills
        bill_end_utility (list): list of datetimes (end-date) from bills,
            paired by position with bill_start_utility
        x_value_calendar (list): list of datetimes corresponding to x values
        x_values (list): x values corresponding to the datetimes in
            x_value_calendar. These are the independent variable (regressor)
            of the regression, e.g. HDD/CDD/occupancy

    Returns:
        dictionary: {(start_date, end_date): sum of the x values whose date
            satisfies start_date < date <= end_date}. A period that contains
            no calendar date maps to 0.
    """
    # Pair dates with values once; the pairing does not depend on the billing
    # period, and materialising it keeps one-shot iterables reusable.
    dated_values = list(zip(x_value_calendar, x_values))

    regression_base_dictionary = {}
    for start_date, end_date in zip(bill_start_utility, bill_end_utility):
        # The window is open at the start date and closed at the end date.
        regression_base_dictionary[(start_date, end_date)] = sum(
            value for x_date, value in dated_values
            if start_date < x_date <= end_date)

    return regression_base_dictionary


# NOTE: regression_coefficients(x_list, y_list, is_intercept) is defined at
# this position in the module; its body lies outside the hunks shown here and
# is not reproduced.


def regression_predicting_y(reg_history, period_start, period_end,
                            x_value_calendar, x_values):
    """Project y values for billing periods from a fitted linear regression.

    Each period's x values are summed with regression_base_sum and the
    projection is computed as y = reg_history[0] + reg_history[1] * x_sum.

    Args:
        reg_history (list): regression coefficients, used as
            (intercept, slope): reg_history[0] is the additive term and
            reg_history[1] multiplies the summed x value
        period_start (list): list of datetimes of start date for a bill
        period_end (list): list of datetimes of end date for a bill, paired
            by position with period_start
        x_value_calendar (list): datetimes corresponding to x_values
        x_values (list): regressor values (independent variable, e.g.
            HDD/CDD/occupancy) used to project y (usage)

    Returns:
        dictionary: {(start_date, end_date): projected y value}
    """
    x_dict = regression_base_sum(period_start, period_end, x_value_calendar,
                                 x_values)
    intercept, slope = reg_history[0], reg_history[1]

    # Key the projection by the period it was computed for. Re-indexing the
    # input lists by position misaligns the results whenever a period is
    # repeated, because the dictionary then holds fewer entries than the lists.
    return {
        date_pair: intercept + slope * x_total
        for date_pair, x_total in x_dict.items()
    }