From 0fe265b333d05d80f5581003042e69f76e5f2c64 Mon Sep 17 00:00:00 2001 From: Sarey Hamarneh Date: Mon, 24 Apr 2017 15:09:45 -0400 Subject: [PATCH 1/3] Make regression code more concise --- bpfin/utilbills/bill_proj_reg.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/bpfin/utilbills/bill_proj_reg.py b/bpfin/utilbills/bill_proj_reg.py index b6165de..96562ac 100644 --- a/bpfin/utilbills/bill_proj_reg.py +++ b/bpfin/utilbills/bill_proj_reg.py @@ -9,31 +9,12 @@ def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar, Args: bill_start_utility (list): list of datetimes (start-date) from bills bill_end_utility (list): list of datetimes (end-date) from bills - x_value_calendar (list): list of datetimes + x_value_calendar (list): list of datetimes that is difference of x_values (list): x values corresponding to datetimes Returns: list: x_values of a regression """ - day = [] - month = [] - year = [] - for i in bill_start_utility: - day.append(i.day) - month.append(i.month) - year.append(i.year) - - day2 = [] - month2 = [] - year2 = [] - for i in x_value_calendar: - day2.append(i.day) - month2.append(i.month) - year2.append(i.year) - - datezipper = [(x, y, z) for x, y, z in zip(year, month, day)] - datezipper2 = [(x, y, z) for x, y, z in zip(year2, month2, day2)] - list_subtractor = [x - y for x, y in zip(bill_end_utility, bill_start_utility)] day_counter = [] for number in list_subtractor: @@ -43,10 +24,10 @@ def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar, day_counter.append(0) index_finder = [] - for i in datezipper: - for x in datezipper2: + for i in bill_start_utility: + for x in x_value_calendar: if x == i: - index_finder.append(datezipper2.index(x)) + index_finder.append(x_value_calendar.index(x)) list_creator = [] for i in range(len(index_finder)): -- GitLab From eff3732b07381bdc60f794e6c152d42e3f5eb29f Mon Sep 17 00:00:00 2001 From: Sarey Hamarneh Date: Mon, 24 Apr 
2017 20:33:01 -0400 Subject: [PATCH 2/3] Fix regression code --- bpfin/utilbills/bill_proj_reg.py | 65 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/bpfin/utilbills/bill_proj_reg.py b/bpfin/utilbills/bill_proj_reg.py index 96562ac..f57abef 100644 --- a/bpfin/utilbills/bill_proj_reg.py +++ b/bpfin/utilbills/bill_proj_reg.py @@ -3,41 +3,32 @@ import datetime import calendar -def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar, x_values): +def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar, + x_values): """Return x values for regression. Args: bill_start_utility (list): list of datetimes (start-date) from bills bill_end_utility (list): list of datetimes (end-date) from bills - x_value_calendar (list): list of datetimes that is difference of + x_value_calendar (list): list of datetimes corresponding to x values x_values (list): x values corresponding to datetimes - Returns: list: x_values of a regression """ - list_subtractor = [x - y for x, y in zip(bill_end_utility, bill_start_utility)] - day_counter = [] - for number in list_subtractor: - if number is not None: - day_counter.append(abs(number.days)) - else: - day_counter.append(0) - - index_finder = [] - for i in bill_start_utility: - for x in x_value_calendar: - if x == i: - index_finder.append(x_value_calendar.index(x)) - - list_creator = [] - for i in range(len(index_finder)): - list_creator.append(x_values[index_finder[i]:index_finder[i] + day_counter[i]]) - - x_totals = [] - for i in list_creator: - x_totals.append(sum(i)) + final_sums = [] + paired_dates = [] + for start_date, end_date in zip(bill_start_utility, bill_end_utility): + x_sums = 0 + for (x_date, value) in zip(x_value_calendar, x_values): + if start_date < x_date <= end_date: + x_sums += value + final_sums.append(x_sums) + paired_dates.append((start_date, end_date)) + regression_base_dictionary = {} + for i in range(len(paired_dates)): + 
regression_base_dictionary[paired_dates[i]] = final_sums[i] - return x_totals + return regression_base_dictionary def regression_coefficients(x_list, y_list, is_intercept): @@ -60,20 +51,28 @@ def regression_coefficients(x_list, y_list, is_intercept): return coefficients -def regression_predicting_y(reg_history, periodstart, periodend, x_value_calendar, x_values): +def regression_predicting_y(reg_history, periodstart, periodend, + x_value_calendar, x_values): """Return results of regression. Args: reg_history (list): tuple of x and y coefficient based on historical regression periodstart (list): bill - periodend (list): - x_value_calendar (list): - x_values (list) : x values that we want for projection + periodend (list): bill + x_value_calendar (list): x regressor calendar values + x_values (list) : x values that are regressed Returns: list: progression of future y values """ - x_list = regression_base_sum(periodstart, periodend, x_value_calendar, x_values) + x_dict = regression_base_sum(periodstart, periodend, x_value_calendar, + x_values) future_y = [] - for i in x_list: - future_y.append(reg_history[0] + reg_history[1] * i) - return future_y + for date_pair, x_value in x_dict.items(): + future_y.append(reg_history[0] + reg_history[1] * x_value) + + result = {} + + for i in range(len(future_y)): + result[(periodstart[i], periodend[i])] = future_y[i] + + return result -- GitLab From 7898c9cd7a70529e08f30552a98c4a998f039918 Mon Sep 17 00:00:00 2001 From: Sarey Hamarneh Date: Tue, 25 Apr 2017 13:29:46 -0400 Subject: [PATCH 3/3] Fix comments --- bpfin/utilbills/bill_proj_reg.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/bpfin/utilbills/bill_proj_reg.py b/bpfin/utilbills/bill_proj_reg.py index f57abef..5c1a187 100644 --- a/bpfin/utilbills/bill_proj_reg.py +++ b/bpfin/utilbills/bill_proj_reg.py @@ -11,22 +11,19 @@ def regression_base_sum(bill_start_utility, bill_end_utility, x_value_calendar, bill_start_utility 
(list): list of datetimes (start-date) from bills bill_end_utility (list): list of datetimes (end-date) from bills x_value_calendar (list): list of datetimes corresponding to x values - x_values (list): x values corresponding to datetimes + x_values (list): x values corresponding to datetimes. x values are the independent variable + in the regression, they can be HDD/CDD/occupancy/etc Returns: - list: x_values of a regression + dictionary: {(start_date, end_date): x_values of a regression} """ - final_sums = [] - paired_dates = [] + + regression_base_dictionary = {} for start_date, end_date in zip(bill_start_utility, bill_end_utility): x_sums = 0 for (x_date, value) in zip(x_value_calendar, x_values): if start_date < x_date <= end_date: x_sums += value - final_sums.append(x_sums) - paired_dates.append((start_date, end_date)) - regression_base_dictionary = {} - for i in range(len(paired_dates)): - regression_base_dictionary[paired_dates[i]] = final_sums[i] + regression_base_dictionary[(start_date, end_date)] = x_sums return regression_base_dictionary @@ -51,20 +48,22 @@ def regression_coefficients(x_list, y_list, is_intercept): return coefficients -def regression_predicting_y(reg_history, periodstart, periodend, +def regression_predicting_y(reg_history, period_start, period_end, x_value_calendar, x_values): """Return results of regression. 
Args: reg_history (list): tuple of x and y coefficient based on historical regression - periodstart (list): bill - periodend (list): bill + period_start (list): list of datetimes of start date for a bill + period_end (list): list of datetimes of end date for a bill x_value_calendar (list): x regressor calendar values - x_values (list) : x values that are regressed + x_values (list) : x values that are regressed, this is the independent variable for the regression, + which could be HDD/CDD/occupancy, used to project y value (usage) in linear relationship, + y = mx+b Returns: - list: progression of future y values + dictionary: {(start_date, end_date): projection of y_values of a regression} """ - x_dict = regression_base_sum(periodstart, periodend, x_value_calendar, + x_dict = regression_base_sum(period_start, period_end, x_value_calendar, x_values) future_y = [] for date_pair, x_value in x_dict.items(): @@ -73,6 +72,6 @@ def regression_predicting_y(reg_history, periodstart, periodend, result = {} for i in range(len(future_y)): - result[(periodstart[i], periodend[i])] = future_y[i] + result[(period_start[i], period_end[i])] = future_y[i] return result -- GitLab