From d62696adafed083987eeb5be4b6a6eac507366ce Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 27 Mar 2017 16:51:12 -0400 Subject: [PATCH 01/11] Add initial code --- bpeng/__init__.py | 1 + bpeng/weather/__init__.py | 0 bpeng/weather/weather.py | 241 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 bpeng/weather/__init__.py create mode 100644 bpeng/weather/weather.py diff --git a/bpeng/__init__.py b/bpeng/__init__.py index 76ce1c0..a5995bd 100644 --- a/bpeng/__init__.py +++ b/bpeng/__init__.py @@ -1,2 +1,3 @@ from .heatloss.heatloss import HeatLoss from .dimensions.parse_dimensions import ParseDimensions +from .weather.weather import WeatherScraper diff --git a/bpeng/weather/__init__.py b/bpeng/weather/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py new file mode 100644 index 0000000..1c04944 --- /dev/null +++ b/bpeng/weather/weather.py @@ -0,0 +1,241 @@ +from datetime import datetime +from datetime import date, timedelta +import sys +from urllib.request import urlopen + +import numpy as np +import pandas as pd + +class WeatherParsing: + """ + Class for parsing weather data from 'wunderground.com' + This version only for years after 2000 + + Parameters + ---------- + period: year | period, optional, default 'year' + If 'year', get the weather data for one single year + If 'period', get the weather data within an particular period + + year: int + Year which data is required + Ignored when period = 'period' + + startdate: str + Startdate of which data is required + Ignored when period = 'year' + + enddate: str + Enddate of which data is required + Ignored when period = 'year' + + city: str + City or airport where data is required + KCHA for Chattanooga, TN + KNYC for New York City, NY + ... + + Attributes + ---------- + get_weather: + Get the data from the website + Requires about 1 second for each day + option: + detail: boolean + 'True' for detail weather + 'False' for temperature only + + day_list: list of strings + A list of all the dates within the period + + output_temp: pandas dataframe + A dataframe of the temperature data required + option: + 'day': a temperature for each day + 'hour': a temperature for each hour + default 'day' + + output_detail: pandas dataframe + A dataframe of weather data of 'temperature','humidity','wind' and 'rain' + + Example + ------- + >>> wp = Weather_Parsing(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') + >>> wp.get_weather() + >>> print(wp.output_temp()) + date temperature + 0 2016/03/01 45.63 + 1 2016/03/02 41.95 + 2 2016/03/03 31.00 + + >>> wp = Weather_Parsing(period='period',startdate='2014/1/1', enddate='2014/1/2', city='KCHA') + >>> wp.get_weather(detail=True) + >>> print(wp.output_detail()) + temperature humidity wind rain + 2014/01/01 36.68 79.0 1.790000 0.00 + 2014/01/02 38.63 79.0 10.028205 0.43 + + + """ + def __init__(self, period='year', year=2014, startdate='sd', enddate='ed', city='KCHA'): + self.year = year + self.city = city + + day_list = [] + if period == 'year': + d1 = date(year, 1, 1) + d2 = date(year, 12, 31) + elif period == 'period': + try: + d1 = pd.to_datetime(startdate).date() + d2 = pd.to_datetime(enddate).date() + except: + raise ValueError('Check date time') + else: + raise ValueError("period should be 'year' or 'period'") + + delta = d2 - d1 + day_list = [(d1 + timedelta(days=i)).strftime("20%y/%m/%d") for i in range(delta.days + 1)] + self.day_list = day_list + + def get_date_weather(self, date): + url = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1'.format(self.city, date) + html = urlopen(url).read() + if (sys.version_info > (3, 0)): + html = html.decode('utf-8') + htmlclean = html.split('\n')[2:-1] + htmltmp = [x.split(',')[:-1] for x in htmlclean] + for i in htmltmp: + try: + if float(i[1]) < -100: + htmltmp.remove(i) + except: + htmltmp.remove(i) + return htmltmp + + + def time_to_hour(self, x): + y = x[:] + hourtmp = datetime.strptime(y[0], "%I:%M %p").strftime("%H") + y[0] = int(hourtmp) + y[1] = float(y[1]) + return y + + def add_missing_temperature(self, x): + y = np.array(x) + yhour = y[:,0] + tl = [] + + for i in set(yhour): + ttmp = np.mean([j[1] for j in filter(lambda xx: xx[0] == i, y)]) + tl.append([int(i), ttmp]) + thour = [j[0] for j in tl] + if 0 not in thour: + tl = [[0, tl[0][1]]] + tl + if 23 not in thour: + tl.append([23,tl[-1][1]]) + #print tl + + htmp = tl[0] + for i in tl[0:]: + if i[0] - htmp[0] != 1: + tinsert = [[ti, (i[1]*(htmp[0]-ti)+htmp[1]*(ti-i[0]))/(htmp[0]-i[0])] for ti in range(htmp[0]+1, i[0])] + tl += tinsert + htmp = i + + return tl + + def sum_of_rain(self, x): + sor = 0 + for i in x: + try: + if float(i) >= 0: + sor += float(i) + except: + pass + return sor + + def average_of_humidity(self, x): + aoh = 0 + for i in x: + try: + if int(i) >= 0: + aoh += int(i) + except: + x.remove(i) + return aoh / len(x) + + def average_of_wind(self, x): + aow = 0 + for i in x: + try: + if float(i) >= 0: + aow += float(i) + except: + x.remove(i) + return aow / len(x) + + def get_weather(self, detail=False): + temp_list = [] + humidity_list = [] + wind_list = [] + rain_list = [] + condition_list = [] + + for i in self.day_list: + try: + weather_all = self.get_date_weather(i) + gettmp = [j[:2] for j in weather_all] + if detail == True: + getdetail = weather_all + humidity_list.append([k[3] for k in weather_all]) + wind_list.append([k[7] for k in weather_all]) + rain_list.append([k[9] for k in weather_all]) + condition_list.append([k[11] for k in weather_all]) + temp = self.add_missing_temperature( + [self.time_to_hour(wtmp) for wtmp in gettmp] + ) + temp_list.append(temp) + except: + raise ValueError('No temperature data for {}'.format(i)) + break + self.temperature = temp_list + self.humidity = [self.average_of_humidity(wtmp) for wtmp in humidity_list] + self.wind = [self.average_of_wind(wtmp) for wtmp in wind_list] + self.rain = [self.sum_of_rain(wtmp) for wtmp in rain_list] + self.condition = condition_list + + dflist = [] + for i in range(len(self.day_list)): + for j in range(24): + dflist.append([self.day_list[i], j, round(self.temperature[i][j][1], 2)]) + self.temperature_df = pd.DataFrame(dflist, columns=['date','hour','temperature']) + + dflist2 = [] + for i in range(len(self.day_list)): + dflist2.append([self.day_list[i], round(np.mean([self.temperature[i][j][1] for j in range(24)]), 2)]) + self.temperature_df2 = pd.DataFrame(dflist2, columns=['date','temperature']) + + + + def output_temp(self, interval='day'): + if interval == 'hour': + return self.temperature_df + elif interval == 'day': + return self.temperature_df2 + else: + raise ValueError("Choose interval parameter within 'day' and 'hour'") + + def output_detail(self): + try: + dfdetail = [list(self.temperature_df2['temperature']), self.humidity, self.wind, self.rain] + dfdetail = pd.DataFrame(dfdetail).T + dfdetail.columns = ['temperature','humidity', 'wind', 'rain'] + dfdetail.index = self.day_list + return dfdetail + except: + raise ValueError("Get detail weather first") + +wp = Weather_Parsing(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') +wp.get_weather(detail=True) +print(wp.output_detail()) -- GitLab From e3a323e9f02a2fc0f43f8eee37217b258e91cc3d Mon Sep 17 00:00:00 2001 From: Conrad S Date: Fri, 31 Mar 2017 11:10:16 -0400 Subject: [PATCH 02/11] Initial syntax refactoring for weather --- bpeng/tests/test_weather.py | 41 +++++ bpeng/weather/weather.py | 313 ++++++++++++++++++++++++------------ 2 files changed, 255 insertions(+), 99 deletions(-) create mode 100644 bpeng/tests/test_weather.py diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py new file mode 100644 index 0000000..a1c0750 --- /dev/null +++ b/bpeng/tests/test_weather.py @@ -0,0 +1,41 @@ +"""Test weather scraper""" +import os +from bpeng import WeatherScraper + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +class TestWeather: + + def setup_class(self): + self.scraper1 = WeatherScraper( + period='period', + startdate='2016/3/1', + enddate='2016/3/3', + city='KNYC' + ) + + self.scraper2 = WeatherScraper( + period='year', + year=2014, + city='KNYC' + ) + + def test_parse(self): + self.scraper1.get_weather(detail=True) + output = self.scraper1.output_detail() + assert len(output) == 3 + + assert output['temperature'][0] == 45.63 + assert output['temperature'][1] == 41.95 + assert output['temperature'][2] == 31.00 + + assert output['humidity'][0] == 51.384615384615387 + assert output['wind'][1] == 9.6279999999999983 + assert output['rain'][1] == 0.26999999999999996 + +# self.scraper2.get_weather(detail=True) +# output = self.scraper2.output_detail() +# print(output) + + + diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index 1c04944..1b8b0c3 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -6,7 +6,7 @@ from urllib.request import urlopen import numpy as np import pandas as pd -class WeatherParsing: +class WeatherScraper: """ Class for parsing weather data from 'wunderground.com' This version only for years after 2000 @@ -60,7 +60,7 @@ class WeatherParsing: Example ------- - >>> wp = Weather_Parsing(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') + >>> wp = WeatherScraper(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') >>> wp.get_weather() >>> print(wp.output_temp()) date temperature @@ -68,7 +68,7 @@ class WeatherParsing: 1 2016/03/02 41.95 2 2016/03/03 31.00 - >>> wp = Weather_Parsing(period='period',startdate='2014/1/1', enddate='2014/1/2', city='KCHA') + >>> wp = WeatherScraper(period='period',startdate='2014/1/1', enddate='2014/1/2', city='KCHA') >>> wp.get_weather(detail=True) >>> print(wp.output_detail()) temperature humidity wind rain @@ -77,143 +77,262 @@ class WeatherParsing: """ - def __init__(self, period='year', year=2014, startdate='sd', enddate='ed', city='KCHA'): + WUNDER_URL = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1' + + def __init__(self, period='year', year=2014, startdate='sd', enddate='ed', city='KNYC'): self.year = year self.city = city - day_list = [] if period == 'year': - d1 = date(year, 1, 1) - d2 = date(year, 12, 31) + start_day = date(year, 1, 1) + end_day = date(year, 12, 31) elif period == 'period': try: - d1 = pd.to_datetime(startdate).date() - d2 = pd.to_datetime(enddate).date() + start_day = pd.to_datetime(startdate).date() + end_day = pd.to_datetime(enddate).date() except: raise ValueError('Check date time') else: raise ValueError("period should be 'year' or 'period'") - delta = d2 - d1 - day_list = [(d1 + timedelta(days=i)).strftime("20%y/%m/%d") for i in range(delta.days + 1)] + delta = end_day - start_day + # Create a list with a timestamp for each day in the desired range + day_list = [] + for day_num in range(delta.days+1): + new_day = start_day + timedelta(days=day_num) + day_list.append(new_day.strftime("20%y/%m/%d")) self.day_list = day_list def get_date_weather(self, date): - url = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1'.format(self.city, date) - html = urlopen(url).read() - if (sys.version_info > (3, 0)): - html = html.decode('utf-8') - htmlclean = html.split('\n')[2:-1] - htmltmp = [x.split(',')[:-1] for x in htmlclean] - for i in htmltmp: + """ + Get the weather for a given date + + Args: + date(string): The date to get weather for + + Returns: + html_features (list): + A 2D list where each entry is a list of + features for a timestamp + """ + print(date) + print(self.city) + url = WeatherScraper.WUNDER_URL.format(self.city, date) + html = urlopen(url).read().decode('utf-8') + # Seperate html by new lines, each entry is a timestamp with data + html_lines = html.split('\n')[2:-1] + # 2D list where each entry is a list of features for a given timestamp + html_features = [line.split(',')[:-1] for line in html_lines] + + # The minimum allowed value for temperature + MIN_ALLOWED_TEMPERATURE = -100 + # Loop through all of the features, removing ones with invalid temperature + for entry in html_features: try: - if float(i[1]) < -100: - htmltmp.remove(i) + if float(entry[1]) < MIN_ALLOWED_TEMPERATURE: + html_features.remove(entry) except: - htmltmp.remove(i) - return htmltmp - - - def time_to_hour(self, x): - y = x[:] - hourtmp = datetime.strptime(y[0], "%I:%M %p").strftime("%H") - y[0] = int(hourtmp) - y[1] = float(y[1]) - return y - - def add_missing_temperature(self, x): - y = np.array(x) - yhour = y[:,0] - tl = [] - - for i in set(yhour): - ttmp = np.mean([j[1] for j in filter(lambda xx: xx[0] == i, y)]) - tl.append([int(i), ttmp]) - thour = [j[0] for j in tl] - if 0 not in thour: - tl = [[0, tl[0][1]]] + tl - if 23 not in thour: - tl.append([23,tl[-1][1]]) - #print tl - - htmp = tl[0] - for i in tl[0:]: - if i[0] - htmp[0] != 1: - tinsert = [[ti, (i[1]*(htmp[0]-ti)+htmp[1]*(ti-i[0]))/(htmp[0]-i[0])] for ti in range(htmp[0]+1, i[0])] - tl += tinsert - htmp = i - - return tl - - def sum_of_rain(self, x): - sor = 0 - for i in x: + html_features.remove(entry) + return html_features + + def convert_time_format(self, prev_date): + """ + Convert a date time object from 12 hr format (with am and pm) + to 24 hr format + + Args: + prev_date(list): List with the 0th index in 12 hour + format and the 1st index as a string representing temperature + + Returns: + new_date(list): A new list with the 0th index in 24 hour + format and the 1st index as a float representing temperature + + """ + # Make a copy of the date object + new_date = prev_date[:] + # Convert to 24 hr format + new_hour = datetime.strptime(new_date[0], "%I:%M %p").strftime("%H") + new_date[0] = int(new_hour) + # Convert the temperature to a float from a string + new_date[1] = float(new_date[1]) + return new_date + + # x is a list of timestamps in 24 hour format + # timestamp, and temperature 2D list + def add_missing_temperature(self, hour_temperature_list): + """ + Interpolate missing temperatures for hours from the temperatures around them + + Args: + hour_temperature_list (list): + A 2D list where each entry is a list containing exactly + 2 entries. 0th index is the hour, 1st index is the + temperature for that hour + + + + """ + return_list = [] + + # A 1D list that contains only the hours + hour_list = [i[0] for i in hour_temperature_list] + # No repeated entries + hour_set = set(hour_list) + + # There are sometimes multiple entries for a single hour, + # here we take the average of those entries + for hour in hour_set: + # Only average the temperatures that are for this hour + avg_temperature = np.mean( + [entry[1] for entry in hour_temperature_list if entry[0] == hour] + ) + return_list.append([int(hour), avg_temperature]) + + # For interpolation purposes we need data for the 0th hour + # and for the 23rd hour. Set them to the nearest available datapoint + if 0 not in hour_set: + return_list.insert(0, [0, return_list[0][1]]) + if 23 not in hour_set: + return_list.append([23, return_list[-1][1]]) + + # Get the first entry in the list for usage in loop + prev_entry = return_list[0] + # A list to loop through so we can add new interpolated data + # into the middle of the return_list + looping_list = return_list + return_list = [] + for entry in looping_list[0:]: + # If there is data missing between this hour and + # the previous hour, find weighted average and set it to that value + cur_hour = entry[0] + prev_hour = prev_entry[0] + # If there is a gap between these two timestamps, interpolate the new hours + if (cur_hour - prev_hour) != 1: + cur_temp = entry[1] + prev_temp = prev_entry[1] + for new_hour in range(cur_hour + 1, prev_hour): + # Math to interpolate temperature + interpolate_sum = cur_temp * (prev_hour - newhour) + prev_temp * (new_hour - cur_hour) + new_temperature = interpolate_sum / (prev_hour - cur_hour) + + new_entry = [new_hour, new_temperature] + return_list.append(new_entry) + # Set prev entry to current entry before looping back around + return_list.append(entry) + prev_entry = entry + return return_list + + def sum_of_rain(self, rain_data): + """ + Sum a days worth of rain data, ignoring the + data if it's less than 0 or not an integer + + Args: + rain_date (list): + A list where each entry is a data point + for rain + + Returns: + total (float): + Sum of all number entries greater than 0 + """ + total = 0 + for entry in rain_data: try: - if float(i) >= 0: - sor += float(i) + if float(entry) >= 0: + total += float(entry) except: pass - return sor + return total - def average_of_humidity(self, x): - aoh = 0 - for i in x: - try: - if int(i) >= 0: - aoh += int(i) - except: - x.remove(i) - return aoh / len(x) - def average_of_wind(self, x): - aow = 0 - for i in x: + def average_of_feature(self, feature_list): + """ + Take the average value for a given feature + (humidity or wind currently) + + Args: + feature_list (list): + A list where each entry is a feature + + Returns: + (float): An average + """ + total = 0 + for feature in feature_list: try: - if float(i) >= 0: - aow += float(i) + if float(feature) >= 0: + total += float(feature) + else: + feature_list.remove(feature) except: - x.remove(i) - return aow / len(x) + feature_list.remove(feature) + return total / len(feature_list) def get_weather(self, detail=False): - temp_list = [] + """ + Get the weather for this object's date range + + Args: + detail (boolean): Whether or not the scrape should + include humidity, wind, rain and conditions + """ + # List of + temperature_list = [] humidity_list = [] wind_list = [] rain_list = [] condition_list = [] - for i in self.day_list: + # i is a day + for day in self.day_list: try: - weather_all = self.get_date_weather(i) - gettmp = [j[:2] for j in weather_all] + # all of the weather data for that day + day_weather = self.get_date_weather(day) + # for each line in the day's weather, (aka for each time stamp), get the time stamp AND the temperature + # time stamp is in [0], temperautre is in the [1] index + gettmp = [j[:2] for j in day_weather] if detail == True: - getdetail = weather_all - humidity_list.append([k[3] for k in weather_all]) - wind_list.append([k[7] for k in weather_all]) - rain_list.append([k[9] for k in weather_all]) - condition_list.append([k[11] for k in weather_all]) + # all the weather data +# append all the data for all of the features + humidity_list.append([k[3] for k in day_weather]) + wind_list.append([k[7] for k in day_weather]) + rain_list.append([k[9] for k in day_weather]) + # condition is sunny, rainy + condition_list.append([k[11] for k in day_weather]) + # interpolate missing temperatures for the data temp = self.add_missing_temperature( - [self.time_to_hour(wtmp) for wtmp in gettmp] + # Convert to 24 hr format for each timestamp in one day + [self.convert_time_format(wtmp) for wtmp in gettmp] ) - temp_list.append(temp) - except: - raise ValueError('No temperature data for {}'.format(i)) - break - self.temperature = temp_list - self.humidity = [self.average_of_humidity(wtmp) for wtmp in humidity_list] - self.wind = [self.average_of_wind(wtmp) for wtmp in wind_list] + temperature_list.append(temp) + except Exception as e: + raise e + # raise ValueError('No temperature data for {}'.format(day)) + self.temperature = temperature_list +# for day of data in humidity + self.humidity = [self.average_of_feature(wtmp) for wtmp in humidity_list] + self.wind = [self.average_of_feature(wtmp) for wtmp in wind_list] self.rain = [self.sum_of_rain(wtmp) for wtmp in rain_list] self.condition = condition_list + # list of list, each entry is date, hour, and temperature dflist = [] for i in range(len(self.day_list)): for j in range(24): dflist.append([self.day_list[i], j, round(self.temperature[i][j][1], 2)]) self.temperature_df = pd.DataFrame(dflist, columns=['date','hour','temperature']) + + # For daily only dflist2 = [] for i in range(len(self.day_list)): - dflist2.append([self.day_list[i], round(np.mean([self.temperature[i][j][1] for j in range(24)]), 2)]) + mean = np.mean([self.temperature[i][j][1] for j in range(24)]) + dflist2.append([ + self.day_list[i], + round(mean, 2) + ]) self.temperature_df2 = pd.DataFrame(dflist2, columns=['date','temperature']) @@ -235,7 +354,3 @@ class WeatherParsing: return dfdetail except: raise ValueError("Get detail weather first") - -wp = Weather_Parsing(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') -wp.get_weather(detail=True) -print(wp.output_detail()) -- GitLab From 90fbcee1422abdfe85e95fcfe5f5f92bf462ebaf Mon Sep 17 00:00:00 2001 From: Conrad S Date: Fri, 31 Mar 2017 14:22:38 -0400 Subject: [PATCH 03/11] Add comments and update variable names in weather --- bpeng/tests/test_weather.py | 9 +-- bpeng/weather/weather.py | 124 +++++++++++++++++++++--------------- 2 files changed, 74 insertions(+), 59 deletions(-) diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py index a1c0750..ee7c704 100644 --- a/bpeng/tests/test_weather.py +++ b/bpeng/tests/test_weather.py @@ -22,7 +22,7 @@ class TestWeather: def test_parse(self): self.scraper1.get_weather(detail=True) - output = self.scraper1.output_detail() + output = self.scraper1.output_daily_data() assert len(output) == 3 assert output['temperature'][0] == 45.63 @@ -32,10 +32,3 @@ class TestWeather: assert output['humidity'][0] == 51.384615384615387 assert output['wind'][1] == 9.6279999999999983 assert output['rain'][1] == 0.26999999999999996 - -# self.scraper2.get_weather(detail=True) -# output = self.scraper2.output_detail() -# print(output) - - - diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index 1b8b0c3..a072fa6 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -1,11 +1,11 @@ from datetime import datetime from datetime import date, timedelta -import sys from urllib.request import urlopen import numpy as np import pandas as pd + class WeatherScraper: """ Class for parsing weather data from 'wunderground.com' @@ -48,7 +48,7 @@ class WeatherScraper: day_list: list of strings A list of all the dates within the period - output_temp: pandas dataframe + output_temperature: pandas dataframe A dataframe of the temperature data required option: 'day': a temperature for each day @@ -56,19 +56,22 @@ class WeatherScraper: default 'day' output_detail: pandas dataframe - A dataframe of weather data of 'temperature','humidity','wind' and 'rain' + A dataframe of weather data for + 'temperature','humidity','wind' and 'rain' Example ------- - >>> wp = WeatherScraper(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') + >>> wp = WeatherScraper(period='period',startdate='2016/3/1', + enddate='2016/3/3', city='KNYC') >>> wp.get_weather() - >>> print(wp.output_temp()) + >>> print(wp.output_temperature()) date temperature 0 2016/03/01 45.63 1 2016/03/02 41.95 2 2016/03/03 31.00 - >>> wp = WeatherScraper(period='period',startdate='2014/1/1', enddate='2014/1/2', city='KCHA') + >>> wp = WeatherScraper(period='period',startdate='2014/1/1', + enddate='2014/1/2', city='KCHA') >>> wp.get_weather(detail=True) >>> print(wp.output_detail()) temperature humidity wind rain @@ -79,7 +82,14 @@ class WeatherScraper: """ WUNDER_URL = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1' - def __init__(self, period='year', year=2014, startdate='sd', enddate='ed', city='KNYC'): + def __init__( + self, + period='year', + year=2014, + startdate='sd', + enddate='ed', + city='KNYC' + ): self.year = year self.city = city @@ -115,8 +125,6 @@ class WeatherScraper: A 2D list where each entry is a list of features for a timestamp """ - print(date) - print(self.city) url = WeatherScraper.WUNDER_URL.format(self.city, date) html = urlopen(url).read().decode('utf-8') # Seperate html by new lines, each entry is a timestamp with data @@ -213,7 +221,10 @@ class WeatherScraper: prev_temp = prev_entry[1] for new_hour in range(cur_hour + 1, prev_hour): # Math to interpolate temperature - interpolate_sum = cur_temp * (prev_hour - newhour) + prev_temp * (new_hour - cur_hour) + interpolate_sum = ( + cur_temp * (prev_hour - new_hour) + + prev_temp * (new_hour - cur_hour) + ) new_temperature = interpolate_sum / (prev_hour - cur_hour) new_entry = [new_hour, new_temperature] @@ -246,7 +257,6 @@ class WeatherScraper: pass return total - def average_of_feature(self, feature_list): """ Take the average value for a given feature @@ -288,69 +298,81 @@ class WeatherScraper: # i is a day for day in self.day_list: try: - # all of the weather data for that day + # All of the weather data for that day day_weather = self.get_date_weather(day) - # for each line in the day's weather, (aka for each time stamp), get the time stamp AND the temperature - # time stamp is in [0], temperautre is in the [1] index - gettmp = [j[:2] for j in day_weather] - if detail == True: - # all the weather data -# append all the data for all of the features - humidity_list.append([k[3] for k in day_weather]) - wind_list.append([k[7] for k in day_weather]) - rain_list.append([k[9] for k in day_weather]) - # condition is sunny, rainy - condition_list.append([k[11] for k in day_weather]) - # interpolate missing temperatures for the data - temp = self.add_missing_temperature( + # A list with the 0th index as the hour, 1st index as temperature + hour_temperature_list = [j[:2] for j in day_weather] + if detail: + # Get the data for each feature + humidity_list.append([entry[3] for entry in day_weather]) + wind_list.append([entry[7] for entry in day_weather]) + rain_list.append([entry[9] for entry in day_weather]) + condition_list.append([entry[11] for entry in day_weather]) + # Interpolate missing temperatures + new_temperature = self.add_missing_temperature( # Convert to 24 hr format for each timestamp in one day - [self.convert_time_format(wtmp) for wtmp in gettmp] + [self.convert_time_format(entry) for entry in hour_temperature_list] ) - temperature_list.append(temp) + temperature_list.append(new_temperature) except Exception as e: raise e # raise ValueError('No temperature data for {}'.format(day)) self.temperature = temperature_list -# for day of data in humidity - self.humidity = [self.average_of_feature(wtmp) for wtmp in humidity_list] - self.wind = [self.average_of_feature(wtmp) for wtmp in wind_list] - self.rain = [self.sum_of_rain(wtmp) for wtmp in rain_list] + # Get the averages for each day + self.humidity = [self.average_of_feature(entry) for entry in humidity_list] + self.wind = [self.average_of_feature(entry) for entry in wind_list] + self.rain = [self.sum_of_rain(entry) for entry in rain_list] self.condition = condition_list - # list of list, each entry is date, hour, and temperature - dflist = [] + # Detailed dataframe with hourly temperature data + dataframe_list = [] for i in range(len(self.day_list)): for j in range(24): - dflist.append([self.day_list[i], j, round(self.temperature[i][j][1], 2)]) - self.temperature_df = pd.DataFrame(dflist, columns=['date','hour','temperature']) - + dataframe_list.append([ + self.day_list[i], + j, + round(self.temperature[i][j][1], 2), + ]) + self.temperature_dataframe_detail = pd.DataFrame( + dataframe_list, + columns=['date','hour','temperature'], + ) # For daily only - dflist2 = [] + dataframe_list = [] for i in range(len(self.day_list)): mean = np.mean([self.temperature[i][j][1] for j in range(24)]) - dflist2.append([ + dataframe_list.append([ self.day_list[i], - round(mean, 2) + round(mean, 2), ]) - self.temperature_df2 = pd.DataFrame(dflist2, columns=['date','temperature']) - + self.temperature_dataframe_nodetail = pd.DataFrame( + dataframe_list, + columns=['date','temperature'], + ) - - def output_temp(self, interval='day'): + def output_temperature(self, interval='day'): if interval == 'hour': - return self.temperature_df + return self.temperature_dataframe_detail elif interval == 'day': - return self.temperature_df2 + return self.temperature_dataframe_nodetail else: raise ValueError("Choose interval parameter within 'day' and 'hour'") - def output_detail(self): + def output_daily_data(self): + """ + Output the data for daily + """ try: - dfdetail = [list(self.temperature_df2['temperature']), self.humidity, self.wind, self.rain] - dfdetail = pd.DataFrame(dfdetail).T - dfdetail.columns = ['temperature','humidity', 'wind', 'rain'] - dfdetail.index = self.day_list - return dfdetail + dataframe_detail = [ + list(self.temperature_dataframe_nodetail['temperature']), + self.humidity, + self.wind, + self.rain, + ] + dataframe_detail = pd.DataFrame(dataframe_detail).T + dataframe_detail.columns = ['temperature','humidity', 'wind', 'rain'] + dataframe_detail.index = self.day_list + return dataframe_detail except: raise ValueError("Get detail weather first") -- GitLab From 25864228558254539cb3c9b1111379f33868a163 Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 11:51:48 -0400 Subject: [PATCH 04/11] Change how weather is imported in init --- bpeng/__init__.py | 1 - bpeng/tests/test_weather.py | 6 +++--- bpeng/weather/__init__.py | 1 + bpeng/weather/weather.py | 14 +++++++------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bpeng/__init__.py b/bpeng/__init__.py index a5995bd..76ce1c0 100644 --- a/bpeng/__init__.py +++ b/bpeng/__init__.py @@ -1,3 +1,2 @@ from .heatloss.heatloss import HeatLoss from .dimensions.parse_dimensions import ParseDimensions -from .weather.weather import WeatherScraper diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py index ee7c704..c054fde 100644 --- a/bpeng/tests/test_weather.py +++ b/bpeng/tests/test_weather.py @@ -1,20 +1,20 @@ """Test weather scraper""" import os -from bpeng import WeatherScraper +from bpeng.weather import WeatherUnderground BASE_DIR = os.path.dirname(os.path.abspath(__file__)) class TestWeather: def setup_class(self): - self.scraper1 = WeatherScraper( + self.scraper1 = WeatherUnderground( period='period', startdate='2016/3/1', enddate='2016/3/3', city='KNYC' ) - self.scraper2 = WeatherScraper( + self.scraper2 = WeatherUnderground( period='year', year=2014, city='KNYC' diff --git a/bpeng/weather/__init__.py b/bpeng/weather/__init__.py index e69de29..5663f6f 100644 --- a/bpeng/weather/__init__.py +++ b/bpeng/weather/__init__.py @@ -0,0 +1 @@ +from .weather import WeatherUnderground diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index a072fa6..d37009e 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -class WeatherScraper: +class WeatherUnderground: """ Class for parsing weather data from 'wunderground.com' This version only for years after 2000 @@ -61,7 +61,7 @@ class WeatherScraper: Example ------- - >>> wp = WeatherScraper(period='period',startdate='2016/3/1', + >>> wp = WeatherUnderground(period='period',startdate='2016/3/1', enddate='2016/3/3', city='KNYC') >>> wp.get_weather() >>> print(wp.output_temperature()) @@ -70,7 +70,7 @@ class WeatherScraper: 1 2016/03/02 41.95 2 2016/03/03 31.00 - >>> wp = WeatherScraper(period='period',startdate='2014/1/1', + >>> wp = WeatherUnderground(period='period',startdate='2014/1/1', enddate='2014/1/2', city='KCHA') >>> wp.get_weather(detail=True) >>> print(wp.output_detail()) @@ -125,7 +125,7 @@ class WeatherScraper: A 2D list where each entry is a list of features for a timestamp """ - url = WeatherScraper.WUNDER_URL.format(self.city, date) + url = WeatherUnderground.WUNDER_URL.format(self.city, date) html = urlopen(url).read().decode('utf-8') # Seperate html by new lines, each entry is a timestamp with data html_lines = html.split('\n')[2:-1] @@ -335,7 +335,7 @@ class WeatherScraper: ]) self.temperature_dataframe_detail = pd.DataFrame( dataframe_list, - columns=['date','hour','temperature'], + columns=['date', 'hour', 'temperature'], ) # For daily only @@ -348,7 +348,7 @@ class WeatherScraper: ]) self.temperature_dataframe_nodetail = pd.DataFrame( dataframe_list, - columns=['date','temperature'], + columns=['date', 'temperature'], ) def output_temperature(self, interval='day'): @@ -371,7 +371,7 @@ class WeatherScraper: self.rain, ] dataframe_detail = pd.DataFrame(dataframe_detail).T - dataframe_detail.columns = ['temperature','humidity', 'wind', 'rain'] + dataframe_detail.columns = ['temperature', 'humidity', 'wind', 'rain'] dataframe_detail.index = self.day_list return dataframe_detail except: -- GitLab From b3400e679aebf31a3b183dc04665e2ff2c2af7f4 Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 14:04:07 -0400 Subject: [PATCH 05/11] Change name of weather init arg to location from city --- bpeng/tests/test_weather.py | 4 ++-- bpeng/weather/weather.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py index c054fde..83cfd72 100644 --- a/bpeng/tests/test_weather.py +++ b/bpeng/tests/test_weather.py @@ -11,13 +11,13 @@ class TestWeather: period='period', startdate='2016/3/1', enddate='2016/3/3', - city='KNYC' + location='KNYC' ) self.scraper2 = WeatherUnderground( period='year', year=2014, - city='KNYC' + location='KNYC' ) def test_parse(self): diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index d37009e..8603dba 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -29,10 +29,10 @@ class WeatherUnderground: Enddate of which data is required Ignored when period = 'year' - city: str - City or airport where data is required + location: str + location or airport where data is required KCHA for Chattanooga, TN - KNYC for New York City, NY + KNYC for New York location, NY ... Attributes @@ -62,7 +62,7 @@ class WeatherUnderground: Example ------- >>> wp = WeatherUnderground(period='period',startdate='2016/3/1', - enddate='2016/3/3', city='KNYC') + enddate='2016/3/3', location='KNYC') >>> wp.get_weather() >>> print(wp.output_temperature()) date temperature @@ -71,7 +71,7 @@ class WeatherUnderground: 2 2016/03/03 31.00 >>> wp = WeatherUnderground(period='period',startdate='2014/1/1', - enddate='2014/1/2', city='KCHA') + enddate='2014/1/2', location='KCHA') >>> wp.get_weather(detail=True) >>> print(wp.output_detail()) temperature humidity wind rain @@ -88,10 +88,10 @@ class WeatherUnderground: year=2014, startdate='sd', enddate='ed', - city='KNYC' + location='KNYC' ): self.year = year - self.city = city + self.location = location if period == 'year': start_day = date(year, 1, 1) @@ -125,7 +125,7 @@ class WeatherUnderground: A 2D list where each entry is a list of features for a timestamp """ - url = WeatherUnderground.WUNDER_URL.format(self.city, date) + url = WeatherUnderground.WUNDER_URL.format(self.location, date) html = urlopen(url).read().decode('utf-8') # Seperate html by new lines, each entry is a timestamp with data html_lines = html.split('\n')[2:-1] -- GitLab From 8d127155b68560853bc5dedd79637e3e9e2399ac Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 14:04:58 -0400 Subject: [PATCH 06/11] Remove BASE_DIR variable from weather test --- bpeng/tests/test_weather.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py index 83cfd72..7ce84e0 100644 --- a/bpeng/tests/test_weather.py +++ b/bpeng/tests/test_weather.py @@ -2,8 +2,6 @@ import os from bpeng.weather import WeatherUnderground -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) - class TestWeather: def setup_class(self): -- GitLab From add28ea9f77dc29a2a920c7d5adb3880ad6d7629 Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 14:10:13 -0400 Subject: [PATCH 07/11] Fix syntax errors in weather --- bpeng/weather/weather.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index 8603dba..f574365 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -118,7 +118,7 @@ class WeatherUnderground: Get the weather for a given date Args: - date(string): The date to get weather for + date (string): The date to get weather for Returns: html_features (list): @@ -178,7 +178,10 @@ class WeatherUnderground: 2 entries. 0th index is the hour, 1st index is the temperature for that hour - + Returns: + (list): A 2D list where each entry is a list containing exactly + 24 entries. Each entry is a 2D list with the 0th index being the + hour and the 1st index being the temperature """ return_list = [] @@ -315,8 +318,7 @@ class WeatherUnderground: ) temperature_list.append(new_temperature) except Exception as e: - raise e - # raise ValueError('No temperature data for {}'.format(day)) + raise ValueError('No temperature data for {}, {}'.format(day, str(e))) self.temperature = temperature_list # Get the averages for each day self.humidity = [self.average_of_feature(entry) for entry in humidity_list] -- GitLab From 0094343d07efb177ffb656495aa76ea0e21175bb Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 14:11:33 -0400 Subject: [PATCH 08/11] Updates return comments to have no parenthese --- bpeng/weather/weather.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index f574365..e0e1933 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -121,7 +121,7 @@ class WeatherUnderground: date (string): The date to get weather for Returns: - html_features (list): + list: A 2D list where each entry is a list of features for a timestamp """ @@ -149,11 +149,11 @@ class WeatherUnderground: to 24 hr format Args: - prev_date(list): List with the 0th index in 12 hour + prev_date (list): List with the 0th index in 12 hour format and the 1st index as a string representing temperature Returns: - new_date(list): A new list with the 0th index in 24 hour + list: A new list with the 0th index in 24 hour format and the 1st index as a float representing temperature """ @@ -270,7 +270,7 @@ class WeatherUnderground: A list where each entry is a feature Returns: - (float): An average + float: An average """ total = 0 for feature in feature_list: -- GitLab From 81bb759ad7e0c02e894a8d6fabc2012e5a5014b9 Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Mon, 3 Apr 2017 15:00:54 -0400 Subject: [PATCH 09/11] Fix pylint errors and doc style --- bpeng/tests/test_weather.py | 3 +- bpeng/weather/weather.py | 217 +++++++++++++++++------------------- 2 files changed, 107 insertions(+), 113 deletions(-) diff --git a/bpeng/tests/test_weather.py b/bpeng/tests/test_weather.py index 7ce84e0..5b032f3 100644 --- a/bpeng/tests/test_weather.py +++ b/bpeng/tests/test_weather.py @@ -1,8 +1,9 @@ """Test weather scraper""" -import os from bpeng.weather import WeatherUnderground class TestWeather: + scraper1 = None + scraper2 = None def setup_class(self): self.scraper1 = WeatherUnderground( diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index e0e1933..8e70ea9 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -1,5 +1,5 @@ -from datetime import datetime -from datetime import date, timedelta +""" Weather scraper for wunderground.com """ +from datetime import date, datetime, timedelta from urllib.request import urlopen import numpy as np @@ -11,84 +11,59 @@ class WeatherUnderground: Class for parsing weather data from 'wunderground.com' This version only for years after 2000 - Parameters - ---------- - period: year | period, optional, default 'year' - If 'year', get the weather data for one single year - If 'period', get the weather data within an particular period - - year: int - Year which data is required - Ignored when period = 'period' - - startdate: str - Startdate of which data is required - Ignored when period = 'year' - - enddate: str - Enddate of which data is required - Ignored when period = 'year' - - location: str - location or airport where data is required - KCHA for Chattanooga, TN - KNYC for New York location, NY - ... - - Attributes - ---------- - get_weather: - Get the data from the website - Requires about 1 second for each day - option: - detail: boolean - 'True' for detail weather - 'False' for temperature only - - day_list: list of strings - A list of all the dates within the period - - output_temperature: pandas dataframe - A dataframe of the temperature data required - option: - 'day': a temperature for each day - 'hour': a temperature for each hour - default 'day' - - output_detail: pandas dataframe - A dataframe of weather data for - 'temperature','humidity','wind' and 'rain' - - Example - ------- - >>> wp = WeatherUnderground(period='period',startdate='2016/3/1', - enddate='2016/3/3', location='KNYC') - >>> wp.get_weather() - >>> print(wp.output_temperature()) - date temperature - 0 2016/03/01 45.63 - 1 2016/03/02 41.95 - 2 2016/03/03 31.00 - - >>> wp = WeatherUnderground(period='period',startdate='2014/1/1', - enddate='2014/1/2', location='KCHA') - >>> wp.get_weather(detail=True) - >>> print(wp.output_detail()) - temperature humidity wind rain - 2014/01/01 36.68 79.0 1.790000 0.00 - 2014/01/02 38.63 79.0 10.028205 0.43 - - + Args: + + period (str): year | period, optional, default 'year' + If 'year', get the weather data for one single year + If 'period', get the weather data within an particular period + year (int): Year which data is required. Ignored when period = 'period'. + startdate (str): Startdate of which data is required in format year/month/day. + Ignored when period = 'year' + enddate (str): Enddate of which data is required. Ignored when period = 'year'. + location (str): Location or airport where data is required + i.e. KCHA for Chattanooga, TN + KNYC for New York location, NY + + Attributes: + + day_list (list): A list of all the dates (str) within the period + + Examples: + + >>> wp = WeatherUnderground(period='period',startdate='2016/3/1', + enddate='2016/3/3', location='KNYC') + >>> wp.get_weather() + >>> print(wp.output_temperature()) + date temperature + 0 2016/03/01 45.63 + 1 2016/03/02 41.95 + 2 2016/03/03 31.00 + + >>> wp = WeatherUnderground(period='period',startdate='2014/1/1', + enddate='2014/1/2', location='KCHA') + >>> wp.get_weather(detail=True) + >>> print(wp.output_detail()) + temperature humidity wind rain + 2014/01/01 36.68 79.0 1.790000 0.00 + 2014/01/02 38.63 79.0 10.028205 0.43 """ + WUNDER_URL = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1' + MIN_ALLOWED_TEMPERATURE = -100 # The minimum allowed value for temperature + temperature = None + humidity = None + wind = None + rain = None + condition = None + temperature_dataframe_detail = None def __init__( - self, - period='year', - year=2014, - startdate='sd', - enddate='ed', - location='KNYC' + self, + period='year', + year=2014, + startdate='sd', + enddate='ed', + location='KNYC' ): self.year = year self.location = location @@ -113,49 +88,49 @@ class WeatherUnderground: day_list.append(new_day.strftime("20%y/%m/%d")) self.day_list = day_list - def get_date_weather(self, date): + def get_date_weather(self, day): """ Get the weather for a given date Args: - date (string): The date to get weather for + + day (string): The date to get weather for Returns: - list: - A 2D list where each entry is a list of - features for a timestamp + + list: A 2D list where each entry is a list of features for a timestamp """ - url = WeatherUnderground.WUNDER_URL.format(self.location, date) + url = self.WUNDER_URL.format(self.location, day) html = urlopen(url).read().decode('utf-8') # Seperate html by new lines, each entry is a timestamp with data html_lines = html.split('\n')[2:-1] # 2D list where each entry is a list of features for a given timestamp html_features = [line.split(',')[:-1] for line in html_lines] - # The minimum allowed value for temperature - MIN_ALLOWED_TEMPERATURE = -100 # Loop through all of the features, removing ones with invalid temperature for entry in html_features: try: - if float(entry[1]) < MIN_ALLOWED_TEMPERATURE: + if float(entry[1]) < self.MIN_ALLOWED_TEMPERATURE: html_features.remove(entry) - except: + except Exception as err: html_features.remove(entry) return html_features - def convert_time_format(self, prev_date): + @staticmethod + def convert_time_format(prev_date): """ Convert a date time object from 12 hr format (with am and pm) to 24 hr format Args: + prev_date (list): List with the 0th index in 12 hour - format and the 1st index as a string representing temperature + format and the 1st index as a string representing temperature Returns: - list: A new list with the 0th index in 24 hour - format and the 1st index as a float representing temperature + list: A new list with the 0th index in 24 hour + format and the 1st index as a float representing temperature """ # Make a copy of the date object new_date = prev_date[:] @@ -168,21 +143,22 @@ class WeatherUnderground: # x is a list of timestamps in 24 hour format # timestamp, and temperature 2D list - def add_missing_temperature(self, hour_temperature_list): + @staticmethod + def add_missing_temperature(hour_temperature_list): """ Interpolate missing temperatures for hours from the temperatures around them Args: - hour_temperature_list (list): - A 2D list where each entry is a list containing exactly - 2 entries. 0th index is the hour, 1st index is the - temperature for that hour + + hour_temperature_list (list): A 2D list where each entry is a list containing exactly + 2 entries. 0th index is the hour, 1st index is the + temperature for that hour Returns: - (list): A 2D list where each entry is a list containing exactly - 24 entries. Each entry is a 2D list with the 0th index being the - hour and the 1st index being the temperature + list: A 2D list where each entry is a list containing exactly + 24 entries. Each entry is a 2D list with the 0th index being the + hour and the 1st index being the temperature """ return_list = [] @@ -243,20 +219,19 @@ class WeatherUnderground: data if it's less than 0 or not an integer Args: - rain_date (list): - A list where each entry is a data point - for rain + + rain_date (list): A list where each entry is a data point for rain Returns: - total (float): - Sum of all number entries greater than 0 + + float: Sum of all number entries greater than 0 """ total = 0 for entry in rain_data: try: if float(entry) >= 0: total += float(entry) - except: + except Exception as err: pass return total @@ -266,10 +241,11 @@ class WeatherUnderground: (humidity or wind currently) Args: - feature_list (list): - A list where each entry is a feature + + feature_list (list): A list where each entry is a feature Returns: + float: An average """ total = 0 @@ -279,18 +255,20 @@ class WeatherUnderground: total += float(feature) else: feature_list.remove(feature) - except: + except Exception as err: feature_list.remove(feature) return total / len(feature_list) def get_weather(self, detail=False): """ - Get the weather for this object's date range + Get the weather for this object's date range. Requires about 1 second for each day Args: - detail (boolean): Whether or not the scrape should - include humidity, wind, rain and conditions + + detail (bool): Whether or not the scrape should include humidity, wind, rain + and conditions. True for detail weather and False for temperature only """ + # pylint: disable=too-many-locals # List of temperature_list = [] humidity_list = [] @@ -317,8 +295,8 @@ class WeatherUnderground: [self.convert_time_format(entry) for entry in hour_temperature_list] ) temperature_list.append(new_temperature) - except Exception as e: - raise ValueError('No temperature data for {}, {}'.format(day, str(e))) + except Exception as err: + raise ValueError('No temperature data for {}, {}'.format(day, str(err))) self.temperature = temperature_list # Get the averages for each day self.humidity = [self.average_of_feature(entry) for entry in humidity_list] @@ -354,6 +332,16 @@ class WeatherUnderground: ) def output_temperature(self, interval='day'): + """ + Args: + + interval (str): Options 'day', temperature for each day or + 'hour', temperature for each hour. Defaults to 'day' + + Returns: + + pd.DataFrame: A dataframe of the temperature data + """ if interval == 'hour': return self.temperature_dataframe_detail elif interval == 'day': @@ -364,6 +352,11 @@ class WeatherUnderground: def output_daily_data(self): """ Output the data for daily + + Returns: + + pd.DataFrame: A dataframe of weather data for 'temperature', 'humidity', + 'wind' and 'rain'. """ try: dataframe_detail = [ -- GitLab From c55e373a71d3364ccbc4906cc742c138394c5a71 Mon Sep 17 00:00:00 2001 From: Alessandro DiMarco Date: Mon, 3 Apr 2017 15:19:16 -0400 Subject: [PATCH 10/11] Pylint disable warnings for now --- bpeng/weather/weather.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index 8e70ea9..d6873db 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -48,6 +48,7 @@ class WeatherUnderground: 2014/01/02 38.63 79.0 10.028205 0.43 """ + # pylint: disable=too-many-instance-attributes,too-many-arguments WUNDER_URL = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1' MIN_ALLOWED_TEMPERATURE = -100 # The minimum allowed value for temperature temperature = None @@ -56,6 +57,7 @@ class WeatherUnderground: rain = None condition = None temperature_dataframe_detail = None + temperature_dataframe_nodetail = None def __init__( self, @@ -87,6 +89,7 @@ class WeatherUnderground: new_day = start_day + timedelta(days=day_num) day_list.append(new_day.strftime("20%y/%m/%d")) self.day_list = day_list + # pylint: enable=too-many-arguments def get_date_weather(self, day): """ @@ -112,8 +115,10 @@ class WeatherUnderground: try: if float(entry[1]) < self.MIN_ALLOWED_TEMPERATURE: html_features.remove(entry) + # pylint: disable=broad-except,unused-variable except Exception as err: html_features.remove(entry) + # pylint: enable=broad-except,unused-variable return html_features @staticmethod @@ -141,9 +146,8 @@ class WeatherUnderground: new_date[1] = float(new_date[1]) return new_date - # x is a list of timestamps in 24 hour format - # timestamp, and temperature 2D list @staticmethod + # pylint: disable=too-many-locals def add_missing_temperature(hour_temperature_list): """ Interpolate missing temperatures for hours from the temperatures around them @@ -211,9 +215,11 @@ class WeatherUnderground: # Set prev entry to current entry before looping back around return_list.append(entry) prev_entry = entry + # pylint: enable=too-many-locals return return_list - def sum_of_rain(self, rain_data): + @staticmethod + def sum_of_rain(rain_data): """ Sum a days worth of rain data, ignoring the data if it's less than 0 or not an integer @@ -231,11 +237,14 @@ class WeatherUnderground: try: if float(entry) >= 0: total += float(entry) + # pylint: disable=broad-except,unused-variable except Exception as err: pass + # pylint: enable=broad-except,unused-variable return total - def average_of_feature(self, feature_list): + @staticmethod + def average_of_feature(feature_list): """ Take the average value for a given feature (humidity or wind currently) @@ -255,8 +264,10 @@ class WeatherUnderground: total += float(feature) else: feature_list.remove(feature) + # pylint: disable=broad-except,unused-variable except Exception as err: feature_list.remove(feature) + # pylint: enable=broad-except,unused-variable return total / len(feature_list) def get_weather(self, detail=False): @@ -269,7 +280,6 @@ class WeatherUnderground: and conditions. True for detail weather and False for temperature only """ # pylint: disable=too-many-locals - # List of temperature_list = [] humidity_list = [] wind_list = [] @@ -330,6 +340,7 @@ class WeatherUnderground: dataframe_list, columns=['date', 'temperature'], ) + # pylint: enable=too-many-locals def output_temperature(self, interval='day'): """ -- GitLab From 6f0085f0a87794b605ae9467f0924800e5263508 Mon Sep 17 00:00:00 2001 From: Conrad S Date: Mon, 3 Apr 2017 15:27:24 -0400 Subject: [PATCH 11/11] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0b933df..30b75bc 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ reqs = [str(req.req) for req in install_reqs] setup( name='bpeng', - version='0.1', + version='0.2', description='Engineering models and utilites', author='BlocPower', author_email='admin@blocpower.org', -- GitLab