diff --git a/bpeng/weather/weather.py b/bpeng/weather/weather.py index d2f0f7486062d4976b045837cca945b0636e08a1..238a1e0c8f49fda2ea6271ffed5c9b6113a50699 100644 --- a/bpeng/weather/weather.py +++ b/bpeng/weather/weather.py @@ -1,6 +1,8 @@ """ Weather scraper for wunderground.com """ -from datetime import date, timedelta +from flask import current_app +from datetime import date, datetime, timedelta from urllib.request import urlopen +import dateutil.parser import json import numpy as np @@ -45,6 +47,7 @@ class WeatherUnderground: # WUNDER_URL = 'https://www.wunderground.com/history/airport/{}/{}/DailyHistory.html?format=1' # url above is deprecated WUNDER_JSON = 'http://api.wunderground.com/api/{}/history_{}/q/{}/{}.json' + WUNDER_JSON_FUTURE = 'http://api.wunderground.com/api/{}/hourly10day/q/{}/{}.json' MIN_ALLOWED_TEMPERATURE = -100 # The minimum allowed value for temperature temperature = None humidity = None @@ -105,18 +108,54 @@ class WeatherUnderground: Raises: KeyError: API Key does not exist or exceeds limit. 
def get_future_date_weather(self, day):
    """
    Get the hourly forecast for a single day from the hourly 10-day endpoint.

    The forecast fields are copied into the column names used for the
    historical observations ('hour', 'tempi', 'wspdi', 'precipi', 'hum'),
    so callers can read both kinds of frame through the same columns.

    Args:
        day (str): Date to keep, written as 'YYYY/MM/DD' with zero-padded
            month and day. It is compared as text with the date rebuilt
            from each forecast row, so any other spelling matches nothing.

    Returns:
        pandas.DataFrame: One row per forecast hour of `day`. It holds the
            raw 'FCTTIME', 'temp', 'humidity', 'qpf' and 'wspd' columns
            plus the derived 'hour', 'tempi', 'wspdi', 'precipi' and 'hum'
            columns. Rows whose 'tempi' is below MIN_ALLOWED_TEMPERATURE
            are removed.

    Raises:
        KeyError: The response has no 'hourly_forecast' entry. Presumably
            this means the API key is unknown or over its limit, which is
            how the history request reports the same situation.
        ValueError: No forecast row matches `day`.
    """
    url = self.WUNDER_JSON_FUTURE.format(self.api_key, self.state, self.city)
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as reply:
        response = json.loads(reply.read().decode())

    try:
        forecast = response['hourly_forecast']
    except KeyError as err:
        # Same exception type and message as the history request uses.
        raise KeyError('Key does not exist or exceeds limit.') from err

    def forecast_day(row):
        # Rebuild the row's date in the same 'YYYY/MM/DD' form as `day`.
        stamp = row['FCTTIME']
        return '{}/{}/{}'.format(
            stamp['year'], stamp['mon_padded'], stamp['mday_padded'])

    data = [row for row in forecast if forecast_day(row) == day]
    if not data:
        raise ValueError(
            'No hourly data for the specified future data. '
            'Future data only available 10 days ahead')

    features = pd.DataFrame(data)[
        ['FCTTIME', 'temp', 'humidity', 'qpf', 'wspd']].dropna()
    # 'hour_padded' is the zero-padded hour string of the forecast timestamp.
    features['hour'] = features['FCTTIME'].apply(lambda x: x['hour_padded'])
    # Each reading is a dict; only its 'english' entry is kept.
    features['tempi'] = features['temp'].apply(lambda x: x['english'])
    features['wspdi'] = features['wspd'].apply(lambda x: x['english'])
    features['precipi'] = features['qpf'].apply(lambda x: x['english'])
    features['hum'] = features['humidity']
    features = features[
        features['tempi'].astype(float) >= self.MIN_ALLOWED_TEMPERATURE]
    return features