From 62003bf5747cbed17112ae5ec2d606848de27907 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Wed, 25 Sep 2019 17:22:01 -0400 Subject: [PATCH 01/13] everything except insert into final table --- bulk/mapboxMapping.py | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 bulk/mapboxMapping.py diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py new file mode 100644 index 0000000..a7f14d4 --- /dev/null +++ b/bulk/mapboxMapping.py @@ -0,0 +1,45 @@ +import psycopg2 +import requests +import json + +building_addresses = [] + +connection = psycopg2.connect(database="building", user = "blocpower", \ + password = "8z7NxHg8kBeLwtTDdRe", host = "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", \ + port = "5432") + +print('DB connection is Sucsessful \n') + +cursor = connection.cursor() + +query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ + FULL OUTER JOIN building_address ON address.id = building_address.address_id limit 5;''' + +cursor.execute(query1) +rows = cursor.fetchall() + +address_building_id = [] + +for row in rows: + address_list = [] + for i in range(1, 4): + address_list.append(str(row[i])) + address_list[:] = [' '.join(address_list[:])] + address_list.append(row[0]) + address_building_id.append(address_list) + +for i in address_building_id: + URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(i[0]) + PARAMS = { + 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', + 'limit': 1 + } + response = requests.get(url = URL, params = PARAMS).json() + responseBody = response['features'][0] + placeName = responseBody['place_name'] + i.append(placeName) + + # query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES (%s, %s); ''' + # var = i[2], i[1] + # cursor.execute(query2, var) + -- GitLab From c78f7c8c09aa138481640b8745ed3d5c81175719 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Thu, 26 Sep 2019 14:49:49 -0400 Subject: [PATCH 02/13] completed and tested --- bulk/mapboxMapping.py | 83 +++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index a7f14d4..02876e8 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -2,44 +2,49 @@ import psycopg2 import requests import json -building_addresses = [] - -connection = psycopg2.connect(database="building", user = "blocpower", \ - password = "8z7NxHg8kBeLwtTDdRe", host = "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", \ - port = "5432") - -print('DB connection is Sucsessful \n') - -cursor = connection.cursor() - -query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - FULL OUTER JOIN building_address ON address.id = building_address.address_id limit 5;''' - -cursor.execute(query1) -rows = cursor.fetchall() - -address_building_id = [] - -for row in rows: - address_list = [] - for i in range(1, 4): - address_list.append(str(row[i])) - address_list[:] = [' '.join(address_list[:])] - address_list.append(row[0]) - address_building_id.append(address_list) +def doThings(db_credentials): -for i in address_building_id: - URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(i[0]) - PARAMS = { - 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', - 'limit': 1 - } - response = requests.get(url = URL, params = PARAMS).json() - responseBody = response['features'][0] - placeName = responseBody['place_name'] - i.append(placeName) - - # query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES (%s, %s); ''' - # var = i[2], i[1] - # cursor.execute(query2, var) + connection = psycopg2.connect(database=db_credentials[0], user = db_credentials[1], \ + password = db_credentials[2], host = db_credentials[3], port = db_credentials[4]) + print('DB Connection establised successfully!') + cursor = connection.cursor() + + query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ + FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' + cursor.execute(query1) + rows = cursor.fetchall() + address_building_id = [] + for row in rows: + address_list = [] + for i in range(1, 4): + address_list.append(str(row[i])) + address_list[:] = [' '.join(address_list[:])] + address_list.append(row[0]) + address_building_id.append(address_list) + + for i in address_building_id: + URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(i[0]) + PARAMS = { + 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', + 'limit': 1 + } + response = requests.get(url = URL, params = PARAMS).json() + responseBody = response['features'][0] + placeName = responseBody['place_name'] + i.append(placeName) + query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[2], i[1]) + cursor.execute(query2) + + connection.commit() + cursor.close() + connection.close() + + return 'Success!' + +def main(): + db_credentials = ["building", "blocpower", "8z7NxHg8kBeLwtTDdRe", "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] + + message = doThings(db_credentials) +if __name__ == "__main__": + main() \ No newline at end of file -- GitLab From 3141c6863b9c884f6ca1575c6056c63e7b725db5 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Thu, 26 Sep 2019 15:02:54 -0400 Subject: [PATCH 03/13] Time added --- bulk/mapboxMapping.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index 02876e8..a857375 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -1,6 +1,7 @@ import psycopg2 import requests import json +import time def doThings(db_credentials): @@ -43,8 +44,10 @@ def doThings(db_credentials): def main(): db_credentials = ["building", "blocpower", "8z7NxHg8kBeLwtTDdRe", "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] - + start_time = time.time() message = doThings(db_credentials) + end_time = time.time() + print('Time Taken: ', int(end_time-start_time)) if __name__ == "__main__": main() \ No newline at end of file -- GitLab From 2228dece599f46cb20e7ab786f796eefe499ed93 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Thu, 26 Sep 2019 17:03:15 -0400 Subject: [PATCH 04/13] EC2 instance error solved --- bulk/mapboxMapping.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index a857375..db4d2fd 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -11,7 +11,7 @@ def doThings(db_credentials): cursor = connection.cursor() query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' + FULL OUTER JOIN building_address ON address.id = building_address.address_id limit 5;''' cursor.execute(query1) rows = cursor.fetchall() address_building_id = [] @@ -30,11 +30,19 @@ def doThings(db_credentials): 'limit': 1 } response = requests.get(url = URL, params = PARAMS).json() - responseBody = response['features'][0] - placeName = responseBody['place_name'] - i.append(placeName) - query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[2], i[1]) - cursor.execute(query2) + if 'features' in response: + responseBody = response['features'][0] + placeName = responseBody['place_name'] + i.append(placeName) + if i[1] == None: + i[1] = 0 + if "'" in i[2]: + i[2] = i[2].replace("'", "") + query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[2].encode('utf-8').strip(), i[1]) + print(query2) + cursor.execute(query2) + else: + continue connection.commit() cursor.close() -- GitLab From 95b3883a3ca4e503df65decdc0a35174900496af Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Mon, 30 Sep 2019 09:00:19 -0400 Subject: [PATCH 05/13] MultiProcessing + Batch --- bulk/mapboxMapping.py | 83 ++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index db4d2fd..5fd6f04 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -2,8 +2,9 @@ import psycopg2 import requests import json import time +from multiprocessing import Process, Pool -def doThings(db_credentials): +def getData(db_credentials): connection = psycopg2.connect(database=db_credentials[0], user = db_credentials[1], \ password = db_credentials[2], host = db_credentials[3], port = db_credentials[4]) @@ -11,7 +12,7 @@ def doThings(db_credentials): cursor = connection.cursor() query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - FULL OUTER JOIN building_address ON address.id = building_address.address_id limit 5;''' + FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' cursor.execute(query1) rows = cursor.fetchall() address_building_id = [] @@ -23,39 +24,71 @@ def doThings(db_credentials): address_list.append(row[0]) address_building_id.append(address_list) - for i in address_building_id: - URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(i[0]) - PARAMS = { - 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', - 'limit': 1 - } - response = requests.get(url = URL, params = PARAMS).json() - if 'features' in response: + connection.commit() + cursor.close() + connection.close() + + return address_building_id + +def mapboxData(address_building_id): + URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(address_building_id) + PARAMS = { + 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', + 'limit': 1 + } + response = requests.get(url = URL, params = PARAMS).json() + if 'features' in response: + try: responseBody = response['features'][0] placeName = responseBody['place_name'] - i.append(placeName) - if i[1] == None: - i[1] = 0 - if "'" in i[2]: - i[2] = i[2].replace("'", "") - query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[2].encode('utf-8').strip(), i[1]) - print(query2) - cursor.execute(query2) - else: - continue + address_building_id.append(placeName) + address_building_id.pop(0) + print(address_building_id) + except: + pass + +def putData(db_credentials, mapbox_address_list): + + connection = psycopg2.connect(database=db_credentials[0], user = db_credentials[1], \ + password = db_credentials[2], host = db_credentials[3], port = db_credentials[4]) + print('DB Connection establised successfully!') + cursor = connection.cursor() + + for i in mapbox_address_list: + if i[0] == None: + i[0] = 0 + if "'" in i[1]: + i[1] = i[1].replace("'", "") + query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[1], i[0]) + cursor.execute(query2) connection.commit() cursor.close() - connection.close() - - return 'Success!' + connection.close() def main(): db_credentials = ["building", "blocpower", "8z7NxHg8kBeLwtTDdRe", "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] + address_building_id = getData(db_credentials) start_time = time.time() - message = doThings(db_credentials) + pool = Pool(processes=10) + + pool.map(mapboxData, address_building_id) + + n = 0 + batch = 1000 + num = len(mapbox_address_list) + + while num > 0: + if num >= batch: + putData(db_credentials, mapbox_address_list[(n*batch):((n+1)*batch)]) + else: + putData(db_credentials, mapbox_address_list[(n*batch):]) + num -= batch + n += 1 + print('Batch: ', n) + end_time = time.time() - print('Time Taken: ', int(end_time-start_time)) + print('Total Time Taken: ', int(end_time-start_time)) if __name__ == "__main__": main() \ No newline at end of file -- GitLab From e3357db6b9a074792163fb1216368454f911744d Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Mon, 30 Sep 2019 16:12:29 -0400 Subject: [PATCH 06/13] output to CSV Files --- bulk/mapboxMapping.py | 77 ++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index 5fd6f04..37109cc 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -3,6 +3,7 @@ import requests import json import time from multiprocessing import Process, Pool +import csv def getData(db_credentials): @@ -12,7 +13,7 @@ def getData(db_credentials): cursor = connection.cursor() query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' + FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' cursor.execute(query1) rows = cursor.fetchall() address_building_id = [] @@ -23,15 +24,21 @@ def getData(db_credentials): address_list[:] = [' '.join(address_list[:])] address_list.append(row[0]) address_building_id.append(address_list) - + connection.commit() cursor.close() connection.close() + + with open('selectData.csv', 'a') as f: + writer = csv.writer(f) + for i in address_building_id: + if i[1] == None: + i[1] = 0 + row = [i[0], i[1]] + writer.writerow(row) - return address_building_id - -def mapboxData(address_building_id): - URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(address_building_id) +def mapboxData(row): + URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(row[0]) PARAMS = { 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', 'limit': 1 @@ -41,26 +48,27 @@ def mapboxData(address_building_id): try: responseBody = response['features'][0] placeName = responseBody['place_name'] - address_building_id.append(placeName) - address_building_id.pop(0) - print(address_building_id) + with open('mapboxData.csv', 'a') as f: + writer = csv.writer(f) + r = [row[1], placeName] + writer.writerow(r) except: pass -def putData(db_credentials, mapbox_address_list): +def putData(db_credentials): connection = psycopg2.connect(database=db_credentials[0], user = db_credentials[1], \ password = db_credentials[2], host = db_credentials[3], port = db_credentials[4]) print('DB Connection establised successfully!') cursor = connection.cursor() - - for i in mapbox_address_list: - if i[0] == None: - i[0] = 0 - if "'" in i[1]: - i[1] = i[1].replace("'", "") - query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(i[1], i[0]) - cursor.execute(query2) + + with open('mapboxData.csv', 'r') as f: + rows = csv.reader(f) + for row in rows: + if "'" in row[1]: + row[1] = row[1].replace("'", "") + query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(row[1], row[0]) + cursor.execute(query2) connection.commit() cursor.close() @@ -68,27 +76,28 @@ def putData(db_credentials, mapbox_address_list): def main(): db_credentials = ["building", "blocpower", "8z7NxHg8kBeLwtTDdRe", "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] - address_building_id = getData(db_credentials) start_time = time.time() - pool = Pool(processes=10) - pool.map(mapboxData, address_building_id) + start_time1 = time.time() + getData(db_credentials) + end_time1 = time.time() + print('SELECT TIME: ', end_time1-start_time1) - n = 0 - batch = 1000 - num = len(mapbox_address_list) - - while num > 0: - if num >= batch: - putData(db_credentials, mapbox_address_list[(n*batch):((n+1)*batch)]) - else: - putData(db_credentials, mapbox_address_list[(n*batch):]) - num -= batch - n += 1 - print('Batch: ', n) + start_time2 = time.time() + pool = Pool(processes=13) + with open('selectData.csv', 'r') as f: + reader = csv.reader(f) + pool.map(mapboxData, reader) + end_time2 = time.time() + print('MAPBOX TIME: ', end_time2-start_time2) + + start_time3 = time.time() + putData(db_credentials) + end_time3 = time.time() + print('INSER TIME: ', end_time3-start_time3) end_time = time.time() - print('Total Time Taken: ', int(end_time-start_time)) + print('TOTAL TIME: ', end_time-start_time) if __name__ == "__main__": main() \ No newline at end of file -- GitLab From 22f59eb293059df8e78d3f4162c01ce7a41ea13e Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Wed, 2 Oct 2019 17:35:07 -0400 Subject: [PATCH 07/13] Inner join + sorting --- bulk/mapboxMapping.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index 37109cc..390cdd5 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -13,7 +13,7 @@ def getData(db_credentials): cursor = connection.cursor() query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - FULL OUTER JOIN building_address ON address.id = building_address.address_id;''' + INNER JOIN building_address ON address.id = building_address.address_id ORDER BY building_id ASC;''' cursor.execute(query1) rows = cursor.fetchall() address_building_id = [] @@ -38,7 +38,7 @@ def getData(db_credentials): writer.writerow(row) def mapboxData(row): - URL = "https://api.mapbox.com/geocoding/v5/mapbox.places-permanent/{}.json".format(row[0]) + URL = "https://api.mapbox.com/geocoding/v5/mapbox.places/{}.json".format(row[0]) PARAMS = { 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', 'limit': 1 @@ -50,7 +50,7 @@ def mapboxData(row): placeName = responseBody['place_name'] with open('mapboxData.csv', 'a') as f: writer = csv.writer(f) - r = [row[1], placeName] + r = [row[0], row[1], placeName] writer.writerow(r) except: pass @@ -65,9 +65,9 @@ def putData(db_credentials): with open('mapboxData.csv', 'r') as f: rows = csv.reader(f) for row in rows: - if "'" in row[1]: - row[1] = row[1].replace("'", "") - query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(row[1], row[0]) + if "'" in row[2]: + row[2] = row[2].replace("'", "") + query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(row[2], row[1]) cursor.execute(query2) connection.commit() -- GitLab From e21c7ea4330713de06ac2a854cd7f384d3f33534 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Thu, 3 Oct 2019 12:27:46 -0400 Subject: [PATCH 08/13] Addresses only in New York Region --- bulk/mapboxMapping.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index 390cdd5..58935c6 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -41,7 +41,8 @@ def mapboxData(row): URL = "https://api.mapbox.com/geocoding/v5/mapbox.places/{}.json".format(row[0]) PARAMS = { 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', - 'limit': 1 + 'limit': 1, + 'region': 'New York' } response = requests.get(url = URL, params = PARAMS).json() if 'features' in response: @@ -83,11 +84,24 @@ def main(): end_time1 = time.time() print('SELECT TIME: ', end_time1-start_time1) + n = 0 + batch = 1000 + start_time2 = time.time() pool = Pool(processes=13) with open('selectData.csv', 'r') as f: reader = csv.reader(f) - pool.map(mapboxData, reader) + data = list(reader) + num = len(data) + while num > 0: + if num >= batch: + pool.map(mapboxData, data[(n*batch):((n+1)*batch)]) + else: + pool.map(mapboxData, data[(n*batch):]) + num -= batch + print('Batch: ', n) + n += 1 + end_time2 = time.time() print('MAPBOX TIME: ', end_time2-start_time2) -- GitLab From ef49ddfcf2502a34bac910f3f0daee3124906589 Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Fri, 4 Oct 2019 12:07:21 -0400 Subject: [PATCH 09/13] Testing --- bulk/mapboxMapping.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index 58935c6..fa4c246 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -13,7 +13,7 @@ def getData(db_credentials): cursor = connection.cursor() query1 = ''' SELECT building_id, house_number, street_name, zipcode FROM address \ - INNER JOIN building_address ON address.id = building_address.address_id ORDER BY building_id ASC;''' + INNER JOIN building_address ON address.id = building_address.address_id ORDER BY building_id ASC limit 120;''' cursor.execute(query1) rows = cursor.fetchall() address_building_id = [] @@ -68,7 +68,7 @@ def putData(db_credentials): for row in rows: if "'" in row[2]: row[2] = row[2].replace("'", "") - query2 = ''' INSERT INTO mapbox_building (mapbox_address, building_id) VALUES ('{}', {}); '''.format(row[2], row[1]) + query2 = ''' INSERT INTO table_for_testing (mapbox_address, building_id) VALUES ('{}', {}); '''.format(row[2], row[1]) cursor.execute(query2) connection.commit() @@ -85,7 +85,7 @@ def main(): print('SELECT TIME: ', end_time1-start_time1) n = 0 - batch = 1000 + batch = 25 start_time2 = time.time() pool = Pool(processes=13) @@ -94,12 +94,12 @@ def main(): data = list(reader) num = len(data) while num > 0: + print('Batch: ', n) if num >= batch: pool.map(mapboxData, data[(n*batch):((n+1)*batch)]) else: pool.map(mapboxData, data[(n*batch):]) num -= batch - print('Batch: ', n) n += 1 end_time2 = time.time() @@ -108,7 +108,7 @@ def main(): start_time3 = time.time() putData(db_credentials) end_time3 = time.time() - print('INSER TIME: ', end_time3-start_time3) + print('INSERT TIME: ', end_time3-start_time3) end_time = time.time() print('TOTAL TIME: ', end_time-start_time) -- GitLab From 24b082a15f27e52e1d238a1ba91376edd0834d2e Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Wed, 16 Oct 2019 11:58:23 -0400 Subject: [PATCH 10/13] keys to env --- bulk/mapboxMapping.py | 5 +++-- key.sh | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 key.sh diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index fa4c246..ffdda9f 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -4,6 +4,7 @@ import json import time from multiprocessing import Process, Pool import csv +import os def getData(db_credentials): @@ -40,7 +41,7 @@ def getData(db_credentials): def mapboxData(row): URL = "https://api.mapbox.com/geocoding/v5/mapbox.places/{}.json".format(row[0]) PARAMS = { - 'access_token': 'pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ', + 'access_token': os.environ.get('TOKEN'), 'limit': 1, 'region': 'New York' } @@ -76,7 +77,7 @@ def putData(db_credentials): connection.close() def main(): - db_credentials = ["building", "blocpower", "8z7NxHg8kBeLwtTDdRe", "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] + db_credentials = ["building", "blocpower", os.environ.get('DB_PASS'), "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] start_time = time.time() start_time1 = time.time() diff --git a/key.sh b/key.sh new file mode 100644 index 0000000..3f52017 --- /dev/null +++ b/key.sh @@ -0,0 +1,2 @@ +TOKEN="pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ", +DB_PASS="8z7NxHg8kBeLwtTDdRe" \ No newline at end of file -- GitLab From 27c17da53efe578dad3a5789a4402e2b6bc3e15a Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Wed, 16 Oct 2019 12:00:20 -0400 Subject: [PATCH 11/13] key.sh --- key.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/key.sh b/key.sh index 3f52017..909802e 100644 --- a/key.sh +++ b/key.sh @@ -1,2 +1,2 @@ -TOKEN="pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ", -DB_PASS="8z7NxHg8kBeLwtTDdRe" \ No newline at end of file +export TOKEN="pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ", +export DB_PASS="8z7NxHg8kBeLwtTDdRe" \ No newline at end of file -- GitLab From 856df9b7434ca2314506130255ef6039b0648f9e Mon Sep 17 00:00:00 2001 From: Jinal Soni Date: Thu, 24 Oct 2019 15:32:18 -0400 Subject: [PATCH 12/13] Removed credentials --- .gitignore | 2 ++ bulk/mapboxMapping.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 60ed49f..106ac23 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ config.json # VSCode .vscode + +key.sh \ No newline at end of file diff --git a/bulk/mapboxMapping.py b/bulk/mapboxMapping.py index ffdda9f..d3832f7 100644 --- a/bulk/mapboxMapping.py +++ b/bulk/mapboxMapping.py @@ -77,8 +77,7 @@ def putData(db_credentials): connection.close() def main(): - db_credentials = ["building", "blocpower", os.environ.get('DB_PASS'), "buildingdev.czgvwxaefxfj.us-east-1.rds.amazonaws.com", "5432"] - start_time = time.time() + db_credentials = [os.environ.get('DATABASE'), os.environ.get('USER'), os.environ.get('DB_PASS'), os.environ.get('HOST'), os.environ.get('PORT')] start_time1 = time.time() getData(db_credentials) -- GitLab From 427a72bd5848204b41f24e83df6bf047a4ecc61f Mon Sep 17 00:00:00 2001 From: Jinal Soni <22827279+Jinal-7@users.noreply.github.com> Date: Thu, 24 Oct 2019 15:34:40 -0400 Subject: [PATCH 13/13] Delete key.sh --- key.sh | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 key.sh diff --git a/key.sh b/key.sh deleted file mode 100644 index 909802e..0000000 --- a/key.sh +++ /dev/null @@ -1,2 +0,0 @@ -export TOKEN="pk.eyJ1IjoiYmxvY3Bvd2VyIiwiYSI6ImNqd202bngzazE3c2o0OW4wM2IzbG00Y2cifQ.ZdSoYvbdw4fuIul8PQ3sBQ", -export DB_PASS="8z7NxHg8kBeLwtTDdRe" \ No newline at end of file -- GitLab