# Source code for lovelyrita.geocode

import re
import requests
import datetime
from urllib.parse import urlencode
import numpy as np
import psycopg2
import pandas as pd
from lovelyrita import config


# Google geocoding endpoint and key, read from lovelyrita.config; they are the
# default arguments of Geocoder.__init__.
GOOGLE_API_URL = config.GOOGLE_API_URL
GOOGLE_API_KEY = config.GOOGLE_API_KEY

# PostGIS connection settings, read from lovelyrita.config; they are the
# default arguments of PostGISGeocoder.__init__.
POSTGIS_HOST = config.POSTGIS_HOST
POSTGIS_PORT = config.POSTGIS_PORT
POSTGIS_USERNAME = config.POSTGIS_USERNAME
POSTGIS_PASSWORD = config.POSTGIS_PASSWORD
POSTGIS_DATABASE = config.POSTGIS_DATABASE


class Geocoder(object):
    """Geocode addresses through an HTTP geocoding API, caching every answer.

    Results are kept in ``self.geocodes``, a DataFrame indexed by the address
    string with columns ``lat``, ``lng``, ``place_id`` and ``timestamp``, so
    that an address is only ever requested once per instance.

    Parameters
    ----------
    geocodes : pandas.DataFrame, optional
        Previously collected results to seed the cache with. When omitted an
        empty frame with the columns above is created.
    api_url : str
        Base URL of the geocoding endpoint; ``'json?'`` and the query string
        are appended to it.
    api_key : str
        Key sent as the ``key`` query parameter.
    """

    def __init__(self, geocodes=None, api_url=GOOGLE_API_URL,
                 api_key=GOOGLE_API_KEY):
        if geocodes is None:
            geocodes = pd.DataFrame(
                columns=('lat', 'lng', 'place_id', 'timestamp'))
            geocodes.index.name = 'address'
        self.geocodes = geocodes
        # Store the *arguments*, not the module constants, so that a caller
        # supplied endpoint or key is actually used.
        self.api_url = api_url
        self.api_key = api_key

    def geocode(self, address):
        """Pull data from the geocoding API, using the cache when possible.

        Parameters
        ----------
        address : str

        Returns
        -------
        tuple
            ``(lat, lng, place_id)`` of the first result.

        Raises
        ------
        Exception
            If the request returns HTTP 404 or the response ``status`` field
            is not ``'OK'``.
        """
        # check if query has already been run
        try:
            cached = self.geocodes.loc[address]
            return cached['lat'], cached['lng'], cached['place_id']
        except KeyError:
            pass

        query = {'address': address, 'key': self.api_key}
        url = self.api_url + 'json?' + urlencode(query)
        response = requests.get(url)
        if response.status_code == 404:
            raise Exception("404 error for {}".format(url))

        content = response.json()
        if content['status'] != 'OK':
            raise Exception("Status not OK for {}".format(url))

        best = content['results'][0]
        place_id = best['place_id']
        lat = best['geometry']['location']['lat']
        lng = best['geometry']['location']['lng']
        timestamp = str(datetime.datetime.now())

        new_geocode = pd.Series({'place_id': place_id,
                                 'lat': lat,
                                 'lng': lng,
                                 'timestamp': timestamp},
                                name=address)
        # DataFrame.append was removed in pandas 2.0. Build a one-row frame
        # (object dtype, like the cache) and concatenate it, keeping the
        # cache's index name so both inputs agree.
        new_row = new_geocode.to_frame().T
        new_row.index.name = self.geocodes.index.name
        self.geocodes = pd.concat([self.geocodes, new_row])

        return lat, lng, place_id
class PostGISGeocoder(object):
    """Geocode addresses by calling the ``geocode`` SQL function of a
    PostGIS database over a psycopg2 connection.
    """

    # Characters removed from an address before it is sent to the database.
    _STRIP_PATTERN = re.compile("[" + re.escape("()\'+") + "]")

    def __init__(self, host=POSTGIS_HOST, port=POSTGIS_PORT,
                 database=POSTGIS_DATABASE, user=POSTGIS_USERNAME,
                 password=POSTGIS_PASSWORD):
        """A PostGIS geocoder

        Opens a connection with the given settings and keeps both the
        connection and one cursor on the instance.
        """
        connection = psycopg2.connect(host=host, database=database,
                                      user=user, port=port,
                                      password=password)
        self.connection = connection
        self.cursor = connection.cursor()

    def close(self):
        """Close the cursor and the underlying database connection."""
        self.cursor.close()
        self.connection.close()

    def geocode(self, address):
        """Get the latitude and longitude of an address

        Parameters
        ----------
        address : str

        Returns
        -------
        A dictionary containing keys rating (a numeric value indicating how
        uncertain the geocoding is), longitude, latitude, street_number,
        street_name and street_suffix. When the database returns no row,
        every value is None.

        Raises
        ------
        psycopg2.Error
            If the query fails; the transaction is rolled back first so the
            connection stays usable for later calls.
        """
        address = self._STRIP_PATTERN.sub('', address)

        columns = ['rating', 'longitude', 'latitude',
                   'street_number', 'street_name', 'street_suffix']

        # The query also selects city, state and zip; they are not named in
        # ``columns`` and are therefore dropped by the zip() below.
        query = ("""SELECT g.rating, ST_X(g.geomout) As lon, ST_Y(g.geomout) As lat, """
                 """(addy).address As stno, (addy).streetname As street, """
                 """(addy).streettypeabbrev As styp, """
                 """(addy).location As city, """
                 """(addy).stateabbrev As st,(addy).zip """
                 """FROM geocode(%s, 1) As g;""")

        try:
            self.cursor.execute(query, (address, ))
            response = self.cursor.fetchone()
        except psycopg2.Error:
            # After an error the transaction is aborted and every further
            # statement on this connection would fail until a rollback.
            self.connection.rollback()
            raise

        if response:
            result = dict(zip(columns, response))
        else:
            # Same shape as a hit, so callers can index by key either way.
            result = {name: None for name in columns}

        return result
def geocode_citations(citations, rating=10, geocoder=None,
                      address_suffix=', oakland, ca'):
    """Geocode a DataFrame of citations

    Each row's ``street`` value, with ``address_suffix`` appended, is passed
    to ``geocoder.geocode``. Results whose ``rating`` is strictly below
    ``rating`` are written back into ``citations`` in place.

    Parameters:
    -----------
    citations : pandas.DataFrame
        Must have a ``street`` column. ``latitude`` and ``longitude`` columns
        are added (filled with NaN) when missing.
    rating : int
        Exclusive upper bound on the geocoder's rating for a result to be
        accepted.
    geocoder : object, optional
        Anything with a ``geocode(address)`` method returning a mapping that
        has a ``rating`` key. Defaults to a new ``PostGISGeocoder``.
    address_suffix : str
        Text appended to every street before geocoding.

    Returns:
    --------
    The same ``citations`` DataFrame, updated with the geocoded latitude and
    longitude.
    """
    if 'latitude' not in citations.columns:
        citations['latitude'] = np.nan
    if 'longitude' not in citations.columns:
        citations['longitude'] = np.nan

    if geocoder is None:
        geocoder = PostGISGeocoder()

    results = []
    indices = []

    # The progress bar is optional; fall back to a plain iterator without it.
    try:
        from progressbar import progressbar
        iterator = progressbar(citations.iterrows(), max_value=len(citations))
    except ImportError:
        iterator = citations.iterrows()

    for index, row in iterator:
        res = geocoder.geocode(row['street'] + address_suffix)
        # A geocoder may report "no match" with a None rating or with a
        # non-mapping placeholder; neither can be compared, so skip the row.
        found_rating = res.get('rating') if isinstance(res, dict) else None
        if found_rating is not None and found_rating < rating:
            indices.append(index)
            results.append(res)

    if results:
        accepted = pd.DataFrame(results, index=indices)
        # update() only touches columns that exist in both frames.
        citations.update(accepted)

    return citations