import geocoder
import csv
import sys
import numpy as np
from collections import Counter
import re
import time
# Input files. Rows of the two files are matched purely by position: the
# coordinate found for row i of the diocese file is attached to row i of the
# Dachau file.
filepath1 = "diocese_correct.csv"
filepath2 = "dachau_initial_file.csv"
# Output CSV: every Dachau row with latitude and longitude prepended.
OUTPUT_PATH = 'dachau_with_geolocations.csv'
# Account name passed to the GeoNames web service.
GEONAMES_USERNAME = 'benjaminlee'
# Upper bound on the number of rows written.
# NOTE(review): presumably the record count of the Dachau file -- confirm.
MAX_ROWS = 2679
# A diocese field is only geocoded when it contains at least one ASCII letter.
_HAS_LETTER = re.compile('[a-zA-Z]')


def read_diocese_names(path):
    """Return the second column of every row of the CSV file at *path*.

    Rows with fewer than two columns (for example blank lines) contribute an
    empty string instead of being skipped, so list positions keep matching
    the row positions of the file.
    """
    # newline='' is the mode the csv module asks for; the old 'rU' mode is
    # rejected by Python 3.11+.
    with open(path, newline='') as handle:
        return [row[1] if len(row) > 1 else '' for row in csv.reader(handle)]


def geocode_names(names, lookup):
    """Geocode *names* and return an (n, 3) float array.

    Column 0 holds the row index, column 1 the latitude and column 2 the
    longitude. Latitude and longitude stay NaN when the name contains no
    letters or when the lookup yields no coordinates.

    *lookup* is called with a single name and must return an object exposing
    ``lat`` and ``lng`` attributes (as the result of ``geocoder.geonames``
    does); either attribute being ``None`` is treated as "not found".
    """
    # NaN, not 0, marks "no coordinate": 0.0 is a valid latitude/longitude.
    coords = np.full((len(names), 3), np.nan)
    for index, name in enumerate(names):
        coords[index, 0] = index
        if not _HAS_LETTER.search(name):
            continue
        result = lookup(name)
        lat, lng = result.lat, result.lng
        if lat is None or lng is None:
            # Failed lookup: leave the NaN markers in place.
            continue
        coords[index, 1] = float(lat)
        coords[index, 2] = float(lng)
    return coords


def rows_with_coords(rows, coords, max_rows=None):
    """Yield each row of *rows* with latitude and longitude prepended.

    Row i receives ``coords[i][1]`` and ``coords[i][2]``. Rows without a
    usable coordinate (NaN entry, or no entry at all because *coords* is
    shorter than *rows*) receive two empty strings instead. At most
    *max_rows* rows are produced; ``None`` means no limit.
    """
    for index, row in enumerate(rows):
        if max_rows is not None and index >= max_rows:
            break
        located = index < len(coords) and not np.isnan(coords[index, 1:3]).any()
        if located:
            prefix = [float(coords[index, 1]), float(coords[index, 2])]
        else:
            prefix = ['', '']
        yield prefix + list(row)


def main():
    """Geocode every diocese and write the Dachau rows with coordinates."""
    diocese = read_diocese_names(filepath1)
    coords = geocode_names(
        diocese,
        lambda name: geocoder.geonames(name, username=GEONAMES_USERNAME),
    )
    # newline='' on the output keeps the csv writer from emitting blank
    # lines between rows on platforms that translate line endings.
    with open(filepath2, newline='') as f2, \
            open(OUTPUT_PATH, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerows(rows_with_coords(csv.reader(f2), coords, MAX_ROWS))


if __name__ == "__main__":
    main()
    sys.exit()