Feineigle.com - World Geo-Graph-y

Home · Projects · 2016 · World Geo-Graph-Y

Published: October 9, 2016 (7 years 6 months ago.)
Tags:  Neo4j · Python · Software


Using data extracted from this wiki page (mirror), an initial multitude of grey dots is added, one for each country. No connections are present. The more colorful part of the graph comes from CONected, a global oligarchy mapper, which was under development at the time.

Next, connections are added. If it looks too connected, it is. There are several entries for things like the European Union, and France and England had connections all over the place. The Antarctic connections are also confusing.

After going through the initial data, editing out the suspect connections, and then teasing a few key nodes into place (Russia, China, Egypt, and India), you end up with a distorted, though recognizable, map. Here is the edited data used to make this graph.

No interface, but it gets the job done.

#!/usr/bin/env python
#Author: Mark Feineigle
#Create Date: 2016/10/09
'''Rip wiki data about country borders, dump to neo4j'''
import BeautifulSoup as bs
from collections import OrderedDict
import py2neo

def makeSoup(path):
  '''Read the HTML file at path and return it parsed by BeautifulSoup.

  path -- location of the saved HTML page on disk
  '''
  # Use a context manager so the file handle is closed as soon as the
  # contents are read, rather than whenever the object is collected.
  with open(path) as fi:
    html = fi.read()
  return bs.BeautifulSoup(html)

def makeDict(soup, start=1, stop=287):
  '''Map each country in the table rows to the list of its border names.

  soup  -- parsed page; only soup.findAll("tr") is used
  start -- index of the first <tr> to read (default 1, so row 0 is skipped;
           presumably the table header -- confirm against the page)
  stop  -- index one past the last <tr> to read; the default of 287 is the
           value the script was originally hard-coded with

  Returns an OrderedDict, in row order, of {country: [border, ...]}.
  The country is the first usable link in the row's first cell; the borders
  are the links in the row's fifth cell.

  Raises IndexError when stop exceeds the number of rows, when a row has
  fewer than five cells, or when the first cell has no usable link.
  '''
  d = OrderedDict()
  rows = soup.findAll("tr")
  for element in range(start, stop):
    cells = rows[element].findAll("td")
    # get the country; links starting with "[" (presumably footnote
    # references) and links with empty text are ignored
    keys = [link.text for link in cells[0].findAll("a")
            if not link.text.startswith("[") and len(link.text) > 0]
    # get its borders; empty names are kept here and filtered by dict2Txt
    vals = [link.text for link in cells[4].findAll("a")
            if not link.text.startswith("[")]
    d[keys[0]] = vals
  return d

def dict2Txt(data, path):
  '''Dump data to path as one "key;value,value,..." line per entry.

  data -- mapping of name -> list of names (text that can be UTF-8 encoded)
  path -- output file; it is opened with mode "w"
  '''
  with open(path, "w") as out:
    for country, borders in data.iteritems():
      # empty names are dropped before joining
      kept = [name for name in borders if len(name) > 0]
      head = country.encode("utf-8").strip()
      tail = ",".join(kept).encode("utf-8")
      out.write(head + ";")
      out.write(tail + "\n")

def txt2Dict(path):
  '''Read a "key;value,value,..." text file back into an OrderedDict.

  path -- file in the format written by dict2Txt, possibly edited by hand

  Returns an OrderedDict, in file order, of {key: [value, ...]}.
  A line that ends right after the ";" yields [""], exactly as before.
  Blank lines are skipped, and a line with no ";" yields an empty list;
  both previously raised IndexError.
  '''
  d = OrderedDict()
  with open(path, "r") as fi:
    for line in fi:
      line = line.strip()
      if not line:
        # blank lines creep in during manual editing and carry no data
        continue
      fields = line.split(";")
      key = fields[0]
      # only the first field after the key is read; anything after a
      # second ";" is ignored
      values = fields[1].split(",") if len(fields) > 1 else []
      d[key] = values
  return d

def dict2Graph(data):
  '''Write every key and its neighbours to Neo4j as t_Country nodes joined
  by BORDERS relationships.

  data -- mapping of name -> list of neighbouring names; empty names in the
          list are skipped

  Authenticates against localhost:7474 with the credentials below, MERGEs a
  node for each key and each non-empty value, links each pair with
  CREATE UNIQUE, and finally prints the number of keys processed.
  '''
  py2neo.authenticate("localhost:7474", "neo4juser", "neo4jpassword")
  graph = py2neo.Graph()
  count = 0
  # items() instead of iteritems() so the loop runs on Python 2 and 3
  for key, values in data.items():
    count += 1
    cmd = "MERGE (n:t_Country {name:{key}, t:True})"
    graph.run(cmd, key=key)
    for value in values:
      if len(value) > 0:
        # This body used to be tab-indented under a space-indented "if",
        # which Python 3 rejects with TabError; it is now spaces only.
        cmd = "MERGE (n:t_Country {name:{value}, t:True})"
        graph.run(cmd, value=value)

        # The query text itself is left untouched.
        cmd = '''MATCH (key:t_Country {name:{key}}),
		       (val:t_Country {name:{value}})
		 CREATE UNIQUE (key)-[:BORDERS]-(val)'''
        graph.run(cmd, key=key, value=value)
  # single-argument call form prints the same on Python 2 and 3
  print(count)
if __name__ == '__main__':
  # Input page and the intermediate text file shared by both steps.
  htmlpath = "List of countries and territories by land borders - Wikipedia, the free encyclopedia.html"
  #txtpath = "countryBorders.txt"
  txtpath = "countryBordersEDITED.txt"

  def readData():
    '''HTML page -> dict -> text file at txtpath.'''
    dict2Txt(makeDict(makeSoup(htmlpath)), txtpath)

  def writeGraph():
    '''Text file at txtpath -> dict -> Neo4j graph.'''
    dict2Graph(txt2Dict(txtpath))

  # The two steps are run separately: enable readData() to regenerate the
  # text file, edit that file by hand, then run writeGraph().
  #readData()
  #manually edit the text here
  writeGraph()