'''
Source code for geograpy.wikidata

Created on 2020-09-23

@author: wf
'''
import time
from lodstorage.sparql import SPARQL
class Wikidata(object):
    '''
    Wikidata access

    Runs a set of fixed SPARQL queries against a SPARQL endpoint and hands
    back the rows produced by ``SPARQL.asListOfDicts``.
    '''

    def __init__(self, endpoint=''):
        '''
        Constructor

        Args:
            endpoint(str): URL of the SPARQL endpoint to query.
        '''
        # NOTE(review): the default is an empty string, so callers are
        # expected to pass the endpoint URL explicitly - confirm.
        self.endpoint = endpoint

    def _query(self, queryString):
        '''
        run the given SPARQL query against the configured endpoint

        Args:
            queryString(str): the SPARQL query to execute

        Returns:
            the value of ``SPARQL.asListOfDicts`` for the query results
        '''
        wd = SPARQL(self.endpoint)
        results = wd.query(queryString)
        return wd.asListOfDicts(results)

    @staticmethod
    def _getValuesClause(region=None, country=None):
        '''
        build the SPARQL VALUES clause(s) that restrict the getCities query

        Args:
            region: Wikidata id (e.g. Q1198) of the region, or None
            country: Wikidata id of the country, or None

        Returns:
            str: one VALUES clause per given argument, newline separated

        Raises:
            ValueError: if neither region nor country is given
        '''
        clauses = []
        if region is not None:
            clauses.append("VALUES ?region { wd:%s }" % region)
        if country is not None:
            clauses.append("VALUES ?country { wd:%s}" % country)
        if not clauses:
            # without any restriction the query template would have no
            # VALUES clause at all; fail early with a clear message
            raise ValueError(
                "getCities needs a region and/or a country Wikidata id")
        return "\n  ".join(clauses)

    def getCityPopulations(self, profile=True):
        '''
        get the city populations from Wikidata

        Args:
            profile(bool): if True show profiling information

        Returns:
            the query rows as returned by ``SPARQL.asListOfDicts``
        '''
        queryString = """
# get a list of human settlements having a geoName identifier
# to add to geograpy3 library
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT ?city ?cityLabel ?cityPop ?geoNameId ?country ?countryLabel ?countryIsoCode ?countryPopulation
WHERE {
  # geoName Identifier
  ?city wdt:P1566 ?geoNameId.
  # instance of human settlement
  ?city wdt:P31/wdt:P279* wd:Q486972 .
  # population of city
  OPTIONAL { ?city wdt:P1082 ?cityPop.}
  # label of the City
  ?city rdfs:label ?cityLabel filter (lang(?cityLabel) = "en").
  # country this city belongs to
  ?city wdt:P17 ?country .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  OPTIONAL {
    ?country wdt:P2132 ?countryGdpPerCapita.
  }
}"""
        if profile:
            print("getting cities with population and geoNamesId from wikidata endpoint %s" % self.endpoint)
        starttime = time.time()
        cityList = self._query(queryString)
        if profile:
            print("Found %d cities in %5.1f s" % (len(cityList), time.time() - starttime))
        return cityList

    def getCities(self, region=None, country=None):
        '''
        get the cities from Wikidata

        Args:
            region: Wikidata id of the region to restrict the query to
            country: Wikidata id of the country to restrict the query to

        Returns:
            the query rows as returned by ``SPARQL.asListOfDicts``

        Raises:
            ValueError: if neither region nor country is given
        '''
        values = self._getValuesClause(region=region, country=country)
        # NOTE(review): the "?city wdt:P131* ?region." triple is treated as
        # active because it is the only link between ?city and ?region -
        # confirm against the upstream query.
        queryString = """# get a list of cities for the given region
# for geograpy3 library
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT DISTINCT ?city ?cityLabel ?geoNameId ?cityPop ?cityCoord ?region ?regionLabel ?regionIsoCode ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGdpPerCapita
WHERE {
  # administrative unit of first order
  # example DE-NW Q1198
  %s
  #?region wdt:P31/wdt:P279* wd:Q10864048.
  ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
  # isocode state/province
  OPTIONAL { ?region wdt:P300 ?regionIsoCode. }
  # country this region belongs to
  ?region wdt:P17 ?country .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  OPTIONAL {
    ?country wdt:P2132 ?countryGdpPerCapita.
  }
  # located in administrative territory
  ?city wdt:P131* ?region.
  # label of the City
  ?city rdfs:label ?cityLabel filter (lang(?cityLabel) = "en").
  # instance of human settlement
  ?city wdt:P31/wdt:P279* wd:Q486972 .
  # geoName Identifier
  ?city wdt:P1566 ?geoNameId.
  # population of city
  OPTIONAL { ?city wdt:P1082 ?cityPop.}
  # get the coordinates
  OPTIONAL {
    select (max(?coord) as ?cityCoord) where {
      ?city wdt:P625 ?coord.
    }
  }
}
ORDER BY ?cityLabel""" % values
        return self._query(queryString)

    def getCountries(self):
        '''
        get a list of countries

        The rows are stored in ``self.countryList``; nothing is returned.
        '''
        queryString = """# get a list of countries
# for geograpy3 library
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
# get City details with Country
SELECT DISTINCT ?country ?countryLabel ?countryIsoCode ?countryPopulation ?countryGDP_perCapita ?coord
WHERE {
  # instance of City Country
  ?country wdt:P31/wdt:P279* wd:Q3624078 .
  # label for the country
  ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
  # get the coordinates
  ?country wdt:P625 ?coord.
  # ISO 3166-1 alpha-2 code
  ?country wdt:P297 ?countryIsoCode.
  # population of country
  ?country wdt:P1082 ?countryPopulation.
  # nonminal GDP per capita
  ?country wdt:P2132 ?countryGDP_perCapita.
}"""
        self.countryList = self._query(queryString)

    def getRegions(self):
        '''
        get Regions from Wikidata

        The rows are stored in ``self.regionList``; nothing is returned.
        '''
        # NOTE(review): the population, country and location OPTIONAL groups
        # are treated as active because the SELECT clause uses the variables
        # they bind; the historic-region FILTER is left commented out since
        # nothing else depends on it - confirm against the upstream query.
        queryString = """# get a list of regions
# for geograpy3 library
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
SELECT DISTINCT ?country ?countryLabel ?countryIsoCode ?region (max(?regionAlpha2) as ?regionIsoCode) ?regionLabel (max(?population) as ?regionPopulation) ?location
WHERE {
  # administrative unit of first order
  ?region wdt:P31/wdt:P279* wd:Q10864048.
  OPTIONAL {
    ?region rdfs:label ?regionLabel filter (lang(?regionLabel) = "en").
  }
  # filter historic regions
  # FILTER NOT EXISTS {?region wdt:P576 ?end}
  # get the population
  OPTIONAL { ?region wdt:P1082 ?population. }
  OPTIONAL {
    ?region wdt:P17 ?country.
    # label for the country
    ?country rdfs:label ?countryLabel filter (lang(?countryLabel) = "en").
    ?country wdt:P297 ?countryIsoCode.
  }
  # isocode state/province
  ?region wdt:P300 ?regionAlpha2.
  OPTIONAL { ?region wdt:P625 ?location. }
}
GROUP BY ?country ?countryLabel ?countryIsoCode ?region ?regionIsoCode ?regionLabel ?location
ORDER BY ?regionIsoCode"""
        self.regionList = self._query(queryString)