Skip to content

Commit fe71648

Browse files
authored
Implemented get_place_obs and reimplemented query (#94)
* Implemented get_place_obs * Reimplemented query as a function instead of a class. * query now returns a list. Amended docstrings. * Fixed docstring typo
1 parent bca81e3 commit fe71648

File tree

13 files changed

+339
-172
lines changed

13 files changed

+339
-172
lines changed

datacommons/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
# limitations under the License.
1414

1515
# Data Commons SPARQL query support
16-
from datacommons.query import Query
16+
from datacommons.query import query
1717

1818
# Data Commons Python Client API
1919
from datacommons.core import get_property_labels, get_property_values, get_triples
2020
from datacommons.places import get_places_in
21-
from datacommons.populations import get_populations, get_observations, get_pop_obs
21+
from datacommons.populations import get_populations, get_observations, get_pop_obs, get_place_obs
2222

2323
# Other utilities
2424
from .utils import set_api_key, clean_frame, flatten_frame

datacommons/populations.py

Lines changed: 139 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,13 +338,144 @@ def get_pop_obs(dcid):
338338
339339
Each :obj:`Observation` is represented by a :code:`dict` that has the keys:
340340
341-
- :code:`measuredProp`
342-
- :code:`observationDate`
343-
- :code:`observationPeriod` (optional)
344-
- :code:`measurementMethod` (optional)
345-
- one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`,
346-
:code:`minValue`, :code:`medianValue`
347-
341+
- :code:`measuredProp`: The property measured by the :obj:`Observation`.
342+
- :code:`observationDate`: The date when the :obj:`Observation` was made.
343+
- :code:`observationPeriod` (optional): The period over which the
344+
:obj:`Observation` was made.
345+
- :code:`measurementMethod` (optional): A field providing additional
346+
information on how the :obj:`Observation` was collected.
347+
- Additional fields that denote values measured by the :obj:`Observation`.
348+
These may include the following: :code:`measuredValue`, :code:`meanValue`,
349+
:code:`medianValue`, :code:`maxValue`, :code:`minValue`, :code:`sumValue`,
350+
:code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and others.
348351
"""
349352
url = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs'] + '?dcid={}'.format(dcid)
350-
return utils._send_request(url, compress=True, post=False)
353+
return utils._send_request(url, compress=True, post=False)
354+
355+
def get_place_obs(place_type, population_type, constraining_properties=None):
  """ Returns all populations and observations for places of a given type.

  Sends a single request to the :code:`get_place_obs` REST endpoint and returns
  the :code:`places` entry of the response payload.

  Args:
    place_type (:obj:`str`): The type of places to query
      :obj:`StatisticalPopulation`'s and :obj:`Observation`'s for.
    population_type (:obj:`str`): The population type of the
      :obj:`StatisticalPopulation`.
    constraining_properties (:obj:`map` from :obj:`str` to :obj:`str`, optional):
      A map from constraining property to the value that the
      :obj:`StatisticalPopulation` should be constrained by. When omitted or
      :obj:`None`, an empty list of constraints is sent.

  Returns:
    Given a :code:`Place` type (e.g. :obj:`State`, :obj:`County`, :obj:`City`),
    a :code:`population_type` (e.g. :obj:`Person`), and optionally a set of
    constraining properties defining the :obj:`StatisticalPopulation`, this
    function returns *all* :obj:`StatisticalPopulation`'s and
    :obj:`Observation`'s for all places of the given type. See examples for more
    details on how the format of the return value is structured.

  Raises:
    ValueError: If the payload returned by the Data Commons REST API is
      malformed.

  Examples:
    We would like to get all :obj:`StatisticalPopulation`'s and
    :obj:`Observation`'s for all places of type :obj:`City` where the
    populations have a population type of :obj:`Person` and are specified by
    the following constraining properties.

    - Persons should have `age <https://browser.datacommons.org/kg?dcid=age>`_
      with value `Years5To17 <https://browser.datacommons.org/kg?dcid=Years5To17>`_
    - Persons should have `placeOfBirth <https://browser.datacommons.org/kg?dcid=placeOfBirth>`_
      with value BornInOtherStateInTheUnitedStates.

    >>> props = {
    ...   'age': 'Years5To17',
    ...   'placeOfBirth': 'BornInOtherStateInTheUnitedStates'
    ... }
    >>> get_place_obs('City', 'Person', constraining_properties=props)
    [
      {
        'name': 'Marcus Hook borough',
        'place': 'geoId/4247344',
        'populations': {
          'dc/p/pq6frs32sfvk': {
            'observations': [
              {
                'id': 'dc/o/0005qml1el8qh',
                'marginOfError': 39,
                'measuredProp': 'count',
                'measuredValue': 67,
                'measurementMethod': 'CensusACS5yrSurvey',
                'observationDate': '2014',
                'provenanceId': 'dc/3j71hj1',
                'type': 'Observation'
              },
              {
                'id': 'dc/o/wvskpk5vyjkhb',
                'marginOfError': 33,
                'measuredProp': 'count',
                'measuredValue': 58,
                'measurementMethod': 'CensusACS5yrSurvey',
                'observationDate': '2015',
                'provenanceId': 'dc/3j71hj1',
                'type': 'Observation'
              },
              {
                'id': 'dc/o/3h44trf3vyrm3',
                'marginOfError': 36,
                'measuredProp': 'count',
                'measuredValue': 42,
                'measurementMethod': 'CensusACS5yrSurvey',
                'observationDate': '2011',
                'provenanceId': 'dc/3j71hj1',
                'type': 'Observation'
              },
              # More observations...
            ],
            'provenanceId': 'dc/3j71hj1'
          }
        }
      },
      # Entries for more cities...
    ]

    The value returned by :code:`get_place_obs` is a :obj:`list` of
    :obj:`dict`'s. Each dictionary corresponds to :obj:`StatisticalPopulation`'s
    matching the given :code:`population_type` and
    :code:`constraining_properties` for a single place of the given
    :code:`place_type`. The dictionary contains the following keys.

    - :code:`name`: The name of the place being described.
    - :code:`place`: The dcid associated with the place being described.
    - :code:`populations`: A :obj:`dict` mapping :code:`StatisticalPopulation`
      dcids to a :obj:`dict` with a list of :code:`observations` and the
      :code:`provenanceId` identifying the source that defined the
      :code:`StatisticalPopulation`.

    Each :obj:`Observation` is represented by a :obj:`dict` with the following
    keys.

    - :code:`id`: The :code:`dcid` identifying the :obj:`Observation`.
    - :code:`provenanceId`: The dcid identifying the source that defined this
      :obj:`Observation`.
    - :code:`type`: The type associated with the :obj:`Observation`.
    - :code:`measuredProp`: The property measured by the :obj:`Observation`.
    - :code:`observationDate`: The date when the :obj:`Observation` was made.
    - :code:`observationPeriod` (optional): The period over which the
      :obj:`Observation` was made.
    - :code:`measurementMethod` (optional): A field identifying how the
      :obj:`Observation` was made.
    - Additional fields that denote values measured by the :obj:`Observation`.
      These may include the following: :code:`measuredValue`, :code:`meanValue`,
      :code:`medianValue`, :code:`maxValue`, :code:`minValue`, :code:`sumValue`,
      :code:`marginOfError`, :code:`stdError`, :code:`meanStdError`, and others.
  """
  # A None default (rather than a literal {}) avoids sharing one mutable dict
  # object across every call of this function.
  if constraining_properties is None:
    constraining_properties = {}

  # Create the json payload and send it to the REST API.
  pvs = [{'property': prop, 'value': val}
         for prop, val in constraining_properties.items()]
  url = utils._API_ROOT + utils._API_ENDPOINTS['get_place_obs']
  payload = utils._send_request(url, req_json={
    'place_type': place_type,
    'population_type': population_type,
    'pvs': pvs,
  }, compress=True)
  return payload['places']

datacommons/query.py

Lines changed: 86 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
# limitations under the License.
1414
""" Data Commons Python Client API Query Module.
1515
16-
Implements a wrapper object for sending SPARQL queries to the Data Commons
17-
knowledge graph.
16+
Implements functions for sending graph queries to the Data Commons knowledge
17+
graph.
1818
"""
1919

2020
from __future__ import absolute_import
@@ -26,132 +26,97 @@
2626
import os
2727
import requests
2828

29-
# -----------------------------------------------------------------------------
30-
# Query Class
31-
# -----------------------------------------------------------------------------
29+
# ----------------------------- WRAPPER FUNCTIONS -----------------------------
3230

3331

34-
class Query(object):
35-
""" A wrapper object that performs a SPARQL query on the Data Commons graph.
32+
def query(query_string, select=None):
  """ Returns the results of executing a SPARQL query on the Data Commons graph.

  Args:
    query_string (:obj:`str`): The SPARQL query string.
    select (:obj:`func` accepting a row in the query result): A function that
      selects rows to be returned by :code:`query`. This function accepts a row
      in the results of executing :code:`query_string` and returns True if and
      only if the row is to be returned by :code:`query`. The row passed in as
      an argument is represented as a :obj:`dict` that maps a query variable in
      :code:`query_string` to its value in the given row.

  Returns:
    A table, represented as a :obj:`list` of rows, resulting from executing the
    given SPARQL query. Each row is a :obj:`dict` mapping query variable to its
    value in the row. If `select` is not `None`, then a row is included in the
    returned :obj:`list` if and only if `select` returns :obj:`True` for that
    row. A response that carries no rows yields an empty :obj:`list`.

  Raises:
    ValueError: If no API key is set, if the REST API responds with a non-200
      HTTP status, or if the payload returned by the Data Commons REST API is
      malformed (missing header, more cells in a row than header columns, or a
      cell without a value).

  Examples:
    We would like to query for the name associated with three states identified
    by their dcids
    `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_,
    `Kentucky <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and
    `Maryland <https://browser.datacommons.org/kg?dcid=geoId/24>`_.

    >>> query_str = '''
    ... SELECT ?name ?dcid
    ... WHERE {
    ...   ?a typeOf Place .
    ...   ?a name ?name .
    ...   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
    ...   ?a dcid ?dcid
    ... }
    ... '''
    >>> result = query(query_str)
    >>> for r in result:
    ...   print(r)
    {'?name': 'Maryland', '?dcid': 'geoId/24'}
    {'?name': 'Kentucky', '?dcid': 'geoId/21'}
    {'?name': 'California', '?dcid': 'geoId/06'}

    Optionally, we can specify which rows are returned by setting :code:`select`
    like so. The following returns all rows where the name is "Maryland".

    >>> selector = lambda row: row['?name'] == 'Maryland'
    >>> result = query(query_str, select=selector)
    >>> for r in result:
    ...   print(r)
    {'?name': 'Maryland', '?dcid': 'geoId/24'}
  """
  # Read the API key once; an unset or empty key is rejected up front.
  api_key = os.environ.get(_ENV_VAR_API_KEY)
  if not api_key:
    raise ValueError(
        'Request error: Must set an API key before using the API!')

  # Perform the POST request.
  url = _API_ROOT + _API_ENDPOINTS['query']
  res = requests.post(
      url, json={'sparql': query_string}, headers={'x-api-key': api_key})

  # Verify the response before decoding it.
  if res.status_code != 200:
    raise ValueError(
        'Response error: An HTTP {} code was returned by the mixer. Printing '
        'response\n\n{}'.format(res.status_code, res.text))
  res_json = res.json()

  # A payload without a header cannot be mapped to query variables, so it is
  # reported as malformed rather than surfacing as a KeyError.
  header = res_json.get('header')
  if header is None:
    raise ValueError(
        'Query error: response does not contain a header {}'.format(res_json))

  # Iterate through the query results.
  # NOTE(review): a missing 'rows' key is treated as an empty result set on
  # the assumption that the API omits the key when nothing matches -- confirm.
  result_rows = []
  for row in res_json.get('rows', []):
    # Construct the map from query variable to cell value.
    row_map = {}
    for idx, cell in enumerate(row['cells']):
      # idx == len(header) is already out of range for header[idx], hence >=.
      if idx >= len(header):
        raise ValueError(
            'Query error: unexpected cell {}'.format(cell))
      if 'value' not in cell:
        raise ValueError(
            'Query error: cell missing value {}'.format(cell))
      row_map[header[idx]] = cell['value']

    # Add the row to the result rows if it is selected.
    if select is None or select(row_map):
      result_rows.append(row_map)
  return result_rows

0 commit comments

Comments
 (0)