Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datacommons/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# Data Commons Python Client API
from datacommons.core import get_property_labels, get_property_values, get_triples
from datacommons.places import get_places_in
from datacommons.populations import get_populations, get_observations
from datacommons.populations import get_populations, get_observations, get_pop_obs

# Other utilities
from .utils import set_api_key, clean_frame, flatten_frame
6 changes: 6 additions & 0 deletions datacommons/examples/populations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import datacommons as dc
import pandas as pd
import pprint

import datacommons.utils as utils

Expand Down Expand Up @@ -84,5 +85,10 @@ def main():
print(pd_frame)


# Get all population and observation data of Mountain View.
utils._print_header('Get Mountain View population and observation')
popobs = dc.get_pop_obs("geoId/0649670")
pprint.pprint(popobs)

if __name__ == '__main__':
main()
3 changes: 2 additions & 1 deletion datacommons/places.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@


def get_places_in(dcids, place_type):
""" Returns :obj:`Place`'s contained in :code:`dcids` of type `place_type`.
""" Returns :obj:`Place`s contained in :code:`dcids` of type
:code:`place_type`.

Args:
dcids (Union[:obj:`list` of :obj:`str`, :obj:`pandas.Series`]): Dcids to get
Expand Down
113 changes: 113 additions & 0 deletions datacommons/populations.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,116 @@ def get_observations(dcids,
except ValueError:
typed_results[k] = v
return typed_results


def get_pop_obs(dcid):
  """ Returns the populations and observations attached to a :obj:`Thing`.

  Args:
    dcid (:obj:`str`): Dcid of the thing to look up.

  Returns:
    A :obj:`dict` holding every :obj:`StatisticalPopulation` and
    :obj:`Observation` associated with the thing identified by :code:`dcid`.
    The thing is linked to each returned :obj:`StatisticalPopulation`, and
    those populations are in turn the :obj:`observedNode` of the returned
    :obj:`Observation`. The example below shows the exact layout.

  Raises:
    ValueError: If the payload returned by the Data Commons REST API is
      malformed.

  Examples:
    Fetch all :obj:`StatisticalPopulation` and :obj:`Observation` of
    `Santa Clara <https://browser.datacommons.org/kg?dcid=geoId/06085>`_.

    >>> get_pop_obs("geoId/06085")
    {
      'name': 'Santa Clara',
      'placeType': 'County',
      'populations': {
        'dc/p/zzlmxxtp1el87': {
          'popType': 'Household',
          'numConstraints': 3,
          'propertyValues': {
            'householderAge': 'Years45To64',
            'householderRace': 'USC_AsianAlone',
            'income': 'USDollar35000To39999'
          },
          'observations': [
            {
              'marginOfError': 274,
              'measuredProp': 'count',
              'measuredValue': 1352,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2017'
            },
            {
              'marginOfError': 226,
              'measuredProp': 'count',
              'measuredValue': 1388,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2013'
            }
          ],
        },
      },
      'observations': [
        {
          'meanValue': 4.1583,
          'measuredProp': 'particulateMatter25',
          'measurementMethod': 'CDCHealthTracking',
          'observationDate': '2014-04-04',
          'observedNode': 'geoId/06085'
        },
        {
          'meanValue': 9.4461,
          'measuredProp': 'particulateMatter25',
          'measurementMethod': 'CDCHealthTracking',
          'observationDate': '2014-03-20',
          'observedNode': 'geoId/06085'
        }
      ]
    }

    The result is a multi-level :obj:`dict`. Its top level has these keys:

    - :code:`name` and :code:`placeType` give the name and type of the
      :obj:`Place` identified by the given :code:`dcid`.
    - :code:`populations` maps to a :obj:`dict` of every
      :obj:`StatisticalPopulation` whose :obj:`location` is the given
      :code:`dcid`.
    - :code:`observations` maps to a :obj:`list` of every :obj:`Observation`
      whose :obj:`observedNode` is the given :code:`dcid`.

    The :code:`populations` dictionary is keyed by the dcid of each
    :obj:`StatisticalPopulation`. Each value is a dictionary with these keys:

    - :code:`popType`: the population type of the
      :obj:`StatisticalPopulation` identified by the key.
    - :code:`numConstraints`: the number of constraining properties defined
      for that :obj:`StatisticalPopulation`.
    - :code:`propertyValues`: a :obj:`dict` mapping each constraining property
      to its value for that :obj:`StatisticalPopulation`.
    - :code:`observations`: a list of every :obj:`Observation` that has that
      :obj:`StatisticalPopulation` as its :obj:`observedNode`.

    Every :obj:`Observation` is a :code:`dict` with the keys:

    - :code:`measuredProp`
    - :code:`observationDate`
    - :code:`observationPeriod` (optional)
    - :code:`measurementMethod` (optional)
    - one of: :code:`measuredValue`, :code:`meanValue`, :code:`maxValue`,
      :code:`minValue`, :code:`medianValue`

  """
  endpoint = utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs']
  query = '?dcid={}'.format(dcid)
  # This endpoint is read with a GET request and its payload is requested in
  # compressed form, hence the two non-default flags.
  return utils._send_request(endpoint + query, compress=True, post=False)
1 change: 0 additions & 1 deletion datacommons/test/places_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def json(self):
# Otherwise, return an empty response and a 404.
return MockResponse({}, 404)


class TestGetPlacesIn(unittest.TestCase):
""" Unit stests for get_places_in. """

Expand Down
105 changes: 105 additions & 0 deletions datacommons/test/populations_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from __future__ import division
from __future__ import print_function

import base64
from pandas.util.testing import assert_series_equal
from unittest import mock

Expand All @@ -30,6 +31,7 @@

import json
import unittest
import zlib


def post_request_mock(*args, **kwargs):
Expand Down Expand Up @@ -134,6 +136,64 @@ def json(self):
return MockResponse({}, 404)


def get_request_mock(*args, **kwargs):
  """ A mock of GET requests sent through the requests package.

  Args:
    *args: Positional arguments of the patched call. The first one, when
      present, is taken as the request URL.
    **kwargs: Keyword arguments of the patched call. Only :code:`headers`
      (and :code:`url`, when no positional URL is given) are inspected.

  Returns:
    A response-like object exposing :code:`status_code` and :code:`json()`:

    - 403 with an empty body when the :code:`x-api-key` header is missing or
      is not :code:`TEST-API-KEY`.
    - 200 with a base64-encoded, zlib-compressed JSON payload when the URL is
      the get_pop_obs endpoint queried with :code:`dcid=geoId/06085`.
    - 404 with an empty body for any other URL.
  """
  # Create the mock response object.
  class MockResponse:
    def __init__(self, json_data, status_code):
      self.json_data = json_data
      self.status_code = status_code

    def json(self):
      return self.json_data

  # A call made without headers (or with headers=None) carries no API key, so
  # it is answered with 403 instead of crashing the mock with a KeyError.
  headers = kwargs.get('headers') or {}

  # If the API key does not match, then return 403 Forbidden
  if headers.get('x-api-key') != 'TEST-API-KEY':
    return MockResponse({}, 403)

  # The URL may be given positionally or as the `url` keyword.
  req_url = args[0] if args else kwargs.get('url')

  # Mock responses for get requests to get_pop_obs.
  pop_obs_url = (utils._API_ROOT + utils._API_ENDPOINTS['get_pop_obs']
                 + '?dcid=geoId/06085')
  if req_url == pop_obs_url:
    # Response returned when querying for a city in the graph.
    res_json = json.dumps({
      'name': 'Mountain View',
      'placeType': 'City',
      'populations': {
        'dc/p/013ldrstf6lnf': {
          'numConstraints': 6,
          'observations': [
            {
              'marginOfError': 119,
              'measuredProp': 'count',
              'measuredValue': 225,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2014'
            }, {
              'marginOfError': 108,
              'measuredProp': 'count',
              'measuredValue': 180,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2012'
            }
          ],
          'popType': 'Person',
          'propertyValues': {
            'age': 'Years16Onwards',
            'gender': 'Male',
            'income': 'USDollar30000To34999',
            'incomeStatus': 'WithIncome',
            'race': 'USC_HispanicOrLatinoRace',
            'workExperience': 'USC_NotWorkedFullTime'
          }
        }
      }
    })
    # The payload is compressed and base64-encoded because get_pop_obs asks
    # for a compressed response.
    payload = base64.b64encode(zlib.compress(res_json.encode('utf-8')))
    return MockResponse({'payload': payload}, 200)

  # Otherwise, return an empty response and a 404.
  return MockResponse({}, 404)

class TestGetPopulations(unittest.TestCase):
""" Unit tests for get_populations. """

Expand Down Expand Up @@ -355,6 +415,51 @@ def test_series_no_dcids(self, post_mock):
measurement_method='BLSSeasonallyAdjusted')
assert_series_equal(actual, expected)

class TestGetPopObs(unittest.TestCase):
  """ Unit tests for get_pop_obs. """

  @mock.patch('requests.get', side_effect=get_request_mock)
  def test_valid_dcid(self, get_mock):
    """ Calling get_pop_obs with a valid dcid returns the decoded payload. """
    # The structure the mocked endpoint serves for this dcid.
    expected = {
      'name': 'Mountain View',
      'placeType': 'City',
      'populations': {
        'dc/p/013ldrstf6lnf': {
          'numConstraints': 6,
          'observations': [
            {
              'marginOfError': 119,
              'measuredProp': 'count',
              'measuredValue': 225,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2014'
            }, {
              'marginOfError': 108,
              'measuredProp': 'count',
              'measuredValue': 180,
              'measurementMethod': 'CensusACS5yrSurvey',
              'observationDate': '2012'
            }
          ],
          'popType': 'Person',
          'propertyValues': {
            'age': 'Years16Onwards',
            'gender': 'Male',
            'income': 'USDollar30000To34999',
            'incomeStatus': 'WithIncome',
            'race': 'USC_HispanicOrLatinoRace',
            'workExperience': 'USC_NotWorkedFullTime'
          }
        }
      }
    }

    # Set the API key; the mocked GET rejects any other key with a 403.
    dc.set_api_key('TEST-API-KEY')

    # Call get_pop_obs and compare against the expected structure.
    actual = dc.get_pop_obs('geoId/06085')
    self.assertDictEqual(actual, expected)


if __name__ == '__main__':
unittest.main()
13 changes: 8 additions & 5 deletions datacommons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from __future__ import print_function

from collections import defaultdict

import pandas as pd

import base64
Expand All @@ -45,7 +44,8 @@
'get_triples': '/node/triples',
'get_places_in': '/node/places-in',
'get_populations': '/node/populations',
'get_observations': '/node/observations'
'get_observations': '/node/observations',
'get_pop_obs': '/bulk/pop-obs'
}

# The default value to limit to
Expand Down Expand Up @@ -138,7 +138,7 @@ def clean_frame(pd_frame):
# ------------------------- INTERNAL HELPER FUNCTIONS -------------------------


def _send_request(req_url, req_json={}, compress=False):
def _send_request(req_url, req_json={}, compress=False, post=True):
""" Sends a POST request to the given req_url with the given req_json.

Returns:
Expand All @@ -154,7 +154,10 @@ def _send_request(req_url, req_json={}, compress=False):
headers = {'x-api-key': os.environ[_ENV_VAR_API_KEY]}

# Send the request and verify the request succeeded
res = requests.post(req_url, headers=headers, json=req_json)
if post:
res = requests.post(req_url, headers=headers, json=req_json)
else:
res = requests.get(req_url, headers=headers)
if res.status_code != 200:
raise ValueError(
'Response error: An HTTP {} code was returned by the mixer. Printing '
Expand All @@ -171,7 +174,7 @@ def _send_request(req_url, req_json={}, compress=False):
payload = res_json['payload']
if compress:
payload = zlib.decompress(
base64.b64decode(payload), 16 + zlib.MAX_WBITS)
base64.b64decode(payload), zlib.MAX_WBITS|32)
return json.loads(payload)


Expand Down
2 changes: 1 addition & 1 deletion docs/source/_autosummary/datacommons.places.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ datacommons.places
.. autosummary::
:toctree: datacommons_places

get_places_in
get_places_in
1 change: 1 addition & 0 deletions docs/source/_autosummary/datacommons.populations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ datacommons.populations

get_observations
get_populations
get_pop_obs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
datacommons.populations.get\_pop\_obs
=====================================

.. currentmodule:: datacommons.populations

.. autofunction:: get_pop_obs