|
13 | 13 | # limitations under the License. |
14 | 14 | """ Data Commons Python Client API Query Module. |
15 | 15 |
|
16 | | -Implements a wrapper object for sending SPARQL queries to the Data Commons |
17 | | -knowledge graph. |
| 16 | +Implements functions for sending graph queries to the Data Commons knowledge |
| 17 | +graph. |
18 | 18 | """ |
19 | 19 |
|
20 | 20 | from __future__ import absolute_import |
|
26 | 26 | import os |
27 | 27 | import requests |
28 | 28 |
|
29 | | -# ----------------------------------------------------------------------------- |
30 | | -# Query Class |
31 | | -# ----------------------------------------------------------------------------- |
| 29 | +# ----------------------------- WRAPPER FUNCTIONS ----------------------------- |
32 | 30 |
|
33 | 31 |
|
34 | | -class Query(object): |
35 | | - """ A wrapper object that performs a SPARQL query on the Data Commons graph. |
| 32 | +def query(query_string, select=None): |
| 33 | + """ Returns the results of executing a SPARQL query on the Data Commons graph. |
36 | 34 |
|
37 | 35 | Args: |
38 | | - **kwargs: Valid keyword arguments include the following. At least one |
39 | | - valid argument must be provided. |
40 | | -
|
41 | | - - `sparql` (:obj:`str`): The SPARQL query string. |
| 36 | + query_string (:obj:`str`): The SPARQL query string. |
| 37 | + select (:obj:`func` accepting a row in the query result): A function that |
| 38 | + selects rows to be returned by :code:`query`. This function accepts a row |
| 39 | + in the results of executing :code:`query_string` and returns True if and |
| 40 | + only if the row is to be returned by :code:`query`. The row passed in as |
| 41 | + an argument is represented as a :obj:`dict` that maps a query variable in |
| 42 | + :code:`query_string` to its value in the given row. |
| 43 | +
|
| 44 | + Returns: |
| 45 | + A table, represented as a :obj:`list` of rows, resulting from executing the |
| 46 | + given SPARQL query. Each row is a :obj:`dict` mapping query variable to its |
| 47 | + value in the row. If `select` is not `None`, then a row is included in the |
| 48 | + returned :obj:`list` if and only if `select` returns :obj:`True` for that |
| 49 | + row. |
42 | 50 |
|
43 | 51 | Raises: |
44 | | - ValueError: If an invalid keyword argument is provided. |
| 52 | + ValueError: If no API key is set, the response status is not 200, or the |
| 53 | + payload returned by the Data Commons REST API is malformed. |
45 | 54 |
|
46 | | - Example: |
47 | | - To construct a :obj:`Query` object, do the following. |
| 55 | + Examples: |
| 56 | + We would like to query for the name associated with three states identified |
| 57 | + by their dcids |
| 58 | + `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_, |
| 59 | + `Kentucky <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and |
| 60 | + `Maryland <https://browser.datacommons.org/kg?dcid=geoId/24>`_. |
48 | 61 |
|
49 | 62 | >>> query_str = ''' |
50 | | - ...SELECT ?name ?dcid |
51 | | - ...WHERE { |
52 | | - ... ?a typeOf Place . |
53 | | - ... ?a name ?name . |
54 | | - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . |
55 | | - ... ?a dcid ?dcid |
56 | | - ...} |
57 | | - ...''' |
58 | | - >>> query = dc.Query(sparql=query_str) |
| 63 | + ... SELECT ?name ?dcid |
| 64 | + ... WHERE { |
| 65 | + ... ?a typeOf Place . |
| 66 | + ... ?a name ?name . |
| 67 | + ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . |
| 68 | + ... ?a dcid ?dcid |
| 69 | + ... } |
| 70 | + ... ''' |
| 71 | + >>> result = query(query_str) |
| 72 | + >>> for r in result: |
| 73 | + ... print(r) |
| 74 | + {"?name": "Maryland", "?dcid": "geoId/24"} |
| 75 | + {"?name": "Kentucky", "?dcid": "geoId/21"} |
| 76 | + {"?name": "California", "?dcid": "geoId/06"} |
| 77 | +
|
| 78 | + Optionally, we can specify which rows are returned by setting :code:`select` |
| 79 | + like so. The following returns all rows where the name is "Maryland". |
| 80 | +
|
| 81 | + >>> selector = lambda row: row['?name'] == 'Maryland' |
| 82 | + >>> result = query(query_str, select=selector) |
| 83 | + >>> for r in result: |
| 84 | + ... print(r) |
| 85 | + {"?name": "Maryland", "?dcid": "geoId/24"} |
59 | 86 | """ |
60 | | - |
61 | | - # Valid query languages |
62 | | - _SPARQL_LANG = 'sparql' |
63 | | - _VALID_LANG = [_SPARQL_LANG] |
64 | | - |
65 | | - def __init__(self, **kwargs): |
66 | | - """ Initializes a SPARQL query targeting the Data Commons graph. """ |
67 | | - if self._SPARQL_LANG in kwargs: |
68 | | - self._query = kwargs[self._SPARQL_LANG] |
69 | | - self._language = self._SPARQL_LANG |
70 | | - self._result = None |
71 | | - else: |
72 | | - lang_str = ', '.join(self._VALID_LANG) |
73 | | - raise ValueError( |
74 | | - 'Must provide one of the following languages: {}'.format(lang_str)) |
75 | | - |
76 | | - def rows(self, select=None): |
77 | | - """ Returns the result of executing the query as an iterator over all rows. |
78 | | -
|
79 | | - Args: |
80 | | - select (:obj:`func` accepting a `row` in the query result): A function |
81 | | - that returns true if and only if a row in the query results should be |
82 | | - kept. The argument for this function is a :obj:`dict` from query |
83 | | - variable to its value in a given row. |
84 | | -
|
85 | | - Yields: |
86 | | - Rows from executing the query where each row is a :obj:`dict` mapping |
87 | | - query variable to its value in the row. If `select` is not `None`, then |
88 | | - the row is returned if and only if `select` returns :obj:`True`. |
89 | | -
|
90 | | - Example: |
91 | | - The following query asks for names of three states: |
92 | | - `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_, |
93 | | - `Kentucky <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and |
94 | | - `Maryland <https://browser.datacommons.org/kg?dcid=geoId/24>`_. |
95 | | -
|
96 | | - >>> query_str = ''' |
97 | | - ... SELECT ?name ?dcid |
98 | | - ... WHERE { |
99 | | - ... ?a typeOf Place . |
100 | | - ... ?a name ?name . |
101 | | - ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") . |
102 | | - ... ?a dcid ?dcid |
103 | | - ... } |
104 | | - ... ''' |
105 | | - >>> query = dc.Query(sparql=query_str) |
106 | | - >>> for r in query.rows(): |
107 | | - ... print(r) |
108 | | - {"?name": "Maryland", "?dcid": "geoId/24"} |
109 | | - {"?name": "Kentucky", "?dcid": "geoId/21"} |
110 | | - {"?name": "California", "?dcid": "geoId/06"} |
111 | | - """ |
112 | | - # Execute the query if the results are empty. |
113 | | - if not self._result: |
114 | | - self._execute() |
115 | | - |
116 | | - # Iterate through the query results |
117 | | - header = self._result['header'] |
118 | | - for row in self._result['rows']: |
119 | | - # Construct the map from query variable to cell value. |
120 | | - row_map = {} |
121 | | - for idx, cell in enumerate(row['cells']): |
122 | | - if idx > len(header): |
123 | | - raise RuntimeError( |
124 | | - 'Query error: unexpected cell {}'.format(cell)) |
125 | | - if 'value' not in cell: |
126 | | - raise RuntimeError( |
127 | | - 'Query error: cell missing value {}'.format(cell)) |
128 | | - cell_var = header[idx] |
129 | | - row_map[cell_var] = cell['value'] |
130 | | - |
131 | | - # Yield the row if it is selected |
132 | | - if select is None or select(row_map): |
133 | | - yield row_map |
134 | | - |
135 | | - def _execute(self): |
136 | | - """ Execute the query. |
137 | | -
|
138 | | - Raises: |
139 | | - RuntimeError: on query failure (see error hint). |
140 | | - """ |
141 | | - # Get the API Key and set the headers |
142 | | - if not os.environ.get(_ENV_VAR_API_KEY, None): |
143 | | - raise ValueError( |
144 | | - 'Request error: Must set an API key before using the API!') |
145 | | - headers = {'x-api-key': os.environ[_ENV_VAR_API_KEY]} |
146 | | - |
147 | | - # Create the query request. |
148 | | - if self._language == self._SPARQL_LANG: |
149 | | - payload = {'sparql': self._query} |
150 | | - url = _API_ROOT + _API_ENDPOINTS['query'] |
151 | | - res = requests.post(url, json=payload, headers=headers) |
152 | | - |
153 | | - # Verify then store the results. |
154 | | - res_json = res.json() |
155 | | - if 'message' in res_json: |
156 | | - raise RuntimeError('Query error: {}'.format(res_json['message'])) |
157 | | - self._result = res.json() |
| 87 | + # Get the API Key and perform the POST request. |
| 88 | + if not os.environ.get(_ENV_VAR_API_KEY, None): |
| 89 | + raise ValueError( |
| 90 | + 'Request error: Must set an API key before using the API!') |
| 91 | + url = _API_ROOT + _API_ENDPOINTS['query'] |
| 92 | + res = requests.post(url, json={'sparql': query_string}, headers={ |
| 93 | + 'x-api-key': os.environ[_ENV_VAR_API_KEY] |
| 94 | + }) |
| 95 | + |
| 96 | + # Verify the response status, then parse the JSON payload. |
| 97 | + if res.status_code != 200: |
| 98 | + raise ValueError( |
| 99 | + 'Response error: An HTTP {} code was returned by the mixer. Printing ' |
| 100 | + 'response\n\n{}'.format(res.status_code , res.text)) |
| 101 | + res_json = res.json() |
| 102 | + |
| 103 | + # Iterate through the query results |
| 104 | + header = res_json['header'] |
| 105 | + result_rows = [] |
| 106 | + for row in res_json['rows']: |
| 107 | + # Construct the map from query variable to cell value. |
| 108 | + row_map = {} |
| 109 | + for idx, cell in enumerate(row['cells']): |
| 110 | + if idx > len(header): |
| 111 | + raise ValueError( |
| 112 | + 'Query error: unexpected cell {}'.format(cell)) |
| 113 | + if 'value' not in cell: |
| 114 | + raise ValueError( |
| 115 | + 'Query error: cell missing value {}'.format(cell)) |
| 116 | + cell_var = header[idx] |
| 117 | + row_map[cell_var] = cell['value'] |
| 118 | + |
| 119 | + # Add the row to the result rows if it is selected |
| 120 | + if select is None or select(row_map): |
| 121 | + result_rows.append(row_map) |
| 122 | + return result_rows |
0 commit comments