I have a URL that returns JSON data which, once parsed, looks like this:
{
    u'fields': [{
        u'keyField': False,
        u'name': u'_blockid',
        u'fieldType': u'long'
    }, {
        u'keyField': False,
        u'name': u'_collector',
        u'fieldType': u'string'
    }, {
        u'keyField': False,
        u'name': u'_collectorid',
        u'fieldType': u'long'
    }, {
        u'keyField': False,
        u'name': u'_messageid',
        u'fieldType': u'long'
    }],
    u'messages': [{
        u'map': {
            u'_messageid': u'-9223368783568280026',
            u'_collectorid': u'135927517',
            u'_blockid': u'-9223372036519990555',
            u'_collector': u'collector1',
        }
    }, {
        u'map': {
            u'_messageid': u'-92233645345280026',
            u'_collectorid': u'13545342517',
            u'_blockid': u'-92234254242343219990555',
            u'_collector': u'collector2',
        }
    }]
}
That's just a snippet; the real JSON contains thousands of entries under ['messages'], each with its own ['map'] dict.
I have a script that runs as follows:
import requests
import json
import numpy as np
import pandas as pd

rJSON = requests.get(JsonURL, auth=(username, password))
DATA = json.loads(rJSON.text)

for x in DATA[u'messages']:
    print type(x[u'map'])
    for i in x[u'map']:
        print np.isscalar(x[u'map'][i])
    df = pd.DataFrame.from_dict(x[u'map'])
    break  ### TESTING ###
This outputs the following:
<type 'dict'>
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-151-1b71c28d4d83> in <module>()
11 for i in x[u'map']:
12 print np.isscalar(q[i])
---> 13 df = pd.DataFrame.from_dict(x[u'map'])
14
15 #if isinstance(msgData, pd.DataFrame): # If the variable is a dataframe, append to it...
C:\Users\USERID\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in from_dict(cls, data, orient, dtype)
849 raise ValueError('only recognize index or columns for orient')
850
--> 851 return cls(data, index=index, columns=columns, dtype=dtype)
852
853 def to_dict(self, orient='dict'):
C:\Users\USERID\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __init__(self, data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
C:\Users\USERID\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _init_dict(self, data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
C:\Users\USERID\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
5494 # figure out the index, if necessary
5495 if index is None:
-> 5496 index = extract_index(arrays)
5497 else:
5498 index = _ensure_index(index)
C:\Users\USERID\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in extract_index(data)
5533
5534 if not indexes and not raw_lengths:
-> 5535 raise ValueError('If using all scalar values, you must pass'
5536 ' an index')
5537
ValueError: If using all scalar values, you must pass an index
I understand it's complaining because the dictionary contains only scalar values, but I can't figure out why json.loads() is loading them into the dictionary as scalars, or how to convert them from scalars into something pandas will accept.
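If I hand-build one of the 'map' dicts and pass an explicit index, the error goes away, so maybe that's part of the answer, but I'm not sure it's the right approach (a quick sketch, not my real code):

import pandas as pd

# One 'map' dict copied by hand from the JSON above
single_map = {u'_messageid': u'-9223368783568280026',
              u'_collectorid': u'135927517',
              u'_blockid': u'-9223372036519990555',
              u'_collector': u'collector1'}

# Passing an explicit index seems to avoid the "all scalar values" error
df = pd.DataFrame(single_map, index=[0])
print df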
My end goal is to take all of the ['messages']['map'] data and pd.concat it in the loop into one large DataFrame that I can analyze.
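Roughly, this is the shape of what I'm after, assuming the index=[0] trick above is actually valid (untested sketch):

frames = []
for x in DATA[u'messages']:
    # One single-row frame per message...
    frames.append(pd.DataFrame(x[u'map'], index=[0]))

# ...concatenated into one big frame: one row per message, one column per field
big_df = pd.concat(frames, ignore_index=True)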
Is it possible to stop json.loads() from loading them as scalars? Or is there a way to convert them from scalars into something that can be loaded into a DataFrame? Should I be using the orient='index' parameter?
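If I understand orient='index' correctly, it would give me one row per field name rather than one row per message, which isn't quite what I want, but I may be misreading the docs (sketch only):

# My guess at what orient='index' does with a single 'map' dict
df = pd.DataFrame.from_dict(x[u'map'], orient='index')
# Expected result: the field names become the index and the values sit in a
# single column -- not the one-row-per-message layout I'm after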