现在有一个问题:
在Python 2中,通过json.loads()函数读取的数据里,字符串都是unicode类型,而不是str(字节串),后续按str处理时很不方便。
解决方法:
通过json.loads函数的object_hook参数指定一个转换函数:每解析出一个JSON对象(dict)就调用它一次,把其中的unicode字符串编码成str。
代码如下:
import sys
import json
# Encoding applied when unicode strings are converted to byte strings.
g_charset = "utf-8"
def _byteify(data, ignore_dicts=False):
    """Return *data* with unicode strings encoded using ``g_charset``.

    - A unicode string is encoded and returned as a byte string.
    - A list yields a new list whose elements are converted recursively;
      dictionaries found inside it are returned as they are.
    - A dictionary yields a new dictionary with converted keys and values,
      unless ``ignore_dicts`` is true, in which case it is returned as is.
    - Any other value is returned unchanged.

    ``ignore_dicts`` exists because this function is also used as the
    ``object_hook`` of ``json.loads``: dictionaries reaching it a second
    time have already been converted and must not be processed again.
    """
    if isinstance(data, unicode):
        return data.encode(g_charset)

    if isinstance(data, list):
        converted_items = []
        for element in data:
            converted_items.append(_byteify(element, ignore_dicts=True))
        return converted_items

    # Only a dictionary that has not been byteified yet needs more work;
    # everything else is handed back untouched.
    if ignore_dicts or not isinstance(data, dict):
        return data

    converted = {}
    for raw_key, raw_value in data.iteritems():
        new_key = _byteify(raw_key, ignore_dicts=True)
        converted[new_key] = _byteify(raw_value, ignore_dicts=True)
    return converted
def json_loads_byteified(json_text):
    """Parse *json_text* and return the result with unicode strings encoded.

    ``_byteify`` is passed as ``object_hook`` so that every JSON object is
    converted when it is decoded. The decoded result is then passed through
    ``_byteify`` once more with ``ignore_dicts=True`` to cover a top-level
    string or list, while leaving dictionaries that the hook has already
    converted alone.
    """
    decoded = json.loads(json_text, object_hook=_byteify)
    return _byteify(decoded, ignore_dicts=True)
# Read only the first line of the file named by the first command-line
# argument; the context manager closes the handle as soon as it is read.
with open(sys.argv[1]) as input_file:
    line = input_file.readline()

# Parse that line as JSON, converting unicode strings to byte strings.
data = json_loads_byteified(line.strip())

# ensure_ascii=False keeps non-ASCII characters readable instead of \uXXXX
# escapes. A parenthesised single-argument print prints the same text as
# the print statement.
print(json.dumps(data, ensure_ascii=False, indent=2))
参考: