背景
- python 3.8
- scrapy
- mysql
- JSON
问题展示
- JSON数据
{"code":0,"msg":"success","data":{"total":496,"entity":[{"uuid":"c2fe2324-dec5-4dbc-9840-42289ea3c38c","name":"睿眼web攻击溯源系统日志规则","vendor":"zoreworld","model":"攻击溯源","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"384e4fe4-58f1-435f-a04c-6cc15d1b8fc2","name":"中兴交换机日志规则(syslog)","vendor":"中兴","model":"switch","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":6},{"uuid":"c15f5483-31dc-4bf1-903c-e9f330864a65","name":"中兴路由器日志规则(syslog)","vendor":"中兴","model":"router","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"fbf46df3-528a-4525-bc66-61ed18d7136f","name":"中新金盾抗DDoS系统GFW日志规则(syslog)","vendor":"中新金盾","model":"金盾抗DDoS系统","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":9},{"uuid":"8f183a5d-eb4b-4a55-abe3-dded07d76191","name":"zabbix(分布式系统监视以及网络监视)日志规则","vendor":"zabbix","model":"zabbix","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"2f0667d9-297d-4b4e-981d-b36847b3d7a2","name":"用友erp数据库接受配置日志解析规则(syslog)","vendor":"用友erp","model":"数据库","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1},{"uuid":"cd1260c6-3aa5-4730-a073-ce5aced0bd82","name":"铱迅WAF日志规则(syslog)","vendor":"铱迅","model":"WAF","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":7},{"uuid":"e3d89b4c-6bb6-4a77-bfa6-f2b508da4502","name":"北京零平蜂眼系统日志解析规则","vendor":"北京零平","model":"蜂眼","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1},{"uuid":"c14e0722-3191-4a48-8a92-de8c34d04f4a","name":"星网锐捷ids日志规则(syslog)","vendor":"星网锐捷","model":"ids","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"cb52ce49-c3f3-4142-a297-ff617b8790d8","name":"星网锐捷AC日志规则(syslog)","vendor":"星网锐捷","model":"ac","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1}]}}
- 解析过程
def parse(self, response):
text = '{"code":0,"msg":"success","data":{"total":496,"entity":[{"uuid":"c2fe2324-dec5-4dbc-9840-42289ea3c38c","name":"睿眼web攻击溯源系统日志规则","vendor":"zoreworld","model":"攻击溯源","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"384e4fe4-58f1-435f-a04c-6cc15d1b8fc2","name":"中兴交换机日志规则(syslog)","vendor":"中兴","model":"switch","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":6},{"uuid":"c15f5483-31dc-4bf1-903c-e9f330864a65","name":"中兴路由器日志规则(syslog)","vendor":"中兴","model":"router","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"fbf46df3-528a-4525-bc66-61ed18d7136f","name":"中新金盾抗DDoS系统GFW日志规则(syslog)","vendor":"中新金盾","model":"金盾抗DDoS系统","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":9},{"uuid":"8f183a5d-eb4b-4a55-abe3-dded07d76191","name":"zabbix(分布式系统监视以及网络监视)日志规则","vendor":"zabbix","model":"zabbix","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"2f0667d9-297d-4b4e-981d-b36847b3d7a2","name":"用友erp数据库接受配置日志解析规则(syslog)","vendor":"用友erp","model":"数据库","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1},{"uuid":"cd1260c6-3aa5-4730-a073-ce5aced0bd82","name":"铱迅WAF日志规则(syslog)","vendor":"铱迅","model":"WAF","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":7},{"uuid":"e3d89b4c-6bb6-4a77-bfa6-f2b508da4502","name":"北京零平蜂眼系统日志解析规则","vendor":"北京零平","model":"蜂眼","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1},{"uuid":"c14e0722-3191-4a48-8a92-de8c34d04f4a","name":"星网锐捷ids日志规则(syslog)","vendor":"星网锐捷","model":"ids","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":2},{"uuid":"cb52ce49-c3f3-4142-a297-ff617b8790d8","name":"星网锐捷AC日志规则(syslog)","vendor":"星网锐捷","model":"ac","version":"Unnamed Version","assetCategory":-1,"enable":true,"mapperSize":1}]}}'
data = json.loads(text)
entitys = data['data']['entity']
for entity in entitys:
uuid = entity['uuid']
item = LogAudit1Item()
item['uuid'] = uuid
item['content'] = entity
yield item
- 解析JSON 插入数据库错误
pymysql.err.ProgrammingError: (1064, 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'\'uuid\': "\'cb52ce49-c3f3-4142-a297-ff617b8790d8\'", \'name\': "\'星网锐捷AC日志\' at line 1')
- json.dumps 数据可以入库但是中文会被Unicode编码
entitys = data['data']['entity']
for entity in entitys:
uuid = entity['uuid']
content = json.dumps(entity)
item = LogAudit1Item()
item['uuid'] = uuid
item['content'] = content
yield item
image.png
json库的一些用法
json.dumps() 将python对象编码成Json字符串
json.loads() 将Json字符串解码成python对象
json.dump() 将python中的对象转化成json储存到文件中
json.load() 将文件中的json的格式转化成python对象提取出来
问题解析
- JSON 插入数据库的问题
- 将python的JSON对象转为字符串
- 编码问题
- ensure_ascii=True:默认输出ASCLL码,如果把这个该成False,就可以输出中文。
data = json.loads(text)
entitys = data['data']['entity']
for entity in entitys:
uuid = entity['uuid']
content = json.dumps(entity, ensure_ascii=False)
item = LogAudit1Item()
item['uuid'] = uuid
item['content'] = content
yield item
image.png
总结
- python 解析JSON
- JSON数据入库
- json.dumps(entity, ensure_ascii=False)