基础语句1 : grep '"device_os"':'"iOS"' | grep -v '"device_ua"':'""'
基础语句2 : awk '{match($0,/(device_os[^,]*?,)/,a);match($0,/(device_ua[^,]*?,)/,b);match($0,/(device_idfa[^,]*?,)/,c);print a[1],b[1],c[1]}'
最终语句:
cat "E:\FromG\QQFiles\hdfs-idfa-awk.txt"|awk '{match($0,/(device_os[^,]*?,)/,a);match($0,/(device_ua":"[^"]*?",)/,b);match($0,/(device_idfa[^,]*?,)/,c);print a[1],b[1],c[1]}'|grep -v 'device_ua":""'|grep 'device_os":"iOS"'
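简析:先使用 awk 的 match 函数(三参数形式,属于 gawk 扩展)提取字段及其值,再使用 grep 按值过滤。device_ua 的值本身可能包含逗号(例如 "KHTML, like Gecko"),用 [^,]* 会在第一个逗号处截断,所以最终语句改为 (device_ua":"[^"]*?",),一直匹配到结束的双引号。另外,awk 的正则(ERE)不支持非贪婪量词,语句中 *? 里的 ? 并没有"非贪婪"的效果,在 gawk 中等同于 *。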
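注意:grep 的模式必须与 awk 的实际输出保持一致,尤其注意双引号。awk 的正则是从字段名开始匹配的,输出形如 device_os":"iOS",(字段名前面没有左双引号),因此 grep 中要写 device_os":"iOS",而不能写 "device_os":"iOS"。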
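以下为测试数据,取自 HDFS,每行一条 JSON 记录。4 条记录依次为:iOS 且 device_ua 非空、android、iOS 但 device_ua 为空、缺少 device_os 字段且 device_ua 为空;最终语句只应输出第 1 条。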
-------------------------------------------------------------------------------------
{"type":"imp","time":1509292800,"data":{"request_id":"9ff00d3ef48b505f1a66086e7ea22455","adsource_id":"59bf64d22941ef010026fa10","publisher_id":"589805c3d3639801005b6439","app_id":"591d0a41bddf360001aac205","slot_id":"591d0a9fbddf360001aac24d","creative_id":"","adv_id":"","request":0,"bid_req":0,"bid_res_bid":0,"bid_res_no_bid":0,"win":0,"bid_res_error":0,"bid_res":0,"bid_res_timeout":0,"conn_type":"wifi","device_os":"iOS","device_type":"pad","pay_type":"cpm","slot_type":"banner","price":"1.60","device_imei":"","device_imsi":"","device_idfa":"30AE4A19-CBC3-4BA2-B252-36CDEF45E569","device_androidid":"","device_mac":"","device_ua":"Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Mobile/14F89","device_ip":"114.94.38.213","req_unix":"","timestamp":1509296286,"preImp":1,"preImpValid":1,"preImpInvalid":0,"preImpDuplicate":0,"preClk":0,"preClkValid":0,"preClkInvalid":0,"preClkDuplicate":0}}
{"type":"imp","time":1509292800,"data":{"request_id":"9ff00d3ef48b505f1a66086e7ea22455","adsource_id":"59bf64d22941ef010026fa10","publisher_id":"589805c3d3639801005b6439","app_id":"591d0a41bddf360001aac205","slot_id":"591d0a9fbddf360001aac24d","creative_id":"","adv_id":"","request":0,"bid_req":0,"bid_res_bid":0,"bid_res_no_bid":0,"win":0,"bid_res_error":0,"bid_res":0,"bid_res_timeout":0,"conn_type":"wifi","device_os":"android","device_type":"pad","pay_type":"cpm","slot_type":"banner","price":"1.60","device_imei":"","device_imsi":"","device_idfa":"30AE4A19-CBC3-4BA2-B252-36CDEF45E569","device_androidid":"","device_mac":"","device_ua":"Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Mobile/14F89","device_ip":"114.94.38.213","req_unix":"","timestamp":1509296286,"preImp":1,"preImpValid":1,"preImpInvalid":0,"preImpDuplicate":0,"preClk":0,"preClkValid":0,"preClkInvalid":0,"preClkDuplicate":0}}
{"type":"imp","time":1509292800,"data":{"request_id":"9ff00d3ef48b505f1a66086e7ea22455","adsource_id":"59bf64d22941ef010026fa10","publisher_id":"589805c3d3639801005b6439","app_id":"591d0a41bddf360001aac205","slot_id":"591d0a9fbddf360001aac24d","creative_id":"","adv_id":"","request":0,"bid_req":0,"bid_res_bid":0,"bid_res_no_bid":0,"win":0,"bid_res_error":0,"bid_res":0,"bid_res_timeout":0,"conn_type":"wifi","device_os":"iOS","device_type":"pad","pay_type":"cpm","slot_type":"banner","price":"1.60","device_imei":"","device_imsi":"","device_idfa":"30AE4A19-CBC3-4BA2-B252-36CDEF45E569","device_androidid":"","device_mac":"","device_ua":"","device_ip":"114.94.38.213","req_unix":"","timestamp":1509296286,"preImp":1,"preImpValid":1,"preImpInvalid":0,"preImpDuplicate":0,"preClk":0,"preClkValid":0,"preClkInvalid":0,"preClkDuplicate":0}}
{"type":"imp","time":1509292800,"data":{"request_id":"9ff00d3ef48b505f1a66086e7ea22455","adsource_id":"59bf64d22941ef010026fa10","publisher_id":"589805c3d3639801005b6439","app_id":"591d0a41bddf360001aac205","slot_id":"591d0a9fbddf360001aac24d","creative_id":"","adv_id":"","request":0,"bid_req":0,"bid_res_bid":0,"bid_res_no_bid":0,"win":0,"bid_res_error":0,"bid_res":0,"bid_res_timeout":0,"conn_type":"wifi","device_type":"pad","pay_type":"cpm","slot_type":"banner","price":"1.60","device_imei":"","device_imsi":"","device_idfa":"30AE4A19-CBC3-4BA2-B252-36CDEF45E569","device_androidid":"","device_mac":"","device_ua":"","device_ip":"114.94.38.213","req_unix":"","timestamp":1509296286,"preImp":1,"preImpValid":1,"preImpInvalid":0,"preImpDuplicate":0,"preClk":0,"preClkValid":0,"preClkInvalid":0,"preClkDuplicate":0}}