web123456

【Filter duplicate logs using a Python script】

"""Filter duplicate callback entries out of a log file.

Usage: python this_script.py <logfile>

Each log line is expected to contain a '**Customer callback information:'
marker followed by a JSON object, a 'logSeq:0' marker, and a 'uri:' /
'method:' pair.  The script counts callbacks that repeat per company
('toUserName'), both with and without the callback uri taken into
account, and prints a per-company summary.
"""
import json
import sys

# Markers that delimit the JSON payload embedded in a raw log line.
_PAYLOAD_START = '**Customer callback information:'
_PAYLOAD_END = 'logSeq:0'


def parse_callback(line):
    """Extract dedup keys from one raw log line.

    Returns (company, key, key_with_uri) where:
      company      -- the 'toUserName' field of the embedded JSON
      key          -- userID + '_' + externalUserID + '_' + createTime
      key_with_uri -- key plus the last path segment of the callback uri

    Raises json.JSONDecodeError or KeyError on lines that do not match
    the expected layout (same failure mode as the original code).
    """
    start = line.find(_PAYLOAD_START) + len(_PAYLOAD_START)
    end = line.find(_PAYLOAD_END) - 1          # drop the separator char
    payload = json.loads(line[start:end])

    # NOTE(review): the slice intentionally keeps the literal 'uri:' prefix
    # (str.find returns the marker's own index); only the last '/' segment
    # is used, so the prefix is harmless.  The '-3' skips trailing
    # separator characters before 'method:' -- preserved as-is from the
    # original extraction.
    uri_part = line[line.find('uri:'):line.find('method:') - 3]
    uri_tail = uri_part.split('/')[-1]

    company = payload['toUserName']
    key = (payload['userID'] + '_' + payload['externalUserID']
           + '_' + str(payload['createTime']))
    return company, key, key + '_' + uri_tail


def dedupe(lines):
    """Scan an iterable of log lines and collect duplicates per company.

    Returns (total, dup_count, dup_uri_count, dups, dups_with_uri) where
    'dups' maps company -> one list entry per repeated callback (uri
    ignored) and 'dups_with_uri' is the same with the uri folded into the
    dedup key.
    """
    seen = {}           # company -> set of keys already seen
    seen_uri = {}       # company -> set of uri-qualified keys already seen
    dups = {}           # company -> repeated keys, in encounter order
    dups_uri = {}       # company -> repeated uri-qualified keys
    total = dup_count = dup_uri_count = 0

    for raw in lines:
        total += 1
        company, key, key_uri = parse_callback(raw)

        # Sets replace the original list + .count() scan: membership
        # semantics are identical (duplicates were never appended to the
        # list), but lookup is O(1) instead of O(n) per line.
        if key in seen.setdefault(company, set()):
            dup_count += 1
            dups.setdefault(company, []).append(key)
        else:
            seen[company].add(key)

        if key_uri in seen_uri.setdefault(company, set()):
            dup_uri_count += 1
            dups_uri.setdefault(company, []).append(key_uri)
        else:
            seen_uri[company].add(key_uri)

    return total, dup_count, dup_uri_count, dups, dups_uri


def main(filepath):
    """Read the log file, dedupe it, and print the report."""
    with open(filepath, 'r', encoding='utf-8') as f:
        # Stream the file instead of readlines(); strip('\n') kept to
        # match the original per-line normalization exactly.
        total, count, count2, result, result2 = dedupe(
            line.strip('\n') for line in f)

    # Companies with uri-qualified duplicates are always a subset of those
    # with plain duplicates (key_uri repeating implies key repeating), so
    # iterating 'result' alone covers everything, as the original did.
    resultStr = ''
    for company, vals in result.items():
        resultStr = (resultStr + 'company:' + company + ',repeat:'
                     + str(len(vals)) + 'Poster, repeat with uri:'
                     + str(len(result2.get(company, []))) + 'strip' + '\n')
        for val in vals:
            print('company:' + company + ',Repeat callback:' + val)
        for val in result2.get(company, []):
            print('company:' + company + ', repeated callbacks with uri:' + val)

    print('Total callback number:' + str(total) + ', Number of repeated items:'
          + str(count) + 'Bits, the number of repeated items in the same uri:'
          + str(count2) + 'strip')
    print(resultStr)


if __name__ == '__main__':
    main(sys.argv[1])