トップ 差分 一覧 ソース 検索 ヘルプ RSS ログイン

PY-squidlog

squid のLog の変換

# tail /var/log/squid/access.log | perl -pe 's/\d+\.\d+/localtime $&/e' -  

http://wiki.squid-cache.org/Features/LogFormat

Log check

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2014 Hiro.Ogawa

'''
Created on Oct 11, 2010

@author: His Royal Highness

The squid log reader analyses the squid access.log file and produces stats on its
content. It expects access.log to be in the same directory,
but this can be changed to an absolute path as required.
'''

# ---- Input log ---------------------------------------------------------
# Other locations that have been used for the log:
#   "./logs/access.log"
#   "/home/squid/logs/access.log"
#   "/home/squid/logs/access.log.1"
filename = "/home/squid/logs/access.log.0"
file = open(filename)

# ---- Request counters --------------------------------------------------
total_requests = total_cache_hits = total_cache_misses = 0

# ---- Byte counters -----------------------------------------------------
total_requested_bytes = total_cache_bytes = 0
total_miss_bytes = total_other_bytes = 0

# ---- Per-key tallies (filled in while the log is read) ------------------
host_requests, host_total_bytes = {}, {}
domain_requests, domain_bytes = {}, {}

''' Strip back URL to what we will measure on '''
def striptodomain(url):
    """Return the host part of *url* (``host`` or ``host:port``).

    Any leading ``scheme://`` prefix is removed (``http://``, ``https://``,
    ``ftp://`` ...), then everything from the first ``/`` onwards is dropped.
    A value without a scheme, such as ``example.com:443``, is returned
    unchanged up to its first ``/``.
    """
    scheme, sep, rest = url.partition("://")
    # Only treat "://" as a scheme separator when it comes before the first
    # "/"; otherwise it belongs to a URL embedded in the path or query.
    if sep and "/" not in scheme:
        url = rest
    return url.split("/", 1)[0]


''' Rip through the file and generate the stats '''
# Substrings that mark a request as a cache hit / cache miss.  They are
# matched against the result-code field only (bit[3]), so that a URL which
# happens to contain e.g. "TCP_HIT" cannot be misclassified.
_HIT_TAGS = ('TCP_HIT', 'TCP_MEM_HIT', 'TCP_NEGATIVE_HIT',
             'TCP_IMS_HIT', 'TCP_REFRESH_HIT')
_MISS_TAGS = ('TCP_MISS', 'TCP_REFRESH_MISS', 'TCP_CLIENT_REFRESH_MISS')

try:
    for line in file:
        bit = line.split()
        if len(bit) < 9:
            # Too few fields to be a usable entry: report the field count
            # and skip the line.
            print(len(bit))
            continue
        total_requests += 1

        # Field positions used below: bit[2] client, bit[3] result code,
        # bit[4] size in bytes, bit[6] URL.
        client = bit[2]
        result_code = bit[3]
        size = int(bit[4])
        hostname = striptodomain(bit[6])

        # Per-client request and byte totals.
        host_requests[client] = host_requests.get(client, 0) + 1
        host_total_bytes[client] = host_total_bytes.get(client, 0) + size

        # Per-domain request and byte totals.
        domain_requests[hostname] = domain_requests.get(hostname, 0) + 1
        domain_bytes[hostname] = domain_bytes.get(hostname, 0) + size

        # Every counted request contributes to the grand byte total; the
        # hit / miss / other split is decided by the result code.
        total_requested_bytes += size
        if any(tag in result_code for tag in _HIT_TAGS):
            total_cache_hits += 1
            total_cache_bytes += size
        elif any(tag in result_code for tag in _MISS_TAGS):
            total_cache_misses += 1
            total_miss_bytes += size
        else:
            total_other_bytes += size
finally:
    # Release the log file even if a malformed line aborts the loop.
    file.close()


''' Extra file stats '''
def _percent(part, whole):
    """Return *part* as a percentage of *whole*, rounded to 2 decimals.

    Returns 0.0 when *whole* is zero (for example an empty log) instead of
    raising ZeroDivisionError.
    """
    if not whole:
        return 0.0
    return round((1.0 * part / whole) * 100, 2)


# Requests that were classified as neither a cache hit nor a cache miss.
total_other = (total_requests - total_cache_hits) - total_cache_misses

# Share of each category by request count.
total_cache_hits_pc = _percent(total_cache_hits, total_requests)
total_cache_misses_pc = _percent(total_cache_misses, total_requests)
total_other_pc = _percent(total_other, total_requests)

# Share of each category by transferred bytes.
total_cache_bytes_pc = _percent(total_cache_bytes, total_requested_bytes)
total_miss_bytes_pc = _percent(total_miss_bytes, total_requested_bytes)
total_other_bytes_pc = _percent(total_other_bytes, total_requested_bytes)



''' File type counts'''
# Summary report: request counts and byte totals, each split into
# cache hits / cache misses / other, with the percentage next to the count.
# These are Python 2 print statements; the comma-separated items are written
# on one line.
print " "
print "Caching Performance"
# Counts by number of requests.
print "Total requests: \t\t", total_requests
print "Total Cache hits:\t\t",total_cache_hits, total_cache_hits_pc, "%"
print "Total Cache misses:\t" ,total_cache_misses, total_cache_misses_pc, "%"
print "Total Other:\t\t" ,total_other, total_other_pc, "%"
print ""
# Counts by number of bytes.
print "Total bytes: \t\t", total_requested_bytes
print "Total Cache bytes:\t",total_cache_bytes, total_cache_bytes_pc, "%"
print "Total Miss bytes:\t\t",total_miss_bytes, total_miss_bytes_pc, "%"
print "Total Other bytes:\t",total_other_bytes, total_other_bytes_pc, "%"
# Blank lines separating the summary from the listings that follow.
print ""
print ""
print ""




''' Print Domain statistics '''
#sorted_domain_requests = sorted(domain_requests)
def _print_top(title, table, gap, limit=25):
    """Print *title*, then the keys of *table* with the largest values.

    At most *limit* entries are printed, fewer when *table* is smaller, so a
    short log no longer raises IndexError.  Each entry is written as
    ``<value> <gap><key>``; two blank lines follow the listing.
    """
    print(title)
    ranked = sorted(table, key=table.__getitem__, reverse=True)
    for name in ranked[:limit]:
        # One pre-formatted string per line: behaves the same whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s %s%s" % (table[name], gap, name))
    print("")
    print("")


_print_top("Most Requested Domains (hits)", domain_requests, "\t\t")
_print_top("Highest Data Transfer by Domain (bytes)", domain_bytes, "\t")
_print_top("Most requesting hosts (hits)", host_requests, "\t")
_print_top("Highest Data Transfer by Host (bytes)", host_total_bytes, "\t")

Log check2

green@vbfb1:~/Yama1 % cat ./q2.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2014 Hiro.Ogawa

import sqlite3

class DDvar:
    """Two-dimensional table addressed by arbitrary hashable labels.

    Each axis keeps a dict mapping a label to an integer index.  Indices are
    handed out in order of first use and start at 1, so row 0 and column 0
    of ``imat`` are never assigned to a label.  ``imat`` starts as a 100x100
    list of lists filled with 0 and grows automatically once more labels
    are used than it can hold.
    """

    def __init__(self):
        self.axisx = {}      # label -> row index (1-based)
        self.axisy = {}      # label -> column index (1-based)
        self.cnt_axisx = 0   # highest row index handed out so far
        self.cnt_axisy = 0   # highest column index handed out so far
        self.imat = [[0 for i in range(100)] for j in range(100)]

    def _ensure_capacity(self, x, y):
        """Grow ``imat`` with zero cells so that ``imat[x][y]`` exists."""
        width = len(self.imat[0])
        if y >= width:
            width = y + 1
            for row in self.imat:
                row.extend([0] * (width - len(row)))
        while len(self.imat) <= x:
            self.imat.append([0] * width)

    def strmat(self, ax, ay):
        """Return the ``(row, column)`` indices for labels *ax* and *ay*.

        A label seen for the first time is registered and given the next
        free index on its axis.
        """
        if ax not in self.axisx:
            self.cnt_axisx = self.cnt_axisx + 1
            self.axisx[ax] = self.cnt_axisx
        if ay not in self.axisy:
            self.cnt_axisy = self.cnt_axisy + 1
            self.axisy[ay] = self.cnt_axisy

        retx = self.axisx[ax]
        rety = self.axisy[ay]
        # Without this, the 100th distinct label on an axis would index
        # past the end of the initial 100x100 matrix.
        self._ensure_capacity(retx, rety)
        return (retx, rety)

    def setimat(self, ax, ay, data):
        """Store *data* in the cell addressed by labels *ax* and *ay*."""
        (x, y) = self.strmat(ax, ay)
        self.imat[x][y] = data

    def getimat(self, ax, ay):
        """Return the value stored for labels *ax* and *ay*.

        Labels not seen before are registered as a side effect (via
        ``strmat``) and their cell reads as 0.
        """
        (x, y) = self.strmat(ax, ay)
        return self.imat[x][y]


if __name__ == "__main__":
    # Small self-check: fill a 4x4 grid of labelled cells with 0..15 in
    # row-major order, then read every cell back and print it.
    row_labels = ['a', 'b', 'c', 'd']
    col_labels = ['11', '22', '33', '44']

    axi = DDvar()
    for r, row in enumerate(row_labels):
        for c, col in enumerate(col_labels):
            axi.setimat(row, col, r * len(col_labels) + c)
    count = len(row_labels) * len(col_labels)

    for row in row_labels:
        for col in col_labels:
            print("%s %s %s" % (row, col, axi.getimat(row, col)))

elapsed time check

#!/usr/bin/env python
# coding: utf-8

import os

import datetime

fname="./logs/access.log"


def find_slow_requests(lines, threshold=1000):
    """Yield ``(timestamp, line)`` for log lines whose 2nd field exceeds *threshold*.

    *lines* is any iterable of log lines (an open file works).  The first
    whitespace-separated field is read as a float epoch timestamp and the
    second as an integer (the elapsed time in Squid's native log format --
    presumably milliseconds; confirm against the log format in use).
    Blank lines, lines with fewer than two fields, and lines whose fields
    are not numeric are skipped.  The yielded line has its trailing newline
    removed.
    """
    for line in lines:
        line = line.rstrip('\n')
        fields = line.split()
        if len(fields) < 2 or not fields[1].isdigit():
            continue
        if int(fields[1]) > threshold:
            try:
                stamp = float(fields[0])
            except ValueError:
                continue
            yield stamp, line


if __name__ == "__main__":
    with open(fname) as fp:
        for stamp, line in find_slow_requests(fp):
            # Local-time rendering of the timestamp, a space, then the raw line.
            print("%s %s" % (datetime.datetime.fromtimestamp(stamp), line))

ログ時間の

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import time
import datetime

fname = "./180122/access.log.0"

# Time window of interest, as epoch seconds in local time.  timetuple()
# carries no sub-second part, so the trailing microsecond arguments have no
# effect on the resulting bounds.
stime = time.mktime(datetime.datetime(2018, 1, 19, 0, 0, 0, 1).timetuple())
etime = time.mktime(datetime.datetime(2018, 1, 19, 23, 59, 59, 9).timetuple())


def select_entries(lines, start, end, third_octet=3, low=102, high=120):
    """Yield ``(timestamp, client, url)`` for the log lines of interest.

    A line is selected when its timestamp (field 1) lies strictly between
    *start* and *end*, and its client address (field 3) is dotted with a
    third part equal to *third_octet* and a fourth part strictly between
    *low* and *high*.  The URL is taken from field 7.

    Lines with fewer than seven fields, a non-numeric timestamp, or a client
    address that does not have at least four numeric dotted parts are
    skipped instead of raising.
    """
    for fline in lines:
        fields = fline.split()
        if len(fields) < 7:
            continue
        try:
            ltime = float(fields[0])
        except ValueError:
            continue
        if not (start < ltime < end):
            continue

        lips = fields[2]
        octets = lips.split(".")
        if len(octets) < 4:
            continue
        try:
            s2 = int(octets[2])
            s3 = int(octets[3])
        except ValueError:
            continue

        if s2 == third_octet and low < s3 < high:
            yield ltime, lips, fields[6]


if __name__ == "__main__":
    with open(fname) as fp:
        print(stime)
        for ltime, lips, lurl in select_entries(fp, stime, etime):
            print(datetime.datetime.fromtimestamp(ltime), end="")
            print("," + lips + "," + lurl)