Как вывести информацию только за определённый период времени, а не за всё время? - PullRequest
0 голосов
/ 30 апреля 2020
import os
import re 
from collections import Counter
# Two empty lists set up at start-up; nothing in the visible part of the
# script puts anything into them.
fileNames, textInfo = [], []

# Remember the directory the script was started from and show it.
currentDirectoryPath = os.getcwd()
print(currentDirectoryPath)


# Extracts four named fields from one access-log line:
#   clientIP   - dotted IPv4 address at the start of the line
#   timestamp  - the date part (dd/Mon/yyyy) right after the opening "["
#   action     - 3-4 capital letters right after a double quote (e.g. GET)
#   statuscode - three digits, first one 1-5, after a closing double quote
# Every fragment is a raw string so that \d, \s and \[ reach the regex engine
# as written instead of being parsed as (invalid) string escape sequences.
regexp = re.compile(
    r'(?P<clientIP>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\['
    r'(?P<timestamp>\d{2}/[A-Z][a-z]{2}/\d\d\d\d).+"'
    r'(?P<action>[A-Z]{3,4}).+"'
    r'\s*(?P<statuscode>[1-5][0-9][0-9])'
)



# Move into the log directory; the relative file name opened later in the
# script is resolved against it.
os.chdir("/content/drive/My Drive/IT 170/log")
currentDirectoryPath = os.getcwd()
# With no argument os.listdir() lists ".", i.e. the directory just entered.
listOfFileNames = os.listdir()
# (Disabled debug aid: loop over listOfFileNames and print every entry.)


# Running totals for the whole log file; the report code further down reads
# matched, failed, cnt_clientIPs and cnt_clientAction.
matched = 0
failed = 0
cnt_clientIPs = Counter()
cnt_clientAction = Counter()
cnt_clientTimeStamp = Counter()
cnt_clientStatusCode = Counter()

# Read the log through a context manager so the file handle is closed as
# soon as the loop ends, including when an exception interrupts it.
with open('access_1.log', 'r') as f:
    for line in f:
        # regexp is already compiled, so use its own match() method.
        m = regexp.match(line)
        if not m:
            # Lines that do not look like an access-log record are only counted.
            failed += 1
            continue

        matched += 1
        cnt_clientIPs.update([m.group('clientIP')])
        cnt_clientAction.update([m.group('action')])
        # Count requests per date too, so cnt_clientTimeStamp is populated
        # alongside the other counters.
        cnt_clientTimeStamp.update([m.group('timestamp')])
        cnt_clientStatusCode.update([m.group('statuscode')])

        # Echo the fields extracted from this line. The literal starts with
        # a line continuation so the output begins directly with "client".
        print("""\
client .........: %s
timestamp ......: %s
action .........: %s
statuscode.........: %s
""" % m.group('clientIP', 'timestamp', 'action', 'statuscode'))

# Ask how many clients to show, then print the match statistics followed by
# that many of the most frequently seen client IPs.
userInputIP = input("Enter how many of the top clients you want to see. ")

print('[*] %d lines matched the regular expression' % matched)
print('[*] %d lines failed to match the regular expression' % failed, end='\n\n')
print(
    '[*] ============================================',
    '[*] '+ userInputIP +' Most Frequently Occurring Clients Queried',
    '[*] ============================================',
    sep='\n',
)

# most_common(n) returns (client IP, hit count) pairs, most frequent first.
topClients = cnt_clientIPs.most_common(int(userInputIP))
for clientIP, count in topClients:
    print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')


# Same report for the request actions: ask for N, then list the N most
# frequent ones together with their counts.
userInputAction = input("Enter how many of the top actions you want to see. ")
print(
    '[*] '+ userInputAction +' Most Frequently Occurring Clients Actions',
    '[*] ============================================',
    sep='\n',
)

# most_common(n) returns (action, hit count) pairs, most frequent first.
topActions = cnt_clientAction.most_common(int(userInputAction))
for action, count in topActions:
    print('[*] %30s: %d' % (action, count))
print('[*] ============================================')

Enter how many of the top clients you want to see. 5
[*] 49997 lines matched the regular expression
[*] 3 lines failed to match the regular expression

[*] ============================================
[*] 5 Most Frequently Occurring Clients Queried
[*] ============================================
[*]                 205.167.170.15: 15695
[*]                  79.142.95.122: 3207
[*]                  52.22.118.215: 734
[*]                  84.112.161.41: 712
[*]                   37.1.206.196: 371
[*] ============================================
Enter how many of the top actions you want to see. 5
[*] 5 Most Frequently Occurring Clients Actions
[*] ============================================
[*]                            GET: 44048
[*]                           POST: 5921
[*]                           HEAD: 25
[*]                            PUT: 3
[*] ============================================

Последние две части я могу вывести только за весь период сразу. Как получить те же данные для определённого временного окна? Нужны Top-N IP-адресов клиентов (N задаёт пользователь, например 3, 5 или 10) либо за весь отслеживаемый период, либо за заданное временное окно (например, с 18 февраля 2016 г. по 1 марта 2016 г.). Я хотел бы вывести всё с января по февраль.

# List the requested number of most frequent client IPs with their counts.
topClients = cnt_clientIPs.most_common(int(userInputIP))
for clientIP, count in topClients:
    print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')


# Ask for N, then list the N most frequent request actions the same way.
userInputAction = input("Enter how many of the top actions you want to see. ")
print(
    '[*] '+ userInputAction +' Most Frequently Occurring Clients Actions',
    '[*] ============================================',
    sep='\n',
)

topActions = cnt_clientAction.most_common(int(userInputAction))
for action, count in topActions:
    print('[*] %30s: %d' % (action, count))
print('[*] ============================================')

Я хотел бы вывести все записи с января по февраль.

178.191.155.244 - - [31/Dec/2015:12:54:00 +0100] "GET /index.php?option=com_phocagallery&view=category&id=1:almhuette-raith&Itemid=53 HTTP/1.1" 200 32653 "-" "Mozilla/5.0 (iPad; CPU OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F69 Safari/600.1.4" "-"
178.191.155.244 - - [31/Dec/2015:12:54:10 +0100] "GET /modules/mod_bowslideshow/tmpl/css/bowslideshow.css HTTP/1.1" 200 1725 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1:almhuette-raith&Itemid=53" "Mozilla/5.0 (iPad; CPU OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F69 Safari/600.1.4" "-"
178.191.155.244 - - [31/Dec/2015:12:54:10 +0100] "GET /media/system/css/modal.css HTTP/1.1" 200 1159 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1:almhuette-raith&Itemid=53" "Mozilla/5.0 (iPad; CPU OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F69 Safari/600.1.4" "-"
178.165.128.230 - - [06/Jan/2016:11:56:11 +0100] "GET /images/phocagallery/almhuette/thumbs/phoca_thumb_m_almhuette_raith_007.jpg HTTP/1.1" 200 5095 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1&Itemid=53" "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0" "-"
178.165.128.230 - - [06/Jan/2016:11:56:11 +0100] "GET /images/phocagallery/almhuette/thumbs/phoca_thumb_m_almhuette_raith_008.jpg HTTP/1.1" 200 3875 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1&Itemid=53" "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0" "-"
178.165.128.230 - - [06/Jan/2016:11:56:11 +0100] "GET /images/phocagallery/almhuette/thumbs/phoca_thumb_m_almhuette_raith_009.jpg HTTP/1.1" 200 4264 "http://www.almhuette-raith.at/index.php?option=com_phocagallery&view=category&id=1&Itemid=53" "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0" "-"
205.167.170.15 - - [29/Jan/2016:00:14:50 +0100] "GET /modules/mod_bowslideshow/tmpl/js/sliderman.1.3.0.js HTTP/1.1" 200 33472 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [29/Jan/2016:00:14:50 +0100] "GET /media/system/js/mootools.js HTTP/1.1" 200 74434 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [29/Jan/2016:00:14:52 +0100] "GET /index.php?option=com_content&view=article&id=50&Itemid=56 HTTP/1.1" 200 7991 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [10/Feb/2016:18:00:04 +0100] "GET /images/phocagallery/thumbs/phoca_thumb_l_winterfoto%2019.jpg HTTP/1.1" 200 68156 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [10/Feb/2016:18:00:04 +0100] "GET /images/phocagallery/almhuette/thumbs/phoca_thumb_l_garage.jpg HTTP/1.1" 200 51898 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [10/Feb/2016:18:00:04 +0100] "GET /images/phocagallery/thumbs/phoca_thumb_l_winterfoto%209.jpg HTTP/1.1" 200 56144 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [10/Feb/2016:18:00:04 +0100] "GET /images/phocagallery/thumbs/phoca_thumb_l_winterfoto%205.jpg HTTP/1.1" 200 65824 "-" "Go-http-client/1.1" "-"
205.167.170.15 - - [10/Feb/2016:18:00:04 +0100] "GET /images/phocagallery/thumbs/phoca_thumb_l_winterfoto%2026.jpg HTTP/1.1" 200 37342 "-" "Go-http-client/1.1" "-"
...