Как вы печатаете самые популярные IP-адреса клиентов с определенным кодом состояния.? - PullRequest
0 голосов
/ 30 апреля 2020
import os
import re 
from collections import Counter 
from collections import OrderedDict 
fileNames = []
textInfo = []
d = {}

currentDirectoryPath = os.getcwd()
print(currentDirectoryPath)


regexp = re.compile(
    r'(?P<clientIP>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\['
    + '(?P<timestamp>\d{2}/[A-Z][a-z]{2}/\d\d\d\d).+\"'
    + '(?P<action>[A-Z]{3,4}).+\"'  
    + '\s*(?P<statuscode>[1-5][0-9][0-9])'
    )



os.chdir("/content/drive/log")
currentDirectoryPath = os.getcwd()
listOfFileNames = os.listdir(currentDirectoryPath)
#for files in listOfFileNames :
    #print(files) 


f = open('access_1.log', 'r')
matched = 0
failed = 0
cnt_clientIPs = Counter()
cnt_clientAction = Counter()
cnt_clientTimeStamp = Counter()
cnt_clientStatusCode = Counter()


for line in f:
    m = re.match(regexp, line)
    if m:
        cnt_clientIPs.update([m.group('clientIP')])
        cnt_clientAction.update([m.group('action')])
        cnt_clientStatusCode.update([m.group('statuscode')])
        matched += 1
    else:
        failed += 1
        continue

    print("""""\
client .........: %s
timestamp ......: %s
action .........: %s
statuscode.........: %s
""" % ( m.group('clientIP'),
        m.group('timestamp'),
        m.group('action'),
        m.group('statuscode'),
    ))

for line in f:
    m = re.match(regexp, line)
    if m:
      d = {m.group("clientIP"): m.group("statuscode")}

print(d)


userInputIP = input("Enter how many of the top clients you want to see. ")
print('[*] %d lines matched the regular expression' % (matched))
print('[*] %d lines failed to match the regular expression' % (failed), end='\n\n')
print('[*] ============================================')
print('[*] '+ userInputIP +' Most Frequently Occurring Clients Queried')
print('[*] ============================================')

for clientIP, count in cnt_clientIPs.most_common(int(userInputIP)):
    print('[*] %30s: %d' % (clientIP, count))
print('[*] ============================================')


userInputAction = input("Enter how many of the top actions you want to see. ")
print('[*] '+ userInputAction +' Most Frequently Occurring Clients Actions')
print('[*] ============================================')

for action, count in cnt_clientAction.most_common(int(userInputAction)):
    print('[*] %30s: %d' % (action, count))
print('[*] ============================================')


userInputIpPlusStatus = input("Enter how many of the top clients  you want to see. and there status code ")
print('[*] '+ userInputIpPlusStatus +' Most Frequently Occurring Clients IP and Status Code')
print('[*] ============================================')


for clientIP,  count in cnt_clientIPs.most_common(int(userInputIpPlusStatus)):
    if (stuatuscode == userStatuscodeInput):
       print('[*] %30s: %d: %5s:' % (clientIP, count,  m.group('statuscode')))
print('[*] ============================================')

Enter how many of the top clients you want to see.5
[*] 49997 lines matched the regular expression
[*] 3 lines failed to match the regular expression

[*] ============================================
[*] 5 Most Frequently Occurring Clients Queried
[*] ============================================
[*]                 205.167.170.15: 15695
[*]                  79.142.95.122: 3207
[*]                  52.22.118.215: 734
[*]                  84.112.161.41: 712
[*]                   37.1.206.196: 371
[*] ============================================
Enter how many of the top actions you want to see.5
[*] 5 Most Frequently Occurring Clients Actions
[*] ============================================
[*]                            GET: 44048
[*]                           POST: 5921
[*]                           HEAD: 25
[*]                            PUT: 3
[*] ============================================

{}

Я хочу распечатать 5 лучших IP-адресов клиентов с кодом состояния 404. Или любой код состояния передается от пользователя. Вот некоторая тестовая строка ниже, чтобы помочь. Может ли это быть сделано в определенные сроки? Например распечатать IP-адреса Топ-клиентов с кодом состояния 404 с января по февраль?

for clientIP,  count in cnt_clientIPs.most_common(int(userInputIpPlusStatus)):
    if (stuatuscode == userStatuscodeInput):
       print('[*] %30s: %d: %5s:' % (clientIP, count,  m.group('statuscode')))
print('[*] ============================================')

Эта часть прямо здесь, где я говорю. Попытка добавить условие здесь.

80.110.186.51 - - [21/Dec/2015:17:20:12 +0100] "GET /images/stories/raith/oststeiermark.png HTTP/1.1" 200 65225 "http://www.almhuette-raith.at/" "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1" "-"
80.110.186.51 - - [21/Dec/2015:17:20:12 +0100] "GET /images/stories/raith/garage.jpg HTTP/1.1" 200 57339 "http://www.almhuette-raith.at/" "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1" "-"
80.110.186.51 - - [21/Dec/2015:17:20:12 +0100] "GET /images/stories/slideshow/almhuette_raith_03.jpg HTTP/1.1" 200 87782 "http://www.almhuette-raith.at/" "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1" "-"
80.110.186.51 - - [21/Dec/2015:17:20:12 +0100] "GET /images/stories/raith/steiermark_herz.png HTTP/1.1" 200 39683 "http://www.almhuette-raith.at/" "Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1" "-"  

Эти строки выше представляют собой несколько тестовых строк, которые помогут вам, ребята, и показать, с чем я имею дело, в текстовых файлах.

1 Ответ

0 голосов
/ 03 мая 2020

Ваш cnt_clientStatusCode счетчик должен считать кортежи, состоящие из пар ip / код состояния:

for line in f:
    m = re.match(regexp, line)
    if m:
        client_ip = m.group('clientIP')
        statuscode = m.group('statuscode')
        client_statuscode = (client_ip, statuscode) # ip / status code combination
        cnt_clientIPs.update([client_ip])
        cnt_clientAction.update([m.group('action')])
        cnt_clientStatusCode.update([client_statuscode])
        matched += 1
    else:
        failed += 1
        continue

Затем вы можете перечислить n наиболее часто встречающиеся комбинации, где n = int(userInputIpPlusStatus), таким образом:

for (clientIP, statusCode),  count in cnt_clientStatusCode.most_common(int(userInputIpPlusStatus)):
    print('[*] %30s: %d: %5s:' % (clientIP, count, statusCode))
print('[*] ============================================')

Вы, конечно, можете задать дополнительный вопрос, спрашивая, какой именно c код статуса интересует пользователя, и печатать только элементы с этим конкретным кодом статуса. Лог c для этого:

wanted_status_code = input("What status code are you interested in: ")
userInputIpPlusStatus = input("Enter how many of the top clients do you want to see for this status code: ")

n = int(userInputIpPlusStatus)
count = 0
for (clientIP, statusCode),  count in cnt_clientStatusCode.most_common():
    if statusCode == wanted_status_code:
        print('[*] %30s: %d: %5s:' % (clientIP, count, statusCode))
        count += 1
        if count == n:
            break
print('[*] ============================================')

Обновление

Если вы хотите сделать поиск определенного кода состояния более эффективным, то есть словарь счетчиков, ключами которых являются коды состояния, а значениями являются Счетчики клиентских ips:

from collections import defaultdict
status_dict = defaultdict(Counter)

for line in f.split:
    m = re.match(regexp, line)
    if m:
        client_ip = m.group('clientIP')
        statuscode = m.group('statuscode')
        client_statuscode = (client_ip, statuscode)
        cnt_clientIPs.update([client_ip])
        cnt_clientAction.update([m.group('action')])
        cnt_clientStatusCode.update([client_statuscode])
        status_dict[statuscode].update([client_ip])
        matched += 1
    else:
        failed += 1
        continue

Тогда:

wanted_status_code = input("What status code are you interested in: ")
userInputIpPlusStatus = input("Enter how many of the top clients do you want to see for this status code: ")

for clientIP,  count in status_dict.get(wanted_status_code, Counter()).most_common(int(userInputIpPlusStatus)):
    print('[*] %30s: %d: %5s:' % (clientIP, count, wanted_status_code))
print('[*] ============================================')
...