Один вариант
from collections import Counter
# Load the keyword list: one keyword per line of the keyword file.
with open("./key_words.txt", "r", encoding='utf8') as keyfile:
    # Keep the keywords in a set so each membership test below is a hash
    # lookup rather than a scan (@MisterMiyagi comment).
    keywords = set(keyfile.read().split('\n'))

# Tally how often each keyword occurs in the text file.
cnts = Counter()
with open("./Text_file.txt", "r", encoding='utf8') as logfile:
    for line in logfile:
        # split() with no arguments already ignores leading/trailing
        # whitespace (including the newline), so no separate strip is
        # needed. Tokens are compared exactly: matching is case-sensitive
        # and punctuation attached to a word stays part of the token.
        cnts.update(filter(keywords.__contains__, line.split()))

# Show the 50 most frequent keywords as (word, count) pairs.
print(cnts.most_common(50))
Альтернативный вариант: Counter в сочетании с регулярным выражением
Регулярное выражение используется для отделения слов от знаков препинания, т. е. точек, кавычек, запятых и т. д.
import re
from collections import Counter
# Compiled once at import time instead of being looked up for every line.
# Only runs of ASCII letters count as words; everything else (punctuation,
# digits, whitespace, non-ASCII characters) acts as a separator.
_WORD_PATTERN = re.compile(r'[a-zA-Z]+', flags=re.A)


def count_keywords(keywords_path="./key_words.txt",
                   text_path="./Text_file.txt",
                   top_n=50):
    """Count case-insensitive keyword occurrences in a text file.

    Args:
        keywords_path: Path to a UTF-8 file with one keyword per line.
            Surrounding whitespace is ignored and blank lines are skipped.
        text_path: Path to the UTF-8 text file to scan.
        top_n: Maximum number of entries to return; ``None`` returns all.

    Returns:
        A list of ``(keyword, count)`` pairs ordered from most to least
        frequent, as produced by ``Counter.most_common``.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(keywords_path, "r", encoding='utf8') as keyfile:
        # A set makes every membership test below a hash lookup; the
        # original list forced a linear scan of all keywords per word.
        keywords = {entry.strip().lower() for entry in keyfile}
    # Blank lines in the keyword file are not keywords.
    keywords.discard('')

    cnts = Counter()
    with open(text_path, "r", encoding='utf8') as logfile:
        for line in logfile:
            # Extract the words first, then lowercase each one, so that
            # only genuine ASCII letters in the input can form a word.
            words = (match.lower() for match in _WORD_PATTERN.findall(line))
            cnts.update(word for word in words if word in keywords)
    return cnts.most_common(top_n)


if __name__ == "__main__":
    # Script behaviour: report the 50 most common keywords using the
    # default file locations.
    print(count_keywords())