Файл "pandas \ _libs \ hashtable_class_helper.pxi", строка 998, в pandas ._ libs.hashtable.Int64HashTable.get_item KeyError: 3327 - PullRequest
1 голос
/ 19 января 2020

В приведенном ниже коде я пытаюсь получить счетчик u_count и h_count, но каждый раз, когда я получаю сообщение об ошибке

import numpy as np
import pandas as pd
import matplotlib
import re
import datetime
pattern = '^(([0-2]?[0-9]/[0-9]?[0-9]/[0-9][0-9]), ([0-9]?[0-9]:[0-9][0-9]\s\w{2}) - (\w+\s\w+|\w+|):( [\w ]+))'
def startsWithDateTime(pattern,s):
    result = re.match(pattern, s)
    if result:
        return True
    return False
def getDataPoint(pattern,s):
    result = re.match(pattern, s)
    date = result[2]
    time = result[3]
    author = result[4]
    message = result[5]
    return date, time, author, message

parsedData = [] # List to keep track of data so it can be used by a Pandas dataframe
conversationPath ="WhatsApp_Chat_with_Umesh.txt" # text file
with open(conversationPath, encoding="utf-8") as fp:
    fp.readline()
    messageBuffer = [] # Buffer to capture intermediate output for multi-line messages
    date, time, author = None, None, None # Intermediate variables to keep track of the current message being processed
    while True:
        line = fp.readline() 
        if not line: # Stop reading further if end of file has been reached
            break
        line = line.strip() # Guarding against erroneous leading and trailing whitespaces
        if startsWithDateTime(pattern,line): # If a line starts with a Date Time pattern, then this indicates the beginning of a new message
            if len(messageBuffer) > 0: # Check if the message buffer contains characters from previous iterations
                parsedData.append([date, time, author, ' '.join(messageBuffer)]) # Save the tokens from the previous message in parsedData
            messageBuffer.clear() # Clear the message buffer so that it can be used for the next message
            date, time, author, message = getDataPoint(pattern,line) # Identify and extract tokens from the line
            messageBuffer.append(message) # Append message to buffer
        else:
            messageBuffer.append(line)


df = pd.DataFrame(parsedData, columns=['Date', 'Time', 'Author', 'Message'])
def count(df):
    df['Letter_count'] = df['Message'].apply(lambda s: len(s))
    df['Word_count'] = df['Message'].apply(lambda s: len(s.split()))

# count(df)
# print(df.head(50))
# print(df['Date'][0])
temp =0
i = 0
h_count= 0
u_count = 0
while True:
    temp = df['Date'][i]
    filter =df[df['Date']==temp]
    data = filter.iloc[0]
        # print(data.loc['Author'])
        # print(type(data))
    if data.loc['Author'] == 'Umesh Yadav':
        u_count +=1
    else:
        h_count +=1
    i =i+1

Журнал ошибок: (whatsup_env)

L: \ whatsup_chat_analyzer \ WhatsApp-Chat-Analyzer> (whatsup_env) L: \ whatsup_chat_analyzer \ WhatsApp-Chat-Analyzer> C: / Users / Harish / Anaconda3 / python .exe l: / whatsup_chat_analyzer / Анализатор WhatsApp-Chat-Analyzer / .py Traceback (последний вызов был последним): файл "l: /whatsup_chat_analyzer/WhatsApp-Chat-Analyzer/analyzer.py", строка 66, в файле temp = df ['Date'] [i] "C: \ Пользователи \ Harish \ Anaconda3 \ lib \ site-packages \ pandas \ core \ series.py ", строка 1068, в getitem result = self.index.get_value (self, key) Файл" C: \ Users \ Harish \ Anaconda3 \ lib \ site-packages \ pandas \ core \ indexes \ base.py ", строка 4730, в get_value возвращает self._engine.get_value (s, k, tz = getattr (series.dtype," tz ", Нет)) Файл" pandas_libs \ index.pyx ", строка 80, в pandas ._ libs.index.IndexEngine.get_value Файл" pandas_libs \ index.pyx ", строка 88, в pandas ._ libs.index. Индекс Файл Engine.get_value "pandas_libs \ index.pyx", строка 131, в pandas ._ libs.index.IndexEngine.get_lo c Файл "pandas_libs \ hashtable_class_helper.pxi", строка 992, в pandas ._ libs.hashtable. Файл .Int64HashTable.get_item "pandas_libs \ hashtable_class_helper.pxi", строка 998, в pandas ._ libs.hashtable.Int64HashTable.get_item KeyError: 3327

...