Когда я запускаю эти приведенные ниже коды, я получаю сообщение об ошибке, в котором говорится, что существует ошибка атрибута: объект float не имеет атрибута split в python.
Я хотел бы знать, почему появляются эти ошибки, пожалуйста, помогите мне взглянуть на коды ниже, спасибо: ((
pd.options.display.max_colwidth = 10000
df = pd.read_csv(output, sep='|')
def text_processing(df):
"""""=== Lower case ==="""
'''First step is to transform comments into lower case'''
df['content'] = df['content'].apply(lambda x: " ".join(x.lower() for x in x.split() if x not in stop_words))
'''=== Removal of stop words ==='''
df['content'] = df['content'].apply(lambda x: " ".join(x for x in x.split() if x not in stop_words))
'''=== Removal of Punctuation ==='''
df['content'] = df['content'].str.replace('[^\w\s]', '')
'''=== Removal of Numeric ==='''
df['content'] = df['content'].str.replace('[0-9]', '')
'''=== Removal of common words ==='''
freq = pd.Series(' '.join(df['content']).split()).value_counts()[:5]
freq = list(freq.index)
df['content'] = df['content'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
'''=== Removal of rare words ==='''
freq = pd.Series(' '.join(df['content']).split()).value_counts()[-5:]
freq = list(freq.index)
df['content'] = df['content'].apply(lambda x: " ".join(x for x in x.split() if x not in freq))
return df
df = text_processing(df)
print(df)
Вывод ошибок:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.2\helpers\pydev\pydevd.py", line 1664, in <module>
main()
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.2\helpers\pydev\pydevd.py", line 1658, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.2\helpers\pydev\pydevd.py", line 1068, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.2.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/L31307/Documents/FYP P3_Lynn_161015H/FYP 10.10.18 (Wed) still working on it/FYP/dataanalysis/category_analysis.py", line 53, in <module>
df = text_processing(df)
File "C:/Users/L31307/Documents/FYP P3_Lynn_161015H/FYP 10.10.18 (Wed) still working on it/FYP/dataanalysis/category_analysis.py", line 30, in text_processing
df['content'] = df['content'].apply(lambda x: " ".join(x.lower() for x in x.split() if x not in stop_words))
File "C:\Users\L31307\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py", line 3194, in apply
mapped = lib.map_infer(values, f, convert=convert_dtype)
File "pandas/_libs/src\inference.pyx", line 1472, in pandas._libs.lib.map_infer
File "C:/Users/L31307/Documents/FYP P3_Lynn_161015H/FYP 10.10.18 (Wed) still working on it/FYP/dataanalysis/category_analysis.py", line 30, in <lambda>
df['content'] = df['content'].apply(lambda x: " ".join(x.lower() for x in x.split() if x not in stop_words))
AttributeError: 'float' object has no attribute 'split'