Я новичок в Python, но, добавив к вашему коду следующий код:
except Exception:
pass
traceback.print_exc()
, я вижу ошибку: переменная `cell` не определена.
Если заменить цикл на `while`, код будет работать, например:
# Walk every cell of every table row and report the file when a cell
# contains the keyword. `doc`, `Keyword` and `filename` come from the
# surrounding script.
for table in doc.tables:
    for row in table.rows:
        i = 0
        while i < len(row.cells):
            if Keyword in row.cells[i].text:
                print(filename)
            # Always advance the index; without this the loop never ends.
            i += 1
Надеюсь, что это поможет
ОБНОВЛЕНИЕ:
import numpy as np
import glob
import os
from os import listdir
from docx import Document
import re
import win32com.client as win32
import traceback
Keyword = 'the'
documents = r'C:\Users\aac1928\Desktop\Test'


def document_has_keyword(doc, keyword):
    """Return True if any table cell of *doc* contains *keyword*.

    ``doc`` only needs a ``tables`` iterable whose items expose ``rows``;
    each row exposes ``cells`` and each cell a ``text`` string. The match
    is a plain, case-sensitive substring test and stops at the first hit.
    """
    return any(
        keyword in cell.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )


# Search the tables of every Word document under `documents` for Keyword
# and print the name of each file that contains it (once per file).
for root, dirs, files in os.walk(documents, onerror=None):
    print("Here 1")
    for filename in files:
        print(filename)
        # ".docx" with the dot, so names that merely end in "docx" are skipped.
        # NOTE(review): python-docx reads .docx packages; a legacy .doc file
        # is expected to fail in Document() and end up in the except branch.
        # Confirm whether ".doc" should be matched at all.
        if not filename.endswith((".doc", ".docx")):
            continue
        file_path = os.path.join(root, filename)
        print(file_path)
        try:
            with open(file_path, "rb") as f:
                doc = Document(f)
                if document_has_keyword(doc, Keyword):
                    print(filename)
        except Exception:
            # Best effort: show the traceback and carry on with the next file.
            traceback.print_exc()
ОБНОВЛЕНИЕ 2:
import numpy as np
import glob
import os
from os import listdir
from docx import Document
import re
import win32com.client as win32
import traceback
Keyword = 'the'
documents = r'C:\Users\aac1928\Desktop\Test'
documentsWithKeyword = []


def document_has_keyword(doc, keyword):
    """Return True if any table cell of *doc* contains *keyword*.

    ``doc`` only needs a ``tables`` iterable whose items expose ``rows``;
    each row exposes ``cells`` and each cell a ``text`` string. The match
    is a plain, case-sensitive substring test and stops at the first hit.
    """
    return any(
        keyword in cell.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )


# Collect the names of all Word documents under `documents` whose tables
# contain Keyword.
for root, dirs, files in os.walk(documents, onerror=None):
    print("Here 1")
    for filename in files:
        print(filename)
        # ".docx" with the dot, so names that merely end in "docx" are skipped.
        # NOTE(review): python-docx reads .docx packages; a legacy .doc file
        # is expected to fail in Document() and end up in the except branch.
        # Confirm whether ".doc" should be matched at all.
        if not filename.endswith((".doc", ".docx")):
            continue
        file_path = os.path.join(root, filename)
        print(file_path)
        try:
            with open(file_path, "rb") as f:
                doc = Document(f)
                if document_has_keyword(doc, Keyword):
                    documentsWithKeyword.append(filename)
        except Exception:
            # Best effort: show the traceback and carry on with the next file.
            traceback.print_exc()

# Remove duplicates (the same file name may occur in several folders)
# and put the names in alphabetical order.
documentsWithKeyword = sorted(set(documentsWithKeyword))

# Print the documents that contain the keyword.
for docwithKeyword in documentsWithKeyword:
    print(docwithKeyword)