РЕШЕНИЕ:
Итак, решение, которое я нашел, состояло в том, что мне удалось исправить ошибку
text = td.text.strip()
ws.cell(row=ro, column=co, value=text)
, и это, похоже, заработало без ошибок. Теперь у меня осталось ещё несколько логических ошибок, с которыми мне нужно разобраться. Спасибо за помощь!
КОНЕЦ РЕШЕНИЯ:
Я действительно не понимаю, что я делаю неправильно. Я все еще очень плохо знаком с Python. Любая помощь будет принята с благодарностью.
import requests
import bs4
import openpyxl
import os
from requests_ntlm import HttpNtlmAuth
# Scrape every HTML table row from an NTLM-protected page and write the cell
# text into an Excel workbook saved on the current user's Desktop.

# Credentials are collected interactively; NTLM expects "DOMAIN\\user".
domain = input('What is the name of your domain: ')
username = input('What is your Username: ')
password = input('What is your password: ')

response = requests.get(
    'Some Website',
    auth=HttpNtlmAuth(domain + '\\' + username, password),
)
# Fail loudly on 401/403/5xx instead of exporting an error page as data.
response.raise_for_status()

# Relative save paths below resolve against the Desktop (Windows-only: relies
# on the USERPROFILE environment variable).
desktop = os.path.join(os.environ['USERPROFILE'], 'Desktop')
os.chdir(desktop)

wb = openpyxl.Workbook()
ws = wb.active
ws.title = 'Report'

# The original referenced an undefined name `res`; the response object is the
# one returned by requests.get() above.
soup = bs4.BeautifulSoup(response.text, 'html.parser')

# openpyxl row/column indices are 1-based. enumerate(start=1) advances each
# index by exactly one; the original `col += col` / `ro += ro` doubled the
# counters on every pass, producing astronomically large indices that made
# wb.save() raise "ValueError: Invalid column index ...".
for row_idx, tr in enumerate(soup.find_all('tr'), start=1):
    # Search within the current row only; soup.find_all('td') would return
    # every cell in the whole document for each row.
    for col_idx, td in enumerate(tr.find_all('td'), start=1):
        ws.cell(row=row_idx, column=col_idx, value=td.text.strip())

wb.save('WebsiteInfo.xlsx')
wb.close()
Я получаю ошибку
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 110, in get_column_letter
return _STRING_COL_CACHE[idx]
KeyError: 2854495385411919762116571938898990272765493248
Во время обработки вышеуказанного исключения произошло другое исключение:
Traceback (most recent call last):
File "ae.py", line 35, in <module>
wb.save('balances.xlsx')
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\workbook\workbook.py", line 408, in save
save_workbook(self, filename)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 293, in save_workbook
writer.save()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 275, in save
self.write_data()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 75, in
write_data
self._write_worksheets()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 215, in _write_worksheets
self.write_worksheet(ws)
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\writer\excel.py", line 200, in write_worksheet
writer.write()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 355, in write
self.write_top()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 99, in write_top
self.write_dimensions()
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 69, in write_dimensions
dim = SheetDimension(ref())
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\worksheet.py", line
389, in calculate_dimension
get_column_letter(max_col), max_row
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\utils\cell.py", line 112, in get_column_letter
raise ValueError("Invalid column index {0}".format(idx))
ValueError: Invalid column index 2854495385411919762116571938898990272765493248
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "C:\Users\ThisUser\AppData\Local\Programs\Python\Python37\lib\site-packages\openpyxl\worksheet\_writer.py", line 32, in _openpyxl_shutdown
os.remove(path)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Windows\\Temp\\1\\openpyxl.we3wcfk_'
Пример того, как выглядит веб-страница, приведён ниже:
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<!-- saved from url=(0081)http://Some Internal Site -->
<html class="gr__something"><head><meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title>Internal Site</title>
<!-- This loads the code that makes the table sortable. -->
<script src="./Internal Site_files/sortabletable.js.download"></script>
<link type="text/css" rel="StyleSheet" href="./Internal Site_files/sortabletable.css">
<style type="text/css">
body {
font-family: Verdana, Helvetica, Arial, Sans-Serif;
font: Message-Box;
}
code {
font-size: 1em;
}
</style>
<script language="VBScript">
Function frm2_onsubmit()
If Len(frm2.FilterType.value) = 0 or Len(frm2.FilterValue.value) = 0 Then
MsgBox "Please enter a filter type and value!"
frm2_onsubmit = False
End If
End Function
</script></head>
<!-- Form Validation -->
<body data-gr-c-s-loaded="true"><hr>
<table width="100%"><tbody><tr><td align="CENTER"><font size="5">Internal Site</font></td></tr></tbody></table>
<hr>
<table>
<tbody><tr>
<form id="frm"></form>
<td><b>Division Filter: </b>
<select name="Division" id="Division" onchange="frm.action='InternalSite.asp?Division='+frm.Division.value;frm.submit();">
<option></option>
<option>3rd Party</option>
<option>Some Division 1</option>
<option>Some Division 2</option>
<option>Some Division 3</option>
<option>Some Division 4</option>
<option>Some Division 5</option>
<option>Some Division 6</option>
<option>Some Division 7</option>
<option>Some Division 8</option>
<option>Some Division 9</option>
<option selected="">Some Division</option>
</select>
</td>
<form method="POST" id="frm2" name="frm2" action="http://Some/Internal/Site/InternalPage.asp?mode=filter"></form>
<td><b> OR </b></td>
<td><b>FILTER BY: </b></td>
<td>
<select name="FilterType" id="FilterType">
<option></option>
<option value="PartRequestNumber">PR #</option>
<option value="SiteReplenPR">Clarify PR</option>
<option value="CaseNumber">Case #</option>
<option value="SubcaseNumber">Subcase #</option>
<option value="SupplierPartNumber">Supplier Part #</option>
</select>
</td>
<td><input type="TEXT" name="FilterValue" size="20" maxlength="50"></td>
<td><input type="SUBMIT" name="FILTER" value="SUBMIT"></td>
<td><input type="RESET" value="RESET VIEW" onclick="document.location.href='InternalPage.asp'"></td>
</tr>
</tbody></table>
<br>
<table class="sort-table" id="table-1" border="1" cellspacing="0">
<thead>
<tr>
<td width="80">Division<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="75">Case #<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="75">Netbuild #<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="60">PR #<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="60">CCL PR<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="75">Part #<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="100">Address<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="100">City<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="45">State<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="80">Tech Recvd<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="50">Aging<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="80">Case Closed<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="80">-1 Closed<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="100">Label Initiated<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="80">-2 Shipped<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
<td width="100">Exception<img src="./Internal Site_files/blank.png" class="sort-arrow"></td>
</tr>
</thead>
<tbody>
<tr>
<td>Some Division</td>
<td>Some 8 digit Number</td>
<td> </td>
<td>Some 10 digit Number</td>
<td> </td>
<td>Some Part Number</td>
<td>1152 Table Street</td>
<td>Some City</td>
<td>FL</td>
<td>10/2/2019</td>
<td>9</td>
<td> </td>
<td>10/7/2019</td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Some Division</td>
<td>Some 8 digit Number</td>
<td> </td>
<td>Some 10 digit Number</td>
<td> </td>
<td>Some Part Number</td>
<td>123 Bable Drive</td>
<td>Some City</td>
<td>CA</td>
<td>10/2/2019</td>
<td>9</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>Some Division</td>
<td>Some 8 digit Number</td>
<td> </td>
<td>Some 10 digit Number</td>
<td> </td>
<td>Some Part Number</td>
<td>34568 US Hwy 1</td>
<td>Some City</td>
<td>AL</td>
<td>10/8/2019</td>
<td>3</td>
<td> </td>
<td>10/8/2019</td>
<td> </td>
<td>10/9/2019</td>
<td> </td>
</tr>
<tr>
<td>Some Division</td>
<td>Some 8 digit Number</td>
<td> </td>
<td>Some 10 digit Number</td>
<td> </td>
<td>Some Part Number</td>
<td>8854 Something Lane</td>
<td>Some City</td>
<td>TN</td>
<td>10/8/2019</td>
<td>3</td>
<td> </td>
<td>10/8/2019</td>
<td> </td>
<td>10/10/2019</td>
<td>Some Exception</td>
</tr>
<tr>
<td>Some Division</td>
<td>Some 8 digit Number</td>
<td> </td>
<td>Some 10 digit Number</td>
<td> </td>
<td>Some Part Number</td>
<td>8854 Something Lane</td>
<td>Some City</td>
<td>TN</td>
<td>10/8/2019</td>
<td>3</td>
<td> </td>
<td>10/8/2019</td>
<td> </td>
<td> </td>
<td>Some Exception</td>
</tr>
</tbody>
</table>
<br>
<p><b>Total = 5</b></p>
<script type="text/javascript">
var st1 = new SortableTable(document.getElementById("table-1"), ["String","Number","String","Number","Number","String","String","String","String","Date","Number","Date","Date","Date","Date","String"]);
</script>
</body></html>