Я не совсем понимаю, что именно вам нужно, поэтому для начала приведу пример. Если останутся вопросы, давайте продолжим обсуждение.
from simplified_scrapy import SimplifiedDoc,req,utils
# First page: fetch the company's EDGAR filing feed (Atom output) and collect
# the links of the entries tagged as 10-K, 10-Q or 20-F.
symbol = 'AAPL'
url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" + symbol + "&type=&dateb=&owner=exclude&start=0&count=100&output=atom"
html = req.get(url)
doc = SimplifiedDoc(html)
links = doc.entrys.containsOr(['term="10-K"','term="10-Q"','term="20-F"']).link.href
# print (links)

# Element names tried in order; the first one that yields a non-empty
# result is used for a given filing.
CASH_TAGS = (
    'us-gaap:CashAndCashEquivalentsAtCarryingValue',
    'ifrs-full:Cash',
    'us-gaap:CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents',
    'us-gaap:Cash',
)

# Second page: open each filing page and locate its XBRL instance document.
for link in links:
    html = req.get(link)
    doc = SimplifiedDoc(html)
    table = doc.select('table@summary=Data Files')
    # Alternative: table.selects('a') returns all links in the table.
    # Use the cell text to locate the link that follows it.
    cell = table.getElementByText('XBRL INSTANCE', tag='td') if table else None
    a = cell.getNext('a') if cell else None
    if not a:
        # Skip filings without a "Data Files" table / XBRL instance link
        # instead of failing on a missing attribute.
        # NOTE(review): assumes simplified_scrapy returns a falsy value when a
        # lookup finds nothing - confirm against the library.
        continue
    # print (utils.absoluteUrl(link,a.href),a.text)

    # Third page: download the instance document and read the cash fact(s).
    u = utils.absoluteUrl(link, a.href)
    html = req.get(u)
    doc = SimplifiedDoc(html)
    cash = None
    for tag in CASH_TAGS:
        cash = doc.selects(tag)
        if cash:
            break
    print (cash)
Результат:
[{'tag': 'us-gaap:CashAndCashEquivalentsAtCarryingValue', 'contextRef': 'FI2020Q1', 'decimals': '-6', 'id': 'd57642648e573-wk-Fact-B1DF26F97E6F5D2D9418D8B9DDC1DB79', 'unitRef': 'usd', 'html': '39771000000'},
...