Содержимое страницы формируется с помощью JavaScript, а модуль requests не умеет выполнять JS.
В вашем случае нужные данные на самом деле уже присутствуют в HTML, внутри тега script, поэтому я просто извлёк их с помощью регулярного выражения (regex).
import requests
import re
# Fetch the SharePoint sharing page. The page body is rendered by JavaScript,
# but the download link is embedded as text in the HTML that the server returns.
r = requests.get(
    "https://editproj.sharepoint.com/:x:/g/Ea32XJl_g9VBreFAia_zMmEBY6FW2ZWh8F4VeJ1Rt5Z4YA?rtime=CJOEsjTZ10g",
    timeout=30,  # without a timeout the request may block forever
)
r.raise_for_status()  # stop on an HTTP error instead of searching an error page

# Non-greedy capture of the string value that follows the "FileGetUrl" key.
match = re.search(r'FileGetUrl":"(.*?)"', r.text)
if match is None:
    # re.search returns None when the key is absent; report that explicitly
    # rather than failing with AttributeError on match.group().
    raise SystemExit("FileGetUrl was not found in the response body")

# NOTE: the captured text is printed exactly as it appears in the page source,
# so it is still escaped (e.g. `\u0026` stands for `&`). Unescape it before
# using it as a real URL.
print(match.group(1))
Вывод:
https://editproj.sharepoint.com/_layouts/15/download.aspx?UniqueId=995cf6ad-837f-41d5-ade1-4089aff33261\u0026Translate=false\u0026tempauth=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTBmZjEtY2UwMC0wMDAwMDAwMDAwMDAvZWRpdHByb2ouc2hhcmVwb2ludC5jb21AZDJjZTI4MGQtYWExMi00ODQxLWFjYjMtOWYxZDNlMDYzYjhkIiwiaXNzIjoiMDAwMDAwMDMtMDAwMC0wZmYxLWNlMDAtMDAwMDAwMDAwMDAwIiwibmJmIjoiMTU4NjA3MzQwNCIsImV4cCI6IjE1ODYxMDk0MDQiLCJlbmRwb2ludHVybCI6Imtsc0lNb1NtVDQyejBXY085ZGQ2bHovUUJ3ZUVuZzZRd0MxcmdkTGxsVEU9IiwiZW5kcG9pbnR1cmxMZW5ndGgiOiIxMTkiLCJpc2xvb3BiYWNrIjoiVHJ1ZSIsImNpZCI6Ik5EVmlPRFExT1dZdE9UQmhNaTFoTURBd0xXTmlOemN0TTJSaE4yVTBZMlF6WWpWaiIsInZlciI6Imhhc2hlZHByb29mdG9rZW4iLCJzaXRlaWQiOiJNVFEwWmpsbU0yRXRNakV5TnkwME16RmhMV0ppTmpBdE1EY3dNbUV5TXpnNVpqQTMiLCJuYW1laWQiOiIwIy5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2IiwibmlpIjoibWljcm9zb2Z0LnNoYXJlcG9pbnQiLCJpc3VzZXIiOiJ0cnVlIiwiY2FjaGVrZXkiOiIwaC5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2Iiwic2hhcmluZ2lkIjoiNGtZU0VmaDFaMGlObWM3NnV1bkl6dyIsInR0IjoiMCIsInVzZVBlcnNpc3RlbnRDb29raWUiOiIyIn0.R3NjZWhxKzZobmI2bVhSK1JvZzNqUFl0QUw4SDhiTHlETkdQUWQ5MFZjVT0
Чтобы получить все данные целиком:
import requests
import re
import json
# Fetch the SharePoint sharing page; the whole viewer context is embedded in
# the returned HTML as a JavaScript assignment `var _wopiContextJson = {...}`.
r = requests.get(
    "https://editproj.sharepoint.com/:x:/g/Ea32XJl_g9VBreFAia_zMmEBY6FW2ZWh8F4VeJ1Rt5Z4YA?rtime=CJOEsjTZ10g",
    timeout=30,  # without a timeout the request may block forever
)
r.raise_for_status()  # stop on an HTTP error instead of parsing an error page

# Locate the end of the assignment prefix; the JSON object starts right there.
marker = re.search(r"var _wopiContextJson\s*=\s*", r.text)
if marker is None:
    # re.search returns None when the variable is absent; report that
    # explicitly rather than failing with AttributeError.
    raise SystemExit("_wopiContextJson was not found in the response body")

# raw_decode parses exactly one JSON value starting at the given index and
# ignores whatever follows it. A greedy `{.+}` regex would instead run up to
# the last `}` on the line and break json.loads if anything trails the object.
data, _ = json.JSONDecoder().raw_decode(r.text, marker.end())
print(data.keys())
# print(json.dumps(data, indent=4))  # to see it in human readable format.
Вывод:
dict_keys(['HostName', 'SessionId', 'UserId', 'WebAppUrl', 'FileName', 'FileSize', 'FileGetUrl', 'BundleMajorVersion', 'BundleUrl', 'ReadOnly', 'IrmEnabled', 'LabelIrmed', 'LastModified', 'ServerStartTime', 'ServerCompleteTime', 'DocUniqueId', 'CTag', 'ETag', 'RumOneUpdate', 'OpenWacInPlace', 'TemplateInfo', 'BundleStaleness', 'IsAsyncBundleStale', 'IsActivatedAsyncPreviewKillSwich', 'ViewOnly', 'DelayLoadResources', 'DocAspxSingleFlush', 'Origin', 'Slrid', 'InteractiveReadonlyExperiment', 'ClickTime', 'UniqueClick', 'HostGeo', 'PredictedOfficeAppEndPoint', 'PreseededSessionKey', 'PreseededWacSessionId', 'ParentFolderFullUrl', 'DocAgeBucketAtViewTimeBasedOnLastModifiedTime', 'DocAgeBucketAtViewTimeBasedOnLastWrittenTime', 'DocCategoryBasedOnLastModifiedTime', 'DocCategoryBasedOnLastWrittenTime', 'SSRGenerationReason', 'RecordAgeBucketsAndCategoryForRumOneUsingWopicontext', 'ListItemId', 'ListId', 'AllowedOrigins', 'IsPragueDocument', 'PragueSocketStorageDiscovery', 'ResetUriToAddressBarLink', 'IsEduUser'])
{
"HostName": "SharePoint Online",
"SessionId": "ABB8459F-300E-A000-C5E7-EB06D6377846",
"UserId": "",
"WebAppUrl": "https://excel.officeapps.live.com/x/_layouts/xlviewerinternal.aspx?unified=1&ui=en%2DUS&rs=en%2DUS&WOPISrc=https%3A%2F%2Feditproj%2Esharepoint%2Ecom%2F%5Fvti%5Fbin%2Fwopi%2Eashx%2Ffiles%2F995cf6ad837f41d5ade14089aff33261&wdEnableRoaming=1&mscc=0&hid=abb8459f-300e-a000-c5e7-eb06d6377846",
"FileName": "coronavirus-school-closures-data.xlsx",
"FileSize": 22439,
"FileGetUrl": "https://editproj.sharepoint.com/_layouts/15/download.aspx?UniqueId=995cf6ad-837f-41d5-ade1-4089aff33261&Translate=false&tempauth=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTBmZjEtY2UwMC0wMDAwMDAwMDAwMDAvZWRpdHByb2ouc2hhcmVwb2ludC5jb21AZDJjZTI4MGQtYWExMi00ODQxLWFjYjMtOWYxZDNlMDYzYjhkIiwiaXNzIjoiMDAwMDAwMDMtMDAwMC0wZmYxLWNlMDAtMDAwMDAwMDAwMDAwIiwibmJmIjoiMTU4NjA3MzgxOSIsImV4cCI6IjE1ODYxMDk4MTkiLCJlbmRwb2ludHVybCI6Imtsc0lNb1NtVDQyejBXY085ZGQ2bHovUUJ3ZUVuZzZRd0MxcmdkTGxsVEU9IiwiZW5kcG9pbnR1cmxMZW5ndGgiOiIxMTkiLCJpc2xvb3BiYWNrIjoiVHJ1ZSIsImNpZCI6IllXSmlPRFExT1dZdE16QXdaUzFoTURBd0xXTTFaVGN0WldJd05tUTJNemMzT0RRMiIsInZlciI6Imhhc2hlZHByb29mdG9rZW4iLCJzaXRlaWQiOiJNVFEwWmpsbU0yRXRNakV5TnkwME16RmhMV0ppTmpBdE1EY3dNbUV5TXpnNVpqQTMiLCJuYW1laWQiOiIwIy5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2IiwibmlpIjoibWljcm9zb2Z0LnNoYXJlcG9pbnQiLCJpc3VzZXIiOiJ0cnVlIiwiY2FjaGVrZXkiOiIwaC5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2Iiwic2hhcmluZ2lkIjoiS0dISHljQ05Ca0s2Mlk4Z24zLzNSQSIsInR0IjoiMCIsInVzZVBlcnNpc3RlbnRDb29raWUiOiIyIn0.R2k1TU9kWFpKajJ5WHdiOFhtTWNFNmV2cERaTnNMQXlMM1plaDZIc1drOD0",
"BundleMajorVersion": 1,
"BundleUrl": "https://editproj.sharepoint.com/_api/v2.0/drives/b!Op9PFCchGkO7YAcCojifB1FMdGKT1DNGuc0VZ45Ny28Z17W52UlFQ5ThVO31JqPP/items/01RTHF455N6ZOJS74D2VA23YKARGX7GMTB/versions/Published/streams/content_preview_O{0}/streamContent?tempauth=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTBmZjEtY2UwMC0wMDAwMDAwMDAwMDAvZWRpdHByb2ouc2hhcmVwb2ludC5jb21AZDJjZTI4MGQtYWExMi00ODQxLWFjYjMtOWYxZDNlMDYzYjhkIiwiaXNzIjoiMDAwMDAwMDMtMDAwMC0wZmYxLWNlMDAtMDAwMDAwMDAwMDAwIiwibmJmIjoiMTU4NjA3MzgxOSIsImV4cCI6IjE1ODYwOTU0MTkiLCJlbmRwb2ludHVybCI6IjdxVmtxdVZ6bVlaN1MwQXQyUWR4dytLWktRclpHazlnVkxhVW43TGkyaUE9IiwiZW5kcG9pbnR1cmxMZW5ndGgiOiIxNTYiLCJpc2xvb3BiYWNrIjoiVHJ1ZSIsImNpZCI6IllXSmlPRFExT1dZdE16QXdaUzFoTURBd0xXTTFaVGN0WldJd05tUTJNemMzT0RRMiIsInZlciI6Imhhc2hlZHByb29mdG9rZW4iLCJzaXRlaWQiOiJNVFEwWmpsbU0yRXRNakV5TnkwME16RmhMV0ppTmpBdE1EY3dNbUV5TXpnNVpqQTMiLCJuYW1laWQiOiIwIy5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2IiwibmlpIjoibWljcm9zb2Z0LnNoYXJlcG9pbnQiLCJpc3VzZXIiOiJ0cnVlIiwiY2FjaGVrZXkiOiIwaC5mfG1lbWJlcnNoaXB8dXJuJTNhc3BvJTNhYW5vbiNkNWI2NDYyODQwYjk1MTVlNzcwYWE4MTViNDljNjNiZjk2OWY4MmQwNTdmMDhhZTljYjMwNjQwNTQ5YmMzYmQ2Iiwic2hhcmluZ2lkIjoiS0dISHljQ05Ca0s2Mlk4Z24zLzNSQSIsInR0IjoiMCIsInVzZVBlcnNpc3RlbnRDb29raWUiOiIyIn0.Z3BlU3hhWmNSRFI0YitvNkErWFkxU01uZVptZVVpZEgwUHBwNDBuVWhzMD0&usecachedssr=1&prefetchSSRCorrelationId=abb8459f-300e-a000-c5e7-eb06d6377846",
"ReadOnly": true,
"IrmEnabled": false,
"LabelIrmed": false,
"LastModified": 1585952003000,
"ServerStartTime": 1586073819363.505,
"ServerCompleteTime": 0,
"DocUniqueId": "editproj.sharepoint.com_144f9f3a-2127-431a-bb60-0702a2389f07_995cf6ad-837f-41d5-ade1-4089aff33261",
"CTag": "{995CF6AD-837F-41D5-ADE1-4089AFF33261},53,73",
"ETag": "\"{995CF6AD-837F-41D5-ADE1-4089AFF33261},53\"",
"RumOneUpdate": true,
"OpenWacInPlace": false,
"TemplateInfo": null,
"BundleStaleness": 0,
"IsAsyncBundleStale": false,
"IsActivatedAsyncPreviewKillSwich": false,
"ViewOnly": false,
"DelayLoadResources": true,
"DocAspxSingleFlush": true,
"Origin": "Sharing",
"Slrid": "aab8459f-d0f2-a000-c5e7-e3229e5f2252",
"InteractiveReadonlyExperiment": "Control",
"ClickTime": 0,
"UniqueClick": "504924ca-7e0e-49d9-a411-0779c638ce68",
"HostGeo": "US",
"PredictedOfficeAppEndPoint": "",
"PreseededSessionKey": null,
"PreseededWacSessionId": null,
"ParentFolderFullUrl": "https://editproj.sharepoint.com/Shared Documents/cartomapcoronavirus",
"DocAgeBucketAtViewTimeBasedOnLastModifiedTime": "1-2 days",
"DocAgeBucketAtViewTimeBasedOnLastWrittenTime": "1-2 days",
"DocCategoryBasedOnLastModifiedTime": "Valid",
"DocCategoryBasedOnLastWrittenTime": "Valid",
"SSRGenerationReason": "AnalysisPipeline",
"RecordAgeBucketsAndCategoryForRumOneUsingWopicontext": true,
"ListItemId": 37,
"ListId": "b9b5d719-49d9-4345-94e1-54edf526a3cf",
"AllowedOrigins": [
"https://excel.officeapps.live.com"
],
"IsPragueDocument": false,
"PragueSocketStorageDiscovery": null,
"ResetUriToAddressBarLink": false,
"IsEduUser": false
}