Я пишу базовый программный код для сокетов на python, который принимает любые URL-адреса и возвращает содержимое тела в байтах.Мне нужно использовать только библиотеку socket и ничего больше.Когда я передаю разные URL, я получаю полный ответ тела для некоторых URL и только частичный ответ для некоторых URL.Я не уверен, почему это так.
Это мой код:
import socket
def retrieve_url(url):
url1 = url.split("http://", 1)
empty = url1[1].find("/")
if empty > 0:
url2 = url1[1].split("/", 1)
else:
url2 = url1[1].split("/", 1)
url2.append('')
soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
soc.connect((url2[0], 80))
soc.sendall(b"GET /" + bytes(url2[1], 'utf8') + b" HTTP/1.1\r\nHost: " + bytes(url2[0],'utf8') + b"\r\nConnection: close\r\n\r\n")
spl = soc.recv(8192)
soc.close()
a = spl.split(b"\r\n\r\n", 1)
b = spl.split(None, 2)
if b[1] == b'200':
return a[1]
else:
return None
except:
return None
if __name__ == "__main__":
print(retrieve_url("http://bombus.myspecies.info/node/24"))
Это вывод, который я получаю:
b'007a84\r\n<!DOCTYPE html PUBLIC "-//W3C//DTD HTML+RDFa 1.1//EN">\n<html lang="en" dir="ltr" version="HTML+RDFa 1.1"\n xmlns:content="http://purl.org/rss/1.0/modules/content/"\n xmlns:dc="http://purl.org/dc/terms/"\n xmlns:foaf="http://xmlns.com/foaf/0.1/"\n xmlns:og="http://ogp.me/ns#"\n xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"\n xmlns:sioc="http://rdfs.org/sioc/ns#"\n xmlns:sioct="http://rdfs.org/sioc/types#"\n xmlns:skos="http://www.w3.org/2004/02/skos/core#"\n xmlns:xsd="http://www.w3.org/2001/XMLSchema#">\n<head profile="http://www.w3.org/1999/xhtml/vocab">\n <!--[if IE]><![endif]-->\n<!--[if IE]><meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />\n<![endif]--><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<meta name="Generator" content="Drupal 7 (http://drupal.org)" />\n<link rel="canonical" href="/node/24" />\n<link rel="shortlink" href="/node/24" />\n<link rel="shortcut icon" href="http://bombus.myspecies.info/sites/all/themes/scratchpads/favicon.ico" type="image/vnd.microsoft.icon" />\n<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, minimum-scale=1, user-scalable=no" />\n <title>Bumblebee links | Genus Bombus</title>\n <link type="text/css" rel="stylesheet" href="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_css/css__-thr5kmN-aeH-BTlyCidKsE4D9T2geiRzcvwxBTJ3sU__VQtLPGzb9rjfNLJ2SaVDJUKhxtssNArRk3nO7wMUGoA__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.css" media="all" />\n\n<!--[if (lt IE 9)]>\n<link type="text/css" rel="stylesheet" href="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_css/css__GuBcvhFB_-fswxhbycYya2JRgqrqDq5y-pWYcuQbqp4__5h6_elrgEAXONSci50a6ewD4zUldIVoOgFFSjk7rVzg__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.css" media="all" />\n<![endif]-->\n<link type="text/css" rel="stylesheet" href="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_css/css__wxRBHPf0PIq6kCJXm57TkZCESl8zp_O0VduSG6wH0S0__ov4XZtBPHqH1F5cpK65jxEp1K5zF3dLEO4ihA2xTbE8__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.css" media="all" />\n\n<!--[if (lt IE 9)&(!IEMobile)]>\n<link type="text/css" rel="stylesheet" href="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_css/css__ISa-1zfLkp-52D_pPloP6gBpDvJwKu9Kitwbfnm33JY__PDj463LZhTo68R-x__a4AOf-EyYxSMW6sZpiNjtiKlQ__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.css" media="all" />\n<![endif]-->\n\n<!--[if gte IE 9]><!-->\n<link type="text/css" rel="stylesheet" href="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_css/css__Sisgbo-UreLp3cHBWfv37bIck8X5olI1W5xaYzDaATc__9XoSDv750KzHbfRkMww8VsZREmLh-SRR3SnhCYOEF2Q__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.css" media="all" />\n<!--<![endif]-->\n <script type="text/javascript" src="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_js/js__jQBI8pfG-VfYV1aN0gSeRXUYps9-4-M-XVb2H2ZbWuw__SyNVdbb0UiBMvI1oo0AzTY4CH83E7BmTR7ZP1Wwz_VE__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.js"></script>\n<script type="text/javascript" src="//bombus.myspecies.info/sites/all/libraries/mediaelement/build/mediaelement-and-player.min.js?v=2.1.6"></script>\n<script type="text/javascript" src="//bombus.myspecies.info/sites/bombus.myspecies.info/files/advagg_js/js__O2-Mfrpb6mHF0S5LAfIan_d38-kqXvv66sN_ZsHG9Qo__caMiUBuMxDF7rNYJXFf8geEVfuxGw22B96ouV1h3-1Q__CirpVkWrddCrpKWbZfWXvbwVN8pmqviBo8YZAKaYUQg.js"></script>\n<script type="text/javascript">\n<!--//--><![CDATA[//><!--\njQuery.extend(Drupal.settings,{"basePath":"\\/","pathPrefix":"","ajaxPageState":{"theme":"scratchpads","theme_token":"u7Gd7GPT7EPAVsIznB6HVhd9aHAwVUfjG4LrulLh3ak","jquery_version":"1.8","css":{"modules\\/system\\/system.base.css":1,"modules\\/system\\/system.menus.css":1,"modules\\/system\\/system.messages.css":1,"modules\\/system\\/system.theme.css":1,"sites\\/all\\/libraries\\/mediaelement\\/build\\/mediaelementplayer.min.css":1,"misc\\/ui\\/jquery.ui.core.css":1,"misc\\/ui\\/jquery.ui.theme.css":1,"modules\\/overlay\\/overlay-parent.css":1,"sites\\/all\\/modules\\/contrib\\/comment_notify\\/comment_notify.css":1,"modules\\/aggregator\\/aggregator.css":1,"modules\\/comment\\/comment.css":1,"sites\\/all\\/modules\\/contrib\\/date\\/date_api\\/date.css":1,"sites\\/all\\/modules\\/custom\\/entityfilter\\/ckeditor\\/entityfilter.css":1,"sites\\/all\\/modules\\/custom\\/field_quick_delete\\/theme\\/field.css":1,"modules\\/node\\/node.css":1,"sites\\/all\\/modules\\/custom\\/remote_issue_tab\\/css\\/remote_issue_tab.css":1,"sites\\/all\\/modules\\/custom\\/scratchpads\\/scratchpads_biography\\/css\\/scratchpads_biography.css":1,"sites\\/all\\/modules\\/custom\\/scratchpads\\/scratchpads_show_taxa_revisions\\/css\\/scratchpads_show_taxa_revisions.css":1,"modules\\/search\\/search.css":1,"sites\\/all\\/modules\\/custom\\/spm\\/css\\/spm.css":1,"sites\\/all\\/modules\\/custom\\/twitter_filter\\/css\\/twitter_filter.css":1,"sites\\/all\\/modules\\/custom\\/twitterscript\\/css\\/twitterscript.css":1,"modules\\/user\\/user.css":1,"sites\\/all\\/modules\\/contrib\\/views\\/css\\/views.css":1,"sites\\/all\\/modules\\/contrib\\/ckeditor\\/ckeditor.css":1,"sites\\/all\\/modules\\/contrib\\/colorbox\\/styles\\/default\\/colorbox_default_style.css":1,"sites\\/all\\/modules\\/contrib\\/ctools\\/css\\/ctools.css":1,"sites\\/all\\/modules\\/contrib\\/ctools\\/css\\/modal.css":1,"sites\\/all\\/modules\\/contrib\\/modal_forms\\/css\\/modal_forms_popup.css":1,"sites\\/all\\/modules\\/contrib\\/biblio\\/biblio.css":1,"modules\\/openid\\/openid.css":1,"public:\\/\\/spamicide\\/feed_me.css":1,"sites\\/all\\/modules\\/custom\\/scratchpads\\/scratchpads_search_block\\/css\\/scratchpads_search_block.css":1,"sites\\/all\\/modules\\/custom\\/creative_commons\\/css\\/creative_commons.css":1,"sites\\/all\\/themes\\/scratchpads\\/css\\/ie8.css":1,"public:\\/\\/css\\/css_tcVOMdlRmJTsBkm7ZJABjZ3Oct1H-tB7QsRkmUkgNco.css":1,"sites\\/all\\/themes\\/scratchpads\\/css\\/tabs.css":1,"sites\\/all\\/themes\\/scratchpads\\/css\\/sites.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/alpha-reset.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/alpha-mobile.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/alpha-alpha.css":1,"sites\\/all\\/themes\\/omega\\/omega\\/css\\/formalize.css":1,"sites\\/all\\/themes\\/omega\\/omega\\/css\\/omega-text.css":1,"sites\\/all\\/themes\\/omega\\/omega\\/css\\/omega-branding.css":1,"sites\\/all\\/themes\\/omega\\/omega\\/css\\/omega-menu.css":1,"sites\\/all\\/themes\\/omega\\/omega\\/css\\/omega-forms.css":1,"sites\\/all\\/themes\\/scratchpads\\/css\\/global.css":1,"ie::normal::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default.css":1,"ie::normal::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default-normal.css":1,"ie::normal::sites\\/all\\/themes\\/omega\\/alpha\\/css\\/grid\\/alpha_default\\/normal\\/alpha-default-normal-12.css":1,"narrow::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default.css":1,"narrow::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default-narrow.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/grid\\/alpha_default\\/narrow\\/alpha-default-narrow-12.css":1,"normal::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default.css":1,"normal::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default-normal.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/grid\\/alpha_default\\/normal\\/alpha-default-normal-12.css":1,"wide::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default.css":1,"wide::sites\\/all\\/themes\\/scratchpads\\/css\\/scratchpads-alpha-default-wide.css":1,"sites\\/all\\/themes\\/omega\\/alpha\\/css\\/grid\\/alpha_default\\/wide\\/alpha-default-wide-12.css":1},"js":{"modules\\/statistics\\/statistics.js":1,"sites\\/all\\/modules\\/contrib\\/jquer'
Я получаю только частичный ответ за это, но мне нужен полный ответ.
Любая помощь приветствуется.Заранее спасибо.