Все нужные элементы можно найти по имени тега (tag)
или по классу (class):
# Assumes `soup` is an already-parsed BeautifulSoup document.

# The question text is inside <div class="qtext">.
print(soup.find('div', class_='qtext').text.strip())
# Output:
# HOW DO I PRINT THIS QUESTION?

# Every answer choice is a <label>; find_all returns them in document order.
for item in soup.find_all('label'):
    print(item.text.strip())
# Output:
# a. I WANT TO PRINT THIS
# b. I WANT TO PRINT THIS TOO
# c. I WANT TO PRINT THIS ALSO
# d. I WANT TO PRINT THIS AS WELL

# The correct answer is inside <div class="rightanswer">.
print(soup.find('div', class_='rightanswer').text.strip())
# Output:
# THE CORRECT ANSWER IS: I WANT TO PRINT THIS
Вместо .text.strip()
вы также можете использовать .get_text(strip=True)
Полный код:
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS
# Parse the HTML fixture with the parser from the standard library.
soup = BS(data, 'html.parser')

# Question text.
print(soup.find('div', class_='qtext').text.strip())

# Answer choices: one <label> per choice, printed in document order.
for item in soup.find_all('label'):
    print(item.text.strip())

# Correct answer.
print(soup.find('div', class_='rightanswer').text.strip())
ДОПОЛНЕНИЕ: Если в HTML несколько вопросов, можно найти тег, который содержит один вопрос вместе с его вариантами ответа и правильным ответом, например <div class="que multichoice deferredfeedback correct" id="q7">,
затем найти все такие теги и выполнять поиск уже внутри каждого из них.
# Assumes `soup` is an already-parsed BeautifulSoup document.
# Each <div class="... multichoice ..."> wraps one question together with its
# choices and its correct answer, so searching inside that tag keeps the
# results of different questions apart.
for questions in soup.find_all('div', class_='multichoice'):
    print(questions.find('div', class_='qtext').text.strip())
    for item in questions.find_all('label'):
        print(item.text.strip())
    print(questions.find('div', class_='rightanswer').text.strip())
Полный код (я продублировал один и тот же HTML-код, чтобы имитировать два вопроса):
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS
# Parse the HTML fixture with the parser from the standard library.
soup = BS(data, 'html.parser')

# Each <div class="... multichoice ..."> wraps one question together with its
# choices and its correct answer, so every lookup below is scoped to that tag.
for questions in soup.find_all('div', class_='multichoice'):
    print(questions.find('div', class_='qtext').text.strip())
    for item in questions.find_all('label'):
        print(item.text.strip())
    print(questions.find('div', class_='rightanswer').text.strip())
    # Separator printed once after each question.
    print('---')
Или можно использовать цикл for
для группировки элементов (этот способ предполагает, что у каждого вопроса ровно четыре варианта ответа):
from bs4 import BeautifulSoup as BS
# Parse the HTML fixture with the parser from the standard library.
soup = BS(data, 'html.parser')

# Collect each kind of element over the whole document, in document order.
all_questions = soup.find_all('div', class_='qtext')
all_choices = soup.find_all('label')
all_answers = soup.find_all('div', class_='rightanswer')

# The flat list of labels is cut into fixed-size slices, so this approach only
# lines up when every question has exactly this many choices.
choices_per_question = 4

for x, question in enumerate(all_questions):
    print(question.text.strip())
    # Index of the first choice that belongs to question number x.
    y = x * choices_per_question
    for item in all_choices[y:y + choices_per_question]:
        print(item.text.strip())
    print(all_answers[x].text.strip())
    # Separator printed once after each question.
    print('---')
Полный код:
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS
# Parse the HTML fixture with the parser from the standard library.
soup = BS(data, 'html.parser')

# Collect each kind of element over the whole document, in document order.
all_questions = soup.find_all('div', class_='qtext')
all_choices = soup.find_all('label')
all_answers = soup.find_all('div', class_='rightanswer')

# The flat list of labels is cut into fixed-size slices, so this approach only
# lines up when every question has exactly this many choices.
choices_per_question = 4

for x, question in enumerate(all_questions):
    print(question.text.strip())
    # Index of the first choice that belongs to question number x.
    y = x * choices_per_question
    for item in all_choices[y:y + choices_per_question]:
        print(item.text.strip())
    print(all_answers[x].text.strip())
    # Separator printed once after each question.
    print('---')