Попробуйте упростить себе задачу: у каждого элемента есть ровно один заголовок, одна компания и одно местоположение в одном месте, поэтому сначала получите сам элемент, а затем используйте select_one(), чтобы извлечь из него остальные данные.
import requests
from bs4 import BeautifulSoup

URL = "https://www.indeed.co.uk/jobs-in-essex"

# Fetch the search-results page once and parse it.
page = requests.get(URL)
soup = BeautifulSoup(page.text, 'html.parser')

jobs = []  # list of [title, company, location] triples, one per job card
# Each job card is a <div> carrying a data-tu attribute.  Iterating the
# cards (instead of selecting titles/companies/locations page-wide) keeps
# the three fields of one listing together even when a card lacks a field.
for item in soup.find_all('div', {'data-tu': True}):
    job = []
    for selector in ('.jobtitle', '.company', '.location'):
        tag = item.select_one(selector)
        # select_one() returns None when the card has no matching element;
        # guard so a missing field yields '' instead of an AttributeError.
        job.append(tag.text.strip() if tag else '')
    jobs.append(job)

print(jobs)  # list of lists
Вывод:
[['Terminal Security Officer', 'Manchester Airports Group', 'Stansted'], ['Project Estimator (Special Structures & Telecoms)', 'Swann Engineering Group Limited', 'Braintree'], ['Digital Copywriter', 'Absolute Digital Media Limited', 'Chelmsford'], ['Border Force Central Region - Support Team EO - Stansted Air...', 'Home Office', 'Stansted CM24'], ['TUI Cabin Crew - STN, Summer 2019', 'TUI Group', 'Stansted'], ['Administrative Assistant', 'NELFT NHS Foundation Trust', 'Essex'], ['Sales Assistant', 'Adidas', 'Essex'], ['Visitor Experience Officer', 'National Trust', 'Alresford'], ['Customer Assistant', 'Marks & Spencer', 'Braintree'], ['Checkout Operator', 'ASDA', 'Tiptree'], ['Customer Assistant - Checkouts', 'Tesco', 'Tiptree'], ['Cinema Host', 'ODEON', 'Colchester CO1'], ['Customer Assistant - checkout operator', 'Tesco', 'Braintree'], ['Farm Worker', 'Essex Farm Services', 'Billericay CM11'], ['Courier Work (Own van required)', 'Atlas Courier Express UK Ltd', 'Essex']]
Или просто выберите все названия, компании и места и заархивируйте их:
import requests
from bs4 import BeautifulSoup

URL = "https://www.indeed.co.uk/jobs-in-essex"

# Download the results page and parse it once.
soup = BeautifulSoup(requests.get(URL).text, 'html.parser')


def _texts(css):
    """Return the stripped text of every element matching the CSS selector."""
    return [node.text.strip() for node in soup.select(css)]


# Grab all titles, companies and locations in document order, then pair
# them up positionally with zip().
title = _texts('.jobtitle')
company = _texts('.company')
location = _texts('.location')

print(list(zip(title, company, location)))
Вывод:
[('Terminal Security Officer', 'Manchester Airports Group', 'Stansted'), ('Project Estimator (Special Structures & Telecoms)', 'Swann Engineering Group Limited', 'Braintree'), ('Digital Copywriter', 'Absolute Digital Media Limited', 'Chelmsford'), ('TUI Cabin Crew - STN, Summer 2019', 'TUI Group', 'Stansted'), ('Border Force Central Region - Support Team EO - Stansted Air...', 'Home Office', 'Stansted CM24'), ('Administrative Assistant', 'NELFT NHS Foundation Trust', 'Essex'), ('Customer Assistant', 'Marks & Spencer', 'Braintree'), ('Sales Assistant', 'Adidas', 'Essex'), ('Visitor Experience Officer', 'National Trust', 'Alresford'), ('Cinema Host', 'ODEON', 'Colchester CO1'), ('Maintenance Surveyor (35 hours per week - Permanent)', 'The Salvation Army', 'Bishops Stortford'), ('Checkout Operator', 'ASDA', 'Tiptree'), ('Customer Assistant - checkout operator', 'Tesco', 'Braintree'), ('Farm Worker', 'Essex Farm Services', 'Billericay CM11'), ('Courier Work (Own van required)', 'Atlas Courier Express UK Ltd', 'Essex')]