import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
# Scrape the Lehigh men's soccer schedule page for every season listed in
# `year_id` and write one row per game to 'lehigh.xlsx'.
year_id = ['2019','2018','2017','2016','2015','2014','2013','2012','2011','2010','2009','2008','2007','2006','2005','2004','2003']

# Order in which the fields of each scraped game are collected.
RECORD_COLUMNS = ['year', 'date', 'opponent', 'list', 'conference', 'result', 'score', 'ot']

# Splits result text into (letter, score, trailing text); the trailing text
# is stored in the 'ot' column. Compiled once instead of once per game row.
RESULT_PATTERN = re.compile(r'([A-Z]),\s+([\d-]+)\s*(.*)')

# Seconds to wait on the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _div_text(row, css_class):
    """Return the stripped text of the first matching <div> in `row`.

    Returns '' when the div is absent, so one incomplete game row does not
    abort the whole scrape.
    """
    node = row.find('div', class_=css_class)
    return node.text.strip() if node is not None else ''


def _parse_result(row):
    """Return (result, score, ot) strings for a game row.

    Returns ('', '', '') when the row has no result element or its text does
    not match RESULT_PATTERN.
    """
    node = row.select_one('.sidearm-schedule-game-result')
    if node is None:
        return ('', '', '')
    match = RESULT_PATTERN.search(node.get_text(strip=True, separator=' '))
    return match.groups() if match else ('', '', '')


# Plain lists are accumulated and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and appending one-row frames in
# a loop copies the whole frame on every iteration.
records = []
for year in year_id:
    url = 'https://lehighsports.com/sports/mens-soccer/schedule/' + year
    print(url)
    lehigh = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(lehigh, 'lxml')
    rows = soup.find_all('div', class_="sidearm-schedule-game-row flex flex-wrap flex-align-center row")
    for row in rows:
        date = _div_text(row, "sidearm-schedule-game-opponent-date")
        name = _div_text(row, "sidearm-schedule-game-opponent-name")
        opp = _div_text(row, "sidearm-schedule-game-opponent-text")
        conf = _div_text(row, "sidearm-schedule-game-conference-conference")
        result, score, ot = _parse_result(row)
        records.append([year, date, name, opp, conf, result, score, ot])

results = pd.DataFrame(records, columns=RECORD_COLUMNS)
# NOTE(review): the previous append(sort=True) calls are believed to have
# left the columns in alphabetical order; that layout is kept so the
# spreadsheet columns do not move. Confirm against an existing lehigh.xlsx.
results = results[sorted(RECORD_COLUMNS)]
results.to_excel('lehigh.xlsx')