68 lines
1.7 KiB
Python
68 lines
1.7 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
import datetime
|
|
import re
|
|
|
|
SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader'
|
|
|
|
|
|
def loadPage(url: str, timeout: float = 10.0) -> bytes:
    """Download *url* and return the raw response body as bytes.

    Parameters:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before aborting; prevents
            the original behavior of hanging indefinitely on a dead host.
            Defaults to 10 so existing callers are unaffected on success.
    """
    return requests.get(url, timeout=timeout).content
|
|
|
|
|
|
def getFoodplanName(soup):
    """Return the heading text of the food plan page.

    Selects the first element matching SPEISEPLAN_NAME_SELECTOR; raises
    IndexError when the page contains no such heading (same as before).
    """
    headers = soup.select(SPEISEPLAN_NAME_SELECTOR)
    first_header = headers[0]
    return first_header.getText()
|
|
|
|
|
|
def getRightLine(lines):
    """Keep only the text lines that look like menu entries.

    A line qualifies when it starts with digits and later contains an
    uppercase letter (e.g. "12.05. PASTA ...").

    NOTE(review): the '.' in the pattern is unescaped, so it matches any
    character, not only a literal dot — presumably good enough for this
    page layout, but confirm before tightening it.
    """
    menu_pattern = re.compile(r"[0-9]+.+[A-Z]+")
    texts = (entry.getText() for entry in lines)
    return [text for text in texts if menu_pattern.match(text)]
|
|
|
|
|
|
def getFoodPerDay(soup):
    """Extract one {date, menu} entry per matching menu line from the page.

    Parameters:
        soup: parsed BeautifulSoup document of the food-plan page.

    Returns:
        List of dicts shaped {'date': <first token>, 'menu': [{'title': <rest>}]}.

    Bug fix: the original used ``food.replace(day, "")`` to derive the food
    name, which deletes EVERY occurrence of the day token in the line — a
    dish whose name happened to contain the same substring (e.g. a date
    like "12." appearing again) was silently mangled. Splitting once on the
    first whitespace removes only the leading day token.
    """
    days = []
    lines = soup.select('.csc-default .bodytext')
    for food in getRightLine(lines):
        parts = str(food).split(None, 1)
        day = parts[0]
        # Everything after the first whitespace run is the dish title;
        # an entry with no title yields an empty string, as before.
        foodName = parts[1].strip() if len(parts) > 1 else ""
        days.append({'date': day, 'menu': [{'title': foodName}]})
    return days
|
|
|
|
|
|
def parsePage(url: str):
    """Fetch the menu page at *url* and return the food plan as a JSON string.

    Result shape:
        {"weekmenu": [{"date": ..., "menu": [{"title": ...}]}, ...],
         "name": <plan heading>,
         "execution_time": <today, e.g. "Monday, 01.01.2024">}
    """
    soup = BeautifulSoup(loadPage(url), "lxml")
    # Key order matches the original insertion order, so the emitted
    # JSON string is byte-identical.
    mensaSpeiseplan = {
        'weekmenu': getFoodPerDay(soup),
        'name': getFoodplanName(soup),
        'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y"),
    }
    return json.dumps(mensaSpeiseplan)
|
|
|
|
|
|
# LINK_ERBA_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-erba-insel.html"
|
|
# parsePage(LINK_ERBA_CAFETE)
|