This repository has been archived on 2019-10-13. You can view files and clone it, but cannot push or open issues or pull requests.
basta-server/ofu-food/mensa-page-parser.py
2017-09-24 02:00:33 +02:00

48 lines
1.2 KiB
Python

import requests
from bs4 import BeautifulSoup
import json
def loadPage(url: str, timeout: float = 10.0) -> bytes:
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The undecoded response body.

    Raises:
        requests.exceptions.Timeout: The server did not answer in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than handing an error page to the HTML parser.
    response.raise_for_status()
    return response.content
def getMenuDay(soup):
    """Return the first child node of the ``<h5>`` in the first ``div.day``.

    Args:
        soup: Parsed document to search; the first ``<div class="day">``
            found in it is used.

    Returns:
        ``contents[0]`` of that div's ``<h5>`` element.
    """
    day_div = soup.find("div", {"class": "day"})
    heading = day_div.h5
    return heading.contents[0]
def getFoodPerDay(soup):
    """Collect the date and dish titles for every day of the current week.

    Args:
        soup: Parsed menu page; days are the elements matching
            ``.currentweek .day``.

    Returns:
        A list with one dict per matched day, each holding ``'date'`` (the
        value from ``getMenuDay``) and ``'menu'`` (a list of title texts).
    """
    week = []
    for day_tag in soup.select('.currentweek .day'):
        # Re-parse the day on its own so that getMenuDay finds this
        # div.day as a descendant of the fragment it is given.
        fragment = BeautifulSoup(str(day_tag), "lxml")
        date = getMenuDay(fragment)
        titles = fragment.select('.menuwrap .menu .left .title')
        week.append({
            'date': date,
            'menu': [title.getText() for title in titles],
        })
    return week
def parsePage(url: str) -> str:
    """Fetch a menu page and return its contents as a JSON string.

    The page is downloaded with ``loadPage``, parsed with the "lxml"
    parser and reduced to an object of the form::

        {"weekmenu": [{"date": ..., "menu": [..., ...]}, ...],
         "name": ...}

    ``weekmenu`` is the result of ``getFoodPerDay`` and ``name`` the result
    of ``getFoodplanName``.

    Args:
        url: Address of the menu page.

    Returns:
        The object above serialised with ``json.dumps``.
    """
    page = loadPage(url)
    soup = BeautifulSoup(page, "lxml")
    foodplan_name = getFoodplanName(soup)
    days = getFoodPerDay(soup)
    # 'weekmenu' stays ahead of 'name' so the emitted key order is unchanged.
    mensaSpeiseplan = {'weekmenu': days, 'name': foodplan_name}
    return json.dumps(mensaSpeiseplan)
def getFoodplanName(soup):
    """Return the text of the first element matching ``.mensamenu h2``.

    Args:
        soup: Parsed menu page.

    Returns:
        The text of the first matching heading.

    Raises:
        IndexError: No element matches the selector.
    """
    headings = soup.select('.mensamenu h2')
    first_heading = headings[0]
    return first_heading.getText()