Update parser, Implement logging

parent 0be7701451
commit 4bda3fa5fb
@@ -3,9 +3,11 @@ ADD ["ofu_app/requirements.txt", "/requirements.txt"]
 RUN apk upgrade --update && \
     apk add --update python3 py3-pillow py3-lxml py3-psycopg2 && \
     pip3 install -r /requirements.txt && rm /requirements.txt
 EXPOSE 80
 WORKDIR /app
 VOLUME ["/app"]
 VOLUME ["/app/data"]
 VOLUME ["/app/media"]
 VOLUME ["/app/log"]
 ENTRYPOINT ["python3", "manage.py"]
 CMD ["runserver", "0.0.0.0:80"]
@@ -9,6 +9,7 @@ services:
     volumes:
       - ./data/data:/data
       - ./data/media:/media
+      - ./log:/log
       - ./ofu_app/:/app
     env_file:
       - docker.env
@@ -1,5 +1,5 @@
-from django.core.management.base import BaseCommand, CommandError
-from apps.food.models import Menu, HappyHour, SingleFood
+from django.core.management.base import BaseCommand
+
+from apps.food.utils import migrate_data
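Aside: the new import suggests the large utility module below lives at apps/food/utils/migrate_data.py. A minimal sketch of how such a management command is typically wired (the Command class is illustrative, not part of this commit; only the imports and migrate_data.main() appear in the diff):

import logging

from django.core.management.base import BaseCommand

from apps.food.utils import migrate_data

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    help = "Import food plans from the configured sources (illustrative)"

    def handle(self, *args, **options):
        logger.info("starting food import")
        migrate_data.main()  # fetches, parses and stores the plans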
@@ -4,6 +4,9 @@ from pprint import pprint
 from django.db.utils import IntegrityError
 from apps.food.models import SingleFood, Menu, HappyHour, Allergene
 from apps.food.utils.parser import mensa_page_parser, fekide_happyhour_page_parser, cafete_page_parser
+import logging
+
+logger = logging.getLogger(__name__)

 # CONFIG SERVICE LINKS
 LINK_FEKI_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c"
@@ -15,79 +18,58 @@ LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour"
 LOCATION_NAMES = ('erba', 'markusplatz', 'feldkirchenstraße', 'austraße')


 def getJsonFromFile(path):
     with open(path, "r") as file:
         return json.load(file)


 def getLocation(raw_loc):
     for choice, name in zip(Menu.LOCATION_CHOICES, LOCATION_NAMES):
-        print(name.upper() in str(raw_loc).upper())
-        if (name.upper() in str(raw_loc).upper()):
+        if name.upper() in str(raw_loc).upper():
             return choice
-    print("LOCATION NOT FOUND")
+    logger.warning("{loc} unknown location".format(loc=raw_loc))
     return None


 def writeStudentenwerkDataInDB(data):
-    data = json.loads(data)
-    pprint(data)
     if not data:
+        logger.warning('no data')
         return
+    logger.info("{location}".format(location=data['name']))
     for menu in data['weekmenu']:
-        pprint(menu)
+        logger.info("{date}".format(date=menu['date']))
         foodlist = []
         for single_food in menu['menu']:
-            pprint(single_food)
-            if 'allergens' in single_food:
+            logger.info("{}".format(single_food['title']))
+            allergens = []
+            if 'allergens' in single_food:
                 for allergen in single_food['allergens']:
-                    try:
-                        allergens.append(Allergene.objects.create(name=allergen))
-                    except IntegrityError:
-                        allergens.append(Allergene.objects.get(name=allergen))
+                    allergens.append(Allergene.objects.get_or_create(name=allergen)[0])
+            # TODO: Consider keyword arg for price
-            try:
-                if 'prices' in single_food:
-                    if 'price_student' in single_food['prices']:
-                        price_student = single_food['prices']['price_student']
-                    else:
-                        price_student = "None"
-                    if 'price_employee' in single_food['prices']:
-                        price_employee = single_food['prices']['price_employee']
-                    else:
-                        price_employee = "None"
-                    if 'price_guest' in single_food['prices']:
-                        price_guest = single_food['prices']['price_guest']
-                    else:
-                        price_guest = "None"
-                    db_single_food = SingleFood.objects.create(name=single_food['title'],
-                                                               price_student=price_student,
-                                                               price_employee=price_employee,
-                                                               price_guest=price_guest)
-                else:
-                    db_single_food = SingleFood.objects.create(name=single_food['title'])
-                if 'allergens' in locals():
-                    db_single_food.allergens.set(allergens)
-                foodlist.append(db_single_food)
-            except IntegrityError:
-                db_single_food = SingleFood.objects.get(name=single_food['title'])
+            db_single_food, created = SingleFood.objects.get_or_create(name=single_food['title'])
+            if 'prices' in single_food:
+                if 'price_student' in single_food['prices']:
+                    db_single_food.price_student = single_food['prices']['price_student']
+                else:
+                    db_single_food.price_student = "None"
+                if 'price_employee' in single_food['prices']:
+                    db_single_food.price_employee = single_food['prices']['price_employee']
+                else:
+                    db_single_food.price_employee = "None"
+                if 'price_guest' in single_food['prices']:
+                    db_single_food.price_guest = single_food['prices']['price_guest']
+                else:
+                    db_single_food.price_guest = "None"
-            if 'allergens' in locals():
+            if allergens:
                 db_single_food.allergens.set(allergens)
             foodlist.append(db_single_food)
             try:
                 db_single_food.save()
             except IntegrityError as e:
                 logger.exception(e)

         try:
             date = datetime.strptime(str(menu['date']), "%d.%m.").replace(year=datetime.today().year)
-            menu = Menu.objects.create(location=getLocation(data['name']), date=date)
+            menu, _ = Menu.objects.get_or_create(location=getLocation(data['name']), date=date)
             menu.menu.set(foodlist)
             menu.save()
         except IntegrityError as error:
-            # ignored
-            pass
+            logger.exception(error)
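Aside: the change above swaps the create()-then-catch-IntegrityError pattern for get_or_create() plus field updates. A condensed sketch of that pattern, assuming the SingleFood model from the imports above (store_food and its arguments are illustrative, not code from this commit):

from apps.food.models import SingleFood


def store_food(title, prices):
    # get_or_create returns (instance, created); a second run with the same
    # name updates the existing row instead of raising IntegrityError
    db_single_food, created = SingleFood.objects.get_or_create(name=title)
    db_single_food.price_student = prices.get('price_student', "None")
    db_single_food.price_employee = prices.get('price_employee', "None")
    db_single_food.price_guest = prices.get('price_guest', "None")
    db_single_food.save()
    return db_single_food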
@@ -106,14 +88,16 @@ def writeFekideDataInDB(data):
         happyhour.endtime = datetime.strptime(time[1], "%H:%M").time()
         happyhour.save()

-        print("%s: Happy Hour: Location: %s, Description: %s" % (
-            str(happyhour.date.date()), str(happyhour.location), str(happyhour.description)))
+        logger.info("%s: Happy Hour: Location: %s, Description: %s",
+                    str(happyhour.date.date()), str(happyhour.location), str(happyhour.description))


 def writeoutDBObjects():
-    pprint("SingleFood: " + str(SingleFood.objects.count()))
-    pprint("Menu: " + str(Menu.objects.count()))
-    pprint("HappyHour: " + str(HappyHour.objects.count()))
+    return "\n\tSingleFood: {single_food}\n\tMenu: {menu}\n\tHappyHour: {happy_hour}".format(
+        single_food=SingleFood.objects.count(),
+        menu=Menu.objects.count(),
+        happy_hour=HappyHour.objects.count()
+    )


 def delete():
@@ -126,17 +110,16 @@ def delete():


 def main():
-    print("Aktueller Stand:")
-    writeoutDBObjects()
+    logger.info("Aktueller Stand:" + writeoutDBObjects())

     # get food jsons
     writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_AUSTR_MENSA))
     writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_FEKI_MENSA))
-    writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_ERBA_CAFETE))
-    writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_MARKUS_CAFETE))
-    writeFekideDataInDB(fekide_happyhour_page_parser.parsePage(LINK_FEKIDE_GUIDE))
+    writeStudentenwerkDataInDB(cafete_page_parser.parse_page(LINK_ERBA_CAFETE))
+    writeStudentenwerkDataInDB(cafete_page_parser.parse_page(LINK_MARKUS_CAFETE))
+    writeFekideDataInDB(fekide_happyhour_page_parser.parse_page(LINK_FEKIDE_GUIDE))

-    print("Neuer Stand:")
-    writeoutDBObjects()
+    logger.info("Neuer Stand:" + writeoutDBObjects())


 if __name__ == '__main__':
@@ -0,0 +1,8 @@
+import requests
+
+
+def load_page(url: str):
+    response = requests.get(url)
+    if not response.ok:
+        raise ConnectionError("Response not ok", response.status_code, url)
+    return response.content
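The new shared helper fails loudly instead of handing a parser an error page. A usage sketch (the URL is a placeholder; requests sets response.ok to False for status codes >= 400, and the import path is assumed from the "from . import load_page" lines below):

from apps.food.utils.parser import load_page  # import path assumed

try:
    html = load_page("https://example.org/speiseplan")  # placeholder URL
except ConnectionError as error:
    # raised for any response with status >= 400
    print(error)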
@@ -1,23 +1,22 @@
-import requests
-from bs4 import BeautifulSoup
 import json
 import datetime
+import logging
 import re
 from pprint import pprint

+from bs4 import BeautifulSoup
+
+from . import load_page
+
+logger = logging.getLogger(__name__)

 SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader'


-def loadPage(url: str):
-    return requests.get(url).content
-
-
-def getFoodplanName(soup):
+def get_foodplan_name(soup):
     foodplan_name = soup.select(SPEISEPLAN_NAME_SELECTOR)[0].getText()
     return foodplan_name


-def getRightLine(lines):
+def get_right_line(lines):
     foodlines = []
     pattern = re.compile("[0-9]+.+[A-Z]+")
     for line in list(lines):
@@ -27,42 +26,42 @@ def getRightLine(lines):
     return foodlines


-def getFoodPerDay(soup):
+def get_food_per_day(soup):
     days = []
     lines = soup.select('.csc-default .bodytext')
-    foodlines = getRightLine(lines)
+    foodlines = get_right_line(lines)
     for food in foodlines:
-        dayObj = {}
         day = str(food).split()[0]
-        foodName = str(food).replace(day, "").strip()
-        singleFoodObj = {}
-        singleFoodObj['title'] = foodName
-        dayObj['date'] = day
-        dayObj['menu'] = [singleFoodObj]
-        days.append(dayObj)
+        food_name = str(food).replace(day, "").strip()
+        single_food_obj = {'title': food_name}
+        day_obj = {
+            'date': day,
+            'menu': [single_food_obj]
+        }
+        days.append(day_obj)
     return days


-def parsePage(url: str):
-    pagecontent = {}
+def parse_page(url: str):
     # {mensaspeiseplan:
     # {name:"",
     # weekmenu: [day:{date:, menu:[,,,]}]
     # }
     # }
-    page = loadPage(url)
-    mensaSpeiseplan = {}
+    try:
+        page = load_page(url)
         soup = BeautifulSoup(page, "lxml")
-    foodplan_name = getFoodplanName(soup)
-
-    days = getFoodPerDay(soup)
-    mensaSpeiseplan['weekmenu'] = days
-    mensaSpeiseplan['name'] = foodplan_name
-    mensaSpeiseplan['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    mensaSpeiseplanJson = json.dumps(mensaSpeiseplan)
-    return mensaSpeiseplanJson
+        foodplan_name = get_foodplan_name(soup)
+
+        days = get_food_per_day(soup)
+        return {
+            'weekmenu': days,
+            'name': foodplan_name,
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None

 # LINK_ERBA_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-erba-insel.html"
 # pprint(parsePage(LINK_ERBA_CAFETE))
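parse_page now returns a plain dict on success and None on failure (the exception is already logged), so callers no longer need json.loads. A minimal caller sketch (placeholder URL; import path assumed):

from apps.food.utils.parser import cafete_page_parser  # import path assumed

plan = cafete_page_parser.parse_page("https://example.org/cafete")  # placeholder URL
if plan is not None:
    print(plan['name'], len(plan['weekmenu']))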
@@ -1,51 +1,53 @@
-import requests
-from bs4 import BeautifulSoup
 import datetime
 import json
+import logging

+from bs4 import BeautifulSoup
+
+from . import load_page
+
+logger = logging.getLogger(__name__)

 SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader'


-def loadPage(url: str):
-    return requests.get(url).content
-
-
-def getDay():
+def get_day():
     return datetime.datetime.today().strftime("%A, %d.%m.%Y")


-def getHappyHours(soup):
+def get_happy_hours(soup):
     happyhours = []
     happyhourstable = soup.select('#food .table tr')
     for tableline in happyhourstable:
-        happyhour = {}
         linesoup = BeautifulSoup(str(tableline), "lxml")
         location = linesoup.find("td", {"class": "location"}).getText()
         time = linesoup.find("td", {"class": "time"}).getText()
         description = linesoup.find("td", {"class": "description"}).getText()
         description = str(description).strip()

-        happyhour['location'] = location
-        happyhour['time'] = time
-        happyhour['description'] = description
+        happyhour = {
+            'location': location,
+            'time': time,
+            'description': description
+        }
         happyhours.append(happyhour)
     return happyhours


-def parsePage(url: str):
-    pagecontent = {}
+def parse_page(url: str):
     # {
     # happyhours:[{happyhour:{location: "",time: "",description: ""},,,,]
     # }
-    happyhours = []
-
-    page = loadPage(url)
-    soup = BeautifulSoup(page, "lxml")
-    happyhours = getHappyHours(soup)
-    pagecontent['happyhours'] = happyhours
-    pagecontent['day'] = getDay()
-    pagecontent['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    return pagecontent
+    try:
+        page = load_page(url)
+        soup = BeautifulSoup(page, "lxml")
+        happyhours = get_happy_hours(soup)
+        return {
+            'happyhours': happyhours,
+            'day': get_day(),
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None

 # LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour/wochenuebersicht"
 # parsePage(LINK_FEKIDE_GUIDE)
@@ -1,14 +1,11 @@
-import requests
-from bs4 import BeautifulSoup
 import json
 import datetime
+import logging

+from bs4 import BeautifulSoup
+
 # FEKI_URL = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c"
+from . import load_page

-def loadPage(url: str):
-    return requests.get(url).content
+logger = logging.getLogger(__name__)


 def getMenuDay(soup):
@@ -18,12 +15,10 @@ def getMenuDay(soup):
 def getFoodPerDay(soup):
     week_menus = []
     for day in soup.select('.currentweek .day'):
-        menu = {}
         daysoup = BeautifulSoup(str(day), "lxml")
         day = getMenuDay(daysoup)
         day_menu = []
         for singleFood in daysoup.select('.menuwrap .menu'):
-            singleFoodObj = {}
             singleFoodSoup = BeautifulSoup(str(singleFood), "lxml")
             title = singleFoodSoup.find('div', {'class': 'title'}).getText()
             allergens = [e.getText() for e in singleFoodSoup.select('.left .additnr .toggler ul li')]
@@ -34,13 +29,16 @@ def getFoodPerDay(soup):
                 prices['price_employee'] = singleFoodSoup.select('.price')[0]['data-bed']
             if singleFoodSoup.select('.price'):
                 prices['price_guest'] = singleFoodSoup.select('.price')[0]['data-guest']
-            singleFoodObj['title'] = title
-            singleFoodObj['allergens'] = allergens
-            singleFoodObj['prices'] = prices
-            day_menu.append(singleFoodObj)
-
-        menu['date'] = str(day).split(" ")[1]
-        menu['menu'] = day_menu
+            single_food_obj = {
+                'title': title,
+                'allergens': allergens,
+                'prices': prices
+            }
+            day_menu.append(single_food_obj)
+        menu = {
+            'date': str(day).split(" ")[1],
+            'menu': day_menu
+        }
         week_menus.append(menu)
     return week_menus
@@ -52,16 +50,19 @@ def parsePage(url: str):
     # weekmenu: [day:{date:, menu:[,,,]}]
     # }
     # }
-    mensaSpeiseplan = {}
-    page = loadPage(url)
+    try:
+        page = load_page(url)
         soup = BeautifulSoup(page, "lxml")
         foodplan_name = getFoodplanName(soup)
         days = getFoodPerDay(soup)
-    mensaSpeiseplan['weekmenu'] = days
-    mensaSpeiseplan['name'] = foodplan_name
-    mensaSpeiseplan['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    mensaSpeiseplanJson = json.dumps(mensaSpeiseplan)
-    return mensaSpeiseplanJson
+        return {
+            'weekmenu': days,
+            'name': foodplan_name,
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None


 def getFoodplanName(soup):
@@ -50,7 +50,6 @@ REST_FRAMEWORK = {
     'DEFAULT_PERMISSION_CLASSES': [
         'rest_framework.permissions.IsAuthenticated',
     ],
     'PAGE_SIZE': 10
 }
 MIDDLEWARE = [
     'django.middleware.security.SecurityMiddleware',
@@ -190,3 +189,31 @@ EMAIL_USE_TLS = True

 ACCOUNT_EMAIL_UNIQUE = True
 ACCOUNT_EMAIL_CONFIRMATION_REQUIRED = True
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'default': {
+            'format': '%(asctime)s %(module)s [%(levelname)s]: %(message)s',
+            'datefmt': '%Y-%m-%d %H:%M:%S',
+        }
+    },
+    'handlers': {
+        'console': {
+            'class': 'logging.StreamHandler',
+            'formatter': 'default',
+        },
+        'file': {
+            'class': 'logging.FileHandler',
+            'filename': '/log/import_food.log',
+            'formatter': 'default',
+        }
+    },
+    'loggers': {
+        'apps.food.utils': {
+            'handlers': ['console', 'file'],
+            'level': os.getenv('DJANGO_LOG_LEVEL', 'DEBUG'),
+        },
+    },
+}
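With this LOGGING dict, any module under apps.food.utils that calls logging.getLogger(__name__) propagates its records to the 'apps.food.utils' logger configured above, so messages land on the console and in /log/import_food.log (the directory the compose change mounts as ./log). A minimal sketch, assuming the module lives at apps/food/utils/migrate_data.py:

import logging

logger = logging.getLogger(__name__)  # resolves to 'apps.food.utils.migrate_data'

# handled by the 'apps.food.utils' logger via propagation:
logger.info("Aktueller Stand: ...")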