Update parser, Implement logging

michigg 2018-03-22 17:22:42 +01:00
parent 0be7701451
commit 4bda3fa5fb
9 changed files with 177 additions and 154 deletions

View File

@@ -3,9 +3,11 @@ ADD ["ofu_app/requirements.txt", "/requirements.txt"]
 RUN apk upgrade --update && \
     apk add --update python3 py3-pillow py3-lxml py3-psycopg2 && \
     pip3 install -r /requirements.txt && rm /requirements.txt
 EXPOSE 80
 WORKDIR /app
 VOLUME ["/app"]
 VOLUME ["/app/data"]
+VOLUME ["/app/media"]
+VOLUME ["/app/log"]
 ENTRYPOINT ["python3", "manage.py"]
 CMD ["runserver", "0.0.0.0:80"]

View File

@@ -9,6 +9,7 @@ services:
     volumes:
       - ./data/data:/data
      - ./data/media:/media
+      - ./log:/log
       - ./ofu_app/:/app
     env_file:
       - docker.env

View File

@@ -1,5 +1,5 @@
-from django.core.management.base import BaseCommand, CommandError
-from apps.food.models import Menu, HappyHour, SingleFood
+from django.core.management.base import BaseCommand
+from apps.food.utils import migrate_data

View File

@@ -4,6 +4,9 @@ from pprint import pprint
 from django.db.utils import IntegrityError
 from apps.food.models import SingleFood, Menu, HappyHour, Allergene
 from apps.food.utils.parser import mensa_page_parser, fekide_happyhour_page_parser, cafete_page_parser
+import logging
+
+logger = logging.getLogger(__name__)

 # CONFIG SERVICE LINKS
 LINK_FEKI_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c"
@@ -15,79 +18,58 @@ LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour"
 LOCATION_NAMES = ('erba', 'markusplatz', 'feldkirchenstraße', 'austraße')

 def getJsonFromFile(path):
     with open(path, "r") as file:
         return json.load(file)

 def getLocation(raw_loc):
     for choice, name in zip(Menu.LOCATION_CHOICES, LOCATION_NAMES):
-        print(name.upper() in str(raw_loc).upper())
-        if (name.upper() in str(raw_loc).upper()):
+        if name.upper() in str(raw_loc).upper():
             return choice
-    print("LOCATION NOT FOUND")
+    logger.warning("{loc} unknown location".format(loc=raw_loc))
     return None

 def writeStudentenwerkDataInDB(data):
-    data = json.loads(data)
-    pprint(data)
     if not data:
+        logger.warning('no data')
         return
+    logger.info("{location}".format(location=data['name']))
     for menu in data['weekmenu']:
-        pprint(menu)
+        logger.info("{date}".format(date=menu['date']))
         foodlist = []
         for single_food in menu['menu']:
-            pprint(single_food)
+            logger.info("{}".format(single_food['title']))
+            allergens = []
             if 'allergens' in single_food:
-                allergens = []
                 for allergen in single_food['allergens']:
-                    try:
-                        allergens.append(Allergene.objects.create(name=allergen))
-                    except IntegrityError:
-                        allergens.append(Allergene.objects.get(name=allergen))
+                    allergens.append(Allergene.objects.get_or_create(name=allergen)[0])
-            try:
-                if 'prices' in single_food:
-                    if 'price_student' in single_food['prices']:
-                        price_student = single_food['prices']['price_student']
-                    else:
-                        price_student = "None"
-                    if 'price_employee' in single_food['prices']:
-                        price_employee = single_food['prices']['price_employee']
-                    else:
-                        price_employee = "None"
-                    if 'price_guest' in single_food['prices']:
-                        price_guest = single_food['prices']['price_guest']
-                    else:
-                        price_guest = "None"
-                    db_single_food = SingleFood.objects.create(name=single_food['title'],
-                                                               price_student=price_student,
-                                                               price_employee=price_employee,
-                                                               price_guest=price_guest)
-                else:
-                    db_single_food = SingleFood.objects.create(name=single_food['title'])
-                if 'allergens' in locals():
-                    db_single_food.allergens.set(allergens)
-                foodlist.append(db_single_food)
-            except IntegrityError:
-                db_single_food = SingleFood.objects.get(name=single_food['title'])
-                if 'allergens' in locals():
-                    db_single_food.allergens.set(allergens)
-                foodlist.append(db_single_food)
+            # TODO: Consider keyword arg for price
+            db_single_food, created = SingleFood.objects.get_or_create(name=single_food['title'])
+            if 'prices' in single_food:
+                if 'price_student' in single_food['prices']:
+                    db_single_food.price_student = single_food['prices']['price_student']
+                else:
+                    db_single_food.price_student = "None"
+                if 'price_employee' in single_food['prices']:
+                    db_single_food.price_employee = single_food['prices']['price_employee']
+                else:
+                    db_single_food.price_employee = "None"
+                if 'price_guest' in single_food['prices']:
+                    db_single_food.price_guest = single_food['prices']['price_guest']
+                else:
+                    db_single_food.price_guest = "None"
+            if allergens:
+                db_single_food.allergens.set(allergens)
+            foodlist.append(db_single_food)
+            try:
+                db_single_food.save()
+            except IntegrityError as e:
+                logger.exception(e)
         try:
             date = datetime.strptime(str(menu['date']), "%d.%m.").replace(year=datetime.today().year)
-            menu = Menu.objects.create(location=getLocation(data['name']), date=date)
+            menu, _ = Menu.objects.get_or_create(location=getLocation(data['name']), date=date)
             menu.menu.set(foodlist)
             menu.save()
         except IntegrityError as error:
-            # ignored
-            pass
+            logger.exception(error)

 def writeFekideDataInDB(data):
@@ -106,14 +88,16 @@ def writeFekideDataInDB(data):
         happyhour.endtime = datetime.strptime(time[1], "%H:%M").time()
         happyhour.save()
-        print("%s: Happy Hour: Location: %s, Description: %s" % (
-            str(happyhour.date.date()), str(happyhour.location), str(happyhour.description)))
+        logger.info("%s: Happy Hour: Location: %s, Description: %s",
+                    str(happyhour.date.date()), str(happyhour.location), str(happyhour.description))

 def writeoutDBObjects():
-    pprint("SingleFood: " + str(SingleFood.objects.count()))
-    pprint("Menu: " + str(Menu.objects.count()))
-    pprint("HappyHour: " + str(HappyHour.objects.count()))
+    return "\n\tSingleFood: {single_food}\n\tMenu: {menu}\n\tHappyHour: {happy_hour}".format(
+        single_food=SingleFood.objects.count(),
+        menu=Menu.objects.count(),
+        happy_hour=HappyHour.objects.count()
+    )

 def delete():
@@ -126,17 +110,16 @@ def delete():
 def main():
-    print("Aktueller Stand:")
-    writeoutDBObjects()
+    logger.info("Aktueller Stand:" + writeoutDBObjects())
     # get food jsons
     writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_AUSTR_MENSA))
     writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_FEKI_MENSA))
-    writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_ERBA_CAFETE))
-    writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_MARKUS_CAFETE))
-    writeFekideDataInDB(fekide_happyhour_page_parser.parsePage(LINK_FEKIDE_GUIDE))
+    writeStudentenwerkDataInDB(cafete_page_parser.parse_page(LINK_ERBA_CAFETE))
+    writeStudentenwerkDataInDB(cafete_page_parser.parse_page(LINK_MARKUS_CAFETE))
+    writeFekideDataInDB(fekide_happyhour_page_parser.parse_page(LINK_FEKIDE_GUIDE))
-    print("Neuer Stand:")
-    writeoutDBObjects()
+    logger.info("Neuer Stand:" + writeoutDBObjects())

 if __name__ == '__main__':
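
The pattern recurring through this file replaces `Model.objects.create()` wrapped in IntegrityError handling with Django's `get_or_create()`, which does the lookup-or-insert in one call and returns an `(instance, created)` tuple. A minimal sketch of the idiom, not from the commit, using the models imported above (the allergen name is illustrative):

    from django.db.utils import IntegrityError
    from apps.food.models import Allergene

    # New idiom: one call; index [0] keeps the instance, [1] says whether it was created.
    allergen = Allergene.objects.get_or_create(name="gluten")[0]

    # Old idiom removed by this commit: create, then fall back to get on a unique clash.
    try:
        allergen = Allergene.objects.create(name="gluten")
    except IntegrityError:
        allergen = Allergene.objects.get(name="gluten")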

View File

@@ -0,0 +1,8 @@
+import requests
+
+
+def load_page(url: str):
+    response = requests.get(url)
+    if not response.ok:
+        raise ConnectionError("Response not ok", response.status_code, url)
+    return response.content
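
All three parsers now import this helper via `from . import load_page`, so a single place decides what counts as a failed fetch. A usage sketch, assuming the helper sits in the parser package's `__init__` (the URL is illustrative):

    import logging
    from apps.food.utils.parser import load_page

    logger = logging.getLogger(__name__)

    try:
        html = load_page("https://example.org/speiseplan.html")
    except ConnectionError as e:
        # raised for any non-ok (>= 400) HTTP status; parse_page catches it and returns None
        logger.exception(e)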

View File

@@ -1,23 +1,22 @@
-import requests
-from bs4 import BeautifulSoup
 import json
 import datetime
+import logging
 import re
 from pprint import pprint
+from bs4 import BeautifulSoup
+from . import load_page
+
+logger = logging.getLogger(__name__)

 SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader'

-def loadPage(url: str):
-    return requests.get(url).content
-
-def getFoodplanName(soup):
+def get_foodplan_name(soup):
     foodplan_name = soup.select(SPEISEPLAN_NAME_SELECTOR)[0].getText()
     return foodplan_name

-def getRightLine(lines):
+def get_right_line(lines):
     foodlines = []
     pattern = re.compile("[0-9]+.+[A-Z]+")
     for line in list(lines):
@@ -27,42 +26,42 @@ def getRightLine(lines):
     return foodlines

-def getFoodPerDay(soup):
+def get_food_per_day(soup):
     days = []
     lines = soup.select('.csc-default .bodytext')
-    foodlines = getRightLine(lines)
+    foodlines = get_right_line(lines)
     for food in foodlines:
-        dayObj = {}
         day = str(food).split()[0]
-        foodName = str(food).replace(day, "").strip()
-        singleFoodObj = {}
-        singleFoodObj['title'] = foodName
-        dayObj['date'] = day
-        dayObj['menu'] = [singleFoodObj]
-        days.append(dayObj)
+        food_name = str(food).replace(day, "").strip()
+        single_food_obj = {'title': food_name}
+        day_obj = {
+            'date': day,
+            'menu': [single_food_obj]
+        }
+        days.append(day_obj)
     return days

-def parsePage(url: str):
-    pagecontent = {}
+def parse_page(url: str):
     # {mensaspeiseplan:
     #     {name:"",
     #      weekmenu: [day:{date:, menu:[,,,]}]
     #     }
     # }
-    page = loadPage(url)
-    mensaSpeiseplan = {}
-    soup = BeautifulSoup(page, "lxml")
-    foodplan_name = getFoodplanName(soup)
-    days = getFoodPerDay(soup)
-    mensaSpeiseplan['weekmenu'] = days
-    mensaSpeiseplan['name'] = foodplan_name
-    mensaSpeiseplan['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    mensaSpeiseplanJson = json.dumps(mensaSpeiseplan)
-    return mensaSpeiseplanJson
+    try:
+        page = load_page(url)
+        soup = BeautifulSoup(page, "lxml")
+        foodplan_name = get_foodplan_name(soup)
+        days = get_food_per_day(soup)
+        return {
+            'weekmenu': days,
+            'name': foodplan_name,
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None

 # LINK_ERBA_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-erba-insel.html"
 # pprint(parsePage(LINK_ERBA_CAFETE))
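
Since json.dumps is gone, parse_page now returns a plain dict (or None when fetching or parsing failed), which is why writeStudentenwerkDataInDB above no longer calls json.loads. A short consumer sketch under that contract (the URL is illustrative):

    from apps.food.utils.parser import cafete_page_parser

    data = cafete_page_parser.parse_page("https://example.org/cafeteria.html")
    if data is not None:  # None means the parser already logged the exception
        for day in data['weekmenu']:
            print(day['date'], [food['title'] for food in day['menu']])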

View File

@@ -1,51 +1,53 @@
-import requests
-from bs4 import BeautifulSoup
 import datetime
 import json
+import logging
+from bs4 import BeautifulSoup
+from . import load_page
+
+logger = logging.getLogger(__name__)

 SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader'

-def loadPage(url: str):
-    return requests.get(url).content
-
-def getDay():
+def get_day():
     return datetime.datetime.today().strftime("%A, %d.%m.%Y")

-def getHappyHours(soup):
+def get_happy_hours(soup):
     happyhours = []
     happyhourstable = soup.select('#food .table tr')
     for tableline in happyhourstable:
-        happyhour = {}
         linesoup = BeautifulSoup(str(tableline), "lxml")
         location = linesoup.find("td", {"class": "location"}).getText()
         time = linesoup.find("td", {"class": "time"}).getText()
         description = linesoup.find("td", {"class": "description"}).getText()
         description = str(description).strip()
-        happyhour['location'] = location
-        happyhour['time'] = time
-        happyhour['description'] = description
+        happyhour = {
+            'location': location,
+            'time': time,
+            'description': description
+        }
         happyhours.append(happyhour)
     return happyhours

-def parsePage(url: str):
-    pagecontent = {}
+def parse_page(url: str):
     # {
     #     happyhours:[{happyhour:{location: "",time: "",description: ""},,,,]
     # }
-    happyhours = []
-    page = loadPage(url)
-    soup = BeautifulSoup(page, "lxml")
-    happyhours = getHappyHours(soup)
-    pagecontent['happyhours'] = happyhours
-    pagecontent['day'] = getDay()
-    pagecontent['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    return pagecontent
+    try:
+        page = load_page(url)
+        soup = BeautifulSoup(page, "lxml")
+        happyhours = get_happy_hours(soup)
+        return {
+            'happyhours': happyhours,
+            'day': get_day(),
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None

 # LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour/wochenuebersicht"
 # parsePage(LINK_FEKIDE_GUIDE)

View File

@@ -1,14 +1,11 @@
-import requests
-from bs4 import BeautifulSoup
 import json
 import datetime
+import logging
+from bs4 import BeautifulSoup

 # FEKI_URL = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c"

-def loadPage(url: str):
-    return requests.get(url).content
+from . import load_page
+
+logger = logging.getLogger(__name__)

 def getMenuDay(soup):
@@ -18,12 +15,10 @@ def getMenuDay(soup):
 def getFoodPerDay(soup):
     week_menus = []
     for day in soup.select('.currentweek .day'):
-        menu = {}
         daysoup = BeautifulSoup(str(day), "lxml")
         day = getMenuDay(daysoup)
         day_menu = []
         for singleFood in daysoup.select('.menuwrap .menu'):
-            singleFoodObj = {}
             singleFoodSoup = BeautifulSoup(str(singleFood), "lxml")
             title = singleFoodSoup.find('div', {'class': 'title'}).getText()
             allergens = [e.getText() for e in singleFoodSoup.select('.left .additnr .toggler ul li')]
@@ -34,13 +29,16 @@ def getFoodPerDay(soup):
                 prices['price_employee'] = singleFoodSoup.select('.price')[0]['data-bed']
             if singleFoodSoup.select('.price'):
                 prices['price_guest'] = singleFoodSoup.select('.price')[0]['data-guest']
-            singleFoodObj['title'] = title
-            singleFoodObj['allergens'] = allergens
-            singleFoodObj['prices'] = prices
-            day_menu.append(singleFoodObj)
-        menu['date'] = str(day).split(" ")[1]
-        menu['menu'] = day_menu
+            single_food_obj = {
+                'title': title,
+                'allergens': allergens,
+                'prices': prices
+            }
+            day_menu.append(single_food_obj)
+        menu = {
+            'date': str(day).split(" ")[1],
+            'menu': day_menu
+        }
         week_menus.append(menu)
     return week_menus
@@ -52,16 +50,19 @@ def parsePage(url: str):
     #      weekmenu: [day:{date:, menu:[,,,]}]
     #     }
     # }
-    mensaSpeiseplan = {}
-    page = loadPage(url)
-    soup = BeautifulSoup(page, "lxml")
-    foodplan_name = getFoodplanName(soup)
-    days = getFoodPerDay(soup)
-    mensaSpeiseplan['weekmenu'] = days
-    mensaSpeiseplan['name'] = foodplan_name
-    mensaSpeiseplan['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y")
-    mensaSpeiseplanJson = json.dumps(mensaSpeiseplan)
-    return mensaSpeiseplanJson
+    try:
+        page = load_page(url)
+        soup = BeautifulSoup(page, "lxml")
+        foodplan_name = getFoodplanName(soup)
+        days = getFoodPerDay(soup)
+        return {
+            'weekmenu': days,
+            'name': foodplan_name,
+            'execution_time': datetime.datetime.today().strftime("%A, %d.%m.%Y")
+        }
+    except Exception as e:
+        logger.exception(e)
+        return None

 def getFoodplanName(soup):

View File

@@ -50,7 +50,6 @@ REST_FRAMEWORK = {
     'DEFAULT_PERMISSION_CLASSES': [
         'rest_framework.permissions.IsAuthenticated',
     ],
-    'PAGE_SIZE': 10
 }

 MIDDLEWARE = [
     'django.middleware.security.SecurityMiddleware',
@@ -76,7 +75,7 @@ TEMPLATES = [
     },
     {
         'BACKEND': 'django.template.backends.django.DjangoTemplates',
-        'DIRS': [os.path.join(BASE_DIR,'templates')],
+        'DIRS': [os.path.join(BASE_DIR, 'templates')],
         'APP_DIRS': True,
         'OPTIONS': {
             'context_processors': [
@@ -190,3 +189,31 @@ EMAIL_USE_TLS = True
 ACCOUNT_EMAIL_UNIQUE = True
 ACCOUNT_EMAIL_CONFIRMATION_REQUIRED = True
+
+LOGGING = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'default': {
+            'format': '%(asctime)s %(module)s [%(levelname)s]: %(message)s',
+            'datefmt': '%Y-%m-%d %H:%M:%S',
+        }
+    },
+    'handlers': {
+        'console': {
+            'class': 'logging.StreamHandler',
+            'formatter': 'default',
+        },
+        'file': {
+            'class': 'logging.FileHandler',
+            'filename': '/log/import_food.log',
+            'formatter': 'default',
+        }
+    },
+    'loggers': {
+        'apps.food.utils': {
+            'handlers': ['console', 'file'],
+            'level': os.getenv('DJANGO_LOG_LEVEL', 'DEBUG'),
+        },
+    },
+}
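
With this dictConfig, every module below apps.food.utils that calls logging.getLogger(__name__) writes to both the console and /log/import_food.log (the path exposed on the host through the new ./log:/log mount in docker-compose); the level comes from the DJANGO_LOG_LEVEL environment variable and defaults to DEBUG. A minimal sketch of the expected behaviour, with a hypothetical module name:

    import logging

    logger = logging.getLogger(__name__)  # e.g. 'apps.food.utils.import_food'
    logger.info("import started")
    # emitted to console and file roughly as:
    # 2018-03-22 17:22:42 import_food [INFO]: import started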