From 203bcaedd3dc1a75bbba8a51ed24179103e3e6df Mon Sep 17 00:00:00 2001 From: michigg Date: Fri, 6 Oct 2017 01:27:31 +0200 Subject: [PATCH] fixed food import --- ofu_app/.idea/workspace.xml | 406 ++++++------------ .../food/management/commands/import_food.py | 6 +- .../food/utils/json_generator/__init__.py | 0 .../json_generator/controller_json_food.py | 60 --- .../json_generator/jsons/cafete-erba.json | 0 .../json_generator/jsons/cafete-markus.json | 1 - .../jsons/happyhourguide-fekide.json | 1 - .../json_generator/jsons/mensa-austr.json | 1 - .../json_generator/jsons/mensa-feki.json | 1 - ofu_app/apps/food/utils/migrate_data.py | 23 +- .../food/utils/parser/cafete_page_parser.py | 15 +- .../parser/fekide_happyhour_page_parser.py | 4 +- .../food/utils/parser/mensa_page_parser.py | 1 - 13 files changed, 163 insertions(+), 356 deletions(-) delete mode 100644 ofu_app/apps/food/utils/json_generator/__init__.py delete mode 100644 ofu_app/apps/food/utils/json_generator/controller_json_food.py delete mode 100644 ofu_app/apps/food/utils/json_generator/jsons/cafete-erba.json delete mode 100644 ofu_app/apps/food/utils/json_generator/jsons/cafete-markus.json delete mode 100644 ofu_app/apps/food/utils/json_generator/jsons/happyhourguide-fekide.json delete mode 100644 ofu_app/apps/food/utils/json_generator/jsons/mensa-austr.json delete mode 100644 ofu_app/apps/food/utils/json_generator/jsons/mensa-feki.json diff --git a/ofu_app/.idea/workspace.xml b/ofu_app/.idea/workspace.xml index bfd8997..88987be 100644 --- a/ofu_app/.idea/workspace.xml +++ b/ofu_app/.idea/workspace.xml @@ -46,55 +46,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - + + @@ -110,42 +66,58 @@ - + - - - + + + + + - - + + - - + + + + + + + + + + + + + + - - + + - + - - + + - - - + + + + + @@ -169,9 +141,6 @@ @@ -335,7 +307,7 @@ @@ -995,21 +853,7 @@ - - - - - - - - - - - - - - @@ -1017,13 +861,6 @@ - - - - - - - @@ -1031,13 +868,6 @@ - - - - - - - @@ -1252,16 +1082,6 @@ - - - - - - - - - - @@ -1270,30 +1090,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - @@ -1318,16 +1114,6 @@ - - - - - - - - - - @@ -1336,13 +1122,95 @@ + + + + + + + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ofu_app/apps/food/management/commands/import_food.py b/ofu_app/apps/food/management/commands/import_food.py index c05c0a0..1926f58 100644 --- a/ofu_app/apps/food/management/commands/import_food.py +++ b/ofu_app/apps/food/management/commands/import_food.py @@ -1,8 +1,8 @@ from django.core.management.base import BaseCommand, CommandError from apps.food.models import Menu, HappyHour, SingleFood -from apps.food.utils.json_generator import controller_json_food from apps.food.utils import migrate_data + class Command(BaseCommand): help = "Imports Food from special Websites" @@ -10,6 +10,4 @@ class Command(BaseCommand): pass def handle(self, *args, **options): - controller_json_food.main("apps/food/utils/json_generator/jsons/") - migrate_data.main("apps/food/utils/json_generator/jsons/") - + migrate_data.main() diff --git a/ofu_app/apps/food/utils/json_generator/__init__.py b/ofu_app/apps/food/utils/json_generator/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ofu_app/apps/food/utils/json_generator/controller_json_food.py b/ofu_app/apps/food/utils/json_generator/controller_json_food.py deleted file mode 100644 index f6cd0bc..0000000 --- a/ofu_app/apps/food/utils/json_generator/controller_json_food.py +++ /dev/null @@ -1,60 +0,0 @@ -from apps.food.utils.parser import mensa_page_parser, cafete_page_parser, fekide_happyhour_page_parser - -# CONFIG -JSON_OUTPUT_DIR_FOOD = "./food/json_generator/jsons/" - -# CONFIG SERVICE LINKS -LINK_FEKI_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c" -LINK_AUSTR_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=2&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=511e047953ee1370c3b82c11a04624bb" -LINK_ERBA_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-erba-insel.html" -LINK_MARKUS_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-markusplatz.html" -LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour" - - -def writeToFile(jsonfile, root, filename): - with open((root + filename), "w") as file: - file.write(jsonfile) - - -def main(path=JSON_OUTPUT_DIR_FOOD): - try: - json_food_mensa_feki = mensa_page_parser.parsePage(LINK_FEKI_MENSA) - - except IndexError: - print("Error") - json_food_mensa_feki = {} - - try: - json_food_mensa_austr = mensa_page_parser.parsePage(LINK_AUSTR_MENSA) - except IndexError: - print("Error") - json_food_mensa_austr = {} - - try: - json_food_cafete_erba = cafete_page_parser.parsePage(LINK_ERBA_CAFETE) - except IndexError: - print("Error") - json_food_cafete_erba = {} - - try: - json_food_cafete_markus = cafete_page_parser.parsePage(LINK_MARKUS_CAFETE) - except IndexError: - print("Error") - json_food_cafete_markus = {} - - try: - json_food_fekidehappyhours = fekide_happyhour_page_parser.parsePage(LINK_FEKIDE_GUIDE) - except IndexError: - print("Error") - json_food_fekidehappyhours = {} - - # WRITE JSONS - writeToFile(json_food_mensa_feki, path, "mensa-feki.json") - writeToFile(json_food_mensa_austr, path, "mensa-austr.json") - writeToFile(json_food_cafete_erba, path, "cafete-erba.json") - writeToFile(json_food_cafete_markus, path, "cafete-markus.json") - writeToFile(json_food_fekidehappyhours, path, "happyhourguide-fekide.json") - - -if __name__ == "__main__": - main() diff --git a/ofu_app/apps/food/utils/json_generator/jsons/cafete-erba.json b/ofu_app/apps/food/utils/json_generator/jsons/cafete-erba.json deleted file mode 100644 index e69de29..0000000 diff --git a/ofu_app/apps/food/utils/json_generator/jsons/cafete-markus.json b/ofu_app/apps/food/utils/json_generator/jsons/cafete-markus.json deleted file mode 100644 index fc09baf..0000000 --- a/ofu_app/apps/food/utils/json_generator/jsons/cafete-markus.json +++ /dev/null @@ -1 +0,0 @@ -{"execution_time": "Thursday, 05.10.2017", "name": "Cafeteria Markusplatz", "weekmenu": [{"menu": ["Vegetarisches Chili sin Carne"], "date": "24.07."}, {"menu": ["Gem\u00fcseschnitzel mit Kr\u00e4utercreme und Kartoffeln"], "date": "25.07."}, {"menu": ["\u00dcberbackene Zucchini an Paprikaso\u00dfe mit P\u00fcree"], "date": "26.07."}, {"menu": ["Kaiserschmarrn mit Apfelmus"], "date": "27.07."}]} \ No newline at end of file diff --git a/ofu_app/apps/food/utils/json_generator/jsons/happyhourguide-fekide.json b/ofu_app/apps/food/utils/json_generator/jsons/happyhourguide-fekide.json deleted file mode 100644 index 7acb55a..0000000 --- a/ofu_app/apps/food/utils/json_generator/jsons/happyhourguide-fekide.json +++ /dev/null @@ -1 +0,0 @@ -{"happyhours": [{"time": "11:00 - 18:00", "description": "10% Rabatt auf Beefburger & belg. Fritten", "location": "Fritz Heaven"}, {"time": "12:00 - 22:00", "description": "Burger, Wraps und Baguettes f\u00fcr 5,20 \u20ac", "location": "Caf\u00e9 Esspress"}, {"time": "16:00 - 20:00", "description": "Aperitivo Italiano incl. Antipasti", "location": "Caff\u00e8bar Kranen"}, {"time": "16:00 - 23:59", "description": "Burger-Tag 6\u20ac", "location": "Bowlinghaus Bamberg"}, {"time": "17:00 - 21:00", "description": "Burger f\u00fcr Studenten 5,50\u20ac", "location": "Cafe Arte"}, {"time": "17:30 - 21:00", "description": "Alle vegetarischen Hauptgerichte kosten f\u00fcr Studierende nur 8,90 \u20ac", "location": "Restaurant Kornblume"}, {"time": "18:00 - 21:00", "description": "Burger mit Pommes oder Westernkartoffeln + 0,5 Mahr's Bier - 7,99\u20ac", "location": "Brasserie"}, {"time": "18:00 - 21:00", "description": "Spare-Ribs AYCE mit Pommes 9,90 \u20ac oder mit Weissbrot 7,70 \u20ac", "location": "Bamberger Weissbierhaus"}, {"time": "19:00 - 22:00", "description": "Brotzeitplatten 7,50 \u20ac", "location": "Ah\u00f6rnla im Sand"}, {"time": "19:00 - 23:00", "description": "Jeder Burger f\u00fcr 4,90 \u20ac", "location": "Mainfranken Bowling"}], "execution_time": "Thursday, 05.10.2017", "day": "Thursday, 05.10.2017"} \ No newline at end of file diff --git a/ofu_app/apps/food/utils/json_generator/jsons/mensa-austr.json b/ofu_app/apps/food/utils/json_generator/jsons/mensa-austr.json deleted file mode 100644 index 581d7d7..0000000 --- a/ofu_app/apps/food/utils/json_generator/jsons/mensa-austr.json +++ /dev/null @@ -1 +0,0 @@ -{"name": "Speiseplan f\u00fcr Austra\u00dfe Bamberg", "execution_time": "Thursday, 05.10.2017", "weekmenu": [{"menu": ["Pennenudeln mit Lachs, Spinat und Kirschtomaten", "Rinderhacksteak mit Cognac - Pfefferso\u00dfe", "Schneller Teller: H\u00e4hnchenspie\u00df mit Gefl\u00fcgelso\u00dfe, Bratkartoffeln und Blumenkohl", "Kartoffelgnocchi - Gem\u00fcsepfanne", "Dampfkartoffeln mit hausgemachtem Schnittlauchquark"], "date": "02.10."}, {"menu": [], "date": "03.10."}, {"menu": ["Siebenschwabenplatte mit Bratenso\u00dfe", "Moussaka vom Rind", "Putenbrustgeschnetzeltes in Pilzrahm", "Schneller Teller: Veganes Soja - Gem\u00fcsegeschnetzeltes mit Penine Rigate, Apfel", "\u00dcberbackene Zucchini \"mediterrane Art\" an Paprikaso\u00dfe"], "date": "04.10."}, {"menu": ["Chicken Burger mit Ananas Currydip", "Kasseler mit gr\u00fcnem Bohnengem\u00fcse", "Schneller Teller: Paprikagulsch vom Schwein mit Spiralnudeln und Rosenkohl", "Putenbrustgeschnetzeltes in Pilzrahm", "Schneller Teller: Veganes Soja - Gem\u00fcsegeschnetzeltes mit Penine Rigate, Apfel", "Nudelgem\u00fcseauflauf", "Pizza \"Spinat & Hirtenk\u00e4se\""], "date": "05.10."}, {"menu": ["Schneller Teller: Seelachsfilet an \"S\u00fc\u00df - Scharfer\" Tomatenso\u00dfe, Langkornreis und Brokkoli", "Gyros mit hausgemachtem Tsatsiki", "Green Tacos mit Walnuss - Chili - P\u00e2t\u00e9"], "date": "06.10."}]} \ No newline at end of file diff --git a/ofu_app/apps/food/utils/json_generator/jsons/mensa-feki.json b/ofu_app/apps/food/utils/json_generator/jsons/mensa-feki.json deleted file mode 100644 index ae492ee..0000000 --- a/ofu_app/apps/food/utils/json_generator/jsons/mensa-feki.json +++ /dev/null @@ -1 +0,0 @@ -{"name": "Speiseplan f\u00fcr Feldkirchenstra\u00dfe Bamberg", "execution_time": "Thursday, 05.10.2017", "weekmenu": [{"menu": ["Rinderhacksteak mit Cognac - Pfefferso\u00dfe", "Schneller Teller: H\u00e4hnchenspie\u00df mit Gefl\u00fcgelso\u00dfe, Bratkartoffeln und Blumenkohl", "Kartoffelgnocchi - Gem\u00fcsepfanne", "Dampfkartoffeln mit hausgemachtem Schnittlauchquark"], "date": "02.10."}, {"menu": [], "date": "03.10."}, {"menu": ["Siebenschwabenplatte mit Bratenso\u00dfe", "Putenbrustgeschnetzeltes in Pilzrahm", "Schneller Teller: Veganes Soja - Gem\u00fcsegeschnetzeltes mit Penine Rigate, Apfel", "\u00dcberbackene Zucchini \"mediterrane Art\" an Paprikaso\u00dfe"], "date": "04.10."}, {"menu": ["Chicken Burger mit Ananas Currydip", "Schneller Teller: Paprikagulsch vom Schwein mit Spiralnudeln und Rosenkohl", "\u00dcberbackene Zucchini \"mediterrane Art\" an Paprikaso\u00dfe", "Nudelgem\u00fcseauflauf", "Pizza \"Spinat & Hirtenk\u00e4se\""], "date": "05.10."}, {"menu": ["Schneller Teller: Seelachsfilet an \"S\u00fc\u00df - Scharfer\" Tomatenso\u00dfe, Langkornreis und Brokkoli", "Gyros mit hausgemachtem Tsatsiki", "Green Tacos mit Walnuss - Chili - P\u00e2t\u00e9"], "date": "06.10."}]} \ No newline at end of file diff --git a/ofu_app/apps/food/utils/migrate_data.py b/ofu_app/apps/food/utils/migrate_data.py index 85d4d57..7922da8 100644 --- a/ofu_app/apps/food/utils/migrate_data.py +++ b/ofu_app/apps/food/utils/migrate_data.py @@ -3,9 +3,14 @@ from datetime import datetime from pprint import pprint from django.db.utils import IntegrityError from apps.food.models import SingleFood, Menu, HappyHour +from apps.food.utils.parser import mensa_page_parser, fekide_happyhour_page_parser, cafete_page_parser -# JSON_FILES_PATH_FOOD = "json_generator/jsons/" -JSON_FILES_PATH_FOOD = "food/json_generator/jsons/" +# CONFIG SERVICE LINKS +LINK_FEKI_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=3&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=c3fe5ebb35e5fba3794f01878e798b7c" +LINK_AUSTR_MENSA = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/speiseplaene.html?tx_thmensamenu_pi2%5Bmensen%5D=2&tx_thmensamenu_pi2%5Baction%5D=show&tx_thmensamenu_pi2%5Bcontroller%5D=Speiseplan&cHash=511e047953ee1370c3b82c11a04624bb" +LINK_ERBA_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-erba-insel.html" +LINK_MARKUS_CAFETE = "https://www.studentenwerk-wuerzburg.de/bamberg/essen-trinken/sonderspeiseplaene/cafeteria-markusplatz.html" +LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour" def getJsonFromFile(path): @@ -14,6 +19,7 @@ def getJsonFromFile(path): def writeStudentenwerkDataInDB(data): + data = json.loads(data) for menu in data['weekmenu']: foodlist = [] for single_food in menu['menu']: @@ -45,15 +51,14 @@ def writeFekideDataInDB(data): break -def main(path=JSON_FILES_PATH_FOOD): +def main(): # get food jsons - writeStudentenwerkDataInDB(getJsonFromFile(path + "mensa-austr.json")) - writeStudentenwerkDataInDB(getJsonFromFile(path + "cafete-erba.json")) - writeStudentenwerkDataInDB(getJsonFromFile(path + "cafete-markus.json")) - writeStudentenwerkDataInDB(getJsonFromFile(path + "mensa-feki.json")) + writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_AUSTR_MENSA)) + writeStudentenwerkDataInDB(mensa_page_parser.parsePage(LINK_FEKI_MENSA)) + writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_ERBA_CAFETE)) + writeStudentenwerkDataInDB(cafete_page_parser.parsePage(LINK_MARKUS_CAFETE)) + writeFekideDataInDB(fekide_happyhour_page_parser.parsePage(LINK_FEKIDE_GUIDE)) - json_food_fekide = getJsonFromFile(path + "happyhourguide-fekide.json") - writeFekideDataInDB(json_food_fekide) pprint("SingleFood: " + str(SingleFood.objects.count())) pprint("Menu: " + str(Menu.objects.count())) pprint("HappyHour: " + str(HappyHour.objects.count())) diff --git a/ofu_app/apps/food/utils/parser/cafete_page_parser.py b/ofu_app/apps/food/utils/parser/cafete_page_parser.py index af15b37..e067cff 100644 --- a/ofu_app/apps/food/utils/parser/cafete_page_parser.py +++ b/ofu_app/apps/food/utils/parser/cafete_page_parser.py @@ -2,6 +2,7 @@ import requests from bs4 import BeautifulSoup import json import datetime +import re SPEISEPLAN_NAME_SELECTOR = '.csc-default .csc-header .csc-firstHeader' @@ -16,18 +17,20 @@ def getFoodplanName(soup): def getRightLine(lines): + foodlines = [] + pattern = re.compile("[0-9]+.+[A-Z]+") for line in list(lines): - if str(line).__contains__("
"): - return line - return "" + line = line.getText() + if pattern.match(line): + foodlines.append(line) + return foodlines def getFoodPerDay(soup): days = [] lines = soup.select('.csc-default .bodytext') - line = getRightLine(lines) - foods = str(line).strip('

').strip('") - for food in foods: + foodlines = getRightLine(lines) + for food in foodlines: dayObj = {} day = str(food).split()[0] foodName = str(food).replace(day, "").strip() diff --git a/ofu_app/apps/food/utils/parser/fekide_happyhour_page_parser.py b/ofu_app/apps/food/utils/parser/fekide_happyhour_page_parser.py index 3dd38db..3af47a2 100644 --- a/ofu_app/apps/food/utils/parser/fekide_happyhour_page_parser.py +++ b/ofu_app/apps/food/utils/parser/fekide_happyhour_page_parser.py @@ -45,9 +45,7 @@ def parsePage(url: str): pagecontent['happyhours'] = happyhours pagecontent['day'] = getDay() pagecontent['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y") - - jsondata = json.dumps(pagecontent) - return jsondata + return pagecontent # LINK_FEKIDE_GUIDE = "https://www.feki.de/happyhour/wochenuebersicht" # parsePage(LINK_FEKIDE_GUIDE) diff --git a/ofu_app/apps/food/utils/parser/mensa_page_parser.py b/ofu_app/apps/food/utils/parser/mensa_page_parser.py index 09b6230..b848d18 100644 --- a/ofu_app/apps/food/utils/parser/mensa_page_parser.py +++ b/ofu_app/apps/food/utils/parser/mensa_page_parser.py @@ -43,7 +43,6 @@ def parsePage(url: str): mensaSpeiseplan['weekmenu'] = days mensaSpeiseplan['name'] = foodplan_name mensaSpeiseplan['execution_time'] = datetime.datetime.today().strftime("%A, %d.%m.%Y") - # print(mensaSpeiseplan) mensaSpeiseplanJson = json.dumps(mensaSpeiseplan) return mensaSpeiseplanJson