From 9bdeff39dc831d322a609f37a18a92397921fe7d Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Tue, 15 Oct 2024 00:31:19 +0530 Subject: [PATCH 1/6] fetch latest academic calendar from the iitkgp website --- .gitignore | 4 +++- utils/academic_calander_handler.py | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 utils/academic_calander_handler.py diff --git a/.gitignore b/.gitignore index 00bf393..dc57fc0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ data.txt .idea/ .vscode venv -.env \ No newline at end of file +.env + +ACADEMIC_CALENDAR_*.pdf diff --git a/utils/academic_calander_handler.py b/utils/academic_calander_handler.py new file mode 100644 index 0000000..cd90f81 --- /dev/null +++ b/utils/academic_calander_handler.py @@ -0,0 +1,26 @@ +from datetime import datetime +import os +import urllib.request + + +JSON_FOLDER_NAME = 'Academic_Cal-j' + +#get the current working directory +def cwd(): + return os.getcwd() + +#fetch the latest academic calendar from the iitkgp website +def get_latest_calendar(): + currYear = datetime.today().year + currMonth = datetime.today().month + + if(currMonth < 7): + currYear -= 1 + + yearString = str(currYear) + '_' + str((currYear % 100) + 1) + fileName = 'ACADEMIC_CALENDAR_' + yearString + '.pdf' + url = 'https://www.iitkgp.ac.in/assets/pdf/' + fileName + + urllib.request.urlretrieve(url,fileName) + + From 6a47078cbc13d76087f42e1de77145c74cff0a73 Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Tue, 15 Oct 2024 01:19:19 +0530 Subject: [PATCH 2/6] add delete old files methods --- utils/academic_calander_handler.py | 61 ++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/utils/academic_calander_handler.py b/utils/academic_calander_handler.py index cd90f81..25f55f3 100644 --- a/utils/academic_calander_handler.py +++ b/utils/academic_calander_handler.py @@ -1,7 +1,10 @@ from datetime import datetime +import glob +import camelot import os -import urllib.request - +import requests +import shutil +from zipfile import ZipFile JSON_FOLDER_NAME = 'Academic_Cal-j' @@ -9,18 +12,52 @@ def cwd(): return os.getcwd() -#fetch the latest academic calendar from the iitkgp website -def get_latest_calendar(): - currYear = datetime.today().year - currMonth = datetime.today().month +def get_latest_calendar_name(): + curr_year = datetime.today().year + curr_month = datetime.today().month - if(currMonth < 7): - currYear -= 1 + if(curr_month < 7): + curr_year -= 1 - yearString = str(currYear) + '_' + str((currYear % 100) + 1) - fileName = 'ACADEMIC_CALENDAR_' + yearString + '.pdf' - url = 'https://www.iitkgp.ac.in/assets/pdf/' + fileName + year_str = str(curr_year) + '_' + str((curr_year % 100) + 1) + filename = 'ACADEMIC_CALENDAR_' + year_str + '.pdf' + return filename + +def is_file_present(file): + if(os.path.exists(cwd() + '/' + file)): + return True + return False + +def delete_file(file): + if(is_file_present(file)): + try: + print("DELETING file ",file) + shutil.rmtree(cwd() + '/' + file) + except Exception as e: + print("ERROR: seems folder already exists but cannot be deleted") + print(e) + return False + else: + print("File not present..") + +#fetch the latest academic calendar from the iitkgp website +def get_latest_calendar(): - urllib.request.urlretrieve(url,fileName) + filename = get_latest_calendar_name() + filepath = cwd() + filename + + url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename + ## delete any old academic calander pdf if exists + if(is_file_present(filename)): + delete_file(filename) + + with open(filename,"wb") as file: + response = requests.get(url) + file.write(response.content) + + if(is_file_present(filename)): + return True + return False + From 620626f621f85e8c30143ad5d37796e00d199d3b Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Tue, 15 Oct 2024 04:09:04 +0530 Subject: [PATCH 3/6] add json parsing and final list of entries generation --- requirements.txt | 57 ++++++++++-- utils/academic_calander_handler.py | 136 +++++++++++++++++++++++++++-- 2 files changed, 181 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9cfc0f4..902a757 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,58 @@ beautifulsoup4==4.12.2 -google_api_python_client==2.90.0 +blinker==1.8.2 +bs4==0.0.2 +cachetools==5.5.0 +certifi==2024.8.30 +cffi==1.17.1 +chardet==5.2.0 +charset-normalizer==3.4.0 +click==8.1.7 +cryptography==43.0.1 +et-xmlfile==1.1.0 +Flask==3.0.3 +Flask-Cors==4.0.1 +ghostscript==0.7 +google-api-core==2.21.0 +google-api-python-client==2.90.0 +google-auth==2.35.0 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==1.2.1 +googleapis-common-protos==1.65.0 +gunicorn==22.0.0 httplib2==0.22.0 icalendar==5.0.7 +idna==3.10 iitkgp_erp_login==2.4.2 +itsdangerous==2.2.0 +Jinja2==3.1.4 +MarkupSafe==3.0.1 +numpy==2.1.2 oauth2client==4.1.3 +oauthlib==3.2.2 +opencv-python==4.10.0.84 +openpyxl==3.1.5 +packaging==24.1 +pandas==2.2.3 +pdfminer.six==20240706 +proto-plus==1.24.0 +protobuf==5.28.2 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +pycparser==2.22 +pyparsing==3.2.0 +pypdf==4.3.1 +pypdf_table_extraction==0.0.2 +python-dateutil==2.9.0.post0 pytz==2023.3 -Requests==2.31.0 -flask==3.0.3 -flask_cors==4.0.1 -gunicorn==22.0.0 +requests==2.31.0 +requests-oauthlib==2.0.0 +rsa==4.9 +setuptools==75.1.0 +six==1.16.0 +soupsieve==2.6 +tabulate==0.9.0 +tk==0.1.0 +tzdata==2024.2 +uritemplate==4.1.1 +urllib3==2.2.3 +Werkzeug==3.0.4 diff --git a/utils/academic_calander_handler.py b/utils/academic_calander_handler.py index 25f55f3..3f1b52e 100644 --- a/utils/academic_calander_handler.py +++ b/utils/academic_calander_handler.py @@ -1,13 +1,23 @@ -from datetime import datetime +from datetime import datetime, timedelta import glob import camelot import os import requests import shutil from zipfile import ZipFile +import json +from dataclasses import dataclass +import re + JSON_FOLDER_NAME = 'Academic_Cal-j' +@dataclass +class DataEntry: + start_date: datetime = datetime.today() + end_date: datetime = datetime.today() + event: str = "" + #get the current working directory def cwd(): return os.getcwd() @@ -24,7 +34,9 @@ def get_latest_calendar_name(): return filename def is_file_present(file): - if(os.path.exists(cwd() + '/' + file)): + if(os.path.exists(cwd() + '/' + file) or + os.path.exists(cwd() + '/' + file + '/') + ): return True return False @@ -32,20 +44,23 @@ def delete_file(file): if(is_file_present(file)): try: print("DELETING file ",file) - shutil.rmtree(cwd() + '/' + file) + if(os.path.isdir(file)): + shutil.rmtree(cwd() + '/' + file) + elif(os.path.isfile(file)): + os.remove(file) + else: + raise Exception("filename not valid") except Exception as e: - print("ERROR: seems folder already exists but cannot be deleted") + print("ERROR: seems file already exists but cannot be deleted") print(e) return False else: - print("File not present..") + print(file, "File not present..") #fetch the latest academic calendar from the iitkgp website def get_latest_calendar(): filename = get_latest_calendar_name() - filepath = cwd() + filename - url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename ## delete any old academic calander pdf if exists @@ -60,4 +75,111 @@ def get_latest_calendar(): return True return False +def upzip_and_delete_zip(zip_file_name,result_folder_name): + with ZipFile(zip_file_name) as zip: + try: + zip.extractall(result_folder_name) + except Exception as E: + print(E) + return False + + print("Zip File not needed anymore, Deleteting ", zip_file_name) + delete_file(zip_file_name) + return True + +def export_json(): + filename = get_latest_calendar_name() + ## ignore the read_pdf not found warning + tables = camelot.read_pdf(filename,pages="all") + + print("Checking for pre-existing folder") + delete_file(JSON_FOLDER_NAME) + + try: + tables.export((JSON_FOLDER_NAME + '.json'),f='json',compress=True) + except Exception as E: + print(E) + return False + + upzip_and_delete_zip((JSON_FOLDER_NAME + '.zip'),JSON_FOLDER_NAME) + return True + +def get_json_files(): + folder_path = cwd() + '/' + JSON_FOLDER_NAME + if(is_file_present(JSON_FOLDER_NAME)): + files = glob.glob(folder_path + '/*.json',include_hidden=True) + return files + else: + return [] + +def merge_json(): + merged_data = [] + for file in get_json_files(): + with open(file) as f: + data = json.load(f) + merged_data.extend(data) + + with open('final.json',"w") as f: + json.dump(merged_data,f) + + return merged_data + +def get_academic_calendar() -> list[DataEntry]: + + get_latest_calendar() + export_json() + + all_dates = merge_json() + all_dates = all_dates[1:] + + main_dates = [] + # for date in all_dates: + # entry = DataEntry() + # if(len(date) > 4 and date['4'] != ''): + # if(len(date['1']) > 3): + # entry.event += date['1'].replace('\n','') + # entry.event += date['2'].replace('\n','') + # d = date['4'].replace('\n',' ').replace('(AN)','') + # print(d.find("to")) + # if(d.lower().find("to") != -1): + # d = str(d).lower().split("to") + # entry.start_date = datetime.strptime(d[0].split(" ")[0].strip(), "%d.%m.%Y") + # entry.end_date = datetime.strptime(d[-1].split(" ")[-1].strip(), "%d.%m.%Y") + # else: + # entry.start_date = datetime.strptime(d,"%d.%m.%Y") + # entry.end_date = ( entry.start_date + timedelta(1) ) + # # elif(len(date) == 2 and date['1'] != ''): + # # entry.event = date['0'] + # # d = date['1'].replace('\n','') + # # if(d.find("to")): + # # d = str(d).split("to") + # # entry.start_date = datetime.strptime(d[0].strip(), "%A, %d %B %Y") + # # entry.end_date = datetime.strptime(d[1].strip(), "%A, %d %B %Y") + # # else: + # # entry.start_date = datetime.strptime(d,"%A, %d %B %Y") + # # entry.end_date = ( entry.start_date + timedelta(1) ) + # # main_dates.append([date['0'],datetime_object]) + # main_dates.append(entry) + + date_regex = re.compile(r'\d\d\.\d\d\.\d{4}') + maxLen = 1 + for date in all_dates: + if(len(date) > 4 and date['4'] != ''): + entry = DataEntry() + if(len(date['1']) > 3): + entry.event += date['1'].replace('\n','') + entry.event += date['2'].replace('\n','') + + d =date['3'].replace('\n',' ').replace('(AN)','') + date['4'].replace('\n',' ').replace('(AN)','') + d = date_regex.findall(d) + if(maxLen < len(d)): + maxLen = len(d) + if(len(d) == 1): + entry.start_date = datetime.strptime(d[0],"%d.%m.%Y") + entry.end_date = ( entry.start_date + timedelta(1) ) + elif(len(d) == 2): + entry.start_date = datetime.strptime(d[0],"%d.%m.%Y") + entry.end_date = datetime.strptime(d[1],"%d.%m.%Y") + main_dates.append(entry) + return main_dates From 65d294cee7be77f776d3d390889df2ed76a8979f Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Tue, 15 Oct 2024 04:10:52 +0530 Subject: [PATCH 4/6] export the academic calendar parser --- utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/__init__.py b/utils/__init__.py index fd90caa..42ad644 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,3 +1,4 @@ from utils.dates import * from utils.build_event import * +from utils.academic_calander_handler import * From d520eadf276e3e8f109accb89a7a7905fd1d5946 Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Tue, 15 Oct 2024 04:25:44 +0530 Subject: [PATCH 5/6] Add event generation from academic calendar to generate ics --- .gitignore | 2 ++ requirements.txt.bak | 11 +++++++++++ timetable/generate_ics.py | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 requirements.txt.bak diff --git a/.gitignore b/.gitignore index dc57fc0..85de63a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ venv .env ACADEMIC_CALENDAR_*.pdf +Academic_Cal-j/** +final.json diff --git a/requirements.txt.bak b/requirements.txt.bak new file mode 100644 index 0000000..9cfc0f4 --- /dev/null +++ b/requirements.txt.bak @@ -0,0 +1,11 @@ +beautifulsoup4==4.12.2 +google_api_python_client==2.90.0 +httplib2==0.22.0 +icalendar==5.0.7 +iitkgp_erp_login==2.4.2 +oauth2client==4.1.3 +pytz==2023.3 +Requests==2.31.0 +flask==3.0.3 +flask_cors==4.0.1 +gunicorn==22.0.0 diff --git a/timetable/generate_ics.py b/timetable/generate_ics.py index 3ac7276..c65d29f 100644 --- a/timetable/generate_ics.py +++ b/timetable/generate_ics.py @@ -3,7 +3,7 @@ from icalendar import Calendar, Event from datetime import datetime, timedelta from timetable import Course -from utils import dates, build_event_duration, generate_india_time, next_weekday +from utils import academic_calander_handler, dates, build_event_duration, generate_india_time, next_weekday WORKING_DAYS = dates.get_dates() @@ -51,6 +51,13 @@ def generate_ics(courses: list[Course], output_filename): event.add("dtstart", holiday[1]) event.add("dtend", holiday[1] + timedelta(days=1)) cal.add_component(event) + + for entry in academic_calander_handler.get_academic_calendar(): + event = Event() + event.add("summary", entry.event) + event.add("dtstart",entry.start_date) + event.add("dtend",entry.end_date) + cal.add_component(event) if output_filename != "": From 4dcc450c6872101830f49b4c4a60ee3025b00044 Mon Sep 17 00:00:00 2001 From: gwydion67 Date: Wed, 16 Oct 2024 20:15:56 +0530 Subject: [PATCH 6/6] limit the number events to the events for all only, ignore EMBA and Field trips --- utils/academic_calander_handler.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/utils/academic_calander_handler.py b/utils/academic_calander_handler.py index 3f1b52e..e707a3a 100644 --- a/utils/academic_calander_handler.py +++ b/utils/academic_calander_handler.py @@ -120,7 +120,7 @@ def merge_json(): merged_data.extend(data) with open('final.json',"w") as f: - json.dump(merged_data,f) + json.dump(merged_data,f,indent=4) return merged_data @@ -161,7 +161,7 @@ def get_academic_calendar() -> list[DataEntry]: # # main_dates.append([date['0'],datetime_object]) # main_dates.append(entry) - date_regex = re.compile(r'\d\d\.\d\d\.\d{4}') + date_regex = re.compile(r'\d{2}.\d{2}.\d{4}') maxLen = 1 for date in all_dates: if(len(date) > 4 and date['4'] != ''): @@ -181,5 +181,11 @@ def get_academic_calendar() -> list[DataEntry]: entry.start_date = datetime.strptime(d[0],"%d.%m.%Y") entry.end_date = datetime.strptime(d[1],"%d.%m.%Y") main_dates.append(entry) + annual_convocation = str(date['1']).strip().lower().split(" ") + ## KGP hai .. cannot trust, they can even mess up the spellings of annual convocation + ## this can just reduce the amount of places this will fail + if(len(annual_convocation) == 2 and ("annual" in annual_convocation or "convocation" in annual_convocation)): + break + return main_dates