diff --git a/.gitignore b/.gitignore index 00bf393..85de63a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,8 @@ data.txt .idea/ .vscode venv -.env \ No newline at end of file +.env + +ACADEMIC_CALENDAR_*.pdf +Academic_Cal-j/** +final.json diff --git a/requirements.txt b/requirements.txt index 9cfc0f4..902a757 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,58 @@ beautifulsoup4==4.12.2 -google_api_python_client==2.90.0 +blinker==1.8.2 +bs4==0.0.2 +cachetools==5.5.0 +certifi==2024.8.30 +cffi==1.17.1 +chardet==5.2.0 +charset-normalizer==3.4.0 +click==8.1.7 +cryptography==43.0.1 +et-xmlfile==1.1.0 +Flask==3.0.3 +Flask-Cors==4.0.1 +ghostscript==0.7 +google-api-core==2.21.0 +google-api-python-client==2.90.0 +google-auth==2.35.0 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==1.2.1 +googleapis-common-protos==1.65.0 +gunicorn==22.0.0 httplib2==0.22.0 icalendar==5.0.7 +idna==3.10 iitkgp_erp_login==2.4.2 +itsdangerous==2.2.0 +Jinja2==3.1.4 +MarkupSafe==3.0.1 +numpy==2.1.2 oauth2client==4.1.3 +oauthlib==3.2.2 +opencv-python==4.10.0.84 +openpyxl==3.1.5 +packaging==24.1 +pandas==2.2.3 +pdfminer.six==20240706 +proto-plus==1.24.0 +protobuf==5.28.2 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +pycparser==2.22 +pyparsing==3.2.0 +pypdf==4.3.1 +pypdf_table_extraction==0.0.2 +python-dateutil==2.9.0.post0 pytz==2023.3 -Requests==2.31.0 -flask==3.0.3 -flask_cors==4.0.1 -gunicorn==22.0.0 +requests==2.31.0 +requests-oauthlib==2.0.0 +rsa==4.9 +setuptools==75.1.0 +six==1.16.0 +soupsieve==2.6 +tabulate==0.9.0 +tk==0.1.0 +tzdata==2024.2 +uritemplate==4.1.1 +urllib3==2.2.3 +Werkzeug==3.0.4 diff --git a/requirements.txt.bak b/requirements.txt.bak new file mode 100644 index 0000000..9cfc0f4 --- /dev/null +++ b/requirements.txt.bak @@ -0,0 +1,11 @@ +beautifulsoup4==4.12.2 +google_api_python_client==2.90.0 +httplib2==0.22.0 +icalendar==5.0.7 +iitkgp_erp_login==2.4.2 +oauth2client==4.1.3 +pytz==2023.3 +Requests==2.31.0 +flask==3.0.3 +flask_cors==4.0.1 +gunicorn==22.0.0 diff --git 
"""Fetch the IIT Kharagpur academic calendar PDF and turn it into dated events.

Pipeline (every relative path is resolved against the current working
directory):

1. ``get_latest_calendar`` downloads the PDF for the current academic year.
2. ``export_json`` extracts its tables with camelot into ``JSON_FOLDER_NAME``.
3. ``merge_json`` concatenates the per-table JSON files into ``final.json``.
4. ``parse_calendar_rows`` converts the merged rows into ``DataEntry`` objects.

``get_academic_calendar`` runs the whole pipeline.
"""
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import json
import os
import re
import shutil
from zipfile import BadZipFile, ZipFile


JSON_FOLDER_NAME = 'Academic_Cal-j'

# dd.mm.yyyy with *literal* dots, so "15/07/2024" is not mistaken for a date.
_DATE_PATTERN = re.compile(r'\d{2}\.\d{2}\.\d{4}')
_DATE_FORMAT = "%d.%m.%Y"
# Splits a file name into digit / non-digit runs for natural sorting.
_DIGIT_RUNS = re.compile(r'(\d+)')
_REQUEST_TIMEOUT_SECONDS = 30


@dataclass
class DataEntry:
    """One calendar event.

    ``end_date`` is exclusive: a single-day event ends one day after
    ``start_date``. The defaults are computed per instance (not once at
    import time).
    """
    start_date: datetime = field(default_factory=datetime.today)
    end_date: datetime = field(default_factory=datetime.today)
    event: str = ""


def cwd():
    """Return the current working directory."""
    return os.getcwd()


def _resolve(path):
    """Return *path* anchored at the working directory (absolute paths pass through)."""
    return os.path.join(cwd(), path)


def get_latest_calendar_name(today=None) -> str:
    """Return the PDF file name of the academic calendar covering *today*.

    The academic year is taken to start in July: before July the calendar
    that started in the previous calendar year is used.

    Args:
        today: Reference date; defaults to ``datetime.today()``.

    Returns:
        A name such as ``ACADEMIC_CALENDAR_2024_25.pdf``.
    """
    if today is None:
        today = datetime.today()
    start_year = today.year if today.month >= 7 else today.year - 1
    return f"ACADEMIC_CALENDAR_{start_year}_{(start_year % 100) + 1}.pdf"


def is_file_present(file) -> bool:
    """Return True when *file* (a file or a directory) exists."""
    return os.path.exists(_resolve(file))


def delete_file(file) -> bool:
    """Delete a file or a whole directory tree.

    Args:
        file: Path relative to the working directory, or an absolute path.

    Returns:
        True when something was deleted; False when the path did not exist
        or could not be removed (the error is printed, not raised).
    """
    if not is_file_present(file):
        print(file, "File not present..")
        return False

    path = _resolve(file)
    print("DELETING file ", file)
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
    except OSError as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False
    return True


def get_latest_calendar() -> bool:
    """Download the latest academic calendar PDF from the IIT KGP website.

    The request is made before the local file is touched, so a failed
    download leaves any previously downloaded copy in place instead of
    replacing it with an empty file or an HTML error page.

    Returns:
        True when the PDF was downloaded and written, False otherwise.
    """
    # Imported lazily: ``utils/__init__`` star-imports this module, and the
    # rest of the package should stay importable without the PDF tool chain.
    import requests

    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return False

    # "wb" truncates, so an older copy does not need to be deleted first.
    with open(_resolve(filename), "wb") as file:
        file.write(response.content)
    return True


def upzip_and_delete_zip(zip_file_name, result_folder_name) -> bool:
    """Extract *zip_file_name* into *result_folder_name*, then delete the zip.

    Returns:
        True on success; False when the archive is missing, unreadable or
        cannot be extracted (the error is printed and the zip is kept).
    """
    try:
        with ZipFile(zip_file_name) as archive:
            archive.extractall(result_folder_name)
    except (OSError, BadZipFile, RuntimeError) as e:
        print(e)
        return False

    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True


def export_json() -> bool:
    """Extract every table of the calendar PDF into ``JSON_FOLDER_NAME``.

    camelot writes the tables as a compressed archive, which is then
    unpacked into the folder and removed.

    Returns:
        True when the JSON files were produced, False on any failure.
    """
    # Lazy for the same reason as ``requests`` in ``get_latest_calendar``.
    import camelot

    filename = get_latest_calendar_name()
    try:
        tables = camelot.read_pdf(filename, pages="all")
    except Exception as e:  # third-party parser: failure modes are not enumerable here
        print(e)
        return False

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export((JSON_FOLDER_NAME + '.json'), f='json', compress=True)
    except Exception as e:  # same best-effort boundary as the original code
        print(e)
        return False

    return upzip_and_delete_zip((JSON_FOLDER_NAME + '.zip'), JSON_FOLDER_NAME)


def _natural_key(name):
    """Sort key that orders embedded numbers numerically (page-2 before page-10)."""
    return [int(part) if part.isdigit() else part
            for part in _DIGIT_RUNS.split(name)]


def get_json_files():
    """Return the paths of the exported ``*.json`` files in natural order.

    The order matters: callers drop the first merged row and stop at the
    convocation row, so the files must be read in a stable page/table order.

    Returns:
        A list of paths, empty when the JSON folder does not exist.
    """
    folder_path = _resolve(JSON_FOLDER_NAME)
    if not os.path.isdir(folder_path):
        return []
    names = sorted(
        (name for name in os.listdir(folder_path) if name.endswith('.json')),
        key=_natural_key,
    )
    return [os.path.join(folder_path, name) for name in names]


def merge_json():
    """Concatenate all exported JSON files and save the result as ``final.json``.

    Returns:
        The merged list of rows.
    """
    merged_data = []
    for file in get_json_files():
        with open(file, encoding="utf-8") as f:
            merged_data.extend(json.load(f))

    with open('final.json', "w", encoding="utf-8") as f:
        json.dump(merged_data, f, indent=4)

    return merged_data


def _cell(row, key):
    """Return the text of column *key* of *row* ('' when missing or null)."""
    value = row.get(key)
    return '' if value is None else str(value)


def _entry_from_row(row):
    """Build a DataEntry from one table row, or None when it has no valid date."""
    title = ''
    heading = _cell(row, '1')
    if len(heading) > 3:
        title += heading.replace('\n', '')
    title += _cell(row, '2').replace('\n', '')

    date_text = ' '.join(_cell(row, key).replace('\n', ' ') for key in ('3', '4'))
    found = _DATE_PATTERN.findall(date_text)
    if not found:
        return None
    try:
        first_day = datetime.strptime(found[0], _DATE_FORMAT)
        last_day = datetime.strptime(found[-1], _DATE_FORMAT)
    except ValueError:
        # Looks like a date but is not one (e.g. 31.02.2024): skip the row.
        return None

    # The end is exclusive, so the last listed day is covered for both
    # single dates and ranges.
    return DataEntry(first_day, last_day + timedelta(days=1), title)


def _is_annual_convocation(label):
    """Return True for the two-word "annual convocation" row that ends the table."""
    words = str(label).strip().lower().split(" ")
    # Only one of the two words has to match, to tolerate a misspelling of
    # the other one in the source PDF.
    return len(words) == 2 and ("annual" in words or "convocation" in words)


def parse_calendar_rows(rows) -> list[DataEntry]:
    """Convert merged table rows into calendar entries.

    Args:
        rows: Dicts keyed by the column index as a string ('0', '1', ...).
            Columns '1' and '2' hold the event text; columns '3' and '4'
            hold one or two ``dd.mm.yyyy`` dates.

    Returns:
        One DataEntry per row that has at least five columns, a non-empty
        column '4' and a parseable date. Parsing stops after the annual
        convocation row.
    """
    entries = []
    for row in rows:
        if len(row) <= 4 or _cell(row, '4') == '':
            continue
        entry = _entry_from_row(row)
        if entry is not None:
            entries.append(entry)
        if _is_annual_convocation(_cell(row, '1')):
            break
    return entries


def get_academic_calendar() -> list[DataEntry]:
    """Download, extract and parse the current academic calendar.

    Returns:
        The parsed events, or an empty list when the tables could not be
        extracted.
    """
    # Best effort: when the download fails, a previously downloaded PDF
    # (if any) is still parsed.
    get_latest_calendar()
    if not export_json():
        return []

    # The first merged row is dropped, as in the original implementation
    # (presumably a table header -- confirm against the PDF).
    return parse_calendar_rows(merge_json()[1:])