diff --git a/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo b/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo new file mode 100644 index 00000000..4a19d011 Binary files /dev/null and b/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo differ diff --git a/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo_weights.hdf5 b/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo_weights.hdf5 new file mode 100644 index 00000000..aa59ef75 Binary files /dev/null and b/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo_weights.hdf5 differ diff --git a/Contributor_demos/Sarcasm Detection/contributors.txt b/Contributor_demos/Sarcasm Detection/contributors.txt new file mode 100644 index 00000000..d5c3c6c4 --- /dev/null +++ b/Contributor_demos/Sarcasm Detection/contributors.txt @@ -0,0 +1,2 @@ +Omkar "Omickeyee" https://github.com/omickeyee +Kacper "Bubble" https://github.com/Kacper-W-Kozdon diff --git a/Contributor_demos/Sarcasm Detection/kaggle-sarcasm-detection-using-the-ivy-library.ipynb b/Contributor_demos/Sarcasm Detection/kaggle-sarcasm-detection-using-the-ivy-library.ipynb new file mode 100644 index 00000000..fa757271 --- /dev/null +++ b/Contributor_demos/Sarcasm Detection/kaggle-sarcasm-detection-using-the-ivy-library.ipynb @@ -0,0 +1 @@ +{"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"sourceId":36545,"sourceType":"datasetVersion","datasetId":1309}],"dockerImageVersionId":30674,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# DEPENDENCIES AND 
SETUP","metadata":{"id":"s2B-C0ETR8j-"}},{"cell_type":"markdown","source":"Installing kaggle and uploading the API key necessary to use it.","metadata":{"id":"lVY3Z4myS1O4"}},{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\"\n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"id":"GRXT-eQOLn0q","execution":{"iopub.status.busy":"2024-04-18T11:35:01.674141Z","iopub.execute_input":"2024-04-18T11:35:01.674377Z","iopub.status.idle":"2024-04-18T11:35:01.685910Z","shell.execute_reply.started":"2024-04-18T11:35:01.674357Z","shell.execute_reply":"2024-04-18T11:35:01.684921Z"},"trusted":true},"execution_count":55,"outputs":[{"name":"stdout","text":"/kaggle/input/sarcasm/train-balanced-sarc.csv.gz\n/kaggle/input/sarcasm/train-balanced-sarcasm.csv\n/kaggle/input/sarcasm/test-balanced.csv\n/kaggle/input/sarcasm/test-unbalanced.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"!pip install -q kaggle\n# from google.colab import files\n# from google.colab import userdata\nimport os\n# files.upload(); #Upload kaggle.json - you can get from the kaggle account settings, from the API 
section.","metadata":{"id":"7R4luV8tSDFn","execution":{"iopub.status.busy":"2024-04-18T11:34:23.281041Z","iopub.execute_input":"2024-04-18T11:34:23.281363Z","iopub.status.idle":"2024-04-18T11:34:35.419251Z","shell.execute_reply.started":"2024-04-18T11:34:23.281334Z","shell.execute_reply":"2024-04-18T11:34:35.418137Z"},"trusted":true},"execution_count":45,"outputs":[{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"}]},{"cell_type":"code","source":"# UNCOMMENT BELOW IF YOU'RE RUNNING THE NOTEBOOK OUTSIDE KAGGLE\n\n# kaggle_api_key = open('kaggle.json', \"w+\")\n# kaggle_api_key.write('') # kaggle.json - you can get it from the kaggle account settings, from the API section.\n# !mkdir ~/.kaggle\n# !cp kaggle.json ~/.kaggle/\n# !chmod 600 ~/.kaggle/kaggle.json\n# !kaggle datasets list","metadata":{"id":"F39aDC9VLn0u","execution":{"iopub.status.busy":"2024-04-18T11:34:35.420885Z","iopub.execute_input":"2024-04-18T11:34:35.421252Z","iopub.status.idle":"2024-04-18T11:34:35.426035Z","shell.execute_reply.started":"2024-04-18T11:34:35.421217Z","shell.execute_reply":"2024-04-18T11:34:35.425033Z"},"trusted":true},"execution_count":46,"outputs":[]},{"cell_type":"markdown","source":"Installing packages necessary to use torch's transformers.","metadata":{"id":"GRyxyRkNqONt"}},{"cell_type":"code","source":"!pip install tqdm boto3 requests regex sentencepiece sacremoses 
botocore>=1.34.79","metadata":{"id":"yhD653HGqOj2","execution":{"iopub.status.busy":"2024-04-18T11:34:35.428253Z","iopub.execute_input":"2024-04-18T11:34:35.428564Z","iopub.status.idle":"2024-04-18T11:34:47.739677Z","shell.execute_reply.started":"2024-04-18T11:34:35.428532Z","shell.execute_reply":"2024-04-18T11:34:47.738644Z"},"trusted":true},"execution_count":47,"outputs":[{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"To use the API, credentials need to be copied into the kaggle folder. If everything works, the output will show the list of available datasets.","metadata":{"id":"aCN2c1DGTbVM"}},{"cell_type":"markdown","source":"import json","metadata":{"id":"P1aQHs-9Tkt2"}},{"cell_type":"code","source":"import json","metadata":{"execution":{"iopub.status.busy":"2024-04-18T11:34:47.741170Z","iopub.execute_input":"2024-04-18T11:34:47.741482Z","iopub.status.idle":"2024-04-18T11:34:47.746319Z","shell.execute_reply.started":"2024-04-18T11:34:47.741456Z","shell.execute_reply":"2024-04-18T11:34:47.745387Z"},"trusted":true},"execution_count":48,"outputs":[]},{"cell_type":"markdown","source":"Preparing the ivy library.","metadata":{"id":"_Sf8EImZT6kZ"}},{"cell_type":"code","source":"#Insert the correct user when cloning the repos. 
Make sure that they are up-to-date.\n\n!git clone \"https://github.com/Kacper-W-Kozdon/demos.git\"\n# !git clone \"https://github.com/Kacper-W-Kozdon/ivy.git\"\n!pip install -U -q paddlepaddle ivy accelerate>=0.21.0 2>/dev/null # If ran in a notebook with only cpu enabled, edit \"paddlepaddle-gpu\" to \"paddlepaddle\"","metadata":{"id":"7DMn3EoEUBGQ","execution":{"iopub.status.busy":"2024-04-18T11:34:47.747572Z","iopub.execute_input":"2024-04-18T11:34:47.747900Z","iopub.status.idle":"2024-04-18T11:35:01.612423Z","shell.execute_reply.started":"2024-04-18T11:34:47.747870Z","shell.execute_reply":"2024-04-18T11:35:01.611179Z"},"trusted":true},"execution_count":49,"outputs":[{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"},{"name":"stdout","text":"fatal: destination path 'demos' already exists and is not an empty directory.\n","output_type":"stream"},{"name":"stderr","text":"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\nTo disable this warning, you can either:\n\t- Avoid using `tokenizers` before the fork if possible\n\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n","output_type":"stream"}]},{"cell_type":"markdown","source":"Next: import the ivy library and get the dataset.","metadata":{"id":"y1sA3gFuWjDE"}},{"cell_type":"code","source":"import ivy","metadata":{"id":"_NUgteS_Dluc","execution":{"iopub.status.busy":"2024-04-18T11:35:01.614221Z","iopub.execute_input":"2024-04-18T11:35:01.614630Z","iopub.status.idle":"2024-04-18T11:35:01.619392Z","shell.execute_reply.started":"2024-04-18T11:35:01.614593Z","shell.execute_reply":"2024-04-18T11:35:01.618329Z"},"trusted":true},"execution_count":50,"outputs":[]},{"cell_type":"markdown","source":"Import the libraries suggested in the model which is to be transpiled.","metadata":{"id":"UKN-VX8QXDEG"}},{"cell_type":"code","source":"# Import necessary libraries\nimport pandas as pd # For data manipulation and analysis\nimport gc # For garbage collection to manage memory\nimport re # For regular expressions\nimport numpy as np # For numerical operations and arrays\nimport tensorflow as tf\nimport torch # PyTorch library for deep learning\nimport paddle","metadata":{"id":"1-LVDQOELn0x","execution":{"iopub.status.busy":"2024-04-18T11:35:01.620672Z","iopub.execute_input":"2024-04-18T11:35:01.621461Z","iopub.status.idle":"2024-04-18T11:35:01.631363Z","shell.execute_reply.started":"2024-04-18T11:35:01.621429Z","shell.execute_reply":"2024-04-18T11:35:01.630372Z"},"trusted":true},"execution_count":51,"outputs":[]},{"cell_type":"code","source":"# Libraries to accompany torch's transformers\nimport tqdm\nimport boto3\nimport requests\nimport regex\nimport sentencepiece\nimport sacremoses\n\nimport warnings # For handling warnings\nwarnings.filterwarnings(\"ignore\") # Ignore warning messages\n\nfrom transformers import AutoModel, AutoTokenizer # Transformers library for 
natural language processing\n# from transformers import TextDataset, LineByLineTextDataset, DataCollatorForLanguageModeling, \\\n# pipeline, Trainer, TrainingArguments, DataCollatorWithPadding # Transformers components for text processing\nfrom transformers import TextDataset, LineByLineTextDataset, DataCollatorForLanguageModeling, \\\npipeline, TrainingArguments, DataCollatorWithPadding\nfrom transformers import AutoModelForSequenceClassification # Transformer model for sequence classification\n\nimport accelerate\n\n# from nlp import Dataset # Import custom 'Dataset' class for natural language processing tasks\nfrom imblearn.over_sampling import RandomOverSampler # For oversampling to handle class imbalance\n# import datasets # Import datasets library\n# from datasets import Dataset, Image, ClassLabel # Import custom 'Dataset', 'ClassLabel', and 'Image' classes\nfrom transformers import pipeline # Transformers library for pipelines\nfrom bs4 import BeautifulSoup # For parsing HTML content\n\nimport matplotlib.pyplot as plt # For data visualization\nimport itertools # For working with iterators\nfrom sklearn.metrics import ( # Import various metrics from scikit-learn\n accuracy_score, # For calculating accuracy\n roc_auc_score, # For ROC AUC score\n confusion_matrix, # For confusion matrix\n classification_report, # For classification report\n f1_score # For F1 score\n)\n\n# from datasets import load_metric # Import load_metric function to load evaluation metrics\n\nfrom tqdm import tqdm # For displaying progress bars\n\ntqdm.pandas() # Enable progress bars for pandas 
operations","metadata":{"id":"19rgBXHJXHFu","execution":{"iopub.status.busy":"2024-04-18T11:35:01.634372Z","iopub.execute_input":"2024-04-18T11:35:01.634653Z","iopub.status.idle":"2024-04-18T11:35:01.644352Z","shell.execute_reply.started":"2024-04-18T11:35:01.634630Z","shell.execute_reply":"2024-04-18T11:35:01.643480Z"},"trusted":true},"execution_count":52,"outputs":[]},{"cell_type":"code","source":"device = \"gpu:0\" if torch.cuda.is_available() else \"cpu\"\n# This line checks if a CUDA-enabled GPU is available.\n# If a GPU is available, it sets the device to \"gpu:0\" (the first GPU).\n# If no GPU is available, it sets the device to \"cpu\".\n\nivy.set_default_device(device)\n# This line sets the default device for Ivy operations.\n# Ivy will use the device specified above (either \"gpu:0\" or \"cpu\") for all computations.\n\nivy.set_soft_device_mode(True)\n# This line enables Ivy's \"soft device mode\".\n# In soft device mode, Ivy will attempt to automatically move tensors to the correct device\n# when performing operations involving tensors on different devices.\n# This can simplify tensor management and device handling in certain cases.\n","metadata":{"id":"bXr9tGFLGRPI","execution":{"iopub.status.busy":"2024-04-18T11:35:01.645624Z","iopub.execute_input":"2024-04-18T11:35:01.646036Z","iopub.status.idle":"2024-04-18T11:35:01.659129Z","shell.execute_reply.started":"2024-04-18T11:35:01.646005Z","shell.execute_reply":"2024-04-18T11:35:01.658011Z"},"trusted":true},"execution_count":53,"outputs":[]},{"cell_type":"code","source":"print(ivy.default_device())\n# This line prints the current default device set in Ivy.\n# It will print either \"gpu:0\" or \"cpu\", depending on the device specified earlier.\n\nprint(ivy.num_gpus())\n# This line prints the number of CUDA-enabled GPUs available on the system.\n# If one or more GPUs are available, it will print the number of GPUs.\n# If no GPU is available, it will print 0.\n\nprint(torch.cuda.is_available())\n# This line 
checks if PyTorch can access a CUDA-enabled GPU.\n# PyTorch is a popular machine learning library, and it uses CUDA for GPU acceleration.\n# If a CUDA-enabled GPU is available and PyTorch can access it, this line will print True.\n# If no CUDA-enabled GPU is available or PyTorch cannot access it, this line will print False.","metadata":{"id":"ijs6fSKL9QZ4","execution":{"iopub.status.busy":"2024-04-18T11:35:59.644311Z","iopub.execute_input":"2024-04-18T11:35:59.645116Z","iopub.status.idle":"2024-04-18T11:35:59.650992Z","shell.execute_reply.started":"2024-04-18T11:35:59.645082Z","shell.execute_reply":"2024-04-18T11:35:59.649951Z"},"trusted":true},"execution_count":56,"outputs":[{"name":"stdout","text":"gpu:0\n2\nTrue\n","output_type":"stream"}]},{"cell_type":"markdown","source":"Set the seeds.","metadata":{"id":"JU7qbxYdsVlK"}},{"cell_type":"code","source":"tf.keras.utils.set_random_seed(0)\n# This line sets a seed value for the random number generator used by TensorFlow and Keras.\n# Setting a seed allows you to reproduce the same results across different runs.\n# In this case, the seed is set to 0, which is a common value used for reproducibility.\n\ntorch.manual_seed(0)\n# This line sets the seed for the random number generator used by PyTorch.\n# Similar to the TensorFlow seed, it helps ensure reproducibility of results.\n# The seed value is set to 0 here.\n\npaddle.seed(0)\n# This line sets the seed for the random number generator used by PaddlePaddle.\n# PaddlePaddle is another deep learning framework, and setting the seed ensures reproducibility.\n# Again, the seed value is set to 0 for consistency across 
frameworks.","metadata":{"id":"HxD1xridsU_l","execution":{"iopub.status.busy":"2024-04-18T11:36:03.490320Z","iopub.execute_input":"2024-04-18T11:36:03.490657Z","iopub.status.idle":"2024-04-18T11:36:03.498653Z","shell.execute_reply.started":"2024-04-18T11:36:03.490633Z","shell.execute_reply":"2024-04-18T11:36:03.497473Z"},"trusted":true},"execution_count":57,"outputs":[{"execution_count":57,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}]},{"cell_type":"markdown","source":"Get the API key for ivy transpiler from your account and upload it to the project. Move it to the correct directory.","metadata":{"id":"zwU4oNrkXyxT"}},{"cell_type":"code","source":"pwd","metadata":{"id":"VT4kcIOKLn00","execution":{"iopub.status.busy":"2024-04-18T11:27:28.816298Z","iopub.execute_input":"2024-04-18T11:27:28.816789Z","iopub.status.idle":"2024-04-18T11:27:28.824734Z","shell.execute_reply.started":"2024-04-18T11:27:28.816757Z","shell.execute_reply":"2024-04-18T11:27:28.823849Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"'/kaggle/working'"},"metadata":{}}]},{"cell_type":"markdown","source":"First we're loading the tokenizer and the model from torch. 
All of the basic set-up instructions can be found here: https://colab.research.google.com/github/pytorch/pytorch.github.io/blob/master/assets/hub/huggingface_pytorch-transformers.ipynb#scrollTo=72d8f2de","metadata":{"id":"DWCnfosUshGK"}},{"cell_type":"code","source":"tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-cased')\n","metadata":{"id":"2rZY3rhisgXZ","execution":{"iopub.status.busy":"2024-04-18T11:27:28.825748Z","iopub.execute_input":"2024-04-18T11:27:28.826013Z","iopub.status.idle":"2024-04-18T11:27:35.762810Z","shell.execute_reply.started":"2024-04-18T11:27:28.825990Z","shell.execute_reply":"2024-04-18T11:27:35.762021Z"},"trusted":true},"execution_count":14,"outputs":[{"name":"stderr","text":"Downloading: \"https://github.com/huggingface/pytorch-transformers/zipball/main\" to /root/.cache/torch/hub/main.zip\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/49.0 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
labeltitle
4723471Isn't gentrification wonderful?
6014140He's still alive and older than you so take tips
6646000cow-erkraut.
2997091The Q could stand for Quality!
3393781I can't believe the gays did this to him!
\n"},"metadata":{}}]},{"cell_type":"markdown","source":"# DATASET AND MODEL OVERVIEW","metadata":{"id":"bXFPiT6SgPob"}},{"cell_type":"code","source":"!echo -n API_KEY > .ivy/key.pem","metadata":{"id":"cXATe1TGLn01","execution":{"iopub.status.busy":"2024-04-18T11:27:44.919364Z","iopub.execute_input":"2024-04-18T11:27:44.919646Z","iopub.status.idle":"2024-04-18T11:27:45.890356Z","shell.execute_reply.started":"2024-04-18T11:27:44.919622Z","shell.execute_reply":"2024-04-18T11:27:45.889212Z"},"trusted":true},"execution_count":17,"outputs":[{"name":"stdout","text":"/bin/bash: .ivy/key.pem: No such file or directory\n","output_type":"stream"}]},{"cell_type":"code","source":"def count_words(text: str) -> int:\n return len(text.split())\n\ndef count_symbols(text: str) -> int:\n return len(\"\".join(text.split()))\n\ndef symbol_to_word_ratio(text: str) -> float:\n return count_symbols(text)/count_words(text)\n\ndef upper_lower_ratio(text: str) -> float:\n text = \"\".join(text.split())\n return sum(1 for c in text if c.isupper())/(max([sum(1 for c in text if c.islower()), 1]))\n\ndf['word_count'] = df[\"title\"].apply(count_words)\ndf['symbol_count'] = df[\"title\"].apply(count_symbols)\ndf[\"upper_lower_ratio\"] = df[\"title\"].apply(upper_lower_ratio)\ndf[\"symbol_to_word_ratio\"] = df[\"title\"].apply(symbol_to_word_ratio)\ndf.sample(5)","metadata":{"id":"BVA6U5Y0c7vg","execution":{"iopub.status.busy":"2024-04-18T11:27:45.892087Z","iopub.execute_input":"2024-04-18T11:27:45.893004Z","iopub.status.idle":"2024-04-18T11:28:01.866409Z","shell.execute_reply.started":"2024-04-18T11:27:45.892967Z","shell.execute_reply":"2024-04-18T11:28:01.865554Z"},"trusted":true},"execution_count":18,"outputs":[{"execution_count":18,"output_type":"execute_result","data":{"text/plain":" label title word_count \\\n627321 0 How do people keep getting matches on Tinder? 8 \n378524 0 Same thing happened to me, so it's definitely ... 13 \n96238 0 I agree with you, but too much of a good thing... 
17 \n598116 0 'Twas a joke I do believe. 6 \n217680 0 [M] it will get bigger depending on luck of th... 11 \n\n symbol_count upper_lower_ratio symbol_to_word_ratio \n627321 38 0.057143 4.750000 \n378524 62 0.017241 4.769231 \n96238 53 0.020000 3.117647 \n598116 21 0.117647 3.500000 \n217680 42 0.025641 3.818182 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
labeltitleword_countsymbol_countupper_lower_ratiosymbol_to_word_ratio
6273210How do people keep getting matches on Tinder?8380.0571434.750000
3785240Same thing happened to me, so it's definitely ...13620.0172414.769231
962380I agree with you, but too much of a good thing...17530.0200003.117647
5981160'Twas a joke I do believe.6210.1176473.500000
2176800[M] it will get bigger depending on luck of th...11420.0256413.818182
\n
"},"metadata":{}}]},{"cell_type":"markdown","source":"A few plots to see some characteristics of the data.","metadata":{"id":"YY9ru5DbJqxa"}},{"cell_type":"code","source":"df_no_sarc = df.where(df[\"label\"] == 0)\ndf_no_sarc = df_no_sarc.where(df_no_sarc[\"word_count\"] <= 51)\ndf_sarc = df.where(df[\"label\"] == 1)\ndf_sarc = df_sarc.where(df_sarc[\"word_count\"] <= 51)\ndf_no_sarc = df_no_sarc[np.isfinite(df_no_sarc[\"word_count\"])]\ndf_sarc = df_sarc[np.isfinite(df_sarc[\"word_count\"])]\nplt.style.use('_mpl-gallery-nogrid')\n\nhist_df_no_sarc, bin_edges_no = np.histogram(df_no_sarc[\"word_count\"].values, density=True)\nhist_df_sarc, bin_edges = np.histogram(df_sarc[\"word_count\"].values, density=True)\n# plot:\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n\nbin_mids_no = [(bin_edges_no[i+1] + bin_edges_no[i])/2 for i in range(len(bin_edges_no) - 1)]\nbin_mids = [(bin_edges[i+1] + bin_edges[i])/2 for i in range(len(bin_edges) - 1)]\nax1.bar(bin_mids_no, hist_df_no_sarc, width=bin_edges_no[1] - bin_edges_no[0])\nax2.bar(bin_mids, hist_df_sarc, width=bin_edges[1] - bin_edges[0])\nax1.set_title(\"Hist no sarcasm\")\nax1.set_ylabel(\"density\")\nax1.set_xlabel(\"word count\")\nax1.set_xticks(bin_edges_no)\nax1.grid(True)\nax2.set_title(\"Hist sarcasm\")\nax2.set_xlabel(\"word count\")\nax2.set_xticks(bin_edges)\nax2.grid(True)\nplt.show()","metadata":{"id":"_LEHSFedgIBq","execution":{"iopub.status.busy":"2024-04-18T11:28:01.867545Z","iopub.execute_input":"2024-04-18T11:28:01.867816Z","iopub.status.idle":"2024-04-18T11:28:02.893326Z","shell.execute_reply.started":"2024-04-18T11:28:01.867793Z","shell.execute_reply":"2024-04-18T11:28:02.892139Z"},"trusted":true},"execution_count":19,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":""},"metadata":{}}]},{"cell_type":"code","source":"df_no_sarc = df.where(df[\"label\"] == 0)\ndf_no_sarc = df_no_sarc.where(df_no_sarc[\"symbol_count\"] <= 201)\ndf_sarc = df.where(df[\"label\"] == 1)\ndf_sarc = df_sarc.where(df_sarc[\"symbol_count\"] <= 201)\ndf_no_sarc = df_no_sarc[np.isfinite(df_no_sarc[\"symbol_count\"])]\ndf_sarc = df_sarc[np.isfinite(df_sarc[\"symbol_count\"])]\nplt.style.use('_mpl-gallery-nogrid')\n\nhist_df_no_sarc, bin_edges_no = np.histogram(df_no_sarc[\"symbol_count\"].values, density=True)\nhist_df_sarc, bin_edges = np.histogram(df_sarc[\"symbol_count\"].values, density=True)\n# plot:\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n\nbin_mids_no = [(bin_edges_no[i+1] + bin_edges_no[i])/2 for i in range(len(bin_edges_no) - 1)]\nbin_mids = [(bin_edges[i+1] + bin_edges[i])/2 for i in range(len(bin_edges) - 1)]\nax1.bar(bin_mids_no, hist_df_no_sarc, width=bin_edges_no[1] - bin_edges_no[0])\nax2.bar(bin_mids, hist_df_sarc, width=bin_edges[1] - bin_edges[0])\nax1.set_title(\"Hist no sarcasm\")\nax1.set_ylabel(\"density\")\nax1.set_xlabel(\"symbol count\")\nax1.set_xticks(bin_edges_no)\nax1.grid(True)\nax2.set_title(\"Hist sarcasm\")\nax2.set_xlabel(\"symbol count\")\nax2.set_xticks(bin_edges)\nax2.grid(True)\nplt.show()","metadata":{"id":"RcYhYzfygLc9","execution":{"iopub.status.busy":"2024-04-18T11:28:02.894545Z","iopub.execute_input":"2024-04-18T11:28:02.894873Z","iopub.status.idle":"2024-04-18T11:28:03.849902Z","shell.execute_reply.started":"2024-04-18T11:28:02.894846Z","shell.execute_reply":"2024-04-18T11:28:03.848948Z"},"trusted":true},"execution_count":20,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":""},"metadata":{}}]},{"cell_type":"code","source":"df_no_sarc = df.where(df[\"label\"] == 0)\ndf_no_sarc = df_no_sarc.where(df_no_sarc[\"upper_lower_ratio\"] <= 0.3)\ndf_sarc = df.where(df[\"label\"] == 1)\ndf_sarc = df_sarc.where(df_sarc[\"upper_lower_ratio\"] <= 0.3)\ndf_no_sarc = df_no_sarc[np.isfinite(df_no_sarc[\"upper_lower_ratio\"])]\ndf_sarc = df_sarc[np.isfinite(df_sarc[\"upper_lower_ratio\"])]\nplt.style.use('_mpl-gallery-nogrid')\n\nhist_df_no_sarc, bin_edges_no = np.histogram(df_no_sarc[\"upper_lower_ratio\"].values, density=True)\nhist_df_sarc, bin_edges = np.histogram(df_sarc[\"upper_lower_ratio\"].values, density=True)\n# plot:\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n\nbin_mids_no = [(bin_edges_no[i+1] + bin_edges_no[i])/2 for i in range(len(bin_edges_no) - 1)]\nbin_mids = [(bin_edges[i+1] + bin_edges[i])/2 for i in range(len(bin_edges) - 1)]\nax1.bar(bin_mids_no, hist_df_no_sarc, width=bin_edges_no[1] - bin_edges_no[0])\nax2.bar(bin_mids, hist_df_sarc, width=bin_edges[1] - bin_edges[0])\nax1.set_title(\"Hist no sarcasm\")\nax1.set_ylabel(\"density\")\nax1.set_xlabel(\"upper/lower ratio\")\nax1.set_xticks(bin_edges_no)\nax1.grid(True)\nax2.set_title(\"Hist sarcasm\")\nax2.set_xlabel(\"upper/lower ratio\")\nax2.set_xticks(bin_edges)\nax2.grid(True)\nplt.show()","metadata":{"id":"vvKbuhLaDaSP","execution":{"iopub.status.busy":"2024-04-18T11:28:03.851301Z","iopub.execute_input":"2024-04-18T11:28:03.851603Z","iopub.status.idle":"2024-04-18T11:28:04.803526Z","shell.execute_reply.started":"2024-04-18T11:28:03.851578Z","shell.execute_reply":"2024-04-18T11:28:04.802560Z"},"trusted":true},"execution_count":21,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":""},"metadata":{}}]},{"cell_type":"code","source":"df_no_sarc = df.where(df[\"label\"] == 0)\ndf_no_sarc = df_no_sarc.where(df_no_sarc[\"symbol_to_word_ratio\"] <= 11)\ndf_sarc = df.where(df[\"label\"] == 1)\ndf_sarc = df_sarc.where(df_sarc[\"symbol_to_word_ratio\"] <= 11)\ndf_no_sarc = df_no_sarc[np.isfinite(df_no_sarc[\"symbol_to_word_ratio\"])]\ndf_sarc = df_sarc[np.isfinite(df_sarc[\"symbol_to_word_ratio\"])]\nplt.style.use('_mpl-gallery-nogrid')\n\nhist_df_no_sarc, bin_edges_no = np.histogram(df_no_sarc[\"symbol_to_word_ratio\"].values, density=True)\nhist_df_sarc, bin_edges = np.histogram(df_sarc[\"symbol_to_word_ratio\"].values, density=True)\n# plot:\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n\nbin_mids_no = [(bin_edges_no[i+1] + bin_edges_no[i])/2 for i in range(len(bin_edges_no) - 1)]\nbin_mids = [(bin_edges[i+1] + bin_edges[i])/2 for i in range(len(bin_edges) - 1)]\nax1.bar(bin_mids_no, hist_df_no_sarc, width=bin_edges_no[1] - bin_edges_no[0])\nax2.bar(bin_mids, hist_df_sarc, width=bin_edges[1] - bin_edges[0])\nax1.set_title(\"Hist no sarcasm\")\nax1.set_ylabel(\"density\")\nax1.set_xlabel(\"symbols/words ratio\")\nax1.set_xticks(bin_edges_no)\nax1.grid(True)\nax2.set_title(\"Hist sarcasm\")\nax2.set_xlabel(\"symbols/words ratio\")\nax2.set_xticks(bin_edges)\nax2.grid(True)\nplt.show()","metadata":{"id":"HkuIzb1JF1U1","execution":{"iopub.status.busy":"2024-04-18T11:28:04.804629Z","iopub.execute_input":"2024-04-18T11:28:04.804912Z","iopub.status.idle":"2024-04-18T11:28:05.795170Z","shell.execute_reply.started":"2024-04-18T11:28:04.804888Z","shell.execute_reply":"2024-04-18T11:28:05.794061Z"},"trusted":true},"execution_count":22,"outputs":[{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAABNoAAAI6CAYAAADxHyXyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABWkUlEQVR4nO3de5hVdaE//vdwvyuIgqKB4hUvoBCGl7RCyMxLnZTMk4imPUc5ahz7ppWSt/BKmKGkhXpM01LrlJYykpgWSV5I80JpIqYCaioKP2Vk1u+PjnOcABlwzezZ+Ho9zzyw116X92fPDHzmPXutVVMURREAAAAA4H1pU+kAAAAAALA+ULQBAAAAQAkUbQAAAABQAkUbAAAAAJRA0QYAAAAAJVC0AQAAAEAJFG0AAAAAUAJFGwAAAACUQNEGAAAAACVQtAHrZMCAATnqqKMqHQMAgCpg7gh8UCjagFx99dWpqanJ/fffv8rn99133+y0007v+zi/+tWv8q1vfet97wcAgMoxdwRYPUUbsE7mzZuXK6+8cq22+dWvfpUzzzyzmRIBANBamTsCHxTtKh0AqE4dO3asdISq8Pbbb6e+vj4dOnSodBQAgIqp9rljfX19li9fnk6dOlU6CtDKeUcbsE7+9TobdXV1OfPMM7PNNtukU6dO2WijjbLXXnultrY2SXLUUUdl6tSpSZKampqGjzUd49Of/nTuvffeDB8+PJ06dcpWW22V//7v/15p3b/97W859NBD06tXr3Tp0iUf+chHcttttzVpLLW1tdlrr72y4YYbplu3btluu+3y9a9/veH55cuX54wzzsjQoUOzwQYbpGvXrtl7771z1113NdrP/PnzU1NTk4suuihTpkzJwIED07Fjxzz22GNJkieeeCKHHXZYNt5443Tu3DnbbbddvvGNbzRs/8wzz+T444/Pdtttl86dO2ejjTbKoYcemvnz5zc6zppe63de727dumXBggX59Kc/nW7duqVfv34Nn4NHHnkkH//4x9O1a9f0798/119/fZNeKwCAddESc8f7778/o0ePTu/evdO5c+dsueWWOfrooxutc9FFF2WPPfbIRhttlM6dO2fo0KG56aabVtpXTU1Nxo8fn+uuuy477rhjOnbsmNtvvz1J8txzz+WYY47JZpttlo4dO2bLLbfMf/zHf2T58uVJkn/84x855ZRTsvPOO6dbt27p0aNH9t9///zpT39a6TiXXnppdtxxx3Tp0iU9e/bMsGHDGs3LvvWtb6WmpiZ/+ctf8u///u/ZYIMNsvHGG+f0009PURR59tlnc/DBB6dHjx7p27dvLr744iZ8NoDm5B1tQIPXXnstL7300krL6+rq1rjtt771rUyaNClf+tKXMnz48CxZsiT3339/Hnzwwey333758pe/nOeffz61tbW59tprm5zpySefzOc+97kcc8wxGTt2bKZPn56jjjoqQ4cOzY477pgkWbRoUfbYY48sW7YsJ554YjbaaKNcc801Oeigg3LTTTflM5/5zGr3/+ijj+bTn/50dtlll5x11lnp2LFjnnzyyfzud79rWGfJkiX5wQ9+kMMPPzzHHntsXn/99fzwhz/M6NGjM2fOnAwZMqTRPq+66qq8+eabOe6449KxY8f06tUrDz/8cPbee++0b98+xx13XAYMGJCnnnoqv/zlL3PuuecmSf74xz/m97//fT7/+c9n8803z/z583P55Zdn3333zWOPPZYuXbo06bV+x4oVK7L//vvnox/9aC644IJcd911GT9+fLp27ZpvfOMbOeKII/LZz34206ZNy5FHHpkRI0Zkyy23bPLnBgD4YGtNc8fFixdn1KhR2XjjjXPqqadmww03zPz583PLLbc0Wu+SSy7JQQcdlCOOOCLLly/PDTfckEMPPTS33nprDjjggEbr/uY3v8lPfvKTjB8/Pr17986AAQPy/PPPZ/jw4Xn11Vdz3HHHZfvtt89zzz2Xm
266KcuWLUuHDh3yt7/9LT//+c9z6KGHZsstt8yiRYvy/e9/P/vss08ee+yxbLbZZkmSK6+8MieeeGI+97nP5aSTTsqbb76Zhx9+OPfdd1++8IUvNMoyZsyY7LDDDjnvvPNy22235ZxzzkmvXr3y/e9/Px//+Mdz/vnn57rrrsspp5ySD3/4w/noRz+6xtcMaCYF8IF31VVXFUne82PHHXdstE3//v2LsWPHNjwePHhwccABB7zncU444YRibf7Z6d+/f5Gk+O1vf9uwbPHixUXHjh2L//qv/2pYdvLJJxdJinvuuadh2euvv15sueWWxYABA4oVK1as9hjf+c53iiTFiy++uNp13n777eKtt95qtOyVV14p+vTpUxx99NENy55++ukiSdGjR49i8eLFjdb/6Ec/WnTv3r145plnGi2vr69v+PuyZctWOvbs2bOLJMV///d/Nyxryms9duzYIknx7W9/u1Hmzp07FzU1NcUNN9zQsPyJJ54okhQTJ058z30CABRF65w7/uxnPyuSFH/84x/fc71/nW8tX7682GmnnYqPf/zjjZYnKdq0aVM8+uijjZYfeeSRRZs2bVZ5nHfmdW+++eZK88+nn3666NixY3HWWWc1LDv44INXep3+1cSJE4skxXHHHdew7O233y4233zzoqampjjvvPMalr8z13v36wy0PKeOAg2mTp2a2tralT522WWXNW674YYb5tFHH81f//rXUjMNGjQoe++9d8PjjTfeONttt13+9re/NSz71a9+leHDh2evvfZqWNatW7ccd9xxmT9/fsOpm6vLnST/8z//k/r6+lWu07Zt24ZrrNXX1+cf//hH3n777QwbNiwPPvjgSuv/27/9WzbeeOOGxy+++GJ++9vf5uijj86HPvShRuu++xSIzp07N/y9rq4uL7/8crbeeutsuOGGjY6zNq/1l770pUbbbbfddunatWsOO+ywhuXbbbddNtxww0avKQDAmrSmueM7c7pbb731Pd9R9+751iuvvJLXXnste++99yrndPvss08GDRrU8Li+vj4///nPc+CBB2bYsGErrf/OvK5jx45p0+afP2qvWLEiL7/8csPlSf51Tvf3v/89f/zjH9c4vnfP6dq2bZthw4alKIocc8wxjfb3r/NkoOUp2oAGw4cPz8iRI1f66Nmz5xq3Peuss/Lqq69m2223zc4775yvfvWrefjhh993pn8tppKkZ8+eeeWVVxoeP/PMM9luu+1WWm+HHXZoeH51xowZkz333DNf+tKX0qdPn3z+85/PT37yk5VKt2uuuSa77LJLwzVENt5449x222157bXXVtrnv55++c5kZ023uf///r//L2eccUa22GKLdOzYMb17987GG2+cV199tdFxmvpad+rUqVHhlyQbbLBBNt9885WucbLBBhs0ek0BANakNc0d99lnn/zbv/1bzjzzzPTu3TsHH3xwrrrqqrz11luN1rv11lvzkY98JJ06dUqvXr2y8cYb5/LLL2/SnO7FF1/MkiVL1jinq6+vz3e+851ss802jeZ0Dz/8cKPjfO1rX0u3bt0yfPjwbLPNNjnhhBMaXb7k3f51TrzBBhukU6dO6d2790rLzemgshRtQCk++tGP5qmnnsr06dOz00475Qc/+EF22223/OAHP3hf+23btu0qlxdF8b72+47OnTvnt7/9be6888588YtfzMMPP5wxY8Zkv/32y4oVK5IkP/rRj3LUUUdl4MCB+eEPf5jbb789tbW1+fjHP77Kd8G9+zela+M///M/c+655+awww7LT37yk8yYMSO1tbXZaKONGh2nqa/16l675n5NAQDWpOy5Y01NTW666abMnj0748ePz3PPPZejjz46Q4cOzRtvvJEkueeee3LQQQelU6dOueyyy/KrX/0qtbW1+cIXvrDKedC6zum+/e1vZ8KECfnoRz+aH/3oR7njjjtSW1ubHXfcsdGcbocddsi8efNyww03ZK+99srNN
9+cvfbaKxMnTlxpn6uav5nTQeukaANK06tXr4wbNy4//vGP8+yzz2aXXXbJt771rYbn13SnqHXVv3//zJs3b6XlTzzxRMPz76VNmzb5xCc+kcmTJ+exxx7Lueeem9/85jcNdxW96aabstVWW+WWW27JF7/4xYwePTojR47Mm2++2aR8W221VZLkz3/+83uud9NNN2Xs2LG5+OKL87nPfS777bdf9tprr7z66qsrrbum1xoAoLVrjrnjRz7ykZx77rm5//77c9111+XRRx/NDTfckCS5+eab06lTp9xxxx05+uijs//++2fkyJFN3vfGG2+cHj16NGlO97GPfSw//OEP8/nPfz6jRo3KyJEjVzmn69q1a8aMGZOrrroqCxYsyAEHHJBzzz23yfNMoPVRtAGlePnllxs97tatW7beeutGb9fv2rVrkqxykvF+fOpTn8qcOXMye/bshmVLly7NFVdckQEDBjS6tsa/+sc//rHSsnfuIvpO9nd+W/ju3w7ed999jY73XjbeeON89KMfzfTp07NgwYJGz717n23btl3pN5CXXnppwzvr3tGU1xoAoDUre+74yiuvrDSPWtWcrqamptHcav78+fn5z3/epMxt2rTJIYcckl/+8pe5//77V3r+neOvak7305/+NM8991yjZf/6GnTo0CGDBg1KURRNunMr0Dq1q3QAYP0waNCg7Lvvvhk6dGh69eqV+++/PzfddFPGjx/fsM7QoUOTJCeeeGJGjx6dtm3b5vOf//z7Pvapp56aH//4x9l///1z4oknplevXrnmmmvy9NNP5+abb264GO2qnHXWWfntb3+bAw44IP3798/ixYtz2WWXZfPNN2+4ucKnP/3p3HLLLfnMZz6TAw44IE8//XSmTZuWQYMGNZyKsCbf/e53s9dee2W33XbLcccdly233DLz58/Pbbfdlrlz5zYc59prr80GG2yQQYMGZfbs2bnzzjuz0UYbNdpXU15rAIDWrOy54zXXXJPLLrssn/nMZzJw4MC8/vrrufLKK9OjR4986lOfSpIccMABmTx5cj75yU/mC1/4QhYvXpypU6dm6623bvL14b797W9nxowZ2WeffXLcccdlhx12yAsvvJCf/vSnuffee7Phhhvm05/+dM4666yMGzcue+yxRx555JFcd911DWc5vGPUqFHp27dv9txzz/Tp0yePP/54vve97+WAAw5I9+7d1+VlBVoBRRtQihNPPDG/+MUvMmPGjLz11lvp379/zjnnnHz1q19tWOezn/1s/vM//zM33HBDfvSjH6UoilKKtj59+uT3v/99vva1r+XSSy/Nm2++mV122SW//OUvc8ABB7zntgcddFDmz5+f6dOn56WXXkrv3r2zzz775Mwzz8wGG2yQJDnqqKOycOHCfP/7388dd9yRQYMG5Uc/+lF++tOfZtasWU3KOHjw4PzhD3/I6aefnssvvzxvvvlm+vfv3+jun5dccknatm2b6667Lm+++Wb23HPP3HnnnRk9enSjfTXltQYAaM3Knjvus88+mTNnTm644YYsWrQoG2ywQYYPH57rrruu4aYGH//4x/PDH/4w5513Xk4++eRsueWWOf/88zN//vwmF239+vXLfffdl9NPPz3XXXddlixZkn79+mX//fdPly5dkiRf//rXs3Tp0lx//fW58cYbs9tuu+W2227Lqaee2mhfX/7yl3Pddddl8uTJeeONN7L55pvnxBNPzDe/+c11eUmBVqKmcKVEAAAAAHjfXKMNAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBK0q3SA5lRfX5/nn38+3bt3T01NTaXjAABVriiKvP7669lss83Spo3fV7Zm5oEAQFnWZg64Xhdtzz//fLbYYotKxwAA1jPPPvtsNt9880rH4D2YBwIAZWvKHHC9Ltq6d++e5J8vRI8ePZrlGHV1dZkxY0ZGjRqV9u3bN8sxmlO150+qf
wzVnj8xhtag2vMn1T+Gas+fGENTLFmyJFtssUXDHIP3Z+rUqbnwwguzcOHCDB48OJdeemmGDx++ynWvvvrqjBs3rtGyjh075s0331zl+s09D/T9UnnVnj8xhtag2vMn1T+Gas+fGENr0JrmgOt10fbOaQI9evRo1qKtS5cu6dGjR9V+MVZz/qT6x1Dt+RNjaA2qPX9S/WOo9vyJMawNpyK+fzfeeGMmTJiQadOmZffdd8+UKVMyevTozJs3L5tssskqt+nRo0fmzZvX8Pi9Pg/NPQ/0/VJ51Z4/MYbWoNrzJ9U/hmrPnxhDa9Ca5oDrddEGAEDrNHny5Bx77LEN71KbNm1abrvttkyfPj2nnnrqKrepqalJ37591+o4dXV1qaure995V7Xfd/9Zjap9DNWePzGG1qDa8yfVP4Zqz58YQ2vQ3PnXZr+KNgAAWtTy5cvzwAMP5LTTTmtY1qZNm4wcOTKzZ89e7XZvvPFG+vfvn/r6+uy222759re/nR133PE9jzVjxox06dKltOz/qra2ttn23VKqfQzVnj8xhtag2vMn1T+Gas+fGENr0Fz5ly1b1uR1FW0AALSol156KStWrEifPn0aLe/Tp0+eeOKJVW6z3XbbZfr06dlll13y2muv5aKLLsoee+yRRx999D0vSjxq1KhmO3W0trY2++23X1WeYpNU/xiqPX9iDK1BtedPqn8M1Z4/MYbWoLnzL1mypMnrKtoAAGj1RowYkREjRjQ83mOPPbLDDjvk+9//fs4+++zVbte+fftm/YGhufffEqp9DNWePzGG1qDa8yfVP4Zqz58YQ2vQXPnXZp9tSj86AAC8h969e6dt27ZZtGhRo+WLFi1q8jXY2rdvn1133TVPPvlkc0QEAFgnijYAAFpUhw4dMnTo0MycObNhWX19fWbOnNnoXWvvZcWKFXnkkUey6aabNldMAIC15tRRAABa3IQJEzJ27NgMGzYsw4cPz5QpU7J06dKGu5AeeeSR6devXyZNmpQkOeuss/KRj3wkW2+9dV599dVceOGFeeaZZ/KlL32pksMAAGhE0QYAQIsbM2ZMXnzxxZxxxhlZuHBhhgwZkttvv73hBgkLFixImzb/d/LFK6+8kmOPPTYLFy5Mz549M3To0Pz+97/PoEGDKjUEAICVKNoAAKiI8ePHZ/z48at8btasWY0ef+c738l3vvOdFkgFALDuXKMNAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoATtKh0ASE6a3S4nzZ5R6RjvyyUjKp0AAKD6mAcCrF+8ow0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBK0iqJt6tSpGTBgQDp16pTdd989c+bMadJ2N
9xwQ2pqanLIIYc0b0AAAAAAWIOKF2033nhjJkyYkIkTJ+bBBx/M4MGDM3r06CxevPg9t5s/f35OOeWU7L333i2UFAAAAABWr+JF2+TJk3Psscdm3LhxGTRoUKZNm5YuXbpk+vTpq91mxYoVOeKII3LmmWdmq622asG0AAAAALBq7Sp58OXLl+eBBx7Iaaed1rCsTZs2GTlyZGbPnr3a7c4666xssskmOeaYY3LPPfes8Th1dXWpq6srJfOq9v3uP6tNtedPqn8M1Zp7Vap5LOvL11G15k+qfwzVnj8xhrXZPwAArEpFi7aXXnopK1asSJ8+fRot79OnT5544olVbnPvvffmhz/8YebOndvk48yYMSNdunR5P1HXqLa2tln339yqPX9S7WOo6Ldiaar7c/BP1T6Gas+fVP8Yqj1/YgzvZdmyZc2yXwAA1g9V9dP966+/ni9+8Yu58sor07t37yZvN2rUqPTo0aNZMtXV1aW2tjb77bdf2rdv3yzHaE7Vnj+p/jHU1dUls++qdIxSVOvnIFk/vo6qOX9S/WOo9vyJMTTFkiVLSt8nAADrj4oWbb17907btm2zaNGiRssXLVqUvn37rrT+U089lfnz5+fAAw9sWFZfX58kadeuXebNm5eBAweutF379u2b/QeGljhGc6r2/Mn6MYZqtz58Dqp9DNWeP6n+MVR7/sQY1rRfAABYnYreDKFDhw4ZOnRoZs6c2bCsvr4+M2fOzIgRI1Zaf/vtt88jjzySuXPnNnwcdNBB+djHPpa5c+dmiy22aMn4AAAAANCg4qeOTpgwIWPHjs2wYcMyfPjwTJkyJUuXLs24ceOSJEceeWT69euXSZMmpVOnTtlpp50abb/hhhsmyUrLAQAAAKAlVbxoGzNmTF588cWcccYZWbhwYYYMGZLbb7+94QYJCxYsSJs2FX3jHQAAAACsUcWLtiQZP358xo8fv8rnZs2a9Z7bXn311eUHAgAAAIC15K1iAAAAAFACRRsAAAAAlEDRBgAAAAAlULQBAAAAQAkUbQAAAABQAkUbAAAVMXXq1AwYMCCdOnXK7rvvnjlz5jRpuxtuuCE1NTU55JBDmjcgAMBaUrQBANDibrzxxkyYMCETJ07Mgw8+mMGDB2f06NFZvHjxe243f/78nHLKKdl7771bKCkAQNO1q3QAAAA+eCZPnpxjjz0248aNS5JMmzYtt912W6ZPn55TTz11ldusWLEiRxxxRM4888zcc889efXVV9d4nLq6utTV1ZUZvWG/7/6zGlX7GKo9f1Ld2f9VtY5lffo6qtYxVHv+xBhag+bOvzb7VbQBANCili9fngceeCCnnXZaw7I2bdpk5MiRmT179mq3O+uss7LJJpvkmGOOyT333NOkY82YMSNdunR535lXp7a2ttn23VKqfQzVnn99+ZGs2j8P1Z4/qf4xVHv+xBhag+bKv2zZsiavu378qw4AQNV46aWXsmLFivTp06fR8j59+uSJJ55Y5Tb33ntvfvjDH2bu3LlrdaxRo0alR48e6xp1terq6lJbW5v99tsv7du3L33/LaHax1Dt+ZP/fYfE7LsqHaMU1fp5WF++jqp5DNWePzGG1qC58y9ZsqTJ6yraAABo1V5//fV88YtfzJVXXpnevXuv1bbt27dv1h8Ymnv/LaHax1Dt+dcX1f55qPb8SfWPodrzJ8bQGjRX/rXZp6INAIAW1bt377Rt2zaLFi1qtHzRokXp27fvSus/9dRTmT9/fg488MCGZfX19UmSdu3aZd68eRk4cGDzhgYAaAJ3HQUAoEV16NAhQ4cOzcyZMxuW1dfXZ+bMmRkxYsRK62+//fZ55JFHMnfu3IaPgw46KB/72Mcyd+7cbLHFFi0ZHwBgtbyjDQCAFjdhwoSMHTs2w4YNy/DhwzNlypQsXbq04S6kRx55ZPr165dJkyalU6dO2WmnnRptv+GGGybJSssBACpJ0QYAQIsbM2ZMXnzxxZxxxhlZuHBhhgwZk
ttvv73hBgkLFixImzZOvgAAqouiDQCAihg/fnzGjx+/yudmzZr1ntteffXV5QcCAHif/JoQAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAErSKom3q1KkZMGBAOnXqlN133z1z5sxZ7bq33HJLhg0blg033DBdu3bNkCFDcu2117ZgWgAAAABYWcWLthtvvDETJkzIxIkT8+CDD2bw4MEZPXp0Fi9evMr1e/XqlW984xuZPXt2Hn744YwbNy7jxo3LHXfc0cLJAQAAAOD/VLxomzx5co499tiMGzcugwYNyrRp09KlS5dMnz59levvu++++cxnPpMddtghAwcOzEknnZRddtkl9957bwsnBwAAAID/066SB1++fHkeeOCBnHbaaQ3L2rRpk5EjR2b27Nlr3L4oivzmN7/JvHnzcv755692vbq6utTV1ZWSeVX7fvef1aba8yfVP4Zqzb0q1TyW9eXrqFrzJ9U/hmrPnxjD2uwfAABWpaJF20svvZQVK1akT58+jZb36dMnTzzxxGq3e+2119KvX7+89dZbadu2bS677LLst99+q11/xowZ6dKlS2m5V6W2trZZ99/cqj1/Uu1jqOi3Ymmq+3PwT9U+hmrPn1T/GKo9f2IM72XZsmXNsl8AANYPVfnTfffu3TN37ty88cYbmTlzZiZMmJCtttoq++677yrXHzVqVHr06NEsWerq6lJbW5v99tsv7du3b5ZjNKdqz59U/xjq6uqS2XdVOkYpqvVzkKwfX0fVnD+p/jFUe/7EGJpiyZIlpe8TAID1R0WLtt69e6dt27ZZtGhRo+WLFi1K3759V7tdmzZtsvXWWydJhgwZkscffzyTJk1abdHWvn37Zv+BoSWO0ZyqPX+yfoyh2q0Pn4NqH0O150+qfwzVnj8xhjXtFwAAVqeiN0Po0KFDhg4dmpkzZzYsq6+vz8yZMzNixIgm76e+vj5vvfVWc0QEAAAAgCap+KmjEyZMyNixYzNs2LAMHz48U6ZMydKlSzNu3LgkyZFHHpl+/fpl0qRJSZJJkyZl2LBhGThwYN5666386le/yrXXXpvLL7+8ksMAAAAA4AOu4kXbmDFj8uKLL+aMM87IwoULM2TIkNx+++0NN0hYsGBB2rT5vzfeLV26NMcff3z+/ve/p3Pnztl+++3zox/9KGPGjKnUEAAAAACg8kVbkowfPz7jx49f5XOzZs1q9Picc87JOeec0wKpAAAAAKDpKnqNNgAAAABYXyjaAACoiKlTp2bAgAHp1KlTd
t9998yZM2e1695yyy0ZNmxYNtxww3Tt2jVDhgzJtdde24JpAQDWTNEGAECLu/HGGzNhwoRMnDgxDz74YAYPHpzRo0dn8eLFq1y/V69e+cY3vpHZs2fn4Ycfzrhx4zJu3LjccccdLZwcAGD1WsU12gAA+GCZPHlyjj322IY7zU+bNi233XZbpk+fnlNPPXWl9ffdd99Gj0866aRcc801uffeezN69OjVHqeuri51dXWlZn9nv+/+sxpV+xiqPX9S3dn/VbWOZX36OqrWMVR7/sQYWoPmzr82+1W0AQDQopYvX54HHnggp512WsOyNm3aZOTIkZk9e/Yaty+KIr/5zW8yb968nH/++e+57owZM9KlS5f3nXl1amtrm23fLaXax1Dt+deXH8mq/fNQ7fmT6h9DtedPjKE1aK78y5Yta/K668e/6gAAVI2XXnopK1asSJ8+fRot79OnT5544onVbvfaa6+lX79+eeutt9K2bdtcdtll2W+//d7zWKNGjUqPHj1Kyf1udXV1qa2tzX777Zf27duXvv+WUO1jqPb8yf++Q2L2XZWOUYpq/TysL19H1TyGas+fGENr0Nz5lyxZ0uR1FW0AAFSF7t27Z+7cuXnjjTcyc+bMTJgwIVtttdVKp5W+W/v27Zv1B4bm3n9LqPYxVHv+9UW1fx6qPX9S/WOo9vyJMbQGzZV/bfapaAMAoEX17t07bdu2zaJFixotX7RoUfr27bva7dq0aZOtt946STJkyJA8/vjjmTRp0nsWbQAALcldRwEAaFEdOnTI0KFDM3PmzIZl9fX1mTlzZkaMGNHk/dTX1+ett95qjogAAOvEO9oAAGhxEyZMyNixYzNs2LAMHz48U6ZMydKlSxvuQnrkkUemX79+mTRpUpJk0qRJGTZsWAYOHJi33norv/rVr3Lttdfm8ssvr+QwAAAaUbQBANDixowZkxdffDFnnHFGFi5cmCFDhuT2229vuEHCggUL0qbN/518sXTp0hx//PH5+9//ns6dO2f77bfPj370o4wZM6ZSQwAAWImiDQCAihg/fnzGjx+/yudmzZrV6PE555yTc845pwVSAQCsO9doAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqwTkXbXXfdVXYOAAAAAKhq61S0ffKTn8zAgQNzzjnn5Nlnny07EwAAAABUnXUq2p577rmMHz8+N910U7baaquMHj06P/nJT7J8+fKy8wEAAABAVVinoq137975yle+krlz5+a+++7Ltttum+OPPz6bbbZZTjzxxPzpT38qOycAAAAAtGrv+2YIu+22W0477bSMHz8+b7zxRqZPn56hQ4dm7733zqOPPlpGRgAAAABo9da5aKurq8tNN92UT33qU+nfv3/uuOOOfO9738uiRYvy5JNPpn///jn00EPLzAoAAAAArVa7ddnoP//zP/PjH/84RVHki1/8Yi644ILstNNODc937do1F110UTbbbLPSggIAAABAa7ZORdtjjz2WSy+9NJ/97GfTsWPHVa7Tu3fv3HXXXe8rHAAAAABUi3U6dXTixIk59NBDVyrZ3n777fz2t79NkrRr1y777LPP+08IAAAAAFVgnYq2j33sY/nHP/6x0vLXXnstH/vYx953KAAAAACoNutUtBVFkZqampWWv/zyy+natev7DgUAAAAA1WatrtH22c9+NklSU1OTo446qtGpoytWrMjDDz+cPfbYo9yEAAAAAFAF1qpo22CDDZL88x1t3bt3T+fOnRue69ChQz7ykY/k2GOPLTchAAAAAFSBtSrarrrqqiTJgAEDcsoppzhNFAAAAAD+11oVbe+YOHFi2TkAAAAAoKo1uWjbbbfdMnPmzPTs2TO77rrrK
m+G8I4HH3ywlHAAAAAAUC2aXLQdfPDBDTc/OOSQQ5orDwAAAABUpSYXbe8+XdSpowAAAADQWJt12ejZZ5/N3//+94bHc+bMycknn5wrrriitGAAAAAAUE3WqWj7whe+kLvuuitJsnDhwowcOTJz5szJN77xjZx11lmlBgQAAACAarBORduf//znDB8+PEnyk5/8JDvvvHN+//vf57rrrsvVV19dZj4AAAAAqArrVLTV1dU13BjhzjvvzEEHHZQk2X777fPCCy+Ulw4AAAAAqsQ6FW077rhjpk2blnvuuSe1tbX55Cc/mSR5/vnns9FGG5UaEAAAAACqwToVbeeff36+//3vZ999983hhx+ewYMHJ0l+8YtfNJxSCgAAAAAfJO3WZaN99903L730UpYsWZKePXs2LD/uuOPSpUuX0sIBAAAAQLVYp6ItSdq2bduoZEuSAQMGvN88AAAAAFCV1unU0UWLFuWLX/xiNttss7Rr1y5t27Zt9AEAAAAAHzTr9I62o446KgsWLMjpp5+eTTfdNDU1NWXnAgAAAICqsk5F27333pt77rknQ4YMKTkOAAAAAFSndTp1dIsttkhRFGVnAQAAAICqtU5F25QpU3Lqqadm/vz5JccBAAAAgOq0TqeOjhkzJsuWLcvAgQPTpUuXtG/fvtHz//jHP0oJBwAAAADVYp2KtilTppQcAwAAAACq2zoVbWPHji07BwAAAABUtXW6RluSPPXUU/nmN7+Zww8/PIsXL06S/PrXv86jjz5aWjgAAAAAqBbrVLTdfffd2XnnnXPffffllltuyRtvvJEk+dOf/pSJEyeWGhAAAAAAqsE6FW2nnnpqzjnnnNTW1qZDhw4Nyz/+8Y/nD3/4Q2nhAAAAAKBarNM12h555JFcf/31Ky3fZJNN8tJLL73vULC2TprdLifNnlHpGAAAAMAH2Dq9o23DDTfMCy+8sNLyhx56KP369XvfoQAAAACg2qxT0fb5z38+X/va17Jw4cLU1NSkvr4+v/vd73LKKafkyCOPLDsjAAAAALR661S0ffvb387222+fLbbYIm+88UYGDRqUvffeO3vssUe++c1vlp0RAAAAAFq9dbpGW4cOHXLllVfmjDPOyCOPPJI33ngju+66a7bZZpuy8wEAAABAVWhy0TZhwoT3fP7ddxudPHnyuicCAAAAgCrU5KLtoYceavT4wQcfzNtvv53tttsuSfKXv/wlbdu2zdChQ8tNCAAAAABVoMlF21133dXw98mTJ6d79+655ppr0rNnzyTJK6+8knHjxmXvvfcuPyUAAAAAtHLrdDOEiy++OJMmTWoo2ZKkZ8+eOeecc3LxxReXFg4AAAAAqsU6FW1LlizJiy++uNLyF198Ma+//vr7DgUAAAAA1WadirbPfOYzGTduXG655Zb8/e9/z9///vfcfPPNOeaYY/LZz3627IwAAAAA0Oo1+Rpt7zZt2rSccsop+cIXvpC6urp/7qhduxxzzDG58MILSw0IAAAAANVgnYq2Ll265LLLLsuFF16Yp556KkkycODAdO3atdRwAAAAAFAt1qloe0fXrl2zyy67lJUFAAAAAKrW+yraAAAA+GA7aXa7nDR7RqVjrLNLRlQ6AbA+WaebIQAAAAAAjSnaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoAStomibOnVqBgwYkE6dOmX33XfPnDlzVrvulVdemb333js9e/ZMz549M3LkyPdcHwAAAABaQsWLthtvvDETJkzIxIkT8+CDD2bw4MEZPXp0Fi9evMr1Z82alcMPPzx33XVXZs+enS222CKjRo3Kc88918LJAQAAAOD/VLxomzx5co499tiMGzcugwYNyrRp09KlS5dMnz59letfd911Of744zNkyJBsv/32+cEPfpD6+vrMnDmzhZMDAAAAwP9pV8mDL1++PA888
EBOO+20hmVt2rTJyJEjM3v27CbtY9myZamrq0uvXr1Wu05dXV3q6ured97V7fvdf1abas+fVHf29U01fy6q/Xuh2vMn1T+Gas+fGMPa7B8AAFalokXbSy+9lBUrVqRPnz6Nlvfp0ydPPPFEk/bxta99LZtttllGjhy52nVmzJiRLl26vK+sa1JbW9us+29u1Z6/wl/K/K/q/zqq/jFUe/6k+sdQ7fkTY3gvy5Yta5b9AgCwfqjqduK8887LDTfckFmzZqVTp06rXW/UqFHp0aNHs2Soq6tLbW1t9ttvv7Rv375ZjtGcqj1/8r/vLph9V6VjkFT911E1fy9Ue/6k+sdQ7fkTY2iKJUuWlL5PAADWHxUt2nr37p22bdtm0aJFjZYvWrQoffv2fc9tL7roopx33nm58847s8suu7znuu3bt2/2Hxha4hjNqdrz0zqsD19H1T6Gas+fVP8Yqj1/Ygxr2i8AAKxORW+G0KFDhwwdOrTRjQzeubHBiBEjVrvdBRdckLPPPju33357hg0b1hJRAQAAAOA9VfyuoxMmTMiVV16Za665Jo8//nj+4z/+I0uXLs24ceOSJEceeWSjmyWcf/75Of300zN9+vQMGDAgCxcuzMKFC/PGG29UaggAAKyDqVOnZsCAAenUqVN23333zJkzZ7XrXnnlldl7773Ts2fP9OzZMyNHjnzP9QEAKqHiRduYMWNy0UUX5YwzzsiQIUMyd+7c3H777Q03SFiwYEFeeOGFhvUvv/zyLF++PJ/73Oey6aabNnxcdNFFlRoCAABr6cYbb8yECRMyceLEPPjggxk8eHBGjx6dxYsXr3L9WbNm5fDDD89dd92V2bNnZ4sttsioUaPy3HPPtXByAIDVaxU3Qxg/fnzGjx+/yudmzZrV6PH8+fObPxAAAM1q8uTJOfbYYxvOYpg2bVpuu+22TJ8+PaeeeupK61933XWNHv/gBz/IzTffnJkzZ+bII49skcwAAGvSKoo2AAA+OJYvX54HHnig0eVB2rRpk5EjR2b27NlN2seyZctSV1eXXr16ved6dXV1/7xDecne2Wdz7LulVPsYqj1/Ut3Z1zfV/Lmo9u+Fas+fGENr0Nz512a/ijYAAFrUSy+9lBUrVjRcKuQdffr0yRNPPNGkfXzta1/LZpttlpEjR77nejNmzEiXLl3WOeua1NbWNtu+W0q1j6Ha8/uRrHWo/q+j6h9DtedPjKE1aK78y5Yta/K6/lUHAKCqnHfeebnhhhsya9asdOrU6T3XHTVqVHr06FF6hrq6utTW1ma//fZL+/btS99/S6j2MVR7/uR/3yEx+65KxyCp+q+jav5eqPb8iTG0Bs2df8mSJU1eV9EGAECL6t27d9q2bZtFixY1Wr5o0aL07dv3Pbe96KKLct555+XOO+/MLrvsssZjtW/fvll/YGju/beEah9DteendVgfvo6qfQzVnj8xhtagufKvzT4rftdRAAA+WDp06JChQ4dm5syZDcvq6+szc+bMjBgxYrXbXXDBBTn77LNz++23Z9iwYS0RFQBgrXhHGwAALW7ChAkZO3Zshg0bluHDh2fKlClZunRpw11IjzzyyPTr1y+TJk1Kkpx//vk544wzcv3112fAgAFZuHBhkqRbt27p1q1bxcYBAPBuijagFCfNbpeTZs+odIz35ZLVv4kCgJKNGTMmL774Ys4444wsXLgwQ4YMye23395wg4QFCxakTZv/O/ni8ssvz/Lly/O5z32u0X4mTpyYb33rWy0ZHQBgtRRtAABUxPjx4zN+/PhVPjdr1qxGj+fPn9/8gQAA3idFGwAAUJXWh3fUA7B+cTMEAAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKN
gAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKEHFi7apU6dmwIAB6dSpU3bffffMmTNntes++uij+bd/+7cMGDAgNTU1mTJlSssFBQAAAID3UNGi7cYbb8yECRMyceLEPPjggxk8eHBGjx6dxYsXr3L9ZcuWZauttsp5552Xvn37tnBaAAAAAFi9ihZtkydPzrHHHptx48Zl0KBBmTZtWrp06ZLp06evcv0Pf/jDufDCC/P5z38+HTt2bOG0AAAAALB67Sp14OXLl+eBBx7Iaaed1rCsTZs2GTlyZGbPnl3qserq6lJXV1fqPt+973f/WW2qPX9S3dlpfar162l9+l6u1jFUe/7EGNZm/wAAsCoVK9peeumlrFixIn369Gm0vE+fPnniiSdKPdaMGTPSpUuXUvf5r2pra5t1/82t2vNX8EuZ9Uy1fy9Ue/6k+sdQ7fkTY3gvy5Yta5b9flBNnTo1F154YRYuXJjBgwfn0ksvzfDhw1e57qOPPpozzjgjDzzwQJ555pl85zvfycknn9yygQEA1uAD0U6MGjUqPXr0aJZ919XVpba2Nvvtt1/at2/fLMdoTtWeP/nfdxfMvqvSMVhPVOv3wvryvVzNY6j2/IkxNMWSJUtK3+cH1TvX6p02bVp23333TJkyJaNHj868efOyySabrLT+O9fqPfTQQ/OVr3ylAokBANasYkVb796907Zt2yxatKjR8kWLFpV+o4P27ds3+w8MLXGM5lTt+aEs1f69UO35k+ofQ7XnT4xhTfulHO++Vm+STJs2LbfddlumT5+eU089daX1P/zhD+fDH/5wkqzyeQCA1qBiRVuHDh0ydOjQzJw5M4ccckiSpL6+PjNnzsz48eMrFQsAgGa2Plyr1zUNK69ac9M6VfPX0/ryvVyt+RNjaA1a03V6K3rq6IQJEzJ27NgMGzYsw4cPz5QpU7J06dKG32weeeSR6devXyZNmpTkn5Oyxx57rOHvzz33XObOnZtu3bpl6623rtg4AABouvXpWr2uaVhpH4gr4dACqvv74J+qfQzVnj8xhtagNVynt6L/M40ZMyYvvvhizjjjjCxcuDBDhgzJ7bff3jDpWrBgQdq0adOw/vPPP59dd9214fFFF12Uiy66KPvss09mzZrV0vEBAGjlmutava5pWHmu00uZqvX7IFk/vperOX9iDK1Ba7pOb8V/BTR+/PjVnir6r+XZgAEDUhRFC6QCAKC5rE/X6nVNQ1g/rA/fB9U+hmrPnxhDa9AartPbZs2rAABAed59rd53vHOt3hEjRlQwGQDA+1Pxd7QBAPDB41q9AMD6SNEGAECLc61eAGB9pGgDAKAiXKsXAFjfuEYbAAAAAJRA0QYAA
AAAJVC0AQAAAEAJFG0AAAAAUAJFGwAAAACUQNEGAAAAACVQtAEAAABACRRtAAAAAFACRRsAAAAAlEDRBgAAAAAlULQBAAAAQAkUbQAAAABQAkUbAAAAAJRA0QYAAAAAJVC0AQAAAEAJFG0AAAAAUAJFGwAAAACUQNEGAAAAACVQtAEAAABACRRtAAAAAFACRRsAAAAAlEDRBgAAAAAlULQBAAAAQAkUbQAAAABQAkUbAAAAAJRA0QYAAAAAJVC0AQAAAEAJFG0AAAAAUAJFGwAAAACUQNEGAAAAACVQtAEAAABACdpVOgAAAABUykmz2+Wk2TMqHeN9uWREpRMA7/CONgAAAAAogaINAAAAAErg1FGA/1Xtpw04ZQAAAKCyvKMNAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2gAAAACgBO0qHQCAcpw0u11Omj2j0jHel0tGVDoBAADAuvOONgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEboYAAAAAVazab4rlhlisT7yjDQAAAABK4B1tVP1vPwAAAABaA+9oAwAAAIASKNoAAAAAoASKNgAAAAAogaINAAAAAErgZggAtBrVfnMWt6YHAIAPNu9oAwAAAIASKNoAAAAAoASKNgAAAAAogWu0AQDAB1S1XxsTWD+sD/8WuVYv71C0AUBJ1odJYlL9YzDRBQCgUpw6CgAAAAAl8I42AAAAgPdhfTizwVkB5WgVRdvUqVNz4YUXZuHChRk8eHAuvfTSDB8+fLXr//SnP83pp5+e+fPnZ5tttsn555+fT33qUy2YuLH14RsKAKClVfscEADWJ9XebbSWorDip47eeOONmTBhQiZOnJgHH3wwgwcPzujRo7N48eJVrv/73/8+hx9+eI455pg89NBDOeSQQ3LIIYfkz3/+cwsnBwBgXZkDAgDro4oXbZMnT86xxx6bcePGZdCgQZk2bVq6dOmS6dOnr3L9Sy65JJ/85Cfz1a9+NTvssEPOPvvs7Lbbbvne977XwskBAFhX5oAAwPqooqeOLl++PA888EBOO+20hmVt2rTJyJEjM3v27FVuM3v27EyYMKHRstGjR+fnP//5SusWRZEk+cc//pG6urrygr9LXV1d6t9a1iz7BgDW3rJlb+fll19O+/btS9/366+/nuT/5hism+aeAybNPw+sq6vLf85anv+cterjAwAtq7XMAStatL300ktZsWJF+vTp02h5nz598sQTT6xym4ULF65y/YULF6607jsvxJZbbllSYgCgtftCCxzj9ddfzwYbbNACR1o/NfccMDEPBIAPmtYyB2wVN0NoLptttlmeffbZdO/ePTU1NZWOAwBUuaIo8vrrr2ezzTardBTWwDwQACjL2swBK1q09e7dO23bts2iRYsaLV+0aFH69u27ym369u3b5PXbtGmTzTffvLzAAMAHnneyvX/NPQdMzAMBgHI1dQ5Y0ZshdOjQIUOHDs3MmTMbltXX12fmzJkZMWLV92UdMWJEo/WTpLa2drXrAwDQupgDAgDrq4qfOjphwoSMHTs2w4YNy/DhwzNlypQsXbo048aNS5IceeSR6devXyZNmpQkOemkk7LPPvvk4osvzgEHHJAbbrgh999/f6644opKDgMAgLVgDggArI8qXrSNGTMmL774Ys4444wsXLgwQ4YMye23395wsdsFCxakTZv/e+PdHnvskeuvvz7f/OY38
/Wvfz3bbLNNfv7zn2ennXaq1BAAAFhL5oAAwPqooqeOvmP8+PF55pln8tZbb+W+++7L7rvv3vDcrFmzcvXVVzda/9BDD828efPy1ltv5c9//nM+9alPtXDi5Le//W0OPPDAbLbZZqmpqVntreVbq0mTJuXDH/5wunfvnk022SSHHHJI5s2bV+lYa+Xyyy/PLrvskh49eqRHjx4ZMWJEfv3rX1c61jo777zzUlNTk5NPPrnSUZrsW9/6Vmpqahp9bL/99pWOtVaee+65/Pu//3s22mijdO7cOTvvvHPuv//+SsdqsgEDBqz0OaipqckJJ5xQ6WhNsmLFipx++unZcsst07lz5wwcODBnn312k26b3Zq8/vrrOfnkk9O/f/907tw5e+yxR/74xz9WOtZqren/sKIocsYZZ2TTTTdN586dM3LkyPz1r3+tTNhVWFP+W265JaNGjcpGG22UmpqazJ07tyI5WTNzwJZnDtj6mANWTjXPA6t9DpisH/NAc8CWVw3zwFZRtFWjpUuXZvDgwZk6dWqlo6yTu+++OyeccEL+8Ic/pLa2NnV1dRk1alSWLl1a6WhNtvnmm+e8887LAw88kPvvvz8f//jHc/DBB+fRRx+tdLS19sc//jHf//73s8suu1Q6ylrbcccd88ILLzR83HvvvZWO1GSvvPJK9txzz7Rv3z6//vWv89hjj+Xiiy9Oz549Kx2tyf74xz82ev1ra2uT/POH0Wpw/vnn5/LLL8/3vve9PP744zn//PNzwQUX5NJLL610tLXypS99KbW1tbn22mvzyCOPZNSoURk5cmSee+65SkdbpTX9H3bBBRfku9/9bqZNm5b77rsvXbt2zejRo/Pmm2+2cNJVW1P+pUuXZq+99sr555/fwsn4IDAHrDxzwNahmueASfXPA6t9DpisH/NAc8CWVxXzwIL3LUnxs5/9rNIx3pfFixcXSYq777670lHel549exY/+MEPKh1jrbz++uvFNttsU9TW1hb77LNPcdJJJ1U6UpNNnDixGDx4cKVjrLOvfe1rxV577VXpGKU66aSTioEDBxb19fWVjtIkBxxwQHH00Uc3WvbZz362OOKIIyqUaO0tW7asaNu2bXHrrbc2Wr7bbrsV3/jGNyqUqun+9f+w+vr6om/fvsWFF17YsOzVV18tOnbsWPz4xz+uQML39l7/Bz/99NNFkuKhhx5q0Ux8cJgDth7mgC2r2ueARbH+zQOrbQ5YFNU/DzQHrLzWOg/0jjaSJK+99lqSpFevXhVOsm5WrFiRG264IUuXLq26u4+dcMIJOeCAAzJy5MhKR1knf/3rX7PZZptlq622yhFHHJEFCxZUOlKT/eIXv8iwYcNy6KGHZpNNNsmuu+6aK6+8stKx1tny5cvzox/9KEcffXRqamoqHadJ9thjj8ycOTN/+ctfkiR/+tOfcu+992b//fevcLKme/vtt7NixYp06tSp0fLOnTtX3W/3k+Tpp5/OwoULG/2btMEGG2T33XfP7NmzK5gMaA7mgJVjDlhZ69M8sBrngEn1zwPNAVmdit8Mgcqrr6/PySefnD333LPqLij8yCOPZMSIEXnzzTfTrVu3/OxnP8ugQYMqHavJbrjhhjz44IOt+jz+97L77rvn6quvznbbbZcXXnghZ555Zvbee+/8+c9/Tvfu3Ssdb43+9re/5fLLL8+ECRPy9a9/PX/84x9z4oknpkOHDhk7dmyl4621n//853n11Vdz1FFHVTpKk5166qlZsmRJtt9++7Rt2zYrVqzIueeemyOOOKLS0Zqse/fuGTFiRM4+++zssMMO6dOnT3784x9n9uzZ2XrrrSsdb60tXLgwSRouSP+OPn36NDwHrB/MASvHHLDy1qd5YDXOAZPqnweaA7I6ijZywgkn5M9//nNVtu7bbbdd5s6dm9deey033XRTxo4dm7vvvrsqJlrPPvtsTjrppNTW1q70W5Bq8e7fNu2yyy7Zfffd079///zkJz/JM
cccU8FkTVNfX59hw4bl29/+dpJk1113zZ///OdMmzat6iZYSfLDH/4w+++/fzbbbLNKR2myn/zkJ7nuuuty/fXXZ8cdd8zcuXNz8sknZ7PNNquqz8G1116bo48+Ov369Uvbtm2z22675fDDD88DDzxQ6WgAq2UOWBnmgK3D+jQPrMY5YLJ+zAPNAVkVp45+wI0fPz633npr7rrrrmy++eaVjrPWOnTokK233jpDhw7NpEmTMnjw4FxyySWVjtUkDzzwQBYvXpzddtst7dq1S7t27XL33Xfnu9/9btq1a5cVK1ZUOuJa23DDDbPtttvmySefrHSUJtl0001XmpDvsMMOVXfqQ5I888wzufPOO/OlL32p0lHWyle/+tWceuqp+fznP5+dd945X/ziF/OVr3wlkyZNqnS0tTJw4MDcfffdeeONN/Lss89mzpw5qaury1ZbbVXpaGutb9++SZJFixY1Wr5o0aKG54DqZw5YOeaArcP6Mg+s1jlgsn7MA80BWRVF2wdUURQZP358fvazn+U3v/lNttxyy0pHKkV9fX3eeuutSsdokk984hN55JFHMnfu3IaPYcOG5YgjjsjcuXPTtm3bSkdca2+88UaeeuqpbLrpppWO0iR77rln5s2b12jZX/7yl/Tv379CidbdVVddlU022SQHHHBApaOslWXLlqVNm8b/FbVt2zb19fUVSvT+dO3aNZtuumleeeWV3HHHHTn44IMrHWmtbbnllunbt29mzpzZsGzJkiW57777qu76R8DKzAErzxywdVhf5oHVOgdM1q95oDkg7+bU0XX0xhtvNPqNzdNPP525c+emV69e+dCHPlTBZE1zwgkn5Prrr8///M//pHv37g3nXG+wwQbp3LlzhdM1zWmnnZb9998/H/rQh/L666/n+uuvz6xZs3LHHXdUOlqTdO/efaXroXTt2jUbbbRR1Vwn5ZRTTsmBBx6Y/v375/nnn8/EiRPTtm3bHH744ZWO1iRf+cpXsscee+Tb3/52DjvssMyZMydXXHFFrrjiikpHWyv19fW56qqrMnbs2LRrV13/rB944IE599xz86EPfSg77rhjHnrooUyePDlHH310paOtlTvuuCNFUWS77bbLk08+ma9+9avZfvvtM27cuEpHW6U1/R928skn55xzzsk222yTLbfcMqeffno222yzHHLIIZUL/S5ryv+Pf/wjCxYsyPPPP58kDT9I9e3b129ked/MASvPHLDyqn0OmKwf88BqngMm68c80Byw5VXFPLDF73O6nrjrrruKJCt9jB07ttLRmmRV2ZMUV111VaWjNdnRRx9d9O/fv+jQoUOx8cYbF5/4xCeKGTNmVDrW+1Jtt3YfM2ZMsemmmxYdOnQo+vXrV4wZM6Z48sknKx1rrfzyl78sdtppp6Jjx47F9ttvX1xxxRWVjrTW7rjjjiJJMW/evEpHWWtLliwpTjrppOJDH/pQ0alTp2KrrbYqvvGNbxRvvfVWpaOtlRtvvLHYaqutig4dOhR9+/YtTjjhhOLVV1+tdKzVWtP/YfX19cXpp59e9OnTp+jYsWPxiU98olV9fa0p/1VXXbXK5ydOnFjR3KwfzAErzxyw8taHOWBRVP88sJrngEWxfswDzQFbXjXMA2uKoijKLO4AAAAA4IPINdoAAAAAoASKNgAAAAAogaINAAAAAEqgaAMAAACAEijaAAAAAKAEijYAAAAAKIGiDQAAAABKoGgDAAAAgBIo2oAWM2DAgEyZMuV97eNb3/pWhgwZUkqeltSacs+aNSs1NTV59dVXKx0FAPgAMAccUukYScwBoaUo2oD13jXXXJO99tqr0jEqYt99983JJ5/caNkee+yRF154IRtssEFlQgEAtABzwJMbLTMHhJahaAPWe//zP/+Tgw46qEWOVVdX1+qP06FDh/Tt2zc1NTUlJgIAaF3MARszB4SWoWiDD6ibbropO++8czp37pyNNtooI0eOzNKlS/Pb3/427du3z
8KFCxutf/LJJ2fvvfdOklx99dXZcMMNc+utt2a77bZLly5d8rnPfS7Lli3LNddckwEDBqRnz5458cQTs2LFikb7ef3113P44Yena9eu6devX6ZOndro+QULFuTggw9Ot27d0qNHjxx22GFZtGjRascxa9asDB8+PF27ds2GG26YPffcM88880zD82+++WZmzJiRgw46KN/73vey0047NTz385//PDU1NZk2bVrDspEjR+ab3/xmw+PLL788AwcOTIcOHbLddtvl2muvbXT8mpqaXH755TnooIPStWvXnHvuuUmS8847L3369En37t1zzDHH5M0331yr3O82f/781NTU5MYbb8w+++yTTp065brrrsvLL7+cww8/PP369UuXLl2y884758c//nHDdkcddVTuvvvuXHLJJampqUlNTU3mz5+/ytMGbr755uy4447p2LFjBgwYkIsvvni1rzkAUL3MAc0BzQGhmRXAB87zzz9ftGvXrpg8eXLx9NNPFw8//HAxderU4vXXXy+Koii23Xbb4oILLmhYf/ny5UXv3r2L6dOnF0VRFFdddVXRvn37Yr/99isefPDB4u677y422mijYtSoUcVhhx1WPProo8Uvf/nLokOHDsUNN9zQsJ/+/fsX3bt3LyZNmlTMmzev+O53v1u0bdu2mDFjRlEURbFixYpiyJAhxV577VXcf//9xR/+8Idi6NChxT777NOwj4kTJxaDBw8uiqIo6urqig022KA45ZRTiieffLJ47LHHiquvvrp45plnGta/9dZbi2233bYoiqJ4+OGHi5qammLx4sVFURTFySefXPTu3bsYM2ZMwzi7dOlS1NbWFkVRFLfcckvRvn37YurUqcW8efOKiy++uGjbtm3xm9/8pmH/SYpNNtmkmD59evHUU08VzzzzTHHjjTcWHTt2LH7wgx8UTzzxRPGNb3yj6N69+1rlfrenn366SFIMGDCguPnmm4u//e1vxfPPP1/8/e9/Ly688MLioYceKp566qmG1/O+++4riqIoXn311WLEiBHFscceW7zwwgvFCy+8ULz99tvFXXfdVSQpXnnllaIoiuL+++8v2rRpU5x11lnFvHnziquuuqro3LlzcdVVVzXtCwoAqArmgOaA5oDQ/BRt8AH0wAMPFEmK+fPnr/L5888/v9hhhx0aHt98881Ft27dijfeeKMoin9OspIUTz75ZMM6X/7yl4suXbo0TNSKoihGjx5dfPnLX2543L9//+KTn/xko2ONGTOm2H///YuiKIoZM2YUbdu2LRYsWNDw/KOPPlokKebMmVMUReNJ1ssvv1wkKWbNmrXasR577LHFKaecUhRFUdTX1xcbbbRR8dOf/rQoiqIYMmRIMWnSpKJv375FURTFvffeW7Rv375YunRpURRFscceexTHHntso/0deuihxac+9amGx0mKk08+udE6I0aMKI4//vhGy3bfffe1yv1u70yypkyZssZ1DzjggOK//uu/Gh7vs88+xUknndRonX+dZH3hC18o9ttvv0brfPWrXy0GDRrUpHwAQHUwBzQHNAeE5ufUUfgAGjx4cD7xiU9k5513zqGHHporr7wyr7zySsPzRx11VJ588sn84Q9/SPLP0wQOO+ywdO3atWGdLl26ZODAgQ2P+/TpkwEDBqRbt26Nli1evLjRsUeMGLHS48cffzxJ8vjjj2eLLbbIFlts0fD8oEGDsuGGGzas8269evXKUUcdldGjR+fAAw/MJZdckhdeeKHh+aIo8stf/rLh2hw1NTX56Ec/mlmzZuXVV1/NY489luOPPz5vvfVWnnjiidx999358Ic/nC5dujTk2XPPPRsdc88991wpy7Bhwxo9fvzxx7P77ruvdtxryr06/3qcFStW5Oyzz87OO++cXr16pVu3brnjjjuyYMGCNe7rX/Ouapx//etfVzrtAwCoXuaA5oD/mtccEMqnaIMPoLZt26a2tja//vWvM2jQoFx66aXZbrvt8
vTTTydJNtlkkxx44IG56qqrsmjRovz617/O0Ucf3Wgf7du3b/S4pqZmlcvq6+ubdSxXXXVVZs+enT322CM33nhjtt1224bJ4Zw5c/L2229njz32aFh/3333zaxZs3LPPfdk1113TY8ePRomXnfffXf22Weftc7w7slnGbmbepwLL7wwl1xySb72ta/lrrvuyty5czN69OgsX758rfMAAOs/c0BzQKD5KdrgA6qmpiZ77rlnzjzzzDz00EPp0KFDfvaznzU8/6UvfSk33nhjrrjiigwcOHCl33atq3+dSPzhD3/IDjvskCTZYYcd8uyzz+bZZ59teP6xxx7Lq6++mkGDBq12n7vuumtOO+20/P73v89OO+2U66+/Psk/7zR1wAEHpG3btg3r7rPPPnnsscfy05/+NPvuu2+Sf0687rzzzvzud79rWPZOnt/97neNjvW73/3uPbO8s9199933nuN+r9xN9bvf/S4HH3xw/v3f/z2DBw/OVlttlb/85S+N1unQocMafyO5unFuu+22jV47AKD6mQOaA747rzkglK9dpQMALe++++7LzJkzM2rUqGyyySa577778uKLLzZMdpJk9OjR6dGjR84555ycddZZpR37d7/7XS644IIccsghqa2tzU9/+tPcdtttSf55t6edd945RxxxRKZMmZK33347xx9/fPbZZ5+V3jKfJE8//XSuuOKKHHTQQdlss80yb968/PWvf82RRx6ZJPnFL36xUvZddtklPXv2zPXXX59bb701yT8nWaecckrDxPMdX/3qV3PYYYdl1113zciRI/PLX/4yt9xyS+688873HONJJ52Uo446KsOGDcuee+6Z6667Lo8++mi22mqrJuVuqm222SY33XRTfv/736dnz56ZPHlyFi1a1GgSOGDAgNx3332ZP39+unXrll69eq20n//6r//Khz/84Zx99tkZM2ZMZs+ene9973u57LLL1ioPANC6mQOaA76bOSA0k0pfJA5oeY899lgxevToYuONNy46duxYbLvttsWll1660nqnn3560bZt2+L5559vtPyqq64qNthgg0bL3n2B2neMHTu2OPjggxse9+/fvzjzzDOLQw89tOjSpUvRt2/f4pJLLmm0zTPPPFMcdNBBRdeuXYvu3bsXhx56aLFw4cJVHmfhwoXFIYccUmy66aZFhw4div79+xdnnHFGsWLFiuLJJ58sOnbs2HDx3nc7+OCDi3bt2jVctHfFihVFz549i4985CMrrXvZZZcVW221VdG+ffti2223Lf77v/+70fNJip/97GcrbXfuuecWvXv3Lrp161aMHTu2+H//7/81KfeqvHMh3IceeqjR8pdffrk4+OCDi27duhWbbLJJ8c1vfrM48sgjG73m8+bNKz7ykY8UnTt3LpIUTz/99EoXwi2KorjpppuKQYMGFe3bty8+9KEPFRdeeOEqswAA1csc0BzQHBCaX01RFEXlaj6gNTvmmGPy4osv5he/+EWlo6y1yZMn584778yvfvWrSkcBAKgq5oAA686po8BKXnvttTzyyCO5/vrrq3KClSSbb755TjvttErHAACoGuaAAO+fd7QBK9l3330zZ86cfPnLX853vvOdSscBAKAFmAMCvH+KNgAAAAAoQZtKBwAAAACA9YGiDQAAAABKoGgDAAAAgBIo2gAAAACgBIo2AAAAACiBog0AAAAASqBoAwAAAIASKNoAAAAAoAT/PzP0TRPbya6tAAAAAElFTkSuQmCC"},"metadata":{}}]},{"cell_type":"code","source":"gc.collect()","metadata":{"id":"nk3vK1u3FlZc","execution":{"iopub.status.busy":"2024-04-18T11:28:05.797052Z","iopub.execute_input":"2024-04-18T11:28:05.797736Z","iopub.status.idle":"2024-04-1
8T11:28:06.233266Z","shell.execute_reply.started":"2024-04-18T11:28:05.797700Z","shell.execute_reply":"2024-04-18T11:28:06.232423Z"},"trusted":true},"execution_count":23,"outputs":[{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"24829"},"metadata":{}}]},{"cell_type":"markdown","source":"# BUILDING LSTM ON CORE IVY","metadata":{"id":"Vy7iZ8QnAS2-"}},{"cell_type":"code","source":"# dir(tokenizer)","metadata":{"id":"SEPdRlMT6Ybd","execution":{"iopub.status.busy":"2024-04-18T11:28:06.234636Z","iopub.execute_input":"2024-04-18T11:28:06.235287Z","iopub.status.idle":"2024-04-18T11:28:06.536280Z","shell.execute_reply.started":"2024-04-18T11:28:06.235254Z","shell.execute_reply":"2024-04-18T11:28:06.535318Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"markdown","source":"Setting up the device for the computations.","metadata":{"id":"hKaKQ1kRLn04"}},{"cell_type":"code","source":"#imports the cuDF library (CUDA Data Frame) into your Python environment.\nimport cudf","metadata":{"id":"RsojtOQNLn04","execution":{"iopub.status.busy":"2024-04-18T11:28:06.537638Z","iopub.execute_input":"2024-04-18T11:28:06.538100Z","iopub.status.idle":"2024-04-18T11:28:10.200722Z","shell.execute_reply.started":"2024-04-18T11:28:06.538068Z","shell.execute_reply":"2024-04-18T11:28:10.199918Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"train_test_ratio = 0.9\nfrac_dataset = 0.02","metadata":{"id":"oj86qS4aLn05","execution":{"iopub.status.busy":"2024-04-18T11:28:10.201894Z","iopub.execute_input":"2024-04-18T11:28:10.202233Z","iopub.status.idle":"2024-04-18T11:28:10.207169Z","shell.execute_reply.started":"2024-04-18T11:28:10.202202Z","shell.execute_reply":"2024-04-18T11:28:10.206120Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"code","source":"#importing dataset and preprocessing\n\ndf = cudf.read_csv(\"/kaggle/input/sarcasm/train-balanced-sarcasm.csv\")\ndf = df.drop_duplicates()\ndf = 
df.rename(columns={'comment': 'title'})\ndf = df[['label', 'title']]\ndf = df[~df['label'].isnull()]\ndf = df[~df['title'].isnull()]\n# Shuffle the rows; sample() returns a new frame, so the result must be assigned back.\n# The seed keeps the later train/eval split reproducible across runs.\ndf = df.sample(frac=1, random_state=42).reset_index(drop=True)\ndf.sample(5)\n","metadata":{"id":"2ft2bHc1Ln05","execution":{"iopub.status.busy":"2024-04-18T11:28:10.208363Z","iopub.execute_input":"2024-04-18T11:28:10.208754Z","iopub.status.idle":"2024-04-18T11:28:12.509049Z","shell.execute_reply.started":"2024-04-18T11:28:10.208724Z","shell.execute_reply":"2024-04-18T11:28:12.507884Z"},"trusted":true},"execution_count":27,"outputs":[{"name":"stderr","text":"get_mempolicy: Operation not permitted\n","output_type":"stream"},{"execution_count":27,"output_type":"execute_result","data":{"text/plain":" label title\n427824 1 What an absolute fucking professional\n284023 1 I thought we still were at ddr4... Crazy world\n88830 1 I think if Jan were to leave tomorrow, he'd ge...\n331949 1 say bye to guild system.. they ninja-ed it\n573293 1 Overclocking is easy, just change a few bios s...","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
labeltitle
4278241What an absolute fucking professional
2840231I thought we still were at ddr4... Crazy world
888301I think if Jan were to leave tomorrow, he'd ge...
3319491say bye to guild system.. they ninja-ed it
5732931Overclocking is easy, just change a few bios s...
\n
"},"metadata":{}}]},{"cell_type":"code","source":"df_full = df # Keep a reference to the full dataframe (an alias, not a copy); df is rebound below, so re-run the loading cell before re-running this one\ndf_size = len(df_full) # Get the total number of rows in the dataframe\nsplit = int(df_size * train_test_ratio * frac_dataset) # Calculate the split index for the training set\ncutoff = int(df_size * frac_dataset) # Calculate the cutoff index for the evaluation set\n\n# Split the dataframe into training and evaluation sets\ndf = df_full.iloc[:split,:] # Training set\ndf_eval = df_full.iloc[split:cutoff,:] # Evaluation set\n\nprint(len(df)) # Print the number of rows in the training set","metadata":{"id":"CSTVRW2DLn05","execution":{"iopub.status.busy":"2024-04-18T11:28:12.515674Z","iopub.execute_input":"2024-04-18T11:28:12.516006Z","iopub.status.idle":"2024-04-18T11:28:12.525496Z","shell.execute_reply.started":"2024-04-18T11:28:12.515978Z","shell.execute_reply":"2024-04-18T11:28:12.523331Z"},"trusted":true},"execution_count":28,"outputs":[{"name":"stdout","text":"18193\n","output_type":"stream"}]},{"cell_type":"code","source":"print(torch.cuda.is_available())\n# Check if PyTorch can access a CUDA-enabled GPU\n\ndevice = ivy.as_native_dev(\"gpu:0\")\n# Convert the Ivy device string \"gpu:0\" (the first CUDA-enabled GPU) into the backend's native device object\n\nivy.set_default_device(\"gpu:0\")\n# Set the default device in Ivy to \"gpu:0\"\n\nprint(ivy.default_device())\n# Print the current default device in Ivy\n\nivy.set_soft_device_mode(True)\n# Enable Ivy's soft device mode, which automatically moves tensors to the correct device\n\nprint(device)\n# Print the native device object for 
\"gpu:0\"","metadata":{"id":"VQlQ4V_-Ln06","execution":{"iopub.status.busy":"2024-04-18T11:30:25.812984Z","iopub.execute_input":"2024-04-18T11:30:25.813692Z","iopub.status.idle":"2024-04-18T11:30:25.820231Z","shell.execute_reply.started":"2024-04-18T11:30:25.813663Z","shell.execute_reply":"2024-04-18T11:30:25.819307Z"},"trusted":true},"execution_count":42,"outputs":[{"name":"stdout","text":"True\ngpu:0\ncuda:0\n","output_type":"stream"}]},{"cell_type":"code","source":"print(tokenizer.vocab_size)\n# Print the size of the vocabulary (number of unique tokens) used by the tokenizer\n\nprint(tokenizer.all_special_tokens_extended)\n# Print a list of all special tokens (e.g., padding token, start/end of sequence tokens) used by the tokenizer, including those added by extensions\n\nprint(tokenizer.all_special_ids)\n# Print a list of token IDs corresponding to the special tokens used by the tokenizer\n\nprint(tokenizer.pad_token_id)\n# Print the token ID assigned to the padding token used by the tokenizer for padding sequences to a fixed length","metadata":{"id":"mKyWQmUR6grC","execution":{"iopub.status.busy":"2024-04-18T11:28:12.543418Z","iopub.execute_input":"2024-04-18T11:28:12.543769Z","iopub.status.idle":"2024-04-18T11:28:12.551887Z","shell.execute_reply.started":"2024-04-18T11:28:12.543743Z","shell.execute_reply":"2024-04-18T11:28:12.550739Z"},"trusted":true},"execution_count":30,"outputs":[{"name":"stdout","text":"28996\n['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']\n[100, 102, 0, 101, 103]\n0\n","output_type":"stream"}]},{"cell_type":"code","source":"sample = list(df.sample(8)[\"title\"].to_pandas())\n# Sample 8 rows from the \"title\" column of the DataFrame df, convert the result to a pandas Series, and store the values in the list sample\n\nprint(sample)\n# Print the sampled titles\n\ntokenizer(sample, add_special_tokens=True, padding=True, truncation=True)\n# Apply the tokenizer to the sampled 
titles","metadata":{"id":"_jEnXjjUOo7f","execution":{"iopub.status.busy":"2024-04-18T11:28:12.553586Z","iopub.execute_input":"2024-04-18T11:28:12.553976Z","iopub.status.idle":"2024-04-18T11:28:12.569607Z","shell.execute_reply.started":"2024-04-18T11:28:12.553943Z","shell.execute_reply":"2024-04-18T11:28:12.568576Z"},"trusted":true},"execution_count":31,"outputs":[{"name":"stdout","text":"['Yeah, based on the \"choices\", Kanye West looks appealing.', 'This was the year Roman started pumping money into their club, the year the BPL chsnged and not for the better.', 'Well, we don\\'t have a culture of our own, so I guess that\\'s why we should just abandon any of these so-called \"traditions\" right?', 'Also a bigger terminal velocity', \"Yeah let's just get over the fact that the president is a complete fucking manchild\", \"#'MURICA\", \"Because civil disobedience isn't a thing and it would be sooooo much work to convince people to not pay their federal taxes.\", 'Imaginary :c']\n","output_type":"stream"},{"execution_count":31,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [[101, 2814, 117, 1359, 1113, 1103, 107, 9940, 107, 117, 14812, 3382, 1162, 1537, 2736, 17117, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 1188, 1108, 1103, 1214, 2264, 1408, 15486, 1948, 1154, 1147, 1526, 117, 1103, 1214, 1103, 21062, 2162, 22572, 1116, 18288, 1105, 1136, 1111, 1103, 1618, 119, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2119, 117, 1195, 1274, 112, 189, 1138, 170, 2754, 1104, 1412, 1319, 117, 1177, 146, 3319, 1115, 112, 188, 1725, 1195, 1431, 1198, 11092, 1251, 1104, 1292, 1177, 118, 1270, 107, 7181, 107, 1268, 136, 102], [101, 2907, 170, 6706, 6020, 10537, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2814, 1519, 112, 188, 1198, 1243, 1166, 1103, 1864, 1115, 1103, 2084, 1110, 170, 2335, 8750, 1299, 21289, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 108, 112, 
150, 19556, 9741, 1592, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [101, 2279, 2987, 4267, 7301, 18100, 2762, 112, 189, 170, 1645, 1105, 1122, 1156, 1129, 1177, 5658, 5658, 1277, 1250, 1106, 7627, 1234, 1106, 1136, 2653, 1147, 2877, 7538, 119, 102, 0, 0, 0, 0, 0, 0], [101, 146, 1918, 10533, 3113, 131, 172, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]}"},"metadata":{}}]},{"cell_type":"code","source":"# Import the Dataset class from the torch.utils.data module.\nfrom torch.utils.data import Dataset","metadata":{"id":"PU8tcMRKLn06","execution":{"iopub.status.busy":"2024-04-18T11:28:12.571056Z","iopub.execute_input":"2024-04-18T11:28:12.571341Z","iopub.status.idle":"2024-04-18T11:28:12.575936Z","shell.execute_reply.started":"2024-04-18T11:28:12.571318Z","shell.execute_reply":"2024-04-18T11:28:12.574863Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"batch_size = 128\n\n# Define a custom dataset class named 'ivy_Dataset'.\nclass ivy_Dataset(Dataset):\n def __init__(self, df):\n # Initialize the dataset with the given dataframe 'df'.\n # Store the number of samples in the dataset.\n self.num_samples = df['title'].size\n # Extract 'title' and 'label' from the dataframe and store as data.\n self.data = [[entry[0], entry[1]] for entry in zip(df[\"title\"].to_pandas(), df[\"label\"].to_pandas())]\n\n def __getitem__(self, idx):\n # Retrieve and return the data at the given index 'idx'.\n title = self.data[idx][0]\n label = self.data[idx][1]\n return title, label\n\n def __len__(self):\n # Return the total number of samples in the dataset.\n return self.num_samples\n\n\n","metadata":{"id":"JEW-0sYgLn06","execution":{"iopub.status.busy":"2024-04-18T11:28:12.577339Z","iopub.execute_input":"2024-04-18T11:28:12.577812Z","iopub.status.idle":"2024-04-18T11:28:12.588255Z","shell.execute_reply.started":"2024-04-18T11:28:12.577778Z","shell.execute_reply":"2024-04-18T11:28:12.587345Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"training_data = ivy_Dataset(df)# 
Wrap the training dataframe 'df' in the ivy_Dataset class defined above.\n","metadata":{"id":"3kEn6ka1Ln07","execution":{"iopub.status.busy":"2024-04-18T11:28:12.589385Z","iopub.execute_input":"2024-04-18T11:28:12.589758Z","iopub.status.idle":"2024-04-18T11:28:12.618589Z","shell.execute_reply.started":"2024-04-18T11:28:12.589724Z","shell.execute_reply":"2024-04-18T11:28:12.617910Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"# Randomly sample 10 entries from the dataframe 'df' and store it as 'df_sample'.\ndf_sample = df.sample(10)\n\n# Extract 'title' and 'label' from the sampled dataframe and store as 'data_sample'.\ndata_sample = [[entry[0], entry[1]] for entry in zip(df_sample[\"title\"].to_pandas(), df_sample[\"label\"].to_pandas())]\n\n# Access the label of the 10th data sample in the 'data_sample' list.\ndata_sample[9][1]","metadata":{"id":"qzV0zScmLn07","execution":{"iopub.status.busy":"2024-04-18T11:28:12.619709Z","iopub.execute_input":"2024-04-18T11:28:12.620049Z","iopub.status.idle":"2024-04-18T11:28:12.633173Z","shell.execute_reply.started":"2024-04-18T11:28:12.620018Z","shell.execute_reply":"2024-04-18T11:28:12.632454Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{}}]},{"cell_type":"code","source":"# Import the DataLoader class from the torch.utils.data module.\nfrom torch.utils.data import DataLoader\n\n# Create a DataLoader instance named 'train_dataloader' for the training data.\n# Set the batch size to 'batch_size', shuffle the data during each epoch.\ntrain_dataloader = DataLoader(training_data, batch_size=batch_size, 
shuffle=True)\n","metadata":{"id":"lupRA9BvLn07","execution":{"iopub.status.busy":"2024-04-18T11:28:12.634337Z","iopub.execute_input":"2024-04-18T11:28:12.634957Z","iopub.status.idle":"2024-04-18T11:28:12.639836Z","shell.execute_reply.started":"2024-04-18T11:28:12.634926Z","shell.execute_reply":"2024-04-18T11:28:12.638800Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"code","source":"def ivy_train_loader(dataset=df, batch_size=4):\n    \"\"\"Return a generator of (titles, labels) batches cut sequentially from `dataset`.\n\n    Only whole batches are produced; rows left over after the last whole batch are skipped.\n    \"\"\"\n    whole_batches = len(dataset) // batch_size\n    starts = range(0, whole_batches * batch_size, batch_size)\n\n    # Generator expression: a batch is sliced and converted with to_pandas() only when it is requested.\n    return (\n        (\n            dataset[\"title\"][start : start + batch_size].to_pandas(),\n            dataset[\"label\"][start : start + batch_size].to_pandas(),\n        )\n        for start in starts\n    )\n\n\n# Create a loader using the ivy_train_loader function with the specified batch size.\nloader = ivy_train_loader(batch_size=batch_size)\n\n# Iterate over the loader with tqdm for progress tracking.\nfor batch_id, data in tqdm(enumerate(loader)):\n    # Split the batch into inputs and targets.\n    x_data, y_data = data[0], data[1]\n\n    # Stop early: this loop only samples the iteration speed of the loader.\n    if batch_id == 10:\n        break\n","metadata":{"id":"azrxUXP5Ln07","execution":{"iopub.status.busy":"2024-04-18T11:28:12.641140Z","iopub.execute_input":"2024-04-18T11:28:12.641465Z","iopub.status.idle":"2024-04-18T11:28:12.680173Z","shell.execute_reply.started":"2024-04-18T11:28:12.641437Z","shell.execute_reply":"2024-04-18T11:28:12.679246Z"},"trusted":true},"execution_count":37,"outputs":[{"name":"stderr","text":"10it [00:00, 371.74it/s]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Iterate over the train_dataloader with tqdm for progress tracking.\nfor batch_id, data in tqdm(enumerate(train_dataloader)):\n    # Split the batch into inputs and targets.\n    x_data, y_data = data[0], data[1]\n\n    # Stop early: this loop only samples the iteration speed of the DataLoader.\n    if batch_id == 10:\n        break\n","metadata":{"id":"5J1XVCUCLn07","execution":{"iopub.status.busy":"2024-04-18T11:28:12.681197Z","iopub.execute_input":"2024-04-18T11:28:12.681462Z","iopub.status.idle":"2024-04-18T11:28:12.735099Z","shell.execute_reply.started":"2024-04-18T11:28:12.681441Z","shell.execute_reply":"2024-04-18T11:28:12.734248Z"},"trusted":true},"execution_count":38,"outputs":[{"name":"stderr","text":"10it [00:00, 475.18it/s]\n","output_type":"stream"}]},{"cell_type":"markdown","source":"It seems that in this case just a simple generator is comparable or slightly faster than a proper data loader.","metadata":{"id":"Xcdx5-YbLn08"}},{"cell_type":"code","source":"def one_hot(args, num_classes=2):\n    \"\"\"Return one one-hot row (a list of length `num_classes`) for every label in `args`.\"\"\"\n    rows = []\n    for label in args:\n        rows.append([1 if position == label else 0 for position in range(num_classes)])\n    return rows\n\ndef argmax(args):\n    \"\"\"Return a list with ivy.argmax applied to every element of `args`.\"\"\"\n    return [ivy.argmax(row) for row in args]\n\n# Test the one_hot and argmax functions.\nprint(one_hot([0, 0, 1, 0]))\nprint(argmax(ivy.array([[0.49967843, 0.50032151],\n                        [0.49986687, 0.50013322],\n                        [0.49912587, 0.50087422],\n                        [0.50080854, 0.4991914],\n                        [0.50049627, 0.4995037],\n                        [0.4998956, 0.50010443],\n                        [0.50008798, 0.49991205],\n                        [0.50053447, 0.49946556]])))","metadata":{"id":"SReEp9rmLn08","execution":{"iopub.status.busy":"2024-04-18T11:28:12.736323Z","iopub.execute_input":"2024-04-18T11:28:12.736736Z","iopub.status.idle":"2024-04-18T11:28:16.874645Z","shell.execute_reply.started":"2024-04-18T11:28:12.736705Z","shell.execute_reply":"2024-04-18T11:28:16.873497Z"},"trusted":true},"execution_count":39,"outputs":[{"name":"stdout","text":"[[1, 0], [1, 0], [0, 1], [1, 0]]\n[ivy.array(1), ivy.array(1), ivy.array(1), ivy.array(0), ivy.array(0), ivy.array(1), ivy.array(0), 
ivy.array(0)]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Set the backend to \"torch\" for Ivy library.\nivy.set_backend(\"torch\")\n\n# Define various parameters and variables.\nnum_embeddings = tokenizer.vocab_size\nembedding_dim = 10\npad_token_id = tokenizer.pad_token_id\ninput_channels = embedding_dim\nnum_classes = 2\noutput_channels = 10\nnum_layers = 1\nmax_length = 128\nmax_length = max_length - 3\ntokenizer.model_max_length = max_length\neps = 1e-05\nbatch_size = 128\n\n# Generate testing input and labels.\n# Sample the rows once so that every title stays paired with its own label.\ntesting_batch = df.sample(batch_size)\ntesting_input = testing_batch[\"title\"]\ntesting_labels = testing_batch[\"label\"]\n\n# Calculate the linear input and output channels.\nlinear_input_channels = (tokenizer.model_max_length + 3) * batch_size * output_channels # 3 comes from the hidden states of the LSTM\nlinear_output_channels = num_classes * batch_size\n\n# Define the normalized shape.\nnormalized_shape = (num_classes)\n\n# Define a custom module for post-processing LSTM output.\nclass LSTM_postproc(Module):\n    \"\"\"Append the LSTM state to the LSTM output sequence and flatten the result.\"\"\"\n\n    def __init__(self):\n        super(LSTM_postproc, self).__init__()\n\n    def _forward(self, args):\n        lstm_output, lstm_state = args\n        lstm_state_latest, lstm_state_hidden = lstm_state\n        lstm_state_latest = ivy.array(lstm_state_latest)\n        lstm_state_hidden = ivy.array([state for state in lstm_state_hidden][0])\n\n        # The state tensors are regrouped into 3 extra rows per sample (the \"+ 3\" in linear_input_channels).\n        lstm_state = ivy.concat((lstm_state_latest, lstm_state_hidden), axis=0).reshape((batch_size, 3, -1))\n        out = ivy.concat([lstm_output, lstm_state], axis=1)\n        out = out.flatten()\n        return out\n\n# Define a custom module for tokenizing input.\nclass Tokenizer(Module):\n    \"\"\"Turn an iterable of strings into token ids padded/truncated to `max_length`.\"\"\"\n\n    def __init__(self, tokenizer):\n        super(Tokenizer, self).__init__()\n        self.tokenizer = tokenizer\n\n    def _forward(self, args):\n        args = list(args)\n        return self.tokenizer(args, add_special_tokens=True, max_length=max_length, padding=\"max_length\", truncation=True)[\"input_ids\"]\n\n# Define a custom module for reshaping output.\nclass Reshaper(Module):\n    \"\"\"Reshape the flat Linear output to (batch_size, num_classes).\"\"\"\n\n    def __init__(self):\n        super(Reshaper, self).__init__()\n\n    def _forward(self, args):\n        return args.reshape((batch_size, num_classes))\n\n# Define a custom module for performing argmax operation.\nclass Argmax(Module):\n    \"\"\"Return ivy.argmax over the last axis.\"\"\"\n\n    def __init__(self):\n        super(Argmax, self).__init__()\n\n    def _forward(self, args):\n        return ivy.argmax(args, axis=-1)\n\n# Define a custom module for embedding.\nclass ivy_Embed(Module):\n    \"\"\"Apply the wrapped embedding layer and cast its output with .float().\"\"\"\n\n    def __init__(self, embedding):\n        super(ivy_Embed, self).__init__()\n        self.embedding = embedding\n\n    def _forward(self, args):\n        out = self.embedding(args).float()\n        return out\n\n# Initialize the embedding layer.\nembedding = Embedding(num_embeddings, embedding_dim, pad_token_id)\n\n# Define the neural network architecture using Sequential.\n# NOTE(review): Sigmoid directly before Softmax limits how far apart the two class\n# probabilities can get; the stack is left unchanged here -- reconsider it when retraining.\nivy_LSTM = Sequential(\n    ivy_Embed(embedding),\n    LSTM(input_channels, output_channels, num_layers=num_layers, return_sequence=True, return_state=True, device=None, v=None, dtype=None),\n    LSTM_postproc(),\n    Linear(linear_input_channels, linear_output_channels, with_bias=True),\n    Reshaper(),\n    Sigmoid(),\n    Softmax(),\n)\n","metadata":{"id":"4dQ2Recq9Xo8","execution":{"iopub.status.busy":"2024-04-18T11:28:16.875870Z","iopub.execute_input":"2024-04-18T11:28:16.876149Z","iopub.status.idle":"2024-04-18T11:28:17.438256Z","shell.execute_reply.started":"2024-04-18T11:28:16.876124Z","shell.execute_reply":"2024-04-18T11:28:17.436997Z"},"trusted":true},"execution_count":40,"outputs":[]},{"cell_type":"code","source":"def train_tokens_loader(dataset = df):\n    \"\"\"Tokenize `dataset` batch by batch.\n\n    Returns:\n        A `(loaded_data, data_dict)` tuple. `loaded_data` is a list with one `[x_data, y_data]` pair per batch (token ids and one-hot labels on `device`). `data_dict` maps `\"batch<N>\"` to the same batch as plain nested lists, so it can be written with json.dump.\n    \"\"\"\n    train_loader = ivy_train_loader(dataset=dataset, batch_size=batch_size)\n    ivy_tokenizer = Tokenizer(tokenizer)\n    loaded_data = []\n    data_dict = dict()\n    for batch_id, data in tqdm(enumerate(train_loader)):\n\n        x_data = ivy.array(ivy_tokenizer(data[0]), device=device).long()\n        y_data = ivy.array(one_hot(list(data[1])), device=device)\n\n        loaded_data.append([x_data, y_data])\n        # json.dump cannot serialize arrays, so the exported copy holds nested lists.\n        data_dict[f\"batch{batch_id}\"] = {\"x_data\": ivy.to_list(x_data), \"y_data\": ivy.to_list(y_data)}\n\n    # x_data and y_data have different shapes, so the batches stay in a Python list\n    # instead of being stacked into a single array.\n    return loaded_data, data_dict\n\nloaded_tokens, tokens_dict = train_tokens_loader(df_full)\n\nwith open(\"tokenized_dataset.json\", \"w\") as outfile:\n    json.dump(tokens_dict, outfile)\n!cp \"tokenized_dataset.json\" \"/kaggle/working/demos/Contributor_demos/Sarcasm Detection/\"","metadata":{"execution":{"iopub.status.busy":"2024-04-18T11:36:24.496756Z","iopub.execute_input":"2024-04-18T11:36:24.497502Z"},"trusted":true},"execution_count":null,"outputs":[{"name":"stderr","text":"1it [00:17, 17.27s/it]","output_type":"stream"}]},{"cell_type":"code","source":"def train_ivy(model):\n    \"\"\"Train `model` with SGD on the training dataframe `df`.\n\n    Args:\n        model: Ivy module that is called as `model(x, v=v)` and keeps its trainable variables in `model.v`.\n\n    Returns:\n        A `(logs, model)` tuple. `logs` receives one `[[epoch, batch_id, loss_val]]` entry for every 100th batch; `model` is the object that was passed in, with updated variables.\n    \"\"\"\n    logs = []\n    learning_rate = 3e-4\n    opt = SGD(lr=learning_rate, inplace=True, stop_gradients=True, trace_on_next_step=False)\n    loss_fn = CrossEntropyLoss(axis=-1, epsilon=1e-07, reduction='mean')\n    epochs = 2\n    ivy_tokenizer = Tokenizer(tokenizer)\n\n    def loss(params):\n        v, model, x, y = params\n        predictions = model(x, v=v).flatten().to(device)\n        return loss_fn(y, predictions)\n\n    def one_hot(args, num_classes=2):\n        # Convert labels to one-hot encoding, flattened like the predictions in loss().\n        out = ivy.array([[1 if idx == elem else 0 for idx in range(num_classes)] for elem in args], device=device).flatten()\n        return out.long()\n\n    for epoch in range(epochs):\n        # ivy_train_loader hands back a one-shot generator, so a fresh one is built for every epoch.\n        # The loop body tokenizes raw titles itself, so it needs raw batches, not pre-tokenized ones.\n        train_loader = ivy_train_loader(dataset=df, batch_size=batch_size)\n        for batch_id, data in tqdm(enumerate(train_loader)):\n\n            x_data = ivy.array(ivy_tokenizer(data[0]), device=device).long()\n            y_data = one_hot(list(data[1]))\n\n            # Compute loss and gradients\n            loss_val, grads = ivy.execute_with_gradients(loss, (model.v, model, x_data, y_data))\n\n            # Update parameters using SGD optimizer\n            model.v = opt.step(model.v, grads)\n\n            if batch_id % 100 == 0:\n                # Log loss values periodically\n                logs.append([[epoch, batch_id, loss_val]])\n\n            gc.collect()\n\n    return logs, model\n","metadata":{"id":"1NqBJx51TYSd","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Train the Ivy LSTM model 
using the custom training function.\nlogs, ivy_LSTM = train_ivy(ivy_LSTM)\n","metadata":{"id":"7-5EmgSSLn0-","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Save the trained Ivy LSTM model.\nivy_LSTM.save(\"Ivy_Sarcasm_Detection_Demo\")\nivy_LSTM.save_weights(\"/kaggle/working/demos/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo_weights.hdf5\")\n\n# Copy the saved model to the specified directory.\n!cp \"Ivy_Sarcasm_Detection_Demo\" \"/kaggle/working/demos/Contributor_demos/Sarcasm Detection/\"\n","metadata":{"id":"K4gP_H5OLn0_","execution":{"iopub.status.busy":"2024-04-16T16:40:14.616595Z","iopub.execute_input":"2024-04-16T16:40:14.617225Z","iopub.status.idle":"2024-04-16T16:40:16.202745Z","shell.execute_reply.started":"2024-04-16T16:40:14.617189Z","shell.execute_reply":"2024-04-16T16:40:16.200974Z"},"trusted":true},"execution_count":80,"outputs":[{"traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[80], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Save the trained Ivy LSTM model.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m ivy_LSTM\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIvy_Sarcasm_Detection_Demo\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mivy_LSTM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_weights\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/kaggle/working/demos/Contributor_demos/Sarcasm Detection/Ivy_Sarcasm_Detection_Demo_weights.hdf5\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Copy the saved model to the specified directory.\u001b[39;00m\n\u001b[1;32m 6\u001b[0m get_ipython()\u001b[38;5;241m.\u001b[39msystem(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcp 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIvy_Sarcasm_Detection_Demo\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/kaggle/working/demos/Contributor_demos/Sarcasm Detection\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ivy/stateful/module.py:428\u001b[0m, in \u001b[0;36mModule.save_weights\u001b[0;34m(self, weights_path)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Save the weights on the Module.\u001b[39;00m\n\u001b[1;32m 417\u001b[0m \n\u001b[1;32m 418\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[38;5;124;03mNone\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 427\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(weights_path\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m)[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]), exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 428\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcont_to_disk_as_hdf5\u001b[49m\u001b[43m(\u001b[49m\u001b[43mweights_path\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ivy/data_classes/container/base.py:2020\u001b[0m, in \u001b[0;36mContainerBase.cont_to_disk_as_hdf5\u001b[0;34m(self, h5_obj_or_filepath, starting_index, mode, max_batch_size)\u001b[0m\n\u001b[1;32m 2018\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2019\u001b[0m h5_group \u001b[38;5;241m=\u001b[39m h5_obj[key]\n\u001b[0;32m-> 2020\u001b[0m 
\u001b[43mvalue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcont_to_disk_as_hdf5\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2021\u001b[0m \u001b[43m \u001b[49m\u001b[43mh5_group\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstarting_index\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_batch_size\u001b[49m\n\u001b[1;32m 2022\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2023\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2024\u001b[0m value_as_np \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cont_ivy\u001b[38;5;241m.\u001b[39mto_numpy(value)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ivy/data_classes/container/base.py:2020\u001b[0m, in \u001b[0;36mContainerBase.cont_to_disk_as_hdf5\u001b[0;34m(self, h5_obj_or_filepath, starting_index, mode, max_batch_size)\u001b[0m\n\u001b[1;32m 2018\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2019\u001b[0m h5_group \u001b[38;5;241m=\u001b[39m h5_obj[key]\n\u001b[0;32m-> 2020\u001b[0m \u001b[43mvalue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcont_to_disk_as_hdf5\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2021\u001b[0m \u001b[43m \u001b[49m\u001b[43mh5_group\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstarting_index\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_batch_size\u001b[49m\n\u001b[1;32m 2022\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2023\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2024\u001b[0m value_as_np \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cont_ivy\u001b[38;5;241m.\u001b[39mto_numpy(value)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ivy/data_classes/container/base.py:2020\u001b[0m, in 
\u001b[0;36mContainerBase.cont_to_disk_as_hdf5\u001b[0;34m(self, h5_obj_or_filepath, starting_index, mode, max_batch_size)\u001b[0m\n\u001b[1;32m 2018\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2019\u001b[0m h5_group \u001b[38;5;241m=\u001b[39m h5_obj[key]\n\u001b[0;32m-> 2020\u001b[0m \u001b[43mvalue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcont_to_disk_as_hdf5\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2021\u001b[0m \u001b[43m \u001b[49m\u001b[43mh5_group\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstarting_index\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_batch_size\u001b[49m\n\u001b[1;32m 2022\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2023\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2024\u001b[0m value_as_np \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cont_ivy\u001b[38;5;241m.\u001b[39mto_numpy(value)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/ivy/data_classes/container/base.py:2041\u001b[0m, in \u001b[0;36mContainerBase.cont_to_disk_as_hdf5\u001b[0;34m(self, h5_obj_or_filepath, starting_index, mode, max_batch_size)\u001b[0m\n\u001b[1;32m 2039\u001b[0m amount_to_write \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmin\u001b[39m(this_batch_size, space_left)\n\u001b[1;32m 2040\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(amount_to_write):\n\u001b[0;32m-> 2041\u001b[0m \u001b[43mh5_obj\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mstarting_index\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstarting_index\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m 
\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2042\u001b[0m value_as_np[i : i \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 2043\u001b[0m )\n","File \u001b[0;32mh5py/_objects.pyx:54\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n","File \u001b[0;32mh5py/_objects.pyx:55\u001b[0m, in \u001b[0;36mh5py._objects.with_phil.wrapper\u001b[0;34m()\u001b[0m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/h5py/_hl/dataset.py:997\u001b[0m, in \u001b[0;36mDataset.__setitem__\u001b[0;34m(self, args, val)\u001b[0m\n\u001b[1;32m 994\u001b[0m mshape \u001b[38;5;241m=\u001b[39m val\u001b[38;5;241m.\u001b[39mshape\n\u001b[1;32m 996\u001b[0m \u001b[38;5;66;03m# Perform the write, with broadcasting\u001b[39;00m\n\u001b[0;32m--> 997\u001b[0m mspace \u001b[38;5;241m=\u001b[39m h5s\u001b[38;5;241m.\u001b[39mcreate_simple(\u001b[43mselection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand_shape\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmshape\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 998\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m fspace \u001b[38;5;129;01min\u001b[39;00m selection\u001b[38;5;241m.\u001b[39mbroadcast(mshape):\n\u001b[1;32m 999\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mid\u001b[38;5;241m.\u001b[39mwrite(mspace, fspace, val, mtype, dxpl\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dxpl)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/h5py/_hl/selections.py:264\u001b[0m, in \u001b[0;36mSimpleSelection.expand_shape\u001b[0;34m(self, source_shape)\u001b[0m\n\u001b[1;32m 262\u001b[0m eshape\u001b[38;5;241m.\u001b[39mappend(t)\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 264\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt broadcast \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m -> \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (source_shape, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39marray_shape)) \u001b[38;5;66;03m# array shape\u001b[39;00m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m([n \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m remaining_src_dims]):\n\u001b[1;32m 267\u001b[0m \u001b[38;5;66;03m# All dimensions from target_shape should either have been popped\u001b[39;00m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;66;03m# to match the selection shape, or be 1.\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt broadcast \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m -> \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (source_shape, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39marray_shape)) \u001b[38;5;66;03m# array shape\u001b[39;00m\n","\u001b[0;31mTypeError\u001b[0m: Can't broadcast (1, 10) -> (1, 3)"],"ename":"TypeError","evalue":"Can't broadcast (1, 10) -> (1, 3)","output_type":"error"}]},{"cell_type":"code","source":"with open('logs.csv','w',encoding = 'utf-8') as f:\n f.write(\"epoch, batch_id, loss_val\")\n for entry in logs:\n f.write(f\"{entry[0][0]}, {entry[0][1]}, {entry[0][2]}\")\n \n!cp logs.csv \"/kaggle/working/demos/Contributor_demos/Sarcasm 
Detection\"","metadata":{"execution":{"iopub.status.busy":"2024-04-16T16:40:16.203448Z","iopub.status.idle":"2024-04-16T16:40:16.203771Z","shell.execute_reply.started":"2024-04-16T16:40:16.203615Z","shell.execute_reply":"2024-04-16T16:40:16.203629Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Set the Ivy LSTM model to evaluation mode.\nivy_LSTM.eval()\n\n# Disable training mode for the Ivy LSTM model.\nivy_LSTM.train(False)\n","metadata":{"id":"2FFbG1JULn0_","execution":{"iopub.status.busy":"2024-04-16T16:40:16.205825Z","iopub.status.idle":"2024-04-16T16:40:16.206148Z","shell.execute_reply.started":"2024-04-16T16:40:16.205983Z","shell.execute_reply":"2024-04-16T16:40:16.205996Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"def eval_ivy(model):\n    \"\"\"Evaluate `model` on the held-out dataframe `df_eval`.\n\n    Args:\n        model: Ivy module that is called as `model(x)` / `model(x, v=v)`.\n\n    Returns:\n        ivy.mean over axis 0 of the per-batch `[[loss, accuracy]]` entries.\n    \"\"\"\n    logs = []\n    loss_fn = CrossEntropyLoss(axis=-1, epsilon=1e-07, reduction='mean')\n    classifier = model\n    eval_loader = ivy_train_loader(dataset=df_eval, batch_size=batch_size)\n    ivy_tokenizer = Tokenizer(tokenizer)\n\n    def loss(params):\n        v, model, x, y = params\n        predictions = model(x, v=v).flatten()\n        return loss_fn(y, predictions)\n\n    def one_hot(args, num_classes=2):\n        # Convert labels to one-hot encoding, flattened like the predictions in loss().\n        out = ivy.array([[1 if idx == elem else 0 for idx in range(num_classes)] for elem in args]).flatten()\n        return out.long()\n\n    # Iterate the raw (titles, labels) batches of the evaluation split; they are tokenized below.\n    for batch_id, data in tqdm(enumerate(eval_loader)):\n\n        x_data = ivy.array(ivy_tokenizer(data[0])).long()\n        y_data = one_hot(list(data[1])).long()\n\n        # Compute predictions\n        predictions = classifier(x_data).float()\n        predictions = ivy.argmax(predictions, axis=-1).flatten().float()\n        predictions = one_hot(list(predictions)).reshape(y_data.shape)\n\n        # Compute accuracy: the dot product of the two flattened one-hot vectors counts the matching predictions.\n        acc = ivy.matmul(predictions.float(), y_data.float()).float() / batch_size\n\n        # Compute loss\n        loss_vals = loss((model.v, model, x_data, y_data))\n\n        # Append loss and accuracy to logs\n        logs.append([[loss_vals, acc]])\n\n        # Release memory\n        gc.collect()\n\n    # Calculate mean loss and accuracy\n    return ivy.mean(logs, axis=0)\n","metadata":{"id":"ePSJNSfvLn0_","execution":{"iopub.status.busy":"2024-04-16T16:40:16.207292Z","iopub.status.idle":"2024-04-16T16:40:16.207630Z","shell.execute_reply.started":"2024-04-16T16:40:16.207469Z","shell.execute_reply":"2024-04-16T16:40:16.207483Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Evaluate the Ivy LSTM model using the custom evaluation function.\nlogs_eval = eval_ivy(ivy_LSTM)","metadata":{"id":"xTk4aOCkLn0_","execution":{"iopub.status.busy":"2024-04-16T16:40:16.208901Z","iopub.status.idle":"2024-04-16T16:40:16.209210Z","shell.execute_reply.started":"2024-04-16T16:40:16.209058Z","shell.execute_reply":"2024-04-16T16:40:16.209071Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Print the evaluation results (mean loss and accuracy).\nprint(logs_eval)","metadata":{"id":"QOmaAFL4Ln1A","execution":{"iopub.status.busy":"2024-04-16T16:40:16.210638Z","iopub.status.idle":"2024-04-16T16:40:16.210957Z","shell.execute_reply.started":"2024-04-16T16:40:16.210799Z","shell.execute_reply":"2024-04-16T16:40:16.210813Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Print the number of samples in the evaluation 
import csv
import shutil
from pathlib import Path

# Directory (inside the cloned demos repository) that receives every artifact.
# The name contains a space, so it is handled through pathlib/shutil instead
# of an unquoted shell `cp`, which would split it into two arguments.
OUTPUT_DIR = Path("/kaggle/working/demos/Contributor_demos/Sarcasm Detection")
MODEL_FILE = "Ivy_Sarcasm_Detection_Demo"
WEIGHTS_FILE = "Ivy_Sarcasm_Detection_Demo_weights.hdf5"
LOGS_FILE = "logs_full.csv"

# Define the train-test split ratio.
train_test_ratio = 0.95

# Define the fraction of the dataset to use.
frac_dataset = 1

# Calculate the size of the full dataset.
df_size = len(df_full)

# Calculate the index to split the dataset for training and evaluation.
split = int(df_size * train_test_ratio * frac_dataset)

# Define the cutoff index for the dataset fraction.
cutoff = int(df_size * frac_dataset)

# Extract the training dataset.
df = df_full.iloc[:split, :]

# Extract the evaluation dataset.
df_eval = df_full.iloc[split:cutoff, :]

# Print the number of samples in the evaluation dataset (done after the split
# so the cell order survives "Restart & Run All").
print(len(df_eval))

# Train the Ivy LSTM model using the custom training function.
logs, ivy_LSTM = train_ivy(ivy_LSTM)

# Save the trained Ivy LSTM model and its weights, then copy the saved model
# next to the weights.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
ivy_LSTM.save(MODEL_FILE)
ivy_LSTM.save_weights(str(OUTPUT_DIR / WEIGHTS_FILE))
shutil.copy(MODEL_FILE, OUTPUT_DIR)

# Export the training logs as a real CSV: one record per line and the loss as
# a plain number rather than the repr of a device array.
with open(LOGS_FILE, 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["epoch", "batch_id", "loss_val"])
    for entry in logs:
        # Each log entry wraps a single (epoch, batch_id, loss) record.
        record = entry[0]
        writer.writerow([record[0], record[1], float(record[2])])

# Copy the file that was just written (previously a different name, logs.csv,
# was copied).
shutil.copy(LOGS_FILE, OUTPUT_DIR)
ivy.array(0.34658015, dev=gpu:0)1, 800, ivy.array(0.34659015, dev=gpu:0)1, 900, ivy.array(0.34657967, dev=gpu:0)1, 1000, ivy.array(0.34655022, dev=gpu:0)1, 1100, ivy.array(0.34657131, dev=gpu:0)1, 1200, ivy.array(0.34664682, dev=gpu:0)1, 1300, ivy.array(0.34648872, dev=gpu:0)1, 1400, ivy.array(0.34655799, dev=gpu:0) \ No newline at end of file