[{"authors":null,"categories":null,"content":"CopeNLU is a Natural Language Processing research group led by Isabelle Augenstein and Pepa Atanasova with a focus on researching methods for tasks that require a deep understanding of language, as opposed to shallow processing. We are affiliated with the Natural Language Processing Section, as well as with the Pioneer Centre for AI, at the Department of Computer Science, University of Copenhagen. We are interested in core methodology research on, among others, learning with limited training data and explainable AI; as well as applications thereof to tasks such as fact checking, gender bias detection and question answering. Our group is partly funded by an ERC Starting Grant on Explainable and Robust Automatic Fact Checking, as well as a Sapere Aude Research Leader fellowship on `Learning to Explain Attitudes on Social Media\u0026rsquo;.\n","date":-62135596800,"expirydate":-62135596800,"kind":"section","lang":"en","lastmod":-62135596800,"objectID":"598b63dd58b43bce02403646f240cd3c","permalink":"https://copenlu.github.io/author/admin/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/author/admin/","section":"author","summary":"CopeNLU is a Natural Language Processing research group led by Isabelle Augenstein and Pepa Atanasova with a focus on researching methods for tasks that require a deep understanding of language, as opposed to shallow processing. We are affiliated with the Natural Language Processing Section, as well as with the Pioneer Centre for AI, at the Department of Computer Science, University of Copenhagen. We are interested in core methodology research on, among others, learning with limited training data and explainable AI; as well as applications thereof to tasks such as fact checking, gender bias detection and question answering.","tags":null,"title":"","type":"author"},{"authors":null,"categories":null,"content":"","date":-62135596800,"expirydate":-62135596800,"kind":"section","lang":"en","lastmod":-62135596800,"objectID":"d41d8cd98f00b204e9800998ecf8427e","permalink":"https://copenlu.github.io/author/","publishdate":"0001-01-01T00:00:00Z","relpermalink":"/author/","section":"author","summary":"","tags":null,"title":"Authors","type":"author"},{"authors":null,"categories":null,"content":"This feature can be used for publishing content such as:\n Project or software documentation Online courses Tutorials The parent folder may be renamed, for example, to docs for project documentation or course for creating an online course.\nTo disable this feature, either delete the parent folder, or set draft = true in the front matter of all its pages.\nAfter renaming or deleting the parent folder, you may wish to update any [[menu.main]] menu links to it in the config.toml.\n","date":1536451200,"expirydate":-62135596800,"kind":"section","lang":"en","lastmod":1536451200,"objectID":"c3224f3a64174f08aaf31e1f1d16ffd3","permalink":"https://copenlu.github.io/tutorial/","publishdate":"2018-09-09T00:00:00Z","relpermalink":"/tutorial/","section":"tutorial","summary":"This feature can be used for publishing content such as:\n Project or software documentation Online courses Tutorials The parent folder may be renamed, for example, to docs for project documentation or course for creating an online course.\nTo disable this feature, either delete the parent folder, or set draft = true in the front matter of all its pages.\nAfter renaming or deleting the parent folder, you may wish to update any 
[[menu.","tags":null,"title":"Overview","type":"docs"},{"authors":["Gayane Ghazaryan","Erik Arakelyan","Pasquale Minervini","Isabelle Augenstein"],"categories":null,"content":"","date":1733097600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1733097600,"objectID":"528245b31121a7d516081d94ed5348ba","permalink":"https://copenlu.github.io/publication/2024_arxiv_ghazaryan/","publishdate":"2024-12-02T00:00:00Z","relpermalink":"/publication/2024_arxiv_ghazaryan/","section":"publication","summary":"Question Answering (QA) datasets have been instrumental in developing and evaluating Large Language Model (LLM) capabilities. However, such datasets are scarce for languages other than English due to the cost and difficulties of collection and manual annotation. This means that producing novel models and measuring the performance of multilingual LLMs in low-resource languages is challenging. To mitigate this, we propose SynDARin, a method for generating and validating QA datasets for low-resource languages. We utilize parallel content mining to obtain human-curated paragraphs between English and the target language. We use the English data as context to generate synthetic multiple-choice (MC) question-answer pairs, which are automatically translated and further validated for quality. Combining these with their designated non-English human-curated paragraphs forms the final QA dataset. The method makes it possible to maintain the content quality, reduces the likelihood of factual errors, and circumvents the need for costly annotation. To test the method, we created a QA dataset with 1.2K samples for the Armenian language. The human evaluation shows that 98% of the generated English data maintains quality and diversity in the question types and topics, while the translation validation pipeline can filter out ∼70% of data with poor quality. We use the dataset to benchmark state-of-the-art LLMs, showing their inability to achieve human accuracy, with some models performing close to random chance. This shows that the generated dataset is non-trivial and can be used to evaluate reasoning capabilities in the low-resource language.","tags":[],"title":"SynDARin: Synthesising Datasets for Automated Reasoning in Low-Resource Languages","type":"publication"},{"authors":[],"categories":null,"content":"One PhD fellowship on Interpretable Machine Learning is available for a start in Autumn 2025. The successful candidate will be supervised by Pepa Atanasova and Isabelle Augenstein, and will join the Natural Language Processing Section at the Department of Computer Science, Faculty of Science, University of Copenhagen.\nThe full call and application link can be found here; the application deadline is January 15, 2025.\n","date":1731024000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1731024000,"objectID":"f2fd5cd936cc12751978de0d3ba2006e","permalink":"https://copenlu.github.io/talk/2024_11_phd/","publishdate":"2024-11-08T00:00:00Z","relpermalink":"/talk/2024_11_phd/","section":"talk","summary":"One PhD fellowship on Interpretable Machine Learning is available for a start in Autumn 2025. 
The successful candidate will be supervised by Pepa Atanasova and Isabelle Augenstein, and will join the Natural Language Processing Section at the Department of Computer Science, Faculty of Science, University of Copenhagen.\nThe full call and application link can be found here; the application deadline is January 15, 2025.","tags":[],"title":"PhD fellowship on Interpretable Machine Learning available","type":"talk"},{"authors":["Siddhesh Milind Pawar","Junyeong Park","Jiho Jin","Arnav Arora","Junho Myung","Srishti Yadav","Faiz Ghifari Haznitrama","Inhwa Song","Alice Oh","Isabelle Augenstein"],"categories":null,"content":"","date":1730246400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1730246400,"objectID":"24fbf04fde8915c2462262a8365e3552","permalink":"https://copenlu.github.io/publication/2024_arxiv_sawar/","publishdate":"2024-10-30T00:00:00Z","relpermalink":"/publication/2024_arxiv_sawar/","section":"publication","summary":"Large-scale deployment of large language models (LLMs) in various applications, such as chatbots and virtual assistants, requires LLMs to be culturally sensitive to the user to ensure inclusivity. Culture has been widely studied in psychology and anthropology, and there has been a recent surge in research on making LLMs more culturally inclusive that goes beyond multilinguality and builds on findings from psychology and anthropology. In this paper, we survey efforts towards incorporating cultural awareness into text-based and multimodal LLMs. We start by defining cultural awareness in LLMs, taking the definitions of culture from anthropology and psychology as a point of departure. We then examine methodologies adopted for creating cross-cultural datasets, strategies for cultural inclusion in downstream tasks, and methodologies that have been used for benchmarking cultural awareness in LLMs. Further, we discuss the ethical implications of cultural alignment, the role of Human-Computer Interaction in driving cultural inclusion in LLMs, and the role of cultural alignment in driving social science research. We finally provide pointers to future research based on our findings about gaps in the literature.","tags":[],"title":"Survey of Cultural Awareness in Language Models: Text and Beyond","type":"publication"},{"authors":["Erik Arakelyan","Pasquale Minervini","Pat Verga","Patrick Lewis","Isabelle Augenstein"],"categories":null,"content":"","date":1728950400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1728950400,"objectID":"fbd1446ad8e10ab84a9ae49e2f29bb63","permalink":"https://copenlu.github.io/publication/2024_arxiv_arakelyan_flare/","publishdate":"2024-10-15T00:00:00Z","relpermalink":"/publication/2024_arxiv_arakelyan_flare/","section":"publication","summary":"Modern Question Answering (QA) and Reasoning approaches based on Large Language Models (LLMs) commonly use prompting techniques, such as Chain-of-Thought (CoT), assuming the resulting generation will have a more granular exploration and reasoning over the question space and scope. However, such methods struggle with generating outputs that are faithful to the intermediate chain of reasoning produced by the model. On the other end of the spectrum, neuro-symbolic methods such as Faithful CoT (F-CoT) propose to combine LLMs with external symbolic solvers. While such approaches boast a high degree of faithfulness, they usually require a model trained for code generation and struggle with tasks that are ambiguous or hard to formalise strictly. 
We introduce Faithful Logic-Aided Reasoning and Exploration (FLARE), a novel interpretable approach for traversing the problem space using task decompositions. We use the LLM to plan a solution, soft-formalise the query into facts and predicates using a logic programming code and simulate that code execution using an exhaustive multi-hop search over the defined space. Our method allows us to compute the faithfulness of the reasoning process w.r.t. the generated code and analyse the steps of the multi-hop search without relying on external solvers. Our methods achieve SOTA results on 7 out of 9 diverse reasoning benchmarks. We also show that model faithfulness positively correlates with overall performance and further demonstrate that FLARE allows pinpointing the decisive factors sufficient for and leading to the correct answer with optimal reasoning during the multi-hop search.","tags":[],"title":"FLARE: Faithful Logic-Aided Reasoning and Exploration","type":"publication"},{"authors":["Anej Svete","Nadav Borenstein","Mike Zhou","Isabelle Augenstein","Ryan Cotterell"],"categories":null,"content":"","date":1727136000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1727136000,"objectID":"34b22c9e33d3f38701b898ac1857d64b","permalink":"https://copenlu.github.io/publication/2024_emnlp_svete/","publishdate":"2024-09-24T00:00:00Z","relpermalink":"/publication/2024_emnlp_svete/","section":"publication","summary":"Much theoretical work has described the ability of transformer language models (LMs) to represent formal languages. However, linking theoretical results to empirical performance is not straightforward. We empirically evaluate recent work linking transformers to n-gram LMs by studying their ability to learn random n-gram LMs of two kinds: ones with arbitrary next-symbol probabilities and ones where next-symbol probabilities are defined with shared parameters. We find that classic n-gram estimation techniques such as Add-lambda outperform transformers on the former, while transformers perform well on the latter, outperforming methods specifically designed to learn n-gram LMs.","tags":[],"title":"Can Transformers Learn n-gram Language Models?","type":"publication"},{"authors":["Sara Vera Marjanović","Haeun Yu","Pepa Atanasova","Maria Maistro","Christina Lioma","Isabelle Augenstein"],"categories":null,"content":"","date":1727136000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1727136000,"objectID":"5fb5361435b4d083ffad6cd0069b2f82","permalink":"https://copenlu.github.io/publication/2024_emnlp_marjanovic/","publishdate":"2024-09-24T00:00:00Z","relpermalink":"/publication/2024_emnlp_marjanovic/","section":"publication","summary":"Knowledge-intensive language understanding tasks require Language Models (LMs) to integrate relevant context, mitigating their inherent weaknesses, such as incomplete or outdated knowledge. However, conflicting knowledge can be present in the LM's parameters, termed intra-memory conflict, which can affect a model's propensity to accept contextual knowledge. To study the effect of intra-memory conflict on an LM's ability to accept relevant context, we utilize two knowledge conflict measures and a novel dataset containing inherently conflicting data, DynamicQA. This dataset includes facts with a temporal dynamic nature where facts can change over time and disputable dynamic facts, which can change depending on the viewpoint. 
DynamicQA is the first to include real-world knowledge conflicts and provide context to study the link between the different types of knowledge conflicts. We also evaluate several measures on their ability to reflect the presence of intra-memory conflict: semantic entropy and a novel coherent persuasion score. With our extensive experiments, we verify that LMs exhibit a greater degree of intra-memory conflict with dynamic facts compared to facts that have a single truth value. Furthermore, we reveal that facts with intra-memory conflict are harder to update with context, suggesting that retrieval-augmented generation will struggle with the most commonly adapted facts.","tags":[],"title":"DYNAMICQA: Tracing Internal Knowledge Conflicts in Language Models","type":"publication"},{"authors":["Yuxia Wang","Revanth Gangi Reddy","Zain Muhammad Mujahid","Arnav Arora","Aleksandr Rubashevskii","Jiahui Geng","Osama Mohammed Afzal","Liangming Pan","Nadav Borenstein","Aditya Pillai","Isabelle Augenstein","Iryna Gurevych","Preslav Nakov"],"categories":null,"content":"","date":1727136000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1727136000,"objectID":"12485895f8c6407ca65904f2a7c08c47","permalink":"https://copenlu.github.io/publication/2024_emnlp_wang/","publishdate":"2024-09-24T00:00:00Z","relpermalink":"/publication/2024_emnlp_wang/","section":"publication","summary":"The increased use of large language models (LLMs) across a variety of real-world applications calls for mechanisms to verify the factual accuracy of their outputs. In this work, we present a holistic end-to-end solution for annotating the factuality of LLM-generated responses, which encompasses a multi-stage annotation scheme designed to yield detailed labels concerning the verifiability and factual inconsistencies found in LLM outputs. We design and build an annotation tool to speed up the labelling procedure and ease the workload of raters. It allows flexible incorporation of automatic results in any stage, e.g. automatically-retrieved evidence. We further construct an open-domain document-level factuality benchmark in three-level granularity: claim, sentence and document. Preliminary experiments show that FacTool, FactScore and Perplexity.ai struggle to identify false claims, with the best F1=0.53.","tags":[],"title":"Factcheck-Bench: Fine-Grained Evaluation Benchmark for Automatic Fact-Checkers","type":"publication"},{"authors":["Dustin Wright","Arnav Arora","Nadav Borenstein","Srishti Yadav","Serge Belongie","Isabelle Augenstein"],"categories":null,"content":"","date":1727136000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1727136000,"objectID":"e5fe00ffef3c8f47b722099dcbc66cb5","permalink":"https://copenlu.github.io/publication/2024_emnlp_wright/","publishdate":"2024-09-24T00:00:00Z","relpermalink":"/publication/2024_emnlp_wright/","section":"publication","summary":"Uncovering latent values and opinions in large language models (LLMs) can help identify biases and mitigate potential harm. Recently, this has been approached by presenting LLMs with survey questions and quantifying their stances towards morally and politically charged statements. However, the stances generated by LLMs can vary greatly depending on how they are prompted, and there are many ways to argue for or against a given position. 
In this work, we propose to address this by analysing a large and robust dataset of 156k LLM responses to the 62 propositions of the Political Compass Test (PCT) generated by 6 LLMs using 420 prompt variations. We perform coarse-grained analysis of their generated stances and fine-grained analysis of the plain text justifications for those stances. For fine-grained analysis, we propose to identify tropes in the responses: semantically similar phrases that are recurrent and consistent across different prompts, revealing patterns in the text that a given LLM is prone to produce. We find that demographic features added to prompts significantly affect outcomes on the PCT, reflecting bias, as well as disparities between the results of tests when eliciting closed-form vs. open domain responses. Additionally, patterns in the plain text rationales via tropes show that similar justifications are repeatedly generated across models and prompts even with disparate stances.","tags":[],"title":"Revealing Fine-Grained Values and Opinions in Large Language Models","type":"publication"},{"authors":["Marta Marchiori Manerba","Karolina Stańczak","Riccardo Guidotti","Isabelle Augenstein"],"categories":null,"content":"","date":1727136000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1727136000,"objectID":"46dc31ea2d9c4b2491cc64e4e34c42e6","permalink":"https://copenlu.github.io/publication/2024_emnlp_manerba/","publishdate":"2024-09-24T00:00:00Z","relpermalink":"/publication/2024_emnlp_manerba/","section":"publication","summary":"Large language models have been shown to encode a variety of social biases, which carries the risk of downstream harms. While the impact of these biases has been recognized, prior methods for bias evaluation have been limited to binary association tests on small datasets, offering a constrained view of the nature of societal biases within language models. In this paper, we propose an original framework for probing language models for societal biases. We collect a probing dataset to analyze language models' general associations, as well as along the axes of societal categories, identities, and stereotypes. To this end, we leverage a novel perplexity-based fairness score. We curate a large-scale benchmarking dataset addressing drawbacks and limitations of existing fairness collections, expanding to a variety of different identities and stereotypes. When comparing our methodology with prior work, we demonstrate that biases within language models are more nuanced than previously acknowledged. In agreement with recent findings, we find that larger model variants exhibit a higher degree of bias. Moreover, we expose how identities expressing different religions lead to the most pronounced disparate treatments across all models.","tags":[],"title":"Social Bias Probing: Fairness Benchmarking for Language Models","type":"publication"},{"authors":[],"categories":null,"content":"We are delighted to share that Pepa, who has been a key member of the CopeNLU group during her PhD and postdoctoral fellowship, is now joining us as an Assistant Professor in the Department of Computer Science at the University of Copenhagen. Pepa\u0026rsquo;s research in Natural Language Processing has made significant progress in developing explainability techniques that enhance the fairness, transparency, and accountability of machine learning models. 
Her research, particularly in the context of large language models, has already garnered significant recognition, including two prestigious awards (ELLIS, Informatics Europe) for her PhD thesis.\nIn her new role, Pepa will not only continue her research but also contribute to DIKU’s educational initiatives, helping to provide essential data science skills to industry professionals. CopeNLU is excited to have her as a new faculty member and we look forward to her continued contributions to the academic community!\n","date":1725148800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1725148800,"objectID":"e4ca32d449dbb0515dd99dd1097e7966","permalink":"https://copenlu.github.io/talk/2024_09_pepa/","publishdate":"2024-09-01T00:00:00Z","relpermalink":"/talk/2024_09_pepa/","section":"talk","summary":"We are delighted to share that Pepa, who has been a key member of the CopeNLU group during her PhD and postdoctoral fellowship, is now joining us as an Assistant Professor in the Department of Computer Science at the University of Copenhagen. Pepa\u0026rsquo;s research in Natural Language Processing has made significant progress in developing explainability techniques that enhance the fairness, transparency, and accountability of machine learning models. Her research, particularly in the context of large language models, has already garnered significant recognition, including two prestigious awards (ELLIS, Informatics Europe) for her PhD thesis.","tags":[],"title":"Pepa has been appointed as a Tenure-Track Assistant Professor","type":"talk"},{"authors":["Alphaeus Dmonte","Roland Oruche","Marcos Zampieri","Prasad Calyam","Isabelle Augenstein"],"categories":null,"content":"","date":1724630400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1724630400,"objectID":"51b542ab0fda5c04b9b4ac0966f2a6bd","permalink":"https://copenlu.github.io/publication/2024_arxiv_dmonte/","publishdate":"2024-08-26T00:00:00Z","relpermalink":"/publication/2024_arxiv_dmonte/","section":"publication","summary":"The large and ever-increasing amount of data available on the Internet coupled with the laborious task of manual claim and fact verification has sparked the interest in the development of automated claim verification systems. Several deep learning and transformer-based models have been proposed for this task over the years. With the introduction of Large Language Models (LLMs) and their superior performance in several NLP tasks, we have seen a surge of LLM-based approaches to claim verification along with the use of novel methods such as Retrieval Augmented Generation (RAG). In this survey, we present a comprehensive account of recent claim verification frameworks using LLMs. We describe the different components of the claim verification pipeline used in these frameworks in detail including common approaches to retrieval, prompting, and fine-tuning. 
Finally, we describe publicly available English datasets created for this task.","tags":[],"title":"Claim Verification in the Age of Large Language Models: A Survey","type":"publication"},{"authors":["Isabelle Augenstein","Timothy Baldwin","Meeyoung Cha","Tanmoy Chakraborty","Giovanni Luca Ciampaglia","David Corney","Renee DiResta","Emilio Ferrara","Scott Hale","Alon Halevy","Eduard Hovy","Heng Ji","Filippo Menczer","Ruben Miguez","Preslav Nakov","Dietram Scheufele","Shivam Sharma","Giovanni Zagni"],"categories":null,"content":"","date":1720569600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1720569600,"objectID":"d96e6b12e5bb54015b445cf767054b91","permalink":"https://copenlu.github.io/publication/2024_nature_augenstein/","publishdate":"2024-07-10T00:00:00Z","relpermalink":"/publication/2024_nature_augenstein/","section":"publication","summary":"The emergence of tools based on large language models (LLMs), like OpenAI’s ChatGPT and Google’s Gemini, has garnered immense public attention due to their advanced natural language generation capabilities. These remarkably natural-sounding tools have the potential to be highly useful across various tasks. However, they also tend to produce false, erroneous, or misleading content -- commonly referred to as hallucinations. Additionally, LLMs can be misused to generate convincing yet false content and profiles on a large scale, posing a substantial societal challenge by potentially deceiving users and spreading inaccurate information. This makes fact-checking increasingly important. Despite their issues with factual accuracy, LLMs have shown proficiency in various subtasks that support fact-checking, which is essential for ensuring factually accurate responses. In light of these concerns, we explore the issues related to factuality in LLMs and their impact on fact-checking. We identify key challenges, imminent threats, and possible solutions to these factuality issues. We also thoroughly examine these challenges, existing solutions, and potential prospects for fact-checking. By analysing the factuality constraints within LLMs and their impact on fact-checking, we aim to contribute to a path towards maintaining accuracy at a time of confluence of generative AI and misinformation.","tags":[],"title":"Factuality Challenges in the Era of Large Language Models","type":"publication"},{"authors":["Karolina Stańczak","Kevin Du","Adina Williams","Isabelle Augenstein","Ryan Cotterell"],"categories":null,"content":"","date":1720569600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1720569600,"objectID":"d653e30755ad82341f3436257d346ae6","permalink":"https://copenlu.github.io/publication/2024_tacl_stanczak/","publishdate":"2024-07-10T00:00:00Z","relpermalink":"/publication/2024_tacl_stanczak/","section":"publication","summary":"How much meaning influences gender assignment across languages is an active area of research in modern linguistics and cognitive science. We can view current approaches as aiming to determine where gender assignment falls on a spectrum, from being fully arbitrarily determined to being largely semantically determined. For the latter case, there is a formulation of the neo-Whorfian hypothesis, which claims that even inanimate noun gender influences how people conceive of and talk about objects (using the choice of adjective used to modify inanimate nouns as a proxy for meaning). 
We offer a novel, causal graphical model that jointly represents the interactions between a noun's grammatical gender, its meaning, and adjective choice. In accordance with past results, we find a relationship between the gender of nouns and the adjectives which modify them. However, when we control for the meaning of the noun, we find that grammatical gender has a near-zero effect on adjective choice, thereby calling the neo-Whorfian hypothesis into question.","tags":[],"title":"Grammatical Gender's Influence on Distributional Semantics: A Causal Perspective","type":"publication"},{"authors":["Jingyi Sun","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1718928000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1718928000,"objectID":"b7673f070de45d3a15725f0336ca6d90","permalink":"https://copenlu.github.io/publication/2024_arxiv_sun/","publishdate":"2024-06-21T00:00:00Z","relpermalink":"/publication/2024_arxiv_sun/","section":"publication","summary":"Explaining the decision-making process of machine learning models is crucial for ensuring their reliability and fairness. One popular explanation form highlights key input features, such as i) tokens (e.g., Shapley Values and Integrated Gradients), ii) interactions between tokens (e.g., Bivariate Shapley and Attention-based methods), or iii) interactions between spans of the input (e.g., Louvain Span Interactions). However, these explanation types have only been studied in isolation, making it difficult to judge their respective applicability. To bridge this gap, we propose a unified framework that facilitates a direct comparison between highlight and interactive explanations comprised of four diagnostic properties. Through extensive analysis across these three types of input feature explanations--each utilizing three different explanation techniques--across two datasets and two models, we reveal that each explanation type excels in terms of different diagnostic properties. In our experiments, highlight explanations are the most faithful to a model's prediction, and interactive explanations provide better utility for learning to simulate a model's predictions. These insights further highlight the need for future research to develop combined methods that enhance all diagnostic properties.","tags":[],"title":"A Unified Framework for Input Feature Attribution Analysis","type":"publication"},{"authors":["Amalie Brogaard Pauli","Isabelle Augenstein","Ira Assent"],"categories":null,"content":"","date":1718928000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1718928000,"objectID":"8ccae526ea1ae9b6e024e347c7030911","permalink":"https://copenlu.github.io/publication/2024_arxiv_pauli/","publishdate":"2024-06-21T00:00:00Z","relpermalink":"/publication/2024_arxiv_pauli/","section":"publication","summary":"We are exposed to much information trying to influence us, such as teaser messages, debates, politically framed news, and propaganda - all of which use persuasive language. With the recent interest in Large Language Models (LLMs), we study the ability of LLMs to produce persuasive text. As opposed to prior work which focuses on particular domains or types of persuasion, we conduct a general study across various domains to measure and benchmark to what degree LLMs produce persuasive text - both when explicitly instructed to rewrite text to be more or less persuasive and when only instructed to paraphrase. 
To this end, we construct a new dataset, Persuasive-Pairs, of pairs each consisting of a short text and of a text rewritten by an LLM to amplify or diminish persuasive language. We multi-annotate the pairs on a relative scale for persuasive language. This data is not only a valuable resource in itself, but we also show that it can be used to train a regression model to predict a score of persuasive language between text pairs. This model can score and benchmark new LLMs across domains, thereby facilitating the comparison of different LLMs. Finally, we discuss effects observed for different system prompts. Notably, we find that different 'personas' in the system prompt of LLaMA3 change the persuasive language in the text substantially, even when only instructed to paraphrase. These findings underscore the importance of investigating persuasive language in LLM generated text.","tags":[],"title":"Measuring and Benchmarking Large Language Models' Capabilities to Generate Persuasive Language","type":"publication"},{"authors":[],"categories":null,"content":" We are recruiting professional fact checkers to take part in an interview and/or a survey about their experiences of fact checking and fact checking technologies.\nIf you are interested in participating in this research (interviews, surveys, or both), please complete the short online form linked below. A member of the research team will then contact you with more information about the study and taking part.\nInterview participants will be offered an online gift voucher to the value of 50 USD as compensation for their time. Participants who complete the survey will be offered an online gift voucher to the value of 15 USD. All personal data you may share will be kept confidential within the research team.\nSign up form\n What’s involved? We are conducting remote interviews (e.g., on Zoom) and online surveys with professional fact-checkers, members of the general public, and other stakeholders in the fact checking sector such as journalists and content moderators.\nThe interviews will be 60 minutes in duration and will take place remotely via Zoom. Interview participants will be offered an online gift voucher to the value of 50 USD as compensation for their time.\nParticipants who agree to take part in the survey will receive a link via email to an online survey in July 2024. The survey will take about 20 minutes to complete. All participants who complete the survey will be offered an online gift voucher to the value of 15 USD.\nAll personal data will be kept confidential within the project team and always anonymised for publications and presentations. This project has received ethical approval from the University of Copenhagen Research Ethics Committee for the Faculty of Science and Faculty of Health and Medical Sciences.\nYou can read the study information sheet here.\nHow do I take part? Simply fill in the online form here, and we will get in touch with you with more information. If you have any questions or would like more information about the project, you can contact Greta Warren at [email protected].\nWhat is this research about? This research is part of the European Research Council-funded ExplainYourself project (grant agreement ID no. 101077481), which focuses on explainable automatic fact checking. 
Explainable automatic fact checking involves developing Artificial Intelligence (AI) systems that can detect and correct false information as well as produce explanations about how a system arrived at its prediction that a particular piece of information is true or false.\nThe aim of the current research is to understand what kinds of explanations people require when using or when impacted by automated fact checking systems, and how these information needs may differ between different groups of stakeholders. We seek to ensure that the explanations that these systems provide are truly useful to the people that interact with them.\nResearch Team: Dr Greta Warren Postdoctoral Researcher, Department of Computer Science, University of Copenhagen\nProf. Irina Shklovski Professor, Department of Computer Science, University of Copenhagen\nProf. Isabelle Augenstein (Principal Investigator)\nProfessor, Department of Computer Science, University of Copenhagen\n","date":1717200000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1717200000,"objectID":"4b9a0ee56b300f65b77974b35f8860ee","permalink":"https://copenlu.github.io/talk/2024_05_interviews/","publishdate":"2024-06-01T00:00:00Z","relpermalink":"/talk/2024_05_interviews/","section":"talk","summary":"We are recruiting professional fact checkers to take part in an interview and/or a survey about their experiences of fact checking and fact checking technologies.\nIf you are interested in participating in this research (interviews, surveys, or both), please complete the short online form linked below. A member of the research team will then contact you with more information about the study and taking part.\nInterview participants will be offered an online gift voucher to the value of 50 USD as compensation for their time.","tags":[],"title":"Participate in research on explainable fact checking","type":"talk"},{"authors":["Haeun Yu","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1716940800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1716940800,"objectID":"9721f43813dfe74726faa3e4c12ff64f","permalink":"https://copenlu.github.io/publication/2024_acl_yu/","publishdate":"2024-05-29T00:00:00Z","relpermalink":"/publication/2024_acl_yu/","section":"publication","summary":"Language Models (LMs) acquire parametric knowledge from their training process, embedding it within their weights. The increasing scalability of LMs, however, poses significant challenges for understanding a model's inner workings and further for updating or correcting this embedded knowledge without the significant cost of retraining. This underscores the importance of unveiling exactly what knowledge is stored and its association with specific model components. Instance Attribution (IA) and Neuron Attribution (NA) offer insights into this training-acquired knowledge, though they have not been compared systematically. Our study introduces a novel evaluation framework to quantify and compare the knowledge revealed by IA and NA. To align the results of the methods we introduce the attribution method NA-Instances to apply NA for retrieving influential training instances, and IA-Neurons to discover important neurons of influential instances discovered by IA. We further propose a comprehensive list of faithfulness tests to evaluate the comprehensiveness and sufficiency of the explanations provided by both methods. 
Through extensive experiments and analysis, we demonstrate that NA generally reveals more diverse and comprehensive information regarding the LM's parametric knowledge compared to IA. Nevertheless, IA provides unique and valuable insights into the LM's parametric knowledge, which are not revealed by NA. Our findings further suggest the potential of a synergistic approach of combining the diverse findings of IA and NA for a more holistic understanding of an LM's parametric knowledge.","tags":[],"title":"Revealing the Parametric Knowledge of Language Models: A Unified Framework for Attribution Methods","type":"publication"},{"authors":["Nadav Borenstein","Anej Svete","Robin Chan","Josef Valvoda","Franz Nowak","Isabelle Augenstein","Eleanor Chodroff","Ryan Cotterell"],"categories":null,"content":"","date":1716854400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1716854400,"objectID":"7c80665a2dd74493f107a16a2602d7d8","permalink":"https://copenlu.github.io/publication/2024_acl_borenstein/","publishdate":"2024-05-28T00:00:00Z","relpermalink":"/publication/2024_acl_borenstein/","section":"publication","summary":"What can large language models learn? By definition, language models (LMs) are distributions over strings. Therefore, an intuitive way of addressing the above question is to formalize it as a matter of learnability of classes of distributions over strings. While prior work in this direction focused on assessing the theoretical limits, in contrast, we seek to understand the empirical learnability. Unlike prior empirical work, we evaluate neural LMs on their home turf, learning probabilistic languages, rather than as classifiers of formal languages. In particular, we investigate the learnability of regular LMs (RLMs) by RNN and Transformer LMs. We empirically test the learnability of RLMs as a function of various complexity parameters of the RLM and the hidden state size of the neural LM. We find that the RLM rank, which corresponds to the size of linear space spanned by the logits of its conditional distributions, and the expected length of sampled strings are strong and significant predictors of learnability for both RNNs and Transformers. Several other predictors also reach significance, but with differing patterns between RNNs and Transformers.","tags":[],"title":"What Languages are Easy to Language-Model? A Perspective from Learning Probabilistic Regular Languages","type":"publication"},{"authors":["Sara Vera Marjanović","Isabelle Augenstein","Christina Lioma"],"categories":null,"content":"","date":1716163200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1716163200,"objectID":"ab46bd10d7dd641b3c00cd79c3583957","permalink":"https://copenlu.github.io/publication/2024_acl_marjanovic/","publishdate":"2024-05-20T00:00:00Z","relpermalink":"/publication/2024_acl_marjanovic/","section":"publication","summary":"Explainable AI methods facilitate the understanding of model behaviour, yet, small, imperceptible perturbations to inputs can vastly distort explanations. As these explanations are typically evaluated holistically, before model deployment, it is difficult to assess when a particular explanation is trustworthy. Some studies have tried to create confidence estimators for explanations, but none have investigated an existing link between uncertainty and explanation quality. We artificially simulate epistemic uncertainty in text input by introducing noise at inference time. 
In this large-scale empirical study, we insert different levels of noise perturbations and measure the effect on the output of pre-trained language models and different uncertainty metrics. Realistic perturbations have minimal effect on performance and explanations, yet masking has a drastic effect. We find that high uncertainty doesn't necessarily imply low explanation plausibility; the correlation between the two metrics can be moderately positive when noise is exposed during the training process. This suggests that noise-augmented models may be better at identifying salient tokens when uncertain. Furthermore, when predictive and epistemic uncertainty measures are over-confident, the robustness of a saliency map to perturbation can indicate model stability issues. Integrated Gradients shows the overall greatest robustness to perturbation, while still showing model-specific patterns in performance; however, this phenomenon is limited to smaller Transformer-based language models.","tags":[],"title":"Investigating the Impact of Model Instability on Explanations and Uncertainty","type":"publication"},{"authors":["Amelie Wührl","Dustin Wright","Roman Klinger","Isabelle Augenstein"],"categories":null,"content":"","date":1716076800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1716076800,"objectID":"dc352a95d985ce0b845ab053b3429c74","permalink":"https://copenlu.github.io/publication/2024_acl_wuehrl/","publishdate":"2024-05-19T00:00:00Z","relpermalink":"/publication/2024_acl_wuehrl/","section":"publication","summary":"Distorted science communication harms individuals and society as it can lead to unhealthy behavior change and decrease trust in scientific institutions. Given the rapidly increasing volume of science communication in recent years, a fine-grained understanding of how findings from scientific publications are reported to the general public, and methods to detect distortions from the original work automatically, are crucial. Prior work focused on individual aspects of distortions or worked with unpaired data. In this work, we make three foundational contributions towards addressing this problem: (1) annotating 1,600 instances of scientific findings from academic papers paired with corresponding findings as reported in news articles and tweets wrt. four characteristics: causality, certainty, generality and sensationalism; (2) establishing baselines for automatically detecting these characteristics; and (3) analyzing the prevalence of changes in these characteristics in both human-annotated and large-scale unlabeled data. Our results show that scientific findings frequently undergo subtle distortions when reported. Tweets distort findings more often than science news reports. Detecting fine-grained distortions automatically poses a challenging task. In our experiments, fine-tuned task-specific models consistently outperform few-shot LLM prompting.","tags":[],"title":"Understanding Fine-grained Distortions in Reports of Scientific Findings","type":"publication"},{"authors":[],"categories":null,"content":"We are honoured to share that our paper on measuring the fragility of natural language inference models has won an outstanding paper award at EACL 2024. The paper is based on the MSc thesis of Zhaoqi Liu, who was supervised by Isabelle Augenstein and Erik Arakelyan.\nSemantic Sensitivities and Inconsistent Predictions: Measuring the Fragility of NLI Models. 
Erik Arakelyan, Zhaoqi Liu, Isabelle Augenstein.\n ","date":1710892800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1710892800,"objectID":"712b3a4922843b5a6e365a70165d378b","permalink":"https://copenlu.github.io/talk/2024_03_eacl/","publishdate":"2024-03-20T00:00:00Z","relpermalink":"/talk/2024_03_eacl/","section":"talk","summary":"We are honoured to share that our paper on measuring the fragility of natural language inference models has won an outstanding paper award at EACL 2024. The paper is based on the MSc thesis of Zhaoqi Liu, who was supervised by Isabelle Augenstein and Erik Arakelyan.\nSemantic Sensitivities and Inconsistent Predictions: Measuring the Fragility of NLI Models. Erik Arakelyan, Zhaoqi Liu, Isabelle Augenstein.\n ","tags":["explainability"],"title":"Outstanding paper award at EACL 2024","type":"talk"},{"authors":["Veerle C Eijsbroek","Katarina Kjell","H Andrew Schwartz","Jan R Boehnke","Eiko I Fried","Daniel N Klein","Peik Gustafsson","Isabelle Augenstein","Patrick M M Bossuyt","Oscar Kjell"],"categories":null,"content":"","date":1710806400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1710806400,"objectID":"e4b305479b11194949e5592c897d7679","permalink":"https://copenlu.github.io/publication/2024_arxiv_eijsbroek/","publishdate":"2024-03-19T00:00:00Z","relpermalink":"/publication/2024_arxiv_eijsbroek/","section":"publication","summary":"Accurate assessments of symptoms and diagnoses are essential for health research and clinical practice but face many challenges. The absence of a single error-free measure is currently addressed by assessment methods involving experts reviewing several sources of information to achieve a more accurate or best-estimate assessment. Three bodies of work spanning medicine, psychiatry, and psychology propose similar assessment methods: The Expert Panel, the Best-Estimate Diagnosis, and the Longitudinal Expert All Data (LEAD). However, the quality of such best-estimate assessments is typically very difficult to evaluate due to poor reporting of the assessment methods, and when it is reported, the reporting quality varies substantially. Here we tackle this gap by developing reporting guidelines for such studies, using a four-stage approach: 1) drafting reporting standards accompanied by rationales and empirical evidence, which were further developed with a patient organization for depression, 2) incorporating expert feedback through a two-round Delphi procedure, 3) refining the guideline based on an expert consensus meeting, and 4) testing the guideline by i) having two researchers test it and ii) using it to examine the extent to which previously published articles report the standards. The last step also demonstrates the need for the guideline: 18 to 58% (Mean = 33%) of the standards were not reported across fifteen randomly selected studies. The LEADING guideline comprises 20 reporting standards related to four groups: The Longitudinal design; the Appropriate data; the Evaluation – experts, materials, and procedures; and the Validity group. We hope that the LEADING guideline will be useful in assisting researchers in planning, reporting, and evaluating research aiming to achieve best-estimate assessments.","tags":[],"title":"The LEADING Guideline Reporting Standards for Expert Panel, Best-Estimate Diagnosis, and Longitudinal Expert All Data (LEAD) Studies","type":"publication"},{"authors":[],"categories":null,"content":" A PhD and two postdoc positions on natural language understanding are available. 
The positions are funded by the Pioneer Centre for AI. Read more about reasons to join us here. You can read more about the positions at the Pioneer Centre here.\nPhD Fellowship on Factual Text Generation While recent large language models demonstrate surprising fluency and predictive capabilities in their generated text, they have been shown to generate factual inaccuracies even when they have encoded truthful information. This limits their utility and safety in real-world scenarios where guarantees of factuality are needed. To address this, the project will explore methods for improving the factuality of text generation with respect to both objective real-world facts and provided source documents.\nWe are looking for candidates with a background in computer science, machine learning, natural language processing, computational social science, or similar. The candidate should have an interest in automatic text generation and fact checking. They should also have an interest in interdisciplinary research endeavors, including at the Pioneer Centre for AI. Early research experience, especially with empirical research methods, or relevant industry experience, will be a bonus.\nThe principal supervisor is Professor Isabelle Augenstein and the co-supervisor is Dustin Wright.\nApplication deadline: 1 April 2024. Apply here.\nPostdoctoral Fellowship on NLP for Computational Social Science The Pioneer Centre for AI and Department of Computer Science at the University of Copenhagen invite applications for a 2-year postdoctoral full-time research position in the domain of Natural Language Processing.\nNLP is becoming an increasingly powerful tool for social scientists. Yet, the intersection between the two disciplines is still poorly explored, with research in the two disciplines often being conducted as separate streams. The goal of this project is to research methods which can more directly be useful for downstream social science applications. One such application is to analyse common narratives in news, which requires methods including (interpretable) topic modelling, framing detection, social media analysis, etc. The successful candidate will be affiliated with a larger initiative on narrative analysis, spanning different content modalities, with the autonomy to define their project in this larger context.\nThe research will be conducted in collaboration with researchers at the Pioneer Centre for Artificial Intelligence\u0026rsquo;s Speech and Language Collaboratory, CopeNLU and the Belongie Lab. Inquiries about the position can be made to Professor Isabelle Augenstein.\nApplication deadline: 7 April 2024. Apply here.\nPostdoctoral Fellowship on Multi-Modal Fact Checking The Pioneer Centre for AI and Department of Computer Science at the University of Copenhagen invite applications for a 2-year postdoctoral full-time research position in the domain of Natural Language Processing.\nOnline content can include multiple different modalities, ranging from text to images or tables. Increasingly, detecting false information requires the understanding of a combination of these modalities and the relationship between them. This project will focus on developing general-purpose multi-modal methods for automatic fact checking in various domains, such as scientific publications, news or social media. Inquiries about the position can be made to Professor Isabelle Augenstein or Assistant Professor Desmond Elliott.\nApplication deadline: 7 April 2024. 
Apply here.\n","date":1709251200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1709251200,"objectID":"ef826c24eea6b3bc98ed42dbc816541e","permalink":"https://copenlu.github.io/talk/2024_03_positions/","publishdate":"2024-03-01T00:00:00Z","relpermalink":"/talk/2024_03_positions/","section":"talk","summary":"A PhD and two postdoc positions on natural language understanding are available. The positions are funded by the Pioneer Centre for AI. Read more about reasons to join us here. You can read more about the positions at the Pioneer Centre here.\nPhD Fellowship on Factual Text Generation While recent large language models demonstrate surprising fluency and predictive capabilities in their generated text, they have been shown to generate factual inaccuracies even when they have encoded truthful information.","tags":[],"title":"PhD and postdoc positions available at Pioneer Centre for AI","type":"talk"},{"authors":["Nadav Borenstein","Arnav Arora","Lucie-Aimée Kaffee","Isabelle Augenstein"],"categories":null,"content":"","date":1708473600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1708473600,"objectID":"506f8ccdf157900fb0ce9fe9c86c284d","permalink":"https://copenlu.github.io/publication/2024_arxiv_borenstein/","publishdate":"2024-02-21T00:00:00Z","relpermalink":"/publication/2024_arxiv_borenstein/","section":"publication","summary":"Human values play a vital role as an analytical tool in social sciences, enabling the study of diverse dimensions within society as a whole and among individual communities. This paper addresses the limitations of traditional survey-based studies of human values by proposing a computational application of Schwartz's values framework to Reddit, a platform organized into distinct online communities. After ensuring the reliability of automated value extraction tools for Reddit content, we automatically annotate six million posts across 10,000 subreddits with Schwartz values. Our analysis unveils both previously recorded and novel insights into the values prevalent within various online communities. For instance, when examining subreddits with differing opinions on controversial topics, we discover higher universalism values in the Vegan subreddit compared to Carnivores. Additionally, our study of geographically specific subreddits highlights the correlation between traditional values and conservative U.S. states.","tags":[],"title":"Investigating Human Values in Online Communities","type":"publication"},{"authors":["Erik Arakelyan","Zhaoqi Liu","Isabelle Augenstein"],"categories":null,"content":"","date":1706745600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1706745600,"objectID":"3f32adfcf00d41bcd6320da02d2fefd4","permalink":"https://copenlu.github.io/publication/2024_eacl_arakelyan/","publishdate":"2024-02-01T00:00:00Z","relpermalink":"/publication/2024_eacl_arakelyan/","section":"publication","summary":"Recent studies of the emergent capabilities of transformer-based Natural Language Understanding (NLU) models have indicated that they have an understanding of lexical and compositional semantics. We provide evidence that suggests these claims should be taken with a grain of salt: we find that state-of-the-art Natural Language Inference (NLI) models are sensitive towards minor semantics-preserving surface-form variations, which lead to sizable inconsistent model decisions during inference. 
Notably, this behaviour differs from valid and in-depth comprehension of compositional semantics, yet it emerges neither when evaluating model accuracy on standard benchmarks nor when probing for syntactic, monotonic, and logically robust reasoning. We propose a novel framework to measure the extent of semantic sensitivity. To this end, we evaluate NLI models on adversarially generated examples containing minor semantics-preserving surface-form input noise. This is achieved using conditional text generation, with the explicit condition that the NLI model predicts the relationship between the original and adversarial inputs as a symmetric equivalence entailment. We systematically study the effects of the phenomenon across NLI models for in- and out-of-domain settings. Our experiments show that semantic sensitivity causes performance degradations of 12.92% and 23.71% on average over in- and out-of-domain settings, respectively. We further perform ablation studies, analysing this phenomenon across models, datasets, and variations in inference and show that semantic sensitivity can lead to major inconsistency within model predictions.","tags":[],"title":"Semantic Sensitivities and Inconsistent Predictions: Measuring the Fragility of NLI Models","type":"publication"},{"authors":[],"categories":null,"content":"5 papers by CopeNLU authors are accepted to appear at EMNLP 2023, on topics ranging from explainability to language modelling.\nExplaining Interactions Between Text Spans. Sagnik Ray Choudhury, Pepa Atanasova, Isabelle Augenstein.\nWhy Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions. Lucie-Aimée Kaffee, Arnav Arora, Isabelle Augenstein.\nThorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing. Lucie-Aimée Kaffee, Arnav Arora, Zeerak Talat, Isabelle Augenstein.\nPeople Make Better Edits: Measuring the Efficacy of LLM-Generated Counterfactually Augmented Data for Harmful Language Detection. Indira Sen, Dennis Assenmacher, Mattia Samory, Wil van der Aalst, Isabelle Augenstein, Claudia Wagner.\nPHD: Pixel-Based Language Modeling of Historical Documents. Nadav Borenstein, Philipp Rust, Desmond Elliott, Isabelle Augenstein.\n","date":1701734400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1701734400,"objectID":"fe25dc9cc25da7cf7ddcb2440acc273c","permalink":"https://copenlu.github.io/talk/2023_12_emnlp/","publishdate":"2023-12-05T00:00:00Z","relpermalink":"/talk/2023_12_emnlp/","section":"talk","summary":"5 papers by CopeNLU authors are accepted to appear at EMNLP 2023, on topics ranging from explainability to language modelling.\nExplaining Interactions Between Text Spans. Sagnik Ray Choudhury, Pepa Atanasova, Isabelle Augenstein.\nWhy Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions. Lucie-Aimée Kaffee, Arnav Arora, Isabelle Augenstein.\nThorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing. 
Lucie-Aimée Kaffee, Arnav Arora, Zeerak Talat, Isabelle Augenstein.","tags":["explainability","limited-data","fact-checking","gender-bias","multilingual-learning"],"title":"5 Papers Accepted to EMNLP 2023","type":"talk"},{"authors":["Yevgeniy Golovchenko","Karolina Stańczak","Rebecca Adler-Nissen","Patrice Wangen","Isabelle Augenstein"],"categories":null,"content":"","date":1701216000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1701216000,"objectID":"d7b734ec8ac2447e6cd1e31835eda955","permalink":"https://copenlu.github.io/publication/2023_arxiv_golovchenko/","publishdate":"2023-11-29T00:00:00Z","relpermalink":"/publication/2023_arxiv_golovchenko/","section":"publication","summary":"Despite mounting evidence that women in foreign policy often bear the brunt of online hostility, the extent of online gender bias against diplomats remains unexplored. This paper offers the first global analysis of the treatment of women diplomats on social media. Introducing a multidimensional and multilingual methodology for studying online gender bias, it focuses on three critical elements: gendered language, negativity in tweets directed at diplomats, and the visibility of women diplomats. Our unique dataset encompasses ambassadors from 164 countries, their tweets, and the direct responses to these tweets in 65 different languages. Using automated content and sentiment analysis, our findings reveal a crucial gender bias. The language in responses to diplomatic tweets is only mildly gendered and largely pertains to international affairs, and women ambassadors generally do not receive more negative reactions to their tweets than men; yet the pronounced discrepancy in online visibility stands out as a significant form of gender bias. Women receive a staggering 66.4% fewer retweets than men. By unraveling the invisibility that obscures women diplomats on social media, we hope to spark further research on online bias in international politics.","tags":[],"title":"Invisible Women in Digital Diplomacy: A Multidimensional Framework for Online Gender Bias Against Women Ambassadors Worldwide","type":"publication"},{"authors":["Karolina Stańczak","Sagnik Ray Choudhury","Tiago Pimentel","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1699574400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1699574400,"objectID":"9a705d64c26bb8fe43cf17ca023cef3c","permalink":"https://copenlu.github.io/publication/2023_plosone_stanczak/","publishdate":"2023-11-10T00:00:00Z","relpermalink":"/publication/2023_plosone_stanczak/","section":"publication","summary":"While the prevalence of large pre-trained language models has led to significant improvements in the performance of NLP systems, recent research has demonstrated that these models inherit societal biases extant in natural language. In this paper, we explore a simple method to probe pre-trained language models for gender bias, which we use to effect a multilingual study of gender bias towards politicians. We construct a dataset of 250k politicians from most countries in the world and quantify adjective and verb usage around those politicians' names as a function of their gender. We conduct our study in 7 languages across 6 different language modeling architectures. Our results demonstrate that stance towards politicians in pre-trained language models is highly dependent on the language used. 
Finally, contrary to previous findings, our study suggests that larger language models do not tend to be significantly more gender-biased than smaller ones.","tags":[],"title":"Quantifying Gender Bias Towards Politicians in Cross-Lingual Language Models","type":"publication"},{"authors":[],"categories":null,"content":"5 papers by CopeNLU authors are accepted to appear at EMNLP 2024, on topics including factuality and probing for bias.\nSocial Bias Probing: Fairness Benchmarking for Language Models. Marta Marchiori Manerba, Karolina Stańczak, Riccardo Guidotti, Isabelle Augenstein.\nCan Transformers Learn n-gram Language Models?. Anej Svete, Nadav Borenstein, Mike Zhou, Isabelle Augenstein, Ryan Cotterell.\nDYNAMICQA: Tracing Internal Knowledge Conflicts in Language Models. Sara Vera Marjanović, Haeun Yu, Pepa Atanasova, Maria Maistro, Christina Lioma, Isabelle Augenstein.\nRevealing Fine-Grained Values and Opinions in Large Language Models. Dustin Wright, Arnav Arora, Nadav Borenstein, Serge Belongie, Isabelle Augenstein.\nFactcheck-Bench: Fine-Grained Evaluation Benchmark for Automatic Fact-Checkers. Yuxia Wang, Revanth Gangi Reddy, Zain Muhammad Mujahid, Arnav Arora, Aleksandr Rubashevskii, Jiahui Geng, Osama Mohammed Afzal, Liangming Pan, Nadav Borenstein, Aditya Pillai, Isabelle Augenstein, Iryna Gurevych, Preslav Nakov.\n","date":1699315200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1699315200,"objectID":"160b5433775bf590d83c8627ba055bf4","permalink":"https://copenlu.github.io/talk/2024_11_emnlp/","publishdate":"2023-11-07T00:00:00Z","relpermalink":"/talk/2024_11_emnlp/","section":"talk","summary":"5 papers by CopeNLU authors are accepted to appear at EMNLP 2024, on topics including factuality and probing for bias.\nSocial Bias Probing: Fairness Benchmarking for Language Models. Marta Marchiori Manerba, Karolina Stańczak, Riccardo Guidotti, Isabelle Augenstein.\nCan Transformers Learn n-gram Language Models?. Anej Svete, Nadav Borenstein, Mike Zhou, Isabelle Augenstein, Ryan Cotterell.\nDYNAMICQA: Tracing Internal Knowledge Conflicts in Language Models. Sara Vera Marjanović, Haeun Yu, Pepa Atanasova, Maria Maistro, Christina Lioma, Isabelle Augenstein.","tags":["explainability","limited-data","fact-checking","gender-bias"],"title":"5 Papers Accepted to EMNLP 2024","type":"talk"},{"authors":[],"categories":null,"content":"A PhD fellowship on explainable natural language understanding is available in CopeNLU. The successful candidate will be supervised by Isabelle Augenstein and Pepa Atanasova. The position is offered in the context of an ERC Starting Grant on \u0026lsquo;Explainable and Robust Automatic Fact Checking (ExplainYourself)\u0026rsquo;. ERC Starting Grant is a highly competitive funding program by the European Research Council to support the most talented early-career scientists in Europe with funding for a period of 5 years for blue-skies research to build up or expand their research groups.\nExplainYourself proposes to study explainable automatic fact checking, the task of automatically predicting the veracity of textual claims using machine learning (ML) methods, while also producing explanations about how the model arrived at the prediction. Automatic fact checking methods often use opaque deep neural network models, whose inner workings cannot easily be explained. 
Especially for complex tasks such as automatic fact checking, this hinders greater adoption, as it is unclear to users when the models\u0026rsquo; predictions can be trusted. Existing explainable ML methods partly overcome this by reducing the task of explanation generation to highlighting the right rationale. While a good first step, this does not fully explain how an ML model arrived at a prediction. For knowledge-intensive natural language understanding (NLU) tasks such as fact checking, an ML model needs to learn complex relationships between the claim, multiple evidence documents, and common sense knowledge in addition to retrieving the right evidence. There is currently no explainability method that aims to illuminate this highly complex process. In addition, existing approaches are unable to produce diverse explanations, geared towards users with different information needs. ExplainYourself radically departs from existing work in proposing methods for explainable fact checking that more accurately reflect how fact checking models make decisions, and are useful to diverse groups of end users. It is expected that these innovations will apply to explanation generation for other knowledge-intensive NLU tasks, such as question answering or entity linking.\nIn addition to the principal investigator, PhD students and postdocs, the project team will also include collaborators from CopeNLU as well as external collaborators. Two PhD students as well as a postdoc have already been recruited as a result of earlier calls, and the project officially kicked off in September 2023.\nRead more about reasons to join us here. You can read more about the position and apply here.\n","date":1699315200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1699315200,"objectID":"a3af06e82ab1aed3568572014123c0d2","permalink":"https://copenlu.github.io/talk/2023_11_erc/","publishdate":"2023-11-07T00:00:00Z","relpermalink":"/talk/2023_11_erc/","section":"talk","summary":"A PhD fellowship on explainable natural language understanding is available in CopeNLU. The successful candidate will be supervised by Isabelle Augenstein and Pepa Atanasova. The position is offered in the context of an ERC Starting Grant on \u0026lsquo;Explainable and Robust Automatic Fact Checking (ExplainYourself)\u0026rsquo;. ERC Starting Grant is a highly competitive funding program by the European Research Council to support the most talented early-career scientists in Europe with funding for a period of 5 years for blue-skies research to build up or expand their research groups.","tags":[],"title":"PhD position available in context of ERC Starting Grant project ExplainYourself","type":"talk"},{"authors":["Indira Sen","Dennis Assenmacher","Mattia Samory","Isabelle Augenstein","Wil van der Aalst","Claudia Wagner"],"categories":null,"content":"","date":1698969600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1698969600,"objectID":"de54e4df3baf9ff963ddd25c2c82c745","permalink":"https://copenlu.github.io/publication/2023_emnlp_sen/","publishdate":"2023-11-03T00:00:00Z","relpermalink":"/publication/2023_emnlp_sen/","section":"publication","summary":"NLP models are used in a variety of critical social computing tasks, such as detecting sexist, racist, or otherwise hateful content. Therefore, it is imperative that these models are robust to spurious features. Past work has attempted to tackle such spurious features using training data augmentation, including Counterfactually Augmented Data (CADs). 
CADs introduce minimal changes to existing training data points and flip their labels; training on them may reduce model dependency on spurious features. However, manually generating CADs can be time-consuming and expensive. Hence, in this work, we assess whether this task can be automated using generative NLP models. We automatically generate CADs using Polyjuice, ChatGPT, and Flan-T5, and evaluate their usefulness in improving model robustness compared to manually generated CADs. By testing both model performance on multiple out-of-domain test sets and individual data point efficacy, our results show that while manual CADs are still the most effective, CADs generated by ChatGPT come a close second. One key reason for the lower performance of automated methods is that the changes they introduce are often insufficient to flip the original label.","tags":[],"title":"People Make Better Edits: Measuring the Efficacy of LLM-Generated Counterfactually Augmented Data for Harmful Language Detection","type":"publication"},{"authors":["Nadav Borenstein","Phillip Rust","Desmond Elliott","Isabelle Augenstein"],"categories":null,"content":"","date":1698710400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1698710400,"objectID":"c98e2dff04027bd8a29db35ad22e3f24","permalink":"https://copenlu.github.io/publication/2023_emnlp_borenstein/","publishdate":"2023-10-31T00:00:00Z","relpermalink":"/publication/2023_emnlp_borenstein/","section":"publication","summary":"The digitisation of historical documents has provided historians with unprecedented research opportunities. Yet, the conventional approach to analysing historical documents involves converting them from images to text using OCR, a process that overlooks the potential benefits of treating them as images and introduces high levels of noise. To bridge this gap, we take advantage of recent advancements in pixel-based language models trained to reconstruct masked patches of pixels instead of predicting token distributions. Due to the scarcity of real historical scans, we propose a novel method for generating synthetic scans to resemble real historical documents. We then pre-train our model, PHD, on a combination of synthetic scans and real historical newspapers from the 1700-1900 period. Through our experiments, we demonstrate that PHD exhibits high proficiency in reconstructing masked image patches and provide evidence of our model's noteworthy language understanding capabilities. Notably, we successfully apply our model to a historical QA task, highlighting its usefulness in this domain.","tags":[],"title":"PHD: Pixel-Based Language Modeling of Historical Documents","type":"publication"},{"authors":["Sagnik Ray Choudhury","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1698105600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1698105600,"objectID":"90778ba30a99d762b37fd8fb5636e20d","permalink":"https://copenlu.github.io/publication/2023_emnlp_choudhury/","publishdate":"2023-10-24T00:00:00Z","relpermalink":"/publication/2023_emnlp_choudhury/","section":"publication","summary":"Reasoning over spans of tokens from different parts of the input is essential for natural language understanding (NLU) tasks such as fact-checking (FC), machine reading comprehension (MRC) or natural language inference (NLI). However, existing highlight-based explanations primarily focus on identifying individual important tokens or interactions only between adjacent tokens or tuples of tokens. 
Most notably, there is a lack of annotations capturing the human decision-making process w.r.t. the necessary interactions for informed decision-making in such tasks. To bridge this gap, we introduce SpanEx, a multi-annotator dataset of human span interaction explanations for two NLU tasks: NLI and FC. We then investigate the decision-making processes of multiple fine-tuned large language models in terms of the employed connections between spans in separate parts of the input and compare them to the human reasoning processes. Finally, we present a novel unsupervised method based on community detection to extract such interaction explanations from a model's inner workings.","tags":[],"title":"Explaining Interactions Between Text Spans","type":"publication"},{"authors":["Lucie-Aimée Kaffee","Arnav Arora","Zeerak Talat","Isabelle Augenstein"],"categories":null,"content":"","date":1698019200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1698019200,"objectID":"90026581a883d2c710eed9983fc0a35f","permalink":"https://copenlu.github.io/publication/2023_emnlp_kaffee-dual/","publishdate":"2023-10-23T00:00:00Z","relpermalink":"/publication/2023_emnlp_kaffee-dual/","section":"publication","summary":"Dual use, the intentional, harmful reuse of technology and scientific artefacts, is a problem yet to be well-defined within the context of Natural Language Processing (NLP). However, as NLP technologies continue to advance and become increasingly widespread in society, their inner workings have become increasingly opaque. Therefore, understanding dual use concerns and potential ways of limiting them is critical to minimising the potential harms of research and development. In this paper, we conduct a survey of NLP researchers and practitioners to understand the depth of the problem and their perspectives on it, as well as to assess existing available support. Based on the results of our survey, we offer a definition of dual use that is tailored to the needs of the NLP community. The survey revealed that a majority of researchers are concerned about the potential dual use of their research but only take limited action toward it. In light of the survey results, we discuss the current state and potential means for mitigating dual use in NLP and propose a checklist that can be integrated into existing conference ethics frameworks, e.g., the ACL ethics checklist.","tags":[],"title":"Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing","type":"publication"},{"authors":["Lucie-Aimée Kaffee","Arnav Arora","Isabelle Augenstein"],"categories":null,"content":"","date":1698019200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1698019200,"objectID":"3124166c1cf3e2acda50ecf6001902f1","permalink":"https://copenlu.github.io/publication/2023_emnlp_kaffee-wikipedia/","publishdate":"2023-10-23T00:00:00Z","relpermalink":"/publication/2023_emnlp_kaffee-wikipedia/","section":"publication","summary":"The moderation of content on online platforms is usually non-transparent. On Wikipedia, however, this discussion is carried out publicly and the editors are encouraged to use the content moderation policies as explanations for making moderation decisions. Currently, only a few comments explicitly mention those policies -- 20% of the English ones, but as few as 2% of the German and Turkish comments. To aid in this process of understanding how content is moderated, we construct a novel multilingual dataset of Wikipedia editor discussions along with their reasoning in three languages. 
The dataset contains, for each edit decision, the stances of the editors (keep, delete, merge, comment), along with the stated reason and a content moderation policy. We demonstrate that stance and corresponding reason (policy) can be predicted jointly with a high degree of accuracy, adding transparency to the decision-making process. We release both our joint prediction models and the multilingual content moderation dataset for further research on automated transparent content moderation.","tags":[],"title":"Why Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions","type":"publication"},{"authors":["Erik Arakelyan","Pasquale Minervini","Isabelle Augenstein"],"categories":null,"content":"","date":1695254400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1695254400,"objectID":"b743b28f2a9d65ceee1abf0e88ce8cbd","permalink":"https://copenlu.github.io/publication/2023_neurips_arakelyan/","publishdate":"2023-09-21T00:00:00Z","relpermalink":"/publication/2023_neurips_arakelyan/","section":"publication","summary":"Answering complex queries on incomplete knowledge graphs is a challenging task where a model needs to answer complex logical queries in the presence of missing knowledge. Recently, Arakelyan et al. (2021); Minervini et al. (2022) showed that neural link predictors could also be used for answering complex queries: their Continuous Query Decomposition (CQD) method works by decomposing complex queries into atomic sub-queries, answering them using neural link predictors, and aggregating their scores via t-norms to rank the answers to each complex query. However, CQD does not handle negations and only uses the training signal from atomic training queries: neural link prediction scores are not calibrated to interact together via fuzzy logic t-norms during complex query answering. In this work, we propose to address this problem by training a parameter-efficient score adaptation model to re-calibrate neural link prediction scores: this new component is trained on complex queries by back-propagating through the complex query-answering process. Our method, CQDA, produces significantly more accurate results than current state-of-the-art methods, improving from 34.4 to 35.1 Mean Reciprocal Rank values averaged across all datasets and query types while using ≤35% of the available training query types. We further show that CQDA is data-efficient, achieving competitive results with only 1% of the training data, and robust in out-of-domain evaluations.","tags":[],"title":"Adapting Neural Link Predictors for Complex Query Answering","type":"publication"},{"authors":[],"categories":null,"content":"On 1 September 2023, the ERC Starting Grant project ExplainYourself on \u0026lsquo;Explainable and Robust Automatic Fact Checking\u0026rsquo; is officially kicking off. ERC Starting Grant is a highly competitive fellowship programme by the European Research Council to support talented early-career scientists who show potential to be a research leader. It provides funding for blue-skies research for a period of up to 5 years.\nExplainYourself proposes to study explainable automatic fact checking, the task of automatically predicting the veracity of textual claims using machine learning (ML) methods, while also producing explanations about how the model arrived at the prediction. Automatic fact checking methods often use opaque deep neural network models, whose inner workings cannot easily be explained. 
Especially for complex tasks such as automatic fact checking, this hinders greater adoption, as it is unclear to users when the models’ predictions can be trusted. Existing explainable ML methods partly overcome this by reducing the task of explanation generation to highlighting the right rationale. While a good first step, this does not fully explain how an ML model arrived at a prediction. For knowledge-intensive natural language understanding (NLU) tasks such as fact checking, an ML model needs to learn complex relationships between the claim, multiple evidence documents, and common sense knowledge in addition to retrieving the right evidence. There is currently no explainability method that aims to illuminate this highly complex process. In addition, existing approaches are unable to produce diverse explanations, geared towards users with different information needs. ExplainYourself radically departs from existing work in proposing methods for explainable fact checking that more accurately reflect how fact checking models make decisions, and are useful to diverse groups of end users. It is expected that these innovations will apply to explanation generation for other knowledge-intensive NLU tasks, such as question answering or entity linking.\nThe following researchers affiliated with the ExplainYourself project are joining CopeNLU on 1 September 2023:\n Haeun Yu (PhD student), whose main research interests include enhancing explainability in fact-checking and transparency of knowledge-enhanced LMs; Jingyi Sun (PhD student), whose research interests include explainability, fact-checking, and question answering. They will both be supervised by Isabelle Augenstein and Pepa Atanasova. A postdoctoral researcher with a focus on human-centered explainability methods for fact checking is expected to join the team in spring 2024, and there will soon be openings for further positions for a start in autumn 2024.\n","date":1693526400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1693526400,"objectID":"82503587e034816da693e6efb0d12804","permalink":"https://copenlu.github.io/talk/2023_09_explainyourself/","publishdate":"2023-09-01T00:00:00Z","relpermalink":"/talk/2023_09_explainyourself/","section":"talk","summary":"On 1 September 2023, the ERC Starting Grant project ExplainYourself on \u0026lsquo;Explainable and Robust Automatic Fact Checking\u0026rsquo; is officially kicking off. ERC Starting Grant is a highly competitive fellowship programme by the European Research Council to support talented early-career scientists who show potential to be a research leader. 
It provides funding for blue-skies research for a period of up to 5 years.\nExplainYourself proposes to study explainable automatic fact checking, the task of automatically predicting the veracity of textual claims using machine learning (ML) methods, while also producing explanations about how the model arrived at the prediction.","tags":[],"title":"ExplainYourself Project Kick-Off","type":"talk"},{"authors":["Andreas Nugaard Holm","Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1687132800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1687132800,"objectID":"f6b429ecb6ebf1d40a0fb76251c02d5b","permalink":"https://copenlu.github.io/publication/2022_information_holm/","publishdate":"2023-06-19T00:00:00Z","relpermalink":"/publication/2022_information_holm/","section":"publication","summary":"Uncertainty approximation in text classification is an important area with applications in domain adaptation and interpretability. One of the most widely used uncertainty approximation methods is Monte Carlo (MC) Dropout, which is computationally expensive as it requires multiple forward passes through the model. A cheaper alternative is to simply use the softmax based on a single forward pass without dropout to estimate model uncertainty. However, prior work has indicated that these predictions tend to be overconfident. In this paper, we perform a thorough empirical analysis of these methods on five datasets with two base neural architectures in order to identify the trade-offs between the two. We compare both softmax and an efficient version of MC Dropout on their uncertainty approximations and downstream text classification performance, while weighing their runtime (cost) against performance (benefit). We find that, while MC Dropout produces the best uncertainty approximations, using a simple softmax leads to competitive and in some cases better uncertainty estimation for text classification at a much lower computational cost, suggesting that softmax can in fact be a sufficient uncertainty estimate when computational resources are a concern.","tags":[],"title":"Revisiting Softmax for Uncertainty Approximation in Text Classification","type":"publication"},{"authors":["Arnav Arora","Preslav Nakov","Vibha Nayak","Kyle Dent","Ameya Bhatawdekar","Sheikh Muhammad Sarwar","Momchil Hardalov","Yoan Dinkov","Dimitrina Zlatkova","Guillaume Bouchard","Isabelle Augenstein"],"categories":null,"content":"","date":1686009600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1686009600,"objectID":"83d34e2ba951d5baaa05f315facc64de","permalink":"https://copenlu.github.io/publication/2023_csur_arora/","publishdate":"2023-06-06T00:00:00Z","relpermalink":"/publication/2023_csur_arora/","section":"publication","summary":"The proliferation of harmful content on online platforms is a major societal problem, which comes in many different forms including hate speech, offensive language, bullying and harassment, misinformation, spam, violence, graphic content, sexual abuse, self-harm, and many others. Online platforms seek to moderate such content to limit societal harm, to comply with legislation, and to create a more inclusive environment for their users. Researchers have developed different methods for automatically detecting harmful content, often focusing on specific sub-problems or on narrow communities, as what is considered harmful often depends on the platform and on the context. 
We argue that there is currently a dichotomy between what types of harmful content online platforms seek to curb, and what research efforts there are to automatically detect such content. We thus survey existing methods as well as content moderation policies by online platforms in this light, and we suggest directions for future work.","tags":[],"title":"Detecting Harmful Content on Online Platforms: What Platforms Need vs. Where Research Efforts Go","type":"publication"},{"authors":[],"categories":null,"content":"4 papers by CopeNLU authors are accepted to appear at ACL 2023. The papers make contributions on faithfulness of explanations, measuring intersectional biases, event extraction, and few-shot stance detection.\nTopic-Guided Sampling For Data-Efficient Multi-Domain Stance Detection. Erik Arakelyan, Arnav Arora, Isabelle Augenstein.\nFaithfulness Tests for Natural Language Explanations. Pepa Atanasova, Oana-Maria Camburu, Christina Lioma, Thomas Lukasiewicz, Jakob Grue Simonsen, Isabelle Augenstein.\nMeasuring Intersectional Biases in Historical Documents. Nadav Borenstein, Karolina Stańczak, Thea Rolskov, Natacha Klein Käfer, Natália da Silva Perez, Isabelle Augenstein.\nMultilingual Event Extraction from Historical Newspaper Adverts. Nadav Borenstein, Natália da Silva Perez, Isabelle Augenstein.\n","date":1684713600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1684713600,"objectID":"22a08c3f66eb465879fb9fa8a7121d3a","permalink":"https://copenlu.github.io/talk/2023_05_acl/","publishdate":"2023-05-22T00:00:00Z","relpermalink":"/talk/2023_05_acl/","section":"talk","summary":"4 papers by CopeNLU authors are accepted to appear at ACL 2023. The papers make contributions on faithfulness of explanations, measuring intersectional biases, event extraction, and few-shot stance detection.\nTopic-Guided Sampling For Data-Efficient Multi-Domain Stance Detection. Erik Arakelyan, Arnav Arora, Isabelle Augenstein.\nFaithfulness Tests for Natural Language Explanations. Pepa Atanasova, Oana-Maria Camburu, Christina Lioma, Thomas Lukasiewicz, Jakob Grue Simonsen, Isabelle Augenstein.\nMeasuring Intersectional Biases in Historical Documents. Nadav Borenstein, Karolina Stańczak, Thea Rolskov, Natacha Klein Käfer, Natália da Silva Perez, Isabelle Augenstein.","tags":["explainability","fact-checking","limited-data","multilingual-learning","question-answering","knowledge-bases"],"title":"4 Papers Accepted to ACL 2023","type":"talk"},{"authors":["Pepa Atanasova","Oana-Maria Camburu","Christina Lioma","Thomas Lukasiewicz","Jakob Grue Simonsen","Isabelle Augenstein"],"categories":null,"content":"","date":1684540800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1684540800,"objectID":"1398c0cb98c03f482d81b0493a0773dc","permalink":"https://copenlu.github.io/publication/2023_acl_atanasova/","publishdate":"2023-05-20T00:00:00Z","relpermalink":"/publication/2023_acl_atanasova/","section":"publication","summary":"Explanations of neural models aim to reveal a model's decision-making process for its predictions. However, recent work shows that current methods for generating explanations, such as saliency maps or counterfactuals, can be misleading, as they are prone to present reasons that are unfaithful to the model's inner workings. This work explores the challenging question of evaluating the faithfulness of natural language explanations (NLEs). To this end, we present two tests. 
First, we propose an adversarial input editor for inserting reasons that lead to counterfactual predictions but are not reflected by the NLEs. Second, we reconstruct inputs from the reasons stated in the generated NLEs and check how often they lead to the same predictions. Our tests can evaluate emerging NLE models, proving a fundamental tool in the development of faithful NLEs.","tags":[],"title":"Faithfulness Tests for Natural Language Explanations","type":"publication"},{"authors":["Nadav Borenstein","Karolina Stańczak","Thea Rolskov","Natacha Klein Käfer","Natália da Silva Perez","Isabelle Augenstein"],"categories":null,"content":"","date":1684540800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1684540800,"objectID":"d723225952fd996cfd1fc1cd17a8e3ca","permalink":"https://copenlu.github.io/publication/2023_acl_borenstein_measuring/","publishdate":"2023-05-20T00:00:00Z","relpermalink":"/publication/2023_acl_borenstein_measuring/","section":"publication","summary":"Data-driven analyses of biases in historical texts can help illuminate the origin and development of biases prevailing in modern society. However, digitised historical documents pose a challenge for NLP practitioners as these corpora suffer from errors introduced by optical character recognition (OCR) and are written in an archaic language. In this paper, we investigate the continuities and transformations of bias in historical newspapers published in the Caribbean during the colonial era (18th to 19th centuries). Our analyses are performed along the axes of gender, race, and their intersection. We examine these biases by conducting a temporal study in which we measure the development of lexical associations using distributional semantics models and word embeddings. Further, we evaluate the effectiveness of techniques designed to process OCR-generated data and assess their stability when trained on and applied to the noisy historical newspapers. We find that there is a trade-off between the stability of the word embeddings and their compatibility with the historical dataset. We provide evidence that gender and racial biases are interdependent, and their intersection triggers distinct effects. These findings align with the theory of intersectionality, which stresses that biases affecting people with multiple marginalised identities compound to more than the sum of their constituents.","tags":[],"title":"Measuring Intersectional Biases in Historical Documents","type":"publication"},{"authors":["Nadav Borenstein","Natália da Silva Perez","Isabelle Augenstein"],"categories":null,"content":"","date":1684540800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1684540800,"objectID":"dfb5db5c36f4544539143cfa06127685","permalink":"https://copenlu.github.io/publication/2023_acl_borenstein_multilingual/","publishdate":"2023-05-20T00:00:00Z","relpermalink":"/publication/2023_acl_borenstein_multilingual/","section":"publication","summary":"NLP methods can aid historians in analyzing textual materials in greater volumes than manually feasible. Developing such methods poses substantial challenges though. First, acquiring large, annotated historical datasets is difficult, as only domain experts can reliably label them. Second, most available off-the-shelf NLP models are trained on modern language texts, rendering them significantly less effective when applied to historical corpora. This is particularly problematic for less well studied tasks, and for languages other than English. 
This paper addresses these challenges while focusing on the under-explored task of event extraction from a novel domain of historical texts. We introduce a new multilingual dataset in English, French, and Dutch composed of newspaper ads from the early modern colonial period reporting on enslaved people who liberated themselves from enslavement. We find that: 1) even with scarce annotated data, it is possible to achieve surprisingly good results by formulating the problem as an extractive QA task and leveraging existing datasets and models for modern languages; and 2) cross-lingual low-resource learning for historical languages is highly challenging, and machine translation of the historical datasets to the considered target languages is, in practice, often the best-performing solution.","tags":[],"title":"Multilingual Event Extraction from Historical Newspaper Adverts","type":"publication"},{"authors":["Erik Arakelyan","Arnav Arora","Isabelle Augenstein"],"categories":null,"content":"","date":1684540800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1684540800,"objectID":"b59ef48fef25cdc13a34ad48f1c719e9","permalink":"https://copenlu.github.io/publication/2023_acl_arakelyan/","publishdate":"2023-05-20T00:00:00Z","relpermalink":"/publication/2023_acl_arakelyan/","section":"publication","summary":"The task of Stance Detection is concerned with identifying the attitudes expressed by an author towards a target of interest. This task spans a variety of domains ranging from social media opinion identification to detecting the stance for a legal claim. However, the framing of the task varies within these domains, in terms of the data collection protocol, the label dictionary and the number of available annotations. Furthermore, these stance annotations are significantly imbalanced on a per-topic and inter-topic basis. These factors make multi-domain stance detection a challenging task, requiring standardization and domain adaptation. To overcome this challenge, we propose Topic Efficient StancE Detection (TESTED), consisting of a topic-guided diversity sampling technique and a contrastive objective that is used for fine-tuning a stance classifier. We evaluate the method on an existing benchmark of 16 datasets with in-domain (i.e., all topics seen) and out-of-domain (i.e., unseen topics) experiments. The results show that the method outperforms the state-of-the-art with an average increase of 3.5 F1 points in-domain, and is more generalizable with an average of 10.2 F1 on out-of-domain evaluation while using 10% of the training data. We show that our sampling technique mitigates both inter- and per-topic class imbalances. Finally, our analysis demonstrates that the contrastive learning objective allows the model to segment samples with varying labels more distinctly.","tags":[],"title":"Topic-Guided Sampling For Data-Efficient Multi-Domain Stance Detection","type":"publication"},{"authors":["Sandra Martinková","Karolina Stańczak","Isabelle Augenstein"],"categories":null,"content":"","date":1682899200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1682899200,"objectID":"b3c3a0beb252fca5e33e2217aa61130c","permalink":"https://copenlu.github.io/publication/2023_slavnlp_stanczak/","publishdate":"2023-05-01T00:00:00Z","relpermalink":"/publication/2023_slavnlp_stanczak/","section":"publication","summary":"Pre-trained language models have been known to perpetuate biases from the underlying datasets to downstream tasks. 
However, these findings are predominantly based on monolingual language models for English, whereas there are few investigative studies of biases encoded in language models for languages beyond English. In this paper, we fill this gap by analysing gender bias in West Slavic language models. We introduce the first template-based dataset in Czech, Polish, and Slovak for measuring gender bias towards male, female and non-binary subjects. We complete the sentences using both mono- and multilingual language models and assess their suitability for the masked language modelling objective. Next, we measure gender bias encoded in West Slavic language models by quantifying the toxicity and genderness of the generated words. We find that these language models produce hurtful completions that depend on the subject’s gender. Perhaps surprisingly, Czech, Slovak, and Polish language models produce more hurtful completions with men as subjects, which, upon inspection, we find is due to completions being related to violence, death, and sickness.","tags":[],"title":"Measuring Gender Bias in West Slavic Language Models","type":"publication"},{"authors":["Arnav Arora","Lucie-Aimée Kaffee","Isabelle Augenstein"],"categories":null,"content":"","date":1682899200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1682899200,"objectID":"b531663471753575c94370ebfb526f1f","permalink":"https://copenlu.github.io/publication/2023_c3nlp_arora/","publishdate":"2023-05-01T00:00:00Z","relpermalink":"/publication/2023_c3nlp_arora/","section":"publication","summary":"Language embeds information about social, cultural, and political values people hold. Prior work has explored social and potentially harmful biases encoded in Pre-Trained Language models (PTLMs). However, there has been no systematic study investigating how values embedded in these models vary across cultures. In this paper, we introduce probes to study which values across cultures are embedded in these models, and whether they align with existing theories and cross-cultural value surveys. We find that PTLMs capture differences in values across cultures, but those only weakly align with established value surveys. We discuss implications of using mis-aligned models in cross-cultural settings, as well as ways of aligning PTLMs with value surveys.","tags":[],"title":"Probing Pre-Trained Language Models for Cross-Cultural Differences in Values","type":"publication"},{"authors":["Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1671408000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1671408000,"objectID":"afede4a13ead321c853b565fb8b35ef6","permalink":"https://copenlu.github.io/publication/2022_arxiv_wright/","publishdate":"2022-12-19T00:00:00Z","relpermalink":"/publication/2022_arxiv_wright/","section":"publication","summary":"Selecting an effective training signal for tasks in natural language processing is difficult: expert annotations are expensive, and crowd-sourced annotations may not be reliable. At the same time, recent work in NLP has demonstrated that learning from a distribution over labels acquired from crowd annotations can be effective. However, there are many ways to acquire such a distribution, and the performance allotted by any one method can fluctuate based on the task and the amount of available crowd annotations, making it difficult to know a priori which distribution is best. 
This paper systematically analyzes this in the out-of-domain setting, adding to the NLP literature which has focused on in-domain evaluation, and proposes new methods for acquiring soft-labels from crowd annotations by aggregating the distributions produced by existing methods. In particular, we propose to aggregate multiple views of crowd annotations via temperature scaling and finding their Jensen-Shannon centroid. We demonstrate that these aggregation methods lead to the most consistent performance across four NLP tasks on out-of-domain test sets, mitigating fluctuations in performance from the individual distributions. Additionally, aggregation results in the most consistently well-calibrated uncertainty estimation. We argue that aggregating different views of crowd annotations is an effective and minimal intervention to acquire soft-labels which induce robust classifiers despite the inconsistency of the individual soft-labeling methods.","tags":[],"title":"Multi-View Knowledge Distillation from Crowd Annotations for Out-of-Domain Generalization","type":"publication"},{"authors":["Karolina Stańczak","Lucas Torroba Hennigen","Adina Williams","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1669075200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1669075200,"objectID":"5f9c838c2b52729eb053cee6f3a933bd","permalink":"https://copenlu.github.io/publication/2023_aaai_stanczak/","publishdate":"2022-11-22T00:00:00Z","relpermalink":"/publication/2023_aaai_stanczak/","section":"publication","summary":"The success of pre-trained contextualized representations has prompted researchers to analyze them for the presence of linguistic information. Indeed, it is natural to assume that these pre-trained representations do encode some level of linguistic knowledge as they have brought about large empirical improvements on a wide variety of NLP tasks, which suggests they are learning true linguistic generalization. In this work, we focus on intrinsic probing, an analysis technique where the goal is not only to identify whether a representation encodes a linguistic attribute, but also to pinpoint where this attribute is encoded. We propose a novel latent-variable formulation for constructing intrinsic probes and derive a tractable variational approximation to the log-likelihood. Our results show that our model is versatile and yields tighter mutual information estimates than two intrinsic probes previously proposed in the literature. Finally, we find empirical evidence that pre-trained representations develop a cross-lingually entangled notion of morphosyntax.","tags":[],"title":"A Latent-Variable Model for Intrinsic Probing","type":"publication"},{"authors":[],"categories":null,"content":"Three PhD fellowships and two postdoc positions on explainable stance detection are available in CopeNLU. The positions are offered in the context of an ERC Starting Grant on \u0026lsquo;Explainable and Robust Automatic Fact Checking (ExplainYourself)\u0026rsquo;. ERC Starting Grant is a highly competitive funding program by the European Research Council to support the most talented early-career scientists in Europe with funding for a period of 5 years for blue-skies research to build up or expand their research groups.\nExplainYourself proposes to study explainable automatic fact checking, the task of automatically predicting the veracity of textual claims using machine learning (ML) methods, while also producing explanations about how the model arrived at the prediction. 
Automatic fact checking methods often use opaque deep neural network models, whose inner workings cannot easily be explained. Especially for complex tasks such as automatic fact checking, this hinders greater adoption, as it is unclear to users when the models\u0026rsquo; predictions can be trusted. Existing explainable ML methods partly overcome this by reducing the task of explanation generation to highlighting the right rationale. While a good first step, this does not fully explain how an ML model arrived at a prediction. For knowledge-intensive natural language understanding (NLU) tasks such as fact checking, an ML model needs to learn complex relationships between the claim, multiple evidence documents, and common sense knowledge in addition to retrieving the right evidence. There is currently no explainability method that aims to illuminate this highly complex process. In addition, existing approaches are unable to produce diverse explanations, geared towards users with different information needs. ExplainYourself radically departs from existing work in proposing methods for explainable fact checking that more accurately reflect how fact checking models make decisions, and are useful to diverse groups of end users. It is expected that these innovations will apply to explanation generation for other knowledge-intensive NLU tasks, such as question answering or entity linking.\nIn addition to the principal investigator, the two PhD students and postdocs, the project team will also include collaborators from CopeNLU as well as external collaborators.\nRead more about reasons to join us here. The project team will consist of two PhD students and two postdocs, in addition to the principal investigator and external collaborators. At the moment, we are looking to fill one PhD position. You can read more about the position and apply here by 1 February 2024.\n","date":1669075200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1669075200,"objectID":"4c8b06aab72ea403d10540f860f5fd33","permalink":"https://copenlu.github.io/talk/2022_11_erc/","publishdate":"2022-11-22T00:00:00Z","relpermalink":"/talk/2022_11_erc/","section":"talk","summary":"Three PhD fellowships and two postdoc positions on explainable stance detection are available in CopeNLU. The positions are offered in the context of an ERC Starting Grant on \u0026lsquo;Explainable and Robust Automatic Fact Checking (ExplainYourself)\u0026rsquo;. ERC Starting Grant is a highly competitive funding program by the European Research Council to support the most talented early-career scientists in Europe with funding for a period of 5 years for blue-skies research to build up or expand their research groups.","tags":[],"title":"Positions available in context of ERC Starting Grant project ExplainYourself","type":"talk"},{"authors":[],"categories":null,"content":"Isabelle Augenstein has been promoted to full professor, making her the youngest ever female full professor in Denmark. The former officially reported youngest female full professor was appointed in 2008 when she was 34 years old. 
Read more in the University of Copenhagen\u0026rsquo;s press release.\n","date":1665964800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1665964800,"objectID":"482250a2176c8a7bf5326f5b9f45356a","permalink":"https://copenlu.github.io/talk/2022_10_promotion/","publishdate":"2022-10-17T00:00:00Z","relpermalink":"/talk/2022_10_promotion/","section":"talk","summary":"Isabelle Augenstein has been promoted to full professor, making her the youngest ever female full professor in Denmark. The former officially reported youngest female full professor was appointed in 2008 when she was 34 years old. Read more in the University of Copenhagen\u0026rsquo;s press release.","tags":null,"title":"Isabelle Augenstein becomes Denmark's youngest female full professor","type":"talk"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors are accepted to appear at EMNLP 2022, on counterfactual data augmentation and scholarly document understanding.\nCounterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection. Indira Sen, Mattia Samory, Claudia Wagner, Isabelle Augenstein.\nNeighborhood Contrastive Learning for Scientific Document Representations with Citation Embeddings. Malte Ostendorff, Nils Rethmeier, Isabelle Augenstein, Bela Gipp, Georg Rehm.\n","date":1665360000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1665360000,"objectID":"6c2fe5e9639f971697601fa57e985abd","permalink":"https://copenlu.github.io/talk/2022_11_emnlp/","publishdate":"2022-10-10T00:00:00Z","relpermalink":"/talk/2022_11_emnlp/","section":"talk","summary":"2 papers by CopeNLU authors are accepted to appear at EMNLP 2022, on counterfactual data augmentation and scholarly document understanding.\nCounterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection. Indira Sen, Mattia Samory, Claudia Wagner, Isabelle Augenstein.\nNeighborhood Contrastive Learning for Scientific Document Representations with Citation Embeddings. Malte Ostendorff, Nils Rethmeier, Isabelle Augenstein, Bela Gipp, Georg Rehm.","tags":["scholarly-data","fact-checking"],"title":"2 Papers Accepted to EMNLP 2022","type":"talk"},{"authors":["Shailza Jolly","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1665187200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1665187200,"objectID":"3340d7c5aad05a041d35726ccfdc685a","permalink":"https://copenlu.github.io/publication/2022_information_jolly/","publishdate":"2022-10-08T00:00:00Z","relpermalink":"/publication/2022_information_jolly/","section":"publication","summary":"Fact-checking systems have become important tools to verify fake and misleading news. These systems become more trustworthy when human-readable explanations accompany the veracity labels. However, manual collection of such explanations is expensive and time-consuming. Recent works frame explanation generation as extractive summarization, and propose to automatically select a sufficient subset of the most important facts from the ruling comments (RCs) of a professional journalist to obtain fact-checking explanations. However, these explanations lack fluency and sentence coherence. In this work, we present an iterative edit-based algorithm that uses only phrase-level edits to perform unsupervised post-editing of disconnected RCs. To regulate our editing algorithm, we use a scoring function with components including fluency and semantic preservation. In addition, we show the applicability of our approach in a completely unsupervised setting. 
We experiment with two benchmark datasets, LIAR-PLUS and PubHealth. We show that our model generates explanations that are fluent, readable, and non-redundant, and that cover important information for the fact check.","tags":[],"title":"Generating Fluent Fact Checking Explanations with Unsupervised Post-Editing","type":"publication"},{"authors":["Dustin Wright","Jiaxin Pei","David Jurgens","Isabelle Augenstein"],"categories":null,"content":" Code, models, and data We\u0026rsquo;ve released all of the code, models, and data for the project to help with further research on NLP for understanding science communication. The code can be found here, the sentence-transformers model here, and the dataset here. We\u0026rsquo;ve additionally released a lightweight Python package scientific-information-change, which can be used to measure the information matching score (IMS) between scientific sentences. You can install the package as follows:\npip install scientific-information-change ","date":1665100800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1665100800,"objectID":"640aa6fcdcd9892fd832e0d161565b0b","permalink":"https://copenlu.github.io/publication/2022_emnlp_wright/","publishdate":"2022-10-07T00:00:00Z","relpermalink":"/publication/2022_emnlp_wright/","section":"publication","summary":"Whether the media faithfully communicate scientific information has long been a core issue for the science community. Automatically identifying paraphrased scientific findings could enable large-scale tracking and analysis of information changes in the science communication process, but this requires systems to understand the similarity between scientific information across multiple domains. To this end, we present the SCIENTIFIC PARAPHRASE AND INFORMATION CHANGE DATASET (SPICED), the first paraphrase dataset of scientific findings annotated for degree of information change. SPICED contains 6,000 scientific finding pairs extracted from news stories, social media discussions, and full texts of original papers. We demonstrate that SPICED poses a challenging task and that models trained on SPICED improve downstream performance on evidence retrieval for fact checking of real-world scientific claims. Finally, we show that models trained on SPICED can reveal large-scale trends in the degrees to which people and organizations faithfully communicate new scientific findings.","tags":[],"title":"Modeling Information Change in Science Communication with Semantically Matched Paraphrases","type":"publication"},{"authors":["Malte Ostendorff","Nils Rethmeier","Isabelle Augenstein","Bela Gipp","Georg Rehm"],"categories":null,"content":"","date":1665100800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1665100800,"objectID":"3109e3cb650073a831f44f4d4a836673","permalink":"https://copenlu.github.io/publication/2022_emnlp_ostendorff/","publishdate":"2022-10-07T00:00:00Z","relpermalink":"/publication/2022_emnlp_ostendorff/","section":"publication","summary":"Learning scientific document representations can be substantially improved through contrastive learning objectives, where the challenge lies in creating positive and negative training samples that encode the desired similarity semantics. Prior work relies on discrete citation relations to generate contrast samples. However, discrete citations enforce a hard cut-off to similarity. 
This is counter-intuitive to similarity-based learning, and ignores that scientific papers can be very similar despite lacking a direct citation - a core problem of finding related research. Instead, we use controlled nearest neighbor sampling over citation graph embeddings for contrastive learning. This control allows us to learn continuous similarity, to sample hard-to-learn negatives and positives, and also to avoid collisions between negative and positive samples by controlling the sampling margin between them. The resulting method SciNCL outperforms the state-of-the-art on the SciDocs benchmark. Furthermore, we demonstrate that it can train (or tune) models sample-efficiently, and that it can be combined with recent training-efficient methods. Perhaps surprisingly, even training a general-domain language model this way outperforms baselines pretrained in-domain.","tags":[],"title":"Neighborhood Contrastive Learning for Scientific Document Representations with Citation Embeddings","type":"publication"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors on probing question answering models are accepted to appear at Coling 2022.\nMachine Reading, Fast and Slow: When Do Models \u0026lsquo;Understand\u0026rsquo; Language?. Sagnik Ray Choudhury, Anna Rogers, Isabelle Augenstein.\nCan Edge Probing Tasks Reveal Linguistic Knowledge in QA Models?. Sagnik Ray Choudhury, Nikita Bhutani, Isabelle Augenstein.\n","date":1664582400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1664582400,"objectID":"d3ce4d46093dbfa336b184aa4e25c4f9","permalink":"https://copenlu.github.io/talk/2022_08_coling/","publishdate":"2022-10-01T00:00:00Z","relpermalink":"/talk/2022_08_coling/","section":"talk","summary":"2 papers by CopeNLU authors on probing question answering models are accepted to appear at Coling 2022.\nMachine Reading, Fast and Slow: When Do Models \u0026lsquo;Understand\u0026rsquo; Language?. Sagnik Ray Choudhury, Anna Rogers, Isabelle Augenstein.\nCan Edge Probing Tasks Reveal Linguistic Knowledge in QA Models?. Sagnik Ray Choudhury, Nikita Bhutani, Isabelle Augenstein.","tags":["explainability","question-answering"],"title":"2 Papers Accepted to Coling 2022","type":"talk"},{"authors":["Klim Zaporojets","Lucie-Aimée Kaffee","Johannes Deleu","Thomas Demeester","Chris Develder","Isabelle Augenstein"],"categories":null,"content":"","date":1663545600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1663545600,"objectID":"ff1034d5368676bb4dd508d6b6376e5b","permalink":"https://copenlu.github.io/publication/2022_neurips_zaporojets/","publishdate":"2022-09-19T00:00:00Z","relpermalink":"/publication/2022_neurips_zaporojets/","section":"publication","summary":"In our continuously evolving world, entities change over time and new, previously non-existing or unknown, entities appear. We study how this evolutionary scenario impacts the performance on a well established entity linking (EL) task. For that study, we introduce TempEL, an entity linking dataset that consists of time-stratified English Wikipedia snapshots from 2013 to 2022, from which we collect both anchor mentions of entities, and these target entities’ descriptions. By capturing such temporal aspects, our newly introduced TempEL resource contrasts with currently existing entity linking datasets, which are composed of fixed mentions linked to a single static version of a target Knowledge Base (e.g., Wikipedia 2010 for CoNLL-AIDA). 
Indeed, for each of our collected temporal snapshots, TempEL contains links to entities that are continual, i.e., occur in all of the years, as well as completely new entities that appear for the first time at some point. Thus, we make it possible to quantify the performance of current state-of-the-art EL models for: (i) entities that are subject to changes over time in their Knowledge Base descriptions as well as their mentions’ contexts, and (ii) newly created entities that were previously non-existing (e.g., at the time the EL model was trained). Our experimental results show that in terms of temporal performance degradation, (i) continual entities suffer a decrease of up to 4.6% EL accuracy, while (ii) for new entities this accuracy drop is up to 15.4%. This highlights the challenge of the introduced TempEL dataset and opens new research prospects in the area of time-evolving entity disambiguation.","tags":[],"title":"TempEL: Linking Dynamically Evolving and Newly Emerging Entities","type":"publication"},{"authors":["Sagnik Ray Choudhury","Nikita Bhutani","Isabelle Augenstein"],"categories":null,"content":"","date":1663286400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1663286400,"objectID":"61ee63df1b93e9360b216a2419d06602","permalink":"https://copenlu.github.io/publication/2022_coling_choudhury_edge/","publishdate":"2022-09-16T00:00:00Z","relpermalink":"/publication/2022_coling_choudhury_edge/","section":"publication","summary":"There have been many efforts to try to understand what grammatical knowledge (e.g., ability to understand the part of speech of a token) is encoded in large pre-trained language models (LM). This is done through `Edge Probing' (EP) tests: supervised classification tasks to predict the grammatical properties of a span (whether it has a particular part of speech) using only the token representations coming from the LM encoder. However, most NLP applications fine-tune these LM encoders for specific tasks. Here, we ask: if an LM is fine-tuned, does the encoding of linguistic information in it change, as measured by EP tests? Specifically, we focus on the task of Question Answering (QA) and conduct experiments on multiple datasets. We find that EP test results do not change significantly when the fine-tuned model performs well or in adversarial situations where the model is forced to learn wrong correlations. Based on similar findings, some recent papers conclude that fine-tuning does not change linguistic knowledge in encoders, but they do not provide an explanation. We find that EP models themselves are susceptible to exploiting spurious correlations in the EP datasets. 
When this dataset bias is corrected, we do see an improvement in the EP test results as expected.","tags":[],"title":"Can Edge Probing Tasks Reveal Linguistic Knowledge in QA Models?","type":"publication"},{"authors":["Sagnik Ray Choudhury","Anna Rogers","Isabelle Augenstein"],"categories":null,"content":"","date":1663286400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1663286400,"objectID":"4ae3728d516d5653445e105026c47c7b","permalink":"https://copenlu.github.io/publication/2022_coling_choudhury_machine/","publishdate":"2022-09-16T00:00:00Z","relpermalink":"/publication/2022_coling_choudhury_machine/","section":"publication","summary":"Two of the most fundamental challenges in Natural Language Understanding (NLU) at present are: (a) how to establish whether deep learning-based models score highly on NLU benchmarks for the 'right' reasons; and (b) to understand what those reasons would even be. We investigate the behavior of reading comprehension models with respect to two linguistic 'skills': coreference resolution and comparison. We propose a definition for the reasoning steps expected from a system that would be 'reading slowly', and compare that with the behavior of five models of the BERT family of various sizes, observed through saliency scores and counterfactual explanations. We find that for comparison (but not coreference) the systems based on larger encoders are more likely to rely on the 'right' information, but even they struggle with generalization, suggesting that they still learn specific lexical patterns rather than the general principles of comparison.","tags":[],"title":"Machine Reading, Fast and Slow: When Do Models 'Understand' Language?","type":"publication"},{"authors":["Anna Rogers","Matt Gardner","Isabelle Augenstein"],"categories":null,"content":"","date":1663027200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1663027200,"objectID":"b3f408328f7b3cf2a7725d4f1d1618a0","permalink":"https://copenlu.github.io/publication/2022_csur_rogers/","publishdate":"2022-09-13T00:00:00Z","relpermalink":"/publication/2022_csur_rogers/","section":"publication","summary":"Alongside huge volumes of research on deep learning models in NLP in recent years, there has also been much work on benchmark datasets needed to track modeling progress. Question answering and reading comprehension have been particularly prolific in this regard, with over 80 new datasets appearing in the past two years. This study is the largest survey of the field to date. We provide an overview of the various formats and domains of the current resources, highlighting the current lacunae for future work. We further discuss the current classifications of “skills” that question answering/reading comprehension systems are supposed to acquire, and propose a new taxonomy. The supplementary materials survey the current multilingual resources and monolingual resources for languages other than English, and we discuss the implications of over-focusing on English. 
The study is aimed at both practitioners looking for pointers to the wealth of existing data, and at researchers working on new resources.","tags":[],"title":"QA Dataset Explosion: A Taxonomy of NLP Resources for Question Answering and Reading Comprehension","type":"publication"},{"authors":["Nils Rethmeier","Isabelle Augenstein"],"categories":null,"content":"","date":1662508800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1662508800,"objectID":"2c053439e5908e1a0d088c23b29db57e","permalink":"https://copenlu.github.io/publication/2022_csur_rethmeier/","publishdate":"2022-09-07T00:00:00Z","relpermalink":"/publication/2022_csur_rethmeier/","section":"publication","summary":"Modern natural language processing (NLP) methods employ self-supervised pretraining objectives such as masked language modeling to boost the performance of various downstream tasks. These pretraining methods are frequently extended with recurrence, adversarial, or linguistic property masking. Recently, contrastive self-supervised training objectives have enabled successes in image representation pretraining by learning to contrast input-input pairs of augmented images as either similar or dissimilar. In NLP, however, a single token augmentation can invert the meaning of a sentence during input-input contrastive learning, which led to input-output contrastive approaches that avoid the issue by instead contrasting over input-label pairs. In this primer, we summarize recent self-supervised and supervised contrastive NLP pretraining methods and describe where they are used to improve language modeling, zero to few-shot learning, pretraining data-efficiency, and specific NLP tasks. We overview key contrastive learning concepts with lessons learned from prior research and structure works by applications. Finally, we point to open challenges and future directions for contrastive NLP to encourage bringing contrastive NLP pretraining closer to recent successes in image representation pretraining.","tags":[],"title":"A Primer on Contrastive Pretraining in Language Processing: Methods, Lessons Learned and Perspectives","type":"publication"},{"authors":["Isabelle Augenstein"],"categories":null,"content":"","date":1661990400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1661990400,"objectID":"43a5fafc6ecda73906a5332fd645ccb0","permalink":"https://copenlu.github.io/publication/2022_ki_augenstein/","publishdate":"2022-09-01T00:00:00Z","relpermalink":"/publication/2022_ki_augenstein/","section":"publication","summary":"With the substantial rise in the amount of mis- and disinformation online, fact checking has become an important task to automate. This article is a summary of a habilitation (doctor scientiarum) thesis submitted to the University of Copenhagen, which was successfully defended in December 2021 (Augenstein in Towards Explainable Fact Checking. Dr. Scient. thesis, University of Copenhagen, Faculty of Science, 2021). The dissertation addresses several fundamental research gaps within automatic fact checking. 
The contributions are organised along three verticals: (1) the fact-checking subtask they address; (2) methods which only require small amounts of manually labelled data; (3) methods for explainable fact checking, addressing the problem of opaqueness in the decision-making of black-box fact checking models.","tags":[],"title":"Habilitation Abstract: Towards Explainable Fact Checking","type":"publication"},{"authors":["Sara Marjanovic","Karolina Stańczak","Isabelle Augenstein"],"categories":null,"content":"","date":1661990400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1661990400,"objectID":"621b73eea2223620b013347eaaad8610","permalink":"https://copenlu.github.io/publication/2022_plosone_marjanovic/","publishdate":"2022-09-01T00:00:00Z","relpermalink":"/publication/2022_plosone_marjanovic/","section":"publication","summary":"Despite attempts to increase gender parity in politics, global efforts have struggled to ensure equal female representation. This is likely tied to implicit gender biases against women in authority. In this work, we present a comprehensive study of gender biases that appear in online political discussion. To this end, we collect 10 million comments on Reddit in conversations about male and female politicians, which enables an exhaustive study of automatic gender bias detection. We address not only misogynistic language, but also other manifestations of bias, like benevolent sexism in the form of seemingly positive sentiment and dominance attributed to female politicians, or differences in descriptor attribution. Finally, we conduct a multi-faceted study of gender bias towards politicians investigating both linguistic and extra-linguistic cues. We assess 5 different types of gender bias, evaluating coverage, combinatorial, nominal, sentimental, and lexical biases extant in social media language and discourse. Overall, we find that, contrary to previous research, coverage and sentiment biases suggest equal public interest in female politicians. Rather than overt hostile or benevolent sexism, the results of the nominal and lexical analyses suggest this interest is not as professional or respectful as that expressed about male politicians. Female politicians are often named by their first names and are described in relation to their body, clothing, or family; this is a treatment that is not similarly extended to men. On the now banned far-right subreddits, this disparity is greatest, though differences in gender biases still appear in the right and left-leaning subreddits. We release the curated dataset to the public for future studies.","tags":[],"title":"Quantifying Gender Biases Towards Politicians on Reddit","type":"publication"},{"authors":[],"categories":null,"content":"3 papers by CopeNLU authors are accepted to appear at NAACL 2022, which are on the topics of hate speech detection, misinformation detection and multilingual probing.\nCounterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection. Indira Sen, Mattia Samory, Claudia Wagner, Isabelle Augenstein.\nA Survey on Stance Detection for Mis- and Disinformation Identification. Momchil Hardalov, Arnav Arora, Preslav Nakov, Isabelle Augenstein.\nSame Neurons, Different Languages: Probing Morphosyntax in Multilingual Pre-trained Models. 
Karolina Stańczak, Edoardo Ponti, Lucas Torroba Hennigen, Ryan Cotterell, Isabelle Augenstein.\n","date":1656633600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1656633600,"objectID":"000da3a4befc7cbcc8649783dff53ca7","permalink":"https://copenlu.github.io/talk/2022_07_naacl/","publishdate":"2022-07-01T00:00:00Z","relpermalink":"/talk/2022_07_naacl/","section":"talk","summary":"3 papers by CopeNLU authors are accepted to appear at NAACL 2022, which are on the topics of hate speech detection, misinformation detection and multilingual probing.\nCounterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection. Indira Sen, Mattia Samory, Claudia Wagner, Isabelle Augenstein.\nA Survey on Stance Detection for Mis- and Disinformation Identification. Momchil Hardalov, Arnav Arora, Preslav Nakov, Isabelle Augenstein.\nSame Neurons, Different Languages: Probing Morphosyntax in Multilingual Pre-trained Models.","tags":["explainability","limited-data","fact-checking","multilingual-learning","gender-bias"],"title":"3 Papers Accepted to NAACL 2022","type":"talk"},{"authors":["Momchil Hardalov","Arnav Arora","Preslav Nakov","Isabelle Augenstein"],"categories":null,"content":"","date":1651708800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1651708800,"objectID":"a846b726eacc8ddd8854e2151cc59278","permalink":"https://copenlu.github.io/publication/2022_naacl_hardalov/","publishdate":"2022-05-05T00:00:00Z","relpermalink":"/publication/2022_naacl_hardalov/","section":"publication","summary":"Detecting attitudes expressed in texts, also known as stance detection, has become an important task for the detection of false information online, be it misinformation (unintentionally false) or disinformation (intentionally false, spread deliberately with malicious intent). Stance detection has been framed in different ways, including: (a) as a component of fact-checking, rumour detection, and detecting previously fact-checked claims; or (b) as a task in its own right. While there have been prior efforts to contrast stance detection with other related social media tasks such as argumentation mining and sentiment analysis, there is no survey examining the relationship between stance detection and mis- and disinformation detection from a holistic viewpoint, which is the focus of this survey. We review and analyse existing work in this area, before discussing lessons learnt and future challenges.","tags":[],"title":"A Survey on Stance Detection for Mis- and Disinformation Identification","type":"publication"},{"authors":["Indira Sen","Mattia Samory","Claudia Wagner","Isabelle Augenstein"],"categories":null,"content":"","date":1651622400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1651622400,"objectID":"6889736e120750a0a4351ea372844404","permalink":"https://copenlu.github.io/publication/2022_naacl_sen/","publishdate":"2022-05-04T00:00:00Z","relpermalink":"/publication/2022_naacl_sen/","section":"publication","summary":"Counterfactually Augmented Data (CAD) aims to improve out-of-domain generalizability, an indicator of model robustness. The improvement is credited to promoting core features of the construct over spurious artifacts that happen to correlate with it. Yet, over-relying on core features may lead to unintended model bias. In particular, construct-driven CAD -- perturbations of core features -- may induce models to ignore the context in which core features are used. 
Here, we test models for sexism and hate speech detection on challenging data: non-hateful and non-sexist usage of identity and gendered terms. On these hard cases, models trained on CAD, especially construct-driven CAD, show higher false positive rates than models trained on the original, unperturbed data. Using a diverse set of CAD -- construct-driven and construct-agnostic -- reduces such unintended bias.","tags":[],"title":"Counterfactually Augmented Data and Unintended Bias: The Case of Sexism and Hate Speech Detection","type":"publication"},{"authors":["Karolina Stańczak","Edoardo Ponti","Lucas Torroba Hennigen","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1651622400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1651622400,"objectID":"c2856f745bbc83e57986ad6fac3990a3","permalink":"https://copenlu.github.io/publication/2022_naacl_stanczak/","publishdate":"2022-05-04T00:00:00Z","relpermalink":"/publication/2022_naacl_stanczak/","section":"publication","summary":"The success of multilingual pre-trained models is underpinned by their ability to learn representations shared by multiple languages even in the absence of any explicit supervision. However, it remains unclear how these models learn to generalise across languages. In this work, we conjecture that multilingual pre-trained models can derive language-universal abstractions about grammar. In particular, we investigate whether morphosyntactic information is encoded in the same subset of neurons in different languages. We conduct the first large-scale empirical study over 43 languages and 14 morphosyntactic categories with a state-of-the-art neuron-level probe. Our findings show that the cross-lingual overlap between neurons is significant, but its extent may vary across categories and depends on language proximity and pre-training data size.","tags":[],"title":"Same Neurons, Different Languages: Probing Morphosyntax in Multilingual Pre-trained Models","type":"publication"},{"authors":["Pepa Atanasova","Jakob Grue Simonsen","Christina Lioma","Isabelle Augenstein"],"categories":null,"content":"","date":1649203200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1649203200,"objectID":"a7d2ae23394f02d699ada5b960b6b8e3","permalink":"https://copenlu.github.io/publication/2022_tacl_atanasova/","publishdate":"2022-04-06T00:00:00Z","relpermalink":"/publication/2022_tacl_atanasova/","section":"publication","summary":"Automating the fact checking (FC) process relies on information obtained from external sources. In this work, we posit that it is crucial for FC models to make veracity predictions only when there is sufficient evidence and otherwise indicate when it is not enough. To this end, we are the first to study what information FC models consider sufficient by introducing a novel task and advancing it with three main contributions. First, we conduct an in-depth empirical analysis of the task with a new fluency-preserving method for omitting information from the evidence at the constituent and sentence level. We identify when models consider the remaining evidence (in)sufficient for FC, based on three trained models with different Transformer architectures and three FC datasets. Second, we ask annotators whether the omitted evidence was important for FC, resulting in a novel diagnostic dataset, SufficientFacts, for FC with omitted evidence. 
We find that models are least successful in detecting missing evidence when adverbial modifiers are omitted (21% accuracy), whereas it is easiest for omitted date modifiers (63% accuracy). Finally, we propose a novel data augmentation strategy for contrastive self-learning of missing evidence by employing the proposed omission method combined with tri-training. It improves performance for Evidence Sufficiency Prediction by up to 17.8 F1 points, which in turn improves FC performance by up to 2.6 F1 points.","tags":[],"title":"Fact Checking with Insufficient Evidence","type":"publication"},{"authors":["Dustin Wright","David Wadden","Kyle Lo","Bailey Kuehl","Isabelle Augenstein","Lucy Lu Wang"],"categories":null,"content":"","date":1647734400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1647734400,"objectID":"cf025103e293b5cb63a50156dcbe6d29","permalink":"https://copenlu.github.io/publication/2022_acl_wright/","publishdate":"2022-03-20T00:00:00Z","relpermalink":"/publication/2022_acl_wright/","section":"publication","summary":"Automated scientific fact checking is difficult due to the complexity of scientific language and a lack of significant amounts of training data, as annotation requires domain expertise. To address this challenge, we propose scientific claim generation, the task of generating one or more atomic and verifiable claims from scientific sentences, and demonstrate its usefulness in zero-shot fact checking for biomedical claims. We propose CLAIMGEN-BART, a new supervised method for generating claims supported by the literature, as well as KBIN, a novel method for generating claim negations. Additionally, we adapt an existing unsupervised entity-centric method of claim generation to biomedical claims, which we call CLAIMGEN-ENTITY. Experiments on zero-shot fact checking demonstrate that both CLAIMGEN-ENTITY and CLAIMGEN-BART, coupled with KBIN, achieve up to 90% performance of fully supervised models trained on manually annotated claims and evidence. A rigorous evaluation study demonstrates significant improvement in generated claim and negation quality over existing baselines.","tags":[],"title":"Generating Scientific Claims for Zero-Shot Scientific Fact Checking","type":"publication"},{"authors":["Andrea Lekkas","Peter Schneider-Kamp","Isabelle Augenstein"],"categories":null,"content":"","date":1646092800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1646092800,"objectID":"2cf136cf8830ddd803d9f4281b784be9","permalink":"https://copenlu.github.io/publication/2022_distcurate_lekkas/","publishdate":"2022-03-01T00:00:00Z","relpermalink":"/publication/2022_distcurate_lekkas/","section":"publication","summary":"The effectiveness of a language model is influenced by its token representations, which must encode contextual information and handle the same word form having a plurality of meanings (polysemy). Currently, none of the common language modelling architectures explicitly model polysemy. We propose a language model which not only predicts the next word, but also its sense in context. We argue that this higher prediction granularity may be useful for end tasks such as assistive writing, and allow for a more precise linking of language models with knowledge bases. We find that multi-sense language modelling requires architectures that go beyond standard language models, and here propose a structured prediction framework that decomposes the task into a word followed by a sense prediction task. 
For sense prediction, we utilise a Graph Attention Network, which encodes definitions and example uses of word senses. Overall, we find that multi-sense language modelling is a highly challenging task, and suggest that future work focus on the creation of more annotated training datasets.","tags":[],"title":"Multi-Sense Language Modelling","type":"publication"},{"authors":["Anabela Barreiro","José G. C. de Souza","Albert Gatt","Mehul Bhatt","Elena Lloret","Aykut Erdem","Dimitra Gkatzia","Helena Moniz","Irene Russo","Fabio Kepler","Iacer Calixto","Marcin Paprzycki","François Portet","Isabelle Augenstein","Mirela Alhasani"],"categories":null,"content":"","date":1646092800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1646092800,"objectID":"a05b89c3d4f2c97aeedd0cd725e0696a","permalink":"https://copenlu.github.io/publication/2022_eamt_barreiro/","publishdate":"2022-03-01T00:00:00Z","relpermalink":"/publication/2022_eamt_barreiro/","section":"publication","summary":"This paper presents the Multitask, Multilingual, Multimodal Language Generation COST Action – Multi3Generation (CA18231), an interdisciplinary network of research groups working on different aspects of language generation. This 'meta-paper' will serve as a reference for citations of the Action in future publications. It presents the objectives, challenges, and links to the achieved outcomes.","tags":[],"title":"Multi3Generation: Multi-task, Multilingual, Multi-Modal Language Generation","type":"publication"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors are accepted to appear at AAAI 2022. One paper is on explanation generation, demonstrating how directly optimising for diagnostic properties of explanations, such as faithfulness, data consistency and confidence indication, can improve explanation quality. The other paper presents the most comprehensive study of cross-lingual stance detection to date, and proposes methods for learning with limited labelled data across languages and domains.\nDiagnostics-Guided Explanation Generation. Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein.\nFew-Shot Cross-Lingual Stance Detection with Sentiment-Based Pre-Training. Momchil Hardalov, Arnav Arora, Preslav Nakov, Isabelle Augenstein.\n","date":1638316800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638316800,"objectID":"88a184ddff926993105c927dd8d17fab","permalink":"https://copenlu.github.io/talk/2021_12_aaai/","publishdate":"2021-12-01T00:00:00Z","relpermalink":"/talk/2021_12_aaai/","section":"talk","summary":"2 papers by CopeNLU authors are accepted to appear at AAAI 2022. One paper is on explanation generation, demonstrating how directly optimising for diagnostic properties of explanations, such as faithfulness, data consistency and confidence indication, can improve explanation quality. The other paper presents the most comprehensive study of cross-lingual stance detection to date, and proposes methods for learning with limited labelled data across languages and domains.\nDiagnostics-Guided Explanation Generation. 
Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein.","tags":["explainability","limited-data","fact-checking"],"title":"2 Papers Accepted to AAAI 2022","type":"talk"},{"authors":["Sheikh Muhammad Sarwar","Dimitrina Zlatkova","Momchil Hardalov","Yoan Dinkov","Isabelle Augenstein","Preslav Nakov"],"categories":null,"content":"","date":1638316800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638316800,"objectID":"eb002b6034c5afcdd95afb979f4e0c29","permalink":"https://copenlu.github.io/publication/2022_tacl_sarwar/","publishdate":"2021-12-01T00:00:00Z","relpermalink":"/publication/2022_tacl_sarwar/","section":"publication","summary":"We propose a novel framework for cross-lingual content flagging with limited target-language data, which significantly outperforms prior work in terms of predictive performance. The framework is based on a nearest-neighbour architecture. It is a modern instantiation of the vanilla k-nearest neighbour model, as we use Transformer representations in all its components. Our framework can adapt to new source language instances, without the need to be retrained from scratch. Unlike prior work on neighbourhood based approaches, we encode the neighbourhood information based on query-neighbour interactions. We propose two encoding schemes and show their effectiveness using both qualitative and quantitative analyses. Our evaluation results on eight languages from two different datasets for abusive language detection show sizable improvements of up to 9.5 F1 points (for the Italian language) over strong baselines. On average, we achieve improvements of 3.6 F1 points for the three languages in the Jigsaw Multilingual dataset and 2.14 F1 points for the WUL dataset.","tags":[],"title":"A Neighbourhood Framework for Resource-Lean Content Flagging","type":"publication"},{"authors":["Karolina Stańczak","Isabelle Augenstein"],"categories":null,"content":"","date":1638316800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638316800,"objectID":"a392da73ead1d47c072a85823e7704b4","permalink":"https://copenlu.github.io/publication/2021_arxiv_stanczak_survey/","publishdate":"2021-12-01T00:00:00Z","relpermalink":"/publication/2021_arxiv_stanczak_survey/","section":"publication","summary":"While the prevalence of large pre-trained language models has led to significant improvements in the performance of NLP systems, recent research has demonstrated that these models inherit societal biases extant in natural language. In this paper, we explore a simple method to probe pre-trained language models for gender bias, which we use to effect a multi-lingual study of gender bias towards politicians. We construct a dataset of 250k politicians from most countries in the world and quantify adjective and verb usage around those politicians' names as a function of their gender. We conduct our study in 7 languages across 6 different language modeling architectures. Our results demonstrate that stance towards politicians in pre-trained language models is highly dependent on the language used. 
Finally, contrary to previous findings, our study suggests that larger language models do not tend to be significantly more gender-biased than smaller ones.","tags":[],"title":"A Survey on Gender Bias in Natural Language Processing","type":"publication"},{"authors":["Pepa Atanasova","Jakob Grue Simonsen","Christina Lioma","Isabelle Augenstein"],"categories":null,"content":"","date":1638316800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638316800,"objectID":"0f6ab3143395d0ce4e4ff3da1382f402","permalink":"https://copenlu.github.io/publication/2022_aaai_atanasova/","publishdate":"2021-12-01T00:00:00Z","relpermalink":"/publication/2022_aaai_atanasova/","section":"publication","summary":"Explanations shed light on a machine learning model's rationales and can aid in identifying deficiencies in its reasoning process. Explanation generation models are typically trained in a supervised way given human explanations. When such annotations are not available, explanations are often selected as those portions of the input that maximise a downstream task's performance, which corresponds to optimising an explanation's Faithfulness to a given model. Faithfulness is one of several so-called diagnostic properties, which prior work has identified as useful for gauging the quality of an explanation without requiring annotations. Other diagnostic properties are Data Consistency, which measures how similar explanations are for similar input instances, and Confidence Indication, which shows whether the explanation reflects the confidence of the model. In this work, we show how to directly optimise for these diagnostic properties when training a model to generate sentence-level explanations, which markedly improves explanation quality, agreement with human rationales, and downstream task performance on three complex reasoning tasks.","tags":[],"title":"Diagnostics-Guided Explanation Generation","type":"publication"},{"authors":["Momchil Hardalov","Arnav Arora","Preslav Nakov","Isabelle Augenstein"],"categories":null,"content":"","date":1638316800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638316800,"objectID":"647c18fab9233a76b72ee26ac924796c","permalink":"https://copenlu.github.io/publication/2022_aaai_hardalov/","publishdate":"2021-12-01T00:00:00Z","relpermalink":"/publication/2022_aaai_hardalov/","section":"publication","summary":"The goal of stance detection is to determine the viewpoint expressed in a piece of text towards a target. These viewpoints or contexts are often expressed in many different languages depending on the user and the platform, which can be a local news outlet, a social media platform, a news forum, etc. Most research in stance detection, however, has been limited to working with a single language and on a few limited targets, with little work on cross-lingual stance detection. Moreover, non-English sources of labelled data are often scarce and present additional challenges. Recently, large multilingual language models have substantially improved the performance on many non-English tasks, especially those with limited numbers of examples. This highlights the importance of model pre-training and its ability to learn from few examples. In this paper, we present the most comprehensive study of cross-lingual stance detection to date: we experiment with 15 diverse datasets in 12 languages from 6 language families, and with 6 low-resource evaluation settings each. 
For our experiments, we build on pattern-exploiting training, proposing the addition of a novel label encoder to simplify the verbalisation procedure. We further propose sentiment-based generation of stance data for pre-training, which shows sizeable improvement of more than 6% F1 absolute in low-shot settings compared to several strong baselines.","tags":[],"title":"Few-Shot Cross-Lingual Stance Detection with Sentiment-Based Pre-Training","type":"publication"},{"authors":["Nils Rethmeier","Isabelle Augenstein"],"categories":null,"content":"","date":1638144000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638144000,"objectID":"7a48b0444680bf2d9c338f7890c33377","permalink":"https://copenlu.github.io/publication/2022_aibsd_rethmeier/","publishdate":"2021-11-29T00:00:00Z","relpermalink":"/publication/2022_aibsd_rethmeier/","section":"publication","summary":"For natural language processing (NLP) tasks such as sentiment or topic classification, currently prevailing approaches heavily rely on pretraining large self-supervised models on massive external data resources. However, this methodology is being critiqued for: exceptional compute and pretraining data requirements; diminishing returns on both large and small datasets; and importantly, favourable evaluation settings that overestimate performance differences. The core belief behind current methodology, coined 'the bitter lesson' by R. Sutton, is that 'compute scale-up beats data and compute-efficient algorithms', neglecting that progress in compute hardware scale-up is based almost entirely on the miniaturisation of resource consumption. We thus approach pretraining from a miniaturisation perspective, so as not to require massive external data sources and models, or learned translations from continuous input embeddings to discrete labels. To minimise overly favourable evaluation, we examine learning on a long-tailed, low-resource, multi-label text classification dataset with noisy, highly sparse labels and many rare concepts. To this end, we propose a novel 'dataset-internal' contrastive autoencoding approach to self-supervised pretraining and demonstrate marked improvements in zero-shot, few-shot and solely supervised learning performance; even under an unfavorable low-resource scenario, and without defaulting to large-scale external datasets for self-supervision. We also find empirical evidence that zero and few-shot learning markedly benefit from adding more `dataset-internal', self-supervised training signals, which is of practical importance when retrieving or computing on large external sources of such signals is infeasible.","tags":[],"title":"Contrastive Text Pretraining for Zero to Few-Shot Long-Tail Learning","type":"publication"},{"authors":["Andreas Nugaard Holm","Barbara Plank","Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1638057600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1638057600,"objectID":"9fe861e88f4ab4f653bacaa3cf60c974","permalink":"https://copenlu.github.io/publication/2022_sdu_holm/","publishdate":"2021-11-28T00:00:00Z","relpermalink":"/publication/2022_sdu_holm/","section":"publication","summary":"Citation count prediction is the task of predicting the number of citations a paper has gained after a period of time. Prior work viewed this as a static prediction task. As papers and their citations evolve over time, considering the dynamics of the number of citations a paper will receive would seem logical. 
Here, we introduce the task of sequence citation prediction, where the goal is to accurately predict the trajectory of the number of citations a scholarly work receives over time. We propose to view papers as a structured network of citations, allowing us to use topological information as a learning signal. Additionally, we learn how this dynamic citation network changes over time and the impact of paper meta-data such as authors, venues and abstracts. To approach the introduced task, we derive a dynamic citation network from Semantic Scholar which spans over 42 years. We present a model which exploits topological and temporal information using graph convolution networks paired with sequence prediction, and compare it against multiple baselines, testing the importance of topological and temporal information and analyzing model performance. Our experiments show that leveraging both the temporal and topological information greatly increases the performance of predicting citation counts over time.","tags":[],"title":"Longitudinal Citation Prediction using Temporal Graph Neural Networks","type":"publication"},{"authors":["Andreas Holzinger","Matthias Dehmer","Frank Emmert-Streib","Rita Cucchiara","Isabelle Augenstein","Javier Del Ser","Wojciech Samek","Igor Jurisica","Natalia Díaz-Rodríguez"],"categories":null,"content":"","date":1635724800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1635724800,"objectID":"34e508f96075f32b9333edb58950fe31","permalink":"https://copenlu.github.io/publication/2022_if_holzinger/","publishdate":"2021-11-01T00:00:00Z","relpermalink":"/publication/2022_if_holzinger/","section":"publication","summary":"Medical artificial intelligence (AI) systems have been remarkably successful, even outperforming human performance at certain tasks. There is no doubt that AI is important to improve human health in many ways and will disrupt various medical workflows in the future. Using AI to solve problems in medicine beyond the lab, in routine environments, we need to do more than just improve the performance of existing AI methods. Robust AI solutions must be able to cope with imprecision, missing and incorrect information, and explain both the result and the process of how it was obtained to a medical expert. Using conceptual knowledge as a guiding model of reality can help to develop more robust, explainable, and less biased machine learning models that can ideally learn from less data. Achieving these goals will require an orchestrated effort that combines three complementary Frontier Research Areas: (1) Complex Networks and their Inference, (2) Graph causal models and counterfactuals, and (3) Verification and Explainability methods. The goal of this paper is to describe these three areas from a unified view and to motivate how information fusion in a comprehensive and integrative manner can not only help bring these three areas together, but also have a transformative role by bridging the gap between research and practical applications in the context of future trustworthy medical AI. 
This makes it imperative to include ethical and legal aspects as a cross-cutting discipline, because all future solutions must not only be ethically responsible, but also legally compliant.","tags":[],"title":"Information fusion as an integrative cross-cutting enabler to achieve robust, explainable, and trustworthy medical artificial intelligence","type":"publication"},{"authors":[],"categories":null,"content":"On 1 September 2021, the DFF Sapere Aude project EXPANSE on \u0026lsquo;Learning to Explain Attitudes on Social Media\u0026rsquo; is officially kicking off. Sapere Aude is a program by the Independent Research Fund Denmark (DFF) to support the most talented younger researchers in Denmark with funding for blue-skies research to build up or expand their research groups.\nThe EXPANSE project will study attitudes voiced on social media, examining what is said, and explaining it by examining why, how and by whom attitudes are stated. Currently, the only criterion commonly taken into account when researching and developing not just such stance detection models, but Natural Language Processing models in general, is predictive performance, e.g. how well models can predict labels such as ‘positive’, ‘negative’ or ‘neutral’. This does not provide any insights into why and how models arrive at certain predictions, which is crucial for utilising predictions for decision making. By contrast, EXPANSE will examine explainability as a success criterion, in an interdisciplinary approach which combines Natural Language Processing research with sociological theories.\nThe following researchers affiliated with the EXPANSE project will join CopeNLU on 1 September 2021:\n Erik Arakelyan (PhD Student), whose main research interests are question answering and explainability. Lucie-Aimée Kaffee (Postdoc), whose research interests include supporting lower-resourced language communities (including Wikipedia and Wikidata) with NLP, and multilingual knowledge graphs. Nodens Koren (PhD Student), who is interested in natural language understanding, explainability, and bio-inspired models. Nadav Borenstein (PhD Student), whose research interests include improving the trustworthiness and usefulness of deep models in the NLP domain. ","date":1630368000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1630368000,"objectID":"f05cd3be344fdeb42a7b7442772918e7","permalink":"https://copenlu.github.io/talk/2021_09_people/","publishdate":"2021-08-31T00:00:00Z","relpermalink":"/talk/2021_09_people/","section":"talk","summary":"On 1 September 2021, the DFF Sapere Aude project EXPANSE on \u0026lsquo;Learning to Explain Attitudes on Social Media\u0026rsquo; is officially kicking off. 
Sapere Aude is a program by the Independent Research Fund Denmark (DFF) to support the most talented younger researchers in Denmark with funding for blue-skies research to build up or expand their research groups.\nThe EXPANSE project will study attitudes voiced on social media, examining what is said, and explaining it by examining why, how and by whom attitudes are stated.","tags":[],"title":"EXPANSE Project Kick-Off","type":"talk"},{"authors":["Liesbeth Allein","Isabelle Augenstein","Marie-Francine Moens"],"categories":null,"content":"","date":1630281600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1630281600,"objectID":"2449f75a426a468881e08117b3dbabbc","permalink":"https://copenlu.github.io/publication/2021_jws_allein/","publishdate":"2021-08-30T00:00:00Z","relpermalink":"/publication/2021_jws_allein/","section":"publication","summary":"Truth can vary over time. Therefore, fact-checking decisions on claim veracity should take into account temporal information of both the claim and supporting or refuting evidence. Automatic fact-checking models typically take claims and evidence pages as input, and previous work has shown that weighing or ranking these evidence pages by their relevance to the claim is useful. However, the temporal information of the evidence pages is not generally considered when defining evidence relevance. In this work, we investigate the hypothesis that the timestamp of an evidence page is crucial to how it should be ranked for a given claim. We delineate four temporal ranking methods that constrain evidence ranking differently: evidence-based recency, claim-based recency, claim-centered closeness and evidence-centered clustering ranking. Subsequently, we simulate hypothesis-specific evidence rankings given the evidence timestamps as gold standard. Evidence ranking is then optimized using a learning to rank loss function. The best performing time-aware fact-checking model outperforms its baseline by up to 33.34%, depending on the domain. Overall, evidence-based recency and evidence-centered clustering ranking lead to the best results. Our study reveals that time-aware evidence ranking not only surpasses relevance assumptions based purely on semantic similarity or position in a search results list, but also improves veracity predictions of time-sensitive claims in particular. ","tags":[],"title":"Time-Aware Evidence Ranking for Fact-Checking","type":"publication"},{"authors":[],"categories":null,"content":"3 papers by CopeNLU authors are accepted to appear at EMNLP 2021. The topics of these include stance detection, exaggeration detection, and counterfactually augmented data.\nCross-Domain Label-Adaptive Stance Detection. Momchil Hardalov, Arnav Arora, Preslav Nakov, Isabelle Augenstein. In Proceedings of EMNLP.\nHow Does Counterfactually Augmented Data Impact Models for Social Computing Constructs?. Indira Sen, Mattia Samory, Fabian Flöck, Claudia Wagner, Isabelle Augenstein. In Proceedings of EMNLP.\nSemi-Supervised Exaggeration Detection of Health Science Press Releases. Dustin Wright, Isabelle Augenstein. In Proceedings of EMNLP.\n","date":1629936000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1629936000,"objectID":"50d10c41a02c20e8256e2d56e6a131f6","permalink":"https://copenlu.github.io/talk/2021_08_emnlp/","publishdate":"2021-08-26T00:00:00Z","relpermalink":"/talk/2021_08_emnlp/","section":"talk","summary":"3 papers by CopeNLU authors are accepted to appear at EMNLP 2021. 
The topics of these include stance detection, exaggeration detection, and counterfactually augmented data.\nCross-Domain Label-Adaptive Stance Detection. Momchil Hardalov, Arnav Arora, Preslav Nakov, Isabelle Augenstein. In Proceedings of EMNLP.\nHow Does Counterfactually Augmented Data Impact Models for Social Computing Constructs?. Indira Sen, Mattia Samory, Fabian Flöck, Claudia Wagner, Isabelle Augenstein. In Proceedings of EMNLP.\nSemi-Supervised Exaggeration Detection of Health Science Press Releases.","tags":[],"title":"3 Papers Accepted to EMNLP 2021","type":"talk"},{"authors":["Momchil Hardalov","Arnav Arora","Preslav Nakov","Isabelle Augenstein"],"categories":null,"content":"","date":1629936000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1629936000,"objectID":"038e99cea9929c18fe8c0ee1fc855609","permalink":"https://copenlu.github.io/publication/2021_emlnp_hardalov/","publishdate":"2021-08-26T00:00:00Z","relpermalink":"/publication/2021_emlnp_hardalov/","section":"publication","summary":"Stance detection concerns the classification of a writer's viewpoint towards a target. There are different task variants, e.g., stance of a tweet vs. a full article, or stance with respect to a claim vs. an (implicit) topic. Moreover, task definitions vary, which includes the label inventory, the data collection, and the annotation protocol. All these aspects hinder cross-domain studies, as they require changes to standard domain adaptation approaches. In this paper, we perform an in-depth analysis of 16 stance detection datasets, and we explore the possibility for cross-domain learning from them. Moreover, we propose an end-to-end unsupervised framework for out-of-domain prediction of unseen, user-defined labels. In particular, we combine domain adaptation techniques such as mixture of experts and domain-adversarial training with label embeddings, and we demonstrate sizable performance gains over strong baselines -- both (i) in-domain, i.e., for seen targets, and (ii) out-of-domain, i.e., for unseen targets. Finally, we perform an exhaustive analysis of the cross-domain results, and we highlight the important factors influencing the model performance. ","tags":[],"title":"Cross-Domain Label-Adaptive Stance Detection","type":"publication"},{"authors":["Indira Sen","Mattia Samory","Fabian Flöck","Claudia Wagner","Isabelle Augenstein"],"categories":null,"content":"","date":1629936000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1629936000,"objectID":"05d17942b09ee2a245542c33e5aec3d5","permalink":"https://copenlu.github.io/publication/2021_emlnp_sen/","publishdate":"2021-08-26T00:00:00Z","relpermalink":"/publication/2021_emlnp_sen/","section":"publication","summary":"As NLP models are increasingly deployed in socially situated settings such as online abusive content detection, ensuring these models are robust is crucial. One way of improving model robustness is to generate counterfactually augmented data (CAD) for training models that can better learn to distinguish between core features and data artifacts. While models trained on this type of data have shown promising out-of-domain generalizability, it is still unclear what the sources of such improvements are. We investigate the benefits of CAD for social NLP models by focusing on three social computing constructs --- sentiment, sexism, and hate speech. 
Assessing the performance of models trained with and without CAD across different types of datasets, we find that while models trained on CAD show lower in-domain performance, they generalize better out-of-domain. We unpack this apparent discrepancy using machine explanations and find that CAD reduces model reliance on spurious features. Leveraging a novel typology of CAD to analyze their relationship with model performance, we find that CAD which acts directly on the construct, or a diverse set of CAD, leads to higher performance.","tags":[],"title":"How Does Counterfactually Augmented Data Impact Models for Social Computing Constructs?","type":"publication"},{"authors":["Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1629936000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1629936000,"objectID":"b5542c60b597925591ba754d6dd97e4c","permalink":"https://copenlu.github.io/publication/2021_emlnp_wright/","publishdate":"2021-08-26T00:00:00Z","relpermalink":"/publication/2021_emlnp_wright/","section":"publication","summary":"Public trust in science depends on honest and factual communication of scientific papers. However, recent studies have demonstrated a tendency of news media to misrepresent scientific papers by exaggerating their findings. Given this, we present a formalization of and study into the problem of exaggeration detection in science communication. While there is an abundance of scientific papers and popular media articles written about them, very rarely do the articles include a direct link to the original paper, making data collection challenging and necessitating few-shot learning. We address this by curating a set of labeled press release/abstract pairs from existing expert annotated studies on exaggeration in press releases of scientific papers suitable for benchmarking the performance of machine learning models on the task. Using limited data from this and previous studies on exaggeration detection in science, we introduce MT-PET, a multi-task version of Pattern Exploiting Training (PET), which leverages knowledge from complementary cloze-style QA tasks to improve few-shot learning. We demonstrate that MT-PET outperforms PET and supervised learning both when data is limited, as well as when there is an abundance of data for the main task.","tags":[],"title":"Semi-Supervised Exaggeration Detection of Health Science Press Releases","type":"publication"},{"authors":["Luna De Bruyne","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1623801600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1623801600,"objectID":"b02915fc6c196b22d18c5c27bd88b439","permalink":"https://copenlu.github.io/publication/2022_csl_debruyne/","publishdate":"2021-06-16T00:00:00Z","relpermalink":"/publication/2022_csl_debruyne/","section":"publication","summary":"Emotion lexica are commonly used resources to combat data poverty in automatic emotion detection. However, methodological issues emerge when employing them: lexica are often not very extensive, and the way they are constructed can vary widely -- from lab conditions to crowdsourced approaches and distant supervision. Furthermore, both categorical frameworks and dimensional frameworks coexist, in which theorists provide many different sets of categorical labels or dimensional axes. The heterogeneous nature of the resulting emotion detection resources results in a need for a unified approach to utilising them. 
This paper contributes to the field of emotion analysis in NLP by a) presenting the first study to unify existing emotion detection resources automatically and thus learn more about the relationships between them; b) exploring the use of existing lexica for the above-mentioned task; c) presenting an approach to automatically combining emotion lexica, namely by a multi-view variational auto-encoder (VAE), which facilitates the mapping of datasets into a joint emotion label space. We test the utility of joint emotion lexica by using them as additional features in state-of-the-art emotion detection models. Our overall findings are that emotion lexica can offer complementary information to even extremely large pre-trained models such as BERT. The performance of our models is comparable to state-of-the-art models that are specifically engineered for certain datasets, and even outperforms the state-of-the-art on four datasets.","tags":[],"title":"Joint Emotion Label Space Modelling for Affect Lexica","type":"publication"},{"authors":["Isabelle Augenstein"],"categories":null,"content":"","date":1622332800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1622332800,"objectID":"f830e3893b710af7c6523a5ef07d9363","permalink":"https://copenlu.github.io/publication/2021_sdp_augenstein/","publishdate":"2021-05-30T00:00:00Z","relpermalink":"/publication/2021_sdp_augenstein/","section":"publication","summary":"Most work on scholarly document processing assumes that the information processed is trustworthy and factually correct. However, this is not always the case. There are two core challenges, which should be addressed: 1) ensuring that scientific publications are credible – e.g. that claims are not made without supporting evidence, and that all relevant supporting evidence is provided; and 2) that scientific findings are not misrepresented, distorted or outright misreported when communicated by journalists or the general public. I will present some first steps towards addressing these problems and outline remaining challenges.","tags":[],"title":"Determining the Credibility of Science Communication","type":"publication"},{"authors":["Wei Zhao","Steffen Eger","Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1622246400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1622246400,"objectID":"eab61af53e4f6a77ac00ae35ead9c619","permalink":"https://copenlu.github.io/publication/2021_sem_zhao/","publishdate":"2021-05-29T00:00:00Z","relpermalink":"/publication/2021_sem_zhao/","section":"publication","summary":"Cross-lingual representations have the potential to make NLP techniques available to the vast majority of languages in the world. However, they currently require large pretraining corpora, or assume access to typologically similar languages. In this work, we address these obstacles by removing language identity signals from multilingual embeddings. We examine three approaches for this: (i) re-aligning the vector spaces of target languages (all together) to a pivot source language; (ii) removing language-specific means and variances, which yields better discriminativeness of embeddings as a by-product; and (iii) normalizing input texts by removing morphological contractions and sentence reordering, thus yielding language-agnostic representations. We evaluate on the tasks of XNLI and reference-free MT evaluation across 19 selected languages. 
Our experiments demonstrate the language-agnostic behavior of our multilingual representations, allowing better zero-shot cross-lingual transfer to distant and low-resource languages, and decreasing the performance gap by 8.9 points (M-BERT) and 18.2 points (XLM-R) on average across all tasks and languages. We particularly show that vector normalization can lead to more consistent gains and is complementary to input normalization and recently popular vector space re-alignment. We make our code and models available.","tags":[],"title":"Inducing Language-Agnostic Multilingual Representations","type":"publication"},{"authors":["Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1620259200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1620259200,"objectID":"b9f157c90f6dac772b712ca1a957ea67","permalink":"https://copenlu.github.io/publication/2021_acl_wright_citeworth/","publishdate":"2021-05-06T00:00:00Z","relpermalink":"/publication/2021_acl_wright_citeworth/","section":"publication","summary":"Scientific document understanding is challenging as the data is highly domain specific and diverse. However, datasets for tasks with scientific text require expensive manual annotation and tend to be small and limited to only one or a few fields. At the same time, scientific documents contain many potential training signals, such as citations, which can be used to build large labelled datasets. Given this, we present an in-depth study of cite-worthiness detection in English, where a sentence is labelled for whether or not it cites an external source. To accomplish this, we introduce CiteWorth, a large, contextualized, rigorously cleaned labelled dataset for cite-worthiness detection built from a massive corpus of extracted plain-text scientific documents. We show that CiteWorth is high-quality, challenging, and suitable for studying problems such as domain adaptation. Our best performing cite-worthiness detection model is a paragraph-level contextualized sentence labelling model based on Longformer, exhibiting a 5 F1 point improvement over SciBERT which considers only individual sentences. Finally, we demonstrate that language model fine-tuning with cite-worthiness as a secondary task leads to improved performance on downstream scientific document understanding tasks.","tags":[],"title":"CiteWorth: Cite-Worthiness Detection for Improved Scientific Document Understanding","type":"publication"},{"authors":["Clara Meister","Stefan Lazov","Isabelle Augenstein","Ryan Cotterell"],"categories":null,"content":"","date":1620259200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1620259200,"objectID":"8607ee26cdafaf25843420d6e1740537","permalink":"https://copenlu.github.io/publication/2021_acl_meister_sparse/","publishdate":"2021-05-06T00:00:00Z","relpermalink":"/publication/2021_acl_meister_sparse/","section":"publication","summary":"Sparse attention has been claimed to increase model interpretability under the assumption that it highlights influential inputs. Yet the attention distribution is typically over representations internal to the model rather than inputs themselves, suggesting this assumption may not have merit. In this work, we design a set of experiments to help us understand how sparsity affects our ability to use attention as an explainability tool. On three text classification tasks, we verify that only a weak relationship between inputs and co-indexed intermediate representations exists -- under sparse attention and otherwise. 
Further, we do not find any plausible mappings from sparse attention distributions to a sparse set of influential inputs through other avenues. Rather, we observe in this setting that inducing sparsity may make it less plausible that attention can be used as a tool for understanding model behavior.","tags":[],"title":"Is Sparse Attention more Interpretable?","type":"publication"},{"authors":[],"categories":null,"content":"A paper by CopeNLU authors is accepted to appear at IJCAI 2021. The paper studies how to perform complex claim verification on naturally occurring political claims with multiple hops over evidence chunks.\nMulti-Hop Fact Checking of Political Claims. Wojciech Ostrowski, Arnav Arora, Pepa Atanasova, Isabelle Augenstein.\n","date":1619827200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1619827200,"objectID":"36e5178c49320f683d3a221a18123e15","permalink":"https://copenlu.github.io/talk/2021_04_ijcai/","publishdate":"2021-05-01T00:00:00Z","relpermalink":"/talk/2021_04_ijcai/","section":"talk","summary":"A paper by CopeNLU authors is accepted to appear at IJCAI 2021. The paper studies how to perform complex claim verification on naturally occurring political claims with multiple hops over evidence chunks.\nMulti-Hop Fact Checking of Political Claims. Wojciech Ostrowski, Arnav Arora, Pepa Atanasova, Isabelle Augenstein.","tags":["fact-checking","explainability","limited-data"],"title":"Paper Accepted to IJCAI 2021","type":"talk"},{"authors":["Wojciech Ostrowski","Arnav Arora","Pepa Atanasova","Isabelle Augenstein"],"categories":null,"content":"","date":1619740800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1619740800,"objectID":"c2f5beb4bf77b92983c856f9d19d8b85","permalink":"https://copenlu.github.io/publication/2021_ijcai_ostrowski/","publishdate":"2021-04-30T00:00:00Z","relpermalink":"/publication/2021_ijcai_ostrowski/","section":"publication","summary":"Recently, novel multi-hop models and datasets have been introduced to achieve more complex natural language reasoning with neural networks. One notable task that requires multi-hop reasoning is fact checking, where a chain of connected evidence pieces leads to the final verdict of a claim. However, existing datasets do not provide annotations for the gold evidence pieces, which is a critical aspect for improving the explainability of fact checking systems. The only exception is the FEVER dataset, which is artificially constructed based on Wikipedia and does not use naturally occurring political claims and evidence pages, which are more challenging. Most claims in FEVER only have one evidence sentence associated with them and require no reasoning to make label predictions -- the small number of instances with two evidence sentences requires only simple reasoning. In this paper, we study how to perform more complex claim verification on naturally occurring claims with multiple hops over evidence chunks. We first construct a small annotated dataset, PolitiHop, of reasoning chains for claim verification. We then compare the dataset to other existing multi-hop datasets and study how to transfer knowledge from more extensive in- and out-of-domain resources to PolitiHop. 
We find that the task is complex, and achieve the best performance using an architecture that specifically models reasoning over evidence chains in combination with in-domain transfer learning.","tags":[],"title":"Multi-Hop Fact Checking of Political Claims","type":"publication"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors are accepted to appear at ACL 2021. One paper is on interpretability, examining how sparsity affects our ability to use attention as an explainability tool; whereas the other one is on scientific document understanding, introducing a new dataset for the task of cite-worthiness detection in scientific articles.\nIs Sparse Attention more Interpretable? Clara Meister, Stefan Lazov, Isabelle Augenstein, Ryan Cotterell.\nCiteWorth: Cite-Worthiness Detection for Improved Scientific Document Understanding. Dustin Wright, Isabelle Augenstein.\n","date":1617580800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1617580800,"objectID":"a938171a6684f926fdc058fcaee6414a","permalink":"https://copenlu.github.io/talk/2021_04_acl/","publishdate":"2021-04-05T00:00:00Z","relpermalink":"/talk/2021_04_acl/","section":"talk","summary":"2 papers by CopeNLU authors are accepted to appear at ACL 2021. One paper is on interpretability, examining how sparsity affects our ability to use attention as an explainability tool; whereas the other one is on scientific document understanding, introducing a new dataset for the task of cite-worthiness detection in scientific articles.\nIs Sparse Attention more Interpretable? Clara Meister, Stefan Lazov, Isabelle Augenstein, Ryan Cotterell.\nCiteWorth: Cite-Worthiness Detection for Improved Scientific Document Understanding.","tags":["explainability","scholarly-data","limited-data"],"title":"2 Papers Accepted to ACL 2021","type":"talk"},{"authors":["Thamar Solorio","Mahsa Shafaei","Christos Smailis","Isabelle Augenstein","Margaret Mitchell","Ingrid Stapf","Ioannis Kakadiaris"],"categories":null,"content":"","date":1612137600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1612137600,"objectID":"cb1395d25289acab27224703ff500598","permalink":"https://copenlu.github.io/publication/2021_openreview_solorio/","publishdate":"2021-02-01T00:00:00Z","relpermalink":"/publication/2021_openreview_solorio/","section":"publication","summary":"This white paper summarizes the authors' structured brainstorming regarding ethical considerations for creating an extensive repository of online content labeled with tags that describe potentially questionable content for young viewers. 
The workshop focused on four topics: 1) identifying risks of unintended biases in the data and labels; 2) reducing the risks of unintended biases; 3) identifying ethical considerations of the annotation task; and 4) reducing the risks for the annotators.","tags":[],"title":"White Paper - Creating a Repository of Objectionable Online Content: Addressing Undesirable Biases and Ethical Considerations","type":"publication"},{"authors":["Admin"],"categories":null,"content":"We gave a tutorial on \u0026lsquo;Explainability for NLP\u0026rsquo; at the first ALPS (Advanced Language Processing) winter school: http://lig-alps.imag.fr/index.php/schedule/ The tutorial introduces the concepts of \u0026lsquo;model understanding\u0026rsquo; as well as \u0026lsquo;decision understanding\u0026rsquo; and provides examples of approaches from the areas of fact checking and text classification.\nExercises for both model understanding and decision understanding are available here: https://github.com/copenlu/ALPS_2021\n","date":1611792000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1611792000,"objectID":"911d94d0b24c375c3c34c539dafa2c5a","permalink":"https://copenlu.github.io/post/explainability_tutorial/","publishdate":"2021-01-28T00:00:00Z","relpermalink":"/post/explainability_tutorial/","section":"post","summary":"We gave a tutorial including a lab session at the first ALPS (Advanced Language Processing) winter school. If you want to learn more about explainability for NLP, you can find our resources on Github.","tags":["Tutorial"],"title":"ALPS 2021 tutorial 'Explainability for NLP'","type":"post"},{"authors":["Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1611792000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1611792000,"objectID":"a7ad865566a5fcf0d51aeacf2661ae70","permalink":"https://copenlu.github.io/publication/2021_eacl_bjerva/","publishdate":"2021-01-28T00:00:00Z","relpermalink":"/publication/2021_eacl_bjerva/","section":"publication","summary":"Bridging the performance gap between high- and low-resource languages has been the focus of much previous work. Typological features from databases such as the World Atlas of Language Structures (WALS) are a prime candidate for this, as such data exists even for very low-resource languages. However, previous work has only found minor benefits from using typological information. Our hypothesis is that a model trained in a cross-lingual setting will pick up on typological cues from the input data, thus overshadowing the utility of explicitly using such features. We verify this hypothesis by blinding a model to typological information, and investigate how cross-lingual sharing and performance are impacted. Our model is based on a cross-lingual architecture in which the latent weights governing the sharing between languages are learnt during training. 
We show that (i) preventing this model from exploiting typology severely reduces performance, while a control experiment reaffirms that (ii) encouraging sharing according to typology somewhat improves performance.","tags":[],"title":"Does Typological Blinding Impede Cross-Lingual Sharing?","type":"publication"},{"authors":["Isabelle Augenstein"],"categories":null,"content":"","date":1609804800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1609804800,"objectID":"2053e351c830223e7c32e1dc526bc186","permalink":"https://copenlu.github.io/publication/2021_habil_augenstein/","publishdate":"2021-01-05T00:00:00Z","relpermalink":"/publication/2021_habil_augenstein/","section":"publication","summary":"The past decade has seen a substantial rise in the amount of mis- and disinformation online, from targeted disinformation campaigns to influence politics, to the unintentional spreading of misinformation about public health. This development has spurred research in the area of automatic fact checking, from approaches to detect check-worthy claims and determining the stance of tweets towards claims, to methods to determine the veracity of claims given evidence documents. These automatic methods are often content-based, using natural language processing methods, which in turn utilise deep neural networks to learn higher-order features from text in order to make predictions. As deep neural networks are black-box models, their inner workings cannot be easily explained. At the same time, it is desirable to explain how they arrive at certain decisions, especially if they are to be used for decision making. While this has been known for some time, the issues this raises have been exacerbated by models increasing in size, by EU legislation requiring models used for decision making to provide explanations, and, very recently, by legislation requiring online platforms operating in the EU to provide transparent reporting on their services. Despite this, current solutions for explainability are still lacking in the area of fact checking. A further general requirement of such deep learning based methods is that they require large amounts of in-domain training data to produce reliable explanations. As automatic fact checking is a very recently introduced research area, there are few sufficiently large datasets. As such, research on how to learn from limited amounts of training data, such as how to adapt to unseen domains, is needed. This thesis presents my research on automatic fact checking, including claim check-worthiness detection, stance detection and veracity prediction. Its contributions go beyond fact checking, with the thesis proposing more general machine learning solutions for natural language processing in the area of learning with limited labelled data. Finally, the thesis presents some first solutions for explainable fact checking. Even so, the contributions presented here are only a start on the journey towards what is possible and needed. Future research should focus on more holistic explanations by combining instance- and model-based approaches, by developing large datasets for training models to generate explanations, and by collective intelligence and active learning approaches for using explainable fact checking models to support decision making.","tags":[],"title":"Towards Explainable Fact Checking","type":"publication"},{"authors":[],"categories":null,"content":"A paper by CopeNLU authors is accepted to appear at EACL 2021. 
The paper aims to bridge the gap between high- and low-resource languages by investigating to what degree cross-lingual models share structural information about languages.\nDoes Typological Blinding Impede Cross-Lingual Sharing? Johannes Bjerva, Isabelle Augenstein.\n","date":1609459200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1609459200,"objectID":"079db76348a36937126dc360addb376c","permalink":"https://copenlu.github.io/talk/2021_01_eacl/","publishdate":"2021-01-01T00:00:00Z","relpermalink":"/talk/2021_01_eacl/","section":"talk","summary":"A paper by CopeNLU authors is accepted to appear at EACL 2021. The paper aims to bridge the gap between high- and low-resource languages by investigating to what degree cross-lingual models share structural information about languages.\nDoes Typological Blinding Impede Cross-Lingual Sharing? Johannes Bjerva, Isabelle Augenstein.","tags":["limited-data","multilingual-learning"],"title":"Paper Accepted to EACL 2021","type":"talk"},{"authors":[],"categories":null,"content":"Two PhD fellowships and two postdoc positions on explainable stance detection are available in CopeNLU. The PhD fellowships and one of the postdoc positions are offered in the context of a DFF Sapere Aude research leader fellowship on `Learning to Explain Attitudes on Social Media (EXPANSE)´. Sapere Aude is a program by the Independent Research Fund Denmark (DFF) to support the most talented younger researchers in Denmark with funding for blue-skies research to build up or expand their research groups.\nThe EXPANSE project studies attitudes voiced on social media, examining what is said, and explaining it by examining why, how and by whom attitudes are stated. Currently, the only criterion commonly taken into account when researching and developing not just such stance detection, but Natural Language Processing models in general, is predictive performance, e.g. how well models can predict labels such as \u0026lsquo;positive\u0026rsquo;, \u0026lsquo;negative\u0026rsquo; or \u0026lsquo;neutral\u0026rsquo;. This does not provide any insights into why and how models arrive at certain predictions, which is crucial for utilising predictions for decision making. By contrast, this project will examine explainability as a success criterion, in an interdisciplinary approach which combines Natural Language Processing research with sociological theories.\nIn addition to the principal investigator, the two PhD students and the postdoc, the project team will also include interdisciplinary collaborators from sociology. More information about the project can also be found in the official announcement on DFF\u0026rsquo;s website.\nThe other available postdoc position is offered in the context of a project on researching explainable NLP methods for educational technologies.\nCandidates are expected to hold a Master\u0026rsquo;s degree in computer science or a related relevant area, or to be near completion of one. There are no restrictions on citizenship, and a language certificate is not required at application time. Candidates from traditionally underrepresented minorities in natural language processing are particularly encouraged to apply.\nRead more about reasons to join us here. Before applying, you are welcome to get in touch informally if you have questions about the call. The official call for PhD positions and application link can be found here; the application deadline is 6 May 2021. 
Note that the other PhD position as well as the postdoc position on the project have already been filled.\n","date":1605744000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1605744000,"objectID":"309c8616f3f4bf2af329fd1998fa36b4","permalink":"https://copenlu.github.io/talk/2020_10_phd/","publishdate":"2020-11-19T00:00:00Z","relpermalink":"/talk/2020_10_phd/","section":"talk","summary":"Two PhD fellowships and two postdoc positions on explainable stance detection are available in CopeNLU. The PhD fellowships and one of the postdoc positions are offered in the context of a DFF Sapere Aude research leader fellowship on `Learning to Explain Attitudes on Social Media (EXPANSE)´. Sapere Aude is a program by the Independent Research Fund Denmark (DFF) to support the most talented younger researchers in Denmark with funding for blue-skies research to build up or expand their research groups.","tags":[],"title":"Positions available in context of Sapere Aude Research Leader Fellowship on Explainable Stance Detection","type":"talk"},{"authors":["Admin"],"categories":null,"content":"Yes, we had all planned to be under the palm trees of Punta Cana by now, sipping drinks with umbrellas. Let\u0026rsquo;s make a new plan: a beer garden on Thursday evening (19:30 CET). All ETH Rycolab and UCPH CopeNLU research group members \u0026amp; friends are invited to join our Gather.Town beer garden to mingle with one another. Sign up here to get the invitation link. The number of spots is limited!\n ","date":1605657600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1605657600,"objectID":"c537941a870c7c7fd0a8077a73b30bb1","permalink":"https://copenlu.github.io/post/eth-ucph-party/","publishdate":"2020-11-18T00:00:00Z","relpermalink":"/post/eth-ucph-party/","section":"post","summary":"Yes, we had all planned to be under the palm trees of Punta Cana by now, sipping drinks with umbrellas. Let's make a new plan: an online beer garden on Thursday evening (19:30 CET)!","tags":["Party"],"title":"EMNLP 2020 Beer Garden Meetup","type":"post"},{"authors":["Lucas Chaves Lima","Dustin Wright","Isabelle Augenstein","Maria Maistro"],"categories":null,"content":"","date":1604188800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1604188800,"objectID":"feb781adcb9d40a5b1d020a9605839b1","permalink":"https://copenlu.github.io/publication/2020_trec_lima/","publishdate":"2020-11-01T00:00:00Z","relpermalink":"/publication/2020_trec_lima/","section":"publication","summary":"In this paper, we describe our participation in the TREC Health Misinformation Track 2020. We submitted 11 runs to the Total Recall Task and 13 runs to the Ad Hoc task. Our approach consists of 3 steps: (1) we create an initial run with BM25 and RM3; (2) we estimate credibility and misinformation scores for the documents in the initial run; (3) we merge the relevance, credibility and misinformation scores to re-rank documents in the initial run. To estimate credibility scores, we implement a classifier which exploits features based on the content and the popularity of a document. To compute the misinformation score, we apply a stance detection approach with a pretrained Transformer language model. Finally, we use different approaches to merge scores: weighted average, the distance among score vectors and rank fusion.","tags":[],"title":"University of Copenhagen Participation in TREC Health Misinformation Track 2020","type":"publication"},{"authors":[],"categories":null,"content":"7 CopeNLU papers are accepted to appear at EMNLP 2020. 
The topics include fact checking, explainability, domain adaptation, transfer learning, QA and improving peer review.\nA Diagnostic Study of Explainability Techniques for Text Classification. Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein. In Proceedings of EMNLP.\nGenerating Label Cohesive and Well-Formed Adversarial Claims. Pepa Atanasova\u0026#42;, Dustin Wright\u0026#42;, Isabelle Augenstein. In Proceedings of EMNLP.\nTransformer Based Multi-Source Domain Adaptation. Dustin Wright, Isabelle Augenstein. In Proceedings of EMNLP.\nSubjQA: A Dataset for Subjectivity and Review Comprehension. Johannes Bjerva, Nikita Bhutani, Behzad Golshan, Wang-Chiew Tan, Isabelle Augenstein. In Proceedings of EMNLP.\nZero-Shot Cross-Lingual Transfer with Meta Learning. Farhad Nooralahzadeh, Giannis Bekoulis, Johannes Bjerva, Isabelle Augenstein. In Proceedings of EMNLP.\nClaim Check-Worthiness Detection as Positive Unlabelled Learning. Dustin Wright, Isabelle Augenstein. In Findings of EMNLP.\nWhat Can We Do to Improve Peer Review in NLP? Anna Rogers, Isabelle Augenstein. In Findings of EMNLP.\n*equal contributions\n","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"ea2c4e70d2f532144ee263eda3b038a4","permalink":"https://copenlu.github.io/talk/2020_09_emnlp/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/talk/2020_09_emnlp/","section":"talk","summary":"7 CopeNLU papers are accepted to appear at EMNLP 2020. The topics include fact checking, explainability, domain adaptation, transfer learning, QA and improving peer review.\nA Diagnostic Study of Explainability Techniques for Text Classification. Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein. In Proceedings of EMNLP.\nGenerating Label Cohesive and Well-Formed Adversarial Claims. Pepa Atanasova\u0026#42;, Dustin Wright\u0026#42;, Isabelle Augenstein. In Proceedings of EMNLP.\nTransformer Based Multi-Source Domain Adaptation.","tags":[],"title":"7 Papers Accepted to EMNLP 2020","type":"talk"},{"authors":["Pepa Atanasova","Jakob Grue Simonsen","Christina Lioma","Isabelle Augenstein"],"categories":null,"content":"","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"615014e7b718dbc85577e537b157f493","permalink":"https://copenlu.github.io/publication/2020_emnlp_atanasova_diagnostic/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/publication/2020_emnlp_atanasova_diagnostic/","section":"publication","summary":"Recent developments in machine learning have introduced models that approach human performance at the cost of increased architectural complexity. Efforts to make the rationales behind the models' predictions transparent have inspired an abundance of new explainability techniques. Provided with an already trained model, they compute saliency scores for the words of an input instance. However, there exists no definitive guide on (i) how to choose such a technique given a particular application task and model architecture, and (ii) the benefits and drawbacks of using each such technique. In this paper, we develop a comprehensive list of diagnostic properties for evaluating existing explainability techniques. We then employ the proposed list to compare a set of diverse explainability techniques on downstream text classification tasks and neural network architectures. 
We also compare the saliency scores assigned by the explainability techniques with human annotations of salient input regions to find relations between a model's performance and the agreement of its rationales with human ones. Overall, we find that the gradient-based explanations perform best across tasks and model architectures, and we present further insights into the properties of the reviewed explainability techniques.","tags":[],"title":"A Diagnostic Study of Explainability Techniques for Text Classification","type":"publication"},{"authors":["Pepa Atanasova","Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"a2f2c74ec0e577e1cd444fa0744bc9da","permalink":"https://copenlu.github.io/publication/2020_emnlp_atanasova_generating/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/publication/2020_emnlp_atanasova_generating/","section":"publication","summary":"Adversarial attacks reveal important vulnerabilities and flaws of trained models. One potent type of attack is universal adversarial triggers, which are individual n-grams that, when appended to instances of a class under attack, can trick a model into predicting a target class. However, for inference tasks such as fact checking, these triggers often inadvertently invert the meaning of instances they are inserted in. In addition, such attacks produce semantically nonsensical inputs, as they simply concatenate triggers to existing samples. Here, we investigate how to generate adversarial attacks against fact checking systems that preserve the ground truth meaning and are semantically valid. We extend the HotFlip attack algorithm used for universal trigger generation by jointly minimizing the target class loss of a fact checking model and the entailment class loss of an auxiliary natural language inference model. We then train a conditional language model to generate semantically valid statements, which include the found universal triggers. We find that the generated attacks maintain the directionality and semantic validity of the claim better than previous work.","tags":[],"title":"Generating Label Cohesive and Well-Formed Adversarial Claims","type":"publication"},{"authors":["Johannes Bjerva","Nikita Bhutani","Behzad Golshan","Wang-Chiew Tan","Isabelle Augenstein"],"categories":null,"content":"","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"94bcd38aad28e992d9cdad9176777b26","permalink":"https://copenlu.github.io/publication/2020_emnlp_bjerva/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/publication/2020_emnlp_bjerva/","section":"publication","summary":"Subjectivity is the expression of internal opinions or beliefs which cannot be objectively observed or verified, and has been shown to be important for sentiment analysis and word-sense disambiguation. Furthermore, subjectivity is an important aspect of user-generated data. In spite of this, subjectivity has not been investigated in contexts where such data is widespread, such as in question answering (QA). We therefore investigate the relationship between subjectivity and QA, while developing a new dataset. We compare and contrast with analyses from previous work, and verify that findings regarding subjectivity still hold when using recently developed NLP architectures. 
We find that subjectivity is also an important feature in the case of QA, albeit with more intricate interactions between subjectivity and QA performance. For instance, a subjective question may or may not be associated with a subjective answer. We release an English QA dataset (SubjQA) based on customer reviews, containing subjectivity annotations for questions and answer spans across 6 distinct domains.","tags":[],"title":"SubjQA: A Dataset for Subjectivity and Review Comprehension","type":"publication"},{"authors":["Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"e1ea406e0f0a5188cb58a4f5e833f3f0","permalink":"https://copenlu.github.io/publication/2020_emnlp_wright_transformer/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/publication/2020_emnlp_wright_transformer/","section":"publication","summary":"In practical machine learning settings, the data on which a model must make predictions often come from a different distribution than the data it was trained on. Here, we investigate the problem of unsupervised multi-source domain adaptation, where a model is trained on labelled data from multiple source domains and must make predictions on a domain for which no labelled data has been seen. Prior work with CNNs and RNNs has demonstrated the benefit of mixture of experts, where the predictions of multiple domain expert classifiers are combined, as well as of domain adversarial training to induce a domain-agnostic representation space. Inspired by this, we investigate how such methods can be effectively applied to large pretrained transformer models. We find that domain adversarial training has an effect on the learned representations of these models while having little effect on their performance, suggesting that large transformer-based models are already relatively robust across domains. Additionally, we show that mixture of experts leads to significant performance improvements by comparing several variants of mixing functions, including one novel mixture based on attention. Finally, we demonstrate that the predictions of large pretrained transformer-based domain experts are highly homogeneous, making it challenging to learn effective functions for mixing their predictions.","tags":[],"title":"Transformer Based Multi-Source Domain Adaptation","type":"publication"},{"authors":["Farhad Nooralahzadeh","Giannis Bekoulis","Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1600128000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600128000,"objectID":"e4942e363a103fa1884d677e21279505","permalink":"https://copenlu.github.io/publication/2020_emnlp_nooralahzadeh/","publishdate":"2020-09-15T00:00:00Z","relpermalink":"/publication/2020_emnlp_nooralahzadeh/","section":"publication","summary":"Learning what to share between tasks has been a topic of high importance recently, as strategic sharing of knowledge has been shown to improve the performance of downstream tasks. The same applies to sharing between languages, and is especially important when considering the fact that most languages in the world suffer from being under-resourced. In this paper, we consider the setting of training models on multiple different languages at the same time, when little or no data is available for languages other than English. 
We show that this challenging setup can be approached using meta-learning, where, in addition to training a source language model, another model learns to select which training instances are the most beneficial. We experiment using standard supervised, zero-shot cross-lingual, as well as few-shot cross-lingual settings for different natural language understanding tasks (natural language inference, question answering). Our extensive experimental setup demonstrates the consistent effectiveness of meta-learning, on a total of 16 languages. We improve upon the state of the art for zero-shot and few-shot NLI and QA tasks on the XNLI and X-WikiRe datasets, respectively. We further conduct a comprehensive analysis which indicates that correlation of typological features between languages can further explain when parameter sharing learned via meta-learning is beneficial.","tags":[],"title":"Zero-Shot Cross-Lingual Transfer with Meta Learning","type":"publication"},{"authors":["Dustin Wright","Isabelle Augenstein"],"categories":null,"content":"","date":1600041600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600041600,"objectID":"8342681060dad043059270f781bd971f","permalink":"https://copenlu.github.io/publication/2020_emnlp_wright_claim/","publishdate":"2020-09-14T00:00:00Z","relpermalink":"/publication/2020_emnlp_wright_claim/","section":"publication","summary":"A critical component of automatically combating misinformation is the detection of fact check-worthiness, i.e. determining if a piece of information should be checked for veracity. There are multiple isolated lines of research which address this core issue: check-worthiness detection from political speeches and debates, rumour detection on Twitter, and citation needed detection from Wikipedia. What is still lacking is a structured comparison of these variants of check-worthiness, as well as a unified approach to them. We find that check-worthiness detection is a very challenging task in any domain, because it both hinges upon detecting how factual a sentence is, and how likely a sentence is to be believed without verification. As such, annotators often only mark those instances they judge to be clear-cut check-worthy. Our best-performing method automatically corrects for this, using a variant of positive unlabelled learning, which learns when an instance annotated as not check-worthy should in fact have been annotated as being check-worthy. In applying this, we outperform the state of the art in two of the three domains studied for check-worthiness detection in English.","tags":[],"title":"Claim Check-Worthiness Detection as Positive Unlabelled Learning","type":"publication"},{"authors":["Anna Rogers","Isabelle Augenstein"],"categories":null,"content":"","date":1600041600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1600041600,"objectID":"d590de91307c1381d89e0774ed47e684","permalink":"https://copenlu.github.io/publication/2020_emnlp_rogers/","publishdate":"2020-09-14T00:00:00Z","relpermalink":"/publication/2020_emnlp_rogers/","section":"publication","summary":"Peer review is our best tool for judging the quality of conference submissions, but it is becoming increasingly spurious. We argue that a part of the problem is that the reviewers face a poorly defined task forcing apples-to-oranges comparisons. 
As a community familiar with annotation, we can improve at least that.","tags":[],"title":"What Can We Do to Improve Peer Review in NLP?","type":"publication"},{"authors":["Johannes Bjerva","Elizabeth Salesky","Sabrina J. Mielke","Aditi Chaudhary","Giuseppe G. A. Celano","Edoardo M. Ponti","Ekaterina Vylomova","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1599955200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1599955200,"objectID":"0ebf346b4b6c9cdc6012ba55726e0427","permalink":"https://copenlu.github.io/publication/2020_sigtyp_bjerva/","publishdate":"2020-09-13T00:00:00Z","relpermalink":"/publication/2020_sigtyp_bjerva/","section":"publication","summary":"Typological knowledge bases (KBs) such as WALS contain information about linguistic properties of the world's languages. They have been shown to be useful for downstream applications, including cross-lingual transfer learning and linguistic probing. A major drawback hampering broader adoption of typological KBs is that they are sparsely populated, in the sense that most languages only have annotations for some features, and skewed, in that few features have wide coverage. As typological features often correlate with one another, it is possible to predict them and thus automatically populate typological KBs, which is also the focus of this shared task. Overall, the task attracted 8 submissions from 5 teams, out of which the most successful methods make use of such feature correlations. However, our error analysis reveals that even the strongest submitted systems struggle with predicting feature values for languages where few features are known.","tags":[],"title":"SIGTYP 2020 Shared Task: Prediction of Typological Features","type":"publication"},{"authors":["Lukas Muttenthaler","Isabelle Augenstein","Johannes Bjerva"],"categories":null,"content":"","date":1599955200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1599955200,"objectID":"6bba51243ced409e5eb588fde4c04be3","permalink":"https://copenlu.github.io/publication/2020_blackboxnlp_muttenthaler/","publishdate":"2020-09-13T00:00:00Z","relpermalink":"/publication/2020_blackboxnlp_muttenthaler/","section":"publication","summary":"It is challenging to automatically evaluate the answer of a QA model at inference time. Although many models provide confidence scores, and simple heuristics can go a long way towards indicating answer correctness, such measures are heavily dataset-dependent and are unlikely to generalise. In this work, we begin by investigating the hidden representations of questions, answers, and contexts in transformer-based QA architectures. We observe a consistent pattern in the answer representations, which we show can be used to automatically evaluate whether or not a predicted answer span is correct. Our method does not require any labelled data and outperforms strong heuristic baselines, across 2 datasets and 7 domains. We are able to predict whether or not a model's answer is correct with 91.37% accuracy on SQuAD, and 80.7% accuracy on SubjQA. We expect that this method will have broad applications, e.g., in semi-automatic development of QA datasets.","tags":[],"title":"Unsupervised Evaluation for Question Answering with Transformers","type":"publication"},{"authors":null,"categories":null,"content":"Learning with limited labelled data is useful for small domains or languages with few resources. 
Methods we research to mitigate problems arising in these contexts include multi-task learning, weakly supervised and zero-shot learning.\nThis is a cross-cutting theme in most of our research. Two previous projects specifically addressing this are Multi3Generation and Andreas Nugaard Holm\u0026rsquo;s industrial PhD project with BASE Life Science, supported by Innovation Fund Denmark.\nMulti3Generation is a COST Action that funds collaboration of researchers in Europe and abroad. The project is coordinated by Isabelle Augenstein, and its goals are to study language generation using multi-task, multilingual and multi-modal signals.\nAndreas Nugaard Holm\u0026rsquo;s industrial PhD project focuses on transfer learning and domain adaptation for scientific text.\n","date":1598832000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1598832000,"objectID":"673da02c015e56b1ada0bfcd1fc5431c","permalink":"https://copenlu.github.io/project/limited-data/","publishdate":"2020-08-31T00:00:00Z","relpermalink":"/project/limited-data/","section":"project","summary":"Learning with limited labelled data, including multi-task learning, weakly supervised and zero-shot learning","tags":["lld","limited-data"],"title":"Learning with Limited Labelled Data","type":"project"},{"authors":null,"categories":null,"content":"We are interested in studying methods to determine the attitude expressed in a text towards a topic (stance detection), such as determining if a tweet expresses a positive, negative or neutral stance towards a political entity. One additional challenge we are exploring is stance detection in a conversational context, where the stance depends on the context of the conversation. Fact checking using textual data can be framed very similarly, namely as determining whether an evidence document agrees with, disagrees with, or is topically unrelated to a headline or claim.\nWe are researching the relationship between attitudes towards entities on social media and gender bias as part of a DFF Project 1.\nMoreover, we are researching methods for explainable stance detection in the context of a DFF Sapere Aude Research Leader project, and explainable fact checking as part of an ERC Starting Grant project.\n","date":1598745600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1598745600,"objectID":"9b262f5564a2254298043b81d48f5aa5","permalink":"https://copenlu.github.io/project/fact-checking/","publishdate":"2020-08-30T00:00:00Z","relpermalink":"/project/fact-checking/","section":"project","summary":"Determine the attitude expressed in a text towards a topic, and use this for automatic evidence-based fact checking","tags":["nlu","fact-checking"],"title":"Stance Detection and Fact Checking","type":"project"},{"authors":["Zeerak Waseem","Smarika Lulz","Joachim Bingel","Isabelle Augenstein"],"categories":null,"content":"","date":1592784000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1592784000,"objectID":"5c2333f2257b630027496010d4402cb0","permalink":"https://copenlu.github.io/publication/2020_openreview_waseem/","publishdate":"2020-06-22T00:00:00Z","relpermalink":"/publication/2020_openreview_waseem/","section":"publication","summary":"Machine Learning (ML) seeks to identify and encode bodies of knowledge within provided datasets. However, data encodes subjective content, which determines the possible outcomes of the models trained on it. Because such subjectivity potentially enables marginalisation of parts of society, it is termed (social) `bias' and sought to be removed. 
In this opinion paper, we contextualise this discourse of bias in the ML community against the subjective choices in the development process. Through a consideration of how choices in data and model development construct subjectivity, or biases that are represented in a model, we argue that addressing and mitigating biases is near-impossible. This is because both data and ML models are objects for which meaning is made in each step of the development pipeline, from data selection through annotation to model training and analysis. Accordingly, we find the prevalent discourse of bias limiting in its ability to address social marginalisation. We recommend being conscientious of this, and accepting that de-biasing methods only correct for a fraction of biases.","tags":[],"title":"Disembodied Machine Learning: On the Illusion of Objectivity in NLP","type":"publication"},{"authors":["Nils Rethmeier","Vageesh Kumar Saxena","Isabelle Augenstein"],"categories":null,"content":"","date":1589932800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1589932800,"objectID":"54f14e8b8547114f3515bff64e31ddfd","permalink":"https://copenlu.github.io/publication/2020_uai_rethmeier/","publishdate":"2020-05-20T00:00:00Z","relpermalink":"/publication/2020_uai_rethmeier/","section":"publication","summary":"While state-of-the-art NLP explainability (XAI) methods focus on supervised, per-instance end-task or diagnostic probing task evaluation [4, 2, 10], this is insufficient to interpret and quantify model knowledge transfer during (un-) supervised training. By instead expressing each neuron as an interpretable token-activation distribution collected over many instances, one can quantify and guide visual exploration of neuron-knowledge change between model training stages to analyze transfer beyond probing tasks and the per-instance level. This allows one to analyze: (RQ1) how neurons abstract knowledge during unsupervised pretraining; (RQ2) how pretrained neurons zero-shot transfer knowledge to new domain data; and (RQ3) how supervised tasks reorder pretrained neuron knowledge abstractions. Since the meaningfulness of XAI methods is hard to quantify [11, 4], we analyze three example learning setups (RQ1-3) to empirically verify that our method (TX-Ray) identifies transfer (ir-)relevant neurons for pruning (RQ3), and that its transfer metrics coincide with traditional measures like perplexity (RQ1). We also find that TX-Ray-guided pruning of supervision (ir-)relevant neuron-knowledge (RQ3) can identify `lottery ticket'-like [9, 40] neurons that drive model performance and robustness. Upon inspecting pruned neurons, we find that task-relevant neuron-knowledge (`tickets') appears (over-)fit, while task-irrelevant neurons lower overfitting, i.e. TX-Ray identifies neurons that generalize, transfer or specialize model-knowledge [25]. Finally, through RQ1-3, we find that TX-Ray helps to explore and quantify dynamics of (continual) knowledge transfer and that it can shed light on neuron-knowledge specialization and generalization, to complement (costly) supervised probing task procurement and established `summary' statistics like perplexity, ROC or F scores.","tags":[],"title":"TX-Ray: Quantifying and Explaining Model-Knowledge Transfer in (Un-)Supervised NLP","type":"publication"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors are accepted to appear at ACL 2020. 
One paper is on explainable fact checking, providing the first study of how fact checking explanations can be generated automatically based on claim content, and how this task can be modelled jointly with veracity prediction; whereas the other one is on script conversion, proposing a novel Chinese character conversion model that can disambiguate between mappings and convert between Chinese scripts.\nGenerating Fact Checking Explanations. Pepa Atanasova, Jakob Grue Simonsen, Christina Lioma, Isabelle Augenstein.\n2kenize: Tying Subword Sequences for Chinese Script Conversion. Pranav A, Isabelle Augenstein.\n","date":1586044800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1586044800,"objectID":"1b6d2d8c54eac58bda836cb7a734f04a","permalink":"https://copenlu.github.io/talk/2020_04_acl/","publishdate":"2020-04-05T00:00:00Z","relpermalink":"/talk/2020_04_acl/","section":"talk","summary":"2 papers by CopeNLU authors are accepted to appear at ACL 2020. One paper is on explainable fact checking, providing the first study of how fact checking explanations can be generated automatically based on claim content, and how this task can be modelled jointly with veracity prediction; whereas the other one is on script conversion, proposing a novel Chinese character conversion model that can disambiguate between mappings and convert between Chinese scripts.","tags":["multilingual-learning","fact-checking"],"title":"2 Papers Accepted to ACL 2020","type":"talk"},{"authors":["Pepa Atanasova","Jakob Grue Simonsen","Christina Lioma","Isabelle Augenstein"],"categories":null,"content":"","date":1585958400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1585958400,"objectID":"80b48486085eda6938691ed579137fa5","permalink":"https://copenlu.github.io/publication/2020_acl_atanasova/","publishdate":"2020-04-04T00:00:00Z","relpermalink":"/publication/2020_acl_atanasova/","section":"publication","summary":"Most existing work on automated fact checking is concerned with predicting the veracity of claims based on metadata, social network spread, language used in claims, and, more recently, evidence supporting or denying claims. A crucial piece of the puzzle that is still missing is to understand how to automate the most elaborate part of the process -- generating justifications for verdicts on claims. This paper provides the first study of how these explanations can be generated automatically based on available claim context, and how this task can be modeled jointly with veracity prediction. Our results indicate that optimising both objectives at the same time, rather than training them separately, improves the performance of a fact checking system. The results of a manual evaluation further suggest that the informativeness, coverage and overall quality of the generated explanations are also improved in the multi-task model.","tags":[],"title":"Generating Fact Checking Explanations","type":"publication"},{"authors":["Pranav A","Isabelle Augenstein"],"categories":null,"content":"","date":1585872000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1585872000,"objectID":"6cc5bf28f7b94880f51ac56fe9e105d9","permalink":"https://copenlu.github.io/publication/2020_acl_a/","publishdate":"2020-04-03T00:00:00Z","relpermalink":"/publication/2020_acl_a/","section":"publication","summary":"Simplified Chinese to Traditional Chinese script conversion is a common preprocessing step in Chinese NLP. 
Despite this, current approaches have poor performance because they do not take into account that a simplified Chinese character can correspond to multiple traditional characters. Here, we propose a novel model that can disambiguate between mappings and convert between the two scripts. The model is based on subword segmentation, two language models, as well as a method for mapping between subword sequences. We further construct benchmark datasets for topic classification and script conversion. Our proposed method outperforms previous Chinese Character Conversion approaches by 6 points in accuracy. These results are further confirmed in a downstream application, where 2kenize is used to preprocess text for topic classification. An error analysis reveals that our method's particular strengths are in dealing with code mixing and named entities.","tags":[],"title":"2kenize: Tying Subword Sequences for Chinese Script Conversion","type":"publication"},{"authors":["Alok Debnath","Nikhil Pinnaparaju","Manish Shrivastava","Vasudeva Varma","Isabelle Augenstein"],"categories":null,"content":"","date":1580515200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1580515200,"objectID":"d6e93f486b225a90a2c12bd2107b3735","permalink":"https://copenlu.github.io/publication/2020_socialnlp_debnath/","publishdate":"2020-02-01T00:00:00Z","relpermalink":"/publication/2020_socialnlp_debnath/","section":"publication","summary":"In this paper, we extend the task of semantic textual similarity to include sentences which contain emojis. Emojis are ubiquitous on social media today, but are often removed in the pre-processing stage of curating datasets for NLP tasks. In this paper, we qualitatively ascertain the amount of semantic information lost by discounting emojis, as well as show a mechanism of accounting for emojis in a semantic task. We create a sentence similarity dataset of 4000 pairs of tweets with emojis, which have been annotated for relatedness. The corpus contains tweets curated based on common topic as well as by replacement of emojis. The latter was done to analyze the difference in semantics associated with different emojis. We aim to provide an understanding of the information lost by removing emojis by providing a qualitative analysis of the dataset. We also aim to present a method of using both emojis and words for downstream NLP tasks beyond sentiment analysis.","tags":[],"title":"Semantic Textual Similarity of Sentences with Emojis","type":"publication"},{"authors":["Johannes Bjerva","Wouter Kouw","Isabelle Augenstein"],"categories":null,"content":"","date":1573430400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1573430400,"objectID":"44ab0384bb61b068fbc09fc327cdd196","permalink":"https://copenlu.github.io/publication/2020_aaai_bjerva_kouw/","publishdate":"2019-11-11T00:00:00Z","relpermalink":"/publication/2020_aaai_bjerva_kouw/","section":"publication","summary":"Language evolves over time in many ways relevant to natural language processing tasks. For example, recent occurrences of tokens 'BERT' and 'ELMO' in publications refer to neural network architectures rather than persons. This type of temporal signal is typically overlooked, but is important if one aims to deploy a machine learning model over an extended period of time. In particular, language evolution causes data drift between time-steps in sequential decision-making tasks. 
Examples of such tasks include prediction of paper acceptance for yearly conferences (regular intervals) or author stance prediction for rumours on Twitter (irregular intervals). Inspired by successes in computer vision, we tackle data drift by sequentially aligning learned representations. We evaluate on three challenging tasks varying in terms of time-scales, linguistic units, and domains. These tasks show our method outperforming several strong baselines, including using all available data. We argue that, due to its low computational expense, sequential alignment is a practical solution to dealing with language evolution.","tags":[],"title":"Back to the Future -- Sequential Alignment of Text Representations","type":"publication"},{"authors":[],"categories":null,"content":"4 papers by CopeNLU authors are to be presented at EMNLP 2019 and co-located events, on fact checking and disinformation, as well as on multi-task and multi-lingual learning.\nMultiFC: A Real-World Multi-Domain Dataset for Evidence-Based Fact Checking of Claims. Isabelle Augenstein, Christina Lioma, Dongsheng Wang, Lucas Chaves Lima, Casper Hansen, Christian Hansen, Jakob Grue Simonsen. In Proceedings of EMNLP-IJCNLP 2019.\nMapping (Dis-)Information Flow about the MH17 Plane Crash. Mareike Hartmann, Yevgeniy Golovchenko, Isabelle Augenstein. In Proceedings of the 2019 Workshop on NLP4IF: censorship, disinformation, and propaganda (NLP4IF at EMNLP-IJCNLP 2019).\nX-WikiRE: A Large, Multilingual Resource for Relation Extraction as Machine Comprehension. Mostafa Abdou, Cezar Sas, Rahul Aralikatte, Isabelle Augenstein, Anders Søgaard. In Proceedings of the 2nd Workshop on Deep Learning for Low-Resource NLP (DeepLo at EMNLP-IJCNLP 2019).\nTransductive Auxiliary Task Self-Training for Neural Multi-Task Models. Johannes Bjerva, Katharina Kann, Isabelle Augenstein. In Proceedings of the 2nd Workshop on Deep Learning for Low-Resource NLP (DeepLo at EMNLP-IJCNLP 2019).\n","date":1572566400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1572566400,"objectID":"66a85a74e8c590681d82ae2d34cbf254","permalink":"https://copenlu.github.io/talk/2019_11_emnlp/","publishdate":"2019-11-01T00:00:00Z","relpermalink":"/talk/2019_11_emnlp/","section":"talk","summary":"4 papers by CopeNLU authors are to be presented at EMNLP 2019 and co-located events, on fact checking and disinformation, as well as on multi-task and multi-lingual learning.\nMultiFC: A Real-World Multi-Domain Dataset for Evidence-Based Fact Checking of Claims. Isabelle Augenstein, Christina Lioma, Dongsheng Wang, Lucas Chaves Lima, Casper Hansen, Christian Hansen, Jakob Grue Simonsen. In Proceedings of EMNLP-IJCNLP 2019.\nMapping (Dis-)Information Flow about the MH17 Plane Crash. 
Mareike Hartmann, Yevgeniy Golovchenko, Isabelle Augenstein.","tags":[],"title":"4 papers to be presented at EMNLP 2019","type":"talk"},{"authors":null,"categories":null,"content":"We are interested in studying methods to explain relationships between inputs and outputs of black-box machine learning models, particularly in the context of challenging NLU tasks such as fact checking.\nWe are researching methods for explainable stance detection in the context of a DFF Sapere Aude Research Leader project, and explainable fact checking as part of an ERC Starting Grant project.\nMoreover, we are investigating fair and accountable Natural Language Processing methods to understand what influences the employer images that organisations project in job ads, as part of a Carlsberg-funded project.\n","date":1569888000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1569888000,"objectID":"d359f44e3a75e0b3491e12fadee50064","permalink":"https://copenlu.github.io/project/explainability/","publishdate":"2019-10-01T00:00:00Z","relpermalink":"/project/explainability/","section":"project","summary":"Explaining relationships between inputs and outputs of black-box machine learning models","tags":["nlu"],"title":"Explainable Machine Learning","type":"project"},{"authors":["Isabelle Augenstein","Christina Lioma","Dongsheng Wang","Lucas Chaves Lima","Casper Hansen","Christian Hansen","Jakob Grue Simonsen"],"categories":null,"content":"","date":1565740800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565740800,"objectID":"4be9f04cad3214b7e117e486001cf0b2","permalink":"https://copenlu.github.io/publication/2019_emnlp_augenstein/","publishdate":"2019-08-14T00:00:00Z","relpermalink":"/publication/2019_emnlp_augenstein/","section":"publication","summary":"We contribute the largest publicly available dataset of naturally occurring factual claims for the purpose of automatic claim verification. It is collected from 26 fact checking websites in English, paired with textual sources and rich metadata, and labelled for veracity by human expert journalists. We present an in-depth analysis of the dataset, highlighting characteristics and challenges. Further, we present results for automatic veracity prediction, both with established baselines and with a novel method for joint ranking of evidence pages and predicting veracity that outperforms all baselines. Significant performance increases are achieved by encoding evidence, and by modelling metadata. Our best-performing model achieves a Macro F1 of 49.2%, showing that this is a challenging testbed for claim veracity prediction.","tags":[],"title":"MultiFC: A Real-World Multi-Domain Dataset for Evidence-Based Fact Checking of Claims","type":"publication"},{"authors":["Mareike Hartmann","Yevgeniy Golovchenko","Isabelle Augenstein"],"categories":null,"content":"","date":1565654400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565654400,"objectID":"7f017aedb120ed83bd90a0ce329a5549","permalink":"https://copenlu.github.io/publication/2019_nlp4if_hartmann/","publishdate":"2019-08-13T00:00:00Z","relpermalink":"/publication/2019_nlp4if_hartmann/","section":"publication","summary":"Digital media enables not only fast sharing of information, but also disinformation. One prominent case of an event leading to circulation of disinformation on social media is the MH17 plane crash. Studies analysing the spread of information about this event on Twitter have focused on small, manually annotated datasets, or used proxies for data annotation. 
In this work, we examine to what extent text classifiers can be used to label data for subsequent content analysis; in particular, we focus on predicting pro-Russian and pro-Ukrainian Twitter content related to the MH17 plane crash. Even though we find that a neural classifier improves over a hashtag-based baseline, labeling pro-Russian and pro-Ukrainian content with high precision remains a challenging problem. We provide an error analysis underlining the difficulty of the task and identify factors that might help improve classification in future work. Finally, we show how the classifier can facilitate the annotation task for human annotators.","tags":[],"title":"Mapping (Dis-)Information Flow about the MH17 Plane Crash","type":"publication"},{"authors":["Joachim Bingel","Victor Petrén Bach Hansen","Ana Valeria Gonzalez","Paweł Budzianowski","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1565568000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565568000,"objectID":"e60966e2b6babcb61c965c98311808af","permalink":"https://copenlu.github.io/publication/2019_convai_bingel/","publishdate":"2019-08-12T00:00:00Z","relpermalink":"/publication/2019_convai_bingel/","section":"publication","summary":"Task oriented dialogue systems rely heavily on specialized dialogue state tracking (DST) modules for dynamically predicting user intent throughout the conversation. State-of-the-art DST models are typically trained in a supervised manner from manual annotations at the turn level. However, these annotations are costly to obtain, which makes it difficult to create accurate dialogue systems for new domains. To address these limitations, we propose a method, based on reinforcement learning, for transferring DST models to new domains without turn-level supervision. Across several domains, our experiments show that this method quickly adapts off-the-shelf models to new domains and performs on par with models trained with turn-level supervision. We also show our method can improve models trained using turn-level supervision by subsequent fine-tuning optimization toward dialog-level rewards.","tags":[],"title":"Domain Transfer in Dialogue Systems without Turn-Level Supervision","type":"publication"},{"authors":["Ana Valeria Gonzalez","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1565568000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565568000,"objectID":"5ffe12cab0c4cb4dac164965635cb813","permalink":"https://copenlu.github.io/publication/2019_convai_gonzalez/","publishdate":"2019-08-12T00:00:00Z","relpermalink":"/publication/2019_convai_gonzalez/","section":"publication","summary":"Task oriented dialogue systems rely heavily on specialized dialogue state tracking (DST) modules for dynamically predicting user intent throughout the conversation. State-of-the-art DST models are typically trained in a supervised manner from manual annotations at the turn level. However, these annotations are costly to obtain, which makes it difficult to create accurate dialogue systems for new domains. To address these limitations, we propose a method, based on reinforcement learning, for transferring DST models to new domains without turn-level supervision. Across several domains, our experiments show that this method quickly adapts off-the-shelf models to new domains and performs on par with models trained with turn-level supervision. 
We also show our method can improve models trained using turn-level supervision by subsequent fine-tuning optimization toward dialog-level rewards.","tags":[],"title":"Retrieval-Based Goal-Oriented Dialogue Generation","type":"publication"},{"authors":["Mostafa Abdou","Cezar Sas","Rahul Aralikatte","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1565568000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565568000,"objectID":"2a01314b332ff8ab8508ff83d8cb49c9","permalink":"https://copenlu.github.io/publication/2019_deeplo_abdou/","publishdate":"2019-08-12T00:00:00Z","relpermalink":"/publication/2019_deeplo_abdou/","section":"publication","summary":"Although the vast majority of knowledge bases (KBs) are heavily biased towards English, Wikipedias do cover very different topics in different languages. Exploiting this, we introduce a new multilingual dataset (X-WikiRE), framing relation extraction as a multilingual machine reading problem. We show that by leveraging this resource it is possible to robustly transfer models cross-lingually and that multilingual support significantly improves (zero-shot) relation extraction, enabling the population of low-resourced KBs from their well-populated counterparts.","tags":[],"title":"X-WikiRE: A Large, Multilingual Resource for Relation Extraction as Machine Comprehension","type":"publication"},{"authors":["Johannes Bjerva","Katharina Kann","Isabelle Augenstein"],"categories":null,"content":"","date":1565481600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1565481600,"objectID":"a5054ee59833f81fcfebde72b86819ad","permalink":"https://copenlu.github.io/publication/2019_deeplo_bjerva/","publishdate":"2019-08-11T00:00:00Z","relpermalink":"/publication/2019_deeplo_bjerva/","section":"publication","summary":"Multi-task learning and self-training are two common ways to improve a machine learning model's performance in settings with limited training data. Drawing heavily on ideas from those two approaches, we suggest transductive auxiliary task self-training: training a multi-task model on (i) a combination of main and auxiliary task training data, and (ii) test instances with auxiliary task labels which a single-task version of the model has previously generated. We perform extensive experiments on 86 combinations of languages and tasks. Our results are that, on average, transductive auxiliary task self-training improves absolute accuracy by up to 9.56% over the pure multi-task model for dependency relation tagging and by up to 13.03% for semantic tagging.","tags":[],"title":"Transductive Auxiliary Task Self-Training for Neural Multi-Task Models","type":"publication"},{"authors":[],"categories":null,"content":"2 papers by CopeNLU authors are accepted to appear at ACL 2019. One paper is on uncovering probabilistic implications in typological knowledge bases, following up from our NAACL 2019 paper on generative linguistic typology; whereas the other one is on unsupervised discovery of gendered language, utilising the multi-view autoencoder introduced in our NAACL 2019 paper.\nUncovering Probabilistic Implications in Typological Knowledge Bases. Johannes Bjerva, Yova Kementchedjhieva, Ryan Cotterell, Isabelle Augenstein.\nUnsupervised Discovery of Gendered Language through Latent-Variable Modeling. 
Alexander Hoyle, Lawrence Wolf-Sonkin, Hanna Wallach, Isabelle Augenstein, Ryan Cotterell.\n","date":1557792000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1557792000,"objectID":"99bb0884709b5538d1fdb51c792a32c5","permalink":"https://copenlu.github.io/talk/2019_05_acl/","publishdate":"2019-05-14T00:00:00Z","relpermalink":"/talk/2019_05_acl/","section":"talk","summary":"2 papers by CopeNLU authors are accepted to appear at ACL 2019. One paper is on uncovering probabilistic implications in typological knowledge bases, following up from our NAACL 2019 paper on generative linguistic typology; whereas the other one is on unsupervised discovery of gendered language, utilising the multi-view autoencoder introduced in our NAACL 2019 paper.\nUncovering Probabilistic Implications in Typological Knowledge Bases. Johannes Bjerva, Yova Kementchedjhieva, Ryan Cotterell, Isabelle Augenstein.","tags":[],"title":"2 Papers Accepted to ACL 2019","type":"talk"},{"authors":["Johannes Bjerva","Yova Kementchedjhieva","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1557792000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1557792000,"objectID":"0b58a94448025cc2412612d8fde6f580","permalink":"https://copenlu.github.io/publication/2019_acl_bjerva/","publishdate":"2019-05-14T00:00:00Z","relpermalink":"/publication/2019_acl_bjerva/","section":"publication","summary":"The study of linguistic typology is rooted in the implications we find between linguistic features, such as the fact that languages with object-verb word ordering tend to have postpositions. Uncovering such implications typically amounts to time-consuming manual processing by trained and experienced linguists, which potentially leaves key linguistic universals unexplored. In this paper, we present a computational model which successfully identifies known universals, including Greenberg universals, but also uncovers new ones, worthy of further linguistic investigation. Our approach outperforms baselines previously used for this problem, as well as a strong baseline from knowledge base population.","tags":[],"title":"Uncovering Probabilistic Implications in Typological Knowledge Bases","type":"publication"},{"authors":["Alexander Hoyle","Lawrence Wolf-Sonkin","Hanna Wallach","Isabelle Augenstein","Ryan Cotterell"],"categories":null,"content":"","date":1557792000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1557792000,"objectID":"360c04b943d2ac91d6031552ad5845fb","permalink":"https://copenlu.github.io/publication/2019_acl_hoyle/","publishdate":"2019-05-14T00:00:00Z","relpermalink":"/publication/2019_acl_hoyle/","section":"publication","summary":"Studying to what degree the language we use is gender-specific has long been an area of interest in socio-linguistics. Studies have explored, for instance, the speech of male and female characters in film, or gendered language used when describing male versus female politicians. In this paper, we aim not to merely analyze this phenomenon qualitatively, but instead to quantify the degree to which language used to describe men and women is different, and moreover, different in a positive or negative way. We propose a novel generative latent-variable model, to be trained on a large corpus, that jointly represents adjective (or verb) choice with its sentiment given the natural gender of the head (or dependent) noun. 
We find that there are significant differences between how male and female nouns are described, which are in line with common gender stereotypes: Positive adjectives used to describe women are more likely to be related to a person's body than adjectives describing men.","tags":[],"title":"Unsupervised Discovery of Gendered Language through Latent-Variable Modeling","type":"publication"},{"authors":["Isabelle Augenstein","Spandana Gella","Sebastian Ruder","Katharina Kann","Burcu Can","Alexis Conneau","Johannes Welbl","Xiang Ren","Marek Rei"],"categories":null,"content":"","date":1555718400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1555718400,"objectID":"c7686844bbcb0a906aeef2426354f008","permalink":"https://copenlu.github.io/publication/2019_repl4nlp_augenstein/","publishdate":"2019-04-20T00:00:00Z","relpermalink":"/publication/2019_repl4nlp_augenstein/","section":"publication","summary":"The 4th Workshop on Representation Learning for NLP (RepL4NLP) will be hosted by ACL 2019 and held on 2 August 2019. The workshop is being organised by Isabelle Augenstein, Spandana Gella, Sebastian Ruder, Katharina Kann, Burcu Can, Alexis Conneau, Johannes Welbl, Xiang Ren and Marek Rei; and advised by Kyunghyun Cho, Edward Grefenstette, Karl Moritz Hermann, Chris Dyer and Laura Rimell. The workshop is organised by the ACL Special Interest Group on Representation Learning (SIGREP) and receives generous sponsorship from Facebook AI Research, Amazon, and Naver. The 4th Workshop on Representation Learning for NLP aims to continue the success of the 1st Workshop on Representation Learning for NLP (about 50 submissions and over 250 attendees; second most attended collocated event at ACL’16 after WMT), 2nd Workshop on Representation Learning for NLP and 3rd Workshop on Representation Learning for NLP. The workshop was introduced as a synthesis of several years of independent *CL workshops focusing on vector space models of meaning, compositionality, and the application of deep neural networks and spectral methods to NLP. It provides a forum for discussing recent advances on these topics, as well as future research directions in linguistically motivated vector-based models in NLP.","tags":[],"title":"Proceedings of The Fourth Workshop on Representation Learning for NLP","type":"publication"},{"authors":[],"categories":null,"content":"3 papers by CopeNLU authors are accepted to appear at NAACL 2019. Topics span from population of typological knowledge bases and weak supervision from disparate lexica to frame detection in online fora.\nA Probabilistic Generative Model of Linguistic Typology. Johannes Bjerva, Yova Kementchedjhieva, Ryan Cotterell, Isabelle Augenstein.\nCombining Disparate Sentiment Lexica with a Multi-View Variational Autoencoder. Alexander Hoyle, Lawrence Wolf-Sonkin, Hanna Wallach, Ryan Cotterell, Isabelle Augenstein.\nIssue Framing in Online Discussion Fora. Mareike Hartmann, Tallulah Jansen, Isabelle Augenstein, Anders Søgaard.\n","date":1550793600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1550793600,"objectID":"cadc8f4d9fa61b8034486ed3ea2a336a","permalink":"https://copenlu.github.io/talk/2019_02_naacl/","publishdate":"2019-02-22T00:00:00Z","relpermalink":"/talk/2019_02_naacl/","section":"talk","summary":"3 papers by CopeNLU authors are accepted to appear at NAACL 2019. 
Topics span from population of typological knowledge bases and weak supervision from disparate lexica to frame detection in online fora.\nA Probabilistic Generative Model of Linguistic Typology. Johannes Bjerva, Yova Kementchedjhieva, Ryan Cotterell, Isabelle Augenstein.\nCombining Disparate Sentiment Lexica with a Multi-View Variational Autoencoder. Alexander Hoyle, Lawrence Wolf-Sonkin, Hanna Wallach, Ryan Cotterell, Isabelle Augenstein.\nIssue Framing in Online Discussion Fora.","tags":[],"title":"3 Papers Accepted to NAACL 2019","type":"talk"},{"authors":["Johannes Bjerva","Yova Kementchedjhieva","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1550793600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1550793600,"objectID":"568d0fcc90dddd21ecb612fb4a66e564","permalink":"https://copenlu.github.io/publication/2019_naacl_bjerva/","publishdate":"2019-02-22T00:00:00Z","relpermalink":"/publication/2019_naacl_bjerva/","section":"publication","summary":"In the Principles and Parameters framework, the structural features of languages depend on parameters that may be toggled on or off, with a single parameter often dictating the status of multiple features. The implied covariance between features inspires our probabilisation of this line of linguistic inquiry---we develop a generative model of language based on exponential-family matrix factorisation. By modelling all languages and features within the same architecture, we show how structural similarities between languages can be exploited to predict typological features with near-perfect accuracy, besting several baselines on the task of predicting held-out features. Furthermore, we show that language representations pre-trained on monolingual text allow for generalisation to unobserved languages. This finding has clear practical and also theoretical implications: the results confirm what linguists have hypothesised, i.e. that there are significant correlations between typological features and languages.","tags":[],"title":"A Probabilistic Generative Model of Linguistic Typology","type":"publication"},{"authors":["Alexander Hoyle","Lawrence Wolf-Sonkin","Hanna Wallach","Ryan Cotterell","Isabelle Augenstein"],"categories":null,"content":"","date":1550793600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1550793600,"objectID":"bb5d04545be8a1e5502706ae3d78a674","permalink":"https://copenlu.github.io/publication/2019_naacl_hoyle/","publishdate":"2019-02-22T00:00:00Z","relpermalink":"/publication/2019_naacl_hoyle/","section":"publication","summary":"When assigning quantitative labels to a dataset, different methodologies may rely on different scales. In particular, when assigning polarities to words in a sentiment lexicon, annotators may use binary, categorical, or continuous labels. Naturally, it is of interest to unify these labels from disparate scales to both achieve maximal coverage over words and to create a single, more robust sentiment lexicon while retaining scale coherence. We introduce a generative model of sentiment lexica to combine disparate scales into a common latent representation. We realize this model with a novel multi-view variational autoencoder (VAE), called SentiVAE. 
We evaluate our approach via a downstream text classification task involving nine English-language sentiment analysis datasets; our representation outperforms six individual sentiment lexica, as well as a straightforward combination thereof.","tags":[],"title":"Combining Sentiment Lexica with a Multi-View Variational Autoencoder","type":"publication"},{"authors":["Mareike Hartmann","Tallulah Jansen","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1550707200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1550707200,"objectID":"e0a4b15989b58d07523ecd5a988a8020","permalink":"https://copenlu.github.io/publication/2019_naacl_hartmann/","publishdate":"2019-02-21T00:00:00Z","relpermalink":"/publication/2019_naacl_hartmann/","section":"publication","summary":"In online discussion fora, speakers often make arguments for or against something, say birth control, by highlighting certain aspects of the topic. In social science, this is referred to as issue framing. In this paper, we introduce a new issue frame annotated corpus of online discussions. We explore to what extent models trained to detect issue frames in newswire and social media can be transferred to the domain of discussion fora, using a combination of multi-task and adversarial training, assuming only unlabeled training data in the target domain.","tags":[],"title":"Issue Framing in Online Discussion Fora","type":"publication"},{"authors":["Admin"],"categories":null,"content":" The University of Copenhagen is a great place if you\u0026rsquo;re both interested in high-quality NLP research (we\u0026rsquo;re ranked 2nd in Europe and 19th in the world on CSRankings for NLP) and a high quality of life (Denmark has consistently been ranked one of the three happiest countries in the world, and ranks similarly highly for quality of life). In addition to this, Copenhagen is a lively, bustling capital, while still being small enough that one can cycle nearly everywhere within a short amount of time. Intrigued? Read on!\n Life in Copenhagen NLP Research at the University of Copenhagen Applying for a PhD Doing a Postdoc CopeNLU Group Visiting the Group Life in Copenhagen Copenhagen is a lively and vibrant, yet comfortably sized capital city with an oceanic climate, situated on the Øresund strait just across the water from Sweden. As it is located in Scandinavia, many of the corresponding stereotypes apply. Healthcare is socialised, public transport is cheap, working hours are short, taxes are high, but so are incomes. Denmark has been voted the happiest country in the world in 2016 and has kept a spot in the top three since, becoming a world-wide cultural phenomenon with many books published on the topic (some personal recommendations: \u0026ldquo;The Year of Living Danishly\u0026rdquo; by Helen Russell, and \u0026ldquo;The Almost Nearly Perfect People\u0026rdquo; by Michael Booth). Popular exports are hygge, beer and pork products. It doesn\u0026rsquo;t stop at traditional Danish food, though, as Copenhagen is also a leading gastro destination, being home to numerous fine dining restaurants.\nLiving in Copenhagen itself is a very relaxing experience. The city is very spacious compared to other capitals (think: London, New York, Tokyo), boasting large and open public spaces. Most people cycle to work and have a short commute, as it is easily possible to afford a city centre flat on an average salary. 
There are many things to do and explore in Copenhagen, from cozy bars and cafés to museums, the hipster district Vesterbro, the free town Christiania, and much more, as Copenhagen has incidentally also been voted the number 1 city to visit by Lonely Planet. The Copenhagen tourist board as well as the EMNLP 2017 local guide are excellent starting points for finding out more about what to do.\nFor practical advice about moving to Copenhagen, the International Staff Mobility Department at the University of Copenhagen provides information on topics ranging from housing to language classes. Speaking of, language classes are free for all new residents in Denmark \u0026ndash; not that one would need to speak Danish in order to get by in Copenhagen, as it is the capital city with the world\u0026rsquo;s highest English proficiency in a country where English is not the first official language.\nLastly, another reason to move to Copenhagen is that academic salaries compare very favourably to other countries \u0026ndash; e.g. PhD salaries here are the highest in Europe \u0026ndash; meaning it is possible to live in Denmark comfortably as a researcher. An up-to-date overview of salaries by job category at the University of Copenhagen can also be found here.\n NLP Research at the University of Copenhagen The NLP Section at the University of Copenhagen is currently home to four faculty members and many postdocs and PhD students, who work on topics including natural language understanding, multi-lingual learning, automatic fact checking, machine translation and multi-modal learning.\nNLP at the University of Copenhagen as a whole is highly productive and internationally well-regarded. For instance, we are ranked 2nd in Europe and 19th in the world on CSRankings for NLP, and were host to EMNLP 2017.\nApplying for a PhD PhD programs at the University of Copenhagen, and in Denmark in general, are fairly compact; students are expected to submit their thesis after three years, though an extension of one year can be granted. PhD students do not have to take courses during their studies, as they are expected to have completed a Master\u0026rsquo;s degree already. There are benefits and downsides to such a program structure, of course, and a more opinionated take on this can be found in this Quora post.\nThere are generally three routes to applying for a PhD:\n applying for a fully-funded position; applying with external funding; applying for an industrial PhD. The first option is undoubtedly the most common one. Here, a PhD fellowship is provided through a funded project, and the position is advertised on the KU job portal. The successful candidate becomes an employee at the University of Copenhagen, and in addition a PhD candidate registered with the PhD School. Depending on the funding source, a broad research topic is either already provided, or the topic is completely open. The Pioneer Centre for AI has an annual call for PhD positions with a deadline in Spring, typically a combination of pre-defined and open topic positions, which are additionally advertised on the Pioneer Centre\u0026rsquo;s website. Available positions in our group are also always advertised on the CopeNLU website.\nThe second option requires the PhD candidate to secure external funding to cover living expenses in Denmark for a three-year period. Funding can, for instance, come from a governmental scholarship programme, or from a central PhD funding call, such as the one by the Danish Data Science Academy (DDSA). 
For this, the candidate should contact the potential supervisor for their support to apply to the external funding programme, and, if funding is secured, apply for admission to the PhD School afterwards.\nThe third option is something fairly special to Denmark \u0026ndash; PhD students can study towards their PhD while at the same time working at a company roughly one day a week. These PhD projects tend to be more applied and the topic for them is defined based on an agreement between the PhD advisor and the company. Funding for such projects is obtained either with or without a candidate for the position. As with the other two options, the PhD student is enrolled at the Doctoral School.\nAdditionally, candidates can express their interest in a PhD position by applying to the ELLIS PhD Programme. ELLIS is a central recruitment vehicle for AI labs across Europe and thus does not directly offer PhD funding, so a dual application for external funding is required.\nMore information on the enrollment process at the Science Faculty Doctoral School is listed here.\nDoing a Postdoc Postdoc positions are available for anything between one and four years. Applicants are expected to have submitted their PhD thesis by the time they start, but it is typically not necessary for them to have been formally awarded a PhD yet, if they have sufficient evidence that this is likely to happen in the near future.\nThe routes to getting a postdoc position are very similar to those for a PhD position:\n applying for a fully-funded position; applying with external funding; applying for an industrial postdoc. Funded postdoc positions are often advertised on the KU job portal, though 1-year postdoc contracts can be offered without open calls. The postdoc topic varies widely based on the funding source. As for PhD positions, the Pioneer Centre for AI provides funding for postdocs, and positions are advertised on the Pioneer Centre\u0026rsquo;s website. Available positions in our group are also always advertised on the CopeNLU website.\nIt is, furthermore, possible to start as a postdoc with already obtained funding, e.g. individual research fellowships. The topic for those is typically open and decided by the postdoc candidate. For some fellowship schemes, applications are made jointly with the host \u0026ndash; an example of this is the Danish Data Science Academy (DDSA) postdoctoral fellowship programme. Get in touch if you are interested in this option.\nLastly, industrial postdoc programs mean that a researcher works both at a university and at a company, roughly one day a week. The postdoc topic is typically more application-oriented and defined based on an agreement between the advisor and the company. Funding is agreed upon either with or without a candidate for the position.\nCopeNLU Group If you arrived on this page, you\u0026rsquo;ve likely already seen the rest of the CopeNLU website. We are an active and friendly multi-cultural research group at the Computer Science department at the University of Copenhagen, interested in natural language understanding. Our group is affiliated with the NLP Section, as well as with the Pioneer Centre for AI and located in the Observatory in the Botanical Gardens in central Copenhagen, near Nørreport Station. We are a very social and collaborative group, with weekly group meetings, biweekly reading groups, one-on-one supervision meetings, regular social events, research retreats, and more. 
In addition to group-internal activities, there are also a number of events organised by the NLP section, Computer Science department and Pioneer Centre. This includes, among others, seminar series and workshops, PhD lunches, and social events. PhD students and postdocs are encouraged not to work on projects on their own, but rather to form synergies with other group members based on common research interests. We strive to have a diverse and inclusive group atmosphere, welcoming applicants of all genders and from all cultures and backgrounds.\nCopeNLU is headed by Isabelle Augenstein and Pepa Atanasova and mainly supported by two prestigious early-career fellowships \u0026ndash; an ERC Starting Grant on \u0026lsquo;Explainable and Robust Automatic Fact Checking\u0026rsquo;, as well as the Danish equivalent of that, a DFF Sapere Aude Research Leader fellowship on \u0026lsquo;Learning to Explain Attitudes on Social Media\u0026rsquo;.\nVisiting the Group We are always open to short or longer-term visitors to the group, but do not provide internships for Bachelor\u0026rsquo;s or Master\u0026rsquo;s students. Funding for visits can typically not be provided, with the exception of invited speakers who give a talk at the Pioneer Centre or the SCIENCE AI Seminar Series.\n","date":1549065600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1699315200,"objectID":"dfe14b7cff1dd6a8394505f419e3da78","permalink":"https://copenlu.github.io/post/why-ucph/","publishdate":"2019-02-02T00:00:00Z","relpermalink":"/post/why-ucph/","section":"post","summary":"The University of Copenhagen is a great place if you're both interested in high-quality NLP research and a high quality of life.","tags":["Academic"],"title":"Interested in joining us at the University of Copenhagen?","type":"post"},{"authors":["Johannes Bjerva","Robert Östling","Maria Han Veiga","Jörg Tiedemann","Isabelle Augenstein"],"categories":null,"content":"","date":1548979200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1548979200,"objectID":"77c14eca1f87d3b410f72a8c639228e9","permalink":"https://copenlu.github.io/publication/2019_cl_bjerva/","publishdate":"2019-02-01T00:00:00Z","relpermalink":"/publication/2019_cl_bjerva/","section":"publication","summary":"A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just like it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, while genetic relationships---a convenient benchmark used for evaluation in previous work---appears to be a confounding factor. 
Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another.","tags":[],"title":"What do Language Representations Really Represent?","type":"publication"},{"authors":["Sebastian Ruder","Joachim Bingel","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1548892800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1548892800,"objectID":"c4469eafb02bfe1292a0bcad561bf951","permalink":"https://copenlu.github.io/publication/2019_aaai_ruder/","publishdate":"2019-01-31T00:00:00Z","relpermalink":"/publication/2019_aaai_ruder/","section":"publication","summary":"Multi-task learning (MTL) allows deep neural networks to learn from related tasks by sharing parameters with other networks. In practice, however, MTL involves searching an enormous space of possible parameter sharing architectures to find (a) the layers or subspaces that benefit from sharing, (b) the appropriate amount of sharing, and (c) the appropriate relative weights of the different task losses. Recent work has addressed each of the above problems in isolation. In this work we present an approach that learns a latent multi-task architecture that jointly addresses (a)--(c). We present experiments on synthetic data and data from OntoNotes 5.0, including four different tasks and seven different domains. Our extension consistently outperforms previous approaches to learning latent architectures for multi-task problems and achieves up to 15% average error reductions over common approaches to MTL.","tags":[],"title":"Latent multi-task architecture learning","type":"publication"},{"authors":null,"categories":null,"content":"","date":1548004021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1548004021,"objectID":"95e81310bc3161eac19de3719bf77d94","permalink":"https://copenlu.github.io/people/isabelle/","publishdate":"2019-01-20T17:07:01Z","relpermalink":"/people/isabelle/","section":"people","summary":"","tags":["Members"],"title":"Isabelle Augenstein","type":"people"},{"authors":null,"categories":null,"content":"","date":1547917621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1547917621,"objectID":"9dec93e26a79a922ecb78073404d2edb","permalink":"https://copenlu.github.io/people/pepa/","publishdate":"2019-01-19T17:07:01Z","relpermalink":"/people/pepa/","section":"people","summary":"","tags":["Members"],"title":"Pepa Atanasova","type":"people"},{"authors":null,"categories":null,"content":"","date":1547226421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1547226421,"objectID":"408e3dc13d1d78e5ca83d9a2216695c0","permalink":"https://copenlu.github.io/people/erik/","publishdate":"2019-01-11T17:07:01Z","relpermalink":"/people/erik/","section":"people","summary":"","tags":["Members"],"title":"Erik Arakelyan","type":"people"},{"authors":null,"categories":null,"content":"","date":1546967221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546967221,"objectID":"d1ec2f2601ce0eaf2b137ac7d1ca5185","permalink":"https://copenlu.github.io/people/nadav/","publishdate":"2019-01-08T17:07:01Z","relpermalink":"/people/nadav/","section":"people","summary":"","tags":["Members"],"title":"Nadav 
Borenstein","type":"people"},{"authors":null,"categories":null,"content":"","date":1546880821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546880821,"objectID":"8ae6452e05fcf41d5c66de3ba856c124","permalink":"https://copenlu.github.io/people/arnav/","publishdate":"2019-01-07T17:07:01Z","relpermalink":"/people/arnav/","section":"people","summary":"","tags":["Members"],"title":"Arnav Arora","type":"people"},{"authors":null,"categories":null,"content":"","date":1546535221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546535221,"objectID":"1f03f45240e950309cef2ad5a3512b58","permalink":"https://copenlu.github.io/people/haeun/","publishdate":"2019-01-03T17:07:01Z","relpermalink":"/people/haeun/","section":"people","summary":"","tags":["Members"],"title":"Haeun Yu","type":"people"},{"authors":null,"categories":null,"content":"","date":1546448821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546448821,"objectID":"8ff87588e9b4324f8857db599768f3ff","permalink":"https://copenlu.github.io/people/jingyi/","publishdate":"2019-01-02T17:07:01Z","relpermalink":"/people/jingyi/","section":"people","summary":"","tags":["Members"],"title":"Jingyi Sun","type":"people"},{"authors":null,"categories":null,"content":"","date":1546445221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546445221,"objectID":"b858e55d616f555a11d701e38ea5ba04","permalink":"https://copenlu.github.io/people/lovisa/","publishdate":"2019-01-02T16:07:01Z","relpermalink":"/people/lovisa/","section":"people","summary":"","tags":["Alumni"],"title":"Lovisa Hagström","type":"people"},{"authors":null,"categories":null,"content":"","date":1546445221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546445221,"objectID":"f07f1feaa98d654ef39b0a89d0ca4bfc","permalink":"https://copenlu.github.io/people/siddhesh/","publishdate":"2019-01-02T16:07:01Z","relpermalink":"/people/siddhesh/","section":"people","summary":"","tags":["Members"],"title":"Siddhesh Pawar","type":"people"},{"authors":null,"categories":null,"content":"","date":1546358821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546358821,"objectID":"77c3ada3dcc99d95c6734f9bf4ac0757","permalink":"https://copenlu.github.io/people/dustin/","publishdate":"2019-01-01T16:07:01Z","relpermalink":"/people/dustin/","section":"people","summary":"","tags":["Members"],"title":"Dustin Wright","type":"people"},{"authors":null,"categories":null,"content":"","date":1546358821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546358821,"objectID":"641f0b18c13d93a1a8fcc976401f533a","permalink":"https://copenlu.github.io/people/ercong/","publishdate":"2019-01-01T16:07:01Z","relpermalink":"/people/ercong/","section":"people","summary":"","tags":["Alumni"],"title":"Ercong Nie","type":"people"},{"authors":null,"categories":null,"content":"","date":1546272421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546272421,"objectID":"8f718922ab072dd721c37ca449bb6aa1","permalink":"https://copenlu.github.io/people/steffen/","publishdate":"2018-12-31T16:07:01Z","relpermalink":"/people/steffen/","section":"people","summary":"","tags":["Alumni"],"title":"Steffen 
Eger","type":"people"},{"authors":null,"categories":null,"content":"","date":1546186021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546186021,"objectID":"a803453705455d6de970e1b5fc68b17c","permalink":"https://copenlu.github.io/people/ran-zhang/","publishdate":"2018-12-30T16:07:01Z","relpermalink":"/people/ran-zhang/","section":"people","summary":"","tags":["Alumni"],"title":"Ran Zhang","type":"people"},{"authors":null,"categories":null,"content":"","date":1546099621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546099621,"objectID":"797fbbaa90845dca45593666d9bd6e6b","permalink":"https://copenlu.github.io/people/christoph/","publishdate":"2018-12-29T16:07:01Z","relpermalink":"/people/christoph/","section":"people","summary":"","tags":["Alumni"],"title":"Christoph Leitner","type":"people"},{"authors":null,"categories":null,"content":"","date":1546013221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546013221,"objectID":"174e877e02e95941cd8fdd298f8a71d0","permalink":"https://copenlu.github.io/people/greta/","publishdate":"2018-12-28T16:07:01Z","relpermalink":"/people/greta/","section":"people","summary":"","tags":["Members"],"title":"Greta Warren","type":"people"},{"authors":null,"categories":null,"content":"","date":1546013221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1546013221,"objectID":"474f4d35d0df5f36da88a98a377e2812","permalink":"https://copenlu.github.io/people/shuzhou/","publishdate":"2018-12-28T16:07:01Z","relpermalink":"/people/shuzhou/","section":"people","summary":"","tags":["Alumni"],"title":"Shuzhou Yuan","type":"people"},{"authors":null,"categories":null,"content":"","date":1545840421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1545840421,"objectID":"2dee4e9e882bcec99aa4e9012ddb5bde","permalink":"https://copenlu.github.io/people/sekh/","publishdate":"2018-12-26T16:07:01Z","relpermalink":"/people/sekh/","section":"people","summary":"","tags":["Members"],"title":"Sekh Mainul Islam","type":"people"},{"authors":null,"categories":null,"content":"","date":1545754021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1545754021,"objectID":"3d7860ce07e01171e0d4c7c88cd3b91a","permalink":"https://copenlu.github.io/people/sarah/","publishdate":"2018-12-25T16:07:01Z","relpermalink":"/people/sarah/","section":"people","summary":"","tags":["Members"],"title":"Sarah Masud","type":"people"},{"authors":null,"categories":null,"content":"","date":1545667621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1545667621,"objectID":"59903756c95f068514727d9ba2ba3de6","permalink":"https://copenlu.github.io/people/zain/","publishdate":"2018-12-24T16:07:01Z","relpermalink":"/people/zain/","section":"people","summary":"","tags":["Members"],"title":"Zain Muhammad Mujahid","type":"people"},{"authors":["Ana V. González-Garduño ","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1540944000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1540944000,"objectID":"7be805f652a43c8d17f6d8767fc0c080","permalink":"https://copenlu.github.io/publication/2018_emnlp_gonzalez/","publishdate":"2018-10-31T00:00:00Z","relpermalink":"/publication/2018_emnlp_gonzalez/","section":"publication","summary":"The best systems at the SemEval-16 and SemEval-17 community question answering shared tasks -- a task that amounts to question relevancy ranking -- involve complex pipelines and manual feature engineering. 
Despite this, many of these still fail at beating the IR baseline, i.e., the rankings provided by Google's search engine. We present a strong baseline for question relevancy ranking by training a simple multi-task feed forward network on a bag of 14 distance measures for the input question pair. This baseline model, which is fast to train and uses only language-independent features, outperforms the best shared task systems on the task of retrieving relevant previously asked questions.","tags":[],"title":"A strong baseline for question relevancy ranking","type":"publication"},{"authors":["Miryam de Lhoneux","Johannes Bjerva","Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1540857600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1540857600,"objectID":"11a2fc1d52386ca7a3d13ca4b2fea67f","permalink":"https://copenlu.github.io/publication/2018_emnlp_de-lhoneux/","publishdate":"2018-10-30T00:00:00Z","relpermalink":"/publication/2018_emnlp_de-lhoneux/","section":"publication","summary":"Previous work has suggested that parameter sharing between transition-based neural dependency parsers for related languages can lead to better performance, but there is no consensus on what parameters to share. We present an evaluation of 27 different parameter sharing strategies across 10 languages, representing five pairs of related languages, each pair from a different language family. We find that sharing transition classifier parameters always helps, whereas the usefulness of sharing word and/or character LSTM parameters varies. Based on this result, we propose an architecture where the transition classifier is shared, and the sharing of word and character parameters is controlled by a parameter that can be tuned on validation data. This model is linguistically motivated and obtains significant improvements over a monolingually trained baseline. We also find that sharing transition classifier parameters helps when training a parser on unrelated language pairs, but we find that, in the case of unrelated languages, sharing too many parameters does not help.","tags":[],"title":"Parameter sharing between dependency parsers for related languages","type":"publication"},{"authors":["Yova Kementchedjhieva","Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1538352000,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1538352000,"objectID":"1b3f9646468795b7081d3cd891e790cd","permalink":"https://copenlu.github.io/publication/2018_sigmorphon_kementchedjhieva/","publishdate":"2018-10-01T00:00:00Z","relpermalink":"/publication/2018_sigmorphon_kementchedjhieva/","section":"publication","summary":"This paper documents the Team Copenhagen system which placed first in the CoNLL--SIGMORPHON 2018 shared task on universal morphological reinflection, Task 2 with an overall accuracy of 49.87. Task 2 focuses on morphological inflection in context: generating an inflected word form, given the lemma of the word and the context it occurs in. Previous SIGMORPHON shared tasks have focused on context-agnostic inflection---the 'inflection in context' task was introduced this year. 
We approach this with an encoder-decoder architecture over character sequences with three core innovations, all contributing to an improvement in performance: (1) a wide context window; (2) a multi-task learning approach with the auxiliary task of MSD prediction; (3) training models in a multilingual fashion.","tags":[],"title":" Copenhagen at CoNLL--SIGMORPHON 2018: Multilingual Inflection in Context with Explicit Morphosyntactic Decoding","type":"publication"},{"authors":["Anders Søgaard","Miryam de Lhoneux","Isabelle Augenstein"],"categories":null,"content":"","date":1538265600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1538265600,"objectID":"f3e4355ecd1db9a9ea4bfb28d4775004","permalink":"https://copenlu.github.io/publication/2018_blackbox_soegaard/","publishdate":"2018-09-30T00:00:00Z","relpermalink":"/publication/2018_blackbox_soegaard/","section":"publication","summary":"Punctuation is a strong indicator of syntactic structure, and parsers trained on text with punctuation often rely heavily on this signal. Punctuation is a diversion, however, since human language processing does not rely on punctuation to the same extent, and in informal texts, we therefore often leave out punctuation. We also use punctuation ungrammatically for emphatic or creative purposes, or simply by mistake. We show that (a) dependency parsers are sensitive to both absence of punctuation and to alternative uses; (b) neural parsers tend to be more sensitive than vintage parsers; (c) training neural parsers without punctuation outperforms all out-of-the-box parsers across all scenarios where punctuation departs from standard punctuation. Our main experiments are on synthetically corrupted data to study the effect of punctuation in isolation and avoid potential confounds, but we also show effects on out-of-domain data.","tags":[],"title":"Nightmare at test time: How punctuation prevents parsers from generalizing","type":"publication"},{"authors":null,"categories":null,"content":" In this tutorial, I\u0026rsquo;ll share my top 10 tips for getting started with Academic:\nTip 1 \u0026hellip;\nTip 2 \u0026hellip;\n","date":1536451200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1536451200,"objectID":"6a451186c775f5f0adb3a0416d0cb711","permalink":"https://copenlu.github.io/tutorial/example/","publishdate":"2018-09-09T00:00:00Z","relpermalink":"/tutorial/example/","section":"tutorial","summary":"In this tutorial, I\u0026rsquo;ll share my top 10 tips for getting started with Academic:\nTip 1 \u0026hellip;\nTip 2 \u0026hellip;","tags":null,"title":"Example Page","type":"docs"},{"authors":["Dirk Weissenborn","Pasquale Minervini","Tim Dettmers","Isabelle Augenstein","Johannes Welbl","Tim Rocktäschel","Matko Bošnjak","Jeff Mitchell","Thomas Demeester","Pontus Stenetorp","Sebastian Riedel"],"categories":null,"content":"","date":1533081600,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1533081600,"objectID":"30a8402ff65b94561c394b6a22ec12f2","permalink":"https://copenlu.github.io/publication/2018_acl_weissenborn/","publishdate":"2018-08-01T00:00:00Z","relpermalink":"/publication/2018_acl_weissenborn/","section":"publication","summary":"Many Machine Reading and Natural Language Understanding tasks require reading supporting text in order to answer questions. For example, in Question Answering, the supporting text can be newswire or Wikipedia articles; in Natural Language Inference, premises can be seen as the supporting text and hypotheses as questions. 
Providing a set of useful primitives operating in a single framework of related tasks would allow for expressive modelling, and easier model comparison and replication. To that end, we present Jack the Reader (Jack), a framework for Machine Reading that allows for quick model prototyping by component reuse, evaluation of new models on existing datasets as well as integrating new datasets and applying them on a growing set of implemented baseline models. Jack is currently supporting (but not limited to) three tasks: Question Answering, Natural Language Inference, and Link Prediction. It is developed with the aim of increasing research efficiency and code reuse.","tags":[],"title":"Jack the Reader – A Machine Reading Framework","type":"publication"},{"authors":["Katharina Kann","Johannes Bjerva","Isabelle Augenstein","Barbara Plank","Anders Søgaard"],"categories":null,"content":"","date":1532995200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1532995200,"objectID":"ecddfe04d32ca116b139cbaae21fda6c","permalink":"https://copenlu.github.io/publication/2018_deeplo_kann/","publishdate":"2018-07-31T00:00:00Z","relpermalink":"/publication/2018_deeplo_kann/","section":"publication","summary":"Neural part-of-speech (POS) taggers are known to not perform well with little training data. As a step towards overcoming this problem, we present an architecture for learning more robust neural POS taggers by jointly training a hierarchical, recurrent model and a recurrent character-based sequence-to-sequence network supervised using an auxiliary objective. This way, we introduce stronger character-level supervision into the model, which enables better generalization to unseen words and provides regularization, making our encoding less prone to overfitting. We experiment with three auxiliary tasks: lemmatization, character-based word autoencoding, and character-based random string autoencoding. Experiments with minimal amounts of labeled data on 34 languages show that our new architecture outperforms a single-task baseline and, surprisingly, that, on average, raw text autoencoding can be as beneficial for low-resource POS tagging as using lemma information. Our neural POS tagger closes the gap to a state-of-the-art POS tagger (MarMoT) for low-resource scenarios by 43%, even outperforming it on languages with templatic morphology, e.g., Arabic, Hebrew, and Turkish, by some margin.","tags":[],"title":"Character-level Supervision for Low-resource POS Tagging","type":"publication"},{"authors":["Isabelle Augenstein","Kris Cao","He He","Felix Hill","Spandana Gella","Jamie Kiros","Hongyuan Mei","Dipendra Misra"],"categories":null,"content":"","date":1532995200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1532995200,"objectID":"6e6ca8c5d9f158ee98eede711a95f30f","permalink":"https://copenlu.github.io/publication/2018_repl4nlp_augenstein/","publishdate":"2018-07-31T00:00:00Z","relpermalink":"/publication/2018_repl4nlp_augenstein/","section":"publication","summary":"The ACL 2018 Workshop on Representation Learning for NLP (RepL4NLP) takes place on Friday, July 20, 2018 in Melbourne, Australia, immediately following the 56th Annual Meeting of the Association for Computational Linguistics (ACL). The workshop is generously sponsored by Facebook, Salesforce, ASAPP, DeepMind, Microsoft Research, and Naver. 
RepL4NLP is organised by Isabelle Augenstein, Kris Cao, He He, Felix Hill, Spandana Gella, Jamie Kiros, Hongyuan Mei and Dipendra Misra, and advised by Kyunghyun Cho, Edward Grefenstette, Karl Moritz Hermann and Laura Rimell. The 3rd Workshop on Representation Learning for NLP aims to continue the success of the 1st Workshop on Representation Learning for NLP, which received about 50 submissions and over 250 attendees and was the second most attended collocated event at ACL 2016 in Berlin, Germany after WMT; and the 2nd Workshop on Representation Learning for NLP at ACL 2017 in Vancouver, Canada. The workshop has a focus on vector space models of meaning, compositionality, and the application of deep neural networks and spectral methods to NLP. It provides a forum for discussing recent advances on these topics, as well as future research directions in linguistically motivated vector-based models in NLP.","tags":[],"title":"Proceedings of The Third Workshop on Representation Learning for NLP","type":"publication"},{"authors":["Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1530403200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1530403200,"objectID":"04c9a44363b1ede24a1b169269f632e4","permalink":"https://copenlu.github.io/publication/2018_naacl_bjerva/","publishdate":"2018-07-01T00:00:00Z","relpermalink":"/publication/2018_naacl_bjerva/","section":"publication","summary":"A core part of linguistic typology is the classification of languages according to linguistic properties, such as those detailed in the World Atlas of Language Structure (WALS). Doing this manually is prohibitively time-consuming, which is in part evidenced by the fact that only 100 out of over 7,000 languages spoken in the world are fully covered in WALS. We learn distributed language representations, which can be used to predict typological properties on a massively multilingual scale. Additionally, quantitative and qualitative analyses of these language embeddings can tell us how language similarities are encoded in NLP models for tasks at different typological levels. The representations are learned in an unsupervised manner alongside tasks at three typological levels: phonology (grapheme-to-phoneme prediction, and phoneme reconstruction), morphology (morphological inflection), and syntax (part-of-speech tagging). We consider more than 800 languages and find significant differences in the language representations encoded, depending on the target task. For instance, although Norwegian Bokmål and Danish are typologically close to one another, they are phonologically distant, which is reflected in their language embeddings growing relatively distant in a phonological task. 
We are also able to predict typological features in WALS with high accuracies, even for unseen language families.","tags":[],"title":"From Phonology to Syntax: Unsupervised Linguistic Typology at Different Levels with Language Embeddings","type":"publication"},{"authors":["Isabelle Augenstein","Sebastian Ruder","Anders Søgaard"],"categories":null,"content":"","date":1530403200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1530403200,"objectID":"59e919526653ad514493d33dd8423a61","permalink":"https://copenlu.github.io/publication/2018_naacl_augenstein/","publishdate":"2018-07-01T00:00:00Z","relpermalink":"/publication/2018_naacl_augenstein/","section":"publication","summary":"We combine multi-task learning and semi-supervised learning by inducing a joint embedding space between disparate label spaces and learning transfer functions between label embeddings, enabling us to jointly leverage unlabelled data and auxiliary, annotated datasets. We evaluate our approach on a variety of sequence classification tasks with disparate label spaces. We outperform strong single and multi-task baselines and achieve a new state-of-the-art for topic-based sentiment analysis.","tags":[],"title":"Multi-Task Learning of Pairwise Sequence Classification Tasks over Disparate Label Spaces","type":"publication"},{"authors":["Thomas Nyegaard-Signori","Casper Veistrup Helms","Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1527811200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1527811200,"objectID":"d1aafea6345c3797374bf6e984b97818","permalink":"https://copenlu.github.io/publication/2018_naacl_nyegaard-signori/","publishdate":"2018-06-01T00:00:00Z","relpermalink":"/publication/2018_naacl_nyegaard-signori/","section":"publication","summary":"We take a multi-task learning approach to the shared Task 1 at SemEval-2018. The general idea concerning the model structure is to use as little external data as possible in order to preserve the task relatedness and reduce complexity. We employ multi-task learning with hard parameter sharing to exploit the relatedness between sub-tasks. As a base model, we use a standard recurrent neural network for both the classification and regression subtasks. Our system ranks 32nd out of 48 participants with a Pearson score of 0.557 in the first subtask, and 20th out of 35 in the fifth subtask with an accuracy score of 0.464.","tags":[],"title":"KU-MTL at SemEval-2018 Task 1: Multi-task Identification of Affect in Tweets","type":"publication"},{"authors":null,"categories":null,"content":"We are working on studying methods to detect gendered language automatically using unsupervised learning methods, such as variational auto-encoders. 
The findings of our first paper on this (Hoyle et al., 2019) have been reported by 75+ international news outlets, including Forbes.\nCurrently, we are interested in expanding the above to a cross-lingual study, as well as researching the relationship between gender bias and attitudes towards entities on social media as part of a project funded by DFF.\nMoreover, in a Carlsberg-funded project starting in autumn 2023, we will be investigating fair and accountable Natural Language Processing methods, which can be used to understand what influences the employer images that organisations project in job ads.\n","date":1524700800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1524700800,"objectID":"f488819b45376aa4eb59e4e045239e20","permalink":"https://copenlu.github.io/project/gender-bias/","publishdate":"2018-04-26T00:00:00Z","relpermalink":"/project/gender-bias/","section":"project","summary":"Automatically detecting gendered language, and to what degree attitudes towards entities are influenced by gender bias","tags":["nlu","gender-bias"],"title":"Gender Bias Detection","type":"project"},{"authors":["Arkaitz Zubiaga","Elena Kochkina","Maria Liakata","Rob Procter","Michal Lukasik","Kalina Bontcheva","Trevor Cohn","Isabelle Augenstein"],"categories":null,"content":"","date":1519862400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1519862400,"objectID":"16c50ca2bb654ef1b5375468d0a6cd8a","permalink":"https://copenlu.github.io/publication/2018_ipm_zubiaga/","publishdate":"2018-03-01T00:00:00Z","relpermalink":"/publication/2018_ipm_zubiaga/","section":"publication","summary":"Rumour stance classification, defined as classifying the stance of specific social media posts into one of supporting, denying, querying or commenting on an earlier post, is becoming of increasing interest to researchers. While most previous work has focused on using individual tweets as classifier inputs, here we report on the performance of sequential classifiers that exploit the discourse features inherent in social media interactions or 'conversational threads'. Testing the effectiveness of four sequential classifiers -- Hawkes Processes, Linear-Chain Conditional Random Fields (Linear CRF), Tree-Structured Conditional Random Fields (Tree CRF) and Long Short Term Memory networks (LSTM) -- on eight datasets associated with breaking news stories, and looking at different types of local and contextual features, our work sheds new light on the development of accurate stance classifiers. We show that sequential classifiers that exploit the use of discourse properties in social media conversations while using only local features, outperform non-sequential classifiers. Furthermore, we show that LSTM using a reduced set of features can outperform the other sequential classifiers; this performance is consistent across datasets and across types of stances. To conclude, our work also analyses the different features under study, identifying those that best help characterise and distinguish between stances, such as supporting tweets being more likely to be accompanied by evidence than denying tweets. 
We also set forth a number of directions for future research.","tags":[],"title":"Discourse-Aware Rumour Stance Classification in Social Media Using Sequential Classifiers","type":"publication"},{"authors":null,"categories":null,"content":"","date":1516554421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1516554421,"objectID":"28b1f1dedf78ca52283f0864b4703960","permalink":"https://copenlu.github.io/people/ryan/","publishdate":"2018-01-21T17:07:01Z","relpermalink":"/people/ryan/","section":"people","summary":"","tags":["Affiliated"],"title":"Ryan Cotterell","type":"people"},{"authors":null,"categories":null,"content":"","date":1516036021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1516036021,"objectID":"2370e82131c0cc0e90b344a232fed815","permalink":"https://copenlu.github.io/people/sara/","publishdate":"2018-01-15T17:07:01Z","relpermalink":"/people/sara/","section":"people","summary":"","tags":["Affiliated"],"title":"Sara Vera Marjanovic","type":"people"},{"authors":null,"categories":null,"content":"","date":1515949621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1515949621,"objectID":"c273fdd7b725e7ad5093fcd4787c9be9","permalink":"https://copenlu.github.io/people/amalie/","publishdate":"2018-01-14T17:07:01Z","relpermalink":"/people/amalie/","section":"people","summary":"","tags":["Affiliated"],"title":"Amalie Brogaard Pauli","type":"people"},{"authors":null,"categories":null,"content":"","date":1515776821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1515776821,"objectID":"47418caa0f346a1d0e63f20c716c3127","permalink":"https://copenlu.github.io/people/lucas/","publishdate":"2018-01-12T17:07:01Z","relpermalink":"/people/lucas/","section":"people","summary":"","tags":["Affiliated"],"title":"Lucas Resck","type":"people"},{"authors":null,"categories":null,"content":"","date":1515690421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1515690421,"objectID":"e65f47fa195f3765e260e87f816af8c1","permalink":"https://copenlu.github.io/people/dawar/","publishdate":"2018-01-11T17:07:01Z","relpermalink":"/people/dawar/","section":"people","summary":"","tags":["Affiliated"],"title":"Ahmad Dawar Hakimi","type":"people"},{"authors":["Johannes Bjerva","Isabelle Augenstein"],"categories":null,"content":"","date":1514764800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1514764800,"objectID":"375f4d44665380f3a0c98378eb3e263e","permalink":"https://copenlu.github.io/publication/2018_iwclul_bjerva/","publishdate":"2018-01-01T00:00:00Z","relpermalink":"/publication/2018_iwclul_bjerva/","section":"publication","summary":"*English Abstract*: Although linguistic typology has a long history, computational approaches have only recently gained popularity. The use of distributed representations in computational linguistics has also become increasingly popular. A recent development is to learn distributed representations of language, such that typologically similar languages are spatially close to one another. Although empirical successes have been shown for such language representations, they have not been subjected to much typological probing. In this paper, we first look at whether this type of language representations are empirically useful for model transfer between Uralic languages in deep neural networks. We then investigate which typological features are encoded in these representations by attempting to predict features in the World Atlas of Language Structures, at various stages of fine-tuning of the representations. 
We focus on Uralic languages, and find that some typological traits can be automatically inferred with accuracies well above a strong baseline. *Finnish Abstract*: Vaikka kielitypologialla on pitkä historia, siihen liittyvät laskennalliset menetelmät ovat vasta viime aikoina saavuttaneet suosiota. Myös hajautettujen representaatioiden käyttö laskennallisessa kielitieteessä on tullut yhä suositummaksi. Viimeaikainen kehitys alalla on oppia kielestä hajautettu representaatio, joka esittää samankaltaiset kielet lähellä toisiaan. Vaikka kyseiset representaatiot nauttivatkin empiiristä menestystä, ei niitä ole huomattavasti tutkittu typologisesti. Tässä artikkelissa tutkitaan, ovatko tällaiset kielirepresentaatiot empiirisesti käyttökelpoisia uralilaisten kielten välisissä mallimuunnoksissa syvissä neuroverkoissa. Pyrkimällä ennustamaan piirteitä World Atlas of Language Structures -tietokannassa tutkimme, mitä typologisia ominaisuuksia nämä representaatiot sisältävät. Keskityimme uralilaisiin kieliin ja huomasimme, että jotkin typologiset ominaisuudet voidaan automaattisesti päätellä tarkkuudella, joka ylittää selvästi vahvan perustason.","tags":[],"title":"Tracking Typological Traits of Uralic Languages in Distributed Language Representations","type":"publication"},{"authors":["Ed Collins","Isabelle Augenstein","Sebastian Riedel"],"categories":null,"content":"","date":1498867200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498867200,"objectID":"7551d306de802e85bed4c73da927408b","permalink":"https://copenlu.github.io/publication/2017_conll_collins/","publishdate":"2017-07-01T00:00:00Z","relpermalink":"/publication/2017_conll_collins/","section":"publication","summary":"Automatic summarisation is a popular approach to reduce a document to its main arguments. Recent research in the area has focused on neural approaches to summarisation, which can be very data-hungry. However, few large datasets exist and none for the traditionally popular domain of scientific publications, which opens up challenging research avenues centered on encoding large, complex documents. In this paper, we introduce a new dataset for summarisation of computer science publications by exploiting a large resource of author provided summaries and show straightforward ways of extending it further. We develop models on the dataset making use of both neural sentence encoding and traditionally used summarisation features and show that models which encode sentences as well as their local and global context perform best, significantly outperforming well-established baseline methods.","tags":[],"title":"A Supervised Approach to Extractive Summarisation of Scientific Papers","type":"publication"},{"authors":["Benjamin Riedel","Isabelle Augenstein","Georgios Spithourakis","Sebastian Riedel"],"categories":null,"content":"","date":1498867200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498867200,"objectID":"2ceef4e10df7fe69e161e122c1289476","permalink":"https://copenlu.github.io/publication/2017_arxiv_riedel/","publishdate":"2017-07-01T00:00:00Z","relpermalink":"/publication/2017_arxiv_riedel/","section":"publication","summary":"Identifying public misinformation is a complicated and challenging task. An important part of checking the veracity of a specific claim is to evaluate the stance different news sources take towards the assertion. Automatic stance evaluation, i.e. stance detection, would arguably facilitate the process of fact checking. 
In this paper, we present our stance detection system which claimed third place in Stage 1 of the Fake News Challenge. Despite our straightforward approach, our system performs at a competitive level with the complex ensembles of the top two winning teams. We therefore propose our system as the 'simple but tough-to-beat baseline' for the Fake News Challenge stance detection task.","tags":[],"title":"A simple but tough-to-beat baseline for the Fake News Challenge stance detection task","type":"publication"},{"authors":["Isabelle Augenstein","Leon Derczynski","Kalina Bontcheva"],"categories":null,"content":"","date":1498867200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498867200,"objectID":"7687b70fc2497295c5eadce83e18dbcc","permalink":"https://copenlu.github.io/publication/2017_ipm_augenstein/","publishdate":"2017-07-01T00:00:00Z","relpermalink":"/publication/2017_ipm_augenstein/","section":"publication","summary":"Named Entity Recognition (NER) is a key NLP task, which is all the more challenging on Web and user-generated content with their diverse and continuously changing language. This paper aims to quantify how this diversity impacts state-of-the-art NER methods, by measuring named entity (NE) and context variability, feature sparsity, and their effects on precision and recall. In particular, our findings indicate that NER approaches struggle to generalise in diverse genres with limited training data. Unseen NEs play an especially important role; these have a higher incidence in diverse genres such as social media than in more regular genres such as newswire. Coupled with a higher incidence of unseen features more generally and the lack of large training corpora, this leads to significantly lower F1 scores for diverse genres as compared to more regular ones. We also find that leading systems rely heavily on surface forms found in training data, having problems generalising beyond these, and offer explanations for this observation.","tags":[],"title":"Generalisation in Named Entity Recognition: A Quantitative Analysis","type":"publication"},{"authors":["Isabelle Augenstein","Anders Søgaard"],"categories":null,"content":"","date":1498867200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498867200,"objectID":"531bd207fb677a26642fb1f0fb61a1a0","permalink":"https://copenlu.github.io/publication/2017_acl_augenstein/","publishdate":"2017-07-01T00:00:00Z","relpermalink":"/publication/2017_acl_augenstein/","section":"publication","summary":"Keyphrase boundary classification (KBC) is the task of detecting keyphrases in scientific articles and labelling them with respect to predefined types. Although important in practice, this task has so far been underexplored, partly due to the lack of labelled data. To overcome this, we explore several auxiliary tasks, including semantic super-sense tagging and identification of multi-word expressions, and cast the task as a multi-task learning problem with deep recurrent neural networks. 
Our multi-task models perform significantly better than previous state-of-the-art approaches on two scientific KBC datasets, particularly for long keyphrases.","tags":[],"title":"Multi-Task Learning of Keyphrase Boundary Classification","type":"publication"},{"authors":["Isabelle Augenstein","Mrinal Das","Sebastian Riedel","Lakshmi Vikraman","Andrew McCallum"],"categories":null,"content":"","date":1498780800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498780800,"objectID":"ac64c02d198f19b94c38dd746743353a","permalink":"https://copenlu.github.io/publication/2017_semeval_augenstein/","publishdate":"2017-06-30T00:00:00Z","relpermalink":"/publication/2017_semeval_augenstein/","section":"publication","summary":"We describe the SemEval task of extracting keyphrases and relations between them from scientific documents, which is crucial for understanding which publications describe which processes, tasks and materials. Although this was a new task, we had a total of 26 submissions across 3 evaluation scenarios. We expect the task and the findings reported in this paper to be relevant for researchers working on understanding scientific content, as well as the broader knowledge base population and information extraction communities.","tags":[],"title":"SemEval 2017 Task 10: ScienceIE - Extracting Keyphrases and Relations from Scientific Publications","type":"publication"},{"authors":["Elena Kochkina","Maria Liakata","Isabelle Augenstein"],"categories":null,"content":"","date":1498780800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498780800,"objectID":"feb9b0fc3720c6ab93685d96db50c2f4","permalink":"https://copenlu.github.io/publication/2017_winlp_kochkina/","publishdate":"2017-06-30T00:00:00Z","relpermalink":"/publication/2017_winlp_kochkina/","section":"publication","summary":"Rumour stance classification is a task that involves identifying the attitude of Twitter users towards the truthfulness of the rumour they are discussing. Stance classification is considered to be an important step towards rumour verification; therefore, performing well in this task is expected to be useful in debunking false rumours. In this work we classify a set of Twitter posts discussing rumours into either supporting, denying, questioning or commenting on the underlying rumours. We propose an LSTM-based sequential model that, through modelling the conversational structure of tweets, obtains state-of-the-art accuracy on the SemEval-2017 RumourEval dataset.","tags":[],"title":"Sequential Approach to Rumour Stance Classification","type":"publication"},{"authors":["Elena Kochkina","Maria Liakata","Isabelle Augenstein"],"categories":null,"content":"","date":1498780800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1498780800,"objectID":"420b4382fe0eafec280fa592bc2a70b4","permalink":"https://copenlu.github.io/publication/2017_semeval_kochkina/","publishdate":"2017-06-30T00:00:00Z","relpermalink":"/publication/2017_semeval_kochkina/","section":"publication","summary":"This paper describes team Turing's submission to SemEval 2017 RumourEval: Determining rumour veracity and support for rumours (SemEval 2017 Task 8, Subtask A). Subtask A addresses the challenge of rumour stance classification, which involves identifying the attitude of Twitter users towards the truthfulness of the rumour they are discussing. 
Stance classification is considered to be an important step towards rumour verification; therefore, performing well in this task is expected to be useful in debunking false rumours. In this work we classify a set of Twitter posts discussing rumours into either supporting, denying, questioning or commenting on the underlying rumours. We propose an LSTM-based sequential model that, through modelling the conversational structure of tweets, achieves an accuracy of 0.784 on the RumourEval test set, outperforming all other systems in Subtask A.","tags":[],"title":"Turing at SemEval-2017 Task 8: Sequential Approach to Rumour Stance Classification with Branch-LSTM","type":"publication"},{"authors":null,"categories":null,"content":"Multi-lingual and multi-cultural learning are concerned with training models to work well for multiple languages and cultures, including low-resource ones. We research methods for enabling information sharing between multiple languages, and study how to utilise typological knowledge bases to this end. We further study the cultural awareness of language models. We are currently involved in two larger funded projects on this.\nAs part of a Carlsberg-funded project which started in autumn 2023, we are investigating methods to understand what influences the employer images that organisations project in job ads. This includes research on bias detection, covering gender and cross-cultural biases.\nWe are also a partner in a research project funded by the Swedish Research Council coordinated by Robert Östling. Its goals are to study structured multilinguality, i.e. the idea of using language representations and typological knowledge bases to guide which information to share between specific languages.\nPreviously, we were also involved in the following projects:\nMulti3Generation is a COST Action that funds collaboration of researchers in Europe and abroad. The project is coordinated by Isabelle Augenstein, and its goals are to study language generation using multi-task, multilingual and multi-modal signals.\nAndrea Lekkas' industrial PhD project with Ordbogen, supported by Innovation Fund Denmark, focuses on multilingual language modelling for developing writing assistants.\n","date":1493251200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1493251200,"objectID":"c8405c441be49e9303825023c78eeadf","permalink":"https://copenlu.github.io/project/multilingual-learning/","publishdate":"2017-04-27T00:00:00Z","relpermalink":"/project/multilingual-learning/","section":"project","summary":"Training models to work well for multiple languages and cultures, including low-resource ones","tags":["lld","multilingual-learning"],"title":"Multilingual Learning and Multicultural Learning","type":"project"},{"authors":null,"categories":null,"content":"We are studying methods to automatically process scholarly data. This is to assist researchers in finding publications (e.g. by extracting content from papers automatically, which can be used to populate knowledge bases), writing better papers (e.g. by suggesting which sentences need citations, improving peer review), or tracking their impact (e.g. 
by tracking which papers are highly cited and how this relates to metadata, such as venues or authors).\n","date":1493164800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1493164800,"objectID":"ece1c5bd4a0acc00bae6673243985c24","permalink":"https://copenlu.github.io/project/scholarly-data/","publishdate":"2017-04-26T00:00:00Z","relpermalink":"/project/scholarly-data/","section":"project","summary":"Automatically processing scholarly data to assist researchers in finding publications, writing better papers, or tracking their impact.","tags":["nlu","scholarly-data"],"title":"Scholarly Data Processing","type":"project"},{"authors":["Ziqi Zhang","Anna Lisa Gentile","Isabelle Augenstein","Eva Blomqvist","Fabio Ciravegna"],"categories":null,"content":"","date":1488326400,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1488326400,"objectID":"48dc2495a1f6b9cba1bcd037aed41069","permalink":"https://copenlu.github.io/publication/2017_swj_zhang/","publishdate":"2017-03-01T00:00:00Z","relpermalink":"/publication/2017_swj_zhang/","section":"publication","summary":"The Web of Data is currently undergoing an unprecedented level of growth thanks to the Linked Open Data effort. One escalating issue is the increasing level of heterogeneity in the published resources. This seriously hampers interoperability of Semantic Web applications. A decade of effort in Ontology Alignment research has contributed a rich literature for solving such problems. However, existing methods can still be limited, as 1) they primarily address concepts and entities, while relations are less well studied; 2) many build on the assumption of the ‘well-formedness’ of ontologies, which is not necessarily true in the domain of Linked Open Data; 3) few have looked at schema heterogeneity from a single source, which is also a common issue, particularly in very large Linked Datasets created automatically from heterogeneous resources or integrated from multiple datasets. This article aims to address these issues with a domain- and language-independent and completely unsupervised method to align equivalent relations across schemata based on their shared instances. We propose a novel similarity measure able to cope with unbalanced population of schema elements, an unsupervised technique to automatically decide the similarity threshold at which to assert equivalence for a pair of relations, and an unsupervised clustering process to discover groups of equivalent relations across different schemata. Although the method is designed for aligning relations within a single dataset, it can also be adapted for cross-dataset alignment where sameAs links between datasets have been established. Using three gold standards created from DBpedia, we obtain encouraging results from a thorough evaluation involving four baseline similarity measures and over 15 comparative models based on variants of the proposed method. The proposed method makes significant improvements over baseline models in terms of F1 measure (mostly between 7% and 40%); it always scores the highest precision and is also among the top performers in terms of recall. 
We also make public the datasets used in this work, which we believe constitute the largest collection of gold standards for evaluating relation alignment in the LOD context.","tags":[],"title":"An Unsupervised Data-driven Method to Discover Equivalent Relations in Large Linked Datasets","type":"publication"},{"authors":null,"categories":null,"content":"","date":1486400821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486400821,"objectID":"855afa891310a64312235795128a3440","permalink":"https://copenlu.github.io/people/karolina/","publishdate":"2017-02-06T17:07:01Z","relpermalink":"/people/karolina/","section":"people","summary":"","tags":["Alumni"],"title":"Karolina Stanczak","type":"people"},{"authors":null,"categories":null,"content":"","date":1486314421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486314421,"objectID":"41693197986afd7ab223908f0c971d1f","permalink":"https://copenlu.github.io/people/marta/","publishdate":"2017-02-05T17:07:01Z","relpermalink":"/people/marta/","section":"people","summary":"","tags":["Alumni"],"title":"Marta Marchiori Manerba","type":"people"},{"authors":null,"categories":null,"content":"","date":1486228021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486228021,"objectID":"1d78fe21fd28eff34053fb1f9148bf1e","permalink":"https://copenlu.github.io/people/kevin/","publishdate":"2017-02-04T17:07:01Z","relpermalink":"/people/kevin/","section":"people","summary":"","tags":["Alumni"],"title":"Kevin Roitero","type":"people"},{"authors":null,"categories":null,"content":"","date":1486228021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486228021,"objectID":"3b4f9852a4e8545215604892dafb43d8","permalink":"https://copenlu.github.io/people/nils/","publishdate":"2017-02-04T17:07:01Z","relpermalink":"/people/nils/","section":"people","summary":"","tags":["Alumni"],"title":"Nils Rethmeier","type":"people"},{"authors":null,"categories":null,"content":"","date":1486141621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486141621,"objectID":"c4c6f08f6ebee2bfc4a52e99d6c39430","permalink":"https://copenlu.github.io/people/amelie/","publishdate":"2017-02-03T17:07:01Z","relpermalink":"/people/amelie/","section":"people","summary":"","tags":["Alumni"],"title":"Amelie Wührl","type":"people"},{"authors":null,"categories":null,"content":"","date":1486055221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1486055221,"objectID":"da6c37bedd3f31b1535b3d398ebd58b7","permalink":"https://copenlu.github.io/people/lucie/","publishdate":"2017-02-02T17:07:01Z","relpermalink":"/people/lucie/","section":"people","summary":"","tags":["Alumni"],"title":"Lucie-Aimée Kaffee","type":"people"},{"authors":null,"categories":null,"content":"","date":1485968941,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485968941,"objectID":"a546055b1943e6eddf28c042caf2d888","permalink":"https://copenlu.github.io/people/oscar/","publishdate":"2017-02-01T17:09:01Z","relpermalink":"/people/oscar/","section":"people","summary":"","tags":["Alumni"],"title":"Oscar Kjell","type":"people"},{"authors":null,"categories":null,"content":"","date":1485968821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485968821,"objectID":"7d63205eaae7699bf4c5872340c0fcb8","permalink":"https://copenlu.github.io/people/andreas/","publishdate":"2017-02-01T17:07:01Z","relpermalink":"/people/andreas/","section":"people","summary":"","tags":["Alumni"],"title":"Andreas Nugaard 
Holm","type":"people"},{"authors":null,"categories":null,"content":"","date":1485796021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485796021,"objectID":"e3c56b7a723beffbf645221b0eebdcd0","permalink":"https://copenlu.github.io/people/thea/","publishdate":"2017-01-30T17:07:01Z","relpermalink":"/people/thea/","section":"people","summary":"","tags":["Alumni"],"title":"Thea Rolskov Sloth","type":"people"},{"authors":null,"categories":null,"content":"","date":1485709621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485709621,"objectID":"b5e298280269988c2eb30b9d21073bc8","permalink":"https://copenlu.github.io/people/miryam/","publishdate":"2017-01-29T17:07:01Z","relpermalink":"/people/miryam/","section":"people","summary":"","tags":["Alumni"],"title":"Miryam de Lhoneux","type":"people"},{"authors":null,"categories":null,"content":"","date":1485623221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485623221,"objectID":"ff92ea1819ae4cf0187b8388ec087931","permalink":"https://copenlu.github.io/people/nodens/","publishdate":"2017-01-28T17:07:01Z","relpermalink":"/people/nodens/","section":"people","summary":"","tags":["Alumni"],"title":"Nodens Koren","type":"people"},{"authors":null,"categories":null,"content":"","date":1485536821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485536821,"objectID":"b6af6f4c172219e9270497aa02910eae","permalink":"https://copenlu.github.io/people/klim/","publishdate":"2017-01-27T17:07:01Z","relpermalink":"/people/klim/","section":"people","summary":"","tags":["Alumni"],"title":"Klim Zaporojets","type":"people"},{"authors":null,"categories":null,"content":"","date":1485450541,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485450541,"objectID":"728d073b83d08b45f5ea371bf60f82c8","permalink":"https://copenlu.github.io/people/sagnik/","publishdate":"2017-01-26T17:09:01Z","relpermalink":"/people/sagnik/","section":"people","summary":"","tags":["Alumni"],"title":"Sagnik Ray Choudhury","type":"people"},{"authors":null,"categories":null,"content":"","date":1485450421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485450421,"objectID":"e76f0c009d51d0d1425d7865f94bed4b","permalink":"https://copenlu.github.io/people/shailza/","publishdate":"2017-01-26T17:07:01Z","relpermalink":"/people/shailza/","section":"people","summary":"","tags":["Alumni"],"title":"Shailza Jolly","type":"people"},{"authors":null,"categories":null,"content":"","date":1485364021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485364021,"objectID":"aa7eb934e88b2941f7b2ea1dbd8d2b9a","permalink":"https://copenlu.github.io/people/yova/","publishdate":"2017-01-25T17:07:01Z","relpermalink":"/people/yova/","section":"people","summary":"","tags":["Alumni"],"title":"Yova Kementchedjhieva","type":"people"},{"authors":null,"categories":null,"content":"","date":1485277621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485277621,"objectID":"2aa98cf9e7d8b01f62de357efeda8999","permalink":"https://copenlu.github.io/people/ana/","publishdate":"2017-01-24T17:07:01Z","relpermalink":"/people/ana/","section":"people","summary":"","tags":["Alumni"],"title":"Ana Valeria 
González","type":"people"},{"authors":null,"categories":null,"content":"","date":1485277621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485277621,"objectID":"3b84fd5ae50359e916a4a0606047cdd9","permalink":"https://copenlu.github.io/people/andrea/","publishdate":"2017-01-24T17:07:01Z","relpermalink":"/people/andrea/","section":"people","summary":"","tags":["Alumni"],"title":"Andrea Lekkas","type":"people"},{"authors":null,"categories":null,"content":"","date":1485191341,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485191341,"objectID":"6b65be908a86e77256fbcbf4a3629746","permalink":"https://copenlu.github.io/people/johannes/","publishdate":"2017-01-23T17:09:01Z","relpermalink":"/people/johannes/","section":"people","summary":"","tags":["Alumni"],"title":"Johannes Bjerva","type":"people"},{"authors":null,"categories":null,"content":"","date":1485104821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485104821,"objectID":"47eea0b33db0c4ebcd6f1d5631188871","permalink":"https://copenlu.github.io/people/liesbeth/","publishdate":"2017-01-22T17:07:01Z","relpermalink":"/people/liesbeth/","section":"people","summary":"","tags":["Alumni"],"title":"Liesbeth Allein","type":"people"},{"authors":null,"categories":null,"content":"","date":1485018421,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1485018421,"objectID":"0f90039fbe3f22579f88378edf717cc9","permalink":"https://copenlu.github.io/people/wei/","publishdate":"2017-01-21T17:07:01Z","relpermalink":"/people/wei/","section":"people","summary":"","tags":["Alumni"],"title":"Wei Zhao","type":"people"},{"authors":null,"categories":null,"content":"","date":1484932021,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484932021,"objectID":"c773ea7fff41710544da3ebb32d508e2","permalink":"https://copenlu.github.io/people/mareike/","publishdate":"2017-01-20T17:07:01Z","relpermalink":"/people/mareike/","section":"people","summary":"","tags":["Alumni"],"title":"Mareike Hartmann","type":"people"},{"authors":null,"categories":null,"content":"","date":1484845621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484845621,"objectID":"4a0551a8f7326bd863d21e9e474d8410","permalink":"https://copenlu.github.io/people/farhad/","publishdate":"2017-01-19T17:07:01Z","relpermalink":"/people/farhad/","section":"people","summary":"","tags":["Alumni"],"title":"Farhad Nooralahzadeh","type":"people"},{"authors":null,"categories":null,"content":"","date":1484845621,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484845621,"objectID":"b96bcc0a756b3a2e3e6a041824ced7fb","permalink":"https://copenlu.github.io/people/xuan/","publishdate":"2017-01-19T17:07:01Z","relpermalink":"/people/xuan/","section":"people","summary":"","tags":["Alumni"],"title":"Zhong Xuan","type":"people"},{"authors":null,"categories":null,"content":"","date":1484759221,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484759221,"objectID":"8f7ec6df3ddaddd23752a710340e1517","permalink":"https://copenlu.github.io/people/luna/","publishdate":"2017-01-18T17:07:01Z","relpermalink":"/people/luna/","section":"people","summary":"","tags":["Alumni"],"title":"Luna De 
Bruyne","type":"people"},{"authors":null,"categories":null,"content":"","date":1484672821,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484672821,"objectID":"5e82376c2db939241c73c98ea3326c8b","permalink":"https://copenlu.github.io/people/giannis/","publishdate":"2017-01-17T17:07:01Z","relpermalink":"/people/giannis/","section":"people","summary":"","tags":["Alumni"],"title":"Giannis Bekoulis","type":"people"},{"authors":null,"categories":null,"content":"","date":1484586541,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1484586541,"objectID":"aa55b41abc8e2441b932f86bfd7e67b1","permalink":"https://copenlu.github.io/people/sune/","publishdate":"2017-01-16T17:09:01Z","relpermalink":"/people/sune/","section":"people","summary":"","tags":["Alumni"],"title":"Sune Debel","type":"people"},{"authors":[],"categories":null,"content":" Click on the Slides button above to view the built-in slides feature.\n Slides can be added in a few ways:\n Create slides using Academic\u0026rsquo;s Slides feature and link using url_slides parameter in the front matter of the talk file Upload an existing slide deck to static/ and link using url_slides parameter in the front matter of the talk file Embed your slides (e.g. Google Slides) or presentation video on this page using shortcodes. Further talk details can easily be added to this page using Markdown and $\\rm \\LaTeX$ math code.\n","date":1483228800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1483228800,"objectID":"4acb79057f3b889f1ad90825bccc8b36","permalink":"https://copenlu.github.io/talk_backup/example/","publishdate":"2017-01-01T00:00:00Z","relpermalink":"/talk_backup/example/","section":"talk_backup","summary":"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis posuere tellus ac convallis placerat. Proin tincidunt magna sed ex sollicitudin condimentum. Sed ac faucibus dolor, scelerisque sollicitudin nisi. Cras purus urna, suscipit quis sapien eu, pulvinar tempor diam.","tags":[],"title":"Example Talk","type":"talk_backup"},{"authors":null,"categories":null,"content":"Question answering is concerned with answer user questions, either in a closed-domain or open-domain setting automatically. We are interested in exploiting synergies between question answering and related tasks, such as framing entailment or relation extraction as question answering tasks. Further research interests are question answering in conversational settings, such as for chatbots.\nWe were prevously involved in two funded projects related to this theme: a project on subjectivity in question answering, funded by a faculty research award from Megagon Labs; as well a PhD fellowship grant on conversational question answering for customer support from BotXO, which Ana Valeria Gonzalez works on.\n","date":1461715200,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1461715200,"objectID":"464368a48f0c4982ed7a6e6c606693ca","permalink":"https://copenlu.github.io/project/question-answering/","publishdate":"2016-04-27T00:00:00Z","relpermalink":"/project/question-answering/","section":"project","summary":"Answering questions automatically, including in conversational settings","tags":["nlu","question-answering"],"title":"Question Answering","type":"project"},{"authors":null,"categories":null,"content":"Information extraction is concerned with extracting information about entities, phrases and relations between them from text to populate knowledge bases, such as extracting \u0026ldquo;employee-at\u0026rdquo; relations. 
Within this context, we have worked on automatic knowledge base completion, knowledge base cleansing and detecting scientific keyphrases in text, as well as automatic completion of typological knowledge bases.\nWe are currently involved in one longer-term project related to this, namely a research project funded by the Swedish Research Council coordinated by Robert Östling. Its goals are to study structured multilinguality, i.e. the idea of using language representations and typological knowledge bases to guide which information to share between specific languages.\n","date":1459036800,"expirydate":-62135596800,"kind":"page","lang":"en","lastmod":1459036800,"objectID":"dbf6be1f58ca6227cd1a2b0d1e198a14","permalink":"https://copenlu.github.io/project/knowledge-bases/","publishdate":"2016-03-27T00:00:00Z","relpermalink":"/project/knowledge-bases/","section":"project","summary":"Extract information about entities, phrases and relations between them from text to populate knowledge bases","tags":["nlu","knowledge-bases"],"title":"Knowledge Base Population","type":"project"}]