-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
106 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# SemEval-2025 Task-3 — Mu-SHROOM, the Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes | ||
|
||
|
||
[Description](#description) | [Data](#data) | [Models](#models) | [Results](#results) | [Leaderboard](#leaderboard) |[Contributors](#contributors) | ||
|
||
In this repo, we provide our solution to solve [Mu-SHROOM, the Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes](https://helsinki-nlp.github.io/shroom/). | ||
|
||
## Description | ||
|
||
ARKHN aims to detect hallucination spans in the outputs of instruction-tuned LLMs in a multilingual context in Mu-SHROOM, which stands for “Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes”. | ||
|
||
Evaluation metrics: | ||
- intersection-over-union of characters marked as hallucinations in the gold reference vs. predicted as such | ||
- how well the probability assigned by the participants’ system that a character is part of a hallucination correlates with the empirical probabilities observed in our annotators. | ||
|
||
## Data | ||
(updating) | ||
|
||
## Models | ||
(updating) | ||
|
||
## Results | ||
(updating) | ||
|
||
## Leaderboard | ||
(updating) | ||
|
||
## Contributors | ||
(updating) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
[build-system] | ||
build-backend = "poetry.core.masonry.api" | ||
requires = ["poetry-core>=1.0.0"] | ||
|
||
[tool] | ||
|
||
[tool.black] | ||
exclude = ''' | ||
/( | ||
\.eggs | ||
| \.git | ||
| \.hg | ||
| \.mypy_cache | ||
| \.tox | ||
| \.venv | ||
| _build | ||
| buck-out | ||
| build | ||
| dist | ||
)/ | ||
''' | ||
include = '\.pyi?$' | ||
line-length = 100 | ||
target-version = ['py36', 'py37', 'py38', 'py39'] | ||
|
||
[tool.coverage.report] | ||
exclude_lines = [ | ||
"if __name__ == .__main__.:", | ||
"pass", | ||
"pragma: no cover", | ||
"raise NotImplementedError", | ||
"raise TypeError", | ||
"raise ValueError" | ||
] | ||
|
||
[tool.isort] | ||
known_local_folder = [ | ||
'style-transfer', | ||
'tests' | ||
] | ||
line_length = 100 | ||
profile = 'black' | ||
|
||
[tool.poetry] | ||
authors = ["Arkhn's AI Team <[email protected]>"] | ||
description = "a package to experiement style transfer using RLF, LLM, ReST ?" | ||
license = "Apache-2.0" | ||
name = "style-transfer" | ||
readme = "README.md" | ||
repository = "https://github.com/arkhn/open-nlp" | ||
version = "0.1.0" | ||
|
||
[tool.poetry.dependencies] | ||
datasets = "^2.16.1" | ||
hydra-colorlog = "^1.2.0" | ||
hydra-core = "^1.3.2" | ||
hydra-joblib-launcher = "^1.2.0" | ||
langchain = "^0.3.0" | ||
langchain-community = "^0.3.0" | ||
langchain-core = "^0.3.0" | ||
langchain-groq = "^0.2.0" | ||
pandas = "^2.2.0" | ||
python = "~3.11" | ||
rich = "^13.5.2" | ||
tqdm = "^4.66.1" | ||
wandb = "^0.16.3" | ||
|
||
[tool.poetry.dev-dependencies] | ||
pytest = "^7.4.2" | ||
pytest-cov = "^4.1.0" | ||
|
||
[tool.pytest.ini_options] | ||
addopts = "--cov-report term-missing:skip-covered" | ||
markers = [ | ||
"serial", | ||
"slow: marks tests as slow (deselect with '-m \"not slow\"')" | ||
] |