diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
new file mode 100644
index 00000000..e4e8eaea
--- /dev/null
+++ b/.github/workflows/tests.yaml
@@ -0,0 +1,22 @@
+name: Docker Build and Run
+
+on:
+  push:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build Docker Image
+        run: |
+          docker build . -t needle-in-a-haystack
+
+      # The image's ENTRYPOINT is `python` (see Dockerfile); override it so the container runs pytest.
+      - name: Run Docker Container
+        run: |
+          docker run --entrypoint pytest -t needle-in-a-haystack
diff --git a/.gitignore b/.gitignore
index 283b090c..0c393a98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,9 +162,4 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
-# PyCharm
-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-# and can be added to the global gitignore or merged into this file. For a more nuclear
-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+.idea/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..af1c305c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.12
+
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# Dependencies are installed before the source is copied, so this layer can be cached across code-only changes.
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+ENTRYPOINT ["python"]
+CMD ["main.py"]
diff --git a/requirements.txt b/requirements.txt
index e281cdc6..4e47291d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,4 +46,5 @@ tqdm==4.66.1
 typing-inspect==0.9.0
 typing_extensions==4.8.0
 urllib3==2.1.0
-yarl==1.9.3
\ No newline at end of file
+yarl==1.9.3
+pytest==8.1.1
diff --git a/tests/test_evaluators.py b/tests/test_evaluators.py
new file mode 100644
index 00000000..8e58b60e
--- /dev/null
+++ b/tests/test_evaluators.py
@@ -0,0 +1,30 @@
+from unittest.mock import patch, MagicMock, call, ANY
+
+from needlehaystack.evaluators import OpenAIEvaluator
+
+QUESTION_ASKED = "What is the color of the sky?"
+QUESTION_ANSWER = "Sky is blue"
+API_KEY = "abc"
+SCORE = 123
+TEMPERATURE = 0
+MODEL = "gpt-3.5-turbo-0125"
+
+
+@patch('needlehaystack.evaluators.openai.ChatOpenAI')
+@patch('needlehaystack.evaluators.openai.load_evaluator')
+def test_openai(mock_load_evaluator, mock_chat_open_ai, monkeypatch):
+    """Check the arguments OpenAIEvaluator passes to its mocked dependencies and that the string score comes back as an int."""
+    monkeypatch.setenv('NIAH_EVALUATOR_API_KEY', API_KEY)
+
+    mock_evaluator = MagicMock()
+    mock_evaluator.evaluate_strings.return_value = {'score': str(SCORE)}
+
+    mock_load_evaluator.return_value = mock_evaluator
+
+    evaluator = OpenAIEvaluator(question_asked=QUESTION_ASKED, true_answer=QUESTION_ANSWER)
+    result = evaluator.evaluate_response("Something")
+
+    assert mock_chat_open_ai.call_args == call(model=MODEL, temperature=TEMPERATURE, openai_api_key=API_KEY)
+    assert mock_load_evaluator.call_args == call('labeled_score_string', criteria=OpenAIEvaluator.CRITERIA, llm=ANY)
+
+    assert result == SCORE