Skip to content

Commit

Permalink
Add configurable retries, make flask optional, usable in Mech (#20)
Browse files Browse the repository at this point in the history
* Make it usable in Mech

* Make Flask optional

* Add result dates to the Serper retriever

* Add configurable retries

* Update serper_retriever.py
  • Loading branch information
kongzii authored May 12, 2024
1 parent f620f6e commit af71b0f
Show file tree
Hide file tree
Showing 4 changed files with 369 additions and 367 deletions.
6 changes: 4 additions & 2 deletions factcheck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
evidence_retrieval_model: str = None,
claim_verify_model: str = None,
api_config: dict = None,
num_seed_retries: int = 3,
):
self.encoding = tiktoken.get_encoding("cl100k_base")

Expand Down Expand Up @@ -63,6 +64,7 @@ def __init__(
self.query_generator = QueryGenerator(llm_client=self.query_generator_model, prompt=self.prompt)
self.evidence_crawler = retriever_mapper(retriever_name=retriever)(api_config=self.api_config)
self.claimverify = ClaimVerify(llm_client=self.claim_verify_model, prompt=self.prompt)
self.num_seed_retries = num_seed_retries

logger.info("===Sub-modules Init Finished===")

Expand All @@ -73,15 +75,15 @@ def load_config(self, api_config: dict) -> None:
def check_response(self, response: str):
st_time = time.time()
# step 1
claims = self.decomposer.getclaims(doc=response)
claims = self.decomposer.getclaims(doc=response, num_retries=self.num_seed_retries)
for i, claim in enumerate(claims):
logger.info(f"== response claims {i}: {claim}")

# step 2
(
checkworthy_claims,
pairwise_checkworthy,
) = self.checkworthy.identify_checkworthiness(claims)
) = self.checkworthy.identify_checkworthiness(claims, num_retries=self.num_seed_retries)
for i, claim in enumerate(checkworthy_claims):
logger.info(f"== Check-worthy claims {i}: {claim}")

Expand Down
3 changes: 3 additions & 0 deletions factcheck/core/Retriever/serper_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def _retrieve_evidence_4_all_claim(self, query_list: list[str], top_k: int = 5,

# get the results for queries with an answer box
query_url_dict = {}
url_to_date = {} # TODO: decide whether to use date
_snippet_to_check = []
for i, (query, result) in enumerate(zip(query_list, serper_response.json())):
if query != result.get("searchParameters").get("q"):
Expand All @@ -92,6 +93,8 @@ def _retrieve_evidence_4_all_claim(self, query_list: list[str], top_k: int = 5,
"url": "Multiple",
}

# Save date for each url
url_to_date.update({result.get("link"): result.get("date") for result in results})
# Save query-url pair, 1 query may have multiple urls
query_url_dict.update({query: [result.get("link") for result in results]})
_snippet_to_check += [result["snippet"] for result in results]
Expand Down
Loading

0 comments on commit af71b0f

Please sign in to comment.