Skip to content

Commit ff898bb

Browse files
author
Duc Hoang
committed
Fix TruthfulQA generative evaluation bugs
1. Fix KeyError by removing the mc1_targets lookup: the mc1_targets field only exists in the multiple_choice subset, not in the generation subset used by the truthfulqa:gen task.
2. Fix the inverted answer-processing logic: answers that did not already end in a period were being replaced with a bare "." instead of having the period appended to the preserved answer text.

These fixes make truthfulqa:gen functional for proper evaluation. Task format: lighteval|truthfulqa:gen|0
1 parent 9b2ca83 commit ff898bb

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

src/lighteval/tasks/default_prompts.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2478,20 +2478,19 @@ def truthful_qa_multiple_choice(line, task_name: str = None):
24782478

24792479
def truthful_qa_generative(line, task_name: str = None): # BLEU and combination of BLEU
24802480
correct_answers = [
2481-
answer.strip() + "" if answer[-1] == "." else "." for answer in line["correct_answers"] if answer != ""
2481+
answer.strip() + ("" if answer.strip().endswith(".") else ".") for answer in line["correct_answers"] if answer.strip() != ""
24822482
]
24832483
if "I have no comment." not in correct_answers:
24842484
correct_answers.append("I have no comment.")
24852485
incorrect_answers = [
2486-
answer.strip() + "" if answer[-1] == "." else "." for answer in line["incorrect_answers"] if answer != ""
2486+
answer.strip() + ("" if answer.strip().endswith(".") else ".") for answer in line["incorrect_answers"] if answer.strip() != ""
24872487
]
24882488

24892489
return Doc(
24902490
task_name=task_name,
24912491
query=line["question"].strip(),
24922492
choices=correct_answers + incorrect_answers,
24932493
gold_index=list(range(len(correct_answers))),
2494-
specific={"len_mc1": len(line["mc1_targets"]["choices"])},
24952494
)
24962495

24972496

0 commit comments

Comments
 (0)