86 lines
2.4 KiB
Python
86 lines
2.4 KiB
Python
|
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
"""Custom evaluation tasks for LightEval."""
|
||
|
|
||
|
from lighteval.metrics.dynamic_metrics import (
|
||
|
ExprExtractionConfig,
|
||
|
LatexExtractionConfig,
|
||
|
multilingual_extractive_match_metric,
|
||
|
)
|
||
|
from lighteval.tasks.lighteval_task import LightevalTaskConfig
|
||
|
from lighteval.tasks.requests import Doc
|
||
|
from lighteval.utils.language import Language
|
||
|
|
||
|
|
||
|
# Scorer shared by every task defined in this module.
# NOTE(review): the exact semantics of each option are defined by lighteval's
# multilingual_extractive_match_metric -- confirm against its documentation.
metric = multilingual_extractive_match_metric(
    language=Language.ENGLISH,
    # Gold answers get the LaTeX extraction config only; predictions get both
    # the expression and the LaTeX extraction configs.
    gold_extraction_target=(LatexExtractionConfig(),),
    pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
    fallback_mode="first_match",
    precision=5,
    aggregation_function=max,
)
|
||
|
|
||
|
|
||
|
def prompt_fn(line, task_name: str = None):
    """Turn one dataset row into a LightEval ``Doc``.

    The problem text is used as the query without any added instructions, so
    the model is assumed to emit \\boxed{answer} either because it was
    prompted to elsewhere or because it does so automatically.

    Args:
        line: Row supporting ``line["problem"]`` and ``line["solution"]``.
        task_name: Task name forwarded unchanged to the ``Doc``.

    Returns:
        A ``Doc`` whose single choice is the row's solution, marked as gold
        (index 0).
    """
    question = line["problem"]
    reference = line["solution"]
    return Doc(task_name=task_name, query=question, choices=[reference], gold_index=0)
|
||
|
|
||
|
|
||
|
# Define tasks
|
||
|
# Task "aime24", backed by the HuggingFaceH4/aime_2024 dataset.
aime24 = LightevalTaskConfig(
    name="aime24",
    suite=["custom"],
    version=1,
    # Data source: only a "train" split is declared, and it is the one evaluated.
    hf_repo="HuggingFaceH4/aime_2024",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation budget and scoring.
    prompt_function=prompt_fn,
    generation_size=32768,
    metric=[metric],
)
|
||
|
# Task "math_500", backed by the HuggingFaceH4/MATH-500 dataset.
math_500 = LightevalTaskConfig(
    name="math_500",
    suite=["custom"],
    version=1,
    # Data source: only a "test" split is declared, and it is the one evaluated.
    hf_repo="HuggingFaceH4/MATH-500",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # No few-shot split or selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Prompt construction, generation budget and scoring.
    prompt_function=prompt_fn,
    generation_size=32768,
    metric=[metric],
)
|
||
|
|
||
|
# Task table listing the configs defined in this module.
TASKS_TABLE = [aime24, math_500]
|
||
|
|
||
|
# MODULE LOGIC
|
||
|
if __name__ == "__main__":
    # When run as a script, print the registered task names and their count.
    # The entries are LightevalTaskConfig objects built with `name=...`, not
    # dicts, so the name is read as an attribute; subscripting with ["name"]
    # would raise TypeError.
    print([t.name for t in TASKS_TABLE])
    print(len(TASKS_TABLE))
|