# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Custom evaluation tasks for LightEval.

Defines two math tasks (``aime24`` and ``math_500``) that share one prompt
function and one extractive-match metric, and exposes them through the
module-level ``TASKS_TABLE`` list.
"""

from typing import Optional

from lighteval.metrics.dynamic_metrics import (
    ExprExtractionConfig,
    LatexExtractionConfig,
    multilingual_extractive_match_metric,
)
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc
from lighteval.utils.language import Language


# Shared metric for both tasks. The gold side is configured with a LaTeX
# extraction target only, while the prediction side accepts either a plain
# expression or LaTeX; `max` is used as the aggregation function.
metric = multilingual_extractive_match_metric(
    language=Language.ENGLISH,
    fallback_mode="first_match",
    precision=5,
    gold_extraction_target=(LatexExtractionConfig(),),
    pred_extraction_target=(ExprExtractionConfig(), LatexExtractionConfig()),
    aggregation_function=max,
)


def prompt_fn(line, task_name: Optional[str] = None):
    r"""Build a LightEval ``Doc`` from one dataset row.

    Assumes the model is either prompted to emit \boxed{answer} or does so
    automatically.

    Args:
        line: Dataset row; must provide the ``"problem"`` and ``"solution"``
            keys.
        task_name: Name of the task the document belongs to, forwarded
            unchanged to the ``Doc``.

    Returns:
        A ``Doc`` whose query is the problem text and whose single choice
        (also the gold one, at index 0) is the reference solution.

    Raises:
        KeyError: If ``line`` lacks ``"problem"`` or ``"solution"`` (assuming
            ``line`` is a plain mapping).
    """
    return Doc(
        task_name=task_name,
        query=line["problem"],
        choices=[line["solution"]],
        gold_index=0,
    )


# Define tasks
# Both tasks are zero-shot (no few-shot split or selection strategy) and are
# evaluated on the only split each dataset is declared to provide.
aime24 = LightevalTaskConfig(
    name="aime24",
    suite=["custom"],
    prompt_function=prompt_fn,
    hf_repo="HuggingFaceH4/aime_2024",
    hf_subset="default",
    hf_avail_splits=["train"],
    evaluation_splits=["train"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=32768,
    metric=[metric],
    version=1,
)
math_500 = LightevalTaskConfig(
    name="math_500",
    suite=["custom"],
    prompt_function=prompt_fn,
    hf_repo="HuggingFaceH4/MATH-500",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    few_shots_split=None,
    few_shots_select=None,
    generation_size=32768,
    metric=[metric],
    version=1,
)

# Add tasks to the table
TASKS_TABLE = [aime24, math_500]

# MODULE LOGIC
if __name__ == "__main__":
    # Task configs are objects, not dicts, so the name is read as an attribute.
    print([t.name for t in TASKS_TABLE])
    print(len(TASKS_TABLE))