@@ -115,6 +115,61 @@ def subset_from_split(self, split: Literal["train", "valid", "test"]):
115115
116116 return sub_benchmark
117117
def subset_from_list(
    self,
    task_list: list[str],
    benchmark_name_suffix: Optional[str] = "custom",
    split: Optional[str] = None,
) -> "Benchmark":
    """Create a sub-benchmark containing only the specified tasks.

    Every entry of ``self.env_args_list`` whose ``task_name`` appears in
    ``task_list`` is kept, in its original order. Duplicate names in
    ``task_list`` are harmless. All other benchmark attributes, including
    ``task_metadata``, are passed to the new benchmark unchanged.

    Args:
        task_list: Names of the tasks to include in the sub-benchmark.
        benchmark_name_suffix: Suffix appended to the benchmark name as
            ``"<name>_<suffix>"``. Defaults to "custom". ``None`` or an
            empty string omits the suffix.
        split: Optional split name appended after the suffix as
            ``"_<split>"``. It only affects the name, not which tasks are
            selected.

    Returns:
        Benchmark: A new benchmark instance containing only the specified tasks.

    Raises:
        ValueError: If ``task_list`` is empty, or if any requested task name
            does not exist in this benchmark.
    """
    if not task_list:
        raise ValueError("Task list cannot be empty")

    # A set gives O(1) membership tests in the filter below.
    requested = set(task_list)

    # Reject unknown task names up front instead of silently dropping them.
    known = {env_args.task_name for env_args in self.env_args_list}
    unknown = requested - known
    if unknown:
        # Sorted so the error message is deterministic across runs.
        raise ValueError(
            f"The following tasks do not exist in the benchmark: {sorted(unknown)}"
        )

    # Build "<name>[_<suffix>][_<split>]", skipping parts that are None/empty
    # so a None suffix does not end up as the literal text "None".
    name = self.name
    for part in (benchmark_name_suffix, split):
        if part:
            name += f"_{part}"

    # No emptiness check is needed after filtering: task_list is non-empty and
    # every requested name was just verified to exist in self.env_args_list.
    return Benchmark(
        name=name,
        high_level_action_set_args=self.high_level_action_set_args,
        is_multi_tab=self.is_multi_tab,
        supports_parallel_seeds=self.supports_parallel_seeds,
        backends=self.backends,
        env_args_list=[
            env_args for env_args in self.env_args_list if env_args.task_name in requested
        ],
        task_metadata=self.task_metadata,
    )
172+
118173 def subset_from_glob (self , column , glob ):
119174 subset = self .subset_from_regexp (column , regexp = fnmatch .translate (glob ))
120175 subset .name = f"{ self .name } [{ column } ={ glob } ]"
0 commit comments