@@ -83,25 +83,25 @@ def _validate_dataframe(self, dataframe: pd.DataFrame):
         if conflict:
             raise ValueError(f"The following column(s) already exist and would be overwritten by CodeQualitySampleEvaluator: {conflict}")

-    def _score_func(self, instruction: str, code: str) -> Tuple[int, str]:
+    def _build_prompts(self, dataframe: pd.DataFrame, input_instruction_key: str, input_code_key: str) -> List[str]:
         """
-        Evaluate a single instruction-code pair and return score and feedback.
+        Build prompts for all instruction-code pairs in the dataframe.

         Args:
-            instruction: The instruction text
-            code: The generated code text
+            dataframe: Input DataFrame
+            input_instruction_key: Field name containing instructions
+            input_code_key: Field name containing code

         Returns:
-            Tuple of (score, feedback) where score is an integer and feedback is a string
+            List of prompt strings
         """
-        prompt = self.prompt_template.build_prompt(instruction=instruction, code=code)
-        response = self.llm_serving.generate_from_input(user_inputs=[prompt], system_prompt="")
-
-        if not response or len(response) == 0:
-            self.logger.warning("Empty response from LLM")
-            return 0, "No response from LLM"
-
-        return self._parse_score_and_feedback(response[0])
+        prompts = []
+        for _, row in dataframe.iterrows():
+            instruction = row[input_instruction_key]
+            code = row[input_code_key]
+            prompt = self.prompt_template.build_prompt(instruction=instruction, code=code)
+            prompts.append(prompt)
+        return prompts

     def _parse_score_and_feedback(self, response: str) -> Tuple[int, str]:
         """
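For reference, `_parse_score_and_feedback` (unchanged by this commit) now runs once per batched response instead of once per serving call. A minimal sketch of what such a parser could look like is below; the `Score:`/`Feedback:` response layout is an illustrative assumption, not something confirmed by this diff:

```python
import re
from typing import Tuple

def parse_score_and_feedback(response: str) -> Tuple[int, str]:
    """Hypothetical parser with the same signature as _parse_score_and_feedback.

    Assumes the LLM replies in a "Score: <int> / Feedback: <text>" layout,
    which is an assumed convention for illustration only.
    """
    score_match = re.search(r"Score:\s*(\d+)", response)
    feedback_match = re.search(r"Feedback:\s*(.+)", response, re.DOTALL)
    score = int(score_match.group(1)) if score_match else 0
    feedback = feedback_match.group(1).strip() if feedback_match else "Unparseable response"
    return score, feedback
```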
@@ -133,23 +133,34 @@ def eval(self, dataframe: pd.DataFrame, input_instruction_key: str, input_code_key: str):

         Args:
             dataframe: Input DataFrame
-            input_key: Field name containing instruction-code pairs (as dict with 'instruction' and 'code' keys)
+            input_instruction_key: Field name containing instructions
+            input_code_key: Field name containing code

         Returns:
             Tuple of (scores, feedbacks) lists
         """
         self.logger.info(f"Evaluating {self.score_name}...")

-        scores = []
-        feedbacks = []
+        # Build all prompts at once
+        prompts = self._build_prompts(dataframe, input_instruction_key, input_code_key)

-        for _, row in dataframe.iterrows():
-            instruction = row[input_instruction_key]
-            code = row[input_code_key]
-
-            score, feedback = self._score_func(instruction, code)
-            scores.append(score)
-            feedbacks.append(feedback)
+        # Generate responses in batch
+        responses = self.llm_serving.generate_from_input(user_inputs=prompts, system_prompt="")
+
+        if not responses or len(responses) == 0:
+            self.logger.warning("Empty response from LLM")
+            scores = [0] * len(dataframe)
+            feedbacks = ["No response from LLM"] * len(dataframe)
+        elif len(responses) != len(dataframe):
+            self.logger.warning(f"Response count ({len(responses)}) doesn't match dataframe length ({len(dataframe)})")
+            scores = [0] * len(dataframe)
+            feedbacks = ["Response count mismatch"] * len(dataframe)
+        else:
+            # Parse all responses
+            results = [self._parse_score_and_feedback(response) for response in responses]
+            scores, feedbacks = zip(*results)
+            scores = list(scores)
+            feedbacks = list(feedbacks)

         self.logger.info("Evaluation complete!")
         return scores, feedbacks
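The net effect of this commit is one `generate_from_input` call per `eval` invocation instead of one per row, with explicit fallbacks for empty or misaligned response lists. A quick way to exercise the batch contract is to stub out the serving layer; the `FakeServing` class and sample DataFrame below are illustrative, not part of the repository:

```python
import pandas as pd

class FakeServing:
    """Stand-in for llm_serving: returns exactly one response per prompt,
    mirroring the generate_from_input contract that eval() relies on."""

    def generate_from_input(self, user_inputs, system_prompt=""):
        return ["Score: 4\nFeedback: ok" for _ in user_inputs]

df = pd.DataFrame({
    "instruction": ["Sort a list", "Reverse a string"],
    "code": ["sorted(xs)", "s[::-1]"],
})

serving = FakeServing()
prompts = [f"{row['instruction']}\n{row['code']}" for _, row in df.iterrows()]
responses = serving.generate_from_input(user_inputs=prompts, system_prompt="")
assert len(responses) == len(df)  # the invariant eval() now checks explicitly
```

Zeroing out all scores on a count mismatch keeps the output columns aligned with the input rows, at the cost of discarding any partial results.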