Skip to content

Commit dae9683

Browse files
authored
Tr/updates23 (qodo-ai#2008)
* fix: improve PR description field guidance for clarity
* feat: refine suggestion guidelines to avoid redundant recommendations in PR reviews
* feat: enhance YAML parsing logic with additional keys and fallback strategies
* fix: update expected output format in YAML parsing test case
1 parent 5fc466b commit dae9683

File tree

4 files changed

+49
-22
lines changed

4 files changed

+49
-22
lines changed

pr_agent/algo/utils.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,8 @@ def try_fix_yaml(response_text: str,
772772
response_text_original="") -> dict:
773773
response_text_lines = response_text.split('\n')
774774

775-
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:', 'label:']
775+
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:',
776+
'improved code:', 'label:', 'why:', 'suggestion_summary:']
776777
keys_yaml = keys_yaml + keys_fix_yaml
777778

778779
# first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...'
@@ -847,12 +848,13 @@ def try_fix_yaml(response_text: str,
847848
if index_end == -1:
848849
index_end = len(response_text)
849850
response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip()
850-
try:
851-
data = yaml.safe_load(response_text_copy)
852-
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
853-
return data
854-
except:
855-
pass
851+
if response_text_copy:
852+
try:
853+
data = yaml.safe_load(response_text_copy)
854+
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
855+
return data
856+
except:
857+
pass
856858

857859
# fifth fallback - try to remove leading '+' (sometimes added by AI for 'existing code' and 'improved code')
858860
response_text_lines_copy = response_text_lines.copy()
@@ -881,21 +883,46 @@ def try_fix_yaml(response_text: str,
881883
response_text_copy = copy.deepcopy(response_text)
882884
response_text_copy_lines = response_text_copy.split('\n')
883885
start_line = -1
886+
improve_sections = ['existing_code:', 'improved_code:', 'response:', 'why:']
887+
describe_sections = ['description:', 'title:', 'changes_diagram:', 'pr_files:', 'pr_ticket:']
884888
for i, line in enumerate(response_text_copy_lines):
885-
if 'existing_code:' in line or 'improved_code:' in line:
889+
line_stripped = line.rstrip()
890+
if any(key in line_stripped for key in (improve_sections+describe_sections)):
886891
start_line = i
887-
elif line.endswith(': |') or line.endswith(': |-') or line.endswith(': |2') or line.endswith(':'):
892+
elif line_stripped.endswith(': |') or line_stripped.endswith(': |-') or line_stripped.endswith(': |2') or any(line_stripped.endswith(key) for key in keys_yaml):
888893
start_line = -1
889894
elif start_line != -1:
890895
response_text_copy_lines[i] = ' ' + line
891896
response_text_copy = '\n'.join(response_text_copy_lines)
897+
response_text_copy = response_text_copy.replace(' |\n', ' |2\n')
892898
try:
893899
data = yaml.safe_load(response_text_copy)
894900
get_logger().info(f"Successfully parsed AI prediction after adding indent for sections of code blocks")
895901
return data
896902
except:
897903
pass
898904

905+
# eighth fallback - try to remove pipe chars at the root-level dicts
906+
response_text_copy = copy.deepcopy(response_text)
907+
response_text_copy = response_text_copy.lstrip('|\n')
908+
try:
909+
data = yaml.safe_load(response_text_copy)
910+
get_logger().info(f"Successfully parsed AI prediction after removing pipe chars")
911+
return data
912+
except:
913+
pass
914+
915+
# ninth fallback - try to decode the response text with different encodings. GPT-5 can return text that is not utf-8 encoded.
916+
encodings_to_try = ['latin-1', 'utf-16']
917+
for encoding in encodings_to_try:
918+
try:
919+
data = yaml.safe_load(response_text.encode(encoding).decode("utf-8"))
920+
if data:
921+
get_logger().info(f"Successfully parsed AI prediction after decoding with {encoding} encoding")
922+
return data
923+
except:
924+
pass
925+
899926
# # sixth fallback - try to remove last lines
900927
# for i in range(1, len(response_text_lines)):
901928
# response_text_lines_tmp = '\n'.join(response_text_lines[:-i])

pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,15 @@ Specific guidelines for generating code suggestions:
6363
- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.
6464
{%- else %}
6565
- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
66-
- Do not suggest to change packages version, add missing import statement, or declare undefined variable.
66+
- DO NOT suggest the following:
67+
- change packages version
68+
- add missing import statement
69+
- declare undefined variable, or remove unused variable
70+
- use more specific exception types
71+
- repeat changes already done in the PR code
6772
{%- endif %}
73+
- Be aware that your input consists only of partial code segments (PR diff code), not the complete codebase. Therefore, avoid making suggestions that might duplicate existing functionality, and refrain from questioning code elements (such as variable declarations or import statements) that may be defined elsewhere in the codebase.
6874
- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."
69-
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.
7075
7176
{%- if extra_instructions %}
7277

pr_agent/settings/pr_description_prompts.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class FileDescription(BaseModel):
4545
4646
class PRDescription(BaseModel):
4747
type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')")
48-
description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.")
48+
description: str = Field(description="summarize the PR changes with 1-4 bullet points, each up to 8 words. For large PRs, add sub-bullets for each bullet if needed. Order bullets by importance, with each bullet highlighting a key change group.")
4949
title: str = Field(description="a concise and descriptive title that captures the PR's main theme")
5050
{%- if enable_pr_diagram %}
5151
changes_diagram: str = Field(description='a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID["node description"]. (2) Then define the links: nodeID1 -- "link text" --> nodeID2. Node description must always be surrounded with double quotation marks')
@@ -63,7 +63,8 @@ type:
6363
- ...
6464
- ...
6565
description: |
66-
...
66+
- ...
67+
- ...
6768
title: |
6869
...
6970
{%- if enable_pr_diagram %}
@@ -151,7 +152,8 @@ type:
151152
- Refactoring
152153
- ...
153154
description: |
154-
...
155+
- ...
156+
- ...
155157
title: |
156158
...
157159
{%- if enable_pr_diagram %}

tests/unittest/test_try_fix_yaml.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -242,12 +242,5 @@ def test_wrong_indentation_code_block_scalar(self):
242242
return a - b;
243243
}
244244
'''
245-
expected_output = {
246-
"code_suggestions": [
247-
{
248-
"relevant_file": "a.c\n",
249-
"existing_code": expected_code_block
250-
}
251-
]
252-
}
245+
expected_output = {'code_suggestions': [{'relevant_file': 'a.c\n', 'existing_code': ' int sum(int a, int b) {\n return a + b;\n }\n\n int sub(int a, int b) {\n return a - b;\n }\n'}]}
253246
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output

0 commit comments

Comments
 (0)