Skip to content

Commit d33bc57

Browse files
committed
fix: make GitHub PR-description ticket selection deterministic
extract_ticket_links_from_pr_description() accumulated issue URLs in a set, then applied the MAX_TICKETS cap by slicing list(set), which picks an arbitrary subset that can vary between runs. Track URLs in first-seen order while de-duplicating (mirroring find_jira_tickets), so the cap keeps a stable, predictable subset. Co-Authored-By: Claude
1 parent cb9ee43 commit d33bc57

2 files changed

Lines changed: 43 additions & 7 deletions

File tree

pr_agent/tools/ticket_pr_compliance_check.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,31 +191,40 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url
191191
"""
192192
Extract all ticket links from PR description
193193
"""
194-
github_tickets = set()
194+
# Preserve first-seen order while de-duplicating, so the MAX_TICKETS cap below
195+
# selects a deterministic subset (a plain set would slice an arbitrary one).
196+
seen = set()
197+
github_tickets = []
198+
199+
def _add(url):
200+
if url not in seen:
201+
seen.add(url)
202+
github_tickets.append(url)
203+
195204
try:
196205
# Use the updated pattern to find matches
197206
matches = GITHUB_TICKET_PATTERN.findall(pr_description)
198207

199208
for match in matches:
200209
if match[0]: # Full URL match
201-
github_tickets.add(match[0])
210+
_add(match[0])
202211
elif match[1]: # Shorthand notation match: owner/repo#issue_number
203212
owner, repo, issue_number = match[2], match[3], match[4]
204-
github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}')
213+
_add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}')
205214
else: # #123 format
206215
issue_number = match[5][1:] # remove #
207216
if issue_number.isdigit() and len(issue_number) < 5 and repo_path:
208-
github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}')
217+
_add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}')
209218

210219
if len(github_tickets) > MAX_TICKETS:
211220
get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}")
212221
# Limit the number of tickets
213-
github_tickets = set(list(github_tickets)[:MAX_TICKETS])
222+
github_tickets = github_tickets[:MAX_TICKETS]
214223
except Exception as e:
215224
get_logger().error(f"Error extracting tickets error= {e}",
216225
artifact={"traceback": traceback.format_exc()})
217226

218-
return list(github_tickets)
227+
return github_tickets
219228

220229
def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"):
221230
"""

tests/unittest/test_extract_issue_from_branch.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import pytest
22

3-
from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name
3+
from pr_agent.tools.ticket_pr_compliance_check import (
4+
MAX_TICKETS,
5+
extract_ticket_links_from_branch_name,
6+
extract_ticket_links_from_pr_description,
7+
)
48

59

610
class TestExtractTicketsLinkFromBranchName:
@@ -110,3 +114,26 @@ def test_multiple_matches_deduplicated(self):
110114
"https://github.com/org/repo/issues/1",
111115
"https://github.com/org/repo/issues/2",
112116
}
117+
118+
119+
class TestExtractTicketLinksFromPrDescription:
120+
"""GitHub issue extraction from the PR description."""
121+
122+
def test_preserves_first_seen_order(self):
123+
"""Issues are returned in first-seen order, de-duplicated."""
124+
desc = "Fixes #3, relates to #1, also #3 again and #2"
125+
result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
126+
assert result == [
127+
"https://github.com/org/repo/issues/3",
128+
"https://github.com/org/repo/issues/1",
129+
"https://github.com/org/repo/issues/2",
130+
]
131+
132+
def test_cap_selects_deterministic_first_seen_subset(self):
133+
"""When more than MAX_TICKETS issues are present, the first MAX_TICKETS in
134+
first-seen order are kept (not an arbitrary subset from a set)."""
135+
nums = list(range(1, MAX_TICKETS + 4))
136+
desc = " ".join(f"#{n}" for n in nums)
137+
result = extract_ticket_links_from_pr_description(desc, "org/repo", "https://github.com")
138+
expected = [f"https://github.com/org/repo/issues/{n}" for n in nums[:MAX_TICKETS]]
139+
assert result == expected

0 commit comments

Comments
 (0)