Skip to content

Commit a0dea40

Browse files
Merge pull request #20 from mdhishaamakhtar/master
2 parents b449dbf + 5b3bfd8 commit a0dea40

File tree

12 files changed: 205 additions and 24 deletions.

.coverage

0 Bytes
Binary file not shown.

app/api/schemas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from marshmallow import Schema, fields, validate, ValidationError, post_load
1+
from marshmallow import Schema, fields, validate, post_load
22
from app.core.models import SearchRequest, ExploreRequest
33

44

app/core/pathfinding.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import uuid
2-
from typing import List, Set, Dict, Optional, Any
2+
from typing import List, Dict, Optional, Any
33
from app.core.interfaces import (
44
PathFinderInterface,
55
WikipediaClientInterface,
@@ -9,7 +9,6 @@
99
from app.utils.exceptions import (
1010
PathNotFoundError,
1111
InvalidPageError,
12-
WikipediaPageNotFoundError,
1312
)
1413
from app.utils.logging import get_logger
1514

app/core/services.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,8 @@
1313
WikipediaPage,
1414
)
1515
from app.utils.exceptions import (
16-
PathNotFoundError,
1716
InvalidPageError,
18-
WikipediaPageNotFoundError,
17+
DisambiguationPageError,
1918
)
2019
from app.utils.logging import get_logger
2120
import networkx as nx
@@ -99,16 +98,52 @@ def find_path(self, request: SearchRequest) -> PathResult:
9998
)
10099
raise
101100

102-
def validate_pages(self, start_page: str, end_page: str) -> tuple[bool, bool]:
101+
def validate_pages(self, start_page: str, end_page: str) -> tuple[bool, bool, dict]:
103102
"""
104-
Validate that both pages exist on Wikipedia.
103+
Validate that both pages exist on Wikipedia and check for disambiguation pages.
104+
105+
Args:
106+
start_page: Starting page title
107+
end_page: Target page title
105108
106109
Returns:
107-
Tuple of (start_exists, end_exists)
110+
Tuple of (start_exists, end_exists, validation_details)
111+
112+
Raises:
113+
DisambiguationPageError: When end page is a disambiguation page
108114
"""
109-
start_exists = self.wikipedia_client.page_exists(start_page)
110-
end_exists = self.wikipedia_client.page_exists(end_page)
111-
return start_exists, end_exists
115+
# Get detailed info for both pages
116+
start_info = self.wikipedia_client.get_page_with_redirect_info(start_page)
117+
end_info = self.wikipedia_client.get_page_with_redirect_info(end_page)
118+
119+
start_exists = start_info.get("exists", False)
120+
end_exists = end_info.get("exists", False)
121+
122+
validation_details = {
123+
"start_page": {
124+
"original": start_page,
125+
"final_title": start_info.get("final_title", start_page),
126+
"was_redirected": start_info.get("was_redirected", False),
127+
"is_disambiguation": start_info.get("is_disambiguation", False),
128+
"exists": start_exists,
129+
},
130+
"end_page": {
131+
"original": end_page,
132+
"final_title": end_info.get("final_title", end_page),
133+
"was_redirected": end_info.get("was_redirected", False),
134+
"is_disambiguation": end_info.get("is_disambiguation", False),
135+
"exists": end_exists,
136+
},
137+
}
138+
139+
# Check if end page is disambiguation - this should fail
140+
if end_exists and end_info.get("is_disambiguation", False):
141+
final_title = end_info.get("final_title", end_page)
142+
raise DisambiguationPageError(end_page, final_title)
143+
144+
# Note: We allow start page to be disambiguation as it might have useful links
145+
146+
return start_exists, end_exists, validation_details
112147

113148

114149
class ExploreService:

app/external/wikipedia.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from concurrent.futures import ThreadPoolExecutor
33
from typing import Dict, List, Optional
44
from app.core.interfaces import WikipediaClientInterface, CacheServiceInterface
5-
from app.utils.exceptions import WikipediaAPIError, WikipediaPageNotFoundError
5+
from app.utils.exceptions import WikipediaAPIError
66
from app.utils.logging import get_logger
77

88
logger = get_logger(__name__)
@@ -197,6 +197,81 @@ def page_exists(self, page_title: str) -> bool:
197197
logger.error(f"Failed to check page existence for {page_title}: {e}")
198198
return False
199199

200+
def get_page_with_redirect_info(self, page_title: str) -> Optional[dict]:
201+
"""
202+
Get page information including redirect details.
203+
204+
Args:
205+
page_title: Wikipedia page title
206+
207+
Returns:
208+
Dict with 'exists', 'final_title', 'was_redirected', 'is_disambiguation'
209+
"""
210+
params = {
211+
"action": "query",
212+
"format": "json",
213+
"titles": page_title,
214+
"prop": "info|categories",
215+
"redirects": 1,
216+
}
217+
218+
try:
219+
response = self.session.get(self.base_url, params=params, timeout=10)
220+
response.raise_for_status()
221+
data = response.json().get("query", {})
222+
223+
# Check for redirects
224+
redirects = data.get("redirects", [])
225+
was_redirected = len(redirects) > 0
226+
final_title = page_title
227+
228+
if was_redirected:
229+
# Find the final redirect target
230+
for redirect in redirects:
231+
if redirect.get("from") == page_title:
232+
final_title = redirect.get("to", page_title)
233+
break
234+
235+
# Check if page exists
236+
pages = data.get("pages", {})
237+
page_exists = False
238+
is_disambiguation = False
239+
240+
for page_data in pages.values():
241+
if "missing" not in page_data:
242+
page_exists = True
243+
current_title = page_data.get("title", "")
244+
245+
# Check if it's a disambiguation page
246+
if "(disambiguation)" in current_title.lower():
247+
is_disambiguation = True
248+
else:
249+
# Check categories for disambiguation
250+
categories = page_data.get("categories", [])
251+
for category in categories:
252+
cat_title = category.get("title", "").lower()
253+
if "disambiguation" in cat_title:
254+
is_disambiguation = True
255+
break
256+
257+
return {
258+
"exists": page_exists,
259+
"final_title": final_title,
260+
"was_redirected": was_redirected,
261+
"is_disambiguation": is_disambiguation,
262+
"original_title": page_title,
263+
}
264+
265+
except requests.RequestException as e:
266+
logger.error(f"Failed to get page redirect info for {page_title}: {e}")
267+
return {
268+
"exists": False,
269+
"final_title": page_title,
270+
"was_redirected": False,
271+
"is_disambiguation": False,
272+
"original_title": page_title,
273+
}
274+
200275
def get_page_info(self, page_title: str) -> Optional[dict]:
201276
"""
202277
Get basic information about a Wikipedia page.

app/infrastructure/tasks.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
InvalidPageError,
88
WikipediaAPIError,
99
CacheConnectionError,
10+
DisambiguationPageError,
1011
)
1112
from app.utils.logging import get_logger
1213

@@ -88,10 +89,20 @@ def progress_update(progress_data):
8889
# Get pathfinding service with progress callback
8990
pathfinding_service = get_pathfinding_service(algorithm, progress_update)
9091

91-
# Validate that pages exist
92-
start_exists, end_exists = pathfinding_service.validate_pages(
93-
start_page, end_page
94-
)
92+
# Validate that pages exist and check for disambiguation issues
93+
try:
94+
start_exists, end_exists, validation_details = (
95+
pathfinding_service.validate_pages(start_page, end_page)
96+
)
97+
except DisambiguationPageError as e:
98+
logger.error(f"Task {task_id}: Disambiguation page error - {e}")
99+
return {
100+
"status": "FAILURE",
101+
"error": str(e),
102+
"code": "DISAMBIGUATION_PAGE",
103+
"start_page": start_page,
104+
"end_page": end_page,
105+
}
95106

96107
if not start_exists:
97108
logger.error(f"Task {task_id}: Start page '{start_page}' does not exist")
@@ -175,6 +186,16 @@ def progress_update(progress_data):
175186
"end_page": end_page,
176187
}
177188

189+
except DisambiguationPageError as e:
190+
logger.error(f"Task {task_id}: Disambiguation page error - {e}")
191+
return {
192+
"status": "FAILURE",
193+
"error": str(e),
194+
"code": "DISAMBIGUATION_PAGE",
195+
"start_page": start_page,
196+
"end_page": end_page,
197+
}
198+
178199
except (requests.RequestException, CacheConnectionError, WikipediaAPIError) as e:
179200
# These exceptions trigger auto-retry
180201
logger.warning(f"Task {task_id}: Retryable error - {e}")

app/utils/exceptions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,22 @@ def __init__(self, message: str = None, page_title: str = None):
7878
self.code = "INVALID_PAGE"
7979

8080

81+
class DisambiguationPageError(PathFindingError):
82+
"""Raised when a disambiguation page is used as a target."""
83+
84+
def __init__(self, page_title: str, resolved_title: str = None):
85+
self.page_title = page_title
86+
self.resolved_title = resolved_title
87+
88+
if resolved_title and resolved_title != page_title:
89+
message = f"'{page_title}' redirects to disambiguation page '{resolved_title}'. Please specify a more specific page."
90+
else:
91+
message = f"'{page_title}' is a disambiguation page. Please specify a more specific page."
92+
93+
super().__init__(message, page_title, resolved_title)
94+
self.code = "DISAMBIGUATION_PAGE"
95+
96+
8197
class CacheError(IrisBaseException):
8298
"""Base exception for cache-related errors."""
8399

static/script.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,17 @@ class PathFinderUI {
412412
this.hideLoading();
413413

414414
if (!result.path || result.path.length === 0) {
415-
// Hide the visualization section and show error
415+
// Check if this is actually a nested error response
416+
if (result.status === 'FAILURE' && result.error) {
417+
// Show the specific error message from the nested result
418+
const section = document.getElementById('visualizationSection');
419+
section.classList.remove('show');
420+
this.showError(result.error);
421+
StateManager.clear();
422+
return;
423+
}
424+
425+
// Fallback to generic message for actual empty paths
416426
const section = document.getElementById('visualizationSection');
417427
section.classList.remove('show');
418428
this.showError('No path found between the pages');

tests/integration/test_services.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,26 @@ def test_find_path_not_found(self, mock_wikipedia_client, mock_cache_service):
104104

105105
def test_validate_pages(self, mock_wikipedia_client, mock_cache_service):
106106
"""Test page validation."""
107+
107108
# Mock Wikipedia client responses
108-
mock_wikipedia_client.page_exists.side_effect = (
109-
lambda page: page != "NonExistent"
109+
def mock_get_page_info(page):
110+
if page == "NonExistent":
111+
return {
112+
"exists": False,
113+
"final_title": page,
114+
"was_redirected": False,
115+
"is_disambiguation": False,
116+
}
117+
else:
118+
return {
119+
"exists": True,
120+
"final_title": page,
121+
"was_redirected": False,
122+
"is_disambiguation": False,
123+
}
124+
125+
mock_wikipedia_client.get_page_with_redirect_info.side_effect = (
126+
mock_get_page_info
110127
)
111128

112129
mock_path_finder = Mock()
@@ -115,14 +132,22 @@ def test_validate_pages(self, mock_wikipedia_client, mock_cache_service):
115132
)
116133

117134
# Test both pages exist
118-
start_exists, end_exists = service.validate_pages("Page A", "Page B")
135+
start_exists, end_exists, validation_details = service.validate_pages(
136+
"Page A", "Page B"
137+
)
119138
assert start_exists is True
120139
assert end_exists is True
140+
assert validation_details["start_page"]["exists"] is True
141+
assert validation_details["end_page"]["exists"] is True
121142

122143
# Test one page doesn't exist
123-
start_exists, end_exists = service.validate_pages("Page A", "NonExistent")
144+
start_exists, end_exists, validation_details = service.validate_pages(
145+
"Page A", "NonExistent"
146+
)
124147
assert start_exists is True
125148
assert end_exists is False
149+
assert validation_details["start_page"]["exists"] is True
150+
assert validation_details["end_page"]["exists"] is False
126151

127152

128153
class TestExploreService:

tests/unit/test_external_wikipedia_client.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,3 @@ def fake_process(batch):
121121
assert (len(calls) == 2) and (set(calls[0]) <= set(titles))
122122
# merged keys exist from both batches
123123
assert any(k in res for k in ("P0", "P49", "P50", "P74"))
124-

0 commit comments

Comments
 (0)