Skip to content

Commit f4aba0f

Browse files
committed
fix tests
1 parent e969a8f commit f4aba0f

File tree

2 files changed: 58 additions and 21 deletions.

2 files changed: 58 additions and 21 deletions.

tests/test_llm_classifier.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import unittest
88
from unittest.mock import MagicMock, patch
99

10-
import pandas as pd
1110
import pytest
1211

1312
from piedomains.api import DomainClassifier

tests/test_performance.py

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ def mock_classification(collection_data, *args, **kwargs):
140140
f"({rate:.1f} domains/second)"
141141
)
142142

143-
def test_cache_effectiveness(self):
143+
@patch("piedomains.data_collector.DataCollector.collect")
144+
@patch("piedomains.text.TextClassifier.classify_from_data")
145+
def test_cache_effectiveness(self, mock_classify, mock_collect):
144146
"""Test that caching improves performance."""
145147
# Create some test cache files
146148
cache_html_dir = os.path.join(self.temp_dir, "html")
@@ -151,29 +153,65 @@ def test_cache_effectiveness(self):
151153
with open(os.path.join(cache_html_dir, "example.com.html"), "w") as f:
152154
f.write(test_html)
153155

154-
# Mock the actual prediction to isolate cache performance
155-
with patch(
156-
"piedomains.text.TextClassifier._predict_text"
157-
) as mock_predict:
158-
mock_predict.return_value = {
159-
"text_label": "news",
160-
"text_prob": 0.8,
161-
"text_domain_probs": {"news": 0.8, "other": 0.2}
156+
# Mock data collection to simulate cache behavior
157+
def mock_collection(domains, *args, **kwargs):
158+
use_cache = kwargs.get("use_cache", True)
159+
return {
160+
"collection_id": "test_collection",
161+
"timestamp": "2025-12-17T12:00:00Z",
162+
"domains": [
163+
{
164+
"url": "example.com",
165+
"domain": "example.com",
166+
"text_path": "html/example.com.html",
167+
"image_path": "images/example.com.png",
168+
"date_time_collected": "2025-12-17T12:00:00Z",
169+
"fetch_success": True,
170+
"cached": use_cache, # Simulate cache usage
171+
"error": None
172+
}
173+
]
162174
}
163175

164-
# First call (should use cache)
165-
start_time = time.time()
166-
self.classifier.classify_by_text(["example.com"], use_cache=True)
167-
cached_time = time.time() - start_time
176+
# Mock classification
177+
def mock_classification(collection_data, *args, **kwargs):
178+
return [
179+
{
180+
"url": "example.com",
181+
"domain": "example.com",
182+
"text_path": "html/example.com.html",
183+
"image_path": "images/example.com.png",
184+
"date_time_collected": "2025-12-17T12:00:00Z",
185+
"model_used": "text/shallalist_ml",
186+
"category": "news",
187+
"confidence": 0.8,
188+
"reason": None,
189+
"error": None,
190+
"raw_predictions": {"news": 0.8, "other": 0.2}
191+
}
192+
]
168193

169-
# Second call (should also use cache)
170-
start_time = time.time()
171-
self.classifier.classify_by_text(["example.com"], use_cache=True)
172-
cached_time2 = time.time() - start_time
194+
mock_collect.side_effect = mock_collection
195+
mock_classify.side_effect = mock_classification
196+
197+
# First call (should use cache)
198+
start_time = time.time()
199+
result1 = self.classifier.classify_by_text(["example.com"], use_cache=True)
200+
cached_time = time.time() - start_time
201+
202+
# Second call (should also use cache)
203+
start_time = time.time()
204+
result2 = self.classifier.classify_by_text(["example.com"], use_cache=True)
205+
cached_time2 = time.time() - start_time
206+
207+
# Both should be fast since we're using cache
208+
self.assertLess(cached_time, 1.0)
209+
self.assertLess(cached_time2, 1.0)
173210

174-
# Both should be fast since we're using cache
175-
self.assertLess(cached_time, 1.0)
176-
self.assertLess(cached_time2, 1.0)
211+
# Verify results are correct
212+
self.assertEqual(len(result1), 1)
213+
self.assertEqual(len(result2), 1)
214+
self.assertEqual(result1[0]["category"], "news")
177215

178216
def test_memory_usage_batch_processing(self):
179217
"""Test memory usage doesn't grow excessively in batch processing."""

0 commit comments

Comments
 (0)