Skip to content

Commit 948db9f

Browse files
soodoku and claude committed
Fix CI/CD pipeline issues and code quality
- Updated GitHub Actions to use latest action versions (@v4)
- Standardized test commands across all platforms to exclude ML tests
- Fixed flake8 linting issues: removed unused imports, cleaned whitespace
- Streamlined dependency installation in workflows
- All non-ML tests now pass consistently (79 passed, 3 deselected)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 516d69c commit 948db9f

10 files changed

+130
-124
lines changed

piedomains/tests/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import sys
22
from contextlib import contextmanager
3-
import pandas
43

54
try:
65
from StringIO import StringIO
54 Bytes
Loading
54 Bytes
Loading

piedomains/tests/test_004_domain_validation.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def test_validate_domain_name_valid(self):
1919
"a.b",
2020
"very-long-subdomain.example-domain.com"
2121
]
22-
22+
2323
for domain in valid_domains:
2424
with self.subTest(domain=domain):
2525
self.assertTrue(Piedomain.validate_domain_name(domain))
@@ -38,7 +38,7 @@ def test_validate_domain_name_invalid(self):
3838
3939
"just-a-string-without-dot"
4040
]
41-
41+
4242
for domain in invalid_domains:
4343
with self.subTest(domain=domain):
4444
self.assertFalse(Piedomain.validate_domain_name(domain))
@@ -50,7 +50,7 @@ def test_validate_domain_name_with_protocol(self):
5050
"https://example.org",
5151
"https://sub.example.com/path"
5252
]
53-
53+
5454
for domain in domains_with_protocol:
5555
with self.subTest(domain=domain):
5656
self.assertTrue(Piedomain.validate_domain_name(domain))
@@ -64,13 +64,13 @@ def test_validate_domains_list(self):
6464
"",
6565
"twitter.com"
6666
]
67-
67+
6868
valid, invalid = Piedomain.validate_domains(mixed_domains)
69-
69+
7070
self.assertEqual(len(valid), 3)
7171
self.assertEqual(len(invalid), 2)
7272
self.assertIn("google.com", valid)
73-
self.assertIn("facebook.com", valid)
73+
self.assertIn("facebook.com", valid)
7474
self.assertIn("twitter.com", valid)
7575
self.assertIn("invalid..domain", invalid)
7676
self.assertIn("", invalid)
@@ -82,9 +82,9 @@ def test_validate_domains_normalization(self):
8282
"http://test.org/path",
8383
"example.net/"
8484
]
85-
85+
8686
valid, invalid = Piedomain.validate_domains(domains_to_normalize)
87-
87+
8888
self.assertEqual(len(valid), 3)
8989
self.assertEqual(len(invalid), 0)
9090
self.assertIn("example.com", valid)
@@ -93,4 +93,5 @@ def test_validate_domains_normalization(self):
9393

9494

9595
if __name__ == "__main__":
96-
unittest.main()
96+
unittest.main()
97+

piedomains/tests/test_005_text_processing.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ def test_text_from_html_basic(self):
2121
</body>
2222
</html>
2323
"""
24-
24+
2525
result = Piedomain.text_from_html(html_content)
26-
26+
2727
self.assertIsInstance(result, str)
2828
self.assertIn("welcome", result.lower())
2929
self.assertIn("test", result.lower())
@@ -44,9 +44,9 @@ def test_text_from_html_with_scripts_and_styles(self):
4444
</body>
4545
</html>
4646
"""
47-
47+
4848
result = Piedomain.text_from_html(html_content)
49-
49+
5050
# Should extract text but not script/style content
5151
self.assertIn("main", result.lower())
5252
self.assertIn("content", result.lower())
@@ -56,7 +56,7 @@ def test_data_cleanup_removes_numbers(self):
5656
"""Test that data cleanup removes numbers."""
5757
text_with_numbers = "test123 content456 more789 text"
5858
result = Piedomain.data_cleanup(text_with_numbers)
59-
59+
6060
self.assertNotIn("123", result)
6161
self.assertNotIn("456", result)
6262
self.assertNotIn("789", result)
@@ -65,7 +65,7 @@ def test_data_cleanup_removes_punctuation(self):
6565
"""Test that data cleanup removes punctuation."""
6666
text_with_punct = "hello, world! this is a test."
6767
result = Piedomain.data_cleanup(text_with_punct)
68-
68+
6969
self.assertNotIn(",", result)
7070
self.assertNotIn("!", result)
7171
self.assertNotIn(".", result)
@@ -74,7 +74,7 @@ def test_data_cleanup_removes_stopwords(self):
7474
"""Test that data cleanup removes English stopwords."""
7575
text_with_stopwords = "the quick brown fox and jumps in the lazy dog"
7676
result = Piedomain.data_cleanup(text_with_stopwords)
77-
77+
7878
# Common stopwords should be removed
7979
self.assertNotIn(" the ", " " + result + " ")
8080
self.assertNotIn(" and ", " " + result + " ")
@@ -88,7 +88,7 @@ def test_data_cleanup_removes_short_words(self):
8888
"""Test that data cleanup removes single character words."""
8989
text_with_short = "a big test i o u"
9090
result = Piedomain.data_cleanup(text_with_short)
91-
91+
9292
# Single characters should be removed
9393
result_words = result.split()
9494
for word in result_words:
@@ -98,17 +98,17 @@ def test_data_cleanup_lowercase(self):
9898
"""Test that data cleanup converts to lowercase."""
9999
text_mixed_case = "This IS Mixed CASE Text"
100100
result = Piedomain.data_cleanup(text_mixed_case)
101-
101+
102102
self.assertEqual(result, result.lower())
103103

104104
def test_data_cleanup_removes_duplicates(self):
105105
"""Test that data cleanup removes duplicate words."""
106106
text_with_duplicates = "test test content content more test"
107107
result = Piedomain.data_cleanup(text_with_duplicates)
108-
108+
109109
words = result.split()
110110
unique_words = set(words)
111-
111+
112112
# Should have same number of unique words as total words
113113
self.assertEqual(len(words), len(unique_words))
114114

@@ -117,7 +117,7 @@ def test_data_cleanup_filters_non_english(self):
117117
# This test may be limited by the NLTK words corpus availability
118118
text_mixed = "computer test français deutsche invalid"
119119
result = Piedomain.data_cleanup(text_mixed)
120-
120+
121121
# Should contain recognizable English words that aren't stopwords
122122
self.assertIn("computer", result)
123123
self.assertIn("test", result)
@@ -134,4 +134,5 @@ def test_data_cleanup_only_numbers_and_punct(self):
134134

135135

136136
if __name__ == "__main__":
137-
unittest.main()
137+
unittest.main()
138+

piedomains/tests/test_006_error_handling.py

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,26 @@ def test_validate_input_empty_list_no_path(self):
2929
"""Test validate_input with empty domain list and no path."""
3030
with self.assertRaises(Exception) as context:
3131
Piedomain.validate_input([], None, "html")
32-
32+
3333
self.assertIn("Provide list of Domains", str(context.exception))
3434

3535
def test_validate_input_nonexistent_path(self):
3636
"""Test validate_input with nonexistent path."""
3737
nonexistent_path = "/path/that/does/not/exist"
38-
38+
3939
with self.assertRaises(Exception) as context:
4040
Piedomain.validate_input([], nonexistent_path, "html")
41-
41+
4242
self.assertIn("does not exist", str(context.exception))
4343

4444
def test_validate_input_empty_directory(self):
4545
"""Test validate_input with empty directory."""
4646
empty_dir = os.path.join(self.temp_dir, "empty")
4747
os.makedirs(empty_dir)
48-
48+
4949
with self.assertRaises(Exception) as context:
5050
Piedomain.validate_input([], empty_dir, "html")
51-
51+
5252
self.assertIn("is empty", str(context.exception))
5353

5454
def test_validate_input_valid_offline_mode(self):
@@ -57,18 +57,18 @@ def test_validate_input_valid_offline_mode(self):
5757
test_file = os.path.join(self.html_dir, "test.html")
5858
with open(test_file, "w") as f:
5959
f.write("<html><body>test</body></html>")
60-
60+
6161
result = Piedomain.validate_input([], self.html_dir, "html")
6262
self.assertTrue(result) # Should return True for offline mode
6363

6464
@patch('requests.get')
6565
def test_extract_htmls_network_error(self, mock_get):
6666
"""Test HTML extraction with network errors."""
6767
mock_get.side_effect = Exception("Network error")
68-
68+
6969
domains = ["example.com"]
7070
errors = Piedomain.extract_htmls(domains, False, self.html_dir)
71-
71+
7272
self.assertIn("example.com", errors)
7373
self.assertIn("Network error", str(errors["example.com"]))
7474

@@ -78,10 +78,10 @@ def test_extract_htmls_http_error(self, mock_get):
7878
mock_response = MagicMock()
7979
mock_response.raise_for_status.side_effect = Exception("404 Not Found")
8080
mock_get.return_value = mock_response
81-
81+
8282
domains = ["nonexistent.com"]
8383
errors = Piedomain.extract_htmls(domains, False, self.html_dir)
84-
84+
8585
self.assertIn("nonexistent.com", errors)
8686

8787
@patch('piedomains.piedomain.Piedomain.get_driver')
@@ -90,9 +90,9 @@ def test_save_image_webdriver_error(self, mock_get_driver):
9090
mock_driver = MagicMock()
9191
mock_driver.get.side_effect = Exception("WebDriver error")
9292
mock_get_driver.return_value = mock_driver
93-
93+
9494
success, error_msg = Piedomain.save_image("example.com", self.image_dir)
95-
95+
9696
self.assertFalse(success)
9797
self.assertIn("WebDriver error", error_msg)
9898
mock_driver.quit.assert_called_once()
@@ -104,34 +104,34 @@ def test_save_image_driver_quit_error(self, mock_get_driver):
104104
mock_driver.get.side_effect = Exception("WebDriver error")
105105
mock_driver.quit.side_effect = Exception("Quit error")
106106
mock_get_driver.return_value = mock_driver
107-
107+
108108
# Should handle quit error gracefully
109109
success, error_msg = Piedomain.save_image("example.com", self.image_dir)
110-
110+
111111
self.assertFalse(success)
112112
self.assertIn("WebDriver error", error_msg)
113113

114114
def test_extract_image_tensor_invalid_directory(self):
115115
"""Test image tensor extraction with invalid directory."""
116116
nonexistent_dir = "/path/that/does/not/exist"
117-
117+
118118
with self.assertRaises(FileNotFoundError):
119119
Piedomain.extract_image_tensor(True, ["example.com"], nonexistent_dir)
120120

121121
def test_extract_html_text_invalid_directory(self):
122122
"""Test HTML text extraction with invalid directory."""
123123
nonexistent_dir = "/path/that/does/not/exist"
124-
124+
125125
with self.assertRaises(FileNotFoundError):
126126
Piedomain.extract_html_text(True, ["example.com"], nonexistent_dir)
127127

128128
def test_text_from_html_malformed_html(self):
129129
"""Test text extraction from malformed HTML."""
130130
malformed_html = "<html><body><p>Unclosed paragraph<div>Nested incorrectly</p></div></body></html>"
131-
131+
132132
# Should handle malformed HTML gracefully
133133
result = Piedomain.text_from_html(malformed_html)
134-
134+
135135
self.assertIsInstance(result, str)
136136
# BeautifulSoup should handle malformed HTML
137137

@@ -146,7 +146,7 @@ def test_extract_images_permission_error(self):
146146
# Create a directory and remove write permissions
147147
restricted_dir = os.path.join(self.temp_dir, "restricted")
148148
os.makedirs(restricted_dir)
149-
149+
150150
# This test may not work on all systems due to permission handling
151151
try:
152152
os.chmod(restricted_dir, 0o444) # Read-only
@@ -160,13 +160,14 @@ def test_extract_images_permission_error(self):
160160
def test_validate_domains_with_none_values(self):
161161
"""Test domain validation with None values in list."""
162162
domains_with_none = ["google.com", None, "facebook.com"]
163-
163+
164164
valid, invalid = Piedomain.validate_domains(domains_with_none)
165-
165+
166166
self.assertEqual(len(invalid), 1)
167167
self.assertIn(None, invalid)
168168
self.assertEqual(len(valid), 2)
169169

170170

171171
if __name__ == "__main__":
172-
unittest.main()
172+
unittest.main()
173+

0 commit comments

Comments
 (0)