
Commit b2531f1

Merge pull request #44 from SmilingWayne/basic-dev
- fix matplotlib display error
- update 10+ solvers and automate the workflow
- remove dataset (moved to puzzlekit-dataset)
- update statistics
2 parents e732433 + 0186cc4 commit b2531f1

61 files changed (+3322, −335 lines)

.github/workflows/publish.yml

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+name: Publish to PyPI
+
+# Trigger: only when a tag starting with 'v' is pushed to the repository
+# For example: git push origin v0.1.0 (will trigger)
+on:
+  push:
+    tags:
+      - "v*"
+
+jobs:
+  # First stage: test and build
+  # This step makes sure the code is not broken and that a package can actually be built
+  build-n-test:
+    name: Build and Test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10" # Use your base version
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build pytest
+          pip install .
+
+      - name: Run Tests
+        # If tests fail, the workflow stops here and the publish job never runs
+        run: |
+          pytest tests/
+
+      - name: Build package
+        # Try to build; make sure MANIFEST.in is configured correctly and no unwanted files are included
+        run: |
+          python -m build
+
+      # Store the built dist/ folder and pass it to the next job
+      - name: Store distribution artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+  # Second stage: publish to PyPI
+  # Runs only if the build-n-test stage succeeds
+  publish-to-pypi:
+    name: Publish to PyPI
+    needs: build-n-test # Dependency: must wait for tests to pass
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/puzzlekit
+    permissions:
+      id-token: write # Important for the new Trusted Publishing flow, but also compatible with the token method
+
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
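The two jobs above can be dry-run locally before pushing a release tag. Below is a minimal sketch, not part of the commit, that mirrors the build-n-test steps; the script name release_check.py is hypothetical and it assumes the tests/ layout used by the workflow:

# release_check.py -- hypothetical local mirror of the build-n-test job above
import subprocess
import sys

COMMANDS = [
    [sys.executable, "-m", "pip", "install", "--upgrade", "pip"],
    [sys.executable, "-m", "pip", "install", "build", "pytest"],
    [sys.executable, "-m", "pip", "install", "."],
    [sys.executable, "-m", "pytest", "tests/"],  # same test step as the workflow
    [sys.executable, "-m", "build"],             # produces dist/ just like the CI job
]

for cmd in COMMANDS:
    print("running:", " ".join(cmd))
    subprocess.run(cmd, check=True)  # stop at the first failure, as CI does before publishing

print("all checks passed; tag and push, e.g. git tag v0.1.0 && git push origin v0.1.0")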

.gitignore

Lines changed: 4 additions & 0 deletions
@@ -150,3 +150,7 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+# Ignore dataset directory
+assets/
+benchmark_results/

README.md

Lines changed: 152 additions & 134 deletions
Large diffs are not rendered by default.

crawlers/BattleshipCrawler.py

Lines changed: 155 additions & 1 deletion
@@ -1 +1,155 @@
-# TODO: Crawler
+import re
+from bs4 import BeautifulSoup
+from typing import List, Dict, Optional
+from Core.core import BasePuzzleCrawler, PuzzleItem
+
+class BattleshipCrawler(BasePuzzleCrawler):
+
+    def parse_index(self, html_content: str) -> List[Dict]:
+        soup = BeautifulSoup(html_content, 'html.parser')
+        container = soup.find('div', id='index-1')
+
+        if not container:
+            self.logger.warning("Index container #index-1 not found.")
+            return []
+
+        results = []
+        for link in container.find_all('a'):
+            href = link.get('href')
+            text = link.get_text(strip=True)
+
+            if href and text:
+                # Custom logic to classify links as you did before
+                link_type = 'class_sv' if 'sv' in link.get('class', []) else 'other'
+                results.append({
+                    'href': self.config.base_url + href if not href.startswith('http') else href,
+                    'text': text,
+                    'type': link_type
+                })
+
+        # You can add your `filter_and_classify_results` logic here if needed
+        return results
+
+    def parse_puzzle_detail(self, html_content: str, metadata: Dict) -> Optional[PuzzleItem]:
+        text = metadata.get('text', 'unknown')
+        link_type = metadata.get('type')
+
+        # Define regex patterns based on type
+        if link_type == "class_sv":
+            patterns = {
+                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[clabels\])",
+                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[problem\])",
+                'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                'sol': r"(?<=\[solution\]\n)(.*?)(?=\[moves\])"
+            }
+        else:
+            patterns = {
+                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[clabels\])",
+                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[problem\])",
+                'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                'sol': r"(?<=\[solution\]\n)(.*?)(?=\[end\])"
+            }
+
+        try:
+            cols_match = re.search(patterns['cols'], html_content, re.DOTALL)
+            rows_match = re.search(patterns['rows'], html_content, re.DOTALL)
+            areas_match = re.search(patterns['areas'], html_content, re.DOTALL)
+            sol_match = re.search(patterns['sol'], html_content, re.DOTALL)
+
+            if not all([cols_match, rows_match, areas_match, sol_match]):
+                # First fallback: pages where [clabels] precedes [rlabels]
+                try:
+                    if link_type == "class_sv":
+                        patterns = {
+                            'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
+                            'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[problem\])",
+                            'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                            'sol': r"(?<=\[solution\]\n)(.*?)(?=\[moves\])"
+                        }
+                    else:
+                        patterns = {
+                            'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
+                            'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[problem\])",
+                            'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                            'sol': r"(?<=\[solution\]\n)(.*?)(?=\[end\])"
+                        }
+                    cols_match = re.search(patterns['cols'], html_content, re.DOTALL)
+                    rows_match = re.search(patterns['rows'], html_content, re.DOTALL)
+                    areas_match = re.search(patterns['areas'], html_content, re.DOTALL)
+                    sol_match = re.search(patterns['sol'], html_content, re.DOTALL)
+                except Exception as e:
+                    # Second fallback: pages without a [problem] block; the solution span is
+                    # reused for 'areas' and replaced with a blank grid further below
+                    try:
+                        if link_type == "class_sv":
+                            patterns = {
+                                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
+                                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[solution\])",
+                                # 'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                                'sol': r"(?<=\[solution\]\n)(.*?)(?=\[moves\])"
+                            }
+                        else:
+                            patterns = {
+                                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
+                                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[solution\])",
+                                # 'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
+                                'sol': r"(?<=\[solution\]\n)(.*?)(?=\[end\])"
+                            }
+                        cols_match = re.search(patterns['cols'], html_content, re.DOTALL)
+                        rows_match = re.search(patterns['rows'], html_content, re.DOTALL)
+                        areas_match = re.search(patterns['sol'], html_content, re.DOTALL)
+                        sol_match = re.search(patterns['sol'], html_content, re.DOTALL)
+                    except Exception as e:
+                        self.logger.error(f"Error parsing detail for {text}: {e}")
+                        return None
+
+            # Process data
+            solution_raw = sol_match.group().strip()
+            cols_raw = cols_match.group().strip()
+            rows_raw = rows_match.group().strip()
+            areas_raw = areas_match.group().strip()
+
+            rows_list = solution_raw.strip().split("\n")
+
+            num_rows = len(rows_list)
+            num_cols = len(rows_list[0].split()) if num_rows > 0 else 0
+            if areas_raw == solution_raw:
+                areas_raw = "\n".join([" ".join(["-" for _ in range(num_cols)]) for _ in range(num_rows)])
+            # cnt_list[d] counts the ships of length d + 1 in the solution grid
+            cnt_list = [0, 0, 0, 0, 0]
+            sol_mat = [row.strip().split(" ") for row in rows_list]
+            visited = set()
+            for i in range(num_rows):
+                for j in range(num_cols):
+                    if sol_mat[i][j] in "-x" or (i, j) in visited:
+                        continue
+                    elif sol_mat[i][j] == "o":
+                        cnt_list[0] += 1
+                        visited.add((i, j))
+                    elif sol_mat[i][j] == "n":
+                        k = i
+                        while k < num_rows and sol_mat[k][j] != "s":
+                            visited.add((k, j))
+                            k += 1
+                        cnt_list[k - i] += 1
+                    elif sol_mat[i][j] == "w":
+                        k = j
+                        while k < num_cols and sol_mat[i][k] != "e":
+                            visited.add((i, k))
+                            k += 1
+                        cnt_list[k - j] += 1
+
+            header = f"{num_rows} {num_cols} {cnt_list[0]} {cnt_list[1]} {cnt_list[2]} {cnt_list[3]} {cnt_list[4]}"
+            problem_str = f"{header}\n{cols_raw}\n{rows_raw}\n{areas_raw}"
+            solution_str = f"{header}\n{solution_raw}"
+
+            puzzle_id = f"{text}_{num_rows}x{num_cols}"
+
+            return PuzzleItem(
+                id=puzzle_id,
+                difficulty=0,  # Placeholder
+                source_url=metadata.get('href', ''),
+                problem=problem_str,
+                solution=solution_str,
+                metadata=metadata
+            )
+
+        except Exception as e:
+            self.logger.error(f"Error parsing detail for {text}: {e}")
+            return None
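To make the counting loop in parse_puzzle_detail concrete, here is a standalone worked example on an invented 4x4 solution grid (symbols as in the crawler: 'o' a length-1 ship, 'n'/'s' the endpoints of a vertical ship, 'w'/'e' the endpoints of a horizontal ship, '-'/'x' water). It re-implements only the counting step and is not part of the commit:

# Standalone sketch of the ship-length count; the grid below is made up for illustration.
solution_raw = "o - n -\n- - s -\nw e - -\n- - - o"

sol_mat = [row.split(" ") for row in solution_raw.split("\n")]
num_rows, num_cols = len(sol_mat), len(sol_mat[0])

cnt_list = [0, 0, 0, 0, 0]  # cnt_list[d] = number of ships of length d + 1
visited = set()
for i in range(num_rows):
    for j in range(num_cols):
        if sol_mat[i][j] in "-x" or (i, j) in visited:
            continue
        elif sol_mat[i][j] == "o":           # single-cell ship
            cnt_list[0] += 1
            visited.add((i, j))
        elif sol_mat[i][j] == "n":           # walk down to the stern 's'
            k = i
            while k < num_rows and sol_mat[k][j] != "s":
                visited.add((k, j))
                k += 1
            cnt_list[k - i] += 1
        elif sol_mat[i][j] == "w":           # walk right to the stern 'e'
            k = j
            while k < num_cols and sol_mat[i][k] != "e":
                visited.add((i, k))
                k += 1
            cnt_list[k - j] += 1

print(cnt_list)  # [2, 2, 0, 0, 0]: two length-1 ships and two length-2 ships

These counts feed the header line "{num_rows} {num_cols} {cnt_list[0]} ... {cnt_list[4]}" that prefixes both the problem and the solution strings.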

crawlers/CastleWallCrawler.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from YonmasuCrawler import YonmasuCrawler
+
+CastleWallCrawler = YonmasuCrawler

crawlers/CocktailLampCrawler.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from JuosanCrawler import JuosanCrawler
+
+CocktailLampCrawler = JuosanCrawler

crawlers/CrawlerFactory.py

Lines changed: 33 additions & 2 deletions
@@ -70,6 +70,22 @@
 from BricksCrawler import BricksCrawler
 from SkyscraperSudokuCrawler import SkyscraperSudokuCrawler
 from KuroshiroCrawler import KuroshiroCrawler
+from BattleshipCrawler import BattleshipCrawler
+from StitchesCrawler import StitchesCrawler
+from KenKenCrawler import KenKenCrawler
+from GalaxiesCrawler import GalaxiesCrawler
+from MathraxCrawler import MathraxCrawler
+from CastleWallCrawler import CastleWallCrawler
+from DigitalBattleshipCrawler import DigitalBattleshipCrawler
+from PutteriaCrawler import PutteriaCrawler
+from YajikabeCrawler import YajikabeCrawler
+from KoburinCrawler import KoburinCrawler
+from UsooneCrawler import UsooneCrawler
+from CocktailLampCrawler import CocktailLampCrawler
+from NurimisakiCrawler import NurimisakiCrawler
+from NawabariCrawler import NawabariCrawler
+from TriplettsCrawler import TriplettsCrawler
+from DoorsCrawler import DoorsCrawler
 from typing import Dict, Any

 class CrawlerFactory:
@@ -147,8 +163,23 @@ class CrawlerFactory:
         "NumberLink": NumberLinkCrawler,
         "Bricks": BricksCrawler,
         "SkyscraperSudoku": SkyscraperSudokuCrawler,
-        "Kuroshiro": KuroshiroCrawler
-
+        "Kuroshiro": KuroshiroCrawler,
+        "Battleship": BattleshipCrawler,
+        "Stitches": StitchesCrawler,
+        "KenKen": KenKenCrawler,
+        "Galaxies": GalaxiesCrawler,
+        "Mathrax": MathraxCrawler,
+        "CastleWall": CastleWallCrawler,
+        "DigitalBattleship": DigitalBattleshipCrawler,
+        "Putteria": PutteriaCrawler,
+        "Yajikabe": YajikabeCrawler,
+        "Koburin": KoburinCrawler,
+        "Usoone": UsooneCrawler,
+        "CocktailLamp": CocktailLampCrawler,
+        "Nurimisaki": NurimisakiCrawler,
+        "Nawabari": NawabariCrawler,
+        "Tripletts": TriplettsCrawler,
+        "Doors": DoorsCrawler
     }

     @classmethod
crawlers/DigitalBattleshipCrawler.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from BattleshipCrawler import BattleshipCrawler
+
+DigitalBattleshipCrawler = BattleshipCrawler
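Several of the new crawler files are one-line alias modules (CastleWallCrawler = YonmasuCrawler, CocktailLampCrawler = JuosanCrawler, DigitalBattleshipCrawler = BattleshipCrawler): two puzzle names share one implementation while each still gets its own entry in the CrawlerFactory registry. The sketch below shows the pattern end to end; the accessor get_crawler() and its error handling are assumptions, since the factory's methods are not part of this diff:

# Hypothetical sketch of the alias + registry pattern; only the dict entries mirror the commit.
class BasePuzzleCrawler: ...
class YonmasuCrawler(BasePuzzleCrawler): ...
class BattleshipCrawler(BasePuzzleCrawler): ...

# Alias modules such as crawlers/DigitalBattleshipCrawler.py simply re-export an existing class
CastleWallCrawler = YonmasuCrawler
DigitalBattleshipCrawler = BattleshipCrawler

REGISTRY = {
    "Battleship": BattleshipCrawler,
    "DigitalBattleship": DigitalBattleshipCrawler,  # same class as "Battleship"
    "CastleWall": CastleWallCrawler,                # same class as Yonmasu
}

def get_crawler(puzzle_name: str) -> type:
    # Resolve a puzzle name to its crawler class (hypothetical accessor)
    try:
        return REGISTRY[puzzle_name]
    except KeyError:
        raise ValueError(f"Unknown puzzle type: {puzzle_name}")

print(get_crawler("DigitalBattleship") is get_crawler("Battleship"))  # True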
