import re
from bs4 import BeautifulSoup
from typing import List, Dict, Optional

from Core.core import BasePuzzleCrawler, PuzzleItem


class BattleshipCrawler(BasePuzzleCrawler):
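    """Battleship puzzle crawler.

    parse_index collects puzzle links from the '#index-1' container of an
    index page; parse_puzzle_detail extracts the grid sections from a detail
    page and packs them into a PuzzleItem.
    """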
    def parse_index(self, html_content: str) -> List[Dict]:
        soup = BeautifulSoup(html_content, 'html.parser')
        container = soup.find('div', id='index-1')

        if not container:
            self.logger.warning("Index container #index-1 not found.")
            return []

        results = []
        for link in container.find_all('a'):
            href = link.get('href')
            text = link.get_text(strip=True)

            if href and text:
                # Links carrying the 'sv' CSS class use a different section
                # layout on their detail page (see parse_puzzle_detail).
                link_type = 'class_sv' if 'sv' in link.get('class', []) else 'other'
                results.append({
                    'href': href if href.startswith('http') else self.config.base_url + href,
                    'text': text,
                    'type': link_type
                })

        # Extra filtering (e.g. filter_and_classify_results) could hook in here.
        return results

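    # Detail pages carry the puzzle as plain text split into bracketed
    # sections ([rlabels], [clabels], [problem], [solution]); the regexes
    # below cut those sections out, tolerating the layout variants seen
    # across puzzle types.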
    def parse_puzzle_detail(self, html_content: str, metadata: Dict) -> Optional[PuzzleItem]:
        text = metadata.get('text', 'unknown')
        link_type = metadata.get('type')

        # 'class_sv' pages close the [solution] block with [moves]; all
        # others close it with [end].
        sol_end = r"\[moves\]" if link_type == "class_sv" else r"\[end\]"

        # Candidate section layouts, tried in order until all four fields match:
        #   1. [rlabels] before [clabels], with a [problem] (areas) block;
        #   2. [clabels] before [rlabels], with a [problem] block;
        #   3. [clabels] before [rlabels], no [problem] block -- 'areas' is
        #      deliberately pointed at the solution text so the check further
        #      down can detect this case and synthesize an empty area grid.
        pattern_sets = [
            {
                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[clabels\])",
                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[problem\])",
                'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
                'sol': rf"(?<=\[solution\]\n)(.*?)(?={sol_end})",
            },
            {
                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[problem\])",
                'areas': r"(?<=\[problem\]\n)(.*?)(?=\[solution\])",
                'sol': rf"(?<=\[solution\]\n)(.*?)(?={sol_end})",
            },
            {
                'cols': r"(?<=\[clabels\]\n)(.*?)(?=\[rlabels\])",
                'rows': r"(?<=\[rlabels\]\n)(.*?)(?=\[solution\])",
                'areas': rf"(?<=\[solution\]\n)(.*?)(?={sol_end})",
                'sol': rf"(?<=\[solution\]\n)(.*?)(?={sol_end})",
            },
        ]

        try:
            for patterns in pattern_sets:
                cols_match = re.search(patterns['cols'], html_content, re.DOTALL)
                rows_match = re.search(patterns['rows'], html_content, re.DOTALL)
                areas_match = re.search(patterns['areas'], html_content, re.DOTALL)
                sol_match = re.search(patterns['sol'], html_content, re.DOTALL)
                if all([cols_match, rows_match, areas_match, sol_match]):
                    break
            else:
                self.logger.error(f"Error parsing detail for {text}: no section layout matched")
                return None
            # Process the matched sections.
            solution_raw = sol_match.group().strip()
            cols_raw = cols_match.group().strip()
            rows_raw = rows_match.group().strip()
            areas_raw = areas_match.group().strip()

            rows_list = solution_raw.split("\n")

            num_rows = len(rows_list)
            num_cols = len(rows_list[0].split()) if num_rows > 0 else 0

            # Third layout above: there was no [problem] block, so replace the
            # duplicated solution text with an all-'-' area grid.
            if areas_raw == solution_raw:
                areas_raw = "\n".join([" ".join(["-" for _ in range(num_cols)]) for _ in range(num_rows)])

            # Count ships by length: cnt_list[k] holds the number of ships of
            # length k + 1. 'o' is a single-cell ship; 'n'/'s' are the top and
            # bottom caps of a vertical ship, 'w'/'e' the left and right caps
            # of a horizontal one; '-' and 'x' cells are skipped as water.
            cnt_list = [0, 0, 0, 0, 0]
            sol_mat = [row.strip().split(" ") for row in rows_list]
            visited = set()
            for i in range(num_rows):
                for j in range(num_cols):
                    if sol_mat[i][j] in "-x" or (i, j) in visited:
                        continue
                    elif sol_mat[i][j] == "o":
                        cnt_list[0] += 1
                        visited.add((i, j))
                    elif sol_mat[i][j] == "n":
                        # Walk down to the closing 's'; k - i is then length - 1.
                        k = i
                        while k < num_rows and sol_mat[k][j] != "s":
                            visited.add((k, j))
                            k += 1
                        cnt_list[k - i] += 1
                    elif sol_mat[i][j] == "w":
                        # Walk right to the closing 'e'; k - j is then length - 1.
                        k = j
                        while k < num_cols and sol_mat[i][k] != "e":
                            visited.add((i, k))
                            k += 1
                        cnt_list[k - j] += 1

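            # Worked example: for the 3x2 solution grid
            #     n -
            #     s -
            #     - o
            # the loop records one length-2 ship ('n'..'s') and one submarine
            # ('o'), giving cnt_list == [1, 1, 0, 0, 0] and the header
            # "3 2 1 1 0 0 0" below.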
            header = f"{num_rows} {num_cols} {cnt_list[0]} {cnt_list[1]} {cnt_list[2]} {cnt_list[3]} {cnt_list[4]}"
            problem_str = f"{header}\n{cols_raw}\n{rows_raw}\n{areas_raw}"
            solution_str = f"{header}\n{solution_raw}"

            puzzle_id = f"{text}_{num_rows}x{num_cols}"

            return PuzzleItem(
                id=puzzle_id,
                difficulty=0,  # Placeholder
                source_url=metadata.get('href', ''),
                problem=problem_str,
                solution=solution_str,
                metadata=metadata
            )

        except Exception as e:
            self.logger.error(f"Error parsing detail for {text}: {e}")
            return None
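
# A minimal usage sketch, kept commented out: BasePuzzleCrawler lives in
# Core.core and is not shown here, so the constructor call and the `fetch`
# helper below are assumptions, not the project's actual API.
#
# if __name__ == "__main__":
#     crawler = BattleshipCrawler()              # assumed: no-arg constructor
#     index_html = crawler.fetch(crawler.config.base_url)  # assumed helper
#     for entry in crawler.parse_index(index_html):
#         detail_html = crawler.fetch(entry['href'])
#         item = crawler.parse_puzzle_detail(detail_html, entry)
#         if item:
#             print(item.id)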