Skip to content

Commit 4fc28dd

Browse files
committed
docker
1 parent c7d6b93 commit 4fc28dd

File tree

3 files changed

+260
-15
lines changed

3 files changed

+260
-15
lines changed

.dockerignore

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Version control
2+
.git
3+
.gitignore
4+
.gitattributes
5+
6+
# Python
7+
__pycache__
8+
*.pyc
9+
*.pyo
10+
*.pyd
11+
.Python
12+
env
13+
pip-log.txt
14+
pip-delete-this-directory.txt
15+
.tox
16+
.coverage
17+
.coverage.*
18+
.cache
19+
nosetests.xml
20+
coverage.xml
21+
*.cover
22+
*.log
23+
.pytest_cache
24+
25+
# Virtual environments
26+
venv/
27+
env/
28+
ENV/
29+
.venv/
30+
.env
31+
32+
# IDE
33+
.vscode/
34+
.idea/
35+
*.swp
36+
*.swo
37+
*~
38+
39+
# OS
40+
.DS_Store
41+
.DS_Store?
42+
._*
43+
.Spotlight-V100
44+
.Trashes
45+
ehthumbs.db
46+
Thumbs.db
47+
48+
# Testing and development
49+
test_*
50+
*_test.py
51+
tests/
52+
/tmp/
53+
*.tmp
54+
55+
# Documentation
56+
*.md
57+
!README.md
58+
docs/
59+
*.rst
60+
*.txt
61+
LICENSE
62+
63+
# Build artifacts
64+
build/
65+
dist/
66+
*.egg-info/
67+
.eggs/
68+
69+
# Cache and temporary files
70+
cache/
71+
*.cache
72+
.cache/
73+
/model/
74+
/images/
75+
screenshots/
76+
*.png
77+
*.jpg
78+
*.jpeg
79+
80+
# Jupyter notebooks
81+
*.ipynb
82+
.ipynb_checkpoints
83+
84+
# Local configuration
85+
.env.local
86+
.env.development.local
87+
.env.test.local
88+
.env.production.local
89+
90+
# Backup files
91+
*.bak
92+
*.backup
93+
*.old
94+
*.orig
95+
96+
# Node modules (if any)
97+
node_modules/
98+
npm-debug.log*
99+
100+
# Test environments and outputs
101+
/tmp/
102+
/test_*/
103+
test_*.py
104+
benchmark_*
105+
output/
106+
107+
# Development tools
108+
.pre-commit-config.yaml
109+
setup.cfg
110+
.flake8
111+
.mypy.ini
112+
.pylintrc
113+
114+
# Re-include essential files (these negations must come after the patterns they override, because the last matching rule wins)
115+
!pyproject.toml
116+
!piedomains/
117+
!examples/sandbox/
118+
!docker-entrypoint.sh

Dockerfile

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,38 +7,85 @@ ENV PYTHONUNBUFFERED=1 \
77
PIP_NO_CACHE_DIR=1 \
88
DEBIAN_FRONTEND=noninteractive
99

10-
# Install system dependencies
10+
# Install system dependencies for Playwright
1111
RUN apt-get update && apt-get install -y \
1212
wget \
1313
gnupg \
1414
curl \
1515
unzip \
16+
# Playwright system dependencies
17+
libnss3 \
18+
libnspr4 \
19+
libatk-bridge2.0-0 \
20+
libdrm2 \
21+
libxkbcommon0 \
22+
libxcomposite1 \
23+
libxdamage1 \
24+
libxrandr2 \
25+
libgbm1 \
26+
libxss1 \
27+
libasound2 \
28+
libatspi2.0-0 \
29+
libgtk-3-0 \
30+
# Additional dependencies for headless operation
31+
xvfb \
1632
&& apt-get clean \
1733
&& rm -rf /var/lib/apt/lists/*
1834

19-
# Install Google Chrome
20-
RUN wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/googlechrome-linux-keyring.gpg \
21-
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/googlechrome-linux-keyring.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
22-
&& apt-get update \
23-
&& apt-get install -y google-chrome-stable \
24-
&& apt-get clean \
25-
&& rm -rf /var/lib/apt/lists/*
26-
27-
# Install uv
35+
# Install uv for fast package management
2836
RUN pip install uv
2937

3038
# Set working directory
3139
WORKDIR /app
3240

33-
# Copy pyproject.toml and install dependencies
34-
COPY pyproject.toml ./
41+
# Copy essential files for package installation
42+
COPY pyproject.toml README.md ./
43+
44+
# Install Python dependencies
3545
RUN uv pip install --system -e .
3646

37-
# Copy source code
47+
# Let Playwright install any remaining OS-level dependencies for Chromium
48+
RUN python -m playwright install-deps chromium
49+
50+
# Create non-root user for security
51+
RUN groupadd -r playwright && useradd -r -g playwright -G audio,video playwright \
52+
&& mkdir -p /home/playwright/Downloads \
53+
&& chown -R playwright:playwright /home/playwright \
54+
&& chown -R playwright:playwright /app
55+
56+
# Copy entrypoint script first (as root)
57+
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
58+
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
59+
60+
# Copy source code (before switching user)
3861
COPY . .
3962

63+
# Set ownership of the app directory (as root)
64+
RUN chown -R playwright:playwright /app
65+
66+
# Switch to non-root user
67+
USER playwright
68+
69+
# Set Playwright environment variables
70+
ENV PLAYWRIGHT_BROWSERS_PATH=/home/playwright/.cache/ms-playwright
71+
72+
# Install browsers as playwright user
73+
RUN python -m playwright install chromium
74+
75+
# Create cache directories
76+
RUN mkdir -p /home/playwright/.cache/ms-playwright \
77+
&& mkdir -p /app/cache/html \
78+
&& mkdir -p /app/cache/images
79+
4080
# Expose port (if running web service)
4181
EXPOSE 8000
4282

43-
# Default command
44-
CMD ["python", "-c", "from piedomains.api import DomainClassifier; print('piedomains v0.4.0 ready!')"]
83+
# Set entrypoint
84+
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
85+
86+
# Health check to verify installation
87+
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
88+
CMD python -c "import piedomains; print('✓ piedomains ready')" || exit 1
89+
90+
# Default command - empty, so the entrypoint falls back to an interactive shell
91+
CMD []

docker-entrypoint.sh

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/bin/bash
# Docker entrypoint script for the piedomains container.
#
# Installs the Playwright Chromium browser at container start when it is not
# already present, verifies that piedomains can be imported, and then hands
# control to the requested command (or an interactive bash shell).
#
# Environment:
#   PLAYWRIGHT_BROWSERS_PATH          Directory Playwright stores browsers in.
#                                     Used as the cache directory to inspect;
#                                     defaults to the path baked into the image.
#   PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD  Set to 1 to skip the install step.

set -e

# Follow PLAYWRIGHT_BROWSERS_PATH when it is set so that the "already
# installed" check looks in the same directory Playwright installs into.
BROWSER_CACHE_DIR="${PLAYWRIGHT_BROWSERS_PATH:-/home/playwright/.cache/ms-playwright}"
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD="${PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD:-0}"

echo "🐳 Starting piedomains container..."
11+
12+
#######################################
# Report whether a Chromium build is present in the browser cache.
# Globals:   BROWSER_CACHE_DIR (read) - used when no argument is given
# Arguments: $1 - cache directory to inspect (optional)
# Returns:   0 if an entry named chromium-* or chromium_headless_shell-*
#            exists in the directory, 1 otherwise (including when the
#            directory is empty, missing, or not configured)
#######################################
check_browsers() {
  local cache_dir="${1:-${BROWSER_CACHE_DIR:-}}"
  local candidate

  # Without a directory the globs below would probe the filesystem root.
  if [[ -z "$cache_dir" ]]; then
    return 1
  fi

  # An unmatched glob stays literal, so the -e test also covers "no match".
  for candidate in "$cache_dir"/chromium-* "$cache_dir"/chromium_headless_shell-*; do
    if [[ -e "$candidate" ]]; then
      return 0
    fi
  done

  return 1
}
22+
23+
#######################################
# Download the Playwright Chromium build, retrying when an attempt fails.
# Globals:   BROWSER_CACHE_DIR (read) - directory created before downloading
# Arguments: $1 - maximum number of attempts (optional, default 3)
#            $2 - seconds to sleep between attempts (optional, default 2)
# Outputs:   progress messages on stdout, failure diagnostics on stderr
# Returns:   0 as soon as one attempt succeeds, 1 otherwise
#######################################
install_browsers() {
  local -r max_attempts="${1:-3}"
  local -r retry_delay="${2:-2}"
  local -r cache_dir="${BROWSER_CACHE_DIR:-}"
  local attempt=0

  if [[ ! "$max_attempts" =~ ^[0-9]+$ ]]; then
    echo "❌ Invalid attempt count: $max_attempts" >&2
    return 1
  fi

  echo "📦 Installing Playwright browsers (this may take a few minutes)..."

  # This function is invoked from an `if ! ...` condition, where `set -e` is
  # suspended, so a failing mkdir would otherwise pass unnoticed. The install
  # is still attempted, matching the previous best-effort behaviour.
  if [[ -z "$cache_dir" ]] || ! mkdir -p -- "$cache_dir"; then
    echo "⚠️ Could not create browser cache directory '$cache_dir'" >&2
  fi

  while (( attempt < max_attempts )); do
    if python -m playwright install chromium; then
      echo "✅ Browsers installed successfully"
      return 0
    fi

    attempt=$((attempt + 1))
    if (( attempt < max_attempts )); then
      echo "⚠️ Browser installation failed, retrying ($attempt/$max_attempts)..." >&2
      sleep "$retry_delay"
    fi
  done

  {
    echo "❌ Failed to install browsers after $max_attempts attempts"
    echo "💡 You can skip browser installation by setting PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1"
    echo "💡 Or mount a pre-downloaded browser cache volume"
  } >&2
  return 1
}
52+
53+
# Startup sequence: make sure browsers are available (unless the caller opted
# out), confirm the package imports, then replace this shell with the
# requested command.
case "$PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD" in
  1)
    echo "⏭️ Skipping browser installation (PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1)"
    ;;
  *)
    if check_browsers; then
      echo "✅ Browsers already installed"
    elif ! install_browsers; then
      # A failed download is not fatal; the container still starts.
      echo "⚠️ Continuing without browsers - some functionality may be limited"
    fi
    ;;
esac

# Abort early when the package itself is not importable.
echo "🔍 Verifying piedomains installation..."
if ! python -c "import piedomains; print('✅ piedomains imported successfully')"; then
  echo "❌ piedomains import failed"
  exit 1
fi
echo "🚀 piedomains container ready!"

# Hand off: run the supplied command if there is one, otherwise open bash.
if [ "$#" -gt 0 ]; then
  echo "🏃 Executing: $*"
  exec "$@"
fi

echo "🐚 Starting interactive shell..."
exec bash

0 commit comments

Comments
 (0)