Skip to content

Commit 24777db

Browse files
author
Andy Dang
committed
Open source the WhyLabs container
0 parents  commit 24777db

File tree

454 files changed

+120096
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

454 files changed

+120096
-0
lines changed

.bumpversion.cfg

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
[bumpversion]
2+
current_version = 3.0.0
3+
tag = False
4+
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
5+
serialize =
6+
{major}.{minor}.{patch}-{release}{build}
7+
{major}.{minor}.{patch}
8+
9+
[bumpversion:part:release]
10+
optional_value = prod
11+
first_value = dev
12+
values =
13+
dev
14+
prod
15+
16+
[bumpversion:file:pyproject.toml]
17+
search = version = "{current_version}" # bump2version
18+
replace = version = "{new_version}" # bump2version
19+
20+
[bumpversion:file:version.txt]
21+
search = {current_version}
22+
replace = {new_version}
23+
24+
[bumpversion:file:docs/conf.py]
25+
search = release = "{current_version}"
26+
replace = release = "{new_version}"
27+
28+
[bumpversion:file:whylogs_container/whylabs/container/version.py]
29+
search = version = "{current_version}"
30+
replace = version = "{new_version}"
31+
32+
[bumpversion:file:Makefile]
33+
search = version := {current_version}
34+
replace = version := {new_version}
35+
36+
[bumpversion:file:whylogs-container-client/pyproject.toml]
37+
search = version = "{current_version}"
38+
replace = version = "{new_version}"
39+
40+
[bumpversion:file:openapi-generator.yaml]
41+
search = package_version_override: {current_version}
42+
replace = package_version_override: {new_version}
43+
44+
[bumpversion:file:.github/workflows/release.yaml]
45+
search = VERSION: {current_version}
46+
replace = VERSION: {new_version}
47+
48+
[bumpversion:file:.github/workflows/workflow.yaml]
49+
search = VERSION: {current_version}
50+
replace = VERSION: {new_version}
51+
52+
[bumpversion:file:example_repo/examples/configure_container_yaml/Dockerfile]
53+
search = FROM registry.gitlab.com/whylabs/langkit-container:{current_version}
54+
replace = FROM registry.gitlab.com/whylabs/langkit-container:{new_version}
55+
56+
[bumpversion:file:example_repo/examples/configure_container_python/Dockerfile]
57+
search = FROM registry.gitlab.com/whylabs/langkit-container:{current_version}
58+
replace = FROM registry.gitlab.com/whylabs/langkit-container:{new_version}
59+
60+
[bumpversion:file:example_repo/examples/custom_model/Dockerfile]
61+
search = FROM registry.gitlab.com/whylabs/langkit-container:{current_version}
62+
replace = FROM registry.gitlab.com/whylabs/langkit-container:{new_version}
63+
64+
[bumpversion:file:example_repo/examples/llm_segments/Dockerfile]
65+
search = FROM registry.gitlab.com/whylabs/langkit-container:{current_version}
66+
replace = FROM registry.gitlab.com/whylabs/langkit-container:{new_version}
67+
68+
[bumpversion:file:example_repo/examples/multi_tenant/Dockerfile]
69+
search = FROM registry.gitlab.com/whylabs/langkit-container:{current_version}
70+
replace = FROM registry.gitlab.com/whylabs/langkit-container:{new_version}
71+
72+
[bumpversion:file:example_repo/examples/configure_container_python/Makefile]
73+
search = version := {current_version}
74+
replace = version := {new_version}
75+
76+
[bumpversion:file:example_repo/examples/configure_container_yaml/Makefile]
77+
search = version := {current_version}
78+
replace = version := {new_version}
79+
80+
[bumpversion:file:example_repo/examples/container_library/Makefile]
81+
search = version := {current_version}
82+
replace = version := {new_version}
83+
84+
[bumpversion:file:example_repo/examples/custom_model/Makefile]
85+
search = version := {current_version}
86+
replace = version := {new_version}
87+
88+
[bumpversion:file:example_repo/examples/llm_segments/Makefile]
89+
search = version := {current_version}
90+
replace = version := {new_version}
91+
92+
[bumpversion:file:example_repo/examples/s3_configuration/Makefile]
93+
search =
94+
DOCKER_IMAGE = registry.gitlab.com/whylabs/langkit-container:{current_version}
95+
version := {current_version}
96+
replace =
97+
DOCKER_IMAGE = registry.gitlab.com/whylabs/langkit-container:{new_version}
98+
version := {new_version}
99+
100+
[bumpversion:file:example_repo/examples/no_configuration/Makefile]
101+
search =
102+
DOCKER_IMAGE = registry.gitlab.com/whylabs/langkit-container:{current_version}
103+
version := {current_version}
104+
replace =
105+
DOCKER_IMAGE = registry.gitlab.com/whylabs/langkit-container:{new_version}
106+
version := {new_version}
107+
108+
[bumpversion:file:policy-editor/index.html]
109+
search = <title>{current_version} WhyLabs Policy Editor</title>
110+
replace = <title>{new_version} WhyLabs Policy Editor</title>

.devcontainer/cache/.gitkeep

Whitespace-only changes.

.devcontainer/devcontainer.env

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
WHYLABS_API_KEY=
2+
GITLAB_API_KEY=
3+
CONTAINER_PASSWORD=local
4+
CACHE_ASSETS_RUNTIME=true
5+
AUTO_PULL_WHYLABS_POLICY_MODEL_IDS=
6+
CONFIG_SYNC_INTERVAL=1

.devcontainer/devcontainer.json

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"name": "WhyLabs Container",
3+
"build": {
4+
"context": "..",
5+
"options": [ "--platform=linux/amd64"],
6+
"dockerfile": "../Dockerfile.dev"
7+
},
8+
"forwardPorts": [8000],
9+
"runArgs": [
10+
"--platform",
11+
"linux/amd64",
12+
"--env-file",
13+
".devcontainer/devcontainer.env"
14+
],
15+
"remoteEnv": {
16+
"CACHE_ASSETS_RUNTIME": "True"
17+
},
18+
"postCreateCommand": "poetry config http-basic.whylabs_container_gitlab __token__ $GITLAB_API_KEY; make install",
19+
"remoteUser": "root",
20+
"mounts": [
21+
"type=bind,source=${localWorkspaceFolder}/.devcontainer/cache/,target=/root/.cache/",
22+
"source=guardrails-bashhistory,target=/commandhistory,type=volume"
23+
]
24+
}

.dockerignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
data
2+
.venv
3+
*.pyc
4+
*__pycache__*
5+
# Ignore validation schemas during dev
6+
whylogs_container/whylogs_config/*
7+
!whylogs_container/whylogs_config/default.yaml
8+
!whylogs_container/whylogs_config/embeddings.yaml

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
whylabs_asset_cache/ filter=lfs diff=lfs merge=lfs -text
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
name: "Docker Build Steps"
2+
description: "Common Docker build steps"
3+
inputs:
4+
langkit_whylabs_api_key:
5+
description: "api key for org-0 used for langkit assets"
6+
required: true
7+
openai_api_key:
8+
description: "api key for calling openai, which happens during the asset caching phase of the llm container build."
9+
required: true
10+
gitlab_pypi_token:
11+
required: false
12+
description: "A gitlab token from the whylabs-llm-toolkit repo that can be used to download python packages. Only used in llm build."
13+
type:
14+
description: "main or llm"
15+
required: true
16+
workdir:
17+
description: "Where to put generated artifacts/images."
18+
default: "./docker"
19+
required: false
20+
version:
21+
description: "current container publish version"
22+
required: true
23+
upload_artifact:
24+
description: "if true, upload the generated image to the artifact server"
25+
required: true
26+
llm_default_encoder:
27+
description: "The encoder to use during the container build. Only applies to llm"
28+
required: false
29+
30+
runs:
31+
using: "composite"
32+
steps:
33+
- name: Free Disk Space (Ubuntu)
  # Pinned to a release tag instead of the moving `main` branch so that an
  # upstream push cannot silently change what runs in this job (later steps
  # in this action receive API keys and registry tokens as inputs).
  uses: jlumbroso/free-disk-space@v1.3.1
  with:
    # Setting this to "true" frees about 6 GB, but might remove tools that
    # are actually needed, so the tool cache is kept.
    tool-cache: false
    # All of these default to true; set any of them to "false" if the
    # workflow turns out to need what they remove.
    android: true
    dotnet: true
    haskell: true
    large-packages: true
    docker-images: true
    swap-storage: true
47+
48+
# Build the policy editor typescript project, required for the llm build
49+
- name: Use Node.js
50+
if: ${{ inputs.type == 'llm' }}
51+
uses: actions/setup-node@v4
52+
with:
53+
node-version: "20.x"
54+
55+
- name: Build the policy editor
56+
if: ${{ inputs.type == 'llm' }}
57+
shell: bash
58+
run: make policy-editor
59+
60+
- uses: actions/setup-python@v4
61+
if: ${{ inputs.type == 'llm' }}
62+
name: Install Python
63+
with:
64+
python-version: "3.10.8"
65+
66+
- uses: Gr1N/setup-poetry@v8
67+
if: ${{ inputs.type == 'llm' }}
68+
name: Install poetry
69+
with:
70+
poetry-version: 1.7.1
71+
72+
- name: Remove python dependencies to save space
73+
if: ${{ inputs.type == 'llm' }}
74+
shell: bash
75+
run: |
76+
rm -rf .venv/
77+
poetry cache clear PyPI --all
78+
poetry cache clear torch --all
79+
echo "Available storage:"
80+
sudo df -h
81+
echo
82+
83+
- name: Create docker dirs
84+
shell: bash
85+
run: mkdir -p ${{ inputs.workdir }}/llm/ && mkdir -p ${{ inputs.workdir }}/main/
86+
87+
- name: Set up QEMU
88+
uses: docker/setup-qemu-action@v3
89+
90+
- name: Set up Docker Buildx
91+
uses: docker/setup-buildx-action@v3
92+
93+
- name: Determine the tag postfix
  shell: bash
  id: tag_logic
  # Despite the step name, the TAG output is the image repository that the
  # later steps combine with the version (<TAG>:<version>).
  # Fails fast on an unknown `type`: without the final branch TAG would be
  # left unset and the build / health-check steps would operate on an image
  # reference of the form ":<version>".
  run: |
    if [[ "${{ inputs.type }}" == "main" ]]; then
      echo "TAG=registry.gitlab.com/whylabs/whylogs-container" >> "$GITHUB_OUTPUT"
    elif [[ "${{ inputs.type }}" == "llm" ]]; then
      echo "TAG=registry.gitlab.com/whylabs/langkit-container" >> "$GITHUB_OUTPUT"
    else
      echo "::error::Unsupported type '${{ inputs.type }}': expected 'main' or 'llm'"
      exit 1
    fi
102+
103+
- name: Set version
  shell: bash
  id: set-version
  # Emits the `version` output: the publish version, suffixed with
  # "_<encoder>" when an llm_default_encoder input was supplied.
  run: |
    resolved="${{ inputs.version }}"
    if [[ -n "${{ inputs.llm_default_encoder }}" ]]; then
      resolved="${resolved}_${{ inputs.llm_default_encoder }}"
    fi
    echo "version=${resolved}" >> $GITHUB_OUTPUT
112+
113+
- name: Set tags
114+
shell: bash
115+
id: set-tags
116+
# Tag the image as <image name>:<version>_<encoder> (or just <image name>:<version> when no encoder is given). If the encoder is the default encoder AllMiniLML6V2,
117+
# then also tag to <image name>:<version> (no encoder) so people can just pull the latest version without specifying the encoder.
118+
run: |
119+
TAGS="${{ steps.tag_logic.outputs.TAG }}:${{ steps.set-version.outputs.version }}"
120+
if [[ "${{ inputs.llm_default_encoder }}" == "AllMiniLML6V2" ]]; then
121+
TAGS="$TAGS"$'\n'"${{ steps.tag_logic.outputs.TAG }}:${{ inputs.version }}"
122+
fi
123+
echo "TAGS<<EOF" >> $GITHUB_OUTPUT
124+
echo "$TAGS" >> $GITHUB_OUTPUT
125+
echo "EOF" >> $GITHUB_OUTPUT
126+
127+
- name: Build Docker container
128+
uses: docker/build-push-action@v5
129+
with:
130+
context: .
131+
file: ./Dockerfile.${{ inputs.type }}
132+
load: true
133+
push: false
134+
cache-to: type=local,dest=${{ inputs.workdir }}/docker # This will be on a larger mounted file system
135+
tags: ${{ steps.set-tags.outputs.TAGS }}
136+
outputs: type=docker,dest=${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar
137+
secrets: |
138+
"whylabs_api_key=${{ inputs.langkit_whylabs_api_key }}"
139+
"openai_api_key=${{ inputs.openai_api_key }}"
140+
"pypi_api_key=${{ inputs.gitlab_pypi_token }}"
141+
build-args: |
142+
DEFAULT_ENCODER=${{ inputs.llm_default_encoder }}
143+
144+
- name: Install Trivy
  shell: bash
  # Registers the Aqua Security apt repository (key stored under
  # /usr/share/keyrings) and installs trivy from it.
  # `-y` is passed to every install: the runner has no TTY, so apt-get
  # aborts instead of prompting whenever it needs confirmation (for example
  # when extra dependencies have to be pulled in).
  run: |
    sudo apt-get install -y wget apt-transport-https gnupg
    wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | gpg --dearmor | sudo tee /usr/share/keyrings/trivy.gpg > /dev/null
    echo "deb [signed-by=/usr/share/keyrings/trivy.gpg] https://aquasecurity.github.io/trivy-repo/deb generic main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
    sudo apt-get update
    sudo apt-get install -y trivy
152+
153+
- name: Download trivy vulnerability database
154+
uses: nick-fields/retry@v2
155+
with:
156+
timeout_minutes: 5
157+
max_attempts: 3
158+
command: trivy image --download-db-only --db-repository public.ecr.aws/aquasecurity/trivy-db
159+
160+
- name: Run Trivy image scan
161+
shell: bash
162+
run: |
163+
trivy image \
164+
--db-repository public.ecr.aws/aquasecurity/trivy-db \
165+
--scanners vuln \
166+
--severity HIGH,CRITICAL \
167+
--exit-code 1 \
168+
--ignore-unfixed \
169+
--input ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar
170+
171+
- name: Make sure container starts
172+
shell: bash
173+
run: |
174+
docker load -i ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar
175+
./scripts/check-health.sh ${{ inputs.type }} ${{ steps.tag_logic.outputs.TAG }}:${{ steps.set-version.outputs.version }}
176+
177+
# The build host is running out of disk space
178+
- name: Delete docker images and cache
179+
shell: bash
180+
run: docker system prune -a -f --volumes
181+
182+
- name: Show file system usage at the end
183+
shell: bash
184+
run: df -h
185+
186+
- name: Show image tar size
187+
shell: bash
188+
run: ls -lh ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar
189+
190+
- name: Fail if image size is too large
191+
if: ${{ inputs.type == 'main' }}
192+
shell: bash
193+
run: test $(du -m ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar | cut -f1) -le 200 || exit 1
194+
195+
# The size check is set to the size of the largest encoder variant
196+
- name: Fail if image size is too large
197+
if: ${{ inputs.type == 'llm' }}
198+
shell: bash
199+
run: test $(du -m ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar | cut -f1) -le 3400 || exit 1
200+
201+
- name: Set artifact name
  shell: bash
  id: set-artifact-name
  # Emits the `artifact_name` output: "container-<type>", suffixed with
  # "_<encoder>" when an llm_default_encoder input was supplied.
  run: |
    artifact="container-${{ inputs.type }}"
    if [[ -n "${{ inputs.llm_default_encoder }}" ]]; then
      artifact="${artifact}_${{ inputs.llm_default_encoder }}"
    fi
    echo "artifact_name=${artifact}" >> $GITHUB_OUTPUT
210+
211+
- name: Upload container artifact
212+
if: ${{ inputs.upload_artifact == 'true' }}
213+
uses: actions/upload-artifact@v4
214+
with:
215+
name: ${{ steps.set-artifact-name.outputs.artifact_name }}
216+
path: ${{ inputs.workdir }}/${{ inputs.type }}/whylogs-container-${{ inputs.type }}.tar

0 commit comments

Comments
 (0)