diff --git a/demos/continuous_batching/rag/README.md b/demos/continuous_batching/rag/README.md index a5074a78fe..fc87405d81 100644 --- a/demos/continuous_batching/rag/README.md +++ b/demos/continuous_batching/rag/README.md @@ -60,17 +60,14 @@ ovms --add_to_config --config_path c:\models\config.json --model_name OpenVINO/b :::{tab-item} With Docker **Required:** Docker Engine installed ```bash -git clone https://github.com/openvinotoolkit/model_server.git -cd model_server -make python_image mkdir models -docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:py --pull --model_repository_path /models --source_model meta-llama/Meta-Llama-3-8B-Instruct --task text_generation --weight-format int8 -docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:py --pull --model_repository_path /models --source_model Alibaba-NLP/gte-large-en-v1.5 --task embeddings --weight-format int8 -docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:py --pull --model_repository_path /models --source_model BAAI/bge-reranker-large --task rerank --weight-format int8 +docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:latest-py --pull --model_repository_path /models --source_model meta-llama/Meta-Llama-3-8B-Instruct --task text_generation --weight-format int8 +docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:latest-py --pull --model_repository_path /models --source_model Alibaba-NLP/gte-large-en-v1.5 --task embeddings 
--weight-format int8 +docker run --user $(id -u):$(id -g) -e HF_HOME=/hf_home/cache --rm -v $(pwd)/models:/models:rw -v /opt/home/user/.cache/huggingface/:/hf_home/cache openvino/model_server:latest-py --pull --model_repository_path /models --source_model BAAI/bge-reranker-large --task rerank --weight-format int8 -docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:py --add_to_config --config_path /models/config.json --model_name meta-llama/Meta-Llama-3-8B-Instruct --model_path meta-llama/Meta-Llama-3-8B-Instruct --weight-format int8 -docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:py --add_to_config --config_path /models/config.json --model_name Alibaba-NLP/gte-large-en-v1.5 --model_path Alibaba-NLP/gte-large-en-v1.5 --weight-format int8 -docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:py --add_to_config --config_path /models/config.json --model_name BAAI/bge-reranker-large --model_path BAAI/bge-reranker-large --weight-format int8 +docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest-py --add_to_config --config_path /models/config.json --model_name meta-llama/Meta-Llama-3-8B-Instruct --model_path meta-llama/Meta-Llama-3-8B-Instruct --weight-format int8 +docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest-py --add_to_config --config_path /models/config.json --model_name Alibaba-NLP/gte-large-en-v1.5 --model_path Alibaba-NLP/gte-large-en-v1.5 --weight-format int8 +docker run --user $(id -u):$(id -g) --rm -v $(pwd)/models:/models:rw openvino/model_server:latest-py --add_to_config --config_path /models/config.json --model_name BAAI/bge-reranker-large --model_path BAAI/bge-reranker-large --weight-format int8 ``` ::: diff --git a/demos/python_demos/Dockerfile.ubuntu b/demos/python_demos/Dockerfile.ubuntu index 12ee31ed2c..0b61ed411b 100644 ---
a/demos/python_demos/Dockerfile.ubuntu +++ b/demos/python_demos/Dockerfile.ubuntu @@ -24,7 +24,7 @@ RUN apt update && apt install -y python3-pip git COPY requirements.txt . ENV PIP_BREAK_SYSTEM_PACKAGES=1 RUN BUILD_CUDA_EXT=0 pip3 install -r requirements.txt --no-cache-dir -RUN opt_in_out --opt_out +RUN opt_in_out --opt_out && cp -R /root/intel / RUN python3 -c "import openvino" && optimum-cli export openvino --help | grep -q "usage: optimum-cli export openvino" && convert_tokenizer --help | grep -q "Converts tokenizers" USER ovms ENTRYPOINT [ "/ovms/bin/ovms" ] diff --git a/demos/python_demos/clip_image_classification/README.md b/demos/python_demos/clip_image_classification/README.md index b9b7ab2f77..d1aa044ffd 100644 --- a/demos/python_demos/clip_image_classification/README.md +++ b/demos/python_demos/clip_image_classification/README.md @@ -10,14 +10,6 @@ The picture below shows the execution flow in the graph. ![Mediapipe graph image](graph.png) -## Build image - -```bash -git clone https://github.com/openvinotoolkit/model_server.git -cd model_server -make python_image -``` - ## Install client requirements ```bash @@ -47,7 +39,7 @@ Mount the `./servable` which contains: - `graph.pbtxt` - which defines MediaPipe graph containing python nodes ```bash -docker run -d --rm -p 9000:9000 -p 8000:8000 -v ${PWD}/servable:/workspace -v ${PWD}/model:/model/ openvino/model_server:py --config_path /workspace/config.json --port 9000 --rest_port 8000 +docker run -d --rm -p 9000:9000 -p 8000:8000 -v ${PWD}/servable:/workspace -v ${PWD}/model:/model/ openvino/model_server:latest-py --config_path /workspace/config.json --port 9000 --rest_port 8000 ``` ## Requesting detection name with grpc request diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 8edac63c9d..21fe39c30f 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -38,7 +38,7 @@ It is recommended to get started with [tested models](https://openvinotoolkit.gi ## Accuracy in Agentic Use 
Cases with Tools Calling -While using agentic models with tools calling capabilities, it is important to set proper tools parser and reasoning parser in the runtime parameters. For some models like, qwen3-coder, gpt-oss-20b, mistral, it is important to use correct, tunned chat template. Check the [agentic demo](../demos/continuous_batching/agentic_ai/README.md) +While using agentic models with tools calling capabilities, it is important to set the proper tools parser and reasoning parser in the runtime parameters. For some models, such as qwen3-coder, gpt-oss-20b, and mistral, it is important to use a correct, tuned chat template. Check the [agentic demo](../demos/continuous_batching/agentic_ai/README.md)