Skip to content

Commit 1a12376

Browse files
authored
Merge pull request lightspeed-core#988 from asimurka/azure-entra-id-final
LCORE-861: Azure Entra ID token management
2 parents 6350993 + 4550d0b commit 1a12376

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+1499
-538
lines changed

.github/workflows/e2e_tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ jobs:
6060
6161
cp "${CONFIG_FILE}" lightspeed-stack.yaml
6262
echo "✅ Configuration loaded successfully"
63-
63+
6464
- name: Select and configure run.yaml
6565
env:
6666
CONFIG_ENVIRONMENT: ${{ matrix.environment || 'ci' }}
@@ -100,7 +100,7 @@ jobs:
100100
echo "=== Configuration Summary ==="
101101
echo "Deployment mode: ${{ matrix.mode }}"
102102
echo "Environment: ${{ matrix.environment }}"
103-
echo "Source config: tests/e2e/configs/run-ci.yaml"
103+
echo "Source config: tests/e2e/configs/run-${{ matrix.environment }}.yaml"
104104
echo ""
105105
echo "=== Configuration Preview ==="
106106
echo "Providers: $(grep -c "provider_id:" run.yaml)"

.github/workflows/e2e_tests_providers.yaml

Lines changed: 21 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,21 @@ jobs:
5252
echo "=== Recent commits ==="
5353
git log --oneline -5
5454
55+
- name: Add Azure Entra ID config block to all test configs
56+
if: matrix.environment == 'azure'
57+
run: |
58+
echo "Adding azure_entra_id configuration block to all test configs..."
59+
for config in tests/e2e/configuration/*/lightspeed-stack*.yaml; do
60+
if [ -f "$config" ]; then
61+
echo "" >> "$config"
62+
echo "azure_entra_id:" >> "$config"
63+
echo " tenant_id: \${env.TENANT_ID}" >> "$config"
64+
echo " client_id: \${env.CLIENT_ID}" >> "$config"
65+
echo " client_secret: \${env.CLIENT_SECRET}" >> "$config"
66+
echo "✅ Added to: $config"
67+
fi
68+
done
69+
5570
- name: Load lightspeed-stack.yaml configuration
5671
run: |
5772
MODE="${{ matrix.mode }}"
@@ -66,32 +81,6 @@ jobs:
6681
6782
cp "${CONFIG_FILE}" lightspeed-stack.yaml
6883
echo "✅ Configuration loaded successfully"
69-
70-
- name: Get Azure API key (access token)
71-
if: matrix.environment == 'azure'
72-
id: azure_token
73-
env:
74-
CLIENT_ID: ${{ secrets.CLIENT_ID }}
75-
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
76-
TENANT_ID: ${{ secrets.TENANT_ID }}
77-
run: |
78-
echo "Requesting Azure API token..."
79-
RESPONSE=$(curl -s -X POST \
80-
-H "Content-Type: application/x-www-form-urlencoded" \
81-
-d "client_id=$CLIENT_ID&scope=https://cognitiveservices.azure.com/.default&client_secret=$CLIENT_SECRET&grant_type=client_credentials" \
82-
"https://login.microsoftonline.com/$TENANT_ID/oauth2/v2.0/token")
83-
84-
echo "Response received. Extracting access_token..."
85-
ACCESS_TOKEN=$(echo "$RESPONSE" | jq -r '.access_token')
86-
87-
if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" == "null" ]; then
88-
echo "❌ Failed to obtain Azure access token. Response:"
89-
echo "$RESPONSE"
90-
exit 1
91-
fi
92-
93-
echo "✅ Successfully obtained Azure access token."
94-
echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV
9584
9685
- name: Save VertexAI service account key to file
9786
if: matrix.environment == 'vertexai'
@@ -198,7 +187,9 @@ jobs:
198187
if: matrix.mode == 'server'
199188
env:
200189
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
201-
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
190+
TENANT_ID: ${{ secrets.TENANT_ID }}
191+
CLIENT_ID: ${{ secrets.CLIENT_ID }}
192+
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
202193
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
203194
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
204195
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
@@ -227,7 +218,9 @@ jobs:
227218
if: matrix.mode == 'library'
228219
env:
229220
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
230-
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
221+
TENANT_ID: ${{ secrets.TENANT_ID }}
222+
CLIENT_ID: ${{ secrets.CLIENT_ID }}
223+
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
231224
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
232225
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
233226
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}

.tekton/lightspeed-stack-pull-request.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ spec:
5555
],
5656
"requirements_build_files": ["requirements-build.txt"],
5757
"binary": {
58-
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,regex,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
58+
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,einops,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
5959
"os": "linux",
6060
"arch": "x86_64,aarch64",
6161
"py_version": "312"

.tekton/lightspeed-stack-push.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ spec:
4747
],
4848
"requirements_build_files": ["requirements-build.txt"],
4949
"binary": {
50-
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,regex,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
50+
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,einops,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
5151
"os": "linux",
5252
"arch": "x86_64,aarch64",
5353
"py_version": "312"

Makefile

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,17 @@ PYTHON_REGISTRY = pypi
1010
TORCH_VERSION := 2.9.0
1111

1212

13+
# Default configuration files (override with: make run CONFIG=myconfig.yaml)
14+
CONFIG ?= lightspeed-stack.yaml
15+
LLAMA_STACK_CONFIG ?= run.yaml
16+
1317
run: ## Run the service locally
14-
uv run src/lightspeed_stack.py
18+
uv run src/lightspeed_stack.py -c $(CONFIG)
19+
20+
run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
21+
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
22+
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
23+
uv run llama stack run $(LLAMA_STACK_CONFIG)
1524

1625
test-unit: ## Run the unit tests
1726
@echo "Running unit tests..."

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,8 @@ __Note__: Support for individual models is dependent on the specific inference p
203203
| RHOAI (vLLM)| meta-llama/Llama-3.2-1B-Instruct | Yes | remote::vllm | [1](tests/e2e-prow/rhoai/configs/run.yaml) |
204204
| RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
205205
| RHEL AI (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhelai.yaml) |
206-
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
207-
| Azure | o1, o1-mini | No | remote::azure | |
206+
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-4o-mini, o3-mini, o4-mini, o1| Yes | remote::azure | [1](examples/azure-run.yaml) |
207+
| Azure | gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o1-mini | No or limited | remote::azure | |
208208
| VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) |
209209
| WatsonX | meta-llama/llama-3-3-70b-instruct | Yes | remote::watsonx | [1](examples/watsonx-run.yaml) |
210210

docker-compose-library.yaml

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,16 @@ services:
1515
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
1616
- ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z
1717
environment:
18+
# LLM Provider API Keys
1819
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
1920
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
2021
# OpenAI
2122
- OPENAI_API_KEY=${OPENAI_API_KEY}
2223
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4o-mini}
23-
# Azure
24-
- AZURE_API_KEY=${AZURE_API_KEY:-}
24+
# Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically in Python)
25+
- TENANT_ID=${TENANT_ID:-}
26+
- CLIENT_ID=${CLIENT_ID:-}
27+
- CLIENT_SECRET=${CLIENT_SECRET:-}
2528
# RHAIIS
2629
- RHAIIS_URL=${RHAIIS_URL:-}
2730
- RHAIIS_API_KEY=${RHAIIS_API_KEY:-}

docker-compose.yaml

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,17 @@ services:
1212
- ./run.yaml:/opt/app-root/run.yaml:Z
1313
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
1414
- ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z
15+
- ./lightspeed-stack.yaml:/opt/app-root/lightspeed-stack.yaml:z
1516
environment:
1617
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
1718
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
1819
# OpenAI
1920
- OPENAI_API_KEY=${OPENAI_API_KEY}
2021
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4o-mini}
21-
# Azure
22-
- AZURE_API_KEY=${AZURE_API_KEY}
22+
# Azure Entra ID credentials (AZURE_API_KEY is passed via provider_data at request time)
23+
- TENANT_ID=${TENANT_ID:-}
24+
- CLIENT_ID=${CLIENT_ID:-}
25+
- CLIENT_SECRET=${CLIENT_SECRET:-}
2326
# RHAIIS
2427
- RHAIIS_URL=${RHAIIS_URL}
2528
- RHAIIS_API_KEY=${RHAIIS_API_KEY}
@@ -56,10 +59,13 @@ services:
5659
ports:
5760
- "8080:8080"
5861
volumes:
59-
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
62+
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z
6063
environment:
6164
- OPENAI_API_KEY=${OPENAI_API_KEY}
62-
- AZURE_API_KEY=${AZURE_API_KEY}
65+
# Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically)
66+
- TENANT_ID=${TENANT_ID:-}
67+
- CLIENT_ID=${CLIENT_ID:-}
68+
- CLIENT_SECRET=${CLIENT_SECRET:-}
6369
depends_on:
6470
llama-stack:
6571
condition: service_healthy

0 commit comments

Comments (0)