10 changes: 5 additions & 5 deletions tests/e2e-prow/rhoai/configs/run.yaml
@@ -36,10 +36,10 @@ providers:
      api_token: ${env.VLLM_API_KEY}
      tls_verify: false
      max_tokens: 1024
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
+  # - provider_id: openai
+  #   provider_type: remote::openai
+  #   config:
+  #     api_key: ${env.OPENAI_API_KEY}
  - config: {}
    provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
@@ -144,7 +144,7 @@ registered_resources:
  shields:
  - shield_id: llama-guard
    provider_id: llama-guard
-    provider_shield_id: openai/gpt-4o-mini
+    provider_shield_id: vllm/meta-llama/Llama-3.2-1B-Instruct
  datasets: []
  scoring_fns: []
  benchmarks: []
31 changes: 31 additions & 0 deletions tests/e2e-prow/rhoai/manifests/gpu/cluster-policy.yaml
@@ -0,0 +1,31 @@
apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
  name: gpu-cluster-policy
spec:
  daemonsets:
    tolerations:
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
    - key: gpu
      operator: Exists
      effect: NoSchedule
  operator:
    defaultRuntime: crio
  driver:
    enabled: true
  toolkit:
    enabled: true
  devicePlugin:
    enabled: true
  dcgm:
    enabled: true
  dcgmExporter:
    enabled: true
  gfd:
    enabled: true
  migManager:
    enabled: false
  nodeStatusExporter:
    enabled: true
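Once the ClusterPolicy reconciles, the device plugin should expose GPUs as allocatable node resources. A minimal verification sketch (assumes a logged-in oc client and a recent oc release that supports jsonpath waits; not part of this PR):

# The GPU operator surfaces overall readiness in .status.state
oc wait clusterpolicy/gpu-cluster-policy --for=jsonpath='{.status.state}'=ready --timeout=15m
# At least one worker should now advertise nvidia.com/gpu capacity
oc get nodes -o custom-columns='NAME:.metadata.name,GPUS:.status.allocatable.nvidia\.com/gpu'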
8 changes: 8 additions & 0 deletions tests/e2e-prow/rhoai/manifests/gpu/create-nfd.yaml
@@ -0,0 +1,8 @@
# Minimal NFD instance
apiVersion: nfd.openshift.io/v1
kind: NodeFeatureDiscovery
metadata:
  name: nfd-instance
  namespace: openshift-nfd
spec:
  instance: ""
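NFD is what lets the GPU operator discover NVIDIA hardware, so a quick sanity check is that nodes carrying an NVIDIA PCI device (vendor ID 10de) picked up the corresponding label. A sketch, not part of this PR:

oc get nodes -l feature.node.kubernetes.io/pci-10de.present=true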
4 changes: 3 additions & 1 deletion tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml
@@ -4,6 +4,8 @@ metadata:
  name: llama-stack-service
  namespace: e2e-rhoai-dsc
spec:
+  imagePullSecrets:
+  - name: quay-lightspeed-pull-secret
  containers:
  - name: llama-stack-container
    env:
@@ -17,7 +19,7 @@ spec:
        secretKeyRef:
          name: vllm-api-key-secret
          key: key
-    image: quay.io/opendatahub/llama-stack:rhoai-v2.25-latest
+    image: ${LLAMA_STACK_IMAGE}
    ports:
    - containerPort: 8321
    volumeMounts:
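The new imagePullSecrets entry assumes quay-lightspeed-pull-secret already exists in e2e-rhoai-dsc; the provisioning step is outside this diff. A minimal sketch with placeholder credentials:

oc create secret docker-registry quay-lightspeed-pull-secret \
  -n e2e-rhoai-dsc \
  --docker-server=quay.io \
  --docker-username="<robot-account>" \
  --docker-password="<robot-token>"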
5 changes: 5 additions & 0 deletions tests/e2e-prow/rhoai/manifests/namespaces/nfd.yaml
@@ -0,0 +1,5 @@
# NFD Namespace
apiVersion: v1
kind: Namespace
metadata:
  name: openshift-nfd
@@ -0,0 +1,5 @@
# NVIDIA GPU Operator Namespace
apiVersion: v1
kind: Namespace
metadata:
  name: nvidia-gpu-operator
22 changes: 21 additions & 1 deletion tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml
@@ -3,4 +3,24 @@ kind: OperatorGroup
metadata:
  name: global-operators
  namespace: openshift-operators
-spec:
+spec:
+---
+# NVIDIA GPU Operator OperatorGroup
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: nvidia-gpu-operator-group
+  namespace: nvidia-gpu-operator
+spec:
+  targetNamespaces:
+  - nvidia-gpu-operator
+---
+# Create OperatorGroup
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: nfd-operator-group
+  namespace: openshift-nfd
+spec:
+  targetNamespaces:
+  - openshift-nfd
27 changes: 26 additions & 1 deletion tests/e2e-prow/rhoai/manifests/operators/operators.yaml
@@ -32,4 +32,29 @@ spec:
  channel: stable
  name: rhods-operator
  source: redhat-operators
-  sourceNamespace: openshift-marketplace
+  sourceNamespace: openshift-marketplace
+---
+# NVIDIA GPU Operator Subscription
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: gpu-operator-certified
+  namespace: nvidia-gpu-operator
+spec:
+  channel: stable
+  installPlanApproval: Automatic
+  name: gpu-operator-certified
+  source: certified-operators
+  sourceNamespace: openshift-marketplace
+---
+# Node Feature Discovery (NFD) Operator Subscription
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: nfd
+  namespace: openshift-nfd
+spec:
+  channel: stable
+  name: nfd
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
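Both subscriptions resolve through OLM (the GPU operator with automatic install-plan approval), so the installs can be confirmed before the ClusterPolicy is applied. A minimal check, not part of this PR:

# Each CSV should eventually report PHASE=Succeeded
oc get csv -n nvidia-gpu-operator
oc get csv -n openshift-nfd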
@@ -0,0 +1,25 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: vllm-model
  namespace: e2e-rhoai-dsc
  annotations:
    serving.kserve.io/deploymentMode: RawDeployment
    sidecar.istio.io/inject: "false"
spec:
  predictor:
    minReplicas: 1
    maxReplicas: 1
    model:
      modelFormat:
        name: vLLM
      runtime: vllm-gpu
      resources:
        limits:
          nvidia.com/gpu: 1
          cpu: "4"
          memory: 20Gi
        requests:
          nvidia.com/gpu: 1
          cpu: "2"
          memory: 16Gi
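KServe marks the InferenceService Ready once the predictor pod is serving, so the pipeline can block on it the same way it already waits for the llama-stack pod. A minimal sketch, not part of this PR:

oc wait inferenceservice/vllm-model \
  -n e2e-rhoai-dsc --for=condition=Ready --timeout=900s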
82 changes: 82 additions & 0 deletions tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml
@@ -0,0 +1,82 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    openshift.io/display-name: vLLM GPU
  name: vllm-gpu
  namespace: e2e-rhoai-dsc
  labels:
    opendatahub.io/dashboard: "true"
spec:
  builtInAdapter:
    modelLoadingTimeoutMillis: 90000
  containers:
  - args:
    - --model
    - meta-llama/Llama-3.2-1B-Instruct
    - --enable-auto-tool-choice
    - --tool-call-parser
    - llama3_json
    - --chat-template
    - /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
Comment on lines +17 to +21
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# Check if the file exists and examine the exact content at lines 17-21
if [ -f "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" ]; then
  echo "=== File content lines 17-21 with special chars shown ==="
  sed -n '17,21p' "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" | cat -A
  echo ""
  echo "=== Full context lines 10-25 ==="
  sed -n '10,25p' "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" | cat -n
else
  echo "File not found"
  find . -name "vllm-runtime-gpu.yaml" -type f
fi

Repository: lightspeed-core/lightspeed-stack

Length of output: 885


🌐 Web query:

vLLM argument parser trailing spaces CLI flags parsing behavior

💡 Result:

Short answer: vLLM’s CLI uses a FlexibleArgumentParser built on Python argparse, so flag-parsing follows standard argparse rules — trailing whitespace inside an argument value is preserved by the parser but the shell will usually strip/unpack whitespace unless you quote the value. For file/path arguments and complex JSON-style args, you must quote or escape spaces; vLLM has reported issues when unescaped spaces appear in local file paths. References and guidance below.

  • vLLM’s parser: vLLM provides FlexibleArgumentParser / argparse helpers (behaves like argparse). Preserve/stripping of trailing spaces is therefore determined by the shell and how you quote the value. [1]
  • Reported problems: there is a vLLM GitHub issue where inference failed when there was a space character in a local file path — shows you must quote/escape paths. [2]
  • CLI docs: many vLLM flags accept JSON or values; docs recommend usual CLI quoting (use --flag="value with space" or --flag=value). Boolean flags are store_true/store_false and are unaffected by trailing-space values. [3]

Practical tips:

  • Quote values containing spaces: --model "/path/with space/model" or --model="/path/with space/model".
  • Use the equals form to avoid shell word-splitting: --json-arg='{"k":"v with space"}'.
  • For boolean flags (store_true/store_false) trailing spaces aren’t meaningful.
  • If you see a bug when quoting correctly, report it with a repro; check/open vLLM issues (the file-path/space issue has been reported). [2]

Sources:
[1] vLLM argparse utilities documentation.
[2] vLLM GitHub issue: "Inference fails when there is space character in local file path."
[3] vLLM CLI reference / flags documentation.


Remove trailing spaces in vLLM flags to prevent startup failure.

Lines 19 and 21 contain trailing spaces after --tool-call-parser and --chat-template. Kubernetes passes the args array directly to the container without shell processing, so these flags become --tool-call-parser and --chat-template (with spaces). The vLLM argparse parser won't recognize these as valid flags and will reject them as unrecognized arguments, causing the container to fail on startup.

Fix
-        - --tool-call-parser 
+        - --tool-call-parser
         - llama3_json
-        - --chat-template 
+        - --chat-template
         - /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
- --enable-auto-tool-choice
- --tool-call-parser
- llama3_json
- --chat-template
- /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
- --enable-auto-tool-choice
- --tool-call-parser
- llama3_json
- --chat-template
- /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
🤖 Prompt for AI Agents
In `@tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml` around lines 17-21,
remove the trailing spaces from the vLLM CLI flags
so Kubernetes doesn't pass malformed args: fix the args entries that contain
"--tool-call-parser " and "--chat-template " to be "--tool-call-parser" and
"--chat-template" (and ensure their corresponding values like "llama3_json" and
"/mnt/chat-template/tool_chat_template_llama3.2_json.jinja" remain separate
array items or properly paired) so vLLM's argparse recognizes the flags and the
container can start.

    - --download-dir
    - /tmp/models-cache
    - --port
    - "8080"
    - --max-model-len
    - "2048"
    - --gpu-memory-utilization
    - "0.9"
    image: ${VLLM_IMAGE}
    name: kserve-container
    env:
    - name: HF_HUB_OFFLINE
      value: "false"
    - name: TRANSFORMERS_OFFLINE
      value: "false"
    - name: HF_DATASETS_OFFLINE
      value: "false"
    - name: HF_HOME
      value: /mnt/models-cache/hf_home
    - name: HUGGING_FACE_HUB_TOKEN
      valueFrom:
        secretKeyRef:
          name: hf-token-secret
          key: token
    - name: VLLM_API_KEY
      valueFrom:
        secretKeyRef:
          name: vllm-api-key-secret
          key: key
    ports:
    - containerPort: 8080
      name: http1
      protocol: TCP
    volumeMounts:
    - name: chat-template
      mountPath: /mnt/chat-template
    - name: models-cache
      mountPath: /mnt/models-cache
    - name: vllm-cache
      mountPath: /.cache
    resources:
      limits:
        nvidia.com/gpu: 1
        cpu: "6"
        memory: 20Gi
      requests:
        nvidia.com/gpu: 1
        cpu: "4"
        memory: 16Gi
  volumes:
  - name: chat-template
    configMap:
      name: vllm-chat-template
  - name: models-cache
    emptyDir: {}
  - name: vllm-cache
    emptyDir: {}
  multiModel: false
  supportedModelFormats:
  - autoSelect: true
    name: vLLM
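The runtime container reads HUGGING_FACE_HUB_TOKEN and VLLM_API_KEY from secrets that must already exist in the namespace. A minimal sketch of creating them (values are placeholders; how the pipeline actually provisions them is outside this diff):

oc create secret generic hf-token-secret -n e2e-rhoai-dsc \
  --from-literal=token="<huggingface-token>"
oc create secret generic vllm-api-key-secret -n e2e-rhoai-dsc \
  --from-literal=key="<api-key>"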
4 changes: 2 additions & 2 deletions tests/e2e-prow/rhoai/pipeline-services.sh
@@ -2,10 +2,10 @@

BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

-oc apply -f "$BASE_DIR/manifests/lightspeed/llama-stack.yaml"
+envsubst < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -f -

oc wait pod/llama-stack-service \
-  -n e2e-rhoai-dsc --for=condition=Ready --timeout=300s
+  -n e2e-rhoai-dsc --for=condition=Ready --timeout=600s

# Get url address of llama-stack pod
oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc
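Because the manifest now carries ${LLAMA_STACK_IMAGE}, that variable must be exported before envsubst runs; otherwise the image field is substituted with an empty string. A minimal sketch of the expected invocation (the image reference below is only an example, taken from the previously hard-coded value):

export LLAMA_STACK_IMAGE="quay.io/opendatahub/llama-stack:rhoai-v2.25-latest"
./tests/e2e-prow/rhoai/pipeline-services.sh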
4 changes: 4 additions & 0 deletions tests/e2e-prow/rhoai/pipeline-vllm.sh
@@ -1,7 +1,11 @@
#!/bin/bash

set -euo pipefail

PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

"$PIPELINE_DIR/scripts/bootstrap.sh" "$PIPELINE_DIR"
"$PIPELINE_DIR/scripts/gpu-setup.sh" "$PIPELINE_DIR"
source "$PIPELINE_DIR/scripts/fetch-vllm-image.sh"
"$PIPELINE_DIR/scripts/deploy-vllm.sh" "$PIPELINE_DIR"
"$PIPELINE_DIR/scripts/get-vllm-pod-info.sh"