10 changes: 5 additions & 5 deletions tests/e2e-prow/rhoai/configs/run.yaml
@@ -36,10 +36,10 @@ providers:
      api_token: ${env.VLLM_API_KEY}
      tls_verify: false
      max_tokens: 1024
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
+  # - provider_id: openai
+  #   provider_type: remote::openai
+  #   config:
+  #     api_key: ${env.OPENAI_API_KEY}
  - config: {}
    provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
@@ -144,7 +144,7 @@ registered_resources:
  shields:
  - shield_id: llama-guard
    provider_id: llama-guard
-    provider_shield_id: openai/gpt-4o-mini
+    provider_shield_id: vllm/meta-llama/Llama-3.2-1B-Instruct
  datasets: []
  scoring_fns: []
  benchmarks: []
31 changes: 31 additions & 0 deletions tests/e2e-prow/rhoai/manifests/gpu/cluster-policy.yaml
@@ -0,0 +1,31 @@
apiVersion: nvidia.com/v1
kind: ClusterPolicy
metadata:
  name: gpu-cluster-policy
spec:
  daemonsets:
    tolerations:
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
    - key: gpu
      operator: Exists
      effect: NoSchedule
  operator:
    defaultRuntime: crio
  driver:
    enabled: true
  toolkit:
    enabled: true
  devicePlugin:
    enabled: true
  dcgm:
    enabled: true
  dcgmExporter:
    enabled: true
  gfd:
    enabled: true
  migManager:
    enabled: false
  nodeStatusExporter:
    enabled: true
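Once the ClusterPolicy reconciles, the device plugin should expose GPUs as allocatable node resources. A minimal verification sketch (assumes a logged-in oc client and a recent oc release that supports jsonpath waits; not part of this PR):

# The GPU operator surfaces overall readiness in .status.state
oc wait clusterpolicy/gpu-cluster-policy --for=jsonpath='{.status.state}'=ready --timeout=15m
# At least one worker should now advertise nvidia.com/gpu capacity
oc get nodes -o custom-columns='NAME:.metadata.name,GPUS:.status.allocatable.nvidia\.com/gpu'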
8 changes: 8 additions & 0 deletions tests/e2e-prow/rhoai/manifests/gpu/create-nfd.yaml
@@ -0,0 +1,8 @@
# Minimal NFD instance
apiVersion: nfd.openshift.io/v1
kind: NodeFeatureDiscovery
metadata:
  name: nfd-instance
  namespace: openshift-nfd
spec:
  instance: ""
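NFD is what lets the GPU operator discover NVIDIA hardware, so a quick sanity check is that nodes carrying an NVIDIA PCI device (vendor ID 10de) picked up the corresponding label. A sketch, not part of this PR:

oc get nodes -l feature.node.kubernetes.io/pci-10de.present=true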
4 changes: 3 additions & 1 deletion tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack.yaml
@@ -4,6 +4,8 @@ metadata:
  name: llama-stack-service
  namespace: e2e-rhoai-dsc
spec:
+  imagePullSecrets:
+  - name: quay-lightspeed-pull-secret
  containers:
  - name: llama-stack-container
    env:
@@ -17,7 +19,7 @@ spec:
        secretKeyRef:
          name: vllm-api-key-secret
          key: key
-    image: quay.io/opendatahub/llama-stack:rhoai-v2.25-latest
+    image: ${LLAMA_STACK_IMAGE}
    ports:
    - containerPort: 8321
    volumeMounts:
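The new imagePullSecrets entry assumes quay-lightspeed-pull-secret already exists in e2e-rhoai-dsc; the provisioning step is outside this diff. A minimal sketch with placeholder credentials:

oc create secret docker-registry quay-lightspeed-pull-secret \
  -n e2e-rhoai-dsc \
  --docker-server=quay.io \
  --docker-username="<robot-account>" \
  --docker-password="<robot-token>"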
5 changes: 5 additions & 0 deletions tests/e2e-prow/rhoai/manifests/namespaces/nfd.yaml
@@ -0,0 +1,5 @@
# NFD Namespace
apiVersion: v1
kind: Namespace
metadata:
  name: openshift-nfd
@@ -0,0 +1,5 @@
# NVIDIA GPU Operator Namespace
apiVersion: v1
kind: Namespace
metadata:
  name: nvidia-gpu-operator
22 changes: 21 additions & 1 deletion tests/e2e-prow/rhoai/manifests/operators/operatorgroup.yaml
@@ -3,4 +3,24 @@ kind: OperatorGroup
metadata:
  name: global-operators
  namespace: openshift-operators
-spec:
+spec:
+---
+# NVIDIA GPU Operator OperatorGroup
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: nvidia-gpu-operator-group
+  namespace: nvidia-gpu-operator
+spec:
+  targetNamespaces:
+  - nvidia-gpu-operator
+---
+# Create OperatorGroup
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: nfd-operator-group
+  namespace: openshift-nfd
+spec:
+  targetNamespaces:
+  - openshift-nfd
27 changes: 26 additions & 1 deletion tests/e2e-prow/rhoai/manifests/operators/operators.yaml
@@ -32,4 +32,29 @@ spec:
  channel: stable
  name: rhods-operator
  source: redhat-operators
-  sourceNamespace: openshift-marketplace
+  sourceNamespace: openshift-marketplace
+---
+# NVIDIA GPU Operator Subscription
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: gpu-operator-certified
+  namespace: nvidia-gpu-operator
+spec:
+  channel: stable
+  installPlanApproval: Automatic
+  name: gpu-operator-certified
+  source: certified-operators
+  sourceNamespace: openshift-marketplace
+---
+# Node Feature Discovery (NFD) Operator Subscription
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: nfd
+  namespace: openshift-nfd
+spec:
+  channel: stable
+  name: nfd
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
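Both subscriptions resolve through OLM (the GPU operator with automatic install-plan approval), so the installs can be confirmed before the ClusterPolicy is applied. A minimal check, not part of this PR:

# Each CSV should eventually report PHASE=Succeeded
oc get csv -n nvidia-gpu-operator
oc get csv -n openshift-nfd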
@@ -0,0 +1,25 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: vllm-model
  namespace: e2e-rhoai-dsc
  annotations:
    serving.kserve.io/deploymentMode: RawDeployment
    sidecar.istio.io/inject: "false"
spec:
  predictor:
    minReplicas: 1
    maxReplicas: 1
    model:
      modelFormat:
        name: vLLM
      runtime: vllm-gpu
      resources:
        limits:
          nvidia.com/gpu: 1
          cpu: "4"
          memory: 20Gi
        requests:
          nvidia.com/gpu: 1
          cpu: "2"
          memory: 16Gi
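KServe marks the InferenceService Ready once the predictor pod is serving, so the pipeline can block on it the same way it already waits for the llama-stack pod. A minimal sketch, not part of this PR:

oc wait inferenceservice/vllm-model \
  -n e2e-rhoai-dsc --for=condition=Ready --timeout=900s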
82 changes: 82 additions & 0 deletions tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml
@@ -0,0 +1,82 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  annotations:
    openshift.io/display-name: vLLM GPU
  name: vllm-gpu
  namespace: e2e-rhoai-dsc
  labels:
    opendatahub.io/dashboard: "true"
spec:
  builtInAdapter:
    modelLoadingTimeoutMillis: 90000
  containers:
  - args:
    - --model
    - meta-llama/Llama-3.2-1B-Instruct
    - --enable-auto-tool-choice
    - --tool-call-parser
    - llama3_json
    - --chat-template
    - /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
Comment on lines +17 to +21
⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# Check if the file exists and examine the exact content at lines 17-21
if [ -f "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" ]; then
  echo "=== File content lines 17-21 with special chars shown ==="
  sed -n '17,21p' "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" | cat -A
  echo ""
  echo "=== Full context lines 10-25 ==="
  sed -n '10,25p' "tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml" | cat -n
else
  echo "File not found"
  find . -name "vllm-runtime-gpu.yaml" -type f
fi

Repository: lightspeed-core/lightspeed-stack

Length of output: 885


🌐 Web query:

vLLM argument parser trailing spaces CLI flags parsing behavior

💡 Result:

Short answer: vLLM’s CLI uses a FlexibleArgumentParser built on Python argparse, so flag-parsing follows standard argparse rules — trailing whitespace inside an argument value is preserved by the parser but the shell will usually strip/unpack whitespace unless you quote the value. For file/path arguments and complex JSON-style args, you must quote or escape spaces; vLLM has reported issues when unescaped spaces appear in local file paths. References and guidance below.

  • vLLM’s parser: vLLM provides FlexibleArgumentParser / argparse helpers (behaves like argparse). Preserve/stripping of trailing spaces is therefore determined by the shell and how you quote the value. [1]
  • Reported problems: there is a vLLM GitHub issue where inference failed when there was a space character in a local file path — shows you must quote/escape paths. [2]
  • CLI docs: many vLLM flags accept JSON or values; docs recommend usual CLI quoting (use --flag="value with space" or --flag=value). Boolean flags are store_true/store_false and are unaffected by trailing-space values. [3]

Practical tips:

  • Quote values containing spaces: --model "/path/with space/model" or --model="/path/with space/model".
  • Use the equals form to avoid shell word-splitting: --json-arg='{"k":"v with space"}'.
  • For boolean flags (store_true/store_false) trailing spaces aren’t meaningful.
  • If you see a bug when quoting correctly, report it with a repro; check/open vLLM issues (the file-path/space issue has been reported). [2]

Sources:
[1] vLLM argparse utilities documentation.
[2] vLLM GitHub issue: "Inference fails when there is space character in local file path."
[3] vLLM CLI reference / flags documentation.


Remove trailing spaces in vLLM flags to prevent startup failure.

Lines 19 and 21 contain trailing spaces after --tool-call-parser and --chat-template. Kubernetes passes the args array directly to the container without shell processing, so these flags become --tool-call-parser and --chat-template (with spaces). The vLLM argparse parser won't recognize these as valid flags and will reject them as unrecognized arguments, causing the container to fail on startup.

Fix
-        - --tool-call-parser 
+        - --tool-call-parser
         - llama3_json
-        - --chat-template 
+        - --chat-template
         - /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
- --enable-auto-tool-choice
- --tool-call-parser
- llama3_json
- --chat-template
- /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
- --enable-auto-tool-choice
- --tool-call-parser
- llama3_json
- --chat-template
- /mnt/chat-template/tool_chat_template_llama3.2_json.jinja
🤖 Prompt for AI Agents
In `@tests/e2e-prow/rhoai/manifests/vllm/vllm-runtime-gpu.yaml` around lines 17-21,
remove the trailing spaces from the vLLM CLI flags
so Kubernetes doesn't pass malformed args: fix the args entries that contain
"--tool-call-parser " and "--chat-template " to be "--tool-call-parser" and
"--chat-template" (and ensure their corresponding values like "llama3_json" and
"/mnt/chat-template/tool_chat_template_llama3.2_json.jinja" remain separate
array items or properly paired) so vLLM's argparse recognizes the flags and the
container can start.

    - --download-dir
    - /tmp/models-cache
    - --port
    - "8080"
    - --max-model-len
    - "2048"
    - --gpu-memory-utilization
    - "0.9"
    image: ${VLLM_IMAGE}
    name: kserve-container
    env:
    - name: HF_HUB_OFFLINE
      value: "false"
    - name: TRANSFORMERS_OFFLINE
      value: "false"
    - name: HF_DATASETS_OFFLINE
      value: "false"
    - name: HF_HOME
      value: /mnt/models-cache/hf_home
    - name: HUGGING_FACE_HUB_TOKEN
      valueFrom:
        secretKeyRef:
          name: hf-token-secret
          key: token
    - name: VLLM_API_KEY
      valueFrom:
        secretKeyRef:
          name: vllm-api-key-secret
          key: key
    ports:
    - containerPort: 8080
      name: http1
      protocol: TCP
    volumeMounts:
    - name: chat-template
      mountPath: /mnt/chat-template
    - name: models-cache
      mountPath: /mnt/models-cache
    - name: vllm-cache
      mountPath: /.cache
    resources:
      limits:
        nvidia.com/gpu: 1
        cpu: "6"
        memory: 20Gi
      requests:
        nvidia.com/gpu: 1
        cpu: "4"
        memory: 16Gi
  volumes:
  - name: chat-template
    configMap:
      name: vllm-chat-template
  - name: models-cache
    emptyDir: {}
  - name: vllm-cache
    emptyDir: {}
  multiModel: false
  supportedModelFormats:
  - autoSelect: true
    name: vLLM
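The runtime container reads HUGGING_FACE_HUB_TOKEN and VLLM_API_KEY from secrets that must already exist in the namespace. A minimal sketch of creating them (values are placeholders; how the pipeline actually provisions them is outside this diff):

oc create secret generic hf-token-secret -n e2e-rhoai-dsc \
  --from-literal=token="<huggingface-token>"
oc create secret generic vllm-api-key-secret -n e2e-rhoai-dsc \
  --from-literal=key="<api-key>"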
4 changes: 2 additions & 2 deletions tests/e2e-prow/rhoai/pipeline-services.sh
@@ -2,10 +2,10 @@

BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

-oc apply -f "$BASE_DIR/manifests/lightspeed/llama-stack.yaml"
+envsubst < "$BASE_DIR/manifests/lightspeed/llama-stack.yaml" | oc apply -f -

oc wait pod/llama-stack-service \
-  -n e2e-rhoai-dsc --for=condition=Ready --timeout=300s
+  -n e2e-rhoai-dsc --for=condition=Ready --timeout=600s

# Get url address of llama-stack pod
oc label pod llama-stack-service pod=llama-stack-service -n e2e-rhoai-dsc
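Because the manifest now carries ${LLAMA_STACK_IMAGE}, that variable must be exported before envsubst runs; otherwise the image field is substituted with an empty string. A minimal sketch of the expected invocation (the image reference below is only an example, taken from the previously hard-coded value):

export LLAMA_STACK_IMAGE="quay.io/opendatahub/llama-stack:rhoai-v2.25-latest"
./tests/e2e-prow/rhoai/pipeline-services.sh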
4 changes: 4 additions & 0 deletions tests/e2e-prow/rhoai/pipeline-vllm.sh
@@ -1,7 +1,11 @@
#!/bin/bash

set -euo pipefail

PIPELINE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

"$PIPELINE_DIR/scripts/bootstrap.sh" "$PIPELINE_DIR"
"$PIPELINE_DIR/scripts/gpu-setup.sh" "$PIPELINE_DIR"
source "$PIPELINE_DIR/scripts/fetch-vllm-image.sh"
"$PIPELINE_DIR/scripts/deploy-vllm.sh" "$PIPELINE_DIR"
"$PIPELINE_DIR/scripts/get-vllm-pod-info.sh"