diff --git a/Makefile b/Makefile
index 0d173c29b..2dd9fc58a 100644
--- a/Makefile
+++ b/Makefile
@@ -109,11 +109,11 @@ upload-distribution-archives: ## Upload distribution archives into Python regist
 	uv run python -m twine upload --repository ${PYTHON_REGISTRY} dist/*
 
 konflux-requirements: ## generate hermetic requirements.*.txt file for konflux build
-	uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
-	uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+	uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
+	uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 	./scripts/remove_torch_deps.sh requirements.x86_64.txt
 	./scripts/remove_torch_deps.sh requirements.aarch64.txt
-	echo "torch==${TORCH_VERSION}" | uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu
+	echo "torch==${TORCH_VERSION}" | uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu --refresh
 
 help: ## Show this help screen
 	@echo 'Usage: make ... '
diff --git a/pyproject.toml b/pyproject.toml
index 62f5ab0e3..180b55ea5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     # Used by authentication/k8s integration
     "kubernetes>=30.1.0",
     # Used to call Llama Stack APIs
-    "llama-stack==0.3.4",
-    "llama-stack-client==0.3.4",
+    "llama-stack==0.3.5",
+    "llama-stack-client==0.3.5",
     # Used by Logger
     "rich>=14.0.0",
     # Used by JWK token auth handler
diff --git a/requirements.aarch64.txt b/requirements.aarch64.txt
index 2378fd5b2..3a6d03c41 100644
--- a/requirements.aarch64.txt
+++ b/requirements.aarch64.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-# uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+# uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 accelerate==1.12.0 \
     --hash=sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11 \
     --hash=sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6
@@ -1393,13 +1393,13 @@ litellm==1.80.10 \
     --hash=sha256:4a4aff7558945c2f7e5c6523e67c1b5525a46b10b0e1ad6b8f847cb13b16779e \
     --hash=sha256:9b3e561efaba0eb1291cb1555d3dcb7283cf7f3cb65aadbcdb42e2a8765898c8
     # via lightspeed-stack (pyproject.toml)
-llama-stack==0.3.4 \
-    --hash=sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147 \
-    --hash=sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b
+llama-stack==0.3.5 \
+    --hash=sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b \
+    --hash=sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366
     # via lightspeed-stack (pyproject.toml)
-llama-stack-client==0.3.4 \
-    --hash=sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688 \
-    --hash=sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893
+llama-stack-client==0.3.5 \
+    --hash=sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485 \
+    --hash=sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2
     # via
     #   lightspeed-stack (pyproject.toml)
     #   llama-stack
diff --git a/requirements.torch.txt b/requirements.torch.txt
index c81da137f..404bf76ce 100644
--- a/requirements.torch.txt
+++ b/requirements.torch.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-# uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu
+# uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu --refresh
 --index-url https://download.pytorch.org/whl/cpu
 
 torch==2.7.1+cpu \
diff --git a/requirements.x86_64.txt b/requirements.x86_64.txt
index 1cc052aef..de18dc328 100644
--- a/requirements.x86_64.txt
+++ b/requirements.x86_64.txt
@@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-# uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12
+# uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
 accelerate==1.12.0 \
     --hash=sha256:3e2091cd341423207e2f084a6654b1efcd250dc326f2a37d6dde446e07cabb11 \
     --hash=sha256:70988c352feb481887077d2ab845125024b2a137a5090d6d7a32b57d03a45df6
@@ -1393,13 +1393,13 @@ litellm==1.80.10 \
     --hash=sha256:4a4aff7558945c2f7e5c6523e67c1b5525a46b10b0e1ad6b8f847cb13b16779e \
     --hash=sha256:9b3e561efaba0eb1291cb1555d3dcb7283cf7f3cb65aadbcdb42e2a8765898c8
     # via lightspeed-stack (pyproject.toml)
-llama-stack==0.3.4 \
-    --hash=sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147 \
-    --hash=sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b
+llama-stack==0.3.5 \
+    --hash=sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b \
+    --hash=sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366
     # via lightspeed-stack (pyproject.toml)
-llama-stack-client==0.3.4 \
-    --hash=sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688 \
-    --hash=sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893
+llama-stack-client==0.3.5 \
+    --hash=sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485 \
+    --hash=sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2
     # via
     #   lightspeed-stack (pyproject.toml)
     #   llama-stack
diff --git a/src/constants.py b/src/constants.py
index 7829f9245..959dccd9a 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -2,7 +2,7 @@
 
 # Minimal and maximal supported Llama Stack version
 MINIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.2.17"
-MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.4"
+MAXIMAL_SUPPORTED_LLAMA_STACK_VERSION = "0.3.5"
 
 UNABLE_TO_PROCESS_RESPONSE = "Unable to process this request"
 
diff --git a/tests/e2e/configuration/lightspeed-stack-library-mode.yaml b/tests/e2e/configuration/lightspeed-stack-library-mode.yaml
new file mode 100644
index 000000000..47257bfb1
--- /dev/null
+++ b/tests/e2e/configuration/lightspeed-stack-library-mode.yaml
@@ -0,0 +1,19 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Library mode - embeds llama-stack as library
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
diff --git a/tests/e2e/configuration/lightspeed-stack-server-mode.yaml b/tests/e2e/configuration/lightspeed-stack-server-mode.yaml
new file mode 100644
index 000000000..cc699ba89
--- /dev/null
+++ b/tests/e2e/configuration/lightspeed-stack-server-mode.yaml
@@ -0,0 +1,20 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Server mode - connects to separate llama-stack service
+  use_as_library_client: false
+  url: http://llama-stack:8321
+  api_key: xyzzy
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature
index 378456899..df7e30ded 100644
--- a/tests/e2e/features/info.feature
+++ b/tests/e2e/features/info.feature
@@ -16,7 +16,7 @@ Feature: Info tests
     When I access REST API endpoint "info" using HTTP GET method
     Then The status code of the response is 200
     And The body of the response has proper name Lightspeed Core Service (LCS) and version 0.3.1
-    And The body of the response has llama-stack version 0.3.4
+    And The body of the response has llama-stack version 0.3.5
 
   @skip-in-library-mode
   Scenario: Check if info endpoint reports error when llama-stack connection is not working
diff --git a/uv.lock b/uv.lock
index 5ce364d0c..1b559c337 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform != 'darwin'",
@@ -1433,8 +1433,8 @@ requires-dist = [
     { name = "jsonpath-ng", specifier = ">=1.6.1" },
     { name = "kubernetes", specifier = ">=30.1.0" },
     { name = "litellm", specifier = ">=1.75.5.post1" },
-    { name = "llama-stack", specifier = "==0.3.4" },
-    { name = "llama-stack-client", specifier = "==0.3.4" },
+    { name = "llama-stack", specifier = "==0.3.5" },
+    { name = "llama-stack-client", specifier = "==0.3.5" },
     { name = "openai", specifier = ">=1.99.9" },
     { name = "prometheus-client", specifier = ">=0.22.1" },
     { name = "psycopg2-binary", specifier = ">=2.9.10" },
@@ -1530,7 +1530,7 @@ wheels = [
 
 [[package]]
 name = "llama-stack"
-version = "0.3.4"
+version = "0.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
@@ -1559,14 +1559,14 @@ dependencies = [
     { name = "tiktoken" },
     { name = "uvicorn" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/68/967f95e5fe3a650b9bb6a18c4beeb39e734695d92f1ab1525c5b9bfadb1b/llama_stack-0.3.5.tar.gz", hash = "sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b", size = 3320526, upload-time = "2025-12-15T14:34:32.96Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" },
+    { url = "https://files.pythonhosted.org/packages/24/70/fb1896f07fc38a94b4c0bfb5999872d1514c6b3259fe77358cadef77a3db/llama_stack-0.3.5-py3-none-any.whl", hash = "sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366", size = 3636815, upload-time = "2025-12-15T14:34:31.354Z" },
 ]
 
 [[package]]
 name = "llama-stack-client"
-version = "0.3.4"
+version = "0.3.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -1585,9 +1585,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/34/ff/b4bb891249379849e6e273a6254998c7e08562613ca4020817af2da9498e/llama_stack_client-0.3.5.tar.gz", hash = "sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485", size = 335659, upload-time = "2025-12-15T14:10:16.444Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/10/84a4f0ef1cc13f44a692e55bed6a55792671e5320c95a8fd581e02848d61/llama_stack_client-0.3.5-py3-none-any.whl", hash = "sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2", size = 425244, upload-time = "2025-12-15T14:10:14.726Z" },
 ]
 
 [[package]]