diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 4733d5d6c..429ed4555 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -13,6 +13,7 @@ services: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z - ./run.yaml:/app-root/run.yaml:Z - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro + - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z environment: - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} diff --git a/docker-compose.yaml b/docker-compose.yaml index 3b00c3815..77575841b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -11,6 +11,7 @@ services: volumes: - ./run.yaml:/opt/app-root/run.yaml:Z - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro + - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z environment: - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-} - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-} diff --git a/docs/providers.md b/docs/providers.md index 32f320dcb..3874f60ff 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -100,7 +100,7 @@ Red Hat providers: | Name | Type | Pip Dependencies | Supported in LCS | |---|---|---|:---:| | code-scanner | inline | `codeshield` | ❌ | -| llama-guard | inline | — | ✅ | +| llama-guard | inline | — | ❌ | | prompt-guard | inline | `transformers[accelerate]`, `torch --index-url https://download.pytorch.org/whl/cpu` | ❌ | | bedrock | remote | `boto3` | ❌ | | nvidia | remote | `requests` | ❌ | @@ -157,7 +157,7 @@ Red Hat providers: | Name | Type | Pip Dependencies | Supported in LCS | |---|---|---|:---:| -| rag-runtime | inline | `chardet`,`pypdf`, `tqdm`, `numpy`, `scikit-learn`, `scipy`, `nltk`, `sentencepiece`, `transformers` | ❌ | +| rag-runtime | inline | `chardet`,`pypdf`, `tqdm`, `numpy`, `scikit-learn`, `scipy`, `nltk`, `sentencepiece`, `transformers` | ✅ | | bing-search | remote | `requests` | ❌ | | brave-search | remote | `requests` | ❌ | | model-context-protocol | remote | `mcp>=1.8.1` | ✅ | diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml index a50301add..5a14b99aa 100644 --- a/examples/azure-run.yaml +++ b/examples/azure-run.yaml @@ -1,128 +1,150 @@ -version: '2' -image_name: minimal-viable-llama-stack-configuration +version: 2 +image_name: azure-configuration apis: - - agents - - datasetio - - eval - - files - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + benchmarks: [] -container_image: null +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite datasets: [] -external_providers_dir: null +# external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: .llama/distributions/ollama/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} type: sqlite -logging: null metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} type: sqlite + providers: + inference: + - provider_id: azure + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY} + api_base: https://ols-test.openai.azure.com/ + api_version: 2024-02-15-preview + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers files: - - provider_id: localfs - 
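Both compose files gain the same bind mount, so containers see the prebuilt e2e index at the path the run configs resolve their RAG store under. To run against a locally built index instead, a compose override along these lines should work (the service name and host path are assumptions, not part of this change):

```yaml
# docker-compose.override.yaml (hypothetical)
services:
  llama-stack:                       # match the service name in your compose file
    volumes:
      # Index directory produced with the rag-content tool on the host:
      - /path/to/your/rag-index:/opt/app-root/src/.llama/storage/rag:Z
```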
provider_type: inline::localfs - config: - storage_dir: /tmp/llama-stack-files + - config: metadata_store: - type: sqlite - db_path: .llama/distributions/ollama/files_metadata.db + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - - provider_id: meta-reference + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference provider_type: inline::meta-reference - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - - provider_id: huggingface + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface provider_type: remote::huggingface - config: + - config: kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite - - provider_id: localfs + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs provider_type: inline::localfs - config: - kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: + - config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite - inference: - - provider_id: azure - provider_type: remote::azure - config: - api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ - api_version: 2024-02-15-preview - api_type: ${env.AZURE_API_TYPE:=} - post_training: - - provider_id: huggingface - provider_type: inline::huggingface-gpu - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: "." 
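A pattern worth calling out in the rewritten configs is `${env.VAR:=default}`: the expression expands to the value of `VAR` when it is set in the environment and to the literal default otherwise, and the substitution replaces the whole expression, default path included. A minimal sketch of both outcomes:

```yaml
# SQLITE_STORE_DIR unset:
#   db_path == ~/.llama/storage/inference-store.db
# SQLITE_STORE_DIR=/opt/app-root/src/.llama/storage/inference-store.db:
#   db_path == /opt/app-root/src/.llama/storage/inference-store.db
inference_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
```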
- safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: '********' - telemetry: - - provider_id: meta-reference + namespace: eval_store + backend: kv_default + provider_id: meta-reference provider_type: inline::meta-reference - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} scoring_fns: [] server: - auth: null - host: null port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: [] -models: +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: - model_id: gpt-4o-mini - model_type: llm provider_id: azure - provider_model_id: gpt-4o-mini \ No newline at end of file + model_type: llm + provider_model_id: gpt-4o-mini + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/examples/gemini-run.yaml b/examples/gemini-run.yaml deleted file mode 100644 index 91edfb5dc..000000000 --- a/examples/gemini-run.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Example llama-stack configuration for Google Gemini inference -# -# Contributed by @eranco74 (2025-08). See https://github.com/rh-ecosystem-edge/assisted-chat/blob/main/template.yaml#L282-L386 -# This file shows how to integrate Gemini with LCS. -# -# Notes: -# - You will need valid Gemini API credentials to run this. -# - You will need a postgres instance to run this config. 
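For the Azure provider, `provider_model_id` is expected to name an Azure OpenAI deployment rather than a raw model id (an assumption based on how Azure routes requests per deployment). Registering a second deployment would be one more list entry:

```yaml
# Hypothetical second model; the deployment name is an assumption.
registered_resources:
  models:
  - model_id: gpt-4o
    provider_id: azure
    model_type: llm
    provider_model_id: gpt-4o    # assumed to equal the Azure deployment name
```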
-# -version: 2 -image_name: gemini-config -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_type: remote::gemini - config: - api_key: ${env.GEMINI_API_KEY} - vector_io: [] - files: [] - safety: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} - responses_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${LLAMA_STACK_OTEL_SERVICE_NAME}" - sinks: ${LLAMA_STACK_TELEMETRY_SINKS} - sqlite_db_path: ${STORAGE_MOUNT_PATH}/sqlite/trace_store.db - eval: [] - datasetio: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${STORAGE_MOUNT_PATH}/sqlite/registry.db -inference_store: - type: postgres - host: ${env.LLAMA_STACK_POSTGRES_HOST} - port: ${env.LLAMA_STACK_POSTGRES_PORT} - db: ${env.LLAMA_STACK_POSTGRES_NAME} - user: ${env.LLAMA_STACK_POSTGRES_USER} - password: ${env.LLAMA_STACK_POSTGRES_PASSWORD} -models: -- metadata: {} - model_id: ${LLAMA_STACK_2_0_FLASH_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_0_FLASH_MODEL} - model_type: llm -- metadata: {} - model_id: ${LLAMA_STACK_2_5_PRO_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_5_PRO_MODEL} - model_type: llm -- metadata: {} - model_id: ${LLAMA_STACK_2_5_FLASH_MODEL} - provider_id: ${LLAMA_STACK_INFERENCE_PROVIDER} - provider_model_id: ${LLAMA_STACK_2_5_FLASH_MODEL} - model_type: llm -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: mcp::assisted - provider_id: model-context-protocol - mcp_endpoint: - uri: "${MCP_SERVER_URL}" -server: - port: ${LLAMA_STACK_SERVER_PORT} diff --git a/examples/openai-faiss-run.yaml b/examples/openai-faiss-run.yaml deleted file mode 100644 index 4068dea86..000000000 --- a/examples/openai-faiss-run.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# Example llama-stack configuration for OpenAI inference + FAISS (RAG) -# -# Notes: -# - You will need an OpenAI API key -# - You can generate the vector index with the rag-content tool (https://github.com/lightspeed-core/rag-content) -# -version: 2 -image_name: openai-faiss-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: gpt-test - provider_id: openai # This ID is a reference to 'providers.inference' - model_type: llm - provider_model_id: gpt-4o-mini - -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - 
model_type: embedding - provider_id: sentence-transformers # This ID is a reference to 'providers.inference' - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: ocp-docs - provider_type: inline::faiss - config: - kvstore: - type: sqlite - db_path: /home/USER/lightspeed-stack/vector_dbs/ocp_docs/faiss_store.db - namespace: null - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -# Enable the RAG tool -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: ocp-docs # This ID is a reference to 'providers.vector_io' - vector_db_id: openshift-index # This ID was defined during index generation \ No newline at end of file diff --git a/examples/openai-pgvector-run.yaml b/examples/openai-pgvector-run.yaml deleted file mode 100644 index a8e1da345..000000000 --- a/examples/openai-pgvector-run.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Example llama-stack configuration for OpenAI inference + PSQL (pgvector) vector index (RAG) -# -# Notes: -# - You will need an OpenAI API key -# - You will need to setup PSQL with pgvector -# - The table schema must follow the expected schema in llama-stack (see rag_guide.md) -# -version: 2 -image_name: openai-pgvector-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: gpt-test - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/USER/lightspeed-stack/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: pgvector-example - provider_type: remote::pgvector - config: - host: localhost - port: 5432 - db: pgvector_example # PostgreSQL database (psql -d pgvector_example) - user: lightspeed # PostgreSQL user - password: empty - kvstore: - type: sqlite - db_path: .llama/distributions/pgvector/pgvector_registry.db - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -tool_groups: -- provider_id: 
rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: pgvector-example - # A unique ID that becomes the PostgreSQL table name, prefixed with 'vector_store_'. - # e.g., 'rhdocs' will create the table 'vector_store_rhdocs'. - vector_db_id: rhdocs \ No newline at end of file diff --git a/examples/run.yaml b/examples/run.yaml index dcb1b0e67..f01e50121 100644 --- a/examples/run.yaml +++ b/examples/run.yaml @@ -1,126 +1,161 @@ -version: '2' -image_name: minimal-viable-llama-stack-configuration +# Example llama-stack configuration for OpenAI inference + FAISS (RAG) +# +# Notes: +# - You will need an OpenAI API key +# - You can generate the vector index with the rag-content tool (https://github.com/lightspeed-core/rag-content) +# +version: 2 apis: - - agents - - datasetio - - eval - - inference - - post_training - - safety - - scoring - - telemetry - - tool_runtime - - vector_io +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io + benchmarks: [] -container_image: null +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite datasets: [] -external_providers_dir: null +image_name: starter +# external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: .llama/distributions/ollama/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} type: sqlite -logging: null metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} type: sqlite + providers: + inference: + - provider_id: openai # This ID is a reference to 'providers.inference' + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: null - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default provider_id: meta-reference provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: 
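The rewritten `examples/run.yaml` assembles RAG from several cooperating pieces; a condensed view of the cross-references, annotated (every name is taken from the config itself):

```yaml
vector_io:
- provider_id: faiss                  # referenced by vector_stores.default_provider_id
  provider_type: inline::faiss
  config:
    persistence:
      namespace: vector_io::faiss     # key prefix inside the shared KV store
      backend: kv_default             # -> storage.backends.kv_default (rag/kv_store.db)
tool_runtime:
- provider_id: rag-runtime            # referenced by tool_groups builtin::rag
  provider_type: inline::rag-runtime
  config: {}
```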
batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference datasetio: - config: kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: null - type: sqlite + namespace: huggingface_datasetio + backend: kv_default provider_id: huggingface provider_type: remote::huggingface - config: kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: null - type: sqlite + namespace: localfs_datasetio + backend: kv_default provider_id: localfs provider_type: inline::localfs eval: - config: kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: null - type: sqlite + namespace: eval_store + backend: kv_default provider_id: meta-reference provider_type: inline::meta-reference - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: null - dpo_output_dir: .llama/distributions/ollama - provider_id: huggingface - provider_type: inline::huggingface-gpu - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference - tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - vector_io: - - config: - kvstore: - db_path: .llama/distributions/ollama/faiss_store.db - namespace: null - type: sqlite - provider_id: faiss - provider_type: inline::faiss scoring_fns: [] +telemetry: + enabled: true server: - auth: null - host: null port: 8321 - quota: null - tls_cafile: null - tls_certfile: null - tls_keyfile: null -shields: [] -vector_dbs: [] - -models: - - model_id: gpt-4-turbo - provider_id: openai - model_type: llm - provider_model_id: gpt-4-turbo +storage: + backends: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag # Register the RAG tool + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml index 37e083b8f..38c631548 100644 --- a/examples/vertexai-run.yaml +++ b/examples/vertexai-run.yaml @@ -1,4 +1,5 @@ version: 2 +image_name: vertexai-configuration apis: - agents @@ -14,19 +15,56 @@ apis: benchmarks: [] conversations_store: - db_path: ~/.llama/storage/conversations.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} type: sqlite datasets: [] -image_name: starter # external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: ~/.llama/storage/inference-store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} type: sqlite metadata_store: - db_path: ~/.llama/storage/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} type: sqlite providers: + inference: + - provider_id: google-vertex + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT} + location: ${env.VERTEX_AI_LOCATION} + allowed_models: ["google/gemini-2.5-flash"] + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - config: persistence: @@ -65,46 +103,6 @@ providers: backend: kv_default provider_id: meta-reference provider_type: inline::meta-reference - files: - - config: - metadata_store: - table_name: files_metadata - backend: sql_default - storage_dir: ~/.llama/storage - provider_id: meta-reference-files - provider_type: inline::localfs - inference: - - provider_id: google-vertex - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT} - location: ${env.VERTEX_AI_LOCATION} - - config: {} - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - safety: - - 
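`vector_stores.default_embedding_model` appears to be the model `builtin::rag` falls back to when a vector store does not declare its own; swapping it is a one-block change, provided the index was built with the same model and embedding dimension. A sketch with an alternative sentence-transformers model (the model id is an assumption):

```yaml
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: sentence-transformers/all-mpnet-base-v2   # hypothetical alternative
```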
config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - tool_runtime: - - config: {} - provider_id: rag-runtime - provider_type: inline::rag-runtime - vector_io: - - config: - persistence: - namespace: faiss_store - backend: kv_default - provider_id: faiss - provider_type: inline::faiss scoring_fns: [] server: port: 8321 @@ -112,10 +110,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -133,7 +131,7 @@ storage: backend: kv_default registered_resources: models: [] - shields: [] + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls vector_dbs: [] datasets: [] scoring_fns: [] @@ -141,3 +139,8 @@ registered_resources: tool_groups: - toolgroup_id: builtin::rag provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/examples/vllm-granite-run.yaml b/examples/vllm-granite-run.yaml deleted file mode 100644 index 198095ad6..000000000 --- a/examples/vllm-granite-run.yaml +++ /dev/null @@ -1,148 +0,0 @@ -# Example llama-stack configuration for IBM Granite using vLLM (no RAG) - -# -# Contributed by @eranco74 (2025-08). -# -# Notes: -# - You will need to serve Granite on a vLLM instance -# -version: '2' -image_name: vllm-granite-config -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: granite - provider_type: remote::vllm - config: - url: ${env.VLLM_URL} - api_token: ${env.VLLM_API_TOKEN:fake} - max_tokens: 10000 - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db - scoring: - - provider_id: basic - 
provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db - post_training: - - provider_id: huggingface - provider_type: inline::huggingface - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: granite - provider_model_id: null -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 \ No newline at end of file diff --git a/examples/vllm-llama-faiss-run.yaml b/examples/vllm-llama-faiss-run.yaml deleted file mode 100644 index 924577470..000000000 --- a/examples/vllm-llama-faiss-run.yaml +++ /dev/null @@ -1,80 +0,0 @@ -# Example llama-stack configuration for vLLM on RHEL, Meta Llama 3.1 Instruct + FAISS (RAG) -# -# Notes: -# - You will need to serve Llama 3.1 Instruct on a vLLM instance -# -version: 2 -image_name: vllm-llama-faiss-config - -apis: -- agents -- inference -- vector_io -- tool_runtime -- safety - -models: -- model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: vllm - model_type: llm - provider_model_id: null -- model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /home/USER/embedding_models/all-mpnet-base-v2 - -providers: - inference: - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: vllm - provider_type: remote::vllm - config: - url: http://localhost:8000/v1/ - api_token: key - - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - db_path: .llama/distributions/ollama/agents_store.db - responses_store: - type: sqlite - db_path: .llama/distributions/ollama/responses_store.db - - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - - vector_io: - - provider_id: rhel-db - provider_type: inline::faiss - config: - kvstore: - type: sqlite - db_path: 
/home/USER/vector_dbs/rhel_index/faiss_store.db - namespace: null - - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - -tool_groups: -- provider_id: rag-runtime - toolgroup_id: builtin::rag - args: null - mcp_endpoint: null - -vector_dbs: -- embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhel-db - vector_db_id: rhel-docs \ No newline at end of file diff --git a/examples/vllm-qwen3-run.yaml b/examples/vllm-qwen3-run.yaml deleted file mode 100644 index 9de77f2ec..000000000 --- a/examples/vllm-qwen3-run.yaml +++ /dev/null @@ -1,108 +0,0 @@ -# Example llama-stack configuration for Alibaba Qwen3 using vLLM (no RAG) - -# -# Contributed by @eranco74 (2025-08). -# -# Notes: -# - You will need to serve Qwen3 on a vLLM instance -# -version: 2 -image_name: vllm-qwen3-config -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: qwen - provider_type: remote::vllm - config: - url: https://qwen3.rosa.openshiftapps.com/v1 - max_tokens: 32768 - api_token: - tls_verify: true - vector_io: [] - files: [] - safety: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=/tmp/.llama/distributions/starter}/trace_store.db - eval: [] - datasetio: [] - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=user} - password: ${env.POSTGRES_PASSWORD:=password} -models: -- metadata: {} - model_id: qwen3-32b-maas - provider_id: qwen - provider_model_id: null -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: mcp::assisted - provider_id: model-context-protocol - mcp_endpoint: - uri: "http://assisted-service-mcp:8000/sse" -server: - port: 8321 \ No newline at end of file diff --git a/examples/vllm-rhaiis-run.yaml b/examples/vllm-rhaiis-run.yaml new file mode 100644 index 000000000..604c4dade --- /dev/null +++ b/examples/vllm-rhaiis-run.yaml @@ -0,0 +1,151 @@ 
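The new RHAIIS config that follows interpolates three environment variables; summarizing how each is used (taken from the file itself):

```yaml
# RHAIIS_URL     -> url: http://${env.RHAIIS_URL}:8000/v1/  (bare host; scheme and port are fixed)
# RHAIIS_API_KEY -> api_token for the remote::vllm provider
# RHAIIS_MODEL   -> registered as both model_id and provider_model_id
```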
+version: 2 +image_name: rhaiis-configuration + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHAIIS_URL}:8000/v1/ + api_token: ${env.RHAIIS_API_KEY} + tls_verify: false + max_tokens: 2048 + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: ${env.RHAIIS_MODEL} + provider_id: vllm + model_type: llm + provider_model_id: ${env.RHAIIS_MODEL} + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff 
--git a/examples/vllm-rhelai-run.yaml b/examples/vllm-rhelai-run.yaml new file mode 100644 index 000000000..43c59c7a0 --- /dev/null +++ b/examples/vllm-rhelai-run.yaml @@ -0,0 +1,151 @@ +version: 2 +image_name: rhelai-configuration + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/ + api_token: ${env.RHEL_AI_API_KEY} + tls_verify: false + max_tokens: 2048 + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: ${env.RHEL_AI_MODEL} + provider_id: vllm + model_type: llm + provider_model_id: ${env.RHEL_AI_MODEL} + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + 
provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/examples/vllm-rhoai-run.yaml b/examples/vllm-rhoai-run.yaml new file mode 100644 index 000000000..074b903d4 --- /dev/null +++ b/examples/vllm-rhoai-run.yaml @@ -0,0 +1,151 @@ +version: 2 +image_name: rhoai-configuration + +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- safety +- scoring +- tool_runtime +- vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite + +providers: + inference: + - provider_id: vllm + provider_type: remote::vllm + config: + url: ${env.KSVC_URL}/v1/ + api_token: ${env.VLLM_API_KEY} + tls_verify: false + max_tokens: 1024 + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - config: {} + provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + batches: + - config: + kvstore: + namespace: batches_store + backend: kv_default + provider_id: reference + provider_type: inline::reference + datasetio: + - config: + kvstore: + namespace: huggingface_datasetio + backend: kv_default + provider_id: huggingface + provider_type: remote::huggingface + - config: + kvstore: + namespace: localfs_datasetio + backend: kv_default + provider_id: localfs + provider_type: inline::localfs + eval: + - config: + kvstore: + namespace: eval_store + backend: kv_default + provider_id: meta-reference + provider_type: inline::meta-reference +scoring_fns: [] +server: + port: 8321 +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: + - model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: vllm + model_type: llm + provider_model_id: null + shields: [] # WARNING: Shields 
disabled due to infinite loop issue with LLM calls + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b16e0ea3e..1fe4f2126 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "psycopg2-binary>=2.9.10", "litellm>=1.75.5.post1", "urllib3==2.6.2", + "einops>=0.8.1", ] diff --git a/run.yaml b/run.yaml index 7787c93de..58b45abdc 100644 --- a/run.yaml +++ b/run.yaml @@ -9,24 +9,69 @@ apis: - inference - safety - scoring +- telemetry - tool_runtime - vector_io benchmarks: [] conversations_store: - db_path: ~/.llama/storage/conversations.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} type: sqlite datasets: [] image_name: starter # external_providers_dir: /opt/app-root/src/.llama/providers.d inference_store: - db_path: ~/.llama/storage/inference-store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} type: sqlite metadata_store: - db_path: ~/.llama/storage/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} type: sqlite providers: + inference: + - provider_id: openai # This ID is a reference to 'providers.inference' + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} + allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"] + - config: {} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: '********' + tool_runtime: + - config: {} # Enable the RAG tool + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: # Define the storage backend for RAG + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: inline::faiss agents: - config: persistence: @@ -65,56 +110,19 @@ providers: backend: kv_default provider_id: meta-reference provider_type: inline::meta-reference - files: - - config: - metadata_store: - table_name: files_metadata - backend: sql_default - storage_dir: ~/.llama/storage - provider_id: meta-reference-files - provider_type: inline::localfs - inference: - - provider_id: openai - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY} - - config: {} - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - tool_runtime: - - config: {} - provider_id: rag-runtime - 
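The new `einops` pin in `pyproject.toml` above is tied to the embedding default: `nomic-ai/nomic-embed-text-v1.5` imports einops when loaded through sentence-transformers (an assumption based on that model's published requirements), so the dependency is needed even though nothing in this repo imports it directly.

```yaml
# pyproject.toml (added):  "einops>=0.8.1"
# Presumed consumer: sentence-transformers loading nomic-ai/nomic-embed-text-v1.5,
# the default_embedding_model registered throughout the configs in this change.
```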
provider_type: inline::rag-runtime - vector_io: - - config: - persistence: - namespace: faiss_store - backend: kv_default - provider_id: faiss - provider_type: inline::faiss scoring_fns: [] +telemetry: + enabled: true server: port: 8321 storage: backends: - kv_default: + kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. models, vector_stores) are saved together with the RAG chunks type: kv_sqlite - db_path: ~/.llama/storage/kv_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ~/.llama/storage/sql_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -132,11 +140,16 @@ storage: backend: kv_default registered_resources: models: [] - shields: [] + shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] tool_groups: - - toolgroup_id: builtin::rag + - toolgroup_id: builtin::rag # Register the RAG tool provider_id: rag-runtime +vector_stores: + default_provider_id: faiss + default_embedding_model: # Define the default embedding model for RAG + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 \ No newline at end of file diff --git a/tests/e2e-prow/rhoai/configs/run.yaml b/tests/e2e-prow/rhoai/configs/run.yaml index f9e992b1b..395d45ada 100644 --- a/tests/e2e-prow/rhoai/configs/run.yaml +++ b/tests/e2e-prow/rhoai/configs/run.yaml @@ -1,5 +1,6 @@ -version: '2' -image_name: sample-notebook +version: 2 +image_name: rhoai-configuration + apis: - agents - batches @@ -11,6 +12,19 @@ apis: - scoring - tool_runtime - vector_io + +benchmarks: [] +conversations_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db} + type: sqlite +datasets: [] +# external_providers_dir: /opt/app-root/src/.llama/providers.d +inference_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db} + type: sqlite +metadata_store: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db} + type: sqlite providers: inference: @@ -21,88 +35,85 @@ providers: api_token: ${env.VLLM_API_KEY} tls_verify: false max_tokens: 1024 - - provider_id: sentence-transformers + - config: {} + provider_id: sentence-transformers provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: documentation_faiss - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_default files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage} + - config: metadata_store: table_name: files_metadata backend: sql_default - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] + storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files} + provider_id: meta-reference-files + provider_type: inline::localfs + safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls + # - config: + # excluded_categories: [] + # provider_id: llama-guard + # provider_type: inline::llama-guard + scoring: + - provider_id: basic + provider_type: inline::basic + tool_runtime: + - config: {} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + persistence: + namespace: vector_io::faiss + backend: kv_default + provider_id: faiss + provider_type: 
inline::faiss
 agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
+  - config:
       persistence:
         agent_state:
-          namespace: agents
+          namespace: agents_state
           backend: kv_default
         responses:
-          table_name: responses
+          table_name: agents_responses
           backend: sql_default
-      max_write_queue_size: 10000
-      num_writers: 4
-  eval:
-  - provider_id: meta-reference
+    provider_id: meta-reference
     provider_type: inline::meta-reference
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
-    config:
+    provider_id: huggingface
+    provider_type: remote::huggingface
+  - config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
-    config:
+    provider_id: localfs
+    provider_type: inline::localfs
+  eval:
+  - config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -120,38 +131,20 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: meta-llama/Llama-3.2-1B-Instruct
     provider_id: vllm
     model_type: llm
-    provider_model_id: meta-llama/Llama-3.2-1B-Instruct
-
-  shields:
-  - shield_id: llama-guard
-    provider_id: ${env.SAFETY_MODEL:+llama-guard}
-    provider_shield_id: ${env.SAFETY_MODEL:=}
-  - shield_id: code-scanner
-    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
-    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+    provider_model_id: null
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+
+  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
-safety:
-  default_shield_id: llama-guard
+    model_id: nomic-ai/nomic-embed-text-v1.5
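The `${env.VAR:=default}` references above use llama-stack's environment substitution: the value of `VAR` when it is set, otherwise the literal default after `:=`. A minimal sketch of how the `metadata_store` path resolves under that reading (note the filename sits inside the default here, so an explicit `SQLITE_STORE_DIR` is taken verbatim as `db_path`):

# Unset: db_path falls back to the full default path
unset SQLITE_STORE_DIR                 # -> ~/.llama/storage/registry.db
# Set: the variable's value replaces the whole expression
export SQLITE_STORE_DIR=/tmp/lcs-e2e   # -> /tmp/lcs-e2e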
diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index a8aa02de2..5a14b99aa 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: azure-configuration
 
 apis:
 - agents
@@ -14,19 +15,56 @@ apis:
 benchmarks: []
 conversations_store:
-  db_path: ~/.llama/storage/conversations.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
-  db_path: ~/.llama/storage/inference-store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
   type: sqlite
 metadata_store:
-  db_path: ~/.llama/storage/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
   type: sqlite
 providers:
+  inference:
+  - provider_id: azure
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY}
+      api_base: https://ols-test.openai.azure.com/
+      api_version: 2024-02-15-preview
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+  # - config:
+  #     excluded_categories: []
+  #   provider_id: llama-guard
+  #   provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,47 +103,6 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: azure
-    provider_type: remote::azure
-    config:
-      api_key: ${env.AZURE_API_KEY}
-      api_base: https://ols-test.openai.azure.com/
-      api_version: 2024-02-15-preview
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
   port: 8321
@@ -113,10 +110,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -133,16 +130,21 @@ storage:
       namespace: prompts
       backend: kv_default
 registered_resources:
-  models:
+  models:
   - model_id: gpt-4o-mini
     provider_id: azure
     model_type: llm
     provider_model_id: gpt-4o-mini
-  shields: []
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
-    provider_id: rag-runtime
\ No newline at end of file
+    provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
\ No newline at end of file
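For reference, a minimal local launch of this e2e configuration, assuming the `llama` CLI from llama-stack is installed and on PATH (the key is consumed via `${env.AZURE_API_KEY}` above; the value below is a placeholder):

export AZURE_API_KEY=...   # placeholder, not a real key
llama stack run tests/e2e/configs/run-azure.yaml --port 8321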
diff --git a/tests/e2e/configs/run-ci.yaml b/tests/e2e/configs/run-ci.yaml
index 7787c93de..58b45abdc 100644
--- a/tests/e2e/configs/run-ci.yaml
+++ b/tests/e2e/configs/run-ci.yaml
@@ -9,24 +9,69 @@ apis:
 - inference
 - safety
 - scoring
+- telemetry
 - tool_runtime
 - vector_io
 
 benchmarks: []
 conversations_store:
-  db_path: ~/.llama/storage/conversations.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
   type: sqlite
 datasets: []
 image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
-  db_path: ~/.llama/storage/inference-store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
   type: sqlite
 metadata_store:
-  db_path: ~/.llama/storage/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
   type: sqlite
 providers:
+  inference:
+  - provider_id: openai # This ID is a reference to 'providers.inference'
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+      allowed_models: ["${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"]
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+  # - config:
+  #     excluded_categories: []
+  #   provider_id: llama-guard
+  #   provider_type: inline::llama-guard
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: '********'
+  tool_runtime:
+  - config: {} # Enable the RAG tool
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config: # Define the storage backend for RAG
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,56 +110,19 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: openai
-    provider_type: remote::openai
-    config:
-      api_key: ${env.OPENAI_API_KEY}
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
+telemetry:
+  enabled: true
 server:
   port: 8321
 storage:
   backends:
-    kv_default:
+    kv_default: # Storage backend for RAG; here the registry and the RAG store are unified, i.e. information on registered resources (e.g. models, vector_stores) is saved together with the RAG chunks
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -132,11 +140,16 @@ storage:
       backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
-  - toolgroup_id: builtin::rag
+  - toolgroup_id: builtin::rag # Register the RAG tool
     provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model: # Define the default embedding model for RAG
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
\ No newline at end of file
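The `kv_default` backend above defaults to `~/.llama/storage/rag/kv_store.db`, and a prebuilt store with that layout is added at `tests/e2e/rag/kv_store.db` further down in this diff. A quick way to peek at it, assuming the `sqlite3` CLI is available (table names are an implementation detail of the kv_sqlite backend, so nothing beyond `.tables` is assumed here):

# List the tables of the prebuilt store; the resource registry and the
# FAISS-backed RAG chunks share this single kv_sqlite database.
sqlite3 tests/e2e/rag/kv_store.db '.tables'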
diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml
index 7ec33263f..ae3aac064 100644
--- a/tests/e2e/configs/run-rhaiis.yaml
+++ b/tests/e2e/configs/run-rhaiis.yaml
@@ -1,5 +1,6 @@
-version: '2'
-image_name: sample-notebook
+version: 2
+image_name: rhaiis-configuration
+
 apis:
 - agents
 - batches
@@ -11,6 +12,19 @@ apis:
 - scoring
 - tool_runtime
 - vector_io
+
+benchmarks: []
+conversations_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
+  type: sqlite
+datasets: []
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+inference_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
+  type: sqlite
+metadata_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
+  type: sqlite
 providers:
   inference:
@@ -21,88 +35,85 @@ providers:
       api_token: ${env.RHAIIS_API_KEY}
       tls_verify: false
       max_tokens: 2048
-  - provider_id: sentence-transformers
+  - config: {}
+    provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: documentation_faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
   files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage}
+  - config:
       metadata_store:
         table_name: files_metadata
         backend: sql_default
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
+      storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+  # - config:
+  #     excluded_categories: []
+  #   provider_id: llama-guard
+  #   provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
+  - config:
       persistence:
         agent_state:
-          namespace: agents
+          namespace: agents_state
           backend: kv_default
         responses:
-          table_name: responses
+          table_name: agents_responses
           backend: sql_default
-      max_write_queue_size: 10000
-      num_writers: 4
-  eval:
-  - provider_id: meta-reference
+    provider_id: meta-reference
     provider_type: inline::meta-reference
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
+    provider_id: huggingface
+    provider_type: remote::huggingface
     config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
+    provider_id: localfs
+    provider_type: inline::localfs
     config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -120,38 +131,20 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: ${env.RHAIIS_MODEL}
     provider_id: vllm
     model_type: llm
     provider_model_id: ${env.RHAIIS_MODEL}
-
-  shields:
-  - shield_id: llama-guard
-    provider_id: ${env.SAFETY_MODEL:+llama-guard}
-    provider_shield_id: ${env.SAFETY_MODEL:=}
-  - shield_id: code-scanner
-    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
-    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+
+  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
-safety:
-  default_shield_id: llama-guard
+    model_id: nomic-ai/nomic-embed-text-v1.5
diff --git a/tests/e2e/configs/run-rhelai.yaml b/tests/e2e/configs/run-rhelai.yaml
index 2d9ac373c..c9a1f1d21 100644
--- a/tests/e2e/configs/run-rhelai.yaml
+++ b/tests/e2e/configs/run-rhelai.yaml
@@ -1,5 +1,6 @@
-version: '2'
-image_name: sample-notebook
+version: 2
+image_name: rhelai-configuration
+
 apis:
 - agents
 - batches
@@ -11,6 +12,19 @@ apis:
 - scoring
 - tool_runtime
 - vector_io
+
+benchmarks: []
+conversations_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
+  type: sqlite
+datasets: []
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+inference_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
+  type: sqlite
+metadata_store:
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
+  type: sqlite
 providers:
   inference:
@@ -21,88 +35,85 @@ providers:
       api_token: ${env.RHEL_AI_API_KEY}
       tls_verify: false
       max_tokens: 2048
-  - provider_id: sentence-transformers
+  - config: {}
+    provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: documentation_faiss
-    provider_type: inline::faiss
-    config:
-      persistence:
-        namespace: vector_io::faiss
-        backend: kv_default
   files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/storage}
+  - config:
       metadata_store:
         table_name: files_metadata
         backend: sql_default
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
+      storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+  # - config:
+  #     excluded_categories: []
+  #   provider_id: llama-guard
+  #   provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
+  - config:
       persistence:
         agent_state:
-          namespace: agents
+          namespace: agents_state
           backend: kv_default
         responses:
-          table_name: responses
+          table_name: agents_responses
           backend: sql_default
-      max_write_queue_size: 10000
-      num_writers: 4
-  eval:
-  - provider_id: meta-reference
+    provider_id: meta-reference
     provider_type: inline::meta-reference
-    config:
+  batches:
+  - config:
       kvstore:
-        namespace: eval
+        namespace: batches_store
         backend: kv_default
+    provider_id: reference
+    provider_type: inline::reference
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
+  - config:
       kvstore:
-        namespace: datasetio::huggingface
+        namespace: huggingface_datasetio
         backend: kv_default
-  - provider_id: localfs
-    provider_type: inline::localfs
+    provider_id: huggingface
+    provider_type: remote::huggingface
     config:
       kvstore:
-        namespace: datasetio::localfs
+        namespace: localfs_datasetio
         backend: kv_default
-  scoring:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  tool_runtime:
-  - provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
-  batches:
-  - provider_id: reference
-    provider_type: inline::reference
+    provider_id: localfs
+    provider_type: inline::localfs
     config:
       kvstore:
-        namespace: batches
+        namespace: eval_store
         backend: kv_default
+    provider_id: meta-reference
+    provider_type: inline::meta-reference
+scoring_fns: []
+server:
+  port: 8321
 storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage}/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -120,38 +131,20 @@ storage:
       backend: kv_default
 registered_resources:
   models:
-  - model_id: all-mpnet-base-v2
-    provider_id: sentence-transformers
-    provider_model_id: all-mpnet-base-v2
-    model_type: embedding
-    metadata:
-      embedding_dimension: 768
   - model_id: ${env.RHEL_AI_MODEL}
     provider_id: vllm
     model_type: llm
     provider_model_id: ${env.RHEL_AI_MODEL}
-
-  shields:
-  - shield_id: llama-guard
-    provider_id: ${env.SAFETY_MODEL:+llama-guard}
-    provider_shield_id: ${env.SAFETY_MODEL:=}
-  - shield_id: code-scanner
-    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
-    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+
+  vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
-server:
-  port: 8321
-telemetry:
-  enabled: true
 vector_stores:
-  default_provider_id: documentation_faiss
+  default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: all-mpnet-base-v2
-safety:
-  default_shield_id: llama-guard
+    model_id: nomic-ai/nomic-embed-text-v1.5
diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml
index 37e083b8f..38c631548 100644
--- a/tests/e2e/configs/run-vertexai.yaml
+++ b/tests/e2e/configs/run-vertexai.yaml
@@ -1,4 +1,5 @@
 version: 2
+image_name: vertexai-configuration
 
 apis:
 - agents
@@ -14,19 +15,56 @@ apis:
 benchmarks: []
 conversations_store:
-  db_path: ~/.llama/storage/conversations.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/conversations.db}
   type: sqlite
 datasets: []
-image_name: starter
 # external_providers_dir: /opt/app-root/src/.llama/providers.d
 inference_store:
-  db_path: ~/.llama/storage/inference-store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/inference-store.db}
   type: sqlite
 metadata_store:
-  db_path: ~/.llama/storage/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/registry.db}
   type: sqlite
 providers:
+  inference:
+  - provider_id: google-vertex
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT}
+      location: ${env.VERTEX_AI_LOCATION}
+      allowed_models: ["google/gemini-2.5-flash"]
+  - config: {}
+    provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  files:
+  - config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ${env.SQLITE_STORE_DIR:=~/.llama/storage/files}
+    provider_id: meta-reference-files
+    provider_type: inline::localfs
+  safety: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
+  # - config:
+  #     excluded_categories: []
+  #   provider_id: llama-guard
+  #   provider_type: inline::llama-guard
+  scoring:
+  - config: {}
+    provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - config: {}
+    provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  vector_io:
+  - config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+    provider_id: faiss
+    provider_type: inline::faiss
   agents:
   - config:
       persistence:
@@ -65,46 +103,6 @@ providers:
         backend: kv_default
     provider_id: meta-reference
     provider_type: inline::meta-reference
-  files:
-  - config:
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
-      storage_dir: ~/.llama/storage
-    provider_id: meta-reference-files
-    provider_type: inline::localfs
-  inference:
-  - provider_id: google-vertex
-    provider_type: remote::vertexai
-    config:
-      project: ${env.VERTEX_AI_PROJECT}
-      location: ${env.VERTEX_AI_LOCATION}
-  - config: {}
-    provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-  safety:
-  - config:
-      excluded_categories: []
-    provider_id: llama-guard
-    provider_type: inline::llama-guard
-  scoring:
-  - config: {}
-    provider_id: basic
-    provider_type: inline::basic
-  - config: {}
-    provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-  tool_runtime:
-  - config: {}
-    provider_id: rag-runtime
-    provider_type: inline::rag-runtime
-  vector_io:
-  - config:
-      persistence:
-        namespace: faiss_store
-        backend: kv_default
-    provider_id: faiss
-    provider_type: inline::faiss
 scoring_fns: []
 server:
   port: 8321
@@ -112,10 +110,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ~/.llama/storage/kv_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ~/.llama/storage/sql_store.db
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -133,7 +131,7 @@ storage:
       backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields: [] # WARNING: Shields disabled due to infinite loop issue with LLM calls
   vector_dbs: []
   datasets: []
   scoring_fns: []
@@ -141,3 +139,8 @@ registered_resources:
   tool_groups:
   - toolgroup_id: builtin::rag
     provider_id: rag-runtime
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
\ No newline at end of file
diff --git a/tests/e2e/features/faiss.feature b/tests/e2e/features/faiss.feature
new file mode 100644
index 000000000..c94336e19
--- /dev/null
+++ b/tests/e2e/features/faiss.feature
@@ -0,0 +1,39 @@
+@Authorized
+Feature: FAISS support tests
+
+  Background:
+    Given The service is started locally
+    And REST API service prefix is /v1
+
+  @skip-in-library-mode
+  Scenario: Verify vector store is registered
+    Given The system is in default state
+    And REST API service hostname is localhost
+    And REST API service port is 8321
+    When I access REST API endpoint vector_stores using HTTP GET method
+    Then The status code of the response is 200
+    And I should see attribute named data in response
+    And the body of the response has the following structure
+    """
+    {
+      "object": "list",
+      "data": [
+        {
+          "object": "vector_store",
+          "name": "paul_graham_essay"
+        }
+      ]
+    }
+    """
+
+  Scenario: Query vector db using the file_search tool
+    Given The system is in default state
+    And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
+    When I use "query" to ask question with authorization header
+    """
+    {"query": "What is the title of the article from Paul?", "system_prompt": "You are an assistant. Always use the file_search tool to answer. Write only lowercase letters"}
+    """
+    Then The status code of the response is 200
+    And The response should contain following fragments
+      | Fragments in LLM response |
+      | great work                |
diff --git a/tests/e2e/rag/kv_store.db b/tests/e2e/rag/kv_store.db
new file mode 100644
index 000000000..d83c2f163
Binary files /dev/null and b/tests/e2e/rag/kv_store.db differ
diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt
index 2a62eaf6c..b61f186ac 100644
--- a/tests/e2e/test_list.txt
+++ b/tests/e2e/test_list.txt
@@ -1,3 +1,4 @@
+features/faiss.feature
 features/smoketests.feature
 features/authorized_noop.feature
 features/authorized_noop_token.feature
diff --git a/uv.lock b/uv.lock
index 376d17a72..41352a8eb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.12, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform != 'darwin'",
@@ -651,6 +651,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" },
 ]
 
+[[package]]
+name = "einops"
+version = "0.8.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e5/81/df4fbe24dff8ba3934af99044188e20a98ed441ad17a274539b74e82e126/einops-0.8.1.tar.gz", hash = "sha256:de5d960a7a761225532e0f1959e5315ebeafc0cd43394732f103ca44b9837e84", size = 54805, upload-time = "2025-02-09T03:17:00.434Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" },
+]
+
 [[package]]
 name = "email-validator"
 version = "2.3.0"
@@ -1346,6 +1355,7 @@ dependencies = [
     { name = "aiohttp" },
     { name = "authlib" },
     { name = "cachetools" },
+    { name = "einops" },
     { name = "email-validator" },
     { name = "fastapi" },
     { name = "jsonpath-ng" },
@@ -1428,6 +1438,7 @@ requires-dist = [
     { name = "aiohttp", specifier = ">=3.12.14" },
     { name = "authlib", specifier = ">=1.6.0" },
     { name = "cachetools", specifier = ">=6.1.0" },
+    { name = "einops", specifier = ">=0.8.1" },
     { name = "email-validator", specifier = ">=2.2.0" },
     { name = "fastapi", specifier = ">=0.115.12" },
     { name = "jsonpath-ng", specifier = ">=1.6.1" },
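A manual counterpart to the first scenario in faiss.feature above; hostname, port, prefix, and the expected body all come from the feature itself, and `$TOKEN` stands in for the bearer token that the feature file elides:

# Expect HTTP 200 and an {"object": "list", "data": [...]} body that
# contains a vector_store named "paul_graham_essay".
curl -s -H "Authorization: Bearer $TOKEN" http://localhost:8321/v1/vector_stores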