Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260206194808781905.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Remove unnecessary response format check. Fixes: #2203"
}
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260206205026841660.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Add table provider factory."
}
18 changes: 6 additions & 12 deletions packages/graphrag-cache/graphrag_cache/cache_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,14 @@
"""Cache factory implementation."""

from collections.abc import Callable
from typing import TYPE_CHECKING

from graphrag_common.factory import Factory
from graphrag_storage import create_storage
from graphrag_common.factory import Factory, ServiceScope
from graphrag_storage import Storage, create_storage

from graphrag_cache.cache import Cache
from graphrag_cache.cache_config import CacheConfig
from graphrag_cache.cache_type import CacheType

if TYPE_CHECKING:
from graphrag_common.factory import ServiceScope
from graphrag_storage import Storage

from graphrag_cache.cache import Cache


class CacheFactory(Factory["Cache"]):
"""A factory class for cache implementations."""
Expand All @@ -29,8 +23,8 @@ class CacheFactory(Factory["Cache"]):

def register_cache(
cache_type: str,
cache_initializer: Callable[..., "Cache"],
scope: "ServiceScope" = "transient",
cache_initializer: Callable[..., Cache],
scope: ServiceScope = "transient",
) -> None:
"""Register a custom cache implementation.

Expand All @@ -45,7 +39,7 @@ def register_cache(


def create_cache(
config: CacheConfig | None = None, storage: "Storage | None" = None
config: CacheConfig | None = None, storage: Storage | None = None
) -> "Cache":
"""Create a cache implementation based on the given configuration.

Expand Down
11 changes: 0 additions & 11 deletions packages/graphrag-llm/graphrag_llm/completion/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,6 @@ def __init__(
"""
raise NotImplementedError

@abstractmethod
def supports_structured_response(self) -> bool:
"""Whether the completion supports structured responses.
Returns
-------
bool:
True if structured responses are supported, False otherwise.
"""
raise NotImplementedError

@abstractmethod
def completion(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import litellm
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from litellm import ModelResponse, supports_response_schema # type: ignore
from litellm import ModelResponse # type: ignore

from graphrag_llm.completion.completion import LLMCompletion
from graphrag_llm.config.types import AuthMethod
Expand Down Expand Up @@ -128,10 +128,6 @@ def __init__(
retrier=self._retrier,
)

def supports_structured_response(self) -> bool:
"""Check if the model supports structured response."""
return supports_response_schema(self._model_id)

def completion(
self,
/,
Expand All @@ -140,9 +136,6 @@ def completion(
"""Sync completion method."""
messages: LLMCompletionMessagesParam = kwargs.pop("messages")
response_format = kwargs.pop("response_format", None)
if response_format and not self.supports_structured_response():
msg = f"Model '{self._model_id}' does not support response schemas."
raise ValueError(msg)

is_streaming = kwargs.get("stream") or False

Expand Down Expand Up @@ -182,11 +175,6 @@ async def completion_async(
"""Async completion method."""
messages: LLMCompletionMessagesParam = kwargs.pop("messages")
response_format = kwargs.pop("response_format", None)
if response_format and not supports_response_schema(
self._model_id,
):
msg = f"Model '{self._model_id}' does not support response schemas."
raise ValueError(msg)

is_streaming = kwargs.get("stream") or False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ def __init__(

self._mock_responses = mock_responses # type: ignore

def supports_structured_response(self) -> bool:
"""Check if the model supports structured response."""
return True

def completion(
self,
/,
Expand Down
32 changes: 3 additions & 29 deletions packages/graphrag-llm/notebooks/03_structured_responses.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "a79c242b",
"metadata": {},
"outputs": [
Expand All @@ -22,10 +22,10 @@
"text": [
"City: Seattle\n",
" Temperature: 11.1 °C\n",
" Condition: sunny\n",
" Condition: Sunny\n",
"City: San Francisco\n",
" Temperature: 23.9 °C\n",
" Condition: cloudy\n"
" Condition: Cloudy\n"
]
}
],
Expand Down Expand Up @@ -84,32 +84,6 @@
" print(f\" Condition: {report.condition}\")"
]
},
{
"cell_type": "markdown",
"id": "6dcfa20c",
"metadata": {},
"source": [
"## Checking for support\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "aa1edadb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Supports structured responses: True\n"
]
}
],
"source": [
"print(f\"Supports structured responses: {llm_completion.supports_structured_response()}\")"
]
},
{
"cell_type": "markdown",
"id": "6360f512",
Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag-llm/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"azure-identity~=1.19.0",
"azure-identity~=1.25",
"graphrag-cache==3.0.1",
"graphrag-common==3.0.1",
"jinja2~=3.1",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""Storage configuration model."""

from pydantic import BaseModel, ConfigDict, Field

from graphrag_storage.tables.table_type import TableType


class TableProviderConfig(BaseModel):
"""The default configuration section for table providers."""

model_config = ConfigDict(extra="allow")
"""Allow extra fields to support custom table provider implementations."""

type: str = Field(
description="The table type to use.",
default=TableType.Parquet,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License


"""Storage factory implementation."""

from collections.abc import Callable

from graphrag_common.factory import Factory, ServiceScope

from graphrag_storage.storage import Storage
from graphrag_storage.tables.table_provider import TableProvider
from graphrag_storage.tables.table_provider_config import TableProviderConfig
from graphrag_storage.tables.table_type import TableType


class TableProviderFactory(Factory[TableProvider]):
"""A factory class for table storage implementations."""


table_provider_factory = TableProviderFactory()


def register_table_provider(
table_type: str,
table_initializer: Callable[..., TableProvider],
scope: ServiceScope = "transient",
) -> None:
"""Register a custom storage implementation.

Args
----
- table_type: str
The table type id to register.
- table_initializer: Callable[..., TableProvider]
The table initializer to register.
"""
table_provider_factory.register(table_type, table_initializer, scope)


def create_table_provider(
config: TableProviderConfig, storage: Storage | None = None
) -> TableProvider:
"""Create a table provider implementation based on the given configuration.

Args
----
- config: TableProviderConfig
The table provider configuration to use.
- storage: Storage | None
The storage implementation to use for file-based TableProviders such as Parquet and CSV.

Returns
-------
TableProvider
The created table provider implementation.
"""
config_model = config.model_dump()
table_type = config.type

if table_type not in table_provider_factory:
match table_type:
case TableType.Parquet:
from graphrag_storage.tables.parquet_table_provider import (
ParquetTableProvider,
)

register_table_provider(TableType.Parquet, ParquetTableProvider)
case _:
msg = f"TableProviderConfig.type '{table_type}' is not registered in the TableProviderFactory. Registered types: {', '.join(table_provider_factory.keys())}."
raise ValueError(msg)

if storage:
config_model["storage"] = storage

return table_provider_factory.create(table_type, config_model)
13 changes: 13 additions & 0 deletions packages/graphrag-storage/graphrag_storage/tables/table_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License


"""Builtin table storage implementation types."""

from enum import StrEnum


class TableType(StrEnum):
"""Enum for table storage types."""

Parquet = "parquet"
2 changes: 1 addition & 1 deletion packages/graphrag-storage/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ classifiers = [
dependencies = [
"aiofiles~=24.1",
"azure-cosmos~=4.9",
"azure-identity~=1.19",
"azure-identity~=1.25",
"azure-storage-blob~=12.24",
"graphrag-common==3.0.1",
"pandas~=2.3",
Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag-vectors/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ classifiers = [
dependencies = [
"azure-core~=1.32",
"azure-cosmos~=4.9",
"azure-identity~=1.19",
"azure-identity~=1.25",
"azure-search-documents~=11.6",
"graphrag-common==3.0.1",
"lancedb~=0.24.1",
Expand Down
4 changes: 2 additions & 2 deletions packages/graphrag/graphrag/cli/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import TYPE_CHECKING, Any

from graphrag_storage import create_storage
from graphrag_storage.tables.parquet_table_provider import ParquetTableProvider
from graphrag_storage.tables.table_provider_factory import create_table_provider

import graphrag.api as api
from graphrag.callbacks.noop_query_callbacks import NoopQueryCallbacks
Expand Down Expand Up @@ -378,7 +378,7 @@ def _resolve_output_files(
"""Read indexing output files to a dataframe dict."""
dataframe_dict = {}
storage_obj = create_storage(config.output_storage)
table_provider = ParquetTableProvider(storage_obj)
table_provider = create_table_provider(config.table_provider, storage=storage_obj)
for name in output_list:
df_value = asyncio.run(table_provider.read_dataframe(name))
dataframe_dict[name] = df_value
Expand Down
6 changes: 6 additions & 0 deletions packages/graphrag/graphrag/config/models/graph_rag_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from graphrag_input import InputConfig
from graphrag_llm.config import ModelConfig
from graphrag_storage import StorageConfig, StorageType
from graphrag_storage.tables.table_provider_config import TableProviderConfig
from graphrag_vectors import IndexSchema, VectorStoreConfig, VectorStoreType
from pydantic import BaseModel, Field, model_validator

Expand Down Expand Up @@ -138,6 +139,11 @@ def _validate_update_output_storage_base_dir(self) -> None:
Path(self.update_output_storage.base_dir).resolve()
)

table_provider: TableProviderConfig = Field(
description="The table provider configuration.", default=TableProviderConfig()
)
"""The table provider configuration. By default we read/write parquet to disk. You can register custom output table storage."""

cache: CacheConfig = Field(
description="The cache configuration.",
default=CacheConfig(**asdict(graphrag_config_defaults.cache)),
Expand Down
Loading
Loading