Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""extend collection table for provider agnostic support

Revision ID: 041
Revises: 040
Create Date: 2026-01-15 16:53:19.495583

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from sqlalchemy.dialects import postgresql


# revision identifiers, used by Alembic.
revision = "041"
down_revision = "040"
branch_labels = None
depends_on = None

# PostgreSQL enum type backing ``collection.provider``.
# create_type=False stops SQLAlchemy from emitting CREATE TYPE implicitly;
# upgrade() creates the type itself via provider_type.create(...).
# "openai" is the only value so far; "aws" and "gemini" were noted here as
# candidates but are not part of the type.
provider_type = postgresql.ENUM("openai", name="providertype", create_type=False)


def upgrade():
    """Extend ``collection`` for provider-agnostic use and drop ``organization_id``.

    The statement order is significant: the enum type must exist before the
    ``provider`` column is added, and existing rows are backfilled with
    ``'openai'`` before that column is made NOT NULL.
    """
    table = "collection"

    # provider: the ENUM is declared with create_type=False, so the type is
    # created explicitly here (skipped if it already exists).
    provider_type.create(op.get_bind(), checkfirst=True)
    provider_column = sa.Column(
        "provider",
        provider_type,
        nullable=True,
        comment="LLM provider used for this collection",
    )
    op.add_column(table, provider_column)
    # Added as nullable first so pre-existing rows can be backfilled.
    op.execute("UPDATE collection SET provider = 'openai' WHERE provider IS NULL")
    op.alter_column(table, "provider", nullable=False)

    # Optional descriptive columns; both stay nullable.
    for column_name, column_comment in (
        ("name", "Name of the collection"),
        ("description", "Description of the collection"),
    ):
        op.add_column(
            table,
            sa.Column(
                column_name,
                sqlmodel.sql.sqltypes.AutoString(),
                nullable=True,
                comment=column_comment,
            ),
        )

    # Only the column comment changes on llm_service_name.
    op.alter_column(
        table,
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service",
        existing_comment="Name of the LLM service provider",
        existing_nullable=False,
    )

    # NOTE(review): this constraint has no predicate, so rows that are only
    # soft-deleted (deleted_at set) still reserve their (project_id, name).
    op.create_unique_constraint(
        "uq_collection_project_id_name",
        table,
        ["project_id", "name"],
    )

    # The foreign key has to go before the column it is defined on.
    op.drop_constraint(
        op.f("collection_organization_id_fkey"),
        table,
        type_="foreignkey",
    )
    op.drop_column(table, "organization_id")


def downgrade():
    """Undo revision 041.

    Restores ``collection.organization_id`` (backfilled from each row's
    project) together with its foreign key, removes the
    ``(project_id, name)`` unique constraint, restores the previous
    ``llm_service_name`` comment, and drops the ``description``, ``name``
    and ``provider`` columns. Finally the ``providertype`` enum type is
    dropped so the downgrade does not leave an orphaned type behind.
    """
    # Re-add as nullable so existing rows can be backfilled before NOT NULL.
    op.add_column(
        "collection",
        sa.Column(
            "organization_id",
            sa.INTEGER(),
            autoincrement=False,
            nullable=True,
            comment="Reference to the organization",
        ),
    )
    # Each collection takes the organization of the project it belongs to.
    op.execute(
        """UPDATE collection SET organization_id = (SELECT organization_id FROM project
WHERE project.id = collection.project_id)"""
    )
    op.alter_column("collection", "organization_id", nullable=False)
    op.create_foreign_key(
        op.f("collection_organization_id_fkey"),
        "collection",
        "organization",
        ["organization_id"],
        ["id"],
        ondelete="CASCADE",
    )
    op.drop_constraint("uq_collection_project_id_name", "collection", type_="unique")
    op.alter_column(
        "collection",
        "llm_service_name",
        existing_type=sa.VARCHAR(),
        comment="Name of the LLM service provider",
        existing_comment="Name of the LLM service",
        existing_nullable=False,
    )
    op.drop_column("collection", "description")
    op.drop_column("collection", "name")
    op.drop_column("collection", "provider")
    # The enum is declared with create_type=False, so it is never dropped
    # implicitly. Drop it explicitly, after the only column using it in this
    # migration is gone; checkfirst=True makes this a no-op if it is absent.
    provider_type.drop(op.get_bind(), checkfirst=True)
14 changes: 7 additions & 7 deletions backend/app/api/docs/collections/create.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ pipeline:
"model" and "instruction" in the request body otherwise only a vector store will be
created from the documents given.

If any one of the OpenAI interactions fail, all OpenAI resources are
cleaned up. If a Vector Store is unable to be created, for example,
If any one of the LLM service interactions fails, all service resources are
cleaned up. If an OpenAI vector store is unable to be created, for example,
all file(s) that were uploaded to OpenAI are removed from
OpenAI. Failure can occur from OpenAI being down, or some parameter
value being invalid. It can also fail due to document types not being
accepted. This is especially true for PDFs that may not be parseable.

Vector store/assistant will be created asynchronously. The immediate response
from this endpoint is `collection_job` object which is going to contain
the collection "job ID" and status. Once the collection has been created,
information about the collection will be returned to the user via the
callback URL. If a callback URL is not provided, clients can check the
In the case of OpenAI, the vector store/assistant will be created asynchronously.
The immediate response from this endpoint is a `collection_job` object, which
contains the collection "job ID" and status. Once the collection has
been created, information about the collection will be returned to the user via
the callback URL. If a callback URL is not provided, clients can check the
`collection job info` endpoint with the `job_id` to retrieve
information about the creation of the collection.
4 changes: 4 additions & 0 deletions backend/app/api/routes/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
CollectionPublic,
)
from app.utils import APIResponse, load_description, validate_callback_url
from app.services.collections.helpers import ensure_unique_name
from app.services.collections import (
create_collection as create_service,
delete_collection as delete_service,
Expand Down Expand Up @@ -88,6 +89,9 @@ def create_collection(
if request.callback_url:
validate_callback_url(str(request.callback_url))

if request.name:
ensure_unique_name(session, current_user.project_.id, request.name)

collection_job_crud = CollectionJobCrud(session, current_user.project_.id)
collection_job = collection_job_crud.create(
CollectionJobCreate(
Expand Down
10 changes: 10 additions & 0 deletions backend/app/crud/collection/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ def read_all(self):
collections = self.session.exec(statement).all()
return collections

def exists_by_name(self, collection_name: str) -> bool:
    """Return whether this project already has a collection with the given name.

    Soft-deleted collections are deliberately included. The
    ``uq_collection_project_id_name`` unique constraint covers every row,
    not only live ones, so a name held by a soft-deleted collection would
    still be rejected by the database on insert. Reporting it as taken
    here lets callers fail early instead of hitting an integrity error.

    Args:
        collection_name: Name to look up within ``self.project_id``.

    Returns:
        True if any collection row in the project uses the name.
    """
    statement = (
        select(Collection.id)
        .where(Collection.project_id == self.project_id)
        .where(Collection.name == collection_name)
    )
    return self.session.exec(statement).first() is not None

def delete_by_id(self, collection_id: UUID) -> Collection:
coll = self.read_one(collection_id)
coll.deleted_at = now()
Expand Down
3 changes: 3 additions & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@

from .collection import (
Collection,
CreationRequest,
CollectionPublic,
CollectionIDPublic,
CollectionWithDocsPublic,
DeletionRequest,
ProviderType,
)
from .collection_job import (
CollectionActionType,
Expand Down
66 changes: 48 additions & 18 deletions backend/app/models/collection.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,102 @@
from datetime import datetime
from enum import Enum
from typing import Any, Literal
from uuid import UUID, uuid4

from pydantic import HttpUrl, model_validator
from sqlalchemy import UniqueConstraint
from sqlmodel import Field, Relationship, SQLModel

from app.core.util import now
from app.models.document import DocumentPublic

from .organization import Organization
from .project import Project


class ProviderType(str, Enum):
    """LLM providers that can back a collection.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Bedrock and Gemini are not enabled yet; OpenAI is the only member.
    openai = "openai"


class Collection(SQLModel, table=True):
"""Database model for Collection operations."""

__table_args__ = (
UniqueConstraint(
"project_id",
"name",
name="uq_collection_project_id_name",
),
)

id: UUID = Field(
default_factory=uuid4,
primary_key=True,
description="Unique identifier for the collection",
sa_column_kwargs={"comment": "Unique identifier for the collection"},
)
# Must be a bare Field(...) call: wrapping it in parentheses with a trailing
# comma turns the default into a one-element tuple instead of a field
# definition.
provider: ProviderType = Field(
    nullable=False,
    description="LLM provider used for this collection (e.g., 'openai', 'bedrock', 'gemini', etc)",
    sa_column_kwargs={"comment": "LLM provider used for this collection"},
)
llm_service_id: str = Field(
nullable=False,
description="External LLM service identifier (e.g., OpenAI vector store ID)",
sa_column_kwargs={
"comment": "External LLM service identifier (e.g., OpenAI vector store ID)"
},
)
llm_service_name: str = Field(
nullable=False,
description="Name of the LLM service",
sa_column_kwargs={"comment": "Name of the LLM service"},
)

# Foreign keys
organization_id: int = Field(
foreign_key="organization.id",
nullable=False,
ondelete="CASCADE",
sa_column_kwargs={"comment": "Reference to the organization"},
# Both columns are nullable, so the attributes are typed as optional and
# default to None rather than being declared as required strings.
name: str | None = Field(
    default=None,
    nullable=True,
    description="Name of the collection",
    sa_column_kwargs={"comment": "Name of the collection"},
)
description: str | None = Field(
    default=None,
    nullable=True,
    description="Description of the collection",
    sa_column_kwargs={"comment": "Description of the collection"},
)
project_id: int = Field(
foreign_key="project.id",
nullable=False,
ondelete="CASCADE",
description="Project the collection belongs to",
sa_column_kwargs={"comment": "Reference to the project"},
)

# Timestamps
inserted_at: datetime = Field(
default_factory=now,
description="Timestamp when the collection was created",
sa_column_kwargs={"comment": "Timestamp when the collection was created"},
)
updated_at: datetime = Field(
default_factory=now,
description="Timestamp when the collection was updated",
sa_column_kwargs={"comment": "Timestamp when the collection was last updated"},
)
deleted_at: datetime | None = Field(
default=None,
description="Timestamp when the collection was deleted",
sa_column_kwargs={"comment": "Timestamp when the collection was deleted"},
)

# Relationships
organization: Organization = Relationship(back_populates="collections")
project: Project = Relationship(back_populates="collections")


# Request models
class DocumentOptions(SQLModel):
class CollectionOptions(SQLModel):
name: str | None = Field(default=None, description="Name of the collection")
description: str | None = Field(
default=None, description="Description of the collection"
)
documents: list[UUID] = Field(
description="List of document IDs",
)
Expand Down Expand Up @@ -154,9 +185,9 @@ class ProviderOptions(SQLModel):


class CreationRequest(
DocumentOptions,
ProviderOptions,
AssistantOptions,
CollectionOptions,
ProviderOptions,
CallbackRequest,
):
def extract_super_type(self, cls: "CreationRequest"):
Expand All @@ -181,7 +212,6 @@ class CollectionPublic(SQLModel):
llm_service_id: str
llm_service_name: str
project_id: int
organization_id: int

inserted_at: datetime
updated_at: datetime
Expand Down
3 changes: 0 additions & 3 deletions backend/app/models/organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ class Organization(OrganizationBase, table=True):
assistants: list["Assistant"] = Relationship(
back_populates="organization", cascade_delete=True
)
collections: list["Collection"] = Relationship(
back_populates="organization", cascade_delete=True
)
openai_conversations: list["OpenAIConversation"] = Relationship(
back_populates="organization", cascade_delete=True
)
Expand Down
Loading