From 32e5576820f0aeb1a7a7898af260f703d0d6a1af Mon Sep 17 00:00:00 2001 From: Swati Tiwari Date: Mon, 15 Dec 2025 08:10:51 +0530 Subject: [PATCH] Fix file reference links --- backend/danswer/connectors/file/connector.py | 13 ++++++-- .../server/query_and_chat/chat_backend.py | 33 ++++++++++++++++--- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 77d01394d4f..44d824d75b0 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -9,6 +9,7 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -130,12 +131,18 @@ def _process_file( else None ) + # Generate a link for file uploads if not already provided + # Files uploaded via the connector are stored as uuid/filename + file_link = all_metadata.get("link") + if not file_link and "/" in file_name: + # The file_name format is "uuid/filename" + # The endpoint expects /file/{uuid}/{filename} + file_link = f"{WEB_DOMAIN}/api/chat/file/{file_name}" + return [ Document( id=f"FILE_CONNECTOR__{file_name}", # add a prefix to avoid conflicts with other connectors - sections=[ - Section(link=all_metadata.get("link"), text=file_content_raw.strip()) - ], + sections=[Section(link=file_link, text=file_content_raw.strip())], source=DocumentSource.FILE, semantic_identifier=file_display_name, title=title, diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index f03dcb93397..4e5a1bb2138 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ 
b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -573,14 +573,56 @@ def upload_files_for_chat(
     }
 
 
-@router.get("/file/{file_id}")
+# Media types for connector uploads ("uuid/filename"), keyed by file extension.
+_MEDIA_TYPE_BY_EXTENSION = {
+    ".pdf": "application/pdf",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".txt": "text/plain",
+    ".doc": "application/msword",
+    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+}
+
+
+def _media_type_for_file_id(file_id: str) -> str:
+    """Return the media type to serve for the stored file `file_id`."""
+    if "/" not in file_id:
+        # Simple IDs are chat uploads; these were always served as "image/jpeg"
+        # (which browsers also accept for PNGs), so keep doing that.
+        return "image/jpeg"
+
+    # Connector uploads are stored as "uuid/filename": go by the extension.
+    filename = file_id.rsplit("/", 1)[-1].lower()
+    for extension, media_type in _MEDIA_TYPE_BY_EXTENSION.items():
+        if filename.endswith(extension):
+            return media_type
+    return "application/octet-stream"
+
+
+@router.get("/file/{file_id:path}")
 def fetch_chat_file(
     file_id: str,
     db_session: Session = Depends(get_session),
     _: User | None = Depends(current_user),
 ) -> Response:
+    """Return the stored file `file_id` with a media type matching its kind.
+
+    `file_id` is either a simple chat-upload ID or a connector-upload path of
+    the form "uuid/filename" (hence the `:path` converter on the route).
+
+    Raises:
+        HTTPException: 404 if the file cannot be read from the file store.
+    """
     file_store = get_default_file_store(db_session)
-    file_io = file_store.read_file(file_id, mode="b")
-    # NOTE: specifying "image/jpeg" here, but it still works for pngs
-    # TODO: do this properly
-    return Response(content=file_io.read(), media_type="image/jpeg")
+
+    try:
+        content = file_store.read_file(file_id, mode="b").read()
+    except Exception as e:
+        # Which exception the store raises for a missing file is not visible
+        # from here, so any read failure is reported as a 404. Log the
+        # traceback so genuine storage errors are not silently masked.
+        logger.exception(f"Error fetching file {file_id}: {e}")
+        raise HTTPException(status_code=404, detail="File not found") from e
+
+    return Response(content=content, media_type=_media_type_for_file_id(file_id))