From 32e5576820f0aeb1a7a7898af260f703d0d6a1af Mon Sep 17 00:00:00 2001
From: Swati Tiwari <swatitiwari354@gmail.com>
Date: Mon, 15 Dec 2025 08:10:51 +0530
Subject: [PATCH] Fix file reference links

---
 backend/danswer/connectors/file/connector.py  | 13 ++++++--
 .../server/query_and_chat/chat_backend.py     | 33 ++++++++++++++++---
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py
index 77d01394d4f..44d824d75b0 100644
--- a/backend/danswer/connectors/file/connector.py
+++ b/backend/danswer/connectors/file/connector.py
@@ -9,6 +9,7 @@
 from sqlalchemy.orm import Session
 
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -130,12 +131,18 @@ def _process_file(
         else None
     )
 
+    # Generate a link for file uploads if not already provided
+    # Files uploaded via the connector are stored as uuid/filename
+    file_link = all_metadata.get("link")
+    if not file_link and "/" in file_name:
+        # file_name has the form "uuid/filename", so the resulting link is
+        # {WEB_DOMAIN}/api/chat/file/{uuid}/{filename}; it is not URL-encoded.
+        file_link = f"{WEB_DOMAIN}/api/chat/file/{file_name}"
+
     return [
         Document(
             id=f"FILE_CONNECTOR__{file_name}",  # add a prefix to avoid conflicts with other connectors
-            sections=[
-                Section(link=all_metadata.get("link"), text=file_content_raw.strip())
-            ],
+            sections=[Section(link=file_link, text=file_content_raw.strip())],
             source=DocumentSource.FILE,
             semantic_identifier=file_display_name,
             title=title,
diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py
index f03dcb93397..4e5a1bb2138 100644
--- a/backend/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -573,14 +573,55 @@ def upload_files_for_chat(
     }
 
 
-@router.get("/file/{file_id}")
+@router.get("/file/{file_id:path}")
 def fetch_chat_file(
     file_id: str,
     db_session: Session = Depends(get_session),
     _: User | None = Depends(current_user),
 ) -> Response:
+    """Return the stored file identified by ``file_id`` as an HTTP response.
+
+    ``file_id`` may be a bare ID (chat uploads) or a ``uuid/filename`` path
+    (file connector uploads); the ``:path`` converter lets the route match
+    IDs containing slashes.
+
+    Raises:
+        HTTPException: 404 if the file cannot be read from the file store.
+    """
     file_store = get_default_file_store(db_session)
-    file_io = file_store.read_file(file_id, mode="b")
-    # NOTE: specifying "image/jpeg" here, but it still works for pngs
-    # TODO: do this properly
-    return Response(content=file_io.read(), media_type="image/jpeg")
+
+    # Keep the try body limited to the store access so that unrelated
+    # failures are not reported as a missing file.
+    try:
+        content = file_store.read_file(file_id, mode="b").read()
+    except Exception as e:
+        logger.error(f"Error fetching file {file_id}: {str(e)}")
+        raise HTTPException(status_code=404, detail="File not found") from e
+
+    if "/" not in file_id:
+        # Bare IDs come from chat uploads, which have always been served as
+        # "image/jpeg" (this also works for pngs).
+        return Response(content=content, media_type="image/jpeg")
+
+    # Path-style IDs look like uuid/filename: pick the type from the extension.
+    filename = file_id.rsplit("/", 1)[-1].lower()
+    content_type_by_extension = {
+        ".pdf": "application/pdf",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".txt": "text/plain",
+        ".doc": "application/msword",
+        # .docx is OOXML and has its own registered type, distinct from .doc
+        ".docx": (
+            "application/vnd.openxmlformats-officedocument"
+            ".wordprocessingml.document"
+        ),
+    }
+    content_type = "application/octet-stream"
+    for extension, candidate in content_type_by_extension.items():
+        if filename.endswith(extension):
+            content_type = candidate
+            break
+
+    return Response(content=content, media_type=content_type)