epflgraph · RamtinYazdanian · Sep 1, 2025 · Aug 28, 2025 · Sep 1, 2025 · Sep 1, 2025
diff --git a/graphai/api/retrieval/router.py b/graphai/api/retrieval/router.py
@@ -44,6 +44,12 @@ async def retrieve_from_es_index(data: RetrievalRequest,
     index_to_search_in = data.index
     return_scores = data.return_scores
     filter_by_date = data.filter_by_date
+    # filter_by_date left unset (None, the default): use True for 'course_' indexes, False for all others
+    if filter_by_date is None:
+        if index_to_search_in.startswith('course_'):
+            filter_by_date = True
+        else:
+            filter_by_date = False
     if not has_rag_access_rights(current_user.username, index_to_search_in):
         return INSUFFICIENT_ACCESS_ERROR
     results = retrieve_from_es_job(text, index_to_search_in, filters, limit, return_scores, filter_by_date)

diff --git a/graphai/api/retrieval/schemas.py b/graphai/api/retrieval/schemas.py
@@ -31,14 +31,14 @@ class RetrievalRequest(BaseModel):
         default=False
     )
 
-    filter_by_date: bool = Field(
+    filter_by_date: Union[bool, None] = Field(
         title="Filter by current date",
         description="If True, if the requested index has 'from' and 'until' fields, only returns documents "
                     "that are available at the current date and time based on those two fields. Basically "
                     "a smart custom filter that doesn't require the user to manually provide the current "
                     "datetime and ask for 'from' to be before it and for 'until' to be after it. "
                     "If the index does not have 'from' and 'until' fields, this results in an empty response.",
-        default=False
+        default=None
     )
 
 

diff --git a/graphai/core/common/fingerprinting.py b/graphai/core/common/fingerprinting.py
@@ -7,12 +7,15 @@
 import imagehash
 import numpy as np
 from PIL import Image
-import pdf2image
+import pymupdf
 from fuzzywuzzy import fuzz
 
 from graphai.core.common.common_utils import file_exists, is_pdf
 
 
+Image.MAX_IMAGE_PIXELS = 933120000
+
+
 def perceptual_hash_text(s):
     """
     Computes the perceptual hash of a strong
@@ -137,9 +140,10 @@ def perceptual_hash_pdf(input_filename_with_path, hash_size=16):
     if not file_exists(input_filename_with_path) or not is_pdf(input_filename_with_path):
         print(f'File {input_filename_with_path} does not exist or is not in the right format')
         return None
-    pdf_imageset = pdf2image.convert_from_path(input_filename_with_path)
+    pdf_imageset = pymupdf.open(input_filename_with_path)
     results = hashlib.md5(
-        ''.join(str(imagehash.dhash(x, hash_size=hash_size)) for x in pdf_imageset).encode('utf8')
+        ''.join(str(imagehash.dhash(x.get_pixmap().pil_image(),
+                                    hash_size=hash_size)) for x in pdf_imageset).encode('utf8')
     ).hexdigest()
     return str(results)