3 changes: 3 additions & 0 deletions .github/workflows/pull_request.yml
@@ -32,6 +32,9 @@ jobs:
python -m pip install --upgrade pip
pip install -e ".[test]"

+- name: Run typo checks
+  uses: crate-ci/typos@v1.40.0
+
- name: Run coding checks
run: |
make lint
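Pinning the action to a tagged release (`crate-ci/typos@v1.40.0`) keeps CI reproducible rather than tracking the action's latest behavior, and since workflow steps run in order, spelling failures surface before the black/ruff checks below.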
3 changes: 3 additions & 0 deletions Makefile
@@ -8,3 +8,6 @@ format:
lint:
PYTHONPATH=`pwd` python3 -m black vectordb_bench --check
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench
+
+typos:
+	typos README.md install.py vectordb_bench tests
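The new `typos` target gives contributors the same check locally, scoped to README.md, install.py, and the `vectordb_bench` and `tests` trees. It assumes the `typos` binary is on PATH (installable, for example, with `cargo install typos-cli`); both the target and the CI step pick up the repository's typos.toml automatically.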
4 changes: 2 additions & 2 deletions tests/test_chroma.py
@@ -20,7 +20,7 @@



-dict = {} #Assumes chroma is acception connections on localhost:8000
+dict = {} # Assumes chroma is accepting connections on localhost:8000
dict['name'] = "chroma"
dict['host'] = "localhost"
dict['port'] = 8000
@@ -102,4 +102,4 @@ def test_insert_and_search(self):
break
assert isFilter, f"Filter not working, id_list: {id_list}"



5 changes: 5 additions & 0 deletions typos.toml
@@ -0,0 +1,5 @@
+# Respect these words as correct spelling.
+[default.extend-words]
+"rabit" = "rabit"
+"typ" = "typ"
+"infom" = "infom"
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/alisql/alisql.py
@@ -91,7 +91,7 @@ def _create_db_table(self, dim: int):

@contextmanager
def init(self):
"""create and destory connections to database.
"""create and destroy connections to database.

Examples:
>>> with self.init():
@@ -310,7 +310,7 @@ def search_embedding(
return [one_res["id"] for one_res in res["result"]]

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
if self.case_config.metric_type == MetricType.COSINE:
log.info("cosine dataset need normalize.")
return True
6 changes: 3 additions & 3 deletions vectordb_bench/backend/clients/api.py
@@ -97,7 +97,7 @@ def not_empty_field(cls, v: any, field: any):


class DBCaseConfig(ABC):
"""Case specific vector database configs, usually uesed for index params like HNSW"""
"""Case specific vector database configs, usually used for index params like HNSW"""

@abstractmethod
def index_param(self) -> dict:
@@ -176,7 +176,7 @@ def __init__(
@abstractmethod
@contextmanager
def init(self) -> None:
"""create and destory connections to database.
"""create and destroy connections to database.
Why contextmanager:

In multiprocessing search tasks, vectordbbench might init
@@ -192,7 +192,7 @@ def init(self) -> None:
raise NotImplementedError

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
return False

@abstractmethod
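Several clients in this diff share the `need_normalize_cosine` docstring fixed here. The hook's purpose: backends that only support IP or L2 ask the benchmark to L2-normalize the dataset, because cosine similarity on raw vectors equals inner product on unit vectors. Below is a minimal sketch of that normalization, assuming NumPy arrays; `normalize_rows` is an illustrative name, not a helper from this codebase:

```python
import numpy as np

def normalize_rows(embeddings: np.ndarray) -> np.ndarray:
    """L2-normalize each row so that cosine(a, b) == dot(a_hat, b_hat)."""
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # leave zero vectors untouched instead of dividing by zero
    return embeddings / norms
```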
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/chroma/chroma.py
@@ -40,7 +40,7 @@ def __init__(

@contextmanager
def init(self) -> None:
"""create and destory connections to database.
"""create and destroy connections to database.

Examples:
>>> with self.init():
4 changes: 2 additions & 2 deletions vectordb_bench/backend/clients/clickhouse/clickhouse.py
@@ -91,7 +91,7 @@ def _drop_table(self):
log.warning(f"Failed to drop table {self.db_config['database']}.{self.table_name}: {e}")
raise e from None

-def _perfomance_tuning(self):
+def _performance_tuning(self):
self.conn.command("SET materialize_skip_indexes_on_insert = 1")

def _create_index(self):
@@ -120,7 +120,7 @@ def _create_index(self):
"""
self.conn.command(cmd=query)
else:
log.warning("HNSW is only avaliable method in clickhouse now")
log.warning("HNSW is only available method in clickhouse now")
except Exception as e:
log.warning(f"Failed to create Clickhouse vector index on table: {self.table_name} error: {e}")
raise e from None
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/doris/doris.py
@@ -234,7 +234,7 @@ def optimize(self, data_size: int | None = None) -> None:
log.info("Optimization completed using doris-vector-search library")

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
if self.case_config.metric_type == MetricType.COSINE:
log.info("cosine dataset need normalize.")
return True
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/mariadb/mariadb.py
@@ -89,7 +89,7 @@ def _create_db_table(self, dim: int):

@contextmanager
def init(self):
"""create and destory connections to database.
"""create and destroy connections to database.

Examples:
>>> with self.init():
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/memorydb/memorydb.py
@@ -148,7 +148,7 @@ def get_client(self, **kwargs):

@contextmanager
def init(self) -> Generator[None, None, None]:
"""create and destory connections to database.
"""create and destroy connections to database.

Examples:
>>> with self.init():
6 changes: 3 additions & 3 deletions vectordb_bench/backend/clients/milvus/milvus.py
@@ -135,7 +135,7 @@ def init(self):
self.col: Collection | None = None

connections.connect(**self.db_config, timeout=60)
-# Grab the existing colection with connections
+# Grab the existing collection with connections
self.col = Collection(self.collection_name)

yield
@@ -174,7 +174,7 @@ def wait_index():
try:
self.col.compact()
self.col.wait_for_compaction_completed()
log.info("compactation completed. waiting for the rest of index buliding.")
log.info("compactation completed. waiting for the rest of index building.")
except Exception as e:
log.warning(f"{self.name} compact error: {e}")
if hasattr(e, "code"):
@@ -192,7 +192,7 @@ def optimize(self, data_size: int | None = None):
self._optimize()

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
if self.case_config.is_gpu_index:
log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
return True
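One nit survives the fix: `compactation` on the same log line is left as-is, presumably because typos only flags words in its dictionary of known misspellings (`buliding` is there; `compactation` apparently is not).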
6 changes: 3 additions & 3 deletions vectordb_bench/backend/clients/pinecone/pinecone.py
@@ -76,7 +76,7 @@ def insert_embeddings(
try:
for batch_start_offset in range(0, len(embeddings), self.batch_size):
batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
-insert_datas = []
+insert_batch = []
for i in range(batch_start_offset, batch_end_offset):
metadata_dict = {self._scalar_id_field: metadata[i]}
if self.with_scalar_labels:
@@ -86,8 +86,8 @@
embeddings[i],
metadata_dict,
)
-insert_datas.append(insert_data)
-self.index.upsert(insert_datas)
+insert_batch.append(insert_data)
+self.index.upsert(insert_batch)
insert_count += batch_end_offset - batch_start_offset
except Exception as e:
return insert_count, e
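The rename from `insert_datas` to `insert_batch` also reads better against the Pinecone client's upsert contract, which accepts a list of `(id, values, metadata)` tuples per call; each batch here is exactly one such list.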
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/redis/redis.py
@@ -76,7 +76,7 @@ def make_index(self, vector_dimensions: int, conn: redis.Redis):

@contextmanager
def init(self) -> None:
"""create and destory connections to database.
"""create and destroy connections to database.

Examples:
>>> with self.init():
2 changes: 1 addition & 1 deletion vectordb_bench/backend/clients/s3_vectors/s3_vectors.py
@@ -95,7 +95,7 @@ def optimize(self, **kwargs):
return

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
return False

def insert_embeddings(
4 changes: 2 additions & 2 deletions vectordb_bench/backend/clients/vespa/vespa.py
@@ -40,7 +40,7 @@ def __init__(

@contextmanager
def init(self) -> Generator[None, None, None]:
"""create and destory connections to database.
"""create and destroy connections to database.
Why contextmanager:

In multiprocessing search tasks, vectordbbench might init
@@ -58,7 +58,7 @@ def init(self) -> Generator[None, None, None]:
self.client = None

def need_normalize_cosine(self) -> bool:
"""Wheather this database need to normalize dataset to support COSINE"""
"""Whether this database need to normalize dataset to support COSINE"""
return False

def insert_embeddings(
2 changes: 1 addition & 1 deletion vectordb_bench/backend/runner/mp_runner.py
@@ -32,7 +32,7 @@ class MultiProcessingSearchRunner:
Args:
k(int): search topk, default to 100
concurrency(Iterable): concurrencies, default [1, 5, 10, 15, 20, 25, 30, 35]
-duration(int): duration for each concurency, default to 30s
+duration(int): duration for each concurrency, default to 30s
"""

def __init__(
6 changes: 3 additions & 3 deletions vectordb_bench/backend/runner/read_write_runner.py
@@ -88,7 +88,7 @@ def __init__(

@time_it
def run_optimize(self):
"""Optimize needs to run in differenct process for pymilvus schema recursion problem"""
"""Optimize needs to run in different process for pymilvus schema recursion problem"""
with self.db.init():
log.info("Search after write - Optimize start")
self.db.optimize(data_size=self.data_volume)
@@ -104,7 +104,7 @@ def run_search(self, perc: int):
f"p99={p99_latency}, p95={p95_latency}, dur={ssearch_dur:.4f}",
)
log.info(
f"Search after wirte - Conc search start, dur for each conc={self.read_dur_after_write}",
f"Search after write - Conc search start, dur for each conc={self.read_dur_after_write}",
)
result = self.run_by_dur(self.read_dur_after_write)
max_qps = result[0]
@@ -114,7 +114,7 @@
conc_latency_p99_list = result[4]
conc_latency_p95_list = result[5]
conc_latency_avg_list = result[6]
log.info(f"Search after wirte - Conc search finished, max_qps={max_qps}")
log.info(f"Search after write - Conc search finished, max_qps={max_qps}")

return [
(
14 changes: 7 additions & 7 deletions vectordb_bench/backend/utils.py
@@ -11,27 +11,27 @@ def numerize(n: int) -> str:
>>> numerize(1_000_000_000)
'1B'
"""
-sufix2upbound = {
+suffix2upbound = {
"EMPTY": 1e3,
"K": 1e6,
"M": 1e9,
"B": 1e12,
"END": float("inf"),
}

-display_n, sufix = n, ""
-for s, base in sufix2upbound.items():
-    # number >= 1000B will alway have sufix 'B'
+display_n, suffix = n, ""
+for s, base in suffix2upbound.items():
+    # number >= 1000B will always have suffix 'B'
if s == "END":
display_n = int(n / 1e9)
sufix = "B"
suffix = "B"
break

if n < base:
sufix = "" if s == "EMPTY" else s
suffix = "" if s == "EMPTY" else s
display_n = int(n / (base / 1e3))
break
return f"{display_n}{sufix}"
return f"{display_n}{suffix}"


def time_it(func: any):
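The `sufix` → `suffix` rename is mechanical, but it touches every branch of `numerize`, so a few doctest-style checks are a cheap guard that behavior is unchanged. A sketch, assuming the package is importable; the sample values are illustrative:

```python
from vectordb_bench.backend.utils import numerize

assert numerize(999) == "999"                   # below 1K: no suffix
assert numerize(1_000) == "1K"
assert numerize(2_500_000) == "2M"              # int() truncates toward the base unit
assert numerize(1_000_000_000) == "1B"
assert numerize(5_000_000_000_000) == "5000B"   # >= 1000B always keeps suffix 'B'
```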
2 changes: 1 addition & 1 deletion vectordb_bench/custom/custom_case.json
@@ -1,6 +1,6 @@
[
{
"name": "My Dataset (Performace Case)",
"name": "My Dataset (Performance Case)",
"description": "this is a customized dataset.",
"load_timeout": 36000,
"optimize_timeout": 36000,
18 changes: 9 additions & 9 deletions vectordb_bench/frontend/components/check_results/filters.py
@@ -98,15 +98,15 @@ def getShowDbsAndCases(st, result: list[CaseResult], filter_type: FilterOp) -> t
"Case Filter",
datasetWithSizeTypes,
col=1,
-optionLables=[v.value for v in datasetWithSizeTypes],
+optionLabels=[v.value for v in datasetWithSizeTypes],
)
datasets = [dataset_with_size_type.get_manager() for dataset_with_size_type in showDatasetWithSizeTypes]
showCaseNames = list(set([case.name for case in allCases if case.dataset in datasets]))

return showDBNames, showCaseNames


-def filterView(container, header, options, col, optionLables=None):
+def filterView(container, header, options, col, optionLabels=None):
selectAllState = f"{header}-select-all-state"
if selectAllState not in st.session_state:
st.session_state[selectAllState] = True
@@ -137,14 +137,14 @@ def filterView(container, header, options, col, optionLables=None):
col,
gap="small",
)
-if optionLables is None:
-    optionLables = options
-isActive = {option: st.session_state[selectAllState] for option in optionLables}
-for i, option in enumerate(optionLables):
+if optionLabels is None:
+    optionLabels = options
+isActive = {option: st.session_state[selectAllState] for option in optionLabels}
+for i, option in enumerate(optionLabels):
isActive[option] = columns[i % col].checkbox(
-optionLables[i],
+optionLabels[i],
value=isActive[option],
key=f"{optionLables[i]}-{st.session_state[countKeyState]}",
key=f"{optionLabels[i]}-{st.session_state[countKeyState]}",
)

-return [options[i] for i, option in enumerate(optionLables) if isActive[option]]
+return [options[i] for i, option in enumerate(optionLabels) if isActive[option]]
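The rename also clarifies the closing line's contract: the comprehension returns entries of `options` by position while testing the checkbox state under the corresponding label, so `options` and `optionLabels` must stay index-aligned.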
8 changes: 4 additions & 4 deletions vectordb_bench/frontend/components/check_results/nav.py
@@ -9,11 +9,11 @@ def NavToRunTest(st):
switch_page("run test")


-def NavToQuriesPerDollar(st):
+def NavToQueriesPerDollar(st):
st.subheader("Compare qps with price.")
navClick = st.button("QP$ (Quries per Dollar) &nbsp;&nbsp;>")
navClick = st.button("QP$ (Queries per Dollar) &nbsp;&nbsp;>")
if navClick:
switch_page("quries_per_dollar")
switch_page("queries_per_dollar")


def NavToResults(st, key="nav-to-results"):
@@ -27,7 +27,7 @@ def NavToPages(st):
{"name": "Run Test", "link": "run_test"},
{"name": "Results", "link": "results"},
{"name": "Qps & Recall", "link": "qps_recall"},
{"name": "Quries Per Dollar", "link": "quries_per_dollar"},
{"name": "Queries Per Dollar", "link": "queries_per_dollar"},
{"name": "Concurrent", "link": "concurrent"},
{"name": "Label Filter", "link": "label_filter"},
{"name": "Int Filter", "link": "int_filter"},
@@ -7,7 +7,7 @@ def displayCustomCase(customCase: CustomCaseConfig, st, key):
customCase.dataset_config.name = columns[0].text_input(
"Name", key=f"{key}_name", value=customCase.dataset_config.name
)
customCase.name = f"{customCase.dataset_config.name} (Performace Case)"
customCase.name = f"{customCase.dataset_config.name} (Performance Case)"
customCase.dataset_config.dir = columns[1].text_input(
"Folder Path", key=f"{key}_dir", value=customCase.dataset_config.dir
)
@@ -59,7 +59,7 @@ def displayCustomCase(customCase: CustomCaseConfig, st, key):
default_label_percentages = ",".join(map(str, customCase.dataset_config.with_label_percentages))
label_percentage_input = columns[1].text_input(
"label percentages",
key=f"{key}_label_percantages",
key=f"{key}_label_percentages",
value=default_label_percentages,
)
try:
@@ -27,7 +27,7 @@ class CustomDatasetConfig(BaseModel):


class CustomCaseConfig(BaseModel):
name: str = "custom_dataset (Performace Case)"
name: str = "custom_dataset (Performance Case)"
description: str = ""
load_timeout: int = 36000
optimize_timeout: int = 36000
Expand Down