From c7cb18bfa1a4809a38ea4558395393e19ddfe2a6 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Wed, 11 Feb 2026 21:00:30 +0000 Subject: [PATCH 1/5] add mysql/mariadb --- pyproject.toml | 5 + .../storage_clients/_sql/_client_mixin.py | 26 +++++- .../_sql/_request_queue_client.py | 50 +++++----- .../storage_clients/_sql/_storage_client.py | 25 +++-- uv.lock | 91 ++++++++++++++++++- 5 files changed, 162 insertions(+), 35 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ddce5afb9..eef7277732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,11 @@ sql_sqlite = [ "sqlalchemy[asyncio]>=2.0.0,<3.0.0", "aiosqlite>=0.21.0", ] +sql_mysql = [ + "sqlalchemy[asyncio]>=2.0.0,<3.0.0", + "aiomysql>=0.3.2", + "cryptography>=46.0.5", +] redis = ["redis[hiredis] >= 7.0.0"] [project.scripts] diff --git a/src/crawlee/storage_clients/_sql/_client_mixin.py b/src/crawlee/storage_clients/_sql/_client_mixin.py index 43d511b5e4..bfa3be1a16 100644 --- a/src/crawlee/storage_clients/_sql/_client_mixin.py +++ b/src/crawlee/storage_clients/_sql/_client_mixin.py @@ -8,9 +8,10 @@ from sqlalchemy import CursorResult, delete, select, text, update from sqlalchemy import func as sql_func +from sqlalchemy.dialects.mysql import insert as mysql_insert from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.dialects.sqlite import insert as lite_insert -from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.exc import OperationalError, SQLAlchemyError from crawlee._utils.crypto import crypto_random_object_id @@ -227,6 +228,9 @@ def _build_insert_stmt_with_ignore( if dialect == 'sqlite': return lite_insert(table_model).values(insert_values).on_conflict_do_nothing() + if dialect == 'mysql': + return mysql_insert(table_model).values(insert_values).prefix_with('IGNORE') + raise NotImplementedError(f'Insert with ignore not supported for dialect: {dialect}') def _build_upsert_stmt( @@ -260,6 +264,11 @@ def _build_upsert_stmt( set_ = {col: 
getattr(lite_stmt.excluded, col) for col in update_columns} return lite_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=set_) + if dialect == 'mysql': + mysql_stmt = mysql_insert(table_model).values(insert_values) + set_ = {col: getattr(mysql_stmt.inserted, col) for col in update_columns} + return mysql_stmt.on_duplicate_key_update(**set_) + raise NotImplementedError(f'Upsert not supported for dialect: {dialect}') async def _purge(self, metadata_kwargs: MetadataUpdateParams) -> None: @@ -402,11 +411,12 @@ async def _try_acquire_buffer_lock(self, session: AsyncSession) -> bool: Returns: True if lock was acquired, False if already locked by another process. """ + capture_error_code = 1020 # MariaDB error code for "Record has changed since last read" now = datetime.now(timezone.utc) lock_until = now + self._BLOCK_BUFFER_TIME dialect = self._storage_client.get_dialect_name() - if dialect == 'postgresql': + if dialect in ('postgresql', 'mysql'): select_stmt = ( select(self._METADATA_TABLE) .where( @@ -417,7 +427,17 @@ async def _try_acquire_buffer_lock(self, session: AsyncSession) -> bool: ) .with_for_update(skip_locked=True) ) - result = await session.execute(select_stmt) + + try: + result = await session.execute(select_stmt) + except OperationalError as e: + # MariaDB raises error 1020 ("Record has changed since last read") instead of + # silently skipping locked rows like MySQL/PostgreSQL. Treat it as lock not acquired. 
+ error_code = getattr(e.orig, 'args', [None])[0] + if error_code == capture_error_code: + return False + raise + metadata_row = result.scalar_one_or_none() if metadata_row is None: diff --git a/src/crawlee/storage_clients/_sql/_request_queue_client.py b/src/crawlee/storage_clients/_sql/_request_queue_client.py index 78044c76df..cddc4e49cd 100644 --- a/src/crawlee/storage_clients/_sql/_request_queue_client.py +++ b/src/crawlee/storage_clients/_sql/_request_queue_client.py @@ -335,33 +335,33 @@ async def add_batch_of_requests( ) ) - if insert_values: - if forefront: - # If the request already exists in the database, we update the sequence_number by shifting request - # to the left. - upsert_stmt = self._build_upsert_stmt( - self._ITEM_TABLE, - insert_values, - update_columns=['sequence_number'], - conflict_cols=['request_id', 'request_queue_id'], - ) - result = await session.execute(upsert_stmt) - else: - # If the request already exists in the database, we ignore this request when inserting. - insert_stmt_with_ignore = self._build_insert_stmt_with_ignore(self._ITEM_TABLE, insert_values) - result = await session.execute(insert_stmt_with_ignore) + try: + if insert_values: + if forefront: + # If the request already exists in the database, we update the sequence_number + # by shifting request to the left. + upsert_stmt = self._build_upsert_stmt( + self._ITEM_TABLE, + insert_values, + update_columns=['sequence_number'], + conflict_cols=['request_id', 'request_queue_id'], + ) + result = await session.execute(upsert_stmt) + else: + # If the request already exists in the database, we ignore this request when inserting. 
+ insert_stmt_with_ignore = self._build_insert_stmt_with_ignore(self._ITEM_TABLE, insert_values) + result = await session.execute(insert_stmt_with_ignore) - result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result - approximate_new_request += result.rowcount + result = cast('CursorResult', result) if not isinstance(result, CursorResult) else result + approximate_new_request += result.rowcount - await self._add_buffer_record( - session, - update_modified_at=True, - delta_pending_request_count=approximate_new_request, - delta_total_request_count=approximate_new_request, - ) + await self._add_buffer_record( + session, + update_modified_at=True, + delta_pending_request_count=approximate_new_request, + delta_total_request_count=approximate_new_request, + ) - try: await session.commit() processed_requests.extend(transaction_processed_requests) except SQLAlchemyError as e: @@ -433,7 +433,7 @@ async def fetch_next_request(self) -> Request | None: async with self.get_session(with_simple_commit=True) as session: # We use the `skip_locked` database mechanism to prevent the 'interception' of requests by another client - if dialect == 'postgresql': + if dialect in ('postgresql', 'mysql'): stmt = stmt.with_for_update(skip_locked=True) result = await session.execute(stmt) requests_db = result.scalars().all() diff --git a/src/crawlee/storage_clients/_sql/_storage_client.py b/src/crawlee/storage_clients/_sql/_storage_client.py index 92fd260d33..28cc6c49f4 100644 --- a/src/crawlee/storage_clients/_sql/_storage_client.py +++ b/src/crawlee/storage_clients/_sql/_storage_client.py @@ -3,7 +3,7 @@ import warnings from logging import getLogger from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.ext.asyncio import AsyncEngine, async_sessionmaker, create_async_engine @@ -116,9 +116,9 @@ async def initialize(self, configuration: 
Configuration) -> None: async with engine.begin() as conn: self._dialect_name = engine.dialect.name - if self._dialect_name not in ('sqlite', 'postgresql'): + if self._dialect_name not in ('sqlite', 'postgresql', 'mysql'): raise ValueError( - f'Unsupported database dialect: {self._dialect_name}. Supported: sqlite, postgresql. ' + f'Unsupported database dialect: {self._dialect_name}. Supported: sqlite, postgresql, mysql. ' 'Consider using a different database.', ) @@ -256,11 +256,23 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine: # Create connection string with path to default database connection_string = f'sqlite+aiosqlite:///{db_path}' - if 'sqlite' not in connection_string and 'postgresql' not in connection_string: + if ( + ('sqlite' not in connection_string) + and ('postgresql' not in connection_string) + and ('mysql' not in connection_string) + ): raise ValueError( - 'Unsupported database. Supported: sqlite, postgresql. Consider using a different database.' + 'Unsupported database. Supported: sqlite, postgresql, mysql. Consider using a different database.' 
) + connect_args: dict[str, Any] + kwargs: dict[str, Any] = {} + if 'mysql' in connection_string: + connect_args: dict[str, Any] = {'connect_timeout': 30} + kwargs['isolation_level'] = 'READ COMMITTED' + else: + connect_args = {'timeout': 30} + self._engine = create_async_engine( connection_string, future=True, @@ -270,6 +282,7 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine: pool_recycle=600, pool_pre_ping=True, echo=False, - connect_args={'timeout': 30}, + connect_args=connect_args, + **kwargs, ) return self._engine diff --git a/uv.lock b/uv.lock index 417ee8e2a4..492b10359e 100644 --- a/uv.lock +++ b/uv.lock @@ -7,6 +7,18 @@ resolution-markers = [ "python_full_version < '3.11'", ] +[[package]] +name = "aiomysql" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pymysql" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/e0/302aeffe8d90853556f47f3106b89c16cc2ec2a4d269bdfd82e3f4ae12cc/aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a", size = 108311, upload-time = "2025-10-22T00:15:21.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/af/aae0153c3e28712adaf462328f6c7a3c196a1c1c27b491de4377dd3e6b52/aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2", size = 71834, upload-time = "2025-10-22T00:15:15.905Z" }, +] + [[package]] name = "aiosqlite" version = "0.22.1" @@ -811,6 +823,11 @@ playwright = [ redis = [ { name = "redis", extra = ["hiredis"] }, ] +sql-mysql = [ + { name = "aiomysql" }, + { name = "cryptography" }, + { name = "sqlalchemy", extra = ["asyncio"] }, +] sql-postgres = [ { name = "asyncpg" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -851,6 +868,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "aiomysql", marker = "extra == 'sql-mysql'", specifier = ">=0.3.2" }, { name = "aiosqlite", marker = "extra 
== 'sql-sqlite'", specifier = ">=0.21.0" }, { name = "apify-fingerprint-datapoints", marker = "extra == 'adaptive-crawler'", specifier = ">=0.0.3" }, { name = "apify-fingerprint-datapoints", marker = "extra == 'httpx'", specifier = ">=0.0.2" }, @@ -865,6 +883,7 @@ requires-dist = [ { name = "colorama", specifier = ">=0.4.0" }, { name = "cookiecutter", marker = "extra == 'cli'", specifier = ">=2.6.0" }, { name = "crawlee", extras = ["adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-sqlite", "sql-postgres", "redis"], marker = "extra == 'all'" }, + { name = "cryptography", marker = "extra == 'sql-mysql'", specifier = ">=46.0.5" }, { name = "curl-cffi", marker = "extra == 'curl-impersonate'", specifier = ">=0.9.0" }, { name = "html5lib", marker = "extra == 'beautifulsoup'", specifier = ">=1.0" }, { name = "httpx", extras = ["brotli", "http2", "zstd"], marker = "extra == 'httpx'", specifier = ">=0.27.0" }, @@ -889,6 +908,7 @@ requires-dist = [ { name = "redis", extras = ["hiredis"], marker = "extra == 'redis'", specifier = ">=7.0.0" }, { name = "rich", marker = "extra == 'cli'", specifier = ">=13.9.0" }, { name = "scikit-learn", marker = "extra == 'adaptive-crawler'", specifier = ">=1.6.0" }, + { name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-mysql'", specifier = ">=2.0.0,<3.0.0" }, { name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-postgres'", specifier = ">=2.0.0,<3.0.0" }, { name = "sqlalchemy", extras = ["asyncio"], marker = "extra == 'sql-sqlite'", specifier = ">=2.0.0,<3.0.0" }, { name = "tldextract", specifier = ">=5.1.0" }, @@ -897,7 +917,7 @@ requires-dist = [ { name = "wrapt", marker = "extra == 'otel'", specifier = ">=1.17.0" }, { name = "yarl", specifier = ">=1.18.0" }, ] -provides-extras = ["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-postgres", "sql-sqlite", "redis"] +provides-extras = 
["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-postgres", "sql-sqlite", "sql-mysql", "redis"] [package.metadata.requires-dev] dev = [ @@ -928,6 +948,66 @@ dev = [ { name = "uvicorn", extras = ["standard"], specifier = "<1.0.0" }, ] +[[package]] +name = "cryptography" +version = "46.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" }, + { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, + { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, + { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, + { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, + { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, + { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, + { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" }, + { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, + { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, + { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, + { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, + { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" }, + { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" }, + { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" }, + { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, + { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, + { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, + { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, + { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, + { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, + { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" }, + { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" }, + { url = "https://files.pythonhosted.org/packages/eb/dd/2d9fdb07cebdf3d51179730afb7d5e576153c6744c3ff8fded23030c204e/cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c", size = 3476964, upload-time = "2026-02-10T19:18:20.687Z" }, + { url = "https://files.pythonhosted.org/packages/e9/6f/6cc6cc9955caa6eaf83660b0da2b077c7fe8ff9950a3c5e45d605038d439/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a", size = 4218321, upload-time = "2026-02-10T19:18:22.349Z" }, + { url = "https://files.pythonhosted.org/packages/3e/5d/c4da701939eeee699566a6c1367427ab91a8b7088cc2328c09dbee940415/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356", size = 4381786, upload-time = "2026-02-10T19:18:24.529Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/97/a538654732974a94ff96c1db621fa464f455c02d4bb7d2652f4edc21d600/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da", size = 4217990, upload-time = "2026-02-10T19:18:25.957Z" }, + { url = "https://files.pythonhosted.org/packages/ae/11/7e500d2dd3ba891197b9efd2da5454b74336d64a7cc419aa7327ab74e5f6/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257", size = 4381252, upload-time = "2026-02-10T19:18:27.496Z" }, + { url = "https://files.pythonhosted.org/packages/bc/58/6b3d24e6b9bc474a2dcdee65dfd1f008867015408a271562e4b690561a4d/cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", size = 3407605, upload-time = "2026-02-10T19:18:29.233Z" }, +] + [[package]] name = "cssselect" version = "1.4.0" @@ -2976,6 +3056,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pymysql" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258, upload-time = "2025-08-24T12:55:55.146Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, 
upload-time = "2025-08-24T12:55:53.394Z" }, +] + [[package]] name = "pyprobables" version = "0.6.2" From 83d4a4c60d94b8db08efeceedf515f68376924cd Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Fri, 13 Feb 2026 15:04:38 +0000 Subject: [PATCH 2/5] add support `mariadb` dialect --- src/crawlee/storage_clients/_sql/_client_mixin.py | 6 +++--- .../storage_clients/_sql/_request_queue_client.py | 2 +- src/crawlee/storage_clients/_sql/_storage_client.py | 12 +++++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/crawlee/storage_clients/_sql/_client_mixin.py b/src/crawlee/storage_clients/_sql/_client_mixin.py index bfa3be1a16..b0d9d94075 100644 --- a/src/crawlee/storage_clients/_sql/_client_mixin.py +++ b/src/crawlee/storage_clients/_sql/_client_mixin.py @@ -228,7 +228,7 @@ def _build_insert_stmt_with_ignore( if dialect == 'sqlite': return lite_insert(table_model).values(insert_values).on_conflict_do_nothing() - if dialect == 'mysql': + if dialect in {'mysql', 'mariadb'}: return mysql_insert(table_model).values(insert_values).prefix_with('IGNORE') raise NotImplementedError(f'Insert with ignore not supported for dialect: {dialect}') @@ -264,7 +264,7 @@ def _build_upsert_stmt( set_ = {col: getattr(lite_stmt.excluded, col) for col in update_columns} return lite_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=set_) - if dialect == 'mysql': + if dialect in {'mysql', 'mariadb'}: mysql_stmt = mysql_insert(table_model).values(insert_values) set_ = {col: getattr(mysql_stmt.inserted, col) for col in update_columns} return mysql_stmt.on_duplicate_key_update(**set_) @@ -416,7 +416,7 @@ async def _try_acquire_buffer_lock(self, session: AsyncSession) -> bool: lock_until = now + self._BLOCK_BUFFER_TIME dialect = self._storage_client.get_dialect_name() - if dialect in ('postgresql', 'mysql'): + if dialect in {'postgresql', 'mysql', 'mariadb'}: select_stmt = ( select(self._METADATA_TABLE) .where( diff --git 
a/src/crawlee/storage_clients/_sql/_request_queue_client.py b/src/crawlee/storage_clients/_sql/_request_queue_client.py index cddc4e49cd..efead3d8db 100644 --- a/src/crawlee/storage_clients/_sql/_request_queue_client.py +++ b/src/crawlee/storage_clients/_sql/_request_queue_client.py @@ -433,7 +433,7 @@ async def fetch_next_request(self) -> Request | None: async with self.get_session(with_simple_commit=True) as session: # We use the `skip_locked` database mechanism to prevent the 'interception' of requests by another client - if dialect in ('postgresql', 'mysql'): + if dialect in {'postgresql', 'mysql', 'mariadb'}: stmt = stmt.with_for_update(skip_locked=True) result = await session.execute(stmt) requests_db = result.scalars().all() diff --git a/src/crawlee/storage_clients/_sql/_storage_client.py b/src/crawlee/storage_clients/_sql/_storage_client.py index 28cc6c49f4..0b0922e396 100644 --- a/src/crawlee/storage_clients/_sql/_storage_client.py +++ b/src/crawlee/storage_clients/_sql/_storage_client.py @@ -116,10 +116,10 @@ async def initialize(self, configuration: Configuration) -> None: async with engine.begin() as conn: self._dialect_name = engine.dialect.name - if self._dialect_name not in ('sqlite', 'postgresql', 'mysql'): + if self._dialect_name not in {'sqlite', 'postgresql', 'mysql', 'mariadb'}: raise ValueError( - f'Unsupported database dialect: {self._dialect_name}. Supported: sqlite, postgresql, mysql. ' - 'Consider using a different database.', + f'Unsupported database dialect: {self._dialect_name}. Supported: sqlite, postgresql, mysql, ' + 'mariadb. Consider using a different database.', ) # Create tables if they don't exist. @@ -260,14 +260,16 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine: ('sqlite' not in connection_string) and ('postgresql' not in connection_string) and ('mysql' not in connection_string) + and ('mariadb' not in connection_string) ): raise ValueError( - 'Unsupported database. 
Supported: sqlite, postgresql, mysql. Consider using a different database.' + 'Unsupported database. Supported: sqlite, postgresql, mysql, mariadb. Consider using a different ' + 'database.' ) connect_args: dict[str, Any] kwargs: dict[str, Any] = {} - if 'mysql' in connection_string: + if 'mysql' in connection_string or 'mariadb' in connection_string: connect_args: dict[str, Any] = {'connect_timeout': 30} kwargs['isolation_level'] = 'READ COMMITTED' else: From 040c3548cc7388d699201f064562b084f1435e7c Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Fri, 13 Feb 2026 15:52:21 +0000 Subject: [PATCH 3/5] update docs --- docs/guides/storage_clients.mdx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/guides/storage_clients.mdx b/docs/guides/storage_clients.mdx index bdc2c0fc67..1522024ffa 100644 --- a/docs/guides/storage_clients.mdx +++ b/docs/guides/storage_clients.mdx @@ -28,7 +28,7 @@ Crawlee provides three main storage client implementations: - `FileSystemStorageClient` - Provides persistent file system storage with in-memory caching. - `MemoryStorageClient` - Stores data in memory with no persistence. -- `SqlStorageClient` - Provides persistent storage using a SQL database ([SQLite](https://sqlite.org/) or [PostgreSQL](https://www.postgresql.org/)). Requires installing the extra dependency: `crawlee[sql_sqlite]` for SQLite or `crawlee[sql_postgres]` for PostgreSQL. +- `SqlStorageClient` - Provides persistent storage using a SQL database ([SQLite](https://sqlite.org/), [PostgreSQL](https://www.postgresql.org/), [MySQL](https://www.mysql.com/) or [MariaDB](https://mariadb.org/)). Requires installing the extra dependency: `crawlee[sql_sqlite]` for SQLite, `crawlee[sql_postgres]` for PostgreSQL or `crawlee[sql_mysql]` for MySQL and MariaDB. - `RedisStorageClient` - Provides persistent storage using a [Redis](https://redis.io/) database v8.0+. Requires installing the extra dependency `crawlee[redis]`. 
- [`ApifyStorageClient`](https://docs.apify.com/sdk/python/reference/class/ApifyStorageClient) - Manages storage on the [Apify platform](https://apify.com), implemented in the [Apify SDK](https://github.com/apify/apify-sdk-python). @@ -144,7 +144,7 @@ The `MemoryStorageClient` does not persist data between runs. All data is lost w The `SqlStorageClient` is experimental. Its API and behavior may change in future releases. ::: -The `SqlStorageClient` provides persistent storage using a SQL database (SQLite by default, or PostgreSQL). It supports all Crawlee storage types and enables concurrent access from multiple independent clients or processes. +The `SqlStorageClient` provides persistent storage using a SQL database (SQLite by default, or PostgreSQL, MySQL, or MariaDB). It supports all Crawlee storage types and enables concurrent access from multiple independent clients or processes. :::note dependencies The `SqlStorageClient` is not included in the core Crawlee package. @@ -154,10 +154,12 @@ To use it, you need to install Crawlee with the appropriate extra dependency: pip install 'crawlee[sql_sqlite]' - For PostgreSQL support, run: pip install 'crawlee[sql_postgres]' +- For MySQL or MariaDB support, run: + pip install 'crawlee[sql_mysql]' ::: By default, SqlStorageClient uses SQLite. -To use PostgreSQL instead, just provide a PostgreSQL connection string via the `connection_string` parameter. No other code changes are needed—the same client works for both databases. +To use a different database, just provide the appropriate connection string via the `connection_string` parameter. No other code changes are needed—the same client works for all supported databases. 
{SQLStorageClientBasicExample} @@ -214,7 +216,6 @@ class dataset_metadata_buffer { + id (PK) + accessed_at + modified_at - + dataset_id (FK) + delta_item_count } @@ -247,7 +248,6 @@ class key_value_store_metadata_buffer { + id (PK) + accessed_at + modified_at - + key_value_store_id (FK) } %% ======================== @@ -321,7 +321,6 @@ class request_queue_metadata_buffer { + id (PK) + accessed_at + modified_at - + request_queue_id (FK) + client_id + delta_handled_count + delta_pending_count @@ -346,11 +345,15 @@ Configuration options for the `SqlStorageCl Configuration options for the `SqlStorageClient` can be set via constructor arguments: -- **`connection_string`** (default: SQLite in `Configuration` storage dir) - SQLAlchemy connection string, e.g. `sqlite+aiosqlite:///my.db` or `postgresql+asyncpg://user:pass@host/db`. +- **`connection_string`** (default: SQLite in `Configuration` storage dir) - SQLAlchemy connection string, e.g. `sqlite+aiosqlite:///my.db`, `postgresql+asyncpg://user:pass@host/db`, `mysql+aiomysql://user:pass@host/db` or `mariadb+aiomysql://user:pass@host/db`. - **`engine`** - Pre-configured SQLAlchemy AsyncEngine (optional). For advanced scenarios, you can configure `SqlStorageClient` with a custom SQLAlchemy engine and additional options via the `Configuration` class. This is useful, for example, when connecting to an external PostgreSQL database or customizing connection pooling. +:::warning +If you use MySQL or MariaDB, pass the `isolation_level='READ COMMITTED'` argument to `create_async_engine`. 
+::: + {SQLStorageClientConfigurationExample} From 5bffe92777c4b86f73781a502bf719e02ad4609d Mon Sep 17 00:00:00 2001 From: Max Bohomolov <34358312+Mantisus@users.noreply.github.com> Date: Fri, 13 Feb 2026 18:21:12 +0200 Subject: [PATCH 4/5] Update docs/guides/storage_clients.mdx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/guides/storage_clients.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/storage_clients.mdx b/docs/guides/storage_clients.mdx index 1522024ffa..d5aa8bb871 100644 --- a/docs/guides/storage_clients.mdx +++ b/docs/guides/storage_clients.mdx @@ -351,7 +351,7 @@ Configuration options for the `SqlStorageCl For advanced scenarios, you can configure `SqlStorageClient` with a custom SQLAlchemy engine and additional options via the `Configuration` class. This is useful, for example, when connecting to an external PostgreSQL database or customizing connection pooling. :::warning -If you use MySQL or MariaDB, pass the `isolation_level='READ COMMITTED'` argument to `create_async_engine`. +If you use MySQL or MariaDB, pass the `isolation_level='READ COMMITTED'` argument to `create_async_engine`. MySQL/MariaDB default to the `REPEATABLE READ` isolation level, which can cause unnecessary locking, deadlocks, or stale reads when multiple Crawlee workers access the same tables concurrently. Using `READ COMMITTED` ensures more predictable row-level locking and visibility semantics for `SqlStorageClient`. 
::: From 46c2abd51b29ea5a742c5c4642194d7083c75264 Mon Sep 17 00:00:00 2001 From: Max Bohomolov Date: Fri, 13 Feb 2026 16:23:43 +0000 Subject: [PATCH 5/5] fix --- src/crawlee/storage_clients/_sql/_storage_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crawlee/storage_clients/_sql/_storage_client.py b/src/crawlee/storage_clients/_sql/_storage_client.py index 0b0922e396..4f52cd4b13 100644 --- a/src/crawlee/storage_clients/_sql/_storage_client.py +++ b/src/crawlee/storage_clients/_sql/_storage_client.py @@ -267,10 +267,11 @@ def _get_or_create_engine(self, configuration: Configuration) -> AsyncEngine: 'database.' ) - connect_args: dict[str, Any] kwargs: dict[str, Any] = {} if 'mysql' in connection_string or 'mariadb' in connection_string: connect_args: dict[str, Any] = {'connect_timeout': 30} + # MySQL/MariaDB default to REPEATABLE READ, which can cause deadlocks and stale reads when several + # clients access the same tables concurrently; READ COMMITTED avoids this. kwargs['isolation_level'] = 'READ COMMITTED' else: connect_args = {'timeout': 30}