From 5983af8cdf5fc23fcaae33013efd832440a854e5 Mon Sep 17 00:00:00 2001 From: Andrzej <6695650+thegrymek@users.noreply.github.com> Date: Tue, 10 Jun 2025 21:17:15 +0200 Subject: [PATCH 1/2] Changes: - persistent class can be passed as a class or an object to List/Deque. This change allows passing custom modes to the persistent class - add test cases from README.rst - update README.rst examples - add safer writing whether the serializer is in byte mode or not Signed-off-by: Andrzej <6695650+thegrymek@users.noreply.github.com> --- README.rst | 6 +- src/diskcollections/iterables/__init__.py | 2 + src/diskcollections/iterables/clients.py | 76 ++++++++++++++++------ src/diskcollections/iterables/iterables.py | 20 +++++- tests/conftest.py | 73 +++++++++++++++++++++ tests/test_examples.py | 46 +++++++++++++ tests/test_serializers.py | 26 -------- 7 files changed, 196 insertions(+), 53 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_examples.py diff --git a/README.rst b/README.rst index 12d738f..36f624d 100644 --- a/README.rst +++ b/README.rst @@ -69,11 +69,11 @@ There are available more ways to serialize items. JsonZLibSerializer # convert to json + compress items ) >>> from functools import partial - >>> JsonFileList = partial(List, serializer_class=JsonHandler) + >>> JsonFileList = partial(FileList, serializer_class=JsonSerializer) >>> flist = JsonFileList() >>> flist.append({'a': 1, 'b': 2, 'c': 3}) >>> flist[0] - {u'a': 1, u'b': 2, u'c': 3} + {'a': 1, 'b': 2, 'c': 3} Installation @@ -112,7 +112,7 @@ Exactly this object `{'a': 1, 'b': 2, 'c': 3}` will serialized and compressed an .. code-block:: python >>> flist[0] - {u'a': 1, u'b': 2, u'c': 3} + {'a': 1, 'b': 2, 'c': 3} Getting an item will read a file and because `JsonZLibSerializer` is used: then content will be decompressed and tried to loaded from json. 
diff --git a/src/diskcollections/iterables/__init__.py b/src/diskcollections/iterables/__init__.py index 33744d4..50aa8ef 100644 --- a/src/diskcollections/iterables/__init__.py +++ b/src/diskcollections/iterables/__init__.py @@ -10,12 +10,14 @@ serializer_class=PickleZLibSerializer, ) + FileDeque = partial( Deque, client_class=TemporaryDirectoryClient, serializer_class=PickleZLibSerializer, ) + __all__ = ( "List", "Deque", diff --git a/src/diskcollections/iterables/clients.py b/src/diskcollections/iterables/clients.py index 5adfd77..928d880 100644 --- a/src/diskcollections/iterables/clients.py +++ b/src/diskcollections/iterables/clients.py @@ -1,9 +1,14 @@ import os.path import tempfile +from typing import AnyStr, Optional from diskcollections.interfaces import IClientSequence from diskcollections.py2to3 import TemporaryDirectory +mode_str = "w+" +mode_bytes = "w+b" +modes = {mode_str, mode_bytes} + class TemporaryDirectoryClient(IClientSequence): """ @@ -15,9 +20,10 @@ class TemporaryDirectoryClient(IClientSequence): new content. 
""" - def __init__(self, iterable=(), mode="w+b"): + def __init__(self, iterable=(), mode=mode_bytes): super(TemporaryDirectoryClient, self).__init__() self.__mode = mode + self.__available_modes = modes - {mode} self.__files = [] self.__directory = TemporaryDirectory() self.extend(iterable) @@ -49,22 +55,35 @@ def __getitem__(self, index): return file.read() def __setitem__(self, index, value): - file = tempfile.TemporaryFile( - mode=self.__mode, dir=self.__directory.name - ) - file.write(bytes(value)) + file = self.safe_write(value) self.__files[index] = file def __len__(self): return len(self.__files) def insert(self, index, value): - file = tempfile.TemporaryFile( - mode=self.__mode, dir=self.__directory.name - ) - file.write(value) + file = self.safe_write(value) self.__files.insert(index, file) + def __write(self, value, mode: Optional[str] = None): + mode = mode or self.__mode + file = tempfile.TemporaryFile(mode=mode, dir=self.__directory.name) + file.write(value) + return file + + def safe_write(self, value): + try: + return self.__write(value) + except TypeError: + pass + + for mode in self.__available_modes: + try: + return self.__write(value, mode=mode) + except TypeError as e: + pass + raise e + class PersistentDirectoryClient(IClientSequence): """ @@ -79,6 +98,7 @@ class PersistentDirectoryClient(IClientSequence): def __init__(self, directory, iterable=()): super(PersistentDirectoryClient, self).__init__() self.__mode = "w+" + self.__available_modes = modes - {self.__mode} self.__files = [] if not os.path.exists(directory): @@ -102,9 +122,9 @@ def __delitem__(self, index): """Delete item from given index. 
Delete means here: - - delete file undex `files[index]` + - delete file under `files[index]` - when item is deleted then list become smaller - - rename and reopen higher then index files + - rename and reopen higher than index files """ file = self.__files[index] del self.__files[index] @@ -144,16 +164,14 @@ def __getitem__(self, index): def __setitem__(self, index, value): file_path = self.get_file_path(index) - file = open(file_path, mode=self.__mode) - file.write(value) - file.seek(0) + file = self.safe_write(file_path, value) self.__files[index] = file def __len__(self): return len(self.__files) def get_file_path(self, index): - return "%s/%s" % (self.__directory, index) + return f"{self.__directory}/{index}" def insert(self, index, value): """Insert value to index. @@ -170,9 +188,7 @@ def insert(self, index, value): """ if index >= len(self.__files): file_path = self.get_file_path(index) - file = open(file_path, mode=self.__mode) - file.write(value) - file.seek(0) + file = self.safe_write(file_path, value) self.__files.insert(index, file) return @@ -186,9 +202,7 @@ def insert(self, index, value): os.rename(old_file_path, new_file_path) file_path = self.get_file_path(index) - file = open(file_path, mode=self.__mode) - file.write(value) - file.seek(0) + file = self.safe_write(file_path, value) self.__files.insert(index, file) for i in range(len(self.__files)): @@ -200,3 +214,23 @@ def insert(self, index, value): file = open(file_path, mode="r+") self.__files[i] = file + + def __write(self, file_path, value, mode: Optional[str] = None): + mode = mode or self.__mode + file = open(file_path, mode=mode) + file.write(value) + file.seek(0) + return file + + def safe_write(self, file_path, value): + try: + return self.__write(file_path, value) + except TypeError: + pass + + for mode in self.__available_modes: + try: + return self.__write(file_path, value, mode=mode) + except TypeError as e: + pass + raise e diff --git a/src/diskcollections/iterables/iterables.py 
b/src/diskcollections/iterables/iterables.py index 488967a..c2a56ef 100644 --- a/src/diskcollections/iterables/iterables.py +++ b/src/diskcollections/iterables/iterables.py @@ -1,5 +1,6 @@ import collections - +import inspect +from functools import partial from diskcollections.py2to3 import izip @@ -8,7 +9,14 @@ def __init__( self, iterable=None, client_class=None, serializer_class=None ): super(List, self).__init__() - self.__client = client_class() + + if inspect.isclass(client_class): + self.__client = client_class() + elif isinstance(client_class, partial): + self.__client = client_class() + else: + self.__client = client_class + self.__serializer = serializer_class iterable = iterable or [] @@ -76,7 +84,13 @@ def __init__( client_class=None, serializer_class=None, ): - self.__client = client_class() + if inspect.isclass(client_class): + self.__client = client_class() + elif isinstance(client_class, partial): + self.__client = client_class() + else: + self.__client = client_class + self.__serializer = serializer_class self.__max_length = maxlen self.extend(iterable) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0525f9f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,73 @@ +import random +import shutil +import uuid +from functools import partial + +import pathlib +import pytest + +from diskcollections import serializers +from diskcollections.iterables import clients, List + +from diskcollections.iterables import FileDeque + +here = pathlib.Path(__file__).parent.absolute() +test_persistent_dir = here / "persistent_dir" + + +primitive_values = ["a", 1, [1, 2, 3], {"a": 1, "b": 2, "c": [1, 2, 3]}] + + +@pytest.fixture(params=primitive_values, ids=list(map(str, primitive_values))) +def primitive_value(request): + return request.param + + +serializers_classes = [ + serializers.JsonSerializer, + serializers.JsonZLibSerializer, + serializers.PickleSerializer, + serializers.PickleZLibSerializer, +] + + +@pytest.fixture( + 
params=serializers_classes, ids=list(map(str, serializers_classes)) +) +def serializer_class(request): + return request.param + + +@pytest.fixture( + params=[ + clients.TemporaryDirectoryClient, + partial( + clients.PersistentDirectoryClient, + test_persistent_dir / str(uuid.uuid4()), + ), + ], + ids=["TemporaryDirectoryClient", "PersistentDirectoryClient"], +) +def client_class(request): + test_persistent_dir.mkdir(exist_ok=True) + yield request.param + shutil.rmtree(test_persistent_dir.absolute()) + + +@pytest.fixture( + params=[ + partial( + List, + client_class=partial(clients.TemporaryDirectoryClient, mode="w+b"), + serializer_class=serializers.PickleZLibSerializer, + ) + ], + ids=["FileList"], +) +def list_class(request): + return request.param + + +@pytest.fixture(params=[FileDeque], ids=["FileDeque"]) +def deque_class(request): + return request.param diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 0000000..75dad78 --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,46 @@ +from diskcollections.iterables import FileDeque, FileList, List, Deque + + +def test_file_list() -> None: + flist = FileList() + flist.extend([1, 2, 3]) + flist.append(4) + assert all(i in flist for i in [1, 2, 3, 4]) + + flist2 = flist[:] + assert isinstance(flist2, List) + + my_list = list(flist) + assert isinstance(my_list, list) + + +def test_file_deque() -> None: + fdeque = FileDeque() + fdeque.extend([1, 2, 3]) + fdeque.append(4) + assert all(i in fdeque for i in [1, 2, 3, 4]) + + fdeque = FileDeque([1, 2, 3, 4]) + assert fdeque.pop() == 4 + fdeque.appendleft(0) + assert fdeque.popleft() == 0 + + +def test_list_serializers(client_class, serializer_class) -> None: + expected = [{"a": 1, "b": 2, "c": 3}, "a", 1] + flist = List(client_class=client_class, serializer_class=serializer_class) + flist.extend(expected) + assert all(i in flist for i in expected) + assert flist == expected + assert list(flist) == expected + + +def 
test_deque_serializers(client_class, serializer_class) -> None: + expected = [{"a": 1, "b": 2, "c": 3}, "a", 1] + fdeque = Deque( + client_class=client_class, serializer_class=serializer_class + ) + fdeque.extend(expected) + assert all(i in fdeque for i in expected) + assert fdeque == expected + assert list(fdeque) == expected diff --git a/tests/test_serializers.py b/tests/test_serializers.py index a900c95..903b328 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -1,29 +1,3 @@ -import pytest - -from diskcollections import serializers - -primitive_values = ["a", 1, [1, 2, 3], {"a": 1, "b": 2, "c": [1, 2, 3]}] - -serializers_classes = [ - serializers.JsonSerializer, - serializers.JsonZLibSerializer, - serializers.PickleSerializer, - serializers.PickleZLibSerializer, -] - - -@pytest.fixture(params=primitive_values, ids=list(map(str, primitive_values))) -def primitive_value(request): - return request.param - - -@pytest.fixture( - params=serializers_classes, ids=list(map(str, serializers_classes)) -) -def serializer_class(request): - return request.param - - def test_encode_decode(primitive_value, serializer_class): encoded = serializer_class.dumps(primitive_value) decoded = serializer_class.loads(encoded) From 053bf5dafe229730b18622fe199d7e33b5486623 Mon Sep 17 00:00:00 2001 From: Andrzej <6695650+thegrymek@users.noreply.github.com> Date: Tue, 10 Jun 2025 21:23:55 +0200 Subject: [PATCH 2/2] . 
Signed-off-by: Andrzej <6695650+thegrymek@users.noreply.github.com> --- src/diskcollections/iterables/clients.py | 15 ++++++++++----- tests/conftest.py | 1 - 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/diskcollections/iterables/clients.py b/src/diskcollections/iterables/clients.py index 928d880..9a6ab03 100644 --- a/src/diskcollections/iterables/clients.py +++ b/src/diskcollections/iterables/clients.py @@ -1,6 +1,6 @@ import os.path import tempfile -from typing import AnyStr, Optional +from typing import Optional from diskcollections.interfaces import IClientSequence from diskcollections.py2to3 import TemporaryDirectory @@ -77,12 +77,14 @@ def safe_write(self, value): except TypeError: pass + exc = None + for mode in self.__available_modes: try: return self.__write(value, mode=mode) except TypeError as e: - pass - raise e + exc = e + raise exc class PersistentDirectoryClient(IClientSequence): @@ -228,9 +230,12 @@ def safe_write(self, file_path, value): except TypeError: pass + exc = None + for mode in self.__available_modes: try: return self.__write(file_path, value, mode=mode) except TypeError as e: - pass - raise e + exc = e + + raise exc \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 0525f9f..5f5eb93 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ -import random import shutil import uuid from functools import partial