From 1715f2b62684364c7f3a11df4e78e9c17eacb125 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 11:16:33 +0000 Subject: [PATCH 01/22] New Overview page Mermaid diagrams can be embedded in docs Table CSS fixed --- .gitignore | 1 + docs/source/_static/custom.css | 5 + docs/source/conf.py | 4 + docs/source/index.rst | 2 + docs/source/introduction.rst | 18 +-- docs/source/overview.rst | 256 +++++++++++++++++++++++++++++++++ poetry.lock | 244 +++++++++++++++---------------- pyproject.toml | 9 +- 8 files changed, 401 insertions(+), 138 deletions(-) create mode 100644 docs/source/_static/custom.css create mode 100644 docs/source/overview.rst diff --git a/.gitignore b/.gitignore index 77b011d3..8318b436 100644 --- a/.gitignore +++ b/.gitignore @@ -138,6 +138,7 @@ datafaker/config.ini # sphinx docs/build/* +docs/esbonio-build/* docs/temp/* # vim swap files diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 00000000..9cd50168 --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,5 @@ +/* Fix for sphinx_rtd_theme's tables, which do not allow line breaks by default */ +.wy-table-responsive table td, +.wy-table-responsive table th { + white-space: normal; +} diff --git a/docs/source/conf.py b/docs/source/conf.py index 63233e54..2dd3fbd8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -29,6 +29,7 @@ "sphinx.ext.autosummary", "sphinx_rtd_theme", "sphinx.ext.napoleon", + "sphinxcontrib.mermaid", ] autodoc_mock_imports: list[str] = ["typer", "pydantic", "sqlalchemy"] @@ -41,3 +42,6 @@ html_theme = "sphinx_rtd_theme" # pylint: disable=C0103 html_static_path = ["_static"] + +def setup(app): + app.add_css_file("custom.css") diff --git a/docs/source/index.rst b/docs/source/index.rst index d2fc566b..2ce1e0e1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,6 +21,7 @@ Contents: :glob: :maxdepth: 2 + overview installation docker quickstart @@ -29,6 +30,7 @@ Contents: loan_data health_data configuration + custom_generators api faq glossary diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index 8cf833a8..2017628a 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -210,18 +210,20 @@ So let us find these sections in ``config.yaml`` and change ``vocabulary_table: (deleting the other properties if you like): .. code-block:: yaml - category: - vocabulary_table: true - city: - vocabulary_table: true - country: - vocabulary_table: true + + category: + vocabulary_table: true + city: + vocabulary_table: true + country: + vocabulary_table: true and later (although it doesn't matter if you re-arrange the table blocks): .. code-block:: yaml - language: - vocabulary_table: true + + language: + vocabulary_table: true and now we take this file into the private network (or pretend to) and run (in the private network with ``SRC_DSN`` and ``SRC_SCHEMA`` set as above): diff --git a/docs/source/overview.rst b/docs/source/overview.rst new file mode 100644 index 00000000..cf1283e7 --- /dev/null +++ b/docs/source/overview.rst @@ -0,0 +1,256 @@ +================== +Datafaker Overview +================== + +Datafaker provides a way to create a fake version of sensitive data. + +Datafaker's workflow permits Information Governance oversight by design. + +Background +========== + +Conceptually, any faking of sensitive data achieves privacy through one or both of two mechanisms: +Reducing the real data down to summaries, and obfuscating the data by adding noise. 
+
+After these privacy-preserving steps, reproducing a fake version of the full
+data set involves adding noise to the summary data to turn it back into data
+with the required structure and volume. We can call these stages Reduce,
+Obfuscate, Repopulate.
+
+.. mermaid::
+   :alt: Generic synthetic data flow
+
+   block-beta
+   columns 1
+   Source["Sensitive Data"]
+   block:Arrow1
+     ArrowNote1(["Remove Personally\nIdentifiable Data\n/ summarize / other removal"])
+     Summary<["Reduce"]>(down)
+   end
+   block:Intermediate1
+     ReducedNote(["This data is less sensitive"])
+     Reduced["Reduced data / summaries"]
+   end
+   block:Arrow2
+     ArrowNote2(["Adding noise"])
+     Obfuscation<["Obfuscate"]>(down)
+   end
+   block:Intermediate2
+     ObfuscatedNote(["This data is even\nless sensitive"])
+     Obfuscated("Obfuscated\nReduced data")
+   end
+   block:Arrow3
+     ArrowNote3(["Replacing removed data\nbased on summaries"])
+     Repopulation<["Repopulate"]>(down)
+   end
+   Destination["Synthetic Data"]
+   classDef nobox fill:#fff,stroke-width:0px;
+   class Intermediate1 nobox
+   class Intermediate2 nobox
+   class Arrow1 nobox
+   class Arrow2 nobox
+   class Arrow3 nobox
+   classDef note fill:#18c;
+   class ArrowNote1 note
+   class ArrowNote2 note
+   class ArrowNote3 note
+   class ReducedNote note
+   class ObfuscatedNote note
+
+In this diagram, Reduce and Obfuscate take place in a private environment such as
+a Trusted Research Environment or some machine on a private network from which
+the sensitive data can be read. Information Governance oversight happens at the end
+of this process, to decide whether this obfuscated summary can be released out of the
+private network.
+
+The Repopulate process happens outside of the private network and outside of the remit
+of Information Governance. This is important! The act of turning reduced,
+obfuscated data into a larger quantity of randomized data is *not* a privacy-shielding
+process! By this time all the actual privacy shielding has already happened and
+Information Governance has already been applied.
+
+Reduction techniques
+--------------------
+
+Here are a few techniques that can be used to preserve privacy through data reduction.
+Most Reduction techniques have a corresponding Repopulation technique
+to add the missing data back into the synthetic data.
+
+.. list-table:: Reduction Techniques
+   :width: 100%
+   :widths: 10 30 20
+   :header-rows: 1
+
+   * - Reduction technique
+     - Description
+     - Repopulation technique
+   * - Anonymization
+     - Removing Personally Identifiable Information (remembering what sort of information was removed and its format)
+     - Generation of Names/Addresses etcetera
+   * - Removal of unnecessary data
+     - Some data does not need to be supplied and so can be removed completely.
+     - None.
+   * - Summarization
+     - Extract the means, standard deviations and size of numeric data, the frequencies of categorical data, or other summary measures.
+     - Pick the new data from a suitable random distribution.
+   * - Low number suppression
+     - Removal of rare data, which could be used to infer the presence of certain individuals in the data set.
+     - None.
+   * - Grouping
+     - Gathering data into clusters and extracting summary data from each cluster.
+     - Picking new data by picking a cluster, then picking from the appropriate random distribution.
+
+Each of these techniques extracts less than the full information from the source;
+most of them then replace the removed information with generated information
+(perhaps after an Obfuscation step).
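+
+As a concrete illustration of the Summarization row above, the following sketch
+shows the Reduce, Obfuscate and Repopulate stages for a single numeric column.
+It is only an illustration (the values, variable names and noise scale are
+invented here, and this is not Datafaker's actual code):
+
+.. code-block:: python
+
+   import numpy as np
+
+   rng = np.random.default_rng()
+
+   # Reduce: keep only summary statistics of the sensitive values.
+   sensitive_ages = np.array([34, 41, 29, 57, 62, 38])
+   summary = {
+       "mean": float(sensitive_ages.mean()),
+       "std": float(sensitive_ages.std()),
+       "count": int(sensitive_ages.size),
+   }
+
+   # Obfuscate: add a small amount of noise to the summary itself.
+   summary["mean"] += rng.normal(0.0, 1.0)
+
+   # Repopulate: draw as many fake values as required from the
+   # summarized distribution; the original values are never needed again.
+   fake_ages = rng.normal(summary["mean"], summary["std"], size=1000)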
+
+Differential Privacy
+--------------------
+
+One concrete method of faking data is to apply a process such as Differential
+Privacy to the real data without summarizing it. In such a case, Information Governance
+cannot be expected to examine the data for privacy breaches, as there is the same
+amount of data to examine as in the real database, and breaches are likely to be
+more subtle than real names being produced; in this case Information Governance
+would have to understand and trust the obfuscation process.
+
+In the US, the National Institute of Standards and Technology has some
+`guidelines `_
+about the use of Differential Privacy, but even these are at a loss to explain how to
+choose its main parameter:
+
+    Selecting privacy loss parameters, such as ε, is challenging,
+    and we offer no specific guidelines on their selection.
+
+So Differential Privacy can be useful, but it is very hard to quantify exactly
+how much good it is doing to preserve privacy.
+
+Differential Privacy is an Obfuscation technique rather than a Reduction technique,
+because the same amount of data is output as was input.
+
+Datafaker's Operation
+=====================
+
+Datafaker is based on the Alan Turing Institute's SqlSynthGen tool.
+SqlSynthGen implements all three operations of Reduce, Obfuscate and Repopulate.
+
+Datafaker builds on SqlSynthGen by automating the specification of the Reduce
+operation and making the dataflow clearer for Information Governance purposes
+(amongst other improvements). Datafaker still retains SqlSynthGen's Obfuscation
+by Differential Privacy functionality, but this has not yet been folded into the
+automation and will not be described further here: we are relying on the
+Reduce operation to provide privacy shielding.
+
+Datafaker has a number of phases of operation; below we describe how
+these map to the Reduce, Obfuscate and Repopulate stages described above.
+
+.. mermaid::
+   :alt: Simplified diagram of Datafaker's data flow
+
+   block-beta
+   columns 1
+   Source["Sensitive Data"]
+   block:Processing
+     columns 4
+     Arrow1<["Extract\nstructure"]>(down)
+     Arrow2<["User guided\nconfiguration"]>(down)
+     Arrow3<["Extract\nsummary"]>(down)
+     Arrow4<["Extract\nvocabulary"]>(down)
+     Orm["orm.yaml\nstructure\ndefinition"]
+     Config["config.yaml\nconfiguration\nof summarization"]
+     SrcStats["src-stats.yaml\nSummary\nstatistics"]
+     Vocab["Vocabulary\ntable data"]
+   end
+   Arrow5<["Information Governance\npermits release"]>(down)
+   block:Release
+     columns 1
+     Arrow6<["Repopulation"]>(down)
+     Destination["Synthetic Data"]
+   end
+   classDef nobox fill:#fff,stroke-width:0px;
+   class Processing nobox
+   classDef publicnet fill:#18c;
+   class Release publicnet
+
+Datafaker make-tables phase
+---------------------------
+
+``datafaker make-tables`` makes a file called ``orm.yaml`` that describes the structure of the source database.
+This is part of the Reduce phase, but the file is used in every other Datafaker phase.
+Because only the structure of the database is described, no private data is leaked.
+However, it is not impossible that, in describing the structure of some commercial
+database, some commercially-sensitive information could be leaked.
+In such a case, the file can be edited by hand, as long as the YAML structure is maintained.
+
+Datafaker configuration phase
+-----------------------------
+
+These commands are not really part of the Reduce phase, but they allow the user to configure
+what the Reduce phase will entail (and hence also what the Repopulate phase will entail).
+
+- ``datafaker configure-tables`` makes a file called ``config.yaml`` that describes what needs to happen to each table.
+- ``datafaker configure-generators`` amends ``config.yaml`` with information on what happens to each column.
+- ``datafaker configure-missingness`` optionally amends ``config.yaml`` with simple summary missingness information.
+
+Additional configuration can be applied by hand at this point to allow more sophisticated Repopulation than the automated generation can manage.
+
+``config.yaml`` contains data that has been set by a user who has seen the contents of the database,
+but it should not contain any sensitive data, as Datafaker has not written any of the data (or summaries thereof)
+into the file.
+
+``config.yaml`` also contains human-readable descriptions of the summaries that will be produced,
+but these will be copied into the summary file produced in the following stage, next to the actual summary data.
+
+Datafaker summary stats phase
+-----------------------------
+
+``datafaker make-stats`` makes a file called ``src-stats.yaml``. This contains the summary data from the source database.
+
+Information Governance should focus on ensuring that private data does not leak out in this file.
+
+Obfuscation happens in this phase if it is configured; therefore this command represents
+most of the Reduce phase and all of the Obfuscate phase.
+
+Once these files (``orm.yaml``, ``config.yaml`` and ``src-stats.yaml``) have been examined and approved for release,
+we can move to the next phase.
+
+Datafaker make-vocab phase
+--------------------------
+
+``datafaker make-vocab`` makes a whole set of ``.yaml`` or ``.yaml.gz`` files,
+each of which represents the entire contents of one table in the source database.
+
+The idea is that some tables will never contain sensitive data:
+they might describe, for example, all the care centres that
+are referenced in the database, or the set of all possible
+diseases that might be diagnosed (not who has such a diagnosis,
+or even whether anybody has). Such tables are referred to as
+Vocabulary tables.
+
+Which tables are represented is configured by the ``configure-tables`` command,
+so the user must be careful not to configure any tables containing sensitive
+data as Vocabulary tables.
+
+Releasing the intermediate data
+-------------------------------
+
+These files -- ``orm.yaml``, ``config.yaml``, the vocabulary files and especially
+the ``src-stats.yaml`` file -- now need to have Information Governance
+processes applied to them, as it is these files that can be extracted from the
+private network or Trusted Research Environment to allow the construction of
+the synthetic data in a less sensitive computing environment, if required.
+
+After this point, the sensitive database is no longer required in Datafaker's operation.
+
+Datafaker Repopulate phase
+--------------------------
+
+- ``datafaker create-tables`` creates the structure of the destination database to match (as much as is requested) the structure of the source database.
+- ``datafaker create-generators`` creates Python code files that will actually generate the data (this phase might be removed in a future version of Datafaker).
+- ``datafaker create-data`` writes fake data into the destination database.
+
+As these operations require no access to the sensitive data, this phase can be
+distributed to anybody who wants to make their own fake data in their own
+database, should we want to allow that.
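+
+For example, a recipient of the released files might run a sequence like the
+following. This is only a sketch: command options and the destination database
+connection settings are omitted here, and depend on the recipient's environment.
+
+.. code-block:: console
+
+   $ datafaker create-tables
+   $ datafaker create-generators
+   $ datafaker create-data
+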
This could allow them to make +larger or smaller similar databases, or tweak the generated data with +the results of their own simulations in order to test their own analyses. diff --git a/poetry.lock b/poetry.lock index 2811b585..d26c56cf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -136,13 +136,13 @@ files = [ [[package]] name = "babel" -version = "2.17.0" +version = "2.18.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" files = [ - {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, - {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, + {file = "babel-2.18.0-py3-none-any.whl", hash = "sha256:e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35"}, + {file = "babel-2.18.0.tar.gz", hash = "sha256:b80b99a14bd085fcacfa15c9165f651fbb3406e66cc603abf11c5750937c992d"}, ] [package.extras] @@ -1538,13 +1538,13 @@ files = [ [[package]] name = "pathspec" -version = "1.0.3" +version = "1.0.4" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.9" files = [ - {file = "pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c"}, - {file = "pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d"}, + {file = "pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"}, + {file = "pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645"}, ] [package.extras] @@ -1588,7 +1588,7 @@ type = ["mypy (>=1.18.2)"] name = "pockets" version = "0.9.1" description = "A collection of helpful Python tools!" 
-optional = true +optional = false python-versions = "*" files = [ {file = "pockets-0.9.1-py2.py3-none-any.whl", hash = "sha256:68597934193c08a08eb2bf6a1d85593f627c22f9b065cc727a4f03f669d96d86"}, @@ -2011,13 +2011,13 @@ docutils = ">=0.11,<1.0" [[package]] name = "rich" -version = "14.3.1" +version = "14.3.2" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.8.0" files = [ - {file = "rich-14.3.1-py3-none-any.whl", hash = "sha256:da750b1aebbff0b372557426fb3f35ba56de8ef954b3190315eb64076d6fb54e"}, - {file = "rich-14.3.1.tar.gz", hash = "sha256:b8c5f568a3a749f9290ec6bddedf835cec33696bfc1e48bcfecb276c7386e4b8"}, + {file = "rich-14.3.2-py3-none-any.whl", hash = "sha256:08e67c3e90884651da3239ea668222d19bea7b589149d8014a21c633420dbb69"}, + {file = "rich-14.3.2.tar.gz", hash = "sha256:e712f11c1a562a11843306f5ed999475f09ac31ffb64281f73ab29ffdda8b3b8"}, ] [package.dependencies] @@ -2278,7 +2278,7 @@ test = ["cython", "html5lib", "pytest (>=4.6)", "typed_ast"] name = "sphinx-rtd-theme" version = "1.3.0" description = "Read the Docs theme for Sphinx" -optional = true +optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ {file = "sphinx_rtd_theme-1.3.0-py2.py3-none-any.whl", hash = "sha256:46ddef89cc2416a81ecfbeaceab1881948c014b1b6e4450b815311a89fb977b0"}, @@ -2345,7 +2345,7 @@ test = ["html5lib", "pytest"] name = "sphinxcontrib-jquery" version = "4.1" description = "Extension to include jQuery on newer Sphinx releases" -optional = true +optional = false python-versions = ">=2.7" files = [ {file = "sphinxcontrib-jquery-4.1.tar.gz", hash = "sha256:1620739f04e36a2c779f1a131a2dfd49b2fd07351bf1968ced074365933abc7a"}, @@ -2369,11 +2369,30 @@ files = [ [package.extras] test = ["flake8", "mypy", "pytest"] +[[package]] +name = "sphinxcontrib-mermaid" +version = "2.0.0" +description = "Mermaid diagrams in your Sphinx-powered docs" +optional = false +python-versions = ">=3.10" +files = [ + {file = "sphinxcontrib_mermaid-2.0.0-py3-none-any.whl", hash = "sha256:59a73249bbee2c74b1a4db036f8e8899ade65982bdda6712cf22b4f4e9874bb5"}, + {file = "sphinxcontrib_mermaid-2.0.0.tar.gz", hash = "sha256:cf4f7d453d001132eaba5d1fdf53d42049f02e913213cf8337427483bfca26f4"}, +] + +[package.dependencies] +jinja2 = "*" +pyyaml = "*" +sphinx = "*" + +[package.extras] +test = ["defusedxml", "myst-parser", "pytest", "ruff", "sphinx"] + [[package]] name = "sphinxcontrib-napoleon" version = "0.7" description = "Sphinx \"napoleon\" extension." 
-optional = true +optional = false python-versions = "*" files = [ {file = "sphinxcontrib-napoleon-0.7.tar.gz", hash = "sha256:407382beed396e9f2d7f3043fad6afda95719204a1e1a231ac865f40abcbfcf8"}, @@ -2640,13 +2659,13 @@ files = [ [[package]] name = "tqdm" -version = "4.67.1" +version = "4.67.3" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, - {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, + {file = "tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf"}, + {file = "tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb"}, ] [package.dependencies] @@ -2775,129 +2794,96 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "wcwidth" -version = "0.4.0" +version = "0.5.3" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = ">=3.8" files = [ - {file = "wcwidth-0.4.0-py3-none-any.whl", hash = "sha256:8af2c81174b3aa17adf05058c543c267e4e5b6767a28e31a673a658c1d766783"}, - {file = "wcwidth-0.4.0.tar.gz", hash = "sha256:46478e02cf7149ba150fb93c39880623ee7e5181c64eda167b6a1de51b7a7ba1"}, + {file = "wcwidth-0.5.3-py3-none-any.whl", hash = "sha256:d584eff31cd4753e1e5ff6c12e1edfdb324c995713f75d26c29807bb84bf649e"}, + {file = "wcwidth-0.5.3.tar.gz", hash = "sha256:53123b7af053c74e9fe2e92ac810301f6139e64379031f7124574212fb3b4091"}, ] [[package]] name = "wrapt" -version = "2.0.1" +version = "2.1.1" description = "Module for decorators, wrappers and monkey patching." 
optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "wrapt-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:64b103acdaa53b7caf409e8d45d39a8442fe6dcfec6ba3f3d141e0cc2b5b4dbd"}, - {file = "wrapt-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:91bcc576260a274b169c3098e9a3519fb01f2989f6d3d386ef9cbf8653de1374"}, - {file = "wrapt-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab594f346517010050126fcd822697b25a7031d815bb4fbc238ccbe568216489"}, - {file = "wrapt-2.0.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:36982b26f190f4d737f04a492a68accbfc6fa042c3f42326fdfbb6c5b7a20a31"}, - {file = "wrapt-2.0.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23097ed8bc4c93b7bf36fa2113c6c733c976316ce0ee2c816f64ca06102034ef"}, - {file = "wrapt-2.0.1-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8bacfe6e001749a3b64db47bcf0341da757c95959f592823a93931a422395013"}, - {file = "wrapt-2.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8ec3303e8a81932171f455f792f8df500fc1a09f20069e5c16bd7049ab4e8e38"}, - {file = "wrapt-2.0.1-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:3f373a4ab5dbc528a94334f9fe444395b23c2f5332adab9ff4ea82f5a9e33bc1"}, - {file = "wrapt-2.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f49027b0b9503bf6c8cdc297ca55006b80c2f5dd36cecc72c6835ab6e10e8a25"}, - {file = "wrapt-2.0.1-cp310-cp310-win32.whl", hash = "sha256:8330b42d769965e96e01fa14034b28a2a7600fbf7e8f0cc90ebb36d492c993e4"}, - {file = "wrapt-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:1218573502a8235bb8a7ecaed12736213b22dcde9feab115fa2989d42b5ded45"}, - {file = "wrapt-2.0.1-cp310-cp310-win_arm64.whl", hash = "sha256:eda8e4ecd662d48c28bb86be9e837c13e45c58b8300e43ba3c9b4fa9900302f7"}, - {file = "wrapt-2.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0e17283f533a0d24d6e5429a7d11f250a58d28b4ae5186f8f47853e3e70d2590"}, - {file = "wrapt-2.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:85df8d92158cb8f3965aecc27cf821461bb5f40b450b03facc5d9f0d4d6ddec6"}, - {file = "wrapt-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1be685ac7700c966b8610ccc63c3187a72e33cab53526a27b2a285a662cd4f7"}, - {file = "wrapt-2.0.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:df0b6d3b95932809c5b3fecc18fda0f1e07452d05e2662a0b35548985f256e28"}, - {file = "wrapt-2.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da7384b0e5d4cae05c97cd6f94faaf78cc8b0f791fc63af43436d98c4ab37bb"}, - {file = "wrapt-2.0.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ec65a78fbd9d6f083a15d7613b2800d5663dbb6bb96003899c834beaa68b242c"}, - {file = "wrapt-2.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7de3cc939be0e1174969f943f3b44e0d79b6f9a82198133a5b7fc6cc92882f16"}, - {file = "wrapt-2.0.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:fb1a5b72cbd751813adc02ef01ada0b0d05d3dcbc32976ce189a1279d80ad4a2"}, - {file = "wrapt-2.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3fa272ca34332581e00bf7773e993d4f632594eb2d1b0b162a9038df0fd971dd"}, - {file = "wrapt-2.0.1-cp311-cp311-win32.whl", hash = "sha256:fc007fdf480c77301ab1afdbb6ab22a5deee8885f3b1ed7afcb7e5e84a0e27be"}, - {file = "wrapt-2.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:47434236c396d04875180171ee1f3815ca1eada05e24a1ee99546320d54d1d1b"}, - {file = "wrapt-2.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:837e31620e06b16030b1d126ed78e9383815cbac914693f54926d816d35d8edf"}, - {file = "wrapt-2.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1fdbb34da15450f2b1d735a0e969c24bdb8d8924892380126e2a293d9902078c"}, - {file = "wrapt-2.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3d32794fe940b7000f0519904e247f902f0149edbe6316c710a8562fb6738841"}, - {file = "wrapt-2.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:386fb54d9cd903ee0012c09291336469eb7b244f7183d40dc3e86a16a4bace62"}, - {file = "wrapt-2.0.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7b219cb2182f230676308cdcacd428fa837987b89e4b7c5c9025088b8a6c9faf"}, - {file = "wrapt-2.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:641e94e789b5f6b4822bb8d8ebbdfc10f4e4eae7756d648b717d980f657a9eb9"}, - {file = "wrapt-2.0.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fe21b118b9f58859b5ebaa4b130dee18669df4bd111daad082b7beb8799ad16b"}, - {file = "wrapt-2.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:17fb85fa4abc26a5184d93b3efd2dcc14deb4b09edcdb3535a536ad34f0b4dba"}, - {file = "wrapt-2.0.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:b89ef9223d665ab255ae42cc282d27d69704d94be0deffc8b9d919179a609684"}, - {file = "wrapt-2.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a453257f19c31b31ba593c30d997d6e5be39e3b5ad9148c2af5a7314061c63eb"}, - {file = "wrapt-2.0.1-cp312-cp312-win32.whl", hash = "sha256:3e271346f01e9c8b1130a6a3b0e11908049fe5be2d365a5f402778049147e7e9"}, - {file = "wrapt-2.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:2da620b31a90cdefa9cd0c2b661882329e2e19d1d7b9b920189956b76c564d75"}, - {file = "wrapt-2.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:aea9c7224c302bc8bfc892b908537f56c430802560e827b75ecbde81b604598b"}, - {file = "wrapt-2.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:47b0f8bafe90f7736151f61482c583c86b0693d80f075a58701dd1549b0010a9"}, - {file = "wrapt-2.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cbeb0971e13b4bd81d34169ed57a6dda017328d1a22b62fda45e1d21dd06148f"}, - {file = "wrapt-2.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb7cffe572ad0a141a7886a1d2efa5bef0bf7fe021deeea76b3ab334d2c38218"}, - {file = "wrapt-2.0.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c8d60527d1ecfc131426b10d93ab5d53e08a09c5fa0175f6b21b3252080c70a9"}, - {file = "wrapt-2.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c654eafb01afac55246053d67a4b9a984a3567c3808bb7df2f8de1c1caba2e1c"}, - {file = "wrapt-2.0.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:98d873ed6c8b4ee2418f7afce666751854d6d03e3c0ec2a399bb039cd2ae89db"}, - {file = "wrapt-2.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9e850f5b7fc67af856ff054c71690d54fa940c3ef74209ad9f935b4f66a0233"}, - {file = "wrapt-2.0.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e505629359cb5f751e16e30cf3f91a1d3ddb4552480c205947da415d597f7ac2"}, - {file = "wrapt-2.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2879af909312d0baf35f08edeea918ee3af7ab57c37fe47cb6a373c9f2749c7b"}, - {file = "wrapt-2.0.1-cp313-cp313-win32.whl", hash = 
"sha256:d67956c676be5a24102c7407a71f4126d30de2a569a1c7871c9f3cabc94225d7"}, - {file = "wrapt-2.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:9ca66b38dd642bf90c59b6738af8070747b610115a39af2498535f62b5cdc1c3"}, - {file = "wrapt-2.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:5a4939eae35db6b6cec8e7aa0e833dcca0acad8231672c26c2a9ab7a0f8ac9c8"}, - {file = "wrapt-2.0.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a52f93d95c8d38fed0669da2ebdb0b0376e895d84596a976c15a9eb45e3eccb3"}, - {file = "wrapt-2.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4e54bbf554ee29fcceee24fa41c4d091398b911da6e7f5d7bffda963c9aed2e1"}, - {file = "wrapt-2.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:908f8c6c71557f4deaa280f55d0728c3bca0960e8c3dd5ceeeafb3c19942719d"}, - {file = "wrapt-2.0.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e2f84e9af2060e3904a32cea9bb6db23ce3f91cfd90c6b426757cf7cc01c45c7"}, - {file = "wrapt-2.0.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3612dc06b436968dfb9142c62e5dfa9eb5924f91120b3c8ff501ad878f90eb3"}, - {file = "wrapt-2.0.1-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6d2d947d266d99a1477cd005b23cbd09465276e302515e122df56bb9511aca1b"}, - {file = "wrapt-2.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7d539241e87b650cbc4c3ac9f32c8d1ac8a54e510f6dca3f6ab60dcfd48c9b10"}, - {file = "wrapt-2.0.1-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4811e15d88ee62dbf5c77f2c3ff3932b1e3ac92323ba3912f51fc4016ce81ecf"}, - {file = "wrapt-2.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c1c91405fcf1d501fa5d55df21e58ea49e6b879ae829f1039faaf7e5e509b41e"}, - {file = "wrapt-2.0.1-cp313-cp313t-win32.whl", hash = "sha256:e76e3f91f864e89db8b8d2a8311d57df93f01ad6bb1e9b9976d1f2e83e18315c"}, - {file = "wrapt-2.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:83ce30937f0ba0d28818807b303a412440c4b63e39d3d8fc036a94764b728c92"}, - {file = "wrapt-2.0.1-cp313-cp313t-win_arm64.whl", hash = "sha256:4b55cacc57e1dc2d0991dbe74c6419ffd415fb66474a02335cb10efd1aa3f84f"}, - {file = "wrapt-2.0.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5e53b428f65ece6d9dad23cb87e64506392b720a0b45076c05354d27a13351a1"}, - {file = "wrapt-2.0.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ad3ee9d0f254851c71780966eb417ef8e72117155cff04821ab9b60549694a55"}, - {file = "wrapt-2.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d7b822c61ed04ee6ad64bc90d13368ad6eb094db54883b5dde2182f67a7f22c0"}, - {file = "wrapt-2.0.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7164a55f5e83a9a0b031d3ffab4d4e36bbec42e7025db560f225489fa929e509"}, - {file = "wrapt-2.0.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e60690ba71a57424c8d9ff28f8d006b7ad7772c22a4af432188572cd7fa004a1"}, - {file = "wrapt-2.0.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3cd1a4bd9a7a619922a8557e1318232e7269b5fb69d4ba97b04d20450a6bf970"}, - {file = "wrapt-2.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4c2e3d777e38e913b8ce3a6257af72fb608f86a1df471cb1d4339755d0a807c"}, - {file = "wrapt-2.0.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:3d366aa598d69416b5afedf1faa539fac40c1d80a42f6b236c88c73a3c8f2d41"}, - {file = "wrapt-2.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:c235095d6d090aa903f1db61f892fffb779c1eaeb2a50e566b52001f7a0f66ed"}, - {file = "wrapt-2.0.1-cp314-cp314-win32.whl", hash = "sha256:bfb5539005259f8127ea9c885bdc231978c06b7a980e63a8a61c8c4c979719d0"}, - {file = "wrapt-2.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:4ae879acc449caa9ed43fc36ba08392b9412ee67941748d31d94e3cedb36628c"}, - {file = "wrapt-2.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:8639b843c9efd84675f1e100ed9e99538ebea7297b62c4b45a7042edb84db03e"}, - {file = "wrapt-2.0.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:9219a1d946a9b32bb23ccae66bdb61e35c62773ce7ca6509ceea70f344656b7b"}, - {file = "wrapt-2.0.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:fa4184e74197af3adad3c889a1af95b53bb0466bced92ea99a0c014e48323eec"}, - {file = "wrapt-2.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c5ef2f2b8a53b7caee2f797ef166a390fef73979b15778a4a153e4b5fedce8fa"}, - {file = "wrapt-2.0.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e042d653a4745be832d5aa190ff80ee4f02c34b21f4b785745eceacd0907b815"}, - {file = "wrapt-2.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2afa23318136709c4b23d87d543b425c399887b4057936cd20386d5b1422b6fa"}, - {file = "wrapt-2.0.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6c72328f668cf4c503ffcf9434c2b71fdd624345ced7941bc6693e61bbe36bef"}, - {file = "wrapt-2.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3793ac154afb0e5b45d1233cb94d354ef7a983708cc3bb12563853b1d8d53747"}, - {file = "wrapt-2.0.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:fec0d993ecba3991645b4857837277469c8cc4c554a7e24d064d1ca291cfb81f"}, - {file = "wrapt-2.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:949520bccc1fa227274da7d03bf238be15389cd94e32e4297b92337df9b7a349"}, - {file = "wrapt-2.0.1-cp314-cp314t-win32.whl", hash = "sha256:be9e84e91d6497ba62594158d3d31ec0486c60055c49179edc51ee43d095f79c"}, - {file = "wrapt-2.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:61c4956171c7434634401db448371277d07032a81cc21c599c22953374781395"}, - {file = "wrapt-2.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:35cdbd478607036fee40273be8ed54a451f5f23121bd9d4be515158f9498f7ad"}, - {file = "wrapt-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:90897ea1cf0679763b62e79657958cd54eae5659f6360fc7d2ccc6f906342183"}, - {file = "wrapt-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:50844efc8cdf63b2d90cd3d62d4947a28311e6266ce5235a219d21b195b4ec2c"}, - {file = "wrapt-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49989061a9977a8cbd6d20f2efa813f24bf657c6990a42967019ce779a878dbf"}, - {file = "wrapt-2.0.1-cp38-cp38-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:09c7476ab884b74dce081ad9bfd07fe5822d8600abade571cb1f66d5fc915af6"}, - {file = "wrapt-2.0.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1a8a09a004ef100e614beec82862d11fc17d601092c3599afd22b1f36e4137e"}, - {file = "wrapt-2.0.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:89a82053b193837bf93c0f8a57ded6e4b6d88033a499dadff5067e912c2a41e9"}, - {file = "wrapt-2.0.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f26f8e2ca19564e2e1fdbb6a0e47f36e0efbab1acc31e15471fad88f828c75f6"}, - {file = "wrapt-2.0.1-cp38-cp38-win32.whl", hash = "sha256:115cae4beed3542e37866469a8a1f2b9ec549b4463572b000611e9946b86e6f6"}, - {file = 
"wrapt-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:c4012a2bd37059d04f8209916aa771dfb564cccb86079072bdcd48a308b6a5c5"}, - {file = "wrapt-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:68424221a2dc00d634b54f92441914929c5ffb1c30b3b837343978343a3512a3"}, - {file = "wrapt-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6bd1a18f5a797fe740cb3d7a0e853a8ce6461cc62023b630caec80171a6b8097"}, - {file = "wrapt-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fb3a86e703868561c5cad155a15c36c716e1ab513b7065bd2ac8ed353c503333"}, - {file = "wrapt-2.0.1-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5dc1b852337c6792aa111ca8becff5bacf576bf4a0255b0f05eb749da6a1643e"}, - {file = "wrapt-2.0.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c046781d422f0830de6329fa4b16796096f28a92c8aef3850674442cdcb87b7f"}, - {file = "wrapt-2.0.1-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f73f9f7a0ebd0db139253d27e5fc8d2866ceaeef19c30ab5d69dcbe35e1a6981"}, - {file = "wrapt-2.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b667189cf8efe008f55bbda321890bef628a67ab4147ebf90d182f2dadc78790"}, - {file = "wrapt-2.0.1-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:a9a83618c4f0757557c077ef71d708ddd9847ed66b7cc63416632af70d3e2308"}, - {file = "wrapt-2.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e9b121e9aeb15df416c2c960b8255a49d44b4038016ee17af03975992d03931"}, - {file = "wrapt-2.0.1-cp39-cp39-win32.whl", hash = "sha256:1f186e26ea0a55f809f232e92cc8556a0977e00183c3ebda039a807a42be1494"}, - {file = "wrapt-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:bf4cb76f36be5de950ce13e22e7fdf462b35b04665a12b64f3ac5c1bbbcf3728"}, - {file = "wrapt-2.0.1-cp39-cp39-win_arm64.whl", hash = "sha256:d6cc985b9c8b235bd933990cdbf0f891f8e010b65a3911f7a55179cd7b0fc57b"}, - {file = "wrapt-2.0.1-py3-none-any.whl", hash = "sha256:4d2ce1bf1a48c5277d7969259232b57645aae5686dba1eaeade39442277afbca"}, - {file = "wrapt-2.0.1.tar.gz", hash = "sha256:9c9c635e78497cacb81e84f8b11b23e0aacac7a136e73b8e5b2109a1d9fc468f"}, + {file = "wrapt-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e927375e43fd5a985b27a8992327c22541b6dede1362fc79df337d26e23604f"}, + {file = "wrapt-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c99544b6a7d40ca22195563b6d8bc3986ee8bb82f272f31f0670fe9440c869"}, + {file = "wrapt-2.1.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b2be3fa5f4efaf16ee7c77d0556abca35f5a18ad4ac06f0ef3904c3399010ce9"}, + {file = "wrapt-2.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67c90c1ae6489a6cb1a82058902caa8006706f7b4e8ff766f943e9d2c8e608d0"}, + {file = "wrapt-2.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05c0db35ccffd7480143e62df1e829d101c7b86944ae3be7e4869a7efa621f53"}, + {file = "wrapt-2.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0c2ec9f616755b2e1e0bf4d0961f59bb5c2e7a77407e7e2c38ef4f7d2fdde12c"}, + {file = "wrapt-2.1.1-cp310-cp310-win32.whl", hash = "sha256:203ba6b3f89e410e27dbd30ff7dccaf54dcf30fda0b22aa1b82d560c7f9fe9a1"}, + {file = "wrapt-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:6f9426d9cfc2f8732922fc96198052e55c09bb9db3ddaa4323a18e055807410e"}, + {file = "wrapt-2.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:69c26f51b67076b40714cff81bdd5826c0b10c077fb6b0678393a6a2f952a5fc"}, + {file = 
"wrapt-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6c366434a7fb914c7a5de508ed735ef9c133367114e1a7cb91dfb5cd806a1549"}, + {file = "wrapt-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d6a2068bd2e1e19e5a317c8c0b288267eec4e7347c36bc68a6e378a39f19ee7"}, + {file = "wrapt-2.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:891ab4713419217b2aed7dd106c9200f64e6a82226775a0d2ebd6bef2ebd1747"}, + {file = "wrapt-2.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8ef36a0df38d2dc9d907f6617f89e113c5892e0a35f58f45f75901af0ce7d81"}, + {file = "wrapt-2.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76e9af3ebd86f19973143d4d592cbf3e970cf3f66ddee30b16278c26ae34b8ab"}, + {file = "wrapt-2.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ff562067485ebdeaef2fa3fe9b1876bc4e7b73762e0a01406ad81e2076edcebf"}, + {file = "wrapt-2.1.1-cp311-cp311-win32.whl", hash = "sha256:9e60a30aa0909435ec4ea2a3c53e8e1b50ac9f640c0e9fe3f21fd248a22f06c5"}, + {file = "wrapt-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:7d79954f51fcf84e5ec4878ab4aea32610d70145c5bbc84b3370eabfb1e096c2"}, + {file = "wrapt-2.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:d3ffc6b0efe79e08fd947605fd598515aebefe45e50432dc3b5cd437df8b1ada"}, + {file = "wrapt-2.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab8e3793b239db021a18782a5823fcdea63b9fe75d0e340957f5828ef55fcc02"}, + {file = "wrapt-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c0300007836373d1c2df105b40777986accb738053a92fe09b615a7a4547e9f"}, + {file = "wrapt-2.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2b27c070fd1132ab23957bcd4ee3ba707a91e653a9268dc1afbd39b77b2799f7"}, + {file = "wrapt-2.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b0e36d845e8b6f50949b6b65fc6cd279f47a1944582ed4ec8258cd136d89a64"}, + {file = "wrapt-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4aeea04a9889370fcfb1ef828c4cc583f36a875061505cd6cd9ba24d8b43cc36"}, + {file = "wrapt-2.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d88b46bb0dce9f74b6817bc1758ff2125e1ca9e1377d62ea35b6896142ab6825"}, + {file = "wrapt-2.1.1-cp312-cp312-win32.whl", hash = "sha256:63decff76ca685b5c557082dfbea865f3f5f6d45766a89bff8dc61d336348833"}, + {file = "wrapt-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:b828235d26c1e35aca4107039802ae4b1411be0fe0367dd5b7e4d90e562fcbcd"}, + {file = "wrapt-2.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:75128507413a9f1bcbe2db88fd18fbdbf80f264b82fa33a6996cdeaf01c52352"}, + {file = "wrapt-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9646e17fa7c3e2e7a87e696c7de66512c2b4f789a8db95c613588985a2e139"}, + {file = "wrapt-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:428cfc801925454395aa468ba7ddb3ed63dc0d881df7b81626cdd433b4e2b11b"}, + {file = "wrapt-2.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5797f65e4d58065a49088c3b32af5410751cd485e83ba89e5a45e2aa8905af98"}, + {file = "wrapt-2.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a2db44a71202c5ae4bb5f27c6d3afbc5b23053f2e7e78aa29704541b5dad789"}, + {file = "wrapt-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8d5350c3590af09c1703dd60ec78a7370c0186e11eaafb9dda025a30eee6492d"}, + {file = 
"wrapt-2.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2d9b076411bed964e752c01b49fd224cc385f3a96f520c797d38412d70d08359"}, + {file = "wrapt-2.1.1-cp313-cp313-win32.whl", hash = "sha256:0bb7207130ce6486727baa85373503bf3334cc28016f6928a0fa7e19d7ecdc06"}, + {file = "wrapt-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:cbfee35c711046b15147b0ae7db9b976f01c9520e6636d992cd9e69e5e2b03b1"}, + {file = "wrapt-2.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:7d2756061022aebbf57ba14af9c16e8044e055c22d38de7bf40d92b565ecd2b0"}, + {file = "wrapt-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4814a3e58bc6971e46baa910ecee69699110a2bf06c201e24277c65115a20c20"}, + {file = "wrapt-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:106c5123232ab9b9f4903692e1fa0bdc231510098f04c13c3081f8ad71c3d612"}, + {file = "wrapt-2.1.1-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1a40b83ff2535e6e56f190aff123821eea89a24c589f7af33413b9c19eb2c738"}, + {file = "wrapt-2.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:789cea26e740d71cf1882e3a42bb29052bc4ada15770c90072cb47bf73fb3dbf"}, + {file = "wrapt-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ba49c14222d5e5c0ee394495a8655e991dc06cbca5398153aefa5ac08cd6ccd7"}, + {file = "wrapt-2.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ac8cda531fe55be838a17c62c806824472bb962b3afa47ecbd59b27b78496f4e"}, + {file = "wrapt-2.1.1-cp313-cp313t-win32.whl", hash = "sha256:b8af75fe20d381dd5bcc9db2e86a86d7fcfbf615383a7147b85da97c1182225b"}, + {file = "wrapt-2.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:45c5631c9b6c792b78be2d7352129f776dd72c605be2c3a4e9be346be8376d83"}, + {file = "wrapt-2.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:da815b9263947ac98d088b6414ac83507809a1d385e4632d9489867228d6d81c"}, + {file = "wrapt-2.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9aa1765054245bb01a37f615503290d4e207e3fd59226e78341afb587e9c1236"}, + {file = "wrapt-2.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:feff14b63a6d86c1eee33a57f77573649f2550935981625be7ff3cb7342efe05"}, + {file = "wrapt-2.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:81fc5f22d5fcfdbabde96bb3f5379b9f4476d05c6d524d7259dc5dfb501d3281"}, + {file = "wrapt-2.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:951b228ecf66def855d22e006ab9a1fc12535111ae7db2ec576c728f8ddb39e8"}, + {file = "wrapt-2.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ddf582a95641b9a8c8bd643e83f34ecbbfe1b68bc3850093605e469ab680ae3"}, + {file = "wrapt-2.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fc5c500966bf48913f795f1984704e6d452ba2414207b15e1f8c339a059d5b16"}, + {file = "wrapt-2.1.1-cp314-cp314-win32.whl", hash = "sha256:4aa4baadb1f94b71151b8e44a0c044f6af37396c3b8bcd474b78b49e2130a23b"}, + {file = "wrapt-2.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:860e9d3fd81816a9f4e40812f28be4439ab01f260603c749d14be3c0a1170d19"}, + {file = "wrapt-2.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3c59e103017a2c1ea0ddf589cbefd63f91081d7ce9d491d69ff2512bb1157e23"}, + {file = "wrapt-2.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9fa7c7e1bee9278fc4f5dd8275bc8d25493281a8ec6c61959e37cc46acf02007"}, + {file = "wrapt-2.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:39c35e12e8215628984248bd9c8897ce0a474be2a773db207eb93414219d8469"}, + {file = 
"wrapt-2.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:94ded4540cac9125eaa8ddf5f651a7ec0da6f5b9f248fe0347b597098f8ec14c"}, + {file = "wrapt-2.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da0af328373f97ed9bdfea24549ac1b944096a5a71b30e41c9b8b53ab3eec04a"}, + {file = "wrapt-2.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4ad839b55f0bf235f8e337ce060572d7a06592592f600f3a3029168e838469d3"}, + {file = "wrapt-2.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0d89c49356e5e2a50fa86b40e0510082abcd0530f926cbd71cf25bee6b9d82d7"}, + {file = "wrapt-2.1.1-cp314-cp314t-win32.whl", hash = "sha256:f4c7dd22cf7f36aafe772f3d88656559205c3af1b7900adfccb70edeb0d2abc4"}, + {file = "wrapt-2.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f76bc12c583ab01e73ba0ea585465a41e48d968f6d1311b4daec4f8654e356e3"}, + {file = "wrapt-2.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7ea74fc0bec172f1ae5f3505b6655c541786a5cabe4bbc0d9723a56ac32eb9b9"}, + {file = "wrapt-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9e03b3d486eb39f5d3f562839f59094dcee30c4039359ea15768dc2214d9e07c"}, + {file = "wrapt-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0fdf3073f488ce4d929929b7799e3b8c52b220c9eb3f4a5a51e2dc0e8ff07881"}, + {file = "wrapt-2.1.1-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0cb4f59238c6625fae2eeb72278da31c9cfba0ff4d9cbe37446b73caa0e9bcf7"}, + {file = "wrapt-2.1.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f794a1c148871b714cb566f5466ec8288e0148a1c417550983864b3981737cd"}, + {file = "wrapt-2.1.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:95ef3866631c6da9ce1fc0f1e17b90c4c0aa6d041fc70a11bc90733aee122e1a"}, + {file = "wrapt-2.1.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:66bc1b2446f01cbbd3c56b79a3a8435bcd4178ac4e06b091913f7751a7f528b8"}, + {file = "wrapt-2.1.1-cp39-cp39-win32.whl", hash = "sha256:1b9e08e57cabc32972f7c956d10e85093c5da9019faa24faf411e7dd258e528c"}, + {file = "wrapt-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:e75ad48c3cca739f580b5e14c052993eb644c7fa5b4c90aa51193280b30875ae"}, + {file = "wrapt-2.1.1-cp39-cp39-win_arm64.whl", hash = "sha256:9ccd657873b7f964711447d004563a2bc08d1476d7a1afcad310f3713e6f50f4"}, + {file = "wrapt-2.1.1-py3-none-any.whl", hash = "sha256:3b0f4629eb954394a3d7c7a1c8cca25f0b07cefe6aa8545e862e9778152de5b7"}, + {file = "wrapt-2.1.1.tar.gz", hash = "sha256:5fdcb09bf6db023d88f312bd0767594b414655d58090fc1c46b3414415f67fac"}, ] [package.extras] @@ -2909,4 +2895,4 @@ docs = ["sphinx-rtd-theme", "sphinxcontrib-napoleon"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.14" -content-hash = "7bc095e0a55cd7cf20a196aa80c6ce7bf8f428664eaf9d095d90cb13a58c19df" +content-hash = "acfff071bfdc279f1937312612083daf291adb2f83b6f3944ea05434c2ae94f2" diff --git a/pyproject.toml b/pyproject.toml index 0916c993..4136d2d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,12 +51,15 @@ json-schema-for-humans = "^1.3.4" pre-commit = "^3.3.3" testing-postgresql = "^1.3.0" duckdb = "^1.4.3" +sphinx-rtd-theme = "^1.2.0" +sphinxcontrib-napoleon = "^0.7" +sphinxcontrib-mermaid = "^2.0.0" [tool.poetry.group.extras.dependencies] tqdm = "^4.65.0" [tool.poetry.extras] -docs = ["sphinx-rtd-theme", "sphinxcontrib-napoleon"] +docs = ["sphinx-rtd-theme", "sphinxcontrib-napoleon", "sphinxcontrib-mermaid"] [build-system] requires = 
["poetry-core"] @@ -67,3 +70,7 @@ datafaker = "datafaker.main:app" [tool.isort] profile = "black" + +[tool.esbonio.sphinx] +buildCommand = ["sphinx-build", "-b", "html", "docs/source", "docs/esbonio-build"] +pythonCommand = ["poetry", "run", "python"] From c0bdf4a9c47bd8179a0f84ee9c1e09adbcef88db Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 13:20:48 +0000 Subject: [PATCH 02/22] First attempt at a documentation publishing workflow --- .github/workflows/docs.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..324d9e13 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,35 @@ +--- +name: publish documentation +on: {} +env: + PYTHON_VERSION: "3.12" +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install poetry + shell: bash + run: | + sudo apt install python3-poetry + - name: Configure poetry + shell: bash + run: | + python -m poetry config virtualenvs.in-project true + - name: Install dependencies + shell: bash + run: | + python -m poetry install --all-extras + - name: Build documentation + shell: bash + run: | + python -m poetry run -C docs make html + - name: Publish docs on github Pages + uses: peaceiris/actions-gh-pages@v3 + with: + publish_branch: gh-pages + github_token: $${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/build + force_orphan: true From 5b0bfe080b48593e2e0466f0f201e0c4abf9baaa Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 13:25:58 +0000 Subject: [PATCH 03/22] Manual dispatch of documentation publish action --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 324d9e13..827c711d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,6 @@ --- name: publish documentation -on: {} +on: workflow_dispatch env: PYTHON_VERSION: "3.12" jobs: From 73638789d64dc450790e468a3b7c24f7fbd12ed1 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 15:15:47 +0000 Subject: [PATCH 04/22] Add Mermaid to the RST lint test --- tests/test_rst.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_rst.py b/tests/test_rst.py index 1a57ed61..969d0637 100644 --- a/tests/test_rst.py +++ b/tests/test_rst.py @@ -1,7 +1,9 @@ """Run the .rst linter via a unit test. 
The CLI does not allow errors to be disabled, but we can ignore them here.""" +from docutils.parsers.rst import directives from pathlib import Path +from sphinxcontrib.mermaid import Mermaid from typing import Any from unittest import TestCase @@ -29,6 +31,7 @@ def test_dir(self) -> None: """Run the linter on the docs/ directory.""" docs_path = Path("docs/") rst_files = docs_path.glob("**/*.rst") + directives.register_directive("mermaid", Mermaid) all_errors = [] for rst_file in rst_files: From 4b19091af99b69000ba5c3dfbc459527cb866f46 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 16:38:18 +0000 Subject: [PATCH 05/22] Cleaned the pre-commit --- docs/source/conf.py | 6 +++++- tests/test_providers.py | 6 +++--- tests/test_rst.py | 4 ++-- tests/utils.py | 13 ++++++------- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 2dd3fbd8..fc2643af 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,6 +11,8 @@ import pathlib import sys +from sphinx.application import Sphinx + sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix()) sys.path.insert(0, os.path.abspath("../..")) @@ -43,5 +45,7 @@ html_theme = "sphinx_rtd_theme" # pylint: disable=C0103 html_static_path = ["_static"] -def setup(app): + +def setup(app: Sphinx) -> None: + """Include our own CSS in rendered pages.""" app.add_css_file("custom.css") diff --git a/tests/test_providers.py b/tests/test_providers.py index cd880072..eab3b1a3 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -2,7 +2,7 @@ import datetime as dt from typing import Any -from sqlalchemy import Column, Integer, Text, insert +from sqlalchemy import Column, Integer, MetaData, Text, insert from sqlalchemy.ext.declarative import declarative_base from datafaker import providers @@ -11,7 +11,7 @@ # pylint: disable=invalid-name Base = declarative_base() # pylint: enable=invalid-name -metadata = Base.metadata +metadata: MetaData = Base.metadata class Person(Base): # type: ignore @@ -42,7 +42,7 @@ class ColumnValueProviderTestCase(RequiresDBTestCase): def setUp(self) -> None: """Pre-test setup.""" super().setUp() - metadata.create_all(self.engine) + metadata.create_all(self.sync_engine) def test_column_value_present(self) -> None: """Test the key method.""" diff --git a/tests/test_rst.py b/tests/test_rst.py index 969d0637..ee89b5b5 100644 --- a/tests/test_rst.py +++ b/tests/test_rst.py @@ -1,13 +1,13 @@ """Run the .rst linter via a unit test. The CLI does not allow errors to be disabled, but we can ignore them here.""" -from docutils.parsers.rst import directives from pathlib import Path -from sphinxcontrib.mermaid import Mermaid from typing import Any from unittest import TestCase +from docutils.parsers.rst import directives from restructuredtext_lint import lint_file +from sphinxcontrib.mermaid import Mermaid def _level_to_string(level: int) -> str: diff --git a/tests/utils.py b/tests/utils.py index 5712404b..8e8c6d50 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -19,8 +19,7 @@ import duckdb import testing.postgresql import yaml -from sqlalchemy import Engine -from sqlalchemy.schema import MetaData +from sqlalchemy import Engine, MetaData from datafaker import settings from datafaker.create import create_db_data_into @@ -319,12 +318,12 @@ def assert_subset(self, set1: set[T], set2: set[T], msg: str | None = None) -> N class RequiresDBTestCase(DatafakerTestCase): """ - A test case that only runs if PostgreSQL is installed. 
- A test postgres is installed - dump_file_path can be set to run in this postgres database. - database_name is the name of the database referred to in dump_file_path. + A test case that only runs if a database (PostgreSQL or DuckDB) is installed. + + ``dump_file_path`` can be set to run in this postgres database. + ``database_name`` is the name of the database referred to in dump_file_path. You can use ``self.dsn`` to retrieve the DSN of this database, ``self.engine`` - to get an engine to access the database and self.metadata to get metadata + to get an engine to access the database and ``self.metadata`` to get metadata reflected from that engine. """ From 5d3bb8552c8f14873987d2b80aa23f31c19cabb8 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 17:16:43 +0000 Subject: [PATCH 06/22] bump docs workflow --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 827c711d..f5e79495 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,5 +1,5 @@ --- -name: publish documentation +name: Publish Documentation on: workflow_dispatch env: PYTHON_VERSION: "3.12" From a01f9fda6b6e61c18daafb559d64d17114479ca2 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 17:20:55 +0000 Subject: [PATCH 07/22] Another attempt to bump docs workflow --- .github/workflows/{docs.yml => pubdocs.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{docs.yml => pubdocs.yml} (100%) diff --git a/.github/workflows/docs.yml b/.github/workflows/pubdocs.yml similarity index 100% rename from .github/workflows/docs.yml rename to .github/workflows/pubdocs.yml From 734b5e096cac15b0c86c80f362f3838fb1795178 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 17:22:14 +0000 Subject: [PATCH 08/22] temporarily make document publish "push" as well --- .github/workflows/pubdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pubdocs.yml b/.github/workflows/pubdocs.yml index f5e79495..c7d99e49 100644 --- a/.github/workflows/pubdocs.yml +++ b/.github/workflows/pubdocs.yml @@ -1,6 +1,6 @@ --- name: Publish Documentation -on: workflow_dispatch +on: [workflow_dispatch, push] env: PYTHON_VERSION: "3.12" jobs: From 81675427b1da00581570c2661cb0d793b999ed67 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 17:40:24 +0000 Subject: [PATCH 09/22] Fix working directory for docs build --- .github/workflows/{pubdocs.yml => docs.yml} | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) rename .github/workflows/{pubdocs.yml => docs.yml} (92%) diff --git a/.github/workflows/pubdocs.yml b/.github/workflows/docs.yml similarity index 92% rename from .github/workflows/pubdocs.yml rename to .github/workflows/docs.yml index c7d99e49..81ff1efe 100644 --- a/.github/workflows/pubdocs.yml +++ b/.github/workflows/docs.yml @@ -24,8 +24,9 @@ jobs: python -m poetry install --all-extras - name: Build documentation shell: bash + working-directory: ./docs run: | - python -m poetry run -C docs make html + python -m poetry run make html - name: Publish docs on github Pages uses: peaceiris/actions-gh-pages@v3 with: From 14fc023b7c3082605226ce5f2cdb555deab47d03 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 17:43:41 +0000 Subject: [PATCH 10/22] Fix github token expansion --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml 
index 81ff1efe..d0099b87 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,6 +31,6 @@ jobs: uses: peaceiris/actions-gh-pages@v3 with: publish_branch: gh-pages - github_token: $${{ secrets.GITHUB_TOKEN }} + github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: docs/build force_orphan: true From 3ee8fdc6e1bc5f03db5b713425368617a400518e Mon Sep 17 00:00:00 2001 From: Tim Band Date: Mon, 19 Jan 2026 18:05:29 +0000 Subject: [PATCH 11/22] Cleaned docs build --- datafaker/create.py | 8 +++--- datafaker/main.py | 4 +-- datafaker/make.py | 40 +++++++++++++--------------- datafaker/providers.py | 60 +++++++++++++++++++++--------------------- datafaker/remove.py | 4 +-- datafaker/utils.py | 4 +-- 6 files changed, 58 insertions(+), 62 deletions(-) diff --git a/datafaker/create.py b/datafaker/create.py index ef30217e..8cc74eba 100644 --- a/datafaker/create.py +++ b/datafaker/create.py @@ -164,11 +164,11 @@ def create_db_data_into( Populate the database. :param sorted_tables: The table names to populate, sorted so that foreign - keys' targets are populated before the foreign keys themselves. + keys' targets are populated before the foreign keys themselves. :param table_generator_dict: A mapping of table names to the generators - used to make data for them. + used to make data for them. :param story_generator_list: A list of story generators to be run after the - table generators on each pass. + table generators on each pass. :param num_passes: Number of passes to perform. :param db_dsn: Connection string for the destination database. :param schema_name: Destination schema name. @@ -230,7 +230,7 @@ def table_name(self) -> str | None: Get the name of the current table. :return: The table name, or None if there are no more stories - to process. + to process. """ return self._table_name diff --git a/datafaker/main.py b/datafaker/main.py index 9c6107f4..5d5e26f2 100644 --- a/datafaker/main.py +++ b/datafaker/main.py @@ -85,9 +85,9 @@ def load_metadata_config( :param orm_file_name: The name of the file to load. :param config: The ``config.yaml`` file object. Ignored tables will be - excluded from the output. + excluded from the output. :return: A dict representing the ``orm.yaml`` file, with the tables - the ``config`` says to ignore removed. + the ``config`` says to ignore removed. """ with open(orm_file_name, encoding="utf-8") as orm_fh: meta_dict = yaml.load(orm_fh, yaml.Loader) diff --git a/datafaker/make.py b/datafaker/make.py index 41f23c6f..2638021d 100644 --- a/datafaker/make.py +++ b/datafaker/make.py @@ -6,7 +6,7 @@ from datetime import datetime from pathlib import Path from types import TracebackType -from typing import Any, Callable, Final, Mapping, Optional, Sequence, Tuple, Type +from typing import Any, Callable, Final, Mapping, Optional, Sequence, Tuple, Type, Union import pandas as pd import snsql @@ -93,8 +93,8 @@ def make_column_choices( :param table_config: The ``tables`` part of ``config.yaml``. :return: A list of ``ColumnChoice`` objects; that is, descriptions of - functions and their arguments to call to reveal a list of columns that - should have values generated for them. + functions and their arguments to call to reveal a list of columns that + should have values generated for them. 
""" return [ ColumnChoice( @@ -128,7 +128,7 @@ class TableGeneratorInfo: column_choices: list[ColumnChoice] rows_per_pass: int row_gens: list[RowGeneratorInfo] = field(default_factory=list) - unique_constraints: Sequence[UniqueConstraint | _PrimaryConstraint] = field( + unique_constraints: Sequence[Union[UniqueConstraint, _PrimaryConstraint]] = field( default_factory=list ) @@ -289,7 +289,7 @@ def _integer_generator(column: Column) -> tuple[str, dict[str, str]]: :param column: The column to get the generator for. :return: A pair consisting of the name of a generator and its - arguments. + arguments. """ if not column.primary_key: return ("generic.numeric.integer_number", {}) @@ -426,7 +426,7 @@ def _get_generator_and_arguments(column: Column) -> tuple[str | None, dict[str, Get the generator and its arguments from the column type. :return: A tuple of a string representing the generator callable and a dict of - keyword arguments to supply to it. + keyword arguments to supply to it. """ generator_function = _get_generator_for_column(type(column.type)) @@ -440,12 +440,10 @@ def _get_provider_for_column(column: Column) -> Tuple[list[str], str, dict[str, """ Get a default Mimesis provider and its arguments for a SQL column type. - Args: - column: SQLAlchemy column object + :param column: SQLAlchemy column object - Returns: - Tuple[str, str, list[str]]: Tuple containing the variable names to assign to, - generator function and any generator arguments. + :return: Tuple[str, str, list[str]]: Tuple containing the variable names + to assign to, generator function and any generator arguments. """ variable_names: list[str] = [column.name] @@ -592,19 +590,17 @@ def make_table_generators( # pylint: disable=too-many-locals The orm and vocabulary YAML files must already have been generated (by make-tables and make-vocab). - Args: - metadata: database ORM - config: Configuration to control the generator creation. - orm_filename: "orm.yaml" file path so that the generator - file can load the MetaData object - config_filename: "config.yaml" file path so that the generator - file can load the MetaData object - src_stats_filename: A filename for where to read src stats from. + :param metadata: database ORM + :param config: Configuration to control the generator creation. + :param orm_filename: "orm.yaml" file path so that the generator + file can load the MetaData object + :param config_filename: "config.yaml" file path so that the generator + file can load the MetaData object + :param src_stats_filename: A filename for where to read src stats from. Optional, if `None` this feature will be skipped - overwrite_files: Whether to overwrite pre-existing vocabulary files + :param overwrite_files: Whether to overwrite pre-existing vocabulary files - Returns: - A string that is a valid Python module, once written to file. + :return: A string that is a valid Python module, once written to file. """ row_generator_module_name: str = config.get("row_generators_module", None) story_generator_module_name = config.get("story_generators_module", None) diff --git a/datafaker/providers.py b/datafaker/providers.py index 39a9d9ad..f791345e 100644 --- a/datafaker/providers.py +++ b/datafaker/providers.py @@ -249,12 +249,12 @@ def merge_with_constants( Merge a list of items with other items that must be placed at certain indices. :param constants_at: A map of indices to objects that must be placed at - those indices. + those indices. :param xs: Items that fill in the gaps left by ``constants_at``. 
:return: ``xs`` with ``constants_at`` inserted at the appropriate - points. If there are not enough elements in ``xs`` to fill in the gaps - in ``constants_at``, the elements of ``constants_at`` after the gap - are dropped. + points. If there are not enough elements in ``xs`` to fill in the gaps + in ``constants_at``, the elements of ``constants_at`` after the gap + are dropped. """ outi = 0 xi = 0 @@ -344,7 +344,7 @@ def choice(self, a: list[Mapping[str, T]]) -> T | None: Choose a value with equal probability. :param a: The list of values to output. Each element is a mapping with - a key ``value`` and the key is the value to return. + a key ``value`` holding the value to return. :return: The chosen value. """ return self.choice_direct(a).get("value", None) @@ -371,8 +371,8 @@ def zipf_choice(self, a: list[Mapping[str, T]], n: int | None = None) -> T | Non 1/n times as frequently as the first value is chosen. :param a: The list of rows to choose between, most frequent first. - Each element is a mapping with a key ``value`` and the key is the - value to return. + Each element is a mapping with a key ``value`` holding the + value to return. :return: The chosen value. """ c = self.zipf_choice_direct(a, n) @@ -383,8 +383,8 @@ def weighted_choice(self, a: list[dict[str, Any]]) -> Any: Choice weighted by the count in the original dataset. :param a: a list of dicts, each with a ``value`` key - holding the value to be returned and a ``count`` key holding the - number of that value found in the original dataset + holding the value to be returned and a ``count`` key holding the + number of that value found in the original dataset :return: The chosen ``value``. """ vs = [] @@ -406,9 +406,9 @@ def multivariate_normal_np(self, cov: dict[str, Any]) -> np.typing.NDArray: Return an array of values chosen from the given covariates. :param cov: Keys are ``rank``: The number of values to output; - ``mN``: The mean of variable ``N`` (where ``N`` is between 0 and - one less than ``rank``). ``cN_M`` (where 0 < ``N`` <= ``M`` < ``rank``): - the covariance between the ``N``th and the ``M``th variables. + ``mN``: The mean of variable ``N`` (where ``N`` is between 0 and + one less than ``rank``). ``cN_M`` (where 0 <= ``N`` <= ``M`` < ``rank``): + the covariance between the ``N``\th and the ``M``\th variables. :return: A numpy array of results. """ rank = int(cov["rank"]) @@ -473,10 +473,10 @@ def multivariate_normal(self, cov: dict[str, Any]) -> list[float]: Produce a list of values pulled from a multivariate distribution. :param cov: A dict with various keys: ``rank`` is the number of - output values, ``m0``, ``m1``, ... are the means of the - distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ... - are the covariates, ``cN_M`` is the covariate of the ``N``th and - ``M``th varaibles, with 0 <= ``N`` <= ``M`` < ``rank``. + output values, ``m0``, ``m1``, ... are the means of the + distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ... + are the covariances, ``cN_M`` is the covariance of the ``N``\th and + ``M``\th variables, with 0 <= ``N`` <= ``M`` < ``rank``. :return: list of ``rank`` floating point values """ out: list[float] = self.multivariate_normal_np(cov).tolist() @@ -487,11 +487,11 @@ def multivariate_lognormal(self, cov: dict[str, Any]) -> list[float]: Produce a list of values pulled from a multivariate distribution. :param cov: A dict with various keys: ``rank`` is the number of - output values, ``m0``, ``m1``, ...
are the means of the - distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ... - are the covariates, ``cN_M`` is the covariate of the ``N``th and - ``M``th varaibles, with 0 <= ``N`` <= ``M`` < ``rank``. These - are all the means and covariants of the logs of the data. + output values, ``m0``, ``m1``, ... are the means of the + distributions (``rank`` of them). ``c0_0``, ``c0_1``, ``c1_1``, ... + are the covariances, ``cN_M`` is the covariance of the ``N``\th and + ``M``\th variables, with 0 <= ``N`` <= ``M`` < ``rank``. These + are all the means and covariances of the logs of the data. :return: list of ``rank`` floating point values """ out: list[Any] = np.exp(self.multivariate_normal_np(cov)).tolist() @@ -528,13 +528,13 @@ def alternatives( Pick between other generators. :param alternative_configs: List of alternative generators. - Each alternative has the following keys: "count" -- a weight for - how often to use this alternative; "name" -- which generator - for this partition, for example "composite"; "params" -- the - parameters for this alternative. + Each alternative has the following keys: "count" -- a weight for + how often to use this alternative; "name" -- which generator + for this partition, for example "composite"; "params" -- the + parameters for this alternative. :param counts: A list of weights for each alternative. If None, the - "count" value of each alternative is used. Each count is a dict - with a "count" key. + "count" value of each alternative is used. Each count is a dict + with a "count" key. :return: list of values """ if counts is not None: @@ -560,12 +560,12 @@ def with_constants_at( Insert constants into the results of a different generator. :param constants_at: A dictionary of positions and objects to insert - into the return list at those positions. + into the return list at those positions. :param subgen: The name of the function to call to get the results - that will have the constants inserted into. + into which the constants will be inserted. :param params: Keyword arguments to the ``subgen`` function. :return: A list of results from calling ``subgen(**params)`` - with ``constants_at`` inserted in at the appropriate indices. + with ``constants_at`` inserted in at the appropriate indices. """ if subgen not in self.PERMITTED_SUBGENS: logger.error( diff --git a/datafaker/remove.py b/datafaker/remove.py index 3924cdaf..540a7290 100644 --- a/datafaker/remove.py +++ b/datafaker/remove.py @@ -1,5 +1,5 @@ """Functions and classes to undo the operations in create.py.""" -from typing import Any, Mapping +from typing import Any, Mapping, Optional from sqlalchemy import MetaData, delete @@ -56,7 +56,7 @@ def remove_db_vocab( reinstate_vocab_foreign_key_constraints(metadata, meta_dict, config, dst_conn) -def remove_db_tables(metadata: MetaData | None) -> None: +def remove_db_tables(metadata: Optional[MetaData]) -> None: """Drop the tables in the destination schema.""" settings = get_settings() assert settings.dst_dsn, "Missing destination database settings" diff --git a/datafaker/utils.py b/datafaker/utils.py index a778d37f..cd009b25 100644 --- a/datafaker/utils.py +++ b/datafaker/utils.py @@ -522,7 +522,7 @@ def make_primary_key_name(table_name: str) -> str: def remove_vocab_foreign_key_constraints( metadata: MetaData, config: Mapping[str, Any], - dst_engine: Connection | Engine, + dst_engine: Union[Connection, Engine], ) -> None: """ Remove the foreign key constraints from vocabulary tables.
@@ -566,7 +566,7 @@ def reinstate_vocab_foreign_key_constraints( metadata: MetaData, meta_dict: Mapping[str, Any], config: Mapping[str, Any], - dst_engine: Connection | Engine, + dst_engine: Union[Connection, Engine], ) -> None: """ Put the removed foreign keys back into the destination database. From 1de573a23c2033686d98e8c4c89128c342f85f0f Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 20 Jan 2026 11:15:30 +0000 Subject: [PATCH 12/22] Fixed backslashes in docstrings --- datafaker/providers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafaker/providers.py b/datafaker/providers.py index f791345e..775a9f69 100644 --- a/datafaker/providers.py +++ b/datafaker/providers.py @@ -402,7 +402,7 @@ def constant(self, value: T) -> T: return value def multivariate_normal_np(self, cov: dict[str, Any]) -> np.typing.NDArray: - """ + r""" Return an array of values chosen from the given covariates. :param cov: Keys are ``rank``: The number of values to output; @@ -469,7 +469,7 @@ def _find_constants(self, result: dict[str, Any]) -> dict[int, Any]: } def multivariate_normal(self, cov: dict[str, Any]) -> list[float]: - """ + r""" Produce a list of values pulled from a multivariate distribution. :param cov: A dict with various keys: ``rank`` is the number of @@ -483,7 +483,7 @@ def multivariate_normal(self, cov: dict[str, Any]) -> list[float]: return out def multivariate_lognormal(self, cov: dict[str, Any]) -> list[float]: - """ + r""" Produce a list of values pulled from a multivariate distribution. :param cov: A dict with various keys: ``rank`` is the number of From 48b61607c11d99ae75d85faf2f157b100b1aca09 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 20 Jan 2026 12:00:18 +0000 Subject: [PATCH 13/22] Docs publish warning removed --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d0099b87..c9bfbbcf 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,6 +10,8 @@ jobs: - name: Checkout Code uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: "${{ env.PYTHON_VERSION }}" - name: Install poetry shell: bash run: | From e8edb65cb112aef6626dd277b474f1bff7bfaa96 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 20 Jan 2026 12:02:20 +0000 Subject: [PATCH 14/22] looks like Python shouldn't be invoked as a module --- .github/workflows/docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c9bfbbcf..72dde506 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,16 +19,16 @@ jobs: - name: Configure poetry shell: bash run: | - python -m poetry config virtualenvs.in-project true + poetry config virtualenvs.in-project true - name: Install dependencies shell: bash run: | - python -m poetry install --all-extras + poetry install --all-extras - name: Build documentation shell: bash working-directory: ./docs run: | - python -m poetry run make html + poetry run make html - name: Publish docs on github Pages uses: peaceiris/actions-gh-pages@v3 with: From b9153376c4c62bdb7ae6d8c6b9b8bb03b989c4a4 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 20 Jan 2026 12:15:45 +0000 Subject: [PATCH 15/22] Deploy the built html dir --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 72dde506..ada5b873 100644 --- 
a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -34,5 +34,5 @@ jobs: with: publish_branch: gh-pages github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: docs/build + publish_dir: docs/build/html force_orphan: true From a04e4538988f9bc48b0c6e015981c399f5a141db Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 20 Jan 2026 12:25:09 +0000 Subject: [PATCH 16/22] Not on push, only on workflow_dispatch Does this work? --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ada5b873..59321127 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,6 @@ --- name: Publish Documentation -on: [workflow_dispatch, push] +on: workflow_dispatch env: PYTHON_VERSION: "3.12" jobs: From e790f344e4db822b3fccd4c2e6a6749701e21bfa Mon Sep 17 00:00:00 2001 From: Tim Band Date: Wed, 28 Jan 2026 14:59:30 +0000 Subject: [PATCH 17/22] Overview changes based on Albert's feedback --- docs/source/overview.rst | 75 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/docs/source/overview.rst b/docs/source/overview.rst index cf1283e7..f05df334 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -185,7 +185,7 @@ In such a case, the file is alterable by hand as long as the YAML structure is m Datafaker configuration phase ----------------------------- -These commands are not really part of the Reduce phase, but allow the user to configure +The following commands are not really part of the Reduce phase, but allow the user to configure what the Reduce phase will entail (and hence also what the Repopulate phase will entail). - ``datafaker configure-tables`` makes a file called ``config.yaml`` that describes what needs to happen to each table. @@ -240,11 +240,82 @@ private network or Trusted Research Environment to allow the construction of the synthetic data in a less sensitive computing environment, if required. -The sensitive database is no longer required in Datafaker's operation. +.. list-table:: Information Governance Classification of Each Datafaker Output + :widths: 10 20 20 20 10 10 20 + + * - Artefact + - Derived from real data? + - Contains patient-level data? + - Granularity + - Privacy risk + - IG approval required? + - Can leave TRE? + * - ``orm.yaml`` + - Yes + - No + - Structural only + - Low + - Yes + - Yes + * - ``config.yaml`` + - User-authored + - No + - None + - Low + - No + - Yes + * - ``src-stats.yaml`` + - Yes + - Occasionally + - Aggregate + - Medium + - Yes + - Conditional + * - Vocabulary tables + - Yes + - No + - Full table + - None if correctly identified + - Yes + - Conditional + * - Synthetic output (described below) + - No + - No + - Patient-level synthetic data + - Low + - No + - Yes + +It is worth further elaborating on two of these entries: +Firstly, ``src-stats.yaml`` "occasionally" contains patient-level data; +this is true if the table being summarized contains patient-level data +*and* the summarizing function is reporting on every value in one or more columns +*and* rare values are not being suppressed (leading to a value that applies +to just one or two individuals being released).
+Search the ``src-stats.yaml`` file for comments such as: + + All the values that appear in column *column-name* of table *table-name* + +or + + All the values that appear in column *column-name* of table *table-name* more than 7 times + +Secondly, Vocabulary Tables' privacy risk is "None if correctly identified". +A Vocabulary Table is supposed to be a table simply providing categories for other tables to reference. +They are not changed during the operation of the database and so releasing them does not represent a privacy risk. +However, there is some flexibility here; a list of care provider institutions is not technically a vocabulary table +but it is probably safe to treat it as one. +The important point is that Datafaker allows the user to specify any table as a vocabulary table; +if the user incorrectly specifies sensitive data as Vocabulary, it must not be released! Datafaker Repopulate phase -------------------------- +Once we have released the summary data as described above we can operate outside of the TRE +as the sensitive data is no longer accessed by Datafaker. + +The remaining commands are: + - ``datafaker create-tables`` creates the structure of the destination database to match (as much as is requested) the structure of the source database - ``datafaker create-generators`` creates Python code files that will actually generate the data (this phase might be removed in a future version of Datafaker) - ``datafaker create-data`` writes fake data into the destination database. From 83ffc8167850b998d79cd5f9e621bcfcb68a33ac Mon Sep 17 00:00:00 2001 From: Tim Band Date: Wed, 28 Jan 2026 17:15:52 +0000 Subject: [PATCH 18/22] re-enable docs workflow --- .github/workflows/docs.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 59321127..f5d4ff55 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,11 @@ --- name: Publish Documentation -on: workflow_dispatch +on: + push: + branches: + # need this to be able to run the workflow until it has been merged into main + - overview_documentation + workflow_dispatch: env: PYTHON_VERSION: "3.12" jobs: From 8ee385bf1b436bd9b1cdbe80617eb88312bb7f1f Mon Sep 17 00:00:00 2001 From: Tim Band Date: Wed, 28 Jan 2026 17:20:48 +0000 Subject: [PATCH 19/22] Albert's table should be 100% wide --- docs/source/overview.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/overview.rst b/docs/source/overview.rst index f05df334..67df9935 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -241,6 +241,7 @@ private network or Trusted Research Environment to allow the construction of the synthetic data in a less sensitive computing environment, if required. .. list-table:: Information Governance Classification of Each Datafaker Output + :width: 100% :widths: 10 20 20 20 10 10 20 * - Artefact From eff4cc1b3b9fae710f96153d9aca050d878713ee Mon Sep 17 00:00:00 2001 From: Tim Band Date: Wed, 28 Jan 2026 18:35:20 +0000 Subject: [PATCH 20/22] Added some soft hyphens to the overview --- docs/source/overview.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 67df9935..1e320955 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -267,18 +267,18 @@ the synthetic data in a less sensitive computing environment, if required. 
- Yes * - ``src-stats.yaml`` - Yes - - Occasionally + - Occasion­ally - Aggregate - Medium - Yes - - Conditional + - Condi­tion­al * - Vocabulary tables - Yes - No - Full table - None if correctly identified - Yes - - Conditional + - Condi­tion­al * - Synthetic output (described below) - No - No From 1b9f2eb8e656b3ea5f218b07c3f36c09363c68df Mon Sep 17 00:00:00 2001 From: Tim Band Date: Tue, 3 Feb 2026 10:20:28 +0000 Subject: [PATCH 21/22] Response to Stef's comments --- docs/source/overview.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 1e320955..6f727023 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -63,11 +63,11 @@ the sensitive data can be read. Information Governance oversight happens at the of this process to decide if this obfucated summary can be released out of the private network. -The Repopulate process happens outside of the private network and outside of the remit +The Repopulate process can happen outside of the private network and outside of the remit of Information Governance. This is important! The act of turning reduced obfucated data into a larger quantity of randomized data is *not* a privacy-sheilding process! By this time all the actual privacy-sheilding has already happened and -the Information Governance has already been applied. +the Information Governance restrictions have already been applied. Reduction techniques -------------------- @@ -131,7 +131,8 @@ because the same amount of data is output as was input. Datafaker's Operation ===================== -Datafaker is based on the Alan Turing Institute's SqlSynthGen tool. +Datafaker is based on the Alan Turing Institute's +`SqlSynthGen tool `_. SqlSynthGen implements all three operations of Reduce, Obfuscate and Repopulate. Datafaker builds on SqlSynthGen by automating the specification of the Reduce @@ -178,9 +179,9 @@ Datafaker make-tables phase ``datafaker make-tables`` makes a file called ``orm.yaml`` that describes the structure of the source database. This is part of the Reduce phase, but this file is used in every other Datafaker phase. By describing the structure of the database, no private data is leaked. -However it is not impossible that, in describing the structure of some commercial -database, some commercially-sensitive information could be leaked. -In such a case, the file is alterable by hand as long as the YAML structure is maintained. +However, if part of a commercial database's schema is considered commercially sensitive, +this information would be leaked in the ``orm.yaml`` file. +It may be possible to alter this file by hand to remove the sensitive information as long as the YAML structure is maintained. Datafaker configuration phase ----------------------------- From 6a3bbe2a7b6371eb926788648457d6abcf126382 Mon Sep 17 00:00:00 2001 From: Tim Band Date: Thu, 5 Feb 2026 12:07:02 +0000 Subject: [PATCH 22/22] Upped version number (really for DuckDB) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4136d2d3..46ef4031 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datafaker" -version = "0.2.2" +version = "0.2.3" description = "Generates fake SQL data" authors = ["Tim Band <3266052+tim-band@users.noreply.github.com>"] license = "MIT"