From 469a0d81095a9738d0cec2f9e40ea014ecf75d4d Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:10:04 +0530 Subject: [PATCH 01/12] docs: add Dockerfile for building documentation - Installs Rust 1.92.0, cargo-depgraph, and all doc dependencies - Provides isolated, reproducible build environment - Fixes line endings for cross-platform compatibility (Windows/Unix) - No additional host setup required beyond Docker --- docker-compose.yml | 27 +++++++++++++++++++++ docs/Dockerfile | 58 ++++++++++++++++++++++++++++++++++++++++++++++ docs/README.md | 23 ++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 docker-compose.yml create mode 100644 docs/Dockerfile diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000..cca79605bcfeb --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +version: '3.8' + +services: + docs: + build: + context: . + dockerfile: docs/Dockerfile + volumes: + - .:/work + working_dir: /work/docs diff --git a/docs/Dockerfile b/docs/Dockerfile new file mode 100644 index 0000000000000..f6dd8038fb50f --- /dev/null +++ b/docs/Dockerfile @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM python:3.11-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + build-essential \ + graphviz \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust 1.92.0 +ENV RUST_VERSION=1.92.0 +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- \ + --default-toolchain ${RUST_VERSION} \ + --component rustfmt \ + --profile minimal \ + -y + +ENV PATH="/root/.cargo/bin:${PATH}" + +# Install cargo-depgraph +RUN cargo install cargo-depgraph --version ^1.6 --locked + +# Set working directory +WORKDIR /work + +# Copy Python requirements and install them +COPY docs/requirements.txt /work/docs/requirements.txt +RUN pip install --no-cache-dir -r /work/docs/requirements.txt + +# Copy the entire repository +COPY . /work + +# Fix line endings for shell scripts (convert CRLF to LF) +RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; || true + +# Set working directory to docs for build +WORKDIR /work/docs + +# Default command: build documentation +# Fix line endings when running (for volume-mounted files from Windows) and then build +CMD ["bash", "-c", "find /work -name '*.sh' -type f -exec sh -c 'tr -d \"\\r\" < \"$1\" > \"$1.tmp\" && mv \"$1.tmp\" \"$1\"' _ {} \\; ; cd /work/docs && bash build.sh"] diff --git a/docs/README.md b/docs/README.md index c3d87ee8e84a3..ea9f67f2e7bb9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -25,11 +25,29 @@ https://datafusion.apache.org/ as part of the release process. ## Dependencies +### Option 1: Docker (Recommended) + +If you have Docker installed, you can build the docs without installing any dependencies on your system: + +```sh +# Using docker-compose (simplest) +docker-compose run --rm docs bash build.sh + +# Or using docker directly +docker build -t datafusion-docs -f docs/Dockerfile . +docker run --rm -v $(pwd):/work datafusion-docs bash build.sh +``` + +The built documentation will be available in `docs/build/html/`. + +### Option 2: Local Installation + It's recommended to install build dependencies and build the documentation inside a Python virtualenv. ```sh python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate pip install -r requirements.txt ``` @@ -40,6 +58,11 @@ needing to create a virtual environment: uv run --with-requirements requirements.txt bash build.sh ``` +The docs build regenerates the workspace dependency graph via +`docs/scripts/generate_dependency_graph.sh`, so ensure `cargo`, `cargo-depgraph` +(`cargo install cargo-depgraph --version ^1.6 --locked`), and Graphviz `dot` +(`brew install graphviz` or `sudo apt-get install -y graphviz`) are available. + ## Build & Preview Run the provided script to build the HTML pages. From b7adf87fd21bc9a058eab11e3631199b861a950b Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:26:07 +0530 Subject: [PATCH 02/12] Update docs/Dockerfile Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Dockerfile b/docs/Dockerfile index f6dd8038fb50f..7788407ffe618 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -48,7 +48,7 @@ RUN pip install --no-cache-dir -r /work/docs/requirements.txt COPY . /work # Fix line endings for shell scripts (convert CRLF to LF) -RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; || true +RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; # Set working directory to docs for build WORKDIR /work/docs From 73c0d58dfc5eed41add736a755398ac52a021d2d Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:26:33 +0530 Subject: [PATCH 03/12] Update docs/Dockerfile Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Dockerfile b/docs/Dockerfile index 7788407ffe618..22afda4d44415 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -54,5 +54,5 @@ RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && WORKDIR /work/docs # Default command: build documentation -# Fix line endings when running (for volume-mounted files from Windows) and then build -CMD ["bash", "-c", "find /work -name '*.sh' -type f -exec sh -c 'tr -d \"\\r\" < \"$1\" > \"$1.tmp\" && mv \"$1.tmp\" \"$1\"' _ {} \\; ; cd /work/docs && bash build.sh"] +# Fix line endings when running (for volume-mounted files from Windows) only if needed, and then build +CMD ["bash", "-c", "if find /work -name '*.sh' -type f -print0 | xargs -0 grep -Il $'\\r' >/dev/null 2>&1; then echo \"Converting CRLF to LF in shell scripts...\"; find /work -name '*.sh' -type f -exec sh -c 'tr -d \"\\r\" < \"$1\" > \"$1.tmp\" && mv \"$1.tmp\" \"$1\"' _ {} \\; ; fi; cd /work/docs && bash build.sh"] From f3865163596051c7692ab8fb54d178fab3888f11 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:26:50 +0530 Subject: [PATCH 04/12] Update docker-compose.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cca79605bcfeb..eef0aa62baceb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -version: '3.8' - services: docs: build: From c4a017e34208554071830a1b9d1bff5e1d563a72 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:27:06 +0530 Subject: [PATCH 05/12] Update docs/README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index ea9f67f2e7bb9..912c28417d675 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,12 +30,14 @@ https://datafusion.apache.org/ as part of the release process. If you have Docker installed, you can build the docs without installing any dependencies on your system: ```sh -# Using docker-compose (simplest) +# Using docker-compose (simplest) (POSIX shells: bash, zsh, etc.) docker-compose run --rm docs bash build.sh -# Or using docker directly +# Or using docker directly (POSIX shells: bash, zsh, etc.) docker build -t datafusion-docs -f docs/Dockerfile . docker run --rm -v $(pwd):/work datafusion-docs bash build.sh +# On Windows PowerShell, use: +# docker run --rm -v ${PWD}:/work datafusion-docs bash build.sh ``` The built documentation will be available in `docs/build/html/`. From c9fb69d59ce4054ac1526f3f31ac168f483e9704 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:27:24 +0530 Subject: [PATCH 06/12] Update docs/Dockerfile Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/Dockerfile | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/docs/Dockerfile b/docs/Dockerfile index 22afda4d44415..968350b2213b4 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -26,11 +26,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Install Rust 1.92.0 ENV RUST_VERSION=1.92.0 -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- \ - --default-toolchain ${RUST_VERSION} \ - --component rustfmt \ - --profile minimal \ - -y +ENV RUSTUP_VERSION=1.27.1 +RUN set -eux; \ + arch="$(uname -m)"; \ + case "${arch}" in \ + x86_64) rustup_arch="x86_64-unknown-linux-gnu" ;; \ + aarch64) rustup_arch="aarch64-unknown-linux-gnu" ;; \ + *) echo "Unsupported architecture: ${arch}" >&2; exit 1 ;; \ + esac; \ + curl --proto '=https' --tlsv1.2 -sSf \ + "https://static.rust-lang.org/rustup/archive/${RUSTUP_VERSION}/${rustup_arch}/rustup-init" \ + -o rustup-init; \ + chmod +x rustup-init; \ + ./rustup-init \ + --default-toolchain "${RUST_VERSION}" \ + --component rustfmt \ + --profile minimal \ + -y; \ + rm rustup-init ENV PATH="/root/.cargo/bin:${PATH}" From 6c04dce332aa0c1f68df1a2c2668d248686b6387 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sat, 17 Jan 2026 12:41:40 +0530 Subject: [PATCH 07/12] docs: extract build logic to entrypoint.sh for better maintainability - Creates separate entrypoint script instead of complex bash in CMD - Improves readability and makes logic easier to test and modify - Cleaner Dockerfile with ENTRYPOINT pattern - Script intelligently checks for CRLF before converting - Handles both build-time and runtime line ending fixes --- docs/Dockerfile | 11 +++++++---- docs/entrypoint.sh | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 docs/entrypoint.sh diff --git a/docs/Dockerfile b/docs/Dockerfile index 968350b2213b4..e7eabe31a4806 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -60,12 +60,15 @@ RUN pip install --no-cache-dir -r /work/docs/requirements.txt # Copy the entire repository COPY . /work -# Fix line endings for shell scripts (convert CRLF to LF) +# Copy and set up the entrypoint script +COPY docs/entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + +# Fix line endings for shell scripts at build time (convert CRLF to LF) RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; # Set working directory to docs for build WORKDIR /work/docs -# Default command: build documentation -# Fix line endings when running (for volume-mounted files from Windows) only if needed, and then build -CMD ["bash", "-c", "if find /work -name '*.sh' -type f -print0 | xargs -0 grep -Il $'\\r' >/dev/null 2>&1; then echo \"Converting CRLF to LF in shell scripts...\"; find /work -name '*.sh' -type f -exec sh -c 'tr -d \"\\r\" < \"$1\" > \"$1.tmp\" && mv \"$1.tmp\" \"$1\"' _ {} \\; ; fi; cd /work/docs && bash build.sh"] +# Use entrypoint script for better readability and maintainability +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/docs/entrypoint.sh b/docs/entrypoint.sh new file mode 100644 index 0000000000000..6067ea1aa1cb2 --- /dev/null +++ b/docs/entrypoint.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +# Fix line endings when running (for volume-mounted files from Windows) only if needed +if find /work -name '*.sh' -type f -print0 | xargs -0 grep -Il $'\r' >/dev/null 2>&1; then + echo "Converting CRLF to LF in shell scripts..." + find /work -name '*.sh' -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; +fi + +# Build documentation +cd /work/docs +bash build.sh From 13bfb7f549250bbe24a835c305ab32e11ebaf5ae Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Sun, 18 Jan 2026 09:28:33 +0530 Subject: [PATCH 08/12] refactor: simplify Dockerfile based on review feedback - Use rust:1.92.0-bookworm base image instead of python with manual Rust install - Remove docker-compose.yml (unnecessary for this use case) - Remove entrypoint.sh and CRLF conversion logic - Use volume mount at runtime instead of copying entire repository Usage: docker build -t datafusion-docs ./docs docker run --rm -v C:\Users\HP\Music\Apache_org\data\datafusion:/datafusion datafusion-docs --- docker-compose.yml | 25 -------------------- docs/Dockerfile | 57 ++++++++-------------------------------------- docs/entrypoint.sh | 30 ------------------------ 3 files changed, 9 insertions(+), 103 deletions(-) delete mode 100644 docker-compose.yml delete mode 100644 docs/entrypoint.sh diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index eef0aa62baceb..0000000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -services: - docs: - build: - context: . - dockerfile: docs/Dockerfile - volumes: - - .:/work - working_dir: /work/docs diff --git a/docs/Dockerfile b/docs/Dockerfile index e7eabe31a4806..afec9755efef0 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -15,60 +15,21 @@ # specific language governing permissions and limitations # under the License. -FROM python:3.11-slim +FROM rust:1.92.0-bookworm -# Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - curl \ - build-essential \ + python3 \ + python3-pip \ + python3-venv \ graphviz \ + make \ && rm -rf /var/lib/apt/lists/* -# Install Rust 1.92.0 -ENV RUST_VERSION=1.92.0 -ENV RUSTUP_VERSION=1.27.1 -RUN set -eux; \ - arch="$(uname -m)"; \ - case "${arch}" in \ - x86_64) rustup_arch="x86_64-unknown-linux-gnu" ;; \ - aarch64) rustup_arch="aarch64-unknown-linux-gnu" ;; \ - *) echo "Unsupported architecture: ${arch}" >&2; exit 1 ;; \ - esac; \ - curl --proto '=https' --tlsv1.2 -sSf \ - "https://static.rust-lang.org/rustup/archive/${RUSTUP_VERSION}/${rustup_arch}/rustup-init" \ - -o rustup-init; \ - chmod +x rustup-init; \ - ./rustup-init \ - --default-toolchain "${RUST_VERSION}" \ - --component rustfmt \ - --profile minimal \ - -y; \ - rm rustup-init - -ENV PATH="/root/.cargo/bin:${PATH}" - -# Install cargo-depgraph RUN cargo install cargo-depgraph --version ^1.6 --locked -# Set working directory -WORKDIR /work - -# Copy Python requirements and install them -COPY docs/requirements.txt /work/docs/requirements.txt -RUN pip install --no-cache-dir -r /work/docs/requirements.txt - -# Copy the entire repository -COPY . /work - -# Copy and set up the entrypoint script -COPY docs/entrypoint.sh /usr/local/bin/entrypoint.sh -RUN chmod +x /usr/local/bin/entrypoint.sh - -# Fix line endings for shell scripts at build time (convert CRLF to LF) -RUN find /work -name "*.sh" -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; +WORKDIR /datafusion/docs -# Set working directory to docs for build -WORKDIR /work/docs +COPY requirements.txt . +RUN python3 -m pip install --break-system-packages -r requirements.txt -# Use entrypoint script for better readability and maintainability -ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["make", "html"] diff --git a/docs/entrypoint.sh b/docs/entrypoint.sh deleted file mode 100644 index 6067ea1aa1cb2..0000000000000 --- a/docs/entrypoint.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -euo pipefail - -# Fix line endings when running (for volume-mounted files from Windows) only if needed -if find /work -name '*.sh' -type f -print0 | xargs -0 grep -Il $'\r' >/dev/null 2>&1; then - echo "Converting CRLF to LF in shell scripts..." - find /work -name '*.sh' -type f -exec sh -c 'tr -d "\r" < "$1" > "$1.tmp" && mv "$1.tmp" "$1"' _ {} \; -fi - -# Build documentation -cd /work/docs -bash build.sh From 4826c824d43a9d6ed51456045725923e58802e73 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Mon, 19 Jan 2026 15:41:21 +0530 Subject: [PATCH 09/12] fix: update Docker commands in README to match simplified Dockerfile --- docs/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md index 912c28417d675..c7ff31507ba4f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,14 +30,14 @@ https://datafusion.apache.org/ as part of the release process. If you have Docker installed, you can build the docs without installing any dependencies on your system: ```sh -# Using docker-compose (simplest) (POSIX shells: bash, zsh, etc.) -docker-compose run --rm docs bash build.sh +# Build the Docker image (from repository root) +docker build -t datafusion-docs ./docs + +# Run the docs build (POSIX shells: bash, zsh, etc.) +docker run --rm -v $(pwd):/datafusion datafusion-docs -# Or using docker directly (POSIX shells: bash, zsh, etc.) -docker build -t datafusion-docs -f docs/Dockerfile . -docker run --rm -v $(pwd):/work datafusion-docs bash build.sh # On Windows PowerShell, use: -# docker run --rm -v ${PWD}:/work datafusion-docs bash build.sh +# docker run --rm -v ${PWD}:/datafusion datafusion-docs ``` The built documentation will be available in `docs/build/html/`. From e7db3d5996523bbde5a89cc1167226b92b402f0c Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Tue, 20 Jan 2026 10:27:28 +0530 Subject: [PATCH 10/12] refactor: address review feedback - Use rust:bookworm base image (latest) instead of pinned version - Remove python3-venv (not used) - Use build.sh instead of make html (generates dependency graph) - Add dos2unix for Windows line ending compatibility --- docs/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Dockerfile b/docs/Dockerfile index afec9755efef0..c4dd11e497a13 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -15,14 +15,14 @@ # specific language governing permissions and limitations # under the License. -FROM rust:1.92.0-bookworm +FROM rust:bookworm RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ python3-pip \ - python3-venv \ graphviz \ make \ + dos2unix \ && rm -rf /var/lib/apt/lists/* RUN cargo install cargo-depgraph --version ^1.6 --locked @@ -32,4 +32,4 @@ WORKDIR /datafusion/docs COPY requirements.txt . RUN python3 -m pip install --break-system-packages -r requirements.txt -CMD ["make", "html"] +CMD ["sh", "-c", "dos2unix build.sh scripts/*.sh 2>/dev/null || true; bash build.sh"] From e3aa78b67e61f59a1039974a53b0041092e8afb8 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Wed, 21 Jan 2026 10:36:41 +0530 Subject: [PATCH 11/12] fix: optimize Docker build caching to prevent re-downloading Rust dependencies - Add docker-compose.yml with persistent volume mounts for Rust cache - Update Dockerfile with cache mount directives for cargo registry and git - Update README with docker-compose as recommended approach for iterative builds - Fixes performance issue where docker run re-downloads components each time This addresses the feedback from the reviewer where subsequent builds were slow due to missing Rust dependency cache. Using docker-compose now preserves the cargo cache between runs. --- docker-compose.yml | 17 +++++++++++++++++ docs/Dockerfile | 6 +++++- docs/README.md | 22 ++++++++++++++++++++-- 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000000000..cba35756d3135 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,17 @@ +version: '3.8' + +services: + datafusion-docs: + build: + context: . + dockerfile: ./docs/Dockerfile + container_name: datafusion-docs + volumes: + - .:/datafusion + - rust-cache:/usr/local/cargo + - rust-target:/datafusion/docs/target + working_dir: /datafusion + +volumes: + rust-cache: + rust-target: diff --git a/docs/Dockerfile b/docs/Dockerfile index c4dd11e497a13..f514117697023 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -25,7 +25,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ dos2unix \ && rm -rf /var/lib/apt/lists/* -RUN cargo install cargo-depgraph --version ^1.6 --locked +# Install Rust tools with persistent caching +# These are cached in docker volumes when using docker-compose +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + cargo install cargo-depgraph --version ^1.6 --locked WORKDIR /datafusion/docs diff --git a/docs/README.md b/docs/README.md index c7ff31507ba4f..9bf8d25506633 100644 --- a/docs/README.md +++ b/docs/README.md @@ -25,9 +25,25 @@ https://datafusion.apache.org/ as part of the release process. ## Dependencies -### Option 1: Docker (Recommended) +### Option 1: Docker with Docker Compose (Recommended for iterative builds) -If you have Docker installed, you can build the docs without installing any dependencies on your system: +If you have Docker and Docker Compose installed, you can build the docs without installing any dependencies on your system. This method uses persistent volumes to cache Rust dependencies, making subsequent builds much faster: + +```sh +# Run the docs build with Docker Compose (from repository root) +docker-compose run --rm datafusion-docs +``` + +The built documentation will be available in `docs/build/html/`. + +**Benefits of this approach:** +- Rust toolchain and dependencies are cached between builds (no re-downloading on each run) +- Cross-platform compatibility (works on Windows, macOS, Linux) +- Simple one-command build process + +### Option 1b: Docker without Compose (Quick single build) + +If you prefer a quick build without Docker Compose: ```sh # Build the Docker image (from repository root) @@ -40,6 +56,8 @@ docker run --rm -v $(pwd):/datafusion datafusion-docs # docker run --rm -v ${PWD}:/datafusion datafusion-docs ``` +**Note:** This method re-downloads Rust dependencies on each run. For iterative development, use Option 1 (Docker Compose) instead. + The built documentation will be available in `docs/build/html/`. ### Option 2: Local Installation From 51d2eebcd2eddd40e5c18f56d9a1362b722342b0 Mon Sep 17 00:00:00 2001 From: Ganesh Patil <7030871503ganeshpatil@gmail.com> Date: Wed, 21 Jan 2026 10:46:31 +0530 Subject: [PATCH 12/12] fix: correct Docker build context and paths for docker-compose compatibility - Change Dockerfile to build from repository root instead of docs/ - Fix COPY path for requirements.txt from docs directory - Update working directory and CMD to properly handle paths - Ensure docker-compose works correctly with cache volumes - Docker build test passed successfully --- docker-compose.yml | 2 +- docs/Dockerfile | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cba35756d3135..ad1e331bd50ea 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: volumes: - .:/datafusion - rust-cache:/usr/local/cargo - - rust-target:/datafusion/docs/target + - rust-target:/datafusion/target working_dir: /datafusion volumes: diff --git a/docs/Dockerfile b/docs/Dockerfile index f514117697023..3d218fced1ea7 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -31,9 +31,9 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/usr/local/cargo/git \ cargo install cargo-depgraph --version ^1.6 --locked -WORKDIR /datafusion/docs +WORKDIR /datafusion -COPY requirements.txt . -RUN python3 -m pip install --break-system-packages -r requirements.txt +COPY docs/requirements.txt /datafusion/docs/ +RUN python3 -m pip install --break-system-packages -r /datafusion/docs/requirements.txt -CMD ["sh", "-c", "dos2unix build.sh scripts/*.sh 2>/dev/null || true; bash build.sh"] +CMD ["sh", "-c", "cd /datafusion/docs && dos2unix build.sh scripts/*.sh 2>/dev/null || true; bash build.sh"]