From 9282f5aa5ca67149030bf5683c170ae6db8c6fe9 Mon Sep 17 00:00:00 2001 From: Danner Stodolsky Date: Sat, 14 Feb 2026 17:01:36 -0500 Subject: [PATCH 1/2] perf(py_wheel): defer depset expansion to execution time Replace analysis-time depset.to_list() loop with Args.add_all(map_each=...) and ctx.actions.write(Args), deferring depset expansion to execution time. The old code built the input file list via string concatenation in a loop, which is O(n^2) in the total string length. For large py_wheel targets this dominated analysis time (~18s for ~18k transitive files in a production monorepo). Tested under Bazel 8.5.1 and 9.0.0. Add tests/py_wheel_performance/ with: - A correctness test verifying wheel contents (100 deps) - A scaling regression test (tagged manual) asserting analysis time grows linearly, not quadratically, with dep count (5k vs 10k deps) Scaling test results (5k vs 10k deps, best of 3): Before: 2825 ms / 8616 ms = 3.05x (quadratic) After: 502 ms / 776 ms = 1.55x (linear) --- CHANGELOG.md | 2 + python/private/py_wheel.bzl | 11 +- tests/py_wheel_performance/BUILD.bazel | 76 +++++++++++ tests/py_wheel_performance/gen_py_libs.bzl | 66 ++++++++++ .../py_wheel_analysis_scaling_test.sh | 118 ++++++++++++++++++ .../py_wheel_contents_test.py | 66 ++++++++++ 6 files changed, 334 insertions(+), 5 deletions(-) create mode 100644 tests/py_wheel_performance/BUILD.bazel create mode 100644 tests/py_wheel_performance/gen_py_libs.bzl create mode 100755 tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh create mode 100644 tests/py_wheel_performance/py_wheel_contents_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 418d71b4a7..49fcf5d7e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,8 @@ END_UNRELEASED_TEMPLATE {obj}`--stamp` flag. * (pypi) Now the RECORD file patches will follow the quoted or unquoted filenames convention in order to make `pytorch` and friends easier to patch. +* (wheel) `py_wheel` no longer expands the input depset during analysis, + improving analysis performance for targets with large dependency trees. {#v0-0-0-fixed} ### Fixed diff --git a/python/private/py_wheel.bzl b/python/private/py_wheel.bzl index 8202fa015a..1d98d21a65 100644 --- a/python/private/py_wheel.bzl +++ b/python/private/py_wheel.bzl @@ -344,12 +344,13 @@ def _py_wheel_impl(ctx): # Currently this is only the description file (if used). other_inputs = [] - # Wrap the inputs into a file to reduce command line length. + # Wrap the inputs into a file to reduce command line length, deferring + # depset expansion to execution time via Args.add_all with map_each. packageinputfile = ctx.actions.declare_file(ctx.attr.name + "_target_wrapped_inputs.txt") - content = "" - for input_file in inputs_to_package.to_list(): - content += _input_file_to_arg(input_file) + "\n" - ctx.actions.write(output = packageinputfile, content = content) + package_args = ctx.actions.args() + package_args.set_param_file_format("multiline") + package_args.add_all(inputs_to_package, map_each = _input_file_to_arg) + ctx.actions.write(output = packageinputfile, content = package_args) other_inputs.append(packageinputfile) args = ctx.actions.args() diff --git a/tests/py_wheel_performance/BUILD.bazel b/tests/py_wheel_performance/BUILD.bazel new file mode 100644 index 0000000000..8e39a6248f --- /dev/null +++ b/tests/py_wheel_performance/BUILD.bazel @@ -0,0 +1,76 @@ +"""Performance test for py_wheel analysis-time scaling. 
+ +Verifies that py_wheel analysis time scales linearly with dep count, +not quadratically (as it would if inputs_to_package.to_list() were +called during analysis). +""" + +load("@rules_shell//shell:sh_test.bzl", "sh_test") +load("//python:packaging.bzl", "py_wheel") +load("//python:py_test.bzl", "py_test") +load(":gen_py_libs.bzl", "gen_py_libs") + +package(default_visibility = ["//visibility:private"]) + +# Two py_wheel targets at different sizes to measure scaling behavior. +# If analysis is linear, 10k should take ~2x as long as 5k. +# If analysis is quadratic (the old to_list() bug), 10k takes ~4x as long. + +SMALL_DEPS = gen_py_libs( + name = "small", + count = 5000, +) + +LARGE_DEPS = gen_py_libs( + name = "large", + count = 10000, +) + +py_wheel( + name = "small_wheel", + distribution = "small_wheel", + python_tag = "py3", + version = "0.0.1", + deps = SMALL_DEPS, +) + +py_wheel( + name = "large_wheel", + distribution = "large_wheel", + python_tag = "py3", + version = "0.0.1", + deps = LARGE_DEPS, +) + +# Smaller wheel (100 deps) for correctness verification. +VERIFY_DEPS = gen_py_libs( + name = "verify", + count = 100, +) + +py_wheel( + name = "verify_wheel", + distribution = "verify_wheel", + python_tag = "py3", + version = "0.0.1", + deps = VERIFY_DEPS, +) + +py_test( + name = "py_wheel_contents_test", + srcs = ["py_wheel_contents_test.py"], + data = [":verify_wheel"], + deps = ["//python/runfiles"], +) + +sh_test( + name = "py_wheel_analysis_scaling_test", + srcs = ["py_wheel_analysis_scaling_test.sh"], + tags = [ + "exclusive", + "integration-test", + "manual", + "no-remote-exec", + "no-sandbox", + ], +) diff --git a/tests/py_wheel_performance/gen_py_libs.bzl b/tests/py_wheel_performance/gen_py_libs.bzl new file mode 100644 index 0000000000..a5f148e45f --- /dev/null +++ b/tests/py_wheel_performance/gen_py_libs.bzl @@ -0,0 +1,66 @@ +"""Macro to generate many py_library targets for benchmarking py_wheel.""" + +load("@bazel_skylib//rules:write_file.bzl", "write_file") +load("//python:py_library.bzl", "py_library") + +def gen_py_libs(name, count): + """Generate `count` py_library targets, each with a single .py file. + + Uses deeply nested paths to simulate real-world package structures. + Longer paths amplify the cost of O(n^2) string concatenation in the + analysis phase, making quadratic scaling easier to detect. + + Args: + name: Base name prefix for generated targets. + count: Number of py_library targets to generate. + + Returns: + A list of label strings for use as py_wheel deps. + """ + + # Deep path prefix to make each _input_file_to_arg line long, simulating + # real-world monorepo package paths. Longer per-line strings make the + # quadratic string-concat cost dominate over linear target loading, + # so the scaling ratio reliably distinguishes O(n) from O(n^2). 
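+    # Back-of-envelope (illustrative numbers, not measured here): with
+    # n = 10000 inputs and ~250-byte lines, repeated `content += line`
+    # copies roughly len * n^2 / 2 ~= 12 GB of string data; the Args
+    # path instead just records the depset during analysis.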
+ deep_prefix = "/".join([ + "pkg_{}".format(name), + "src", + "python", + "company_name_placeholder", + "organization_unit_division", + "engineering_team_name", + "project_name_repository", + "subproject_component_area", + "internal_implementation_detail", + "generated_sources_directory", + "modules_directory_location", + "feature_area_subdivision", + "subsystem_layer_component", + "detail_level_implementation", + "version_specific_code_path", + "platform_dependent_modules", + ]) + + labels = [] + for i in range(count): + src_name = "{}_src_{}".format(name, i) + lib_name = "{}_lib_{}".format(name, i) + + write_file( + name = src_name, + out = "{}/module_{}.py".format(deep_prefix, i), + content = [ + "# Generated module {} of {}".format(i, count), + "VALUE = {}".format(i), + "", + ], + ) + + py_library( + name = lib_name, + srcs = [src_name], + ) + + labels.append(":{}".format(lib_name)) + + return labels diff --git a/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh b/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh new file mode 100755 index 0000000000..dba69c44fb --- /dev/null +++ b/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# Test that py_wheel analysis time scales linearly with dep count. +# +# The old implementation called inputs_to_package.to_list() during analysis +# and built a string via concatenation, giving O(n^2) scaling. The fix uses +# Args.add_all(map_each=...) which defers to execution time, giving O(n). +# +# This test builds two py_wheel targets (5k and 10k deps) in analysis-only +# mode and checks that the ratio of analysis times is closer to 2x (linear) +# than 4x (quadratic). +# +# Uses --nokeep_state_after_build to discard the analysis cache after each +# build, forcing a full re-analysis on the next invocation while keeping +# the Bazel server warm (avoiding startup time noise). + +set -euo pipefail + +SMALL_TARGET="//tests/py_wheel_performance:small_wheel" +LARGE_TARGET="//tests/py_wheel_performance:large_wheel" +# Threshold ratio: linear=2.0, quadratic=4.0. We use 3.0 as the boundary. +MAX_RATIO="3.0" +ITERATIONS=3 + +# Invalidate the analysis cache so the next build must re-analyze. +invalidate_analysis_cache() { + bazel build --nobuild --nokeep_state_after_build "$@" 2>/dev/null +} + +# Extract the "interleaved loading-and-analysis" phase time (in ms) from +# a Bazel profile, falling back to wall-clock time if parsing fails. +extract_analysis_ms() { + local profile="$1" + local ms + ms=$(bazel analyze-profile "${profile}" 2>&1 \ + | grep "loading-and-analysis" \ + | grep -oP '[\d.]+(?= s)' \ + | head -1 \ + | awk '{printf "%d", $1 * 1000}') + echo "${ms:-0}" +} + +measure_analysis_time() { + local target="$1" + local best_ms=999999999 + + for i in $(seq 1 "${ITERATIONS}"); do + # Discard analysis cache from any prior build. + invalidate_analysis_cache "${target}" + + # Measure a fresh analysis pass. + local profile + profile=$(mktemp /tmp/py_wheel_perf_XXXXXX.profile) + bazel build --nobuild --profile="${profile}" "${target}" 2>/dev/null + + local analysis_ms + analysis_ms=$(extract_analysis_ms "${profile}") + rm -f "${profile}" + + # Fall back to wall time if profile parsing returned 0. 
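+    # The fallback times the whole `bazel build --nobuild` invocation,
+    # including client round-trip and loading, so it overstates pure
+    # analysis time but preserves the 10k/5k ratio. Note that
+    # `date +%s%N` requires GNU date, so the fallback is Linux-only.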
+ if [[ "${analysis_ms}" == "0" ]]; then + invalidate_analysis_cache "${target}" + local start end + start=$(date +%s%N) + bazel build --nobuild "${target}" 2>/dev/null + end=$(date +%s%N) + analysis_ms=$(( (end - start) / 1000000 )) + fi + + echo " iteration ${i}: ${analysis_ms} ms" >&2 + + if (( analysis_ms < best_ms )); then + best_ms=${analysis_ms} + fi + done + + echo "${best_ms}" +} + +echo "=== py_wheel analysis scaling test ===" +echo "" + +# Warm up: ensure Bazel server is running and external deps are fetched. +echo "Warming up..." +bazel build --nobuild "${SMALL_TARGET}" 2>/dev/null || true +bazel build --nobuild "${LARGE_TARGET}" 2>/dev/null || true +echo "" + +echo "Measuring small wheel (5k deps), best of ${ITERATIONS}..." +small_ms=$(measure_analysis_time "${SMALL_TARGET}") +echo " Result: ${small_ms} ms" + +echo "Measuring large wheel (10k deps), best of ${ITERATIONS}..." +large_ms=$(measure_analysis_time "${LARGE_TARGET}") +echo " Result: ${large_ms} ms" + +# Compute ratio using awk for floating point +ratio=$(awk "BEGIN { printf \"%.2f\", ${large_ms} / ${small_ms} }") + +echo "" +echo "=== Results ===" +echo " Small (5k deps): ${small_ms} ms" +echo " Large (10k deps): ${large_ms} ms" +echo " Ratio (10k/5k): ${ratio}x" +echo " Max allowed: ${MAX_RATIO}x" +echo "" + +# Check that ratio is below threshold +passed=$(awk "BEGIN { print (${ratio} <= ${MAX_RATIO}) ? 1 : 0 }") + +if [[ "${passed}" == "1" ]]; then + echo "PASSED: Scaling ratio ${ratio}x is within linear bound (<= ${MAX_RATIO}x)" + exit 0 +else + echo "FAILED: Scaling ratio ${ratio}x exceeds ${MAX_RATIO}x, suggesting quadratic behavior" + echo " Expected linear scaling (~2.0x) from Args.add_all(map_each=...)" + echo " Got ${ratio}x which is closer to quadratic (4.0x)" + exit 1 +fi diff --git a/tests/py_wheel_performance/py_wheel_contents_test.py b/tests/py_wheel_performance/py_wheel_contents_test.py new file mode 100644 index 0000000000..60be03d3c4 --- /dev/null +++ b/tests/py_wheel_performance/py_wheel_contents_test.py @@ -0,0 +1,66 @@ +"""Test that py_wheel produces correct wheel contents with many deps. + +Verifies that the Args.add_all(map_each=...) approach used to write the +input file list produces a wheel with the expected files. 
+""" + +import os +import unittest +import zipfile + +from python.runfiles import runfiles + +_WHEEL_NAME = "verify_wheel-0.0.1-py3-none-any.whl" +_EXPECTED_MODULE_COUNT = 100 + + +class PyWheelContentsTest(unittest.TestCase): + + def setUp(self): + self.rf = runfiles.Create() + whl_path = self.rf.Rlocation( + os.path.join("rules_python", "tests", "py_wheel_performance", _WHEEL_NAME) + ) + self.assertIsNotNone(whl_path, "Could not find wheel via runfiles") + self.assertTrue(os.path.exists(whl_path), f"Wheel not found: {whl_path}") + self.whl_path = whl_path + + def test_verify_wheel_has_all_modules(self): + """Verify the wheel contains exactly the expected number of .py files.""" + with zipfile.ZipFile(self.whl_path) as whl: + py_files = [n for n in whl.namelist() if n.endswith(".py")] + self.assertEqual( + len(py_files), + _EXPECTED_MODULE_COUNT, + f"Expected {_EXPECTED_MODULE_COUNT} .py files in wheel, got {len(py_files)}", + ) + + def test_verify_wheel_file_contents(self): + """Verify the .py files in the wheel have the expected content.""" + with zipfile.ZipFile(self.whl_path) as whl: + py_files = sorted(n for n in whl.namelist() if n.endswith(".py")) + self.assertTrue(py_files, "No .py files found in wheel") + first = whl.read(py_files[0]).decode("utf-8") + self.assertIn("Generated module", first) + self.assertIn("VALUE =", first) + + def test_verify_wheel_metadata(self): + """Verify the wheel has proper metadata files.""" + with zipfile.ZipFile(self.whl_path) as whl: + names = whl.namelist() + metadata_files = [ + n for n in names if "METADATA" in n or "WHEEL" in n or "RECORD" in n + ] + self.assertTrue( + len(metadata_files) >= 3, + f"Expected METADATA, WHEEL, RECORD files; got {metadata_files}", + ) + + metadata_path = [n for n in names if n.endswith("METADATA")][0] + metadata = whl.read(metadata_path).decode("utf-8") + self.assertIn("Name: verify_wheel", metadata) + self.assertIn("Version: 0.0.1", metadata) + + +if __name__ == "__main__": + unittest.main() From 2f9c5a3645d41d57582925549080f8950eb67eed Mon Sep 17 00:00:00 2001 From: Danner Stodolsky Date: Sat, 14 Feb 2026 20:32:42 -0500 Subject: [PATCH 2/2] Remove py_wheel performance test Co-Authored-By: Claude Opus 4.6 --- tests/py_wheel_performance/BUILD.bazel | 76 ----------- tests/py_wheel_performance/gen_py_libs.bzl | 66 ---------- .../py_wheel_analysis_scaling_test.sh | 118 ------------------ .../py_wheel_contents_test.py | 66 ---------- 4 files changed, 326 deletions(-) delete mode 100644 tests/py_wheel_performance/BUILD.bazel delete mode 100644 tests/py_wheel_performance/gen_py_libs.bzl delete mode 100755 tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh delete mode 100644 tests/py_wheel_performance/py_wheel_contents_test.py diff --git a/tests/py_wheel_performance/BUILD.bazel b/tests/py_wheel_performance/BUILD.bazel deleted file mode 100644 index 8e39a6248f..0000000000 --- a/tests/py_wheel_performance/BUILD.bazel +++ /dev/null @@ -1,76 +0,0 @@ -"""Performance test for py_wheel analysis-time scaling. - -Verifies that py_wheel analysis time scales linearly with dep count, -not quadratically (as it would if inputs_to_package.to_list() were -called during analysis). -""" - -load("@rules_shell//shell:sh_test.bzl", "sh_test") -load("//python:packaging.bzl", "py_wheel") -load("//python:py_test.bzl", "py_test") -load(":gen_py_libs.bzl", "gen_py_libs") - -package(default_visibility = ["//visibility:private"]) - -# Two py_wheel targets at different sizes to measure scaling behavior. 
-# If analysis is linear, 10k should take ~2x as long as 5k. -# If analysis is quadratic (the old to_list() bug), 10k takes ~4x as long. - -SMALL_DEPS = gen_py_libs( - name = "small", - count = 5000, -) - -LARGE_DEPS = gen_py_libs( - name = "large", - count = 10000, -) - -py_wheel( - name = "small_wheel", - distribution = "small_wheel", - python_tag = "py3", - version = "0.0.1", - deps = SMALL_DEPS, -) - -py_wheel( - name = "large_wheel", - distribution = "large_wheel", - python_tag = "py3", - version = "0.0.1", - deps = LARGE_DEPS, -) - -# Smaller wheel (100 deps) for correctness verification. -VERIFY_DEPS = gen_py_libs( - name = "verify", - count = 100, -) - -py_wheel( - name = "verify_wheel", - distribution = "verify_wheel", - python_tag = "py3", - version = "0.0.1", - deps = VERIFY_DEPS, -) - -py_test( - name = "py_wheel_contents_test", - srcs = ["py_wheel_contents_test.py"], - data = [":verify_wheel"], - deps = ["//python/runfiles"], -) - -sh_test( - name = "py_wheel_analysis_scaling_test", - srcs = ["py_wheel_analysis_scaling_test.sh"], - tags = [ - "exclusive", - "integration-test", - "manual", - "no-remote-exec", - "no-sandbox", - ], -) diff --git a/tests/py_wheel_performance/gen_py_libs.bzl b/tests/py_wheel_performance/gen_py_libs.bzl deleted file mode 100644 index a5f148e45f..0000000000 --- a/tests/py_wheel_performance/gen_py_libs.bzl +++ /dev/null @@ -1,66 +0,0 @@ -"""Macro to generate many py_library targets for benchmarking py_wheel.""" - -load("@bazel_skylib//rules:write_file.bzl", "write_file") -load("//python:py_library.bzl", "py_library") - -def gen_py_libs(name, count): - """Generate `count` py_library targets, each with a single .py file. - - Uses deeply nested paths to simulate real-world package structures. - Longer paths amplify the cost of O(n^2) string concatenation in the - analysis phase, making quadratic scaling easier to detect. - - Args: - name: Base name prefix for generated targets. - count: Number of py_library targets to generate. - - Returns: - A list of label strings for use as py_wheel deps. - """ - - # Deep path prefix to make each _input_file_to_arg line long, simulating - # real-world monorepo package paths. Longer per-line strings make the - # quadratic string-concat cost dominate over linear target loading, - # so the scaling ratio reliably distinguishes O(n) from O(n^2). 
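-    # Back-of-envelope (illustrative numbers, not measured here): with
-    # n = 10000 inputs and ~250-byte lines, repeated `content += line`
-    # copies roughly len * n^2 / 2 ~= 12 GB of string data; the Args
-    # path instead just records the depset during analysis.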
- deep_prefix = "/".join([ - "pkg_{}".format(name), - "src", - "python", - "company_name_placeholder", - "organization_unit_division", - "engineering_team_name", - "project_name_repository", - "subproject_component_area", - "internal_implementation_detail", - "generated_sources_directory", - "modules_directory_location", - "feature_area_subdivision", - "subsystem_layer_component", - "detail_level_implementation", - "version_specific_code_path", - "platform_dependent_modules", - ]) - - labels = [] - for i in range(count): - src_name = "{}_src_{}".format(name, i) - lib_name = "{}_lib_{}".format(name, i) - - write_file( - name = src_name, - out = "{}/module_{}.py".format(deep_prefix, i), - content = [ - "# Generated module {} of {}".format(i, count), - "VALUE = {}".format(i), - "", - ], - ) - - py_library( - name = lib_name, - srcs = [src_name], - ) - - labels.append(":{}".format(lib_name)) - - return labels diff --git a/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh b/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh deleted file mode 100755 index dba69c44fb..0000000000 --- a/tests/py_wheel_performance/py_wheel_analysis_scaling_test.sh +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env bash -# Test that py_wheel analysis time scales linearly with dep count. -# -# The old implementation called inputs_to_package.to_list() during analysis -# and built a string via concatenation, giving O(n^2) scaling. The fix uses -# Args.add_all(map_each=...) which defers to execution time, giving O(n). -# -# This test builds two py_wheel targets (5k and 10k deps) in analysis-only -# mode and checks that the ratio of analysis times is closer to 2x (linear) -# than 4x (quadratic). -# -# Uses --nokeep_state_after_build to discard the analysis cache after each -# build, forcing a full re-analysis on the next invocation while keeping -# the Bazel server warm (avoiding startup time noise). - -set -euo pipefail - -SMALL_TARGET="//tests/py_wheel_performance:small_wheel" -LARGE_TARGET="//tests/py_wheel_performance:large_wheel" -# Threshold ratio: linear=2.0, quadratic=4.0. We use 3.0 as the boundary. -MAX_RATIO="3.0" -ITERATIONS=3 - -# Invalidate the analysis cache so the next build must re-analyze. -invalidate_analysis_cache() { - bazel build --nobuild --nokeep_state_after_build "$@" 2>/dev/null -} - -# Extract the "interleaved loading-and-analysis" phase time (in ms) from -# a Bazel profile, falling back to wall-clock time if parsing fails. -extract_analysis_ms() { - local profile="$1" - local ms - ms=$(bazel analyze-profile "${profile}" 2>&1 \ - | grep "loading-and-analysis" \ - | grep -oP '[\d.]+(?= s)' \ - | head -1 \ - | awk '{printf "%d", $1 * 1000}') - echo "${ms:-0}" -} - -measure_analysis_time() { - local target="$1" - local best_ms=999999999 - - for i in $(seq 1 "${ITERATIONS}"); do - # Discard analysis cache from any prior build. - invalidate_analysis_cache "${target}" - - # Measure a fresh analysis pass. - local profile - profile=$(mktemp /tmp/py_wheel_perf_XXXXXX.profile) - bazel build --nobuild --profile="${profile}" "${target}" 2>/dev/null - - local analysis_ms - analysis_ms=$(extract_analysis_ms "${profile}") - rm -f "${profile}" - - # Fall back to wall time if profile parsing returned 0. 
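-    # The fallback times the whole `bazel build --nobuild` invocation,
-    # including client round-trip and loading, so it overstates pure
-    # analysis time but preserves the 10k/5k ratio. Note that
-    # `date +%s%N` requires GNU date, so the fallback is Linux-only.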
- if [[ "${analysis_ms}" == "0" ]]; then - invalidate_analysis_cache "${target}" - local start end - start=$(date +%s%N) - bazel build --nobuild "${target}" 2>/dev/null - end=$(date +%s%N) - analysis_ms=$(( (end - start) / 1000000 )) - fi - - echo " iteration ${i}: ${analysis_ms} ms" >&2 - - if (( analysis_ms < best_ms )); then - best_ms=${analysis_ms} - fi - done - - echo "${best_ms}" -} - -echo "=== py_wheel analysis scaling test ===" -echo "" - -# Warm up: ensure Bazel server is running and external deps are fetched. -echo "Warming up..." -bazel build --nobuild "${SMALL_TARGET}" 2>/dev/null || true -bazel build --nobuild "${LARGE_TARGET}" 2>/dev/null || true -echo "" - -echo "Measuring small wheel (5k deps), best of ${ITERATIONS}..." -small_ms=$(measure_analysis_time "${SMALL_TARGET}") -echo " Result: ${small_ms} ms" - -echo "Measuring large wheel (10k deps), best of ${ITERATIONS}..." -large_ms=$(measure_analysis_time "${LARGE_TARGET}") -echo " Result: ${large_ms} ms" - -# Compute ratio using awk for floating point -ratio=$(awk "BEGIN { printf \"%.2f\", ${large_ms} / ${small_ms} }") - -echo "" -echo "=== Results ===" -echo " Small (5k deps): ${small_ms} ms" -echo " Large (10k deps): ${large_ms} ms" -echo " Ratio (10k/5k): ${ratio}x" -echo " Max allowed: ${MAX_RATIO}x" -echo "" - -# Check that ratio is below threshold -passed=$(awk "BEGIN { print (${ratio} <= ${MAX_RATIO}) ? 1 : 0 }") - -if [[ "${passed}" == "1" ]]; then - echo "PASSED: Scaling ratio ${ratio}x is within linear bound (<= ${MAX_RATIO}x)" - exit 0 -else - echo "FAILED: Scaling ratio ${ratio}x exceeds ${MAX_RATIO}x, suggesting quadratic behavior" - echo " Expected linear scaling (~2.0x) from Args.add_all(map_each=...)" - echo " Got ${ratio}x which is closer to quadratic (4.0x)" - exit 1 -fi diff --git a/tests/py_wheel_performance/py_wheel_contents_test.py b/tests/py_wheel_performance/py_wheel_contents_test.py deleted file mode 100644 index 60be03d3c4..0000000000 --- a/tests/py_wheel_performance/py_wheel_contents_test.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Test that py_wheel produces correct wheel contents with many deps. - -Verifies that the Args.add_all(map_each=...) approach used to write the -input file list produces a wheel with the expected files. 
-""" - -import os -import unittest -import zipfile - -from python.runfiles import runfiles - -_WHEEL_NAME = "verify_wheel-0.0.1-py3-none-any.whl" -_EXPECTED_MODULE_COUNT = 100 - - -class PyWheelContentsTest(unittest.TestCase): - - def setUp(self): - self.rf = runfiles.Create() - whl_path = self.rf.Rlocation( - os.path.join("rules_python", "tests", "py_wheel_performance", _WHEEL_NAME) - ) - self.assertIsNotNone(whl_path, "Could not find wheel via runfiles") - self.assertTrue(os.path.exists(whl_path), f"Wheel not found: {whl_path}") - self.whl_path = whl_path - - def test_verify_wheel_has_all_modules(self): - """Verify the wheel contains exactly the expected number of .py files.""" - with zipfile.ZipFile(self.whl_path) as whl: - py_files = [n for n in whl.namelist() if n.endswith(".py")] - self.assertEqual( - len(py_files), - _EXPECTED_MODULE_COUNT, - f"Expected {_EXPECTED_MODULE_COUNT} .py files in wheel, got {len(py_files)}", - ) - - def test_verify_wheel_file_contents(self): - """Verify the .py files in the wheel have the expected content.""" - with zipfile.ZipFile(self.whl_path) as whl: - py_files = sorted(n for n in whl.namelist() if n.endswith(".py")) - self.assertTrue(py_files, "No .py files found in wheel") - first = whl.read(py_files[0]).decode("utf-8") - self.assertIn("Generated module", first) - self.assertIn("VALUE =", first) - - def test_verify_wheel_metadata(self): - """Verify the wheel has proper metadata files.""" - with zipfile.ZipFile(self.whl_path) as whl: - names = whl.namelist() - metadata_files = [ - n for n in names if "METADATA" in n or "WHEEL" in n or "RECORD" in n - ] - self.assertTrue( - len(metadata_files) >= 3, - f"Expected METADATA, WHEEL, RECORD files; got {metadata_files}", - ) - - metadata_path = [n for n in names if n.endswith("METADATA")][0] - metadata = whl.read(metadata_path).decode("utf-8") - self.assertIn("Name: verify_wheel", metadata) - self.assertIn("Version: 0.0.1", metadata) - - -if __name__ == "__main__": - unittest.main()
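
A note on the map_each contract that the fix in PATCH 1/2 relies on
(general Bazel behavior, not introduced by this change): the callback
passed to Args.add_all(map_each = ...) must be a function declared at the
top level of a .bzl file, and it must return a string, a list of strings,
or None for each element. Because ctx.actions.write accepts an Args
object as its content, both the depset expansion and the per-file
formatting run when the param file is materialized at execution time,
never during analysis. Below is a minimal sketch of the required shape;
_to_line, _impl, and write_inputs are hypothetical stand-ins, not the
real _input_file_to_arg / _py_wheel_impl from python/private/py_wheel.bzl:

    # Must be a top-level def; Bazel rejects lambdas and nested
    # functions as map_each callbacks.
    def _to_line(f):
        # f is a File; each returned string becomes one line under the
        # "multiline" param-file format.
        return f.short_path

    def _impl(ctx):
        out = ctx.actions.declare_file(ctx.attr.name + "_inputs.txt")
        args = ctx.actions.args()
        args.set_param_file_format("multiline")
        args.add_all(ctx.files.srcs, map_each = _to_line)  # lazy; no to_list()
        ctx.actions.write(output = out, content = args)

    write_inputs = rule(
        implementation = _impl,
        attrs = {"srcs": attr.label_list(allow_files = True)},
    )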