From cef18289f7ca2fa24148b3bfba00eb3c6dd35322 Mon Sep 17 00:00:00 2001 From: hallo1 <2302004040@qq.com> Date: Thu, 23 Oct 2025 09:08:11 +0800 Subject: [PATCH] Load external ONNX tensor data before transforming models Ensure every ONNX pass hydrates external tensor data after loading the IR model. This avoids writing broken references when saving quantized or converted models with external weights, restoring the behavior seen in v0.9.1. --- olive/passes/onnx/conversion.py | 1 + olive/passes/onnx/hqq_quantization.py | 1 + olive/passes/onnx/onnxscript_fusion.py | 4 +++- olive/passes/onnx/rtn_quantization.py | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/olive/passes/onnx/conversion.py b/olive/passes/onnx/conversion.py index e49aae717f..72176ebdf6 100644 --- a/olive/passes/onnx/conversion.py +++ b/olive/passes/onnx/conversion.py @@ -655,6 +655,7 @@ def _run_for_config( ) -> ONNXModelHandler: output_model_path = resolve_onnx_path(output_model_path) model_ir = model.load_ir_model() + ir.external_data.load_to_model(model_ir) version_converter.convert_version(model_ir, config.target_opset, fallback=True) return ir_model_to_olive_model(model_ir, output_model_path, config) diff --git a/olive/passes/onnx/hqq_quantization.py b/olive/passes/onnx/hqq_quantization.py index 443db38a97..73273b6951 100644 --- a/olive/passes/onnx/hqq_quantization.py +++ b/olive/passes/onnx/hqq_quantization.py @@ -72,6 +72,7 @@ def _run_for_config( return model output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) ir_model = model.load_ir_model() + ir.external_data.load_to_model(ir_model) ir_model.graph.opset_imports[MSFT_DOMAIN] = 1 self._quantize_model( ir_model, diff --git a/olive/passes/onnx/onnxscript_fusion.py b/olive/passes/onnx/onnxscript_fusion.py index 20984216e0..254623ba23 100644 --- a/olive/passes/onnx/onnxscript_fusion.py +++ b/olive/passes/onnx/onnxscript_fusion.py @@ -6,6 +6,7 @@ from pathlib import Path from onnxscript.rewriter import ort_fusions +import onnx_ir as ir from olive.hardware.accelerator import AcceleratorSpec from olive.model import ONNXModelHandler @@ -30,7 +31,8 @@ def _run_for_config( output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) model_ir = model.load_ir_model() - + ir.external_data.load_to_model(model_ir) + # TODO(exporter team): Different fusions support different devices model_ir, function_stats = ort_fusions.optimize_for_ort(model_ir) logger.debug("Function stats: %s", function_stats) diff --git a/olive/passes/onnx/rtn_quantization.py b/olive/passes/onnx/rtn_quantization.py index d42bbcc229..62cfacedf7 100644 --- a/olive/passes/onnx/rtn_quantization.py +++ b/olive/passes/onnx/rtn_quantization.py @@ -77,6 +77,7 @@ def _run_for_config( ) -> ONNXModelHandler: output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name) ir_model = model.load_ir_model() + ir.external_data.load_to_model(ir_model) ir_model.graph.opset_imports[MSFT_DOMAIN] = 1 self._quantize_model( ir_model,