From 30a0b980a3c7b08cee16fb7a59fff6974995e2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Mon, 12 Jan 2026 10:36:46 +0100 Subject: [PATCH 1/2] Additional padding options for paganin_filter --- httomolibgpu/cupywrapper.py | 2 + httomolibgpu/prep/phase.py | 73 ++++++++++++++++++++---- tests/test_prep/test_phase.py | 84 ++++++++++++++++++++++++---- zenodo-tests/test_prep/test_phase.py | 19 +++++-- 4 files changed, 153 insertions(+), 25 deletions(-) diff --git a/httomolibgpu/cupywrapper.py b/httomolibgpu/cupywrapper.py index 717a753b..8d137cec 100644 --- a/httomolibgpu/cupywrapper.py +++ b/httomolibgpu/cupywrapper.py @@ -2,6 +2,7 @@ try: import cupy as cp import nvtx + from cupyx.scipy.fft import next_fast_len try: cp.cuda.Device(0).compute_capability @@ -15,5 +16,6 @@ ) from unittest.mock import Mock import numpy as cp + from scipy.fft import next_fast_len nvtx = Mock() diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py index f18c8040..5dfd54f4 100644 --- a/httomolibgpu/prep/phase.py +++ b/httomolibgpu/prep/phase.py @@ -26,6 +26,7 @@ cp = cupywrapper.cp cupy_run = cupywrapper.cupy_run +next_fast_len = cupywrapper.next_fast_len from unittest.mock import Mock @@ -38,7 +39,7 @@ fftshift = Mock() from numpy import float32 -from typing import Optional, Tuple +from typing import Literal, Optional, Tuple import math __all__ = [ @@ -56,6 +57,10 @@ def paganin_filter( distance: float = 1.0, energy: float = 53.0, ratio_delta_beta: float = 250, + calculate_padding_value_method: Literal[ + "next_power_of_2", "next_fast_length", "use_pad_x_y" + ] = "next_power_of_2", + pad_x_y: Optional[Tuple[int, int]] = None, calc_peak_gpu_mem: bool = False, ) -> cp.ndarray: """ @@ -74,6 +79,10 @@ def paganin_filter( Beam energy in keV. ratio_delta_beta : float The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption. + calculate_padding_value_method: str + Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`. + pad_x_y (int, int) | None: + Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'. calc_peak_gpu_mem: bool Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user. @@ -93,9 +102,9 @@ def paganin_filter( mem_stack.malloc(np.prod(tomo) * np.float32().itemsize) dz_orig, dy_orig, dx_orig = tomo.shape if not mem_stack else tomo - # Perform padding to the power of 2 as FFT is O(n*log(n)) complexity - # TODO: adding other options of padding? - padded_tomo, pad_tup = _pad_projections_to_second_power(tomo, mem_stack) + padded_tomo, pad_tup = _pad_projections( + tomo, calculate_padding_value_method, pad_x_y, mem_stack + ) dz, dy, dx = padded_tomo.shape if not mem_stack else padded_tomo @@ -219,21 +228,51 @@ def _shift_bit_length(x: int) -> int: return 1 << (x - 1).bit_length() -def _calculate_pad_size(datashape: tuple) -> list: +def _calculate_pad_size( + datashape: tuple, + calculate_padding_value_method: Literal[ + "next_power_of_2", "next_fast_length", "use_pad_x_y" + ], + pad_x_y: Optional[Tuple[int, int]], +) -> list: """Calculating the padding size Args: - datashape (tuple): the shape of the 3D data + datashape (tuple): + the shape of the 3D data + calculate_padding_value_method: str + Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`. + pad_x_y (int, int) | None: + Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'. Returns: list: the padded dimensions """ + if pad_x_y is not None and calculate_padding_value_method != "use_pad_x_y": + raise ValueError( + 'calculate_padding_value_method must be "use_pad_x_y" when pad_x_y is specified' + ) + elif calculate_padding_value_method == "use_pad_x_y" and pad_x_y is None: + raise ValueError( + 'pad_x_y must be provided when calculate_padding_value_method is "use_pad_x_y"' + ) + + if calculate_padding_value_method == "next_power_of_2": + calculate_padded_dim = lambda _, size: _shift_bit_length(size + 1) + elif calculate_padding_value_method == "next_fast_length": + calculate_padded_dim = lambda _, size: next_fast_len(size) + elif calculate_padding_value_method == "use_pad_x_y": + calculate_padded_dim = lambda dim, size: size + 2 * pad_x_y[2 - dim] + else: + raise ValueError( + f'Unexpected calculate_padding_value_method: "{calculate_padding_value_method}"' + ) pad_list = [] for index, element in enumerate(datashape): if index == 0: pad_width = (0, 0) # do not pad the slicing dim else: - diff = _shift_bit_length(element + 1) - element + diff = calculate_padded_dim(index, element) - element if element % 2 == 0: pad_width_scalar = diff // 2 pad_width = (pad_width_scalar, pad_width_scalar) @@ -248,17 +287,27 @@ def _calculate_pad_size(datashape: tuple) -> list: return pad_list -def _pad_projections_to_second_power( - tomo: cp.ndarray, mem_stack: Optional[_DeviceMemStack] +def _pad_projections( + tomo: cp.ndarray, + calculate_padding_value_method: Literal[ + "next_power_of_2", "next_fast_length", "use_pad_x_y" + ], + pad_x_y: Optional[Tuple[int, int]], + mem_stack: Optional[_DeviceMemStack], ) -> Tuple[cp.ndarray, Tuple[int, int]]: """ - Performs padding of each projection to the next power of 2. + Performs padding of each projection to a size optimal for FFT. If the shape is not even we also care of that before padding. Parameters ---------- tomo : cp.ndarray 3d projection data + calculate_padding_value_method: str + Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`. + pad_x_y (int, int) | None: + Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'. + Returns ------- @@ -268,7 +317,9 @@ def _pad_projections_to_second_power( """ full_shape_tomo = cp.shape(tomo) if not mem_stack else tomo - pad_list = _calculate_pad_size(full_shape_tomo) + pad_list = _calculate_pad_size( + full_shape_tomo, calculate_padding_value_method, pad_x_y + ) if mem_stack: padded_tomo = [ diff --git a/tests/test_prep/test_phase.py b/tests/test_prep/test_phase.py index d66a7f9c..30599294 100644 --- a/tests/test_prep/test_phase.py +++ b/tests/test_prep/test_phase.py @@ -51,6 +51,34 @@ def test_paganin_filter_dist3(data): assert_allclose(np.sum(filtered_data), -24870786.0, rtol=1e-6) +@pytest.mark.parametrize( + "test_case", + [ + ("next_power_of_2", None, -6.725061, -6.367116), + ("next_fast_length", None, -6.677313, -6.096187), + ("use_pad_x_y", (0, 0), -6.677313, -6.096187), + ("use_pad_x_y", (80, 80), -6.73193, -6.405338), + ("use_pad_x_y", (45, 75), -6.726483, -6.37466), + ], +) +def test_paganin_filter_padding_options(data, test_case): + # --- testing the Paganin filter on tomo_standard ---# + padding_method, pad_x_y, test_mean, test_max = test_case + filtered_data = paganin_filter( + data, + calculate_padding_value_method=padding_method, + pad_x_y=pad_x_y, + ).get() + + assert filtered_data.ndim == 3 + assert_allclose(np.mean(filtered_data), test_mean, rtol=eps) + assert_allclose(np.max(filtered_data), test_max, rtol=eps) + + #: make sure the output is float32 + assert filtered_data.dtype == np.float32 + assert filtered_data.flags.c_contiguous + + @pytest.mark.perf def test_paganin_filter_performance(ensure_clean_memory): # Note: low/high and size values taken from sample2_medium.yaml real run @@ -87,32 +115,65 @@ def test_paganin_filter_performance(ensure_clean_memory): @pytest.mark.parametrize("slices", [3, 7, 32, 61, 109, 120, 150]) @pytest.mark.parametrize("dim_x", [128, 140]) -def test_paganin_filter_calc_mem(slices, dim_x, ensure_clean_memory): +@pytest.mark.parametrize( + "padding", + [("next_power_of_2", None), ("next_fast_length", None), ("use_pad_x_y", (45, 45))], +) +def test_paganin_filter_calc_mem(slices, dim_x, padding, ensure_clean_memory): dim_y = 159 + padding_method, pad_x_y = padding + data = cp.random.random_sample((slices, dim_x, dim_y), dtype=np.float32) hook = MaxMemoryHook() with hook: - paganin_filter(cp.copy(data)) + paganin_filter( + cp.copy(data), + calculate_padding_value_method=padding_method, + pad_x_y=pad_x_y, + ) actual_mem_peak = hook.max_mem try: - estimated_mem_peak = paganin_filter(data.shape, calc_peak_gpu_mem=True) + estimated_mem_peak = paganin_filter( + data.shape, + calculate_padding_value_method=padding_method, + pad_x_y=pad_x_y, + calc_peak_gpu_mem=True, + ) except cp.cuda.memory.OutOfMemoryError: pytest.skip("Not enough GPU memory to estimate memory peak") - assert actual_mem_peak * 0.99 <= estimated_mem_peak - assert estimated_mem_peak <= actual_mem_peak * 1.01 + assert actual_mem_peak == estimated_mem_peak @pytest.mark.parametrize("slices", [38, 177, 268, 320, 490, 607, 803, 859, 902, 951]) @pytest.mark.parametrize("dims", [(900, 1280), (1801, 1540), (1801, 2560)]) -def test_paganin_filter_calc_mem_big(slices, dims, ensure_clean_memory): +@pytest.mark.parametrize( + "padding", + [ + ("next_power_of_2", None), + ("next_fast_length", None), + ("use_pad_x_y", (145, 122)), + ], +) +def test_paganin_filter_calc_mem_big(slices, dims, padding, ensure_clean_memory): dim_y, dim_x = dims data_shape = (slices, dim_x, dim_y) + padding_method, pad_x_y = padding try: - estimated_mem_peak = paganin_filter(data_shape, calc_peak_gpu_mem=True) + estimated_mem_peak = paganin_filter( + data_shape, + calculate_padding_value_method=padding_method, + pad_x_y=pad_x_y, + calc_peak_gpu_mem=True, + ) except cp.cuda.memory.OutOfMemoryError: pytest.skip("Not enough GPU memory to estimate memory peak") + except cp.cuda.cufft.CuFFTError as cufft_error: + if cufft_error.result == 8: # CUFFT_INVALID_SIZE + pytest.skip("Not usable FFT size") + else: + raise av_mem = cp.cuda.Device().mem_info[0] if av_mem < estimated_mem_peak: pytest.skip("Not enough GPU memory to run this test") @@ -120,8 +181,11 @@ def test_paganin_filter_calc_mem_big(slices, dims, ensure_clean_memory): hook = MaxMemoryHook() with hook: data = cp.random.random_sample(data_shape, dtype=np.float32) - paganin_filter(data) + paganin_filter( + data, + calculate_padding_value_method=padding_method, + pad_x_y=pad_x_y, + ) actual_mem_peak = hook.max_mem - assert actual_mem_peak * 0.99 <= estimated_mem_peak - assert estimated_mem_peak <= actual_mem_peak * 1.01 + assert actual_mem_peak == estimated_mem_peak diff --git a/zenodo-tests/test_prep/test_phase.py b/zenodo-tests/test_prep/test_phase.py index 4687ded9..79839cbd 100644 --- a/zenodo-tests/test_prep/test_phase.py +++ b/zenodo-tests/test_prep/test_phase.py @@ -8,7 +8,16 @@ # ----------------------------------------------------------# # appplying paganin filter to i12_dataset3 -def test_paganin_filter_i12_dataset3(i12_dataset3, ensure_clean_memory): +@pytest.mark.parametrize( + "test_case", + [ + ("next_power_of_2", None, -6.2188172, -10.92260456), + ("next_fast_length", None, -6.867182, -10.92272), + ("use_pad_x_y", (80, 80), -6.32930, -10.92270), + ], +) +def test_paganin_filter_i12_dataset3(i12_dataset3, test_case, ensure_clean_memory): + padding_method, pad_x_y, test_max, test_min = test_case inputdata = cp.empty((3, 2050, 2560)) inputdata[0, :, :] = i12_dataset3[0] inputdata[1, :, :] = i12_dataset3[1] @@ -17,7 +26,9 @@ def test_paganin_filter_i12_dataset3(i12_dataset3, ensure_clean_memory): ensure_clean_memory - filtered_paganin = paganin_filter(inputdata) + filtered_paganin = paganin_filter( + inputdata, calculate_padding_value_method=padding_method, pad_x_y=pad_x_y + ) - assert pytest.approx(np.max(cp.asnumpy(filtered_paganin)), rel=1e-3) == -6.2188172 - assert pytest.approx(np.min(cp.asnumpy(filtered_paganin)), rel=1e-3) == -10.92260456 + assert pytest.approx(np.max(cp.asnumpy(filtered_paganin)), rel=1e-3) == test_max + assert pytest.approx(np.min(cp.asnumpy(filtered_paganin)), rel=1e-3) == test_min From b6f6ef061e2ca250fb9a7a4b509e8cdb1b9451b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc=20Serf=C5=91z=C5=91?= Date: Mon, 12 Jan 2026 15:20:51 +0100 Subject: [PATCH 2/2] Use list instead of tuple for pad_x_y --- httomolibgpu/prep/phase.py | 26 ++++++++++++++-------- tests/test_prep/test_phase.py | 33 +++++++++++++++++++++++----- zenodo-tests/test_prep/test_phase.py | 2 +- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py index 5dfd54f4..41f8468a 100644 --- a/httomolibgpu/prep/phase.py +++ b/httomolibgpu/prep/phase.py @@ -60,7 +60,7 @@ def paganin_filter( calculate_padding_value_method: Literal[ "next_power_of_2", "next_fast_length", "use_pad_x_y" ] = "next_power_of_2", - pad_x_y: Optional[Tuple[int, int]] = None, + pad_x_y: Optional[list] = None, calc_peak_gpu_mem: bool = False, ) -> cp.ndarray: """ @@ -81,7 +81,7 @@ def paganin_filter( The ratio of delta/beta, where delta is the phase shift and real part of the complex material refractive index and beta is the absorption. calculate_padding_value_method: str Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`. - pad_x_y (int, int) | None: + pad_x_y list | None: Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'. calc_peak_gpu_mem: bool Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user. @@ -233,7 +233,7 @@ def _calculate_pad_size( calculate_padding_value_method: Literal[ "next_power_of_2", "next_fast_length", "use_pad_x_y" ], - pad_x_y: Optional[Tuple[int, int]], + pad_x_y: Optional[list], ) -> list: """Calculating the padding size @@ -252,10 +252,18 @@ def _calculate_pad_size( raise ValueError( 'calculate_padding_value_method must be "use_pad_x_y" when pad_x_y is specified' ) - elif calculate_padding_value_method == "use_pad_x_y" and pad_x_y is None: - raise ValueError( - 'pad_x_y must be provided when calculate_padding_value_method is "use_pad_x_y"' - ) + elif calculate_padding_value_method == "use_pad_x_y": + if pad_x_y is None: + raise ValueError( + 'pad_x_y must be provided when calculate_padding_value_method is "use_pad_x_y"' + ) + elif ( + not isinstance(pad_x_y, list) + or len(pad_x_y) != 2 + or not isinstance(pad_x_y[0], int) + or not isinstance(pad_x_y[1], int) + ): + raise ValueError("pad_x_y must be a list of two integers") if calculate_padding_value_method == "next_power_of_2": calculate_padded_dim = lambda _, size: _shift_bit_length(size + 1) @@ -292,7 +300,7 @@ def _pad_projections( calculate_padding_value_method: Literal[ "next_power_of_2", "next_fast_length", "use_pad_x_y" ], - pad_x_y: Optional[Tuple[int, int]], + pad_x_y: Optional[list], mem_stack: Optional[_DeviceMemStack], ) -> Tuple[cp.ndarray, Tuple[int, int]]: """ @@ -305,7 +313,7 @@ def _pad_projections( 3d projection data calculate_padding_value_method: str Method to calculate the padded size of the input data. Accepted values are 'next_power_of_2', 'next_fast_length' and 'use_pad_x_y`. - pad_x_y (int, int) | None: + pad_x_y: list | None: Padding values in pixels horizontally and vertically. Must be None, unless `calculate_padding_value_method` is 'use_pad_x_y'. diff --git a/tests/test_prep/test_phase.py b/tests/test_prep/test_phase.py index 30599294..128bcfde 100644 --- a/tests/test_prep/test_phase.py +++ b/tests/test_prep/test_phase.py @@ -21,6 +21,29 @@ def test_paganin_filter_1D_raises(ensure_clean_memory): _data = None #: free up GPU memory +@pytest.mark.parametrize( + "padding", + [ + ("something", None), + ("something", [122, 133]), + ("next_power_of_2", [0, 0]), + ("next_fast_length", [20, 20]), + ("use_pad_x_y", [20, "20"]), + ("use_pad_x_y", [10, 20, 30]), + ("use_pad_x_y", [20, 20, 30]), + ("use_pad_x_y", (20, 20)), + ("use_pad_x_y", None), + ], +) +def test_paganin_filter_invalid_padding_raises(padding, ensure_clean_memory): + _data = cp.ones(10) + padding_method, pad_x_y = padding + with pytest.raises(ValueError): + paganin_filter( + _data, calculate_padding_value_method=padding_method, pad_x_y=pad_x_y + ) + + def test_paganin_filter(data): # --- testing the Paganin filter on tomo_standard ---# filtered_data = paganin_filter(data).get() @@ -56,9 +79,9 @@ def test_paganin_filter_dist3(data): [ ("next_power_of_2", None, -6.725061, -6.367116), ("next_fast_length", None, -6.677313, -6.096187), - ("use_pad_x_y", (0, 0), -6.677313, -6.096187), - ("use_pad_x_y", (80, 80), -6.73193, -6.405338), - ("use_pad_x_y", (45, 75), -6.726483, -6.37466), + ("use_pad_x_y", [0, 0], -6.677313, -6.096187), + ("use_pad_x_y", [80, 80], -6.73193, -6.405338), + ("use_pad_x_y", [45, 75], -6.726483, -6.37466), ], ) def test_paganin_filter_padding_options(data, test_case): @@ -117,7 +140,7 @@ def test_paganin_filter_performance(ensure_clean_memory): @pytest.mark.parametrize("dim_x", [128, 140]) @pytest.mark.parametrize( "padding", - [("next_power_of_2", None), ("next_fast_length", None), ("use_pad_x_y", (45, 45))], + [("next_power_of_2", None), ("next_fast_length", None), ("use_pad_x_y", [45, 45])], ) def test_paganin_filter_calc_mem(slices, dim_x, padding, ensure_clean_memory): dim_y = 159 @@ -153,7 +176,7 @@ def test_paganin_filter_calc_mem(slices, dim_x, padding, ensure_clean_memory): [ ("next_power_of_2", None), ("next_fast_length", None), - ("use_pad_x_y", (145, 122)), + ("use_pad_x_y", [145, 122]), ], ) def test_paganin_filter_calc_mem_big(slices, dims, padding, ensure_clean_memory): diff --git a/zenodo-tests/test_prep/test_phase.py b/zenodo-tests/test_prep/test_phase.py index 79839cbd..55cf3f0e 100644 --- a/zenodo-tests/test_prep/test_phase.py +++ b/zenodo-tests/test_prep/test_phase.py @@ -13,7 +13,7 @@ [ ("next_power_of_2", None, -6.2188172, -10.92260456), ("next_fast_length", None, -6.867182, -10.92272), - ("use_pad_x_y", (80, 80), -6.32930, -10.92270), + ("use_pad_x_y", [80, 80], -6.32930, -10.92270), ], ) def test_paganin_filter_i12_dataset3(i12_dataset3, test_case, ensure_clean_memory):