diff --git a/python/infinicore/__init__.py b/python/infinicore/__init__.py
index c6b01d5aa..1b5d734fd 100644
--- a/python/infinicore/__init__.py
+++ b/python/infinicore/__init__.py
@@ -52,6 +52,7 @@
 from infinicore.ops.paged_attention_prefill import paged_attention_prefill
 from infinicore.ops.paged_caching import paged_caching
 from infinicore.ops.rearrange import rearrange
+from infinicore.ops.softmax import softmax
 from infinicore.ops.squeeze import squeeze
 from infinicore.ops.unsqueeze import unsqueeze
 from infinicore.tensor import (
@@ -121,6 +122,7 @@
     "squeeze",
     "unsqueeze",
     "rearrange",
+    "softmax",
     "empty",
     "empty_like",
     "from_blob",
diff --git a/python/infinicore/ops/softmax.py b/python/infinicore/ops/softmax.py
new file mode 100644
index 000000000..50333043d
--- /dev/null
+++ b/python/infinicore/ops/softmax.py
@@ -0,0 +1,38 @@
+import infinicore
+from infinicore.tensor import Tensor
+
+
+def softmax(input: Tensor, dim: int, dtype=None, *, out=None) -> Tensor:
+    r"""Apply the softmax function over a given dimension."""
+
+    if dim is None:
+        raise TypeError("softmax() missing required argument: 'dim'")
+
+    if not infinicore.use_ntops or input.device.type not in ("cuda", "musa"):
+        raise RuntimeError("softmax is currently only available with ntops on CUDA/MUSA devices")
+
+    if out is None:
+        target_dtype = dtype if dtype is not None else input.dtype
+        return infinicore.ntops.torch.softmax(input, dim, dtype=target_dtype)
+
+    if not isinstance(out, Tensor):
+        raise TypeError(f"out must be a Tensor, got {type(out).__name__}")
+
+    if out.shape != input.shape:
+        raise ValueError("out tensor must have the same shape as input")
+
+    if out.device != input.device:
+        raise ValueError("out tensor must be on the same device as input")
+
+    target_dtype = dtype if dtype is not None else out.dtype
+
+    if dtype is not None and out.dtype != target_dtype:
+        raise TypeError("out tensor dtype must match the dtype argument")
+
+    # Reuse the cached ntops kernel to write directly into the provided output tensor.
+    from infinicore.ntops.torch.utils import _cached_make
+
+    kernel = _cached_make(infinicore.ntops.kernels.softmax.premake, input.ndim, dim)
+    kernel(input, out)
+
+    return out
diff --git a/test/infinicore/ops/softmax.py b/test/infinicore/ops/softmax.py
new file mode 100644
index 000000000..38bbae1e7
--- /dev/null
+++ b/test/infinicore/ops/softmax.py
@@ -0,0 +1,86 @@
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import torch
+import infinicore
+from framework import (
+    BaseOperatorTest,
+    TensorSpec,
+    TestCase,
+    GenericTestRunner,
+    is_broadcast,
+)
+
+# Test cases format: (in_shape, in_strides_or_None, dim_or_None)
+
+_TEST_CASES_DATA = [
+    ((4, 10), None, -1),
+    ((2, 5, 8), (40, 8, 1), 1),
+    ((8, 20), None, 1),
+]
+
+_TOLERANCE_MAP = {
+    infinicore.float16: {"atol": 1e-3, "rtol": 1e-2},
+    infinicore.float32: {"atol": 1e-5, "rtol": 1e-4},
+    infinicore.bfloat16: {"atol": 1e-2, "rtol": 5e-2},
+}
+
+_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
+
+
+def parse_test_cases():
+    """Build test cases for softmax(input, dim, dtype=None)."""
+    test_cases = []
+
+    for data in _TEST_CASES_DATA:
+        shape = data[0]
+        in_strides = data[1] if len(data) > 1 else None
+        dim = data[2] if len(data) > 2 else -1
+
+        for dtype in _TENSOR_DTYPES:
+            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 1e-5, "rtol": 1e-4})
+            input_spec = TensorSpec.from_tensor(shape, in_strides, dtype)
+
+            kwargs = {"dim": dim}
+
+            test_cases.append(
+                TestCase(
+                    inputs=[input_spec],
+                    kwargs=kwargs,
+                    output_spec=None,
+                    comparison_target=None,
+                    tolerance=tolerance,
+                    description="softmax - OUT_OF_PLACE",
+                )
+            )
+
+    return test_cases
+
+
+class OpTest(BaseOperatorTest):
+    """Softmax operator test with simplified implementation."""
+
+    def __init__(self):
+        super().__init__("softmax")
+
+    def get_test_cases(self):
+        return parse_test_cases()
+
+    def torch_operator(self, *args, **kwargs):
+        return torch.nn.functional.softmax(*args, **kwargs)
+
+    # def infinicore_operator(self, *args, **kwargs):
+    #     """InfiniCore implementation (operator not yet available)."""
+    #     return infinicore.nn.functional.softmax(*args, **kwargs)
+
+
+def main():
+    """Main entry point."""
+    runner = GenericTestRunner(OpTest)
+    runner.run_and_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/testop.py b/testop.py
new file mode 100644
index 000000000..e46d4fa61
--- /dev/null
+++ b/testop.py
@@ -0,0 +1,11 @@
+import infinicore as ic
+
+device = ic.device("cuda:0")
+
+q = ic.empty((1, 1, 4), dtype=ic.float16, device=device)
+
+print(q.info)
+
+q = ic.softmax(q, dim=-1)
+
+print(q)
diff --git a/third_party/spdlog b/third_party/spdlog
index f1d748e5e..3f03542d2 160000
--- a/third_party/spdlog
+++ b/third_party/spdlog
@@ -1 +1 @@
-Subproject commit f1d748e5e3edfa4b1778edea003bac94781bc7b7
+Subproject commit 3f03542d2eb4952e3b279d9cad9098d370b7be57
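Usage sketch for the softmax API added above. This assumes an ntops-enabled build running on a CUDA (or MUSA) device; on other configurations the function raises RuntimeError. Only constructors that appear elsewhere in this patch (empty, empty_like, device, float16) are used.

    import infinicore as ic

    device = ic.device("cuda:0")
    x = ic.empty((2, 8), dtype=ic.float16, device=device)

    y = ic.softmax(x, dim=-1)          # out-of-place; result dtype defaults to x.dtype
    out = ic.empty_like(x)
    ic.softmax(x, dim=-1, out=out)     # writes into the preallocated tensor via the cached ntops kernel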