From 0086e37ff907f2511b11be7609fc2348c85d0a41 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 03:56:12 -0800 Subject: [PATCH 1/5] Remove cython backend impl of putmask --- dpnp/dpnp_algo/CMakeLists.txt | 5 +-- dpnp/dpnp_algo/dpnp_algo.pyx | 1 - dpnp/dpnp_algo/dpnp_algo_indexing.pxi | 54 --------------------------- 3 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 dpnp/dpnp_algo/dpnp_algo_indexing.pxi diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt index c18a3dc8eac9..3f8196bc64f8 100644 --- a/dpnp/dpnp_algo/CMakeLists.txt +++ b/dpnp/dpnp_algo/CMakeLists.txt @@ -27,10 +27,7 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -set(dpnp_algo_pyx_deps - ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi - ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pxi -) +set(dpnp_algo_pyx_deps ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi) build_dpnp_cython_ext_with_backend( dpnp_algo diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 53abcad11986..2946445f9a50 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -48,7 +48,6 @@ __all__ = [ ] -include "dpnp_algo_indexing.pxi" include "dpnp_algo_sorting.pxi" diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi deleted file mode 100644 index 54ed3e99fba8..000000000000 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi +++ /dev/null @@ -1,54 +0,0 @@ -# cython: language_level=3 -# cython: linetrace=True -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# - Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -"""Module Backend (Indexing part) - -This module contains interface functions between C backend layer -and the rest of the library - -""" - -# NO IMPORTs here. 
All imports must be placed into main "dpnp_algo.pyx" file - -__all__ += [ - "dpnp_putmask", -] - -cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): - cdef int values_size = values.size - - mask_flatiter = mask.get_pyobj().flat - arr_flatiter = arr.get_pyobj().flat - values_flatiter = values.get_pyobj().flat - - for i in range(arr.size): - if mask_flatiter[i]: - arr_flatiter[i] = values_flatiter[i % values_size] From cc2c154513f0dab6243c005e322e9c2937cc80cc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 04:06:05 -0800 Subject: [PATCH 2/5] Add C-contig implementation of putmask --- .../extensions/indexing/CMakeLists.txt | 1 + .../extensions/indexing/indexing_py.cpp | 2 + dpnp/backend/extensions/indexing/putmask.cpp | 223 +++++++++++++++ dpnp/backend/extensions/indexing/putmask.hpp | 38 +++ .../extensions/indexing/putmask_kernel.hpp | 257 ++++++++++++++++++ dpnp/dpnp_iface_indexing.py | 91 +++++-- 6 files changed, 592 insertions(+), 20 deletions(-) create mode 100644 dpnp/backend/extensions/indexing/putmask.cpp create mode 100644 dpnp/backend/extensions/indexing/putmask.hpp create mode 100644 dpnp/backend/extensions/indexing/putmask_kernel.hpp diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index a6691f31f559..b407aa30eb07 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -30,6 +30,7 @@ set(python_module_name _indexing_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/choose.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/putmask.cpp ${CMAKE_CURRENT_SOURCE_DIR}/indexing_py.cpp ) diff --git a/dpnp/backend/extensions/indexing/indexing_py.cpp b/dpnp/backend/extensions/indexing/indexing_py.cpp index a2d0b2efd512..4c4e6808b2dd 100644 --- a/dpnp/backend/extensions/indexing/indexing_py.cpp +++ b/dpnp/backend/extensions/indexing/indexing_py.cpp @@ -33,8 +33,10 @@ #include #include "choose.hpp" +#include "putmask.hpp" PYBIND11_MODULE(_indexing_impl, m) { dpnp::extensions::indexing::init_choose(m); + dpnp::extensions::indexing::init_putmask(m); } diff --git a/dpnp/backend/extensions/indexing/putmask.cpp b/dpnp/backend/extensions/indexing/putmask.cpp new file mode 100644 index 000000000000..153da7dbf548 --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask.cpp @@ -0,0 +1,223 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include + +#include + +#include "dpctl4pybind11.hpp" +#include +#include + +#include "putmask_kernel.hpp" + +#include "../elementwise_functions/simplify_iteration_space.hpp" + +// dpctl tensor headers +#include "utils/offset_utils.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +// utils extension headers +#include "ext/common.hpp" +#include "ext/validation_utils.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::usm_ndarray; + +using ext::common::dtype_from_typenum; +using ext::validation::array_names; +using ext::validation::check_has_dtype; +using ext::validation::check_no_overlap; +using ext::validation::check_num_dims; +using ext::validation::check_queue; +using ext::validation::check_same_dtype; +using ext::validation::check_same_size; +using ext::validation::check_writable; + +namespace dpnp::extensions::indexing +{ +using ext::common::init_dispatch_vector; + +typedef sycl::event (*putmask_contig_fn_ptr_t)( + sycl::queue &, + const std::size_t, // nelems + char *, // dst + const char *, // mask + const char *, // values + const std::size_t, // values_size + const std::vector &); + +static putmask_contig_fn_ptr_t putmask_contig_dispatch_vector[td_ns::num_types]; + +std::pair + py_putmask(const usm_ndarray &dst, + const usm_ndarray &mask, + const usm_ndarray &values, + sycl::queue &exec_q, + const std::vector &depends = {}) +{ + array_names names = {{&dst, "dst"}, {&mask, "mask"}, {&values, "values"}}; + + check_same_dtype(&dst, &values, names); + check_has_dtype(&mask, td_ns::typenum_t::BOOL, names); + + check_same_size({&dst, &mask}, names); + const int nd = dst.get_ndim(); + check_num_dims(&mask, nd, names); + + check_queue({&dst, &mask, &values}, names, exec_q); + check_no_overlap({&mask, &values}, {&dst}, names); + check_writable({&dst}, names); + + // values must be 1D + check_num_dims(&values, 1, names); + + auto types = td_ns::usm_ndarray_types(); + // dst_typeid == values_typeid (check_same_dtype(&dst, &values, names)) + int dst_values_typeid = types.typenum_to_lookup_id(dst.get_typenum()); + + const py::ssize_t *dst_shape = dst.get_shape_raw(); + const py::ssize_t *mask_shape = mask.get_shape_raw(); + bool shapes_equal(true); + std::size_t nelems(1); + + for (int i = 0; i < std::max(nd, 1); ++i) { + const py::ssize_t d = (nd == 0 ? 1 : dst_shape[i]); + const py::ssize_t m = (nd == 0 ? 
1 : mask_shape[i]); + nelems *= static_cast(d); + shapes_equal = shapes_equal && (d == m); + } + if (!shapes_equal) { + throw py::value_error("`mask` and `dst` shapes must match"); + } + + // if nelems is zero, return + if (nelems == 0) { + return {sycl::event(), sycl::event()}; + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, nelems); + + char *dst_p = dst.get_data(); + const char *mask_p = mask.get_data(); + const char *values_p = values.get_data(); + const std::size_t values_size = values.get_size(); + + // handle C contiguous inputs + const bool is_dst_c_contig = dst.is_c_contiguous(); + const bool is_mask_c_contig = mask.is_c_contiguous(); + const bool is_values_c_contig = values.is_c_contiguous(); + + const bool all_c_contig = + (is_dst_c_contig && is_mask_c_contig && is_values_c_contig); + + if (all_c_contig) { + auto contig_fn = putmask_contig_dispatch_vector[dst_values_typeid]; + + if (contig_fn == nullptr) { + py::dtype dst_values_dtype_py = + dtype_from_typenum(dst_values_typeid); + throw std::runtime_error( + "Contiguous implementation is missing for " + + std::string(py::str(dst_values_dtype_py)) + "data type"); + } + + auto comp_ev = contig_fn(exec_q, nelems, dst_p, mask_p, values_p, + values_size, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {dst, mask, values}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + throw py::value_error("Stride implementation is not implemented yet"); +} + +/** + * @brief A factory to define pairs of supported types for which + * putmask function is available. + * + * @tparam T Type of input vector `dst` and `values` and of result vector `dst`. + */ +template +struct PutMaskOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +struct PutMaskContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + return nullptr; + } + else { + return kernels::putmask_contig_impl; + } + } +}; + +static void populate_putmask_dispatch_vectors() +{ + init_dispatch_vector( + putmask_contig_dispatch_vector); +} + +void init_putmask(py::module_ m) +{ + populate_putmask_dispatch_vectors(); + + m.def("_putmask", &py_putmask, "", py::arg("dst"), py::arg("mask"), + py::arg("values"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); +} + +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/putmask.hpp b/dpnp/backend/extensions/indexing/putmask.hpp new file mode 100644 index 000000000000..2a66d55aa285 --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::indexing +{ +void init_putmask(py::module_ m); +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/putmask_kernel.hpp b/dpnp/backend/extensions/indexing/putmask_kernel.hpp new file mode 100644 index 000000000000..ba73edd7580f --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask_kernel.hpp @@ -0,0 +1,257 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/sycl_complex.hpp" +#include "utils/offset_utils.hpp" +#include "utils/sycl_utils.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::indexing::kernels +{ +template +struct PutMaskContigFunctor +{ +private: + T *dst_ = nullptr; + const std::uint8_t *mask_u8_ = nullptr; + const T *values_ = nullptr; + std::size_t nelems_ = 0; + std::size_t val_size_ = 0; + +public: + PutMaskContigFunctor(T *dst, + const bool *mask, + const T *values, + std::size_t nelems, + std::size_t val_size) + : dst_(dst), mask_u8_(reinterpret_cast(mask)), + values_(values), nelems_(nelems), val_size_(val_size) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + if (val_size_ == 0 || nelems_ == 0) { + return; + } + + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + using dpctl::tensor::type_utils::is_complex_v; + if constexpr (enable_sg_loadstore && !is_complex_v) { + auto sg = ndit.get_sub_group(); + const std::uint32_t sgSize = sg.get_max_local_range()[0]; + const std::size_t lane_id = sg.get_local_id()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + const bool values_no_repeat = (val_size_ >= nelems_); + + if (base + elems_per_wi * sgSize <= nelems_) { + using dpctl::tensor::sycl_utils::sub_group_load; + using dpctl::tensor::sycl_utils::sub_group_store; + +#pragma unroll + for 
(std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + + auto dst_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&dst_[offset]); + auto mask_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&mask_u8_[offset]); + + const sycl::vec dst_vec = + sub_group_load(sg, dst_multi_ptr); + const sycl::vec mask_vec = + sub_group_load(sg, mask_multi_ptr); + + sycl::vec val_vec; + + if (values_no_repeat) { + auto values_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&values_[offset]); + + val_vec = sub_group_load(sg, values_multi_ptr); + } + else { + const std::size_t idx = offset + lane_id; +#pragma unroll + for (std::uint8_t k = 0; k < vec_sz; ++k) { + const std::size_t g = + idx + static_cast(k) * sgSize; + val_vec[k] = values_[g % val_size_]; + } + } + + sycl::vec out_vec; +#pragma unroll + for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) { + out_vec[vec_id] = + (mask_vec[vec_id] != static_cast(0)) + ? val_vec[vec_id] + : dst_vec[vec_id]; + } + + sub_group_store(sg, out_vec, dst_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + if (mask_u8_[k]) { + const std::size_t v = + values_no_repeat ? k : (k % val_size_); + dst_[k] = values_[v]; + } + } + } + } + else { + const std::size_t gid = ndit.get_global_linear_id(); + const std::size_t gws = ndit.get_global_range(0); + + const bool values_no_repeat = (val_size_ >= nelems_); + for (std::size_t offset = gid; offset < nelems_; offset += gws) { + if (mask_u8_[offset]) { + const std::size_t v = + values_no_repeat ? offset : (offset % val_size_); + dst_[offset] = values_[v]; + } + } + } + } +}; + +template +sycl::event putmask_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + char *dst_cp, + const char *mask_cp, + const char *values_cp, + std::size_t values_size, + const std::vector &depends = {}) +{ + T *dst_tp = reinterpret_cast(dst_cp); + const bool *mask_tp = reinterpret_cast(mask_cp); + const T *values_tp = reinterpret_cast(values_cp); + + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + // const std::size_t n_work_items_needed = (nelems + elems_per_wi - 1) / + // elems_per_wi; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t empirical_threshold = std::size_t(1) << 21; + const std::size_t lws = (n_work_items_needed <= empirical_threshold) + ? 
std::size_t(128) + : std::size_t(256); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using dpctl::tensor::kernels::alignment_utils::is_aligned; + using dpctl::tensor::kernels::alignment_utils::required_alignment; + + const bool aligned = is_aligned(dst_tp) && + is_aligned(mask_tp) && + is_aligned(values_tp); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (aligned) { + constexpr bool enable_sg = true; + using PutMaskFunc = + PutMaskContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + PutMaskFunc(dst_tp, mask_tp, values_tp, nelems, values_size)); + } + else { + constexpr bool enable_sg = false; + using PutMaskFunc = + PutMaskContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + PutMaskFunc(dst_tp, mask_tp, values_tp, nelems, values_size)); + } + }); + + return comp_ev; +} + +} // namespace dpnp::extensions::indexing::kernels diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 6e7ab778299b..31f9a680a51f 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -40,6 +40,7 @@ """ # pylint: disable=protected-access +# pylint: disable=c-extension-no-member import operator from collections.abc import Iterable @@ -57,12 +58,8 @@ # pylint: disable=no-name-in-module import dpnp.backend.extensions.indexing._indexing_impl as indexing_ext -# pylint: disable=no-name-in-module -from .dpnp_algo import ( - dpnp_putmask, -) from .dpnp_array import dpnp_array -from .dpnp_utils import call_origin, get_usm_allocations +from .dpnp_utils import get_usm_allocations def _ravel_multi_index_checks(multi_index, dims, order): @@ -1804,30 +1801,84 @@ def put_along_axis(a, ind, values, axis, mode="wrap"): dpt.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode) -def putmask(x1, mask, values): +def putmask(a, /, mask, values): """ Changes elements of an array based on conditional and input values. For full documentation refer to :obj:`numpy.putmask`. - Limitations - ----------- - Input arrays ``arr``, ``mask`` and ``values`` are supported - as :obj:`dpnp.ndarray`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Target array. + mask : {dpnp.ndarray, usm_ndarray} + Boolean mask array. It has to be the same shape as `a`. + values : {scalar, array_like} + Values to put into `a` where `mask` is True. + If `values` is smaller than `a`, then it will be repeated. + + See Also + -------- + :obj:`dpnp.place` : Change elements of an array based on conditional and + input values. + :obj:`dpnp.put` : Replaces specified elements of an array with given values. + :obj:`dpnp.take` : Take elements from an array along an axis. + :obj:`dpnp.copyto` : Copies values from one array to another, broadcasting + as necessary. 
+ + Examples + -------- + >>> import dpnp as np + >>> x = np.arange(6).reshape(2, 3) + >>> np.putmask(x, x>2, x**2) + >>> x + array([[ 0, 1, 2], + [ 9, 16, 25]]) + + If `values` is smaller than `a` it is repeated: + + >>> x = np.arange(5) + >>> np.putmask(x, x>1, np.array([-33, -44])) + >>> x + array([ 0, 1, -33, -44, -33]) """ - x1_desc = dpnp.get_dpnp_descriptor( - x1, copy_when_strides=False, copy_when_nondefault_queue=False - ) - mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False) - values_desc = dpnp.get_dpnp_descriptor( - values, copy_when_nondefault_queue=False - ) - if x1_desc and mask_desc and values_desc: - return dpnp_putmask(x1_desc, mask_desc, values_desc) + dpnp.check_supported_arrays_type(a, mask) + dpnp.check_supported_arrays_type(values, scalar_type=True, all_scalars=True) + + if not a.shape == mask.shape: + raise ValueError("mask and data must be the same size") + + mask = dpnp.astype(mask, dpnp.bool, copy=False) + + if dpnp.isscalar(values): + a[mask] = values + + elif not dpnp.can_cast(values.dtype, a.dtype): + raise TypeError( + "Cannot cast array data from" + f" {values.dtype} to {a.dtype} according to the rule 'safe'" + ) + + elif a.shape == values.shape: + a[mask] = values[mask] + + else: + values = values.ravel(order="C") - return call_origin(numpy.putmask, x1, mask, values, dpnp_inplace=True) + if a.dtype != values.dtype: + values = dpnp.astype(values, a.dtype, casting="safe", copy=False) + + _, exec_q = get_usm_allocations([a, mask, values]) + + _manager = dpu.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + + h_ev, putmask_ev = indexing_ext._putmask( + a.get_array(), mask.get_array(), values.get_array(), exec_q, dep_evs + ) + _manager.add_event_pair(h_ev, putmask_ev) def ravel_multi_index(multi_index, dims, mode="raise", order="C"): From 367bdb4cc683ea1fd07dde0f4416ed211739f186 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 04:20:12 -0800 Subject: [PATCH 3/5] Unskip TestPutmaskDifferentDtypes in cupy tests --- dpnp/tests/third_party/cupy/indexing_tests/test_insert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py b/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py index 7399343e7e57..29d5238b0315 100644 --- a/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py +++ b/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py @@ -231,7 +231,6 @@ def test_putmask_int_mask_scalar_values(self, xp): class TestPutmaskDifferentDtypes(unittest.TestCase): - @pytest.mark.skip("putmask() is not fully supported") @testing.for_all_dtypes_combination(names=["a_dtype", "val_dtype"]) def test_putmask_differnt_dtypes_raises(self, a_dtype, val_dtype): shape = (2, 3) From 764a9e58729028ff4a06597f4910e67e377ac589 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 05:38:48 -0800 Subject: [PATCH 4/5] Add TestPutmask --- dpnp/tests/test_indexing.py | 268 +++++++++++++++++++++--------------- 1 file changed, 155 insertions(+), 113 deletions(-) diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 9a55efe138b7..b01ba9abfffb 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -17,6 +17,12 @@ import dpnp from dpnp.dpnp_array import dpnp_array +from dpnp.tests.helper import ( + assert_array_equal, + assert_dtype_allclose, + generate_random_numpy_array, + get_all_dtypes, +) from .helper import ( get_abs_array, @@ -954,126 +960,162 @@ def test_indices(dimension, dtype, 
sparse): assert_array_equal(Xnp, X) -@pytest.mark.parametrize( - "mask", - [ - [[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [True, True]], - ], - ids=[ - "[[True, False], [False, True]]", - "[[False, True], [True, False]]", - "[[False, False], [True, True]]", - ], -) -@pytest.mark.parametrize( - "arr", - [[[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], - ids=["[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]"], -) -def test_putmask1(arr, mask): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array([100, 200]) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) +class TestPutmask: + @pytest.mark.parametrize( + "shape", + [ + (1,), + (5,), + (4, 3), + (3, 3), + (5, 3), + (3, 4, 5), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + def test_putmask_scalar_values(self, shape, dt, order): + a_np = generate_random_numpy_array(shape, order=order, dtype=dt) + mask_np = a_np > 0 + val = numpy.array(7, dtype=dt).item() + a_dp = dpnp.array(a_np) + mask_dp = dpnp.array(mask_np) -@pytest.mark.parametrize( - "vals", - [ - [100, 200], - [100, 200, 300, 400, 500, 600], - [100, 200, 300, 400, 500, 600, 800, 900], - ], - ids=[ - "[100, 200]", - "[100, 200, 300, 400, 500, 600]", - "[100, 200, 300, 400, 500, 600, 800, 900]", - ], -) -@pytest.mark.parametrize( - "mask", - [ + dpnp.putmask(a_dp, mask_dp, val) + numpy.putmask(a_np, mask_np, val) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "shape", [ - [[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [True, True]], - ] - ], - ids=[ - "[[[True, False], [False, True]], [[False, True], [True, False]], [[False, False], [True, True]]]" - ], -) -@pytest.mark.parametrize( - "arr", - [[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]], - ids=["[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]"], -) -def test_putmask2(arr, mask, vals): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array(vals) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) + (1,), + (5,), + (4, 3), + (3, 3), + (5, 3), + (3, 4, 5), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + def test_putmask_same_shape(self, shape, dt, order): + a_np = generate_random_numpy_array(shape, dtype=dt, order=order) + mask_np = a_np > 0 + val_np = generate_random_numpy_array(shape, dtype=dt, order=order) + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + val_dp = dpnp.array(val_np, order=order) -@pytest.mark.parametrize( - "vals", - [ - [100, 200], - [100, 200, 300, 400, 500, 600], - [100, 200, 300, 400, 500, 600, 800, 900], - ], - ids=[ - "[100, 200]", - "[100, 200, 300, 400, 500, 600]", - "[100, 200, 300, 400, 500, 600, 800, 900]", - ], -) -@pytest.mark.parametrize( - "mask", - [ + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "a_shape,val_shape", [ - [[[False, False], [True, True]], [[True, True], [True, True]]], - [[[False, False], [True, True]], [[False, False], [False, False]]], - ] - ], - ids=[ - "[[[[False, False], [True, True]], [[True, True], [True, True]]], 
[[[False, False], [True, True]], [[False, False], [False, False]]]]" - ], -) -@pytest.mark.parametrize( - "arr", - [ + ((6,), (3,)), + ((6,), (7,)), + ((2, 3), (5,)), + ((6, 3), (5, 4)), + ((4, 3, 5), (8,)), + ((2, 4, 3), (5, 5, 2)), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C"]) # need to add "F" + def test_putmask_kernel(self, a_shape, val_shape, dt, order): + a_np = generate_random_numpy_array(a_shape, dtype=dt, order=order) + mask_np = a_np > 0 + val_np = generate_random_numpy_array(val_shape, dtype=dt, order=order) + + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + val_dp = dpnp.array(val_np, order=order) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + # test_putmask_strided + + def test_putmask_mask_cast_to_bool(self): + a_np = generate_random_numpy_array((5, 5), dtype="f4") + mask_np = generate_random_numpy_array((5, 5), dtype="int64") + val_np = generate_random_numpy_array((7,), dtype="f4") + + a_dp = dpnp.array(a_np) + mask_dp = dpnp.array(mask_np) + val_dp = dpnp.array(val_np) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize( + "shape", [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - ] - ], - ids=[ - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]" - ], -) -def test_putmask3(arr, mask, vals): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array(vals) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) + (0,), + (0, 3), + (2, 0), + (0, 2, 3), + ], + ) + @pytest.mark.parametrize( + "values_case", ["scalar", "same_shape", "diff_shape"] + ) + def test_putmask_empty(self, dt, order, shape, values_case): + a_np = numpy.empty(shape, dtype=dt, order=order) + mask_np = numpy.empty(shape, dtype=numpy.bool_, order=order) + + if values_case == "scalar": + val_np = numpy.asarray(1, dtype=dt).item() + val_dp = val_np + elif values_case == "same_shape": + val_np = numpy.empty(shape, dtype=dt, order=order) + val_dp = dpnp.array(val_np, order=order) + else: + # different shape + val_np = numpy.array([1, 2], dtype=dt) + val_dp = dpnp.array(val_np) + + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + def test_putmask_errors(self): + # shape mask mismatch + a = dpnp.arange(6).reshape(2, 3) + mask_bad = dpnp.ones((3, 2), dtype=dpnp.bool) + assert_raises(ValueError, dpnp.putmask, a, mask_bad, 1) + + # safe-cast error + a = dpnp.arange(10, dtype=dpnp.int32) + mask = a > 3 + val_f = dpnp.array([1.5, 2.5], dtype="f4") + assert_raises(TypeError, dpnp.putmask, a, mask, val_f) + + # values as list + assert_raises(TypeError, dpnp.putmask, a, mask, [1, 2, 3]) @pytest.mark.parametrize("m", [None, 0, 1, 2, 3, 4]) From 13a7adb4b0d08db87ba4a923b9244354f1478d05 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 07:20:25 -0800 Subject: [PATCH 5/5] Add sycl queue validation for mask and values --- dpnp/dpnp_iface_indexing.py | 13 +++++++++++-- 
dpnp/tests/test_indexing.py | 9 +++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 31f9a680a51f..b252d252245c 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -1870,8 +1870,17 @@ def putmask(a, /, mask, values): if a.dtype != values.dtype: values = dpnp.astype(values, a.dtype, casting="safe", copy=False) - _, exec_q = get_usm_allocations([a, mask, values]) - + exec_q = a.sycl_queue + if ( + dpu.get_execution_queue( + [exec_q, mask.sycl_queue, values.sycl_queue] + ) + is None + ): + raise ValueError( + "`mask` and `values` must be allocated on " + "the same SYCL queue as `a`" + ) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index b01ba9abfffb..822bd03d7a31 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -1117,6 +1117,15 @@ def test_putmask_errors(self): # values as list assert_raises(TypeError, dpnp.putmask, a, mask, [1, 2, 3]) + # values has a different SYCL queue + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + a = dpnp.arange(10, sycl_queue=q1) + mask = a > 3 + val = dpnp.arange(5, sycl_queue=q2) + if q1 != q2: + assert_raises(ValueError, dpnp.putmask, a, mask, val) + @pytest.mark.parametrize("m", [None, 0, 1, 2, 3, 4]) @pytest.mark.parametrize("k", [-3, -2, -1, 0, 1, 2, 3])
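
A minimal usage sketch of the error handling exercised by TestPutmask.test_putmask_errors above
(assuming a dpnp build that includes these patches and at least one SYCL device; the exact
exception messages may differ):

    >>> import dpnp as np
    >>> a = np.arange(10, dtype=np.int32)
    >>> mask = a > 3
    >>> np.putmask(a, np.ones((2, 5), dtype=np.bool), 0)        # mask shape differs from a
    Traceback (most recent call last):
        ...
    ValueError: mask and data must be the same size
    >>> np.putmask(a, mask, np.array([1.5, 2.5], dtype="f4"))   # float32 -> int32 is not a safe cast
    Traceback (most recent call last):
        ...
    TypeError: Cannot cast array data from float32 to int32 according to the rule 'safe'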