Skip to content

check_implemented_domains fails #730

@isuruf

Description

@isuruf

The following kernel fails with:

loopy.diagnostic.LoopyError: sanity check failed--implemented and desired domain for instruction 'write_result' do not match

implemented: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end }

desired:[ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end and 0 <= iknl <= 1 }

sample point in implemented but not desired: itgt=0, itgt_box=0, ntgt_boxes=1, itgt_end=1, itgt_start=0, iknl=2
gist of constraints in implemented but not desired: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= iknl <= 1 }

loopy kernel:

Details
import loopy as lp
import numpy as np
from pymbolic.primitives import *
import immutables


# Reproducer kernel for the failure quoted in the issue text: the report says
# that the implemented and desired domains for instruction 'write_result' do
# not match. The kernel is built from three pieces: a list of loop domains,
# an instruction string, and explicit argument/temporary declarations.
e2p_from_csr_knl = lp.make_kernel(
    [
    # Loop domains, one set string per entry. The first three are parametrized
    # by the names listed in their leading square brackets; the remaining
    # four have constant bounds.
    "[ntgt_boxes] -> { [itgt_box] : 0 <= itgt_box < ntgt_boxes }",
    "[itgt_end, itgt_start] -> { [itgt] : itgt_start <= itgt < itgt_end }",
    "[isrc_box_end, isrc_box_start] -> { [isrc_box] : isrc_box_start <= isrc_box < isrc_box_end }",
    "{ [idim, idim_0] : 0 <= idim <= 1 and 0 <= idim_0 <= 1 }",
    "{ [icoeff_0] : 0 <= icoeff_0 <= 2 }",
    # NOTE(review): `iknl` and `iknl_0` are declared together in one set.
    # `iknl` is the iname whose bounds (0 <= iknl <= 1) are reported as
    # missing from the implemented domain of 'write_result'.
    "{ [iknl, iknl_0] : 0 <= iknl <= 1 and 0 <= iknl_0 <= 1 }",
    "{ [e2p_idim] : 0 <= e2p_idim <= 1 }",
    ],
    '''
    for itgt_box
    tgt_ibox = target_boxes[itgt_box] {id=insn}
    itgt_start = box_target_starts[tgt_ibox] {id=insn_0, dep=insn}
    itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] {id=insn_1, dep=insn:insn_0}
    for itgt
    tgt[idim] = targets[idim, itgt] {id=insn_2}
    isrc_box_start = source_box_starts[itgt_box] {id=insn_3}
    isrc_box_end = source_box_starts[itgt_box + 1] {id=insn_4}
    result_temp[iknl_0] = 0 {id=init_result}
    for isrc_box
    src_ibox = source_box_lists[isrc_box] {id=insn_5}
    coeffs[icoeff_0] = src_expansions[src_ibox + (-1)*src_base_ibox, icoeff_0] {id=fetch_coeffs, dep=insn_5}
    center[idim_0] = centers[idim_0, src_ibox] {id=fetch_center, dep=insn_5}
    ... nop {id=e2p__start, dep=fetch_coeffs:insn_2:init_result:fetch_center}
    e2p_kernel_scaling = ((-1) / 2)*3.141592653589793**(-1) {id=e2p_insn, dep=e2p__start}
    e2p_b[e2p_idim] = tgt[e2p_idim] + (-1)*center[e2p_idim] {id=e2p_insn_0, dep=e2p__start}
    e2p_expr_4 = 1 / rscale {id=e2p_insn_1, dep=e2p__start}
    e2p_expr_0 = e2p_b[0]*e2p_b[0] + e2p_b[1]*e2p_b[1] {id=e2p_insn_2, dep=e2p_insn_0:e2p__start}
    e2p_expr_1 = rscale*(1 / e2p_expr_0) {id=e2p_insn_3, dep=e2p__start:e2p_insn_2}
    e2p_expr_2 = e2p_b[1]*coeffs[2] {id=e2p_insn_4, dep=e2p_insn_0:e2p__start}
    e2p_expr_3 = e2p_b[0]*e2p_expr_1 {id=e2p_insn_5, dep=e2p_insn_0:e2p_insn_3:e2p__start}
    e2p_temp_2 = e2p_b[0]*e2p_expr_4 {id=e2p_insn_6, dep=e2p_insn_0:e2p_insn_1:e2p__start}
    e2p_temp_0 = e2p_b[1]*e2p_expr_4 {id=e2p_insn_7, dep=e2p_insn_0:e2p_insn_1:e2p__start}
    e2p_cse_exprvar = e2p_temp_0*e2p_temp_0 + e2p_temp_2*e2p_temp_2 {id=e2p_insn_8, dep=e2p_insn_6:e2p__start:e2p_insn_7}
    e2p_cse_exprvar_0 = sqrt(e2p_cse_exprvar) {id=e2p_insn_9, dep=e2p_insn_8:e2p__start}
    e2p_temp_1 = e2p_cse_exprvar_0 {id=e2p_insn_10, dep=e2p_insn_9:e2p__start}
    e2p_cse_exprvar_1 = sqrt(e2p_expr_0) {id=e2p_insn_11, dep=e2p__start:e2p_insn_2}
    result_temp[0] = result_temp[0] + e2p_kernel_scaling*(coeffs[0]*log(e2p_cse_exprvar_1) + e2p_expr_3*coeffs[1] + e2p_expr_2*e2p_expr_1) {id=e2p_result_0, dep=e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_11:e2p_insn_3:e2p__start}
    e2p_temp_5 = e2p_expr_4 {id=e2p_insn_12, dep=e2p_insn_1:e2p__start}
    e2p_cse_exprvar_2 = 1 / e2p_temp_1 {id=e2p_insn_13, dep=e2p__start:e2p_insn_10}
    e2p_temp_6 = (1 + (-2)*e2p_temp_2*e2p_expr_3)*e2p_cse_exprvar_2*e2p_cse_exprvar_2 {id=e2p_insn_14, dep=e2p__start:e2p_insn_5:e2p_insn_13:e2p_insn_6}
    e2p_cse_exprvar_3 = 1 / e2p_expr_0 {id=e2p_insn_15, dep=e2p__start:e2p_insn_2}
    result_temp[1] = result_temp[1] + e2p_kernel_scaling*(e2p_expr_3*e2p_temp_5*coeffs[0] + e2p_temp_5*e2p_temp_6*coeffs[1] + (-2)*e2p_b[0]*e2p_expr_2*e2p_temp_5*rscale*rscale*e2p_cse_exprvar_3*e2p_cse_exprvar_3) {id=e2p_result_1, dep=e2p_insn_0:e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_14:e2p_insn_15:e2p__start:e2p_insn_12}
    ... nop {id=update_result, dep=e2p_result_1:e2p_insn_7:e2p_insn_2:e2p_insn_5:e2p_insn_15:e2p_insn_12:e2p_insn_4:e2p_insn_11:e2p_insn_3:e2p_insn_9:e2p_result_0:e2p_insn_1:e2p_insn_6:e2p_insn:e2p_insn_8:e2p_insn_14:e2p_insn_10:e2p_insn_13:e2p_insn_0}
    end
    result[iknl, itgt] = result_temp[iknl] {id=write_result, dep=update_result:init_result}
    end
    end
    ''', [
        # Kernel arguments: global arrays and scalar value arguments. Arrays
        # declared with shape=None carry no shape information here.
        lp.GlobalArg(
            name="targets", dtype=np.float64,
            shape=(2, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="box_target_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_target_counts_nonchild", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="centers", dtype=np.float64,
            shape=(2, Variable('aligned_nboxes')), for_atomic=False),
        lp.GlobalArg(
            name="src_expansions", dtype=np.float64,
            shape=(Variable('nsrc_level_boxes'), 3), for_atomic=False),
        lp.ValueArg(
            name="src_base_ibox",
            dtype=np.int32),
        lp.ValueArg(
            name="nsrc_level_boxes",
            dtype=np.int32),
        lp.ValueArg(
            name="aligned_nboxes",
            dtype=np.int32),
        lp.ValueArg(
            name="ntargets",
            dtype=np.int32),
        lp.GlobalArg(
            name="result", dtype=np.float64,
            shape=(2, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="source_box_starts", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="source_box_lists", dtype=np.uint32,
            shape=None, for_atomic=False),
        lp.ValueArg(
            name="ntgt_boxes",
            dtype=np.int32),
        lp.ValueArg(
            name="rscale",
            dtype=np.float64),
        lp.GlobalArg(
            name="target_boxes", dtype=np.uint32,
            shape=(Variable('ntgt_boxes'),), for_atomic=False),
        # Temporaries assigned by the instructions above. Every one is
        # declared with address_space=lp.auto and read_only=False. The loop
        # bounds itgt_start/itgt_end and isrc_box_start/isrc_box_end are
        # themselves temporaries written inside the kernel.
        lp.TemporaryVariable(
            name="tgt_ibox",
            dtype=np.uint32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="tgt",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_box_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_box_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="result_temp",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="src_ibox",
            dtype=np.uint32,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="coeffs",
            dtype=np.float64,
            shape=(3,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="center",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_cse_exprvar_3",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_kernel_scaling",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_b",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_4",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_expr_3",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_2",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_1",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_5",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="e2p_temp_6",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.auto,
            read_only=False,
            ),
        ],
        lang_version=(2018, 2),
        name="e2p_from_csr",
        )

# Apply the iname tags one at a time, in the same order as the original
# report: five "unr" tags first, then "g.0" on `itgt_box`, the outermost
# loop of the instruction string.
for iname_tag in (
        "e2p_idim:unr",
        "iknl:unr",
        "idim:unr",
        "iknl_0:unr",
        "idim_0:unr",
        "itgt_box:g.0",
        ):
    e2p_from_csr_knl = lp.tag_inames(e2p_from_csr_knl, iname_tag)

# Merge the single kernel into a translation unit and request its device code.
# NOTE(review): the issue text reports a LoopyError for instruction
# 'write_result'; presumably it is raised during this code-generation step --
# confirm against the full traceback.
t_unit = lp.merge([e2p_from_csr_knl])
codegen_result = lp.generate_code_v2(t_unit)
codegen_result.device_code()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions