-
Notifications
You must be signed in to change notification settings - Fork 79
Closed
Description
The following kernel fails with:
loopy.diagnostic.LoopyError: sanity check failed--implemented and desired domain for instruction 'write_result' do not match
implemented: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end }
desired:[ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= itgt_box < ntgt_boxes and itgt_start <= itgt < itgt_end and 0 <= iknl <= 1 }
sample point in implemented but not desired: itgt=0, itgt_box=0, ntgt_boxes=1, itgt_end=1, itgt_start=0, iknl=2
gist of constraints in implemented but not desired: [ntgt_boxes, itgt_end, itgt_start] -> { [itgt_box, itgt, iknl] : 0 <= iknl <= 1 }
loopy kernel:
Details
import loopy as lp
import numpy as np
from pymbolic.primitives import *
import immutables
# Build the `e2p_from_csr` kernel that triggers the reported sanity-check
# failure for instruction `write_result`.
# The call passes, in order: a list of domain strings, the instruction text,
# and a list of argument / temporary-variable declarations.
e2p_from_csr_knl = lp.make_kernel(
[
# Loop domains. Some bounds are parameters (ntgt_boxes, itgt_start,
# itgt_end, isrc_box_start, isrc_box_end); the last four of these are
# assigned inside the instruction text below.
"[ntgt_boxes] -> { [itgt_box] : 0 <= itgt_box < ntgt_boxes }",
"[itgt_end, itgt_start] -> { [itgt] : itgt_start <= itgt < itgt_end }",
"[isrc_box_end, isrc_box_start] -> { [isrc_box] : isrc_box_start <= isrc_box < isrc_box_end }",
"{ [idim, idim_0] : 0 <= idim <= 1 and 0 <= idim_0 <= 1 }",
"{ [icoeff_0] : 0 <= icoeff_0 <= 2 }",
# NOTE: iknl (used by `write_result`) and iknl_0 (used by `init_result`)
# are declared together in this one domain string.
"{ [iknl, iknl_0] : 0 <= iknl <= 1 and 0 <= iknl_0 <= 1 }",
"{ [e2p_idim] : 0 <= e2p_idim <= 1 }",
],
# Instruction text. Every instruction has an explicit id; most also list
# their dependencies via dep=... . The text must stay verbatim to
# reproduce the failure.
'''
for itgt_box
tgt_ibox = target_boxes[itgt_box] {id=insn}
itgt_start = box_target_starts[tgt_ibox] {id=insn_0, dep=insn}
itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] {id=insn_1, dep=insn:insn_0}
for itgt
tgt[idim] = targets[idim, itgt] {id=insn_2}
isrc_box_start = source_box_starts[itgt_box] {id=insn_3}
isrc_box_end = source_box_starts[itgt_box + 1] {id=insn_4}
result_temp[iknl_0] = 0 {id=init_result}
for isrc_box
src_ibox = source_box_lists[isrc_box] {id=insn_5}
coeffs[icoeff_0] = src_expansions[src_ibox + (-1)*src_base_ibox, icoeff_0] {id=fetch_coeffs, dep=insn_5}
center[idim_0] = centers[idim_0, src_ibox] {id=fetch_center, dep=insn_5}
... nop {id=e2p__start, dep=fetch_coeffs:insn_2:init_result:fetch_center}
e2p_kernel_scaling = ((-1) / 2)*3.141592653589793**(-1) {id=e2p_insn, dep=e2p__start}
e2p_b[e2p_idim] = tgt[e2p_idim] + (-1)*center[e2p_idim] {id=e2p_insn_0, dep=e2p__start}
e2p_expr_4 = 1 / rscale {id=e2p_insn_1, dep=e2p__start}
e2p_expr_0 = e2p_b[0]*e2p_b[0] + e2p_b[1]*e2p_b[1] {id=e2p_insn_2, dep=e2p_insn_0:e2p__start}
e2p_expr_1 = rscale*(1 / e2p_expr_0) {id=e2p_insn_3, dep=e2p__start:e2p_insn_2}
e2p_expr_2 = e2p_b[1]*coeffs[2] {id=e2p_insn_4, dep=e2p_insn_0:e2p__start}
e2p_expr_3 = e2p_b[0]*e2p_expr_1 {id=e2p_insn_5, dep=e2p_insn_0:e2p_insn_3:e2p__start}
e2p_temp_2 = e2p_b[0]*e2p_expr_4 {id=e2p_insn_6, dep=e2p_insn_0:e2p_insn_1:e2p__start}
e2p_temp_0 = e2p_b[1]*e2p_expr_4 {id=e2p_insn_7, dep=e2p_insn_0:e2p_insn_1:e2p__start}
e2p_cse_exprvar = e2p_temp_0*e2p_temp_0 + e2p_temp_2*e2p_temp_2 {id=e2p_insn_8, dep=e2p_insn_6:e2p__start:e2p_insn_7}
e2p_cse_exprvar_0 = sqrt(e2p_cse_exprvar) {id=e2p_insn_9, dep=e2p_insn_8:e2p__start}
e2p_temp_1 = e2p_cse_exprvar_0 {id=e2p_insn_10, dep=e2p_insn_9:e2p__start}
e2p_cse_exprvar_1 = sqrt(e2p_expr_0) {id=e2p_insn_11, dep=e2p__start:e2p_insn_2}
result_temp[0] = result_temp[0] + e2p_kernel_scaling*(coeffs[0]*log(e2p_cse_exprvar_1) + e2p_expr_3*coeffs[1] + e2p_expr_2*e2p_expr_1) {id=e2p_result_0, dep=e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_11:e2p_insn_3:e2p__start}
e2p_temp_5 = e2p_expr_4 {id=e2p_insn_12, dep=e2p_insn_1:e2p__start}
e2p_cse_exprvar_2 = 1 / e2p_temp_1 {id=e2p_insn_13, dep=e2p__start:e2p_insn_10}
e2p_temp_6 = (1 + (-2)*e2p_temp_2*e2p_expr_3)*e2p_cse_exprvar_2*e2p_cse_exprvar_2 {id=e2p_insn_14, dep=e2p__start:e2p_insn_5:e2p_insn_13:e2p_insn_6}
e2p_cse_exprvar_3 = 1 / e2p_expr_0 {id=e2p_insn_15, dep=e2p__start:e2p_insn_2}
result_temp[1] = result_temp[1] + e2p_kernel_scaling*(e2p_expr_3*e2p_temp_5*coeffs[0] + e2p_temp_5*e2p_temp_6*coeffs[1] + (-2)*e2p_b[0]*e2p_expr_2*e2p_temp_5*rscale*rscale*e2p_cse_exprvar_3*e2p_cse_exprvar_3) {id=e2p_result_1, dep=e2p_insn_0:e2p_insn:e2p_insn_4:e2p_insn_5:e2p_insn_14:e2p_insn_15:e2p__start:e2p_insn_12}
... nop {id=update_result, dep=e2p_result_1:e2p_insn_7:e2p_insn_2:e2p_insn_5:e2p_insn_15:e2p_insn_12:e2p_insn_4:e2p_insn_11:e2p_insn_3:e2p_insn_9:e2p_result_0:e2p_insn_1:e2p_insn_6:e2p_insn:e2p_insn_8:e2p_insn_14:e2p_insn_10:e2p_insn_13:e2p_insn_0}
end
result[iknl, itgt] = result_temp[iknl] {id=write_result, dep=update_result:init_result}
end
end
''', [
# Kernel arguments: GlobalArg entries are arrays, ValueArg entries are
# scalars. Arrays declared with shape=None leave their extent unspecified
# here.
lp.GlobalArg(
name="targets", dtype=np.float64,
shape=(2, Variable('ntargets')), for_atomic=False),
lp.GlobalArg(
name="box_target_starts", dtype=np.uint32,
shape=None, for_atomic=False),
lp.GlobalArg(
name="box_target_counts_nonchild", dtype=np.uint32,
shape=None, for_atomic=False),
lp.GlobalArg(
name="centers", dtype=np.float64,
shape=(2, Variable('aligned_nboxes')), for_atomic=False),
lp.GlobalArg(
name="src_expansions", dtype=np.float64,
shape=(Variable('nsrc_level_boxes'), 3), for_atomic=False),
lp.ValueArg(
name="src_base_ibox",
dtype=np.int32),
lp.ValueArg(
name="nsrc_level_boxes",
dtype=np.int32),
lp.ValueArg(
name="aligned_nboxes",
dtype=np.int32),
lp.ValueArg(
name="ntargets",
dtype=np.int32),
lp.GlobalArg(
name="result", dtype=np.float64,
shape=(2, Variable('ntargets')), for_atomic=False),
lp.GlobalArg(
name="source_box_starts", dtype=np.uint32,
shape=None, for_atomic=False),
lp.GlobalArg(
name="source_box_lists", dtype=np.uint32,
shape=None, for_atomic=False),
lp.ValueArg(
name="ntgt_boxes",
dtype=np.int32),
lp.ValueArg(
name="rscale",
dtype=np.float64),
lp.GlobalArg(
name="target_boxes", dtype=np.uint32,
shape=(Variable('ntgt_boxes'),), for_atomic=False),
# Temporaries written by the instructions above. All are declared with
# address_space=lp.auto and read_only=False; shape=() marks a scalar.
lp.TemporaryVariable(
name="tgt_ibox",
dtype=np.uint32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="itgt_start",
dtype=np.int32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="itgt_end",
dtype=np.int32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="tgt",
dtype=np.float64,
shape=(2,), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="isrc_box_start",
dtype=np.int32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="isrc_box_end",
dtype=np.int32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
# Two-entry accumulator: zeroed by `init_result`, updated by
# `e2p_result_0` / `e2p_result_1`, and read by `write_result`.
lp.TemporaryVariable(
name="result_temp",
dtype=np.float64,
shape=(2,), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="src_ibox",
dtype=np.uint32,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="coeffs",
dtype=np.float64,
shape=(3,), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="center",
dtype=np.float64,
shape=(2,), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
# Intermediates of the e2p_* instructions; all float64 scalars except
# e2p_b, which has shape (2,).
lp.TemporaryVariable(
name="e2p_cse_exprvar",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_cse_exprvar_0",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_cse_exprvar_1",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_cse_exprvar_2",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_cse_exprvar_3",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_kernel_scaling",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_b",
dtype=np.float64,
shape=(2,), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_expr_4",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_expr_0",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_expr_1",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_expr_2",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_expr_3",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_temp_2",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_temp_0",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_temp_1",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_temp_5",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
lp.TemporaryVariable(
name="e2p_temp_6",
dtype=np.float64,
shape=(), for_atomic=False,
address_space=lp.auto,
read_only=False,
),
],
# Remaining keyword options: the loopy language version and the kernel name.
lang_version=(2018, 2),
name="e2p_from_csr",
)
# Apply the iname tags one at a time, in the same order as the original
# sequence of calls: five unroll ("unr") tags, then "g.0" on itgt_box.
for _iname_tag in (
    "e2p_idim:unr",
    "iknl:unr",
    "idim:unr",
    "iknl_0:unr",
    "idim_0:unr",
    "itgt_box:g.0",
):
    e2p_from_csr_knl = lp.tag_inames(e2p_from_csr_knl, _iname_tag)

# Wrap the single tagged kernel into the object handed to code generation.
t_unit = lp.merge([e2p_from_csr_knl])
lp.generate_code_v2(t_unit).device_code()
Metadata
Metadata
Assignees
Labels
No labels