Skip to content

Commit 1a1c5df

Browse files
authored
[ARM] Introduce intrinsics for MVE fp-converts under strict-fp. (#170686)
This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others because the conversion builders take a Type as one of their arguments. This adds new helper functions that create the intrinsics we need.
1 parent b97d247 commit 1a1c5df

File tree

8 files changed

+808
-318
lines changed

8 files changed

+808
-318
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ foreach half = [ "b", "t" ] in {
598598
} // params = [f16], pnt = PNT_None
599599
} // loop over half = "b", "t"
600600

601-
multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
601+
multiclass float_int_conversions<Type FScalar, Type IScalar, Builder ftoi, Builder itof> {
602602
defvar FVector = VecOf<FScalar>;
603603
defvar IVector = VecOf<IScalar>;
604604

clang/include/clang/Basic/arm_mve_defs.td

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,10 @@ def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
123123
def splat: CGHelperFn<"ARMMVEVectorSplat">;
124124
def select: IRBuilder<"CreateSelect">;
125125
def fneg: IRBuilder<"CreateFNeg">;
126-
def sitofp: IRBuilder<"CreateSIToFP">;
127-
def uitofp: IRBuilder<"CreateUIToFP">;
128-
def fptosi: IRBuilder<"CreateFPToSI">;
129-
def fptoui: IRBuilder<"CreateFPToUI">;
126+
def sitofp_node: IRBuilder<"CreateSIToFP">;
127+
def uitofp_node: IRBuilder<"CreateUIToFP">;
128+
def fptosi_node: IRBuilder<"CreateFPToSI">;
129+
def fptoui_node: IRBuilder<"CreateFPToUI">;
130130
def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
131131
let special_params = [IRBuilderIntParam<1, "unsigned">];
132132
}
@@ -215,9 +215,9 @@ def bitsize;
215215

216216
// strictFPAlt allows a node to have different code generation under strict-fp.
217217
// TODO: The standard node can be IRBuilderBase or IRIntBase.
218-
class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
218+
class strictFPAlt<Builder standard_, Builder strictfp_> : Builder {
219219
Builder standard = standard_;
220-
IRIntBase strictfp = strictfp_;
220+
Builder strictfp = strictfp_;
221221
}
222222

223223
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
@@ -605,6 +605,14 @@ def fcmp_ult : strictFPAlt<fcmp_ult_node,
605605
IRInt<"cmp_lt", [Predicate, Vector]>>;
606606
def fcmp_ule : strictFPAlt<fcmp_ule_node,
607607
IRInt<"cmp_le", [Predicate, Vector]>>;
608+
def sitofp: strictFPAlt<sitofp_node,
609+
CGFHelperFn<"ARMMVECreateSIToFP">>;
610+
def uitofp: strictFPAlt<uitofp_node,
611+
CGFHelperFn<"ARMMVECreateUIToFP">>;
612+
def fptosi: strictFPAlt<fptosi_node,
613+
CGFHelperFn<"ARMMVECreateFPToSI">>;
614+
def fptoui: strictFPAlt<fptoui_node,
615+
CGFHelperFn<"ARMMVECreateFPToUI">>;
608616

609617
// -----------------------------------------------------------------------------
610618
// Convenience lists of parameter types. 'T' is just a container record, so you

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3512,6 +3512,38 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
35123512
return Builder.CreateShuffleVector(V, Indices);
35133513
}
35143514

3515+
static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
3516+
CodeGenFunction *CGF, llvm::Value *V,
3517+
llvm::Type *Ty) {
3518+
return Builder.CreateCall(
3519+
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3520+
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3521+
}
3522+
3523+
static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
3524+
CodeGenFunction *CGF, llvm::Value *V,
3525+
llvm::Type *Ty) {
3526+
return Builder.CreateCall(
3527+
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
3528+
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3529+
}
3530+
3531+
static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
3532+
CodeGenFunction *CGF, llvm::Value *V,
3533+
llvm::Type *Ty) {
3534+
return Builder.CreateCall(
3535+
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3536+
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
3537+
}
3538+
3539+
static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
3540+
CodeGenFunction *CGF, llvm::Value *V,
3541+
llvm::Type *Ty) {
3542+
return Builder.CreateCall(
3543+
CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
3544+
{V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
3545+
}
3546+
35153547
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
35163548
const CallExpr *E,
35173549
ReturnValueSlot ReturnValue,

clang/test/CodeGen/arm-mve-intrinsics/vcvt.c

Lines changed: 658 additions & 300 deletions
Large diffs are not rendered by default.

clang/utils/TableGen/MveEmitter.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,10 +1294,13 @@ Result::Ptr EmitterBase::getCodeForDag(const DagInit *D,
12941294
return GenIRIntBase(Op);
12951295
} else if (Op->isSubClassOf("strictFPAlt")) {
12961296
auto StardardBuilder = Op->getValueAsDef("standard");
1297-
Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilder")
1297+
Result::Ptr Standard = StardardBuilder->isSubClassOf("IRBuilderBase")
12981298
? GenIRBuilderBase(StardardBuilder)
12991299
: GenIRIntBase(StardardBuilder);
1300-
Result::Ptr StrictFp = GenIRIntBase(Op->getValueAsDef("strictfp"));
1300+
auto StrictBuilder = Op->getValueAsDef("strictfp");
1301+
Result::Ptr StrictFp = StrictBuilder->isSubClassOf("IRBuilderBase")
1302+
? GenIRBuilderBase(StrictBuilder)
1303+
: GenIRIntBase(StrictBuilder);
13011304
return std::make_shared<StrictFpAltResult>(Standard, StrictFp);
13021305
} else {
13031306
PrintFatalError("Unsupported dag node " + Op->getName());

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,12 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated<
13041304
[llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
13051305
LLVMMatchType<0>, llvm_anyvector_ty>;
13061306

1307+
def int_arm_mve_vcvt_fp_int: DefaultAttrsIntrinsic<
1308+
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */],
1309+
[IntrNoMem]>;
1310+
def int_arm_mve_vcvt_int_fp: DefaultAttrsIntrinsic<
1311+
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */],
1312+
[IntrNoMem]>;
13071313
def int_arm_mve_vcvt_fp_int_predicated: DefaultAttrsIntrinsic<
13081314
[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
13091315
llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4067,7 +4067,7 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
40674067
}
40684068

40694069
multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
4070-
SDNode unpred_op> {
4070+
SDNode unpred_op, SDPatternOperator unpred_intrinsic> {
40714071
defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
40724072
defvar ToInt = !eq(Src.SuffixLetter,"f");
40734073

@@ -4078,6 +4078,8 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
40784078
let Predicates = [HasMVEFloat] in {
40794079
def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
40804080
(Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
4081+
def : Pat<(Dest.Vec (unpred_intrinsic (Src.Vec MQPR:$src), (i32 Unsigned))),
4082+
(Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
40814083
def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
40824084
(Src.Vec MQPR:$src), (i32 Unsigned),
40834085
(Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
@@ -4088,15 +4090,15 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
40884090
}
40894091
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
40904092
// which I reflect here in the llvm instruction names
4091-
defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
4092-
defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
4093-
defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
4094-
defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
4093+
defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint, int_arm_mve_vcvt_int_fp>;
4094+
defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint, int_arm_mve_vcvt_int_fp>;
4095+
defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint, int_arm_mve_vcvt_int_fp>;
4096+
defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint, int_arm_mve_vcvt_int_fp>;
40954097
// Whereas VCVT for int->float rounds to nearest
4096-
defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
4097-
defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
4098-
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
4099-
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
4098+
defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp, int_arm_mve_vcvt_fp_int>;
4099+
defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp, int_arm_mve_vcvt_fp_int>;
4100+
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp, int_arm_mve_vcvt_fp_int>;
4101+
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp, int_arm_mve_vcvt_fp_int>;
41004102

41014103
let Predicates = [HasMVEFloat] in {
41024104
def : Pat<(v4i32 (fp_to_sint_sat v4f32:$src, i32)),

llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,4 +452,85 @@ entry:
452452

453453

454454

455+
456+
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_s16(<8 x i16> noundef %a) #0 {
457+
; CHECK-LABEL: test_vcvtq_f16_s16:
458+
; CHECK: @ %bb.0: @ %entry
459+
; CHECK-NEXT: vcvt.f16.s16 q0, q0
460+
; CHECK-NEXT: bx lr
461+
entry:
462+
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 0)
463+
ret <8 x half> %0
464+
}
465+
466+
define arm_aapcs_vfpcc <8 x half> @test_vcvtq_f16_u16(<8 x i16> noundef %a) #0 {
467+
; CHECK-LABEL: test_vcvtq_f16_u16:
468+
; CHECK: @ %bb.0: @ %entry
469+
; CHECK-NEXT: vcvt.f16.u16 q0, q0
470+
; CHECK-NEXT: bx lr
471+
entry:
472+
%0 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> %a, i32 1)
473+
ret <8 x half> %0
474+
}
475+
476+
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_s32(<4 x i32> noundef %a) #0 {
477+
; CHECK-LABEL: test_vcvtq_f32_s32:
478+
; CHECK: @ %bb.0: @ %entry
479+
; CHECK-NEXT: vcvt.f32.s32 q0, q0
480+
; CHECK-NEXT: bx lr
481+
entry:
482+
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 0)
483+
ret <4 x float> %0
484+
}
485+
486+
define arm_aapcs_vfpcc <4 x float> @test_vcvtq_f32_u32(<4 x i32> noundef %a) #0 {
487+
; CHECK-LABEL: test_vcvtq_f32_u32:
488+
; CHECK: @ %bb.0: @ %entry
489+
; CHECK-NEXT: vcvt.f32.u32 q0, q0
490+
; CHECK-NEXT: bx lr
491+
entry:
492+
%0 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> %a, i32 1)
493+
ret <4 x float> %0
494+
}
495+
496+
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_s16_f16(<8 x half> noundef %a) #0 {
497+
; CHECK-LABEL: test_vcvtq_s16_f16:
498+
; CHECK: @ %bb.0: @ %entry
499+
; CHECK-NEXT: vcvt.s16.f16 q0, q0
500+
; CHECK-NEXT: bx lr
501+
entry:
502+
%0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 0)
503+
ret <8 x i16> %0
504+
}
505+
506+
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_s32_f32(<4 x float> noundef %a) #0 {
507+
; CHECK-LABEL: test_vcvtq_s32_f32:
508+
; CHECK: @ %bb.0: @ %entry
509+
; CHECK-NEXT: vcvt.s32.f32 q0, q0
510+
; CHECK-NEXT: bx lr
511+
entry:
512+
%0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 0)
513+
ret <4 x i32> %0
514+
}
515+
516+
define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_u16_f16(<8 x half> noundef %a) #0 {
517+
; CHECK-LABEL: test_vcvtq_u16_f16:
518+
; CHECK: @ %bb.0: @ %entry
519+
; CHECK-NEXT: vcvt.u16.f16 q0, q0
520+
; CHECK-NEXT: bx lr
521+
entry:
522+
%0 = tail call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> %a, i32 1)
523+
ret <8 x i16> %0
524+
}
525+
526+
define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_u32_f32(<4 x float> noundef %a) #0 {
527+
; CHECK-LABEL: test_vcvtq_u32_f32:
528+
; CHECK: @ %bb.0: @ %entry
529+
; CHECK-NEXT: vcvt.u32.f32 q0, q0
530+
; CHECK-NEXT: bx lr
531+
entry:
532+
%0 = tail call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> %a, i32 1)
533+
ret <4 x i32> %0
534+
}
535+
455536
attributes #0 = { strictfp }

0 commit comments

Comments
 (0)