Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -653,19 +653,20 @@ def int_amdgcn_cvt_pk_bf8_f16
// byte_sel selects byte to write in vdst.
def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">;

// llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3]
// byte_sel selects byte to write in vdst.
def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">;

// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale, imm scale_sel [0..15]
class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
[DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]
[DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 16>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScaleF32Intrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
Expand Down Expand Up @@ -746,7 +747,8 @@ class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : Def
[llvm_i32_ty, // src
llvm_float_ty, // scale
llvm_i32_ty], // src_sel index [0..3]
[IntrNoMem, ImmArg<ArgIndex<2>>]
[IntrNoMem,
ImmArg<ArgIndex<2>>, Range<ArgIndex<2>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
Expand Down Expand Up @@ -783,7 +785,7 @@ class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic<LLVMType DstTy, string name> :
llvm_float_ty, // scale
llvm_i32_ty, // src_sel_index[0..3]
llvm_i1_ty], // dst_lo_hi_sel[true false]
[IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]
[IntrNoMem, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>, ImmArg<ArgIndex<4>>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
Expand All @@ -793,7 +795,7 @@ class AMDGPUCvtScaleF32ToFP4Intrinsic<string name> : DefaultAttrsIntrinsic<
llvm_float_ty, // src1
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
[IntrNoMem, ImmArg<ArgIndex<4>>]
[IntrNoMem, ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : DefaultAttrsIntrinsic<
Expand All @@ -802,7 +804,7 @@ class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic<LLVMType SrcTy, string name> : De
SrcTy, // src
llvm_float_ty, // scale
llvm_i32_ty], // dest_sel_index [0..3]
[IntrNoMem, ImmArg<ArgIndex<3>>]
[IntrNoMem, ImmArg<ArgIndex<3>>, Range<ArgIndex<3>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic<
Expand All @@ -812,7 +814,7 @@ class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic<LLVMType Src0Ty, st
llvm_i32_ty, // seed
llvm_float_ty, // scale
llvm_i32_ty], // dst_sel_index[0..3]
[IntrNoMem, ImmArg<ArgIndex<4>>]
[IntrNoMem, ImmArg<ArgIndex<4>>, Range<ArgIndex<4>, 0, 4>]
>, ClangBuiltin<"__builtin_amdgcn_"#name>;

class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic<
Expand Down
147 changes: 147 additions & 0 deletions llvm/test/Verifier/AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.sr.fp8.f16 - byte_sel out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 4)
define i32 @test_cvt_sr_fp8_f16_byte_sel_out_of_range(half %src, i32 %seed, i32 %old) {
%result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 4)
ret i32 %result
}

; CHECK: immarg value 10 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 10)
; byte_sel (the last operand) is 10, far outside the accepted [0, 4) range,
; so the call is expected to be rejected.
define i32 @test_cvt_sr_fp8_f16_byte_sel_way_out_of_range(half %src, i32 %seed, i32 %old) {
%result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 10)
ret i32 %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.sr.bf8.f16 - byte_sel out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %src, i32 %seed, i32 %old, i32 4)
; bf8 variant, boundary case: byte_sel (the last operand) is 4, the first value
; past the accepted [0, 4) range, so the call is expected to be rejected.
define i32 @test_cvt_sr_bf8_f16_byte_sel_out_of_range(half %src, i32 %seed, i32 %old) {
%result = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %src, i32 %seed, i32 %old, i32 4)
ret i32 %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scale.pk8.f16.fp8 - scale_sel out of range
; --------------------------------------------------------------------

; CHECK: immarg value 16 out of range [0, 16)
; CHECK-NEXT: %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 16)
; Boundary case: scale_sel (the last operand) is 16, the first value past the
; accepted [0, 16) range, so the call is expected to be rejected.
define <8 x half> @test_cvt_scale_pk8_f16_fp8_scale_sel_out_of_range(<2 x i32> %src) {
%result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 16)
ret <8 x half> %result
}

; CHECK: immarg value 100 out of range [0, 16)
; CHECK-NEXT: %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 100)
; scale_sel (the last operand) is 100, far outside the accepted [0, 16) range,
; so the call is expected to be rejected.
define <8 x half> @test_cvt_scale_pk8_f16_fp8_scale_sel_way_out_of_range(<2 x i32> %src) {
%result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 100)
ret <8 x half> %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scalef32.f32.fp8 - src_sel out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 4)
; Boundary case: src_sel (the last operand) is 4, the first value past the
; accepted [0, 4) range, so the call is expected to be rejected.
define float @test_cvt_scalef32_f32_fp8_src_sel_out_of_range(i32 %src, float %scale) {
%result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 4)
ret float %result
}

; CHECK: immarg value 7 out of range [0, 4)
; CHECK-NEXT: %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 7)
; src_sel (the last operand) is 7, outside the accepted [0, 4) range,
; so the call is expected to be rejected.
define float @test_cvt_scalef32_f32_fp8_src_sel_way_out_of_range(i32 %src, float %scale) {
%result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 7)
ret float %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scalef32.f16.fp8 - src_sel_index out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 4, i1 false)
; Boundary case: src_sel_index (the i32 operand before the trailing i1) is 4,
; the first value past the accepted [0, 4) range. The trailing i1 operand is a
; valid immediate (false), so only the i32 selector should be diagnosed.
define <2 x half> @test_cvt_scalef32_f16_fp8_src_sel_index_out_of_range(<2 x half> %old, i32 %src, float %scale) {
%result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 4, i1 false)
ret <2 x half> %result
}

; CHECK: immarg value 15 out of range [0, 4)
; CHECK-NEXT: %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 15, i1 true)
; src_sel_index (the i32 operand before the trailing i1) is 15, far outside the
; accepted [0, 4) range. This variant passes true for the trailing i1 operand.
define <2 x half> @test_cvt_scalef32_f16_fp8_src_sel_index_way_out_of_range(<2 x half> %old, i32 %src, float %scale) {
%result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 15, i1 true)
ret <2 x half> %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scalef32.pk.fp4.f32 - dst_sel_index out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 4)
; Boundary case: dst_sel_index (the last operand) is 4, the first value past
; the accepted [0, 4) range, so the call is expected to be rejected.
define i32 @test_cvt_scalef32_pk_fp4_f32_dst_sel_index_out_of_range(i32 %old, float %src0, float %src1, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 4)
ret i32 %result
}

; CHECK: immarg value 8 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 8)
; dst_sel_index (the last operand) is 8, outside the accepted [0, 4) range,
; so the call is expected to be rejected.
define i32 @test_cvt_scalef32_pk_fp4_f32_dst_sel_index_way_out_of_range(i32 %old, float %src0, float %src1, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 8)
ret i32 %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scalef32.pk.fp4.f16 - dest_sel_index out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 4)
; Boundary case: dest_sel_index (the last operand) is 4, the first value past
; the accepted [0, 4) range, so the call is expected to be rejected.
define i32 @test_cvt_scalef32_pk_fp4_f16_dest_sel_index_out_of_range(i32 %old, <2 x half> %src, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 4)
ret i32 %result
}

; CHECK: immarg value 12 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 12)
; dest_sel_index (the last operand) is 12, far outside the accepted [0, 4)
; range, so the call is expected to be rejected.
define i32 @test_cvt_scalef32_pk_fp4_f16_dest_sel_index_way_out_of_range(i32 %old, <2 x half> %src, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 12)
ret i32 %result
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16 - dst_sel_index out of range
; --------------------------------------------------------------------

; CHECK: immarg value 4 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 4)
; Boundary case for the sr (seeded) variant: dst_sel_index (the last operand)
; is 4, the first value past the accepted [0, 4) range.
define i32 @test_cvt_scalef32_sr_pk_fp4_f16_dst_sel_index_out_of_range(i32 %old, <2 x half> %src, i32 %seed, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 4)
ret i32 %result
}

; CHECK: immarg value 9 out of range [0, 4)
; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 9)
; sr (seeded) variant with dst_sel_index (the last operand) set to 9, far
; outside the accepted [0, 4) range, so the call is expected to be rejected.
define i32 @test_cvt_scalef32_sr_pk_fp4_f16_dst_sel_index_way_out_of_range(i32 %old, <2 x half> %src, i32 %seed, float %scale) {
%result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 9)
ret i32 %result
}

; Declarations of every AMDGPU conversion intrinsic called by the tests in this
; file. In each signature the range-checked selector is the last i32 parameter
; (for cvt.scalef32.f16.fp8 it is followed by one extra i1 parameter).
declare i32 @llvm.amdgcn.cvt.sr.fp8.f16(half, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.bf8.f16(half, i32, i32, i32)
declare <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32>, i32, i32)
declare float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32, float, i32)
declare <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half>, i32, float, i32, i1)
declare i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32, float, float, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32, <2 x half>, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32, <2 x half>, i32, float, i32)
Loading