Skip to content

Commit

Permalink
Merge pull request #436 from hvdijk/llvm19
Browse files Browse the repository at this point in the history
Accept nuw/nsw flags on trunc.
  • Loading branch information
hvdijk authored Apr 16, 2024
2 parents bd3faea + 1effca3 commit 361079f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ define spir_kernel void @get_sub_group_size(i32 addrspace(1)* %in, i32 addrspace
; CHECK-F2: [[SZ:%.*]] = call i64 @__mux_get_local_size(i32 0)
; CHECK-F2: [[WL:%.*]] = sub {{.*}} i64 [[SZ]], [[ID]]
; CHECK-F2: [[VL0:%.*]] = call i64 @llvm.umin.i64(i64 [[WL]], i64 2)
-; CHECK-F2: [[VL1:%.*]] = trunc i64 [[VL0]] to i32
+; CHECK-F2: [[VL1:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[VL0]] to i32
; CHECK-F2: [[RED:%.*]] = call i32 @__mux_sub_group_reduce_add_i32(i32 [[VL1]])
; CHECK-F2: store i32 [[RED]], ptr addrspace(1) {{.*}}

Expand All @@ -51,6 +51,6 @@ define spir_kernel void @get_sub_group_size(i32 addrspace(1)* %in, i32 addrspace
; CHECK-S4: [[VF0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-S4: [[VF1:%.*]] = shl i64 [[VF0]], 2
; CHECK-S4: [[VL0:%.*]] = call i64 @llvm.umin.i64(i64 [[WL]], i64 [[VF1]])
-; CHECK-S4: [[VL1:%.*]] = trunc i64 [[VL0]] to i32
+; CHECK-S4: [[VL1:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[VL0]] to i32
; CHECK-S4: [[RED:%.*]] = call i32 @__mux_sub_group_reduce_add_i32(i32 [[VL1]])
; CHECK-S4: store i32 [[RED]], ptr addrspace(1) {{.*}}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ entry:
; CHECK_4F: [[LSIZE:%.*]] = call i64 @__mux_get_local_size(i32 0)
; CHECK_4F: [[WREM:%.*]] = sub nuw nsw i64 [[LSIZE]], [[LID]]
; CHECK_4F: [[T0:%.*]] = call i64 @llvm.umin.i64(i64 [[WREM]], i64 4)
-; CHECK_4F: [[VL:%.*]] = trunc i64 [[T0]] to i32
+; CHECK_4F: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T0]] to i32
; CHECK_4F: [[LHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: [[RHS:%.*]] = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr {{%.*}}, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
; CHECK_4F: [[ADD:%.*]] = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> [[LHS]], <4 x i32> [[RHS]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 [[VL]])
Expand All @@ -57,7 +57,7 @@ entry:
; CHECK_1S: [[T0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK_1S: [[T1:%.*]] = shl i64 [[T0]], 2
; CHECK_1S: [[T2:%.*]] = call i64 @llvm.umin.i64(i64 [[WREM]], i64 [[T1]])
-; CHECK_1S: [[VL:%.*]] = trunc i64 [[T2]] to i32
+; CHECK_1S: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T2]] to i32
; CHECK_1S: [[LHS:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr {{%.*}}, [[TRUEMASK:<vscale x 4 x i1> shufflevector \(<vscale x 4 x i1> insertelement \(<vscale x 4 x i1> (undef|poison), i1 true, (i32|i64) 0\), <vscale x 4 x i1> (undef|poison), <vscale x 4 x i32> zeroinitializer\)]], i32 [[VL]])
; CHECK_1S: [[RHS:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr {{%.*}}, [[TRUEMASK]], i32 [[VL]])
; CHECK_1S: [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[LHS]], <vscale x 4 x i32> [[RHS]], [[TRUEMASK]], i32 [[VL]])
Expand All @@ -81,7 +81,7 @@ entry:
; CHECK_V4_2F: [[LSIZE:%.*]] = call i64 @__mux_get_local_size(i32 0)
; CHECK_V4_2F: [[WREM:%.*]] = sub nuw nsw i64 [[LSIZE]], [[LID]]
; CHECK_V4_2F: [[T0:%.*]] = call i64 @llvm.umin.i64(i64 [[WREM]], i64 2)
-; CHECK_V4_2F: [[VL:%.*]] = trunc i64 [[T0]] to i32
+; CHECK_V4_2F: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T0]] to i32
; Each WI performs 4 elements, so multiply the VL by 4
; CHECK_V4_2F: [[SVL:%.*]] = shl nuw nsw i32 [[VL]], 2
; CHECK_V4_2F: [[LHS:%.*]] = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr {{%.*}}, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 [[SVL]])
Expand All @@ -96,7 +96,7 @@ entry:
; CHECK_V4_1S: [[T0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK_V4_1S: [[T1:%.*]] = shl i64 [[T0]], 2
; CHECK_V4_1S: [[T2:%.*]] = call i64 @llvm.umin.i64(i64 [[WREM]], i64 [[T1]])
-; CHECK_V4_1S: [[VL:%.*]] = trunc i64 [[T2]] to i32
+; CHECK_V4_1S: [[VL:%.*]] = trunc {{(nuw )?(nsw )?}}i64 [[T2]] to i32
; Each WI performs 4 elements, so multiply the VL by 4
; CHECK_V4_1S: [[SVL:%.*]] = shl i32 [[VL]], 2
; CHECK_V4_1S: [[LHS:%.*]] = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr {{%.*}}, [[TRUEMASK:<vscale x 16 x i1> shufflevector \(<vscale x 16 x i1> insertelement \(<vscale x 16 x i1> (undef|poison), i1 true, (i32|i64) 0\), <vscale x 16 x i1> (undef|poison), <vscale x 16 x i32> zeroinitializer\)]], i32 [[SVL]])
Expand Down

0 comments on commit 361079f

Please sign in to comment.