Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrong immediate generated by SPIR-V backend #121562

Open
jdnk opened this issue Jan 3, 2025 · 0 comments
Open

Wrong immediate generated by SPIR-V backend #121562

jdnk opened this issue Jan 3, 2025 · 0 comments

Comments

@jdnk
Copy link

jdnk commented Jan 3, 2025

The SPIR-V backend generates -1 instead of 0xFFFFFFFF as a component literal in an OpVectorShuffle instruction.

Original C source:

// repr_minus_one.c

#include <stdint.h>

// Union that overlays a uint32_t and a float in the same storage, so a bit
// pattern written through `bits` can be read back as a float through `value`.
// NOTE(review): assumes float is 32 bits wide, as on the x86_64 target used
// for this report.
typedef union {
  // Raw bit pattern.
  uint32_t bits;
  // The same storage interpreted as a float.
  float value;
} fp32bits;

// Builds two floats whose bit patterns are `a << 16` and `b << 16` (each
// 16-bit input becomes the upper half of a 32-bit pattern, lower half zero)
// and returns their product.
//
// `s` is not referenced in the body.
//
// NOTE(review): `a` and `b` are promoted to int before the shift, so with a
// 32-bit int any input >= 0x8000 makes `a << 16` overflow a signed int, which
// is undefined behavior in C. `(uint32_t)a << 16` would avoid it. The code is
// left exactly as submitted so it keeps matching the IR quoted below.
float fp16mul(float *restrict s, uint16_t a, uint16_t b) {
  // Place each 16-bit input in the upper half of a 32-bit pattern.
  const fp32bits af = {.bits = (a << 16)};
  const fp32bits bf = {.bits = (b << 16)};

  // Reinterpret both patterns as floats and multiply.
  return af.value * bf.value;
}

clang -S -emit-llvm -O3 repr_minus_one.c -o repr_minus_one.ll generates:

; repr_minus_one.ll

; ModuleID = 'repr_minus_one.c'
source_filename = "repr_minus_one.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local float @fp16mul(ptr noalias nocapture noundef readnone %s, i16 noundef zeroext %a, i16 noundef zeroext %b) local_unnamed_addr #0 {
entry:
  %0 = insertelement <2 x i16> poison, i16 %a, i64 0
  %1 = insertelement <2 x i16> %0, i16 %b, i64 1
  %2 = zext <2 x i16> %1 to <2 x i32>
  %3 = shl nuw <2 x i32> %2, <i32 16, i32 16>
  %4 = bitcast <2 x i32> %3 to <2 x float>
  %shift = shufflevector <2 x float> %4, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %5 = fmul <2 x float> %shift, %4
  %mul = extractelement <2 x float> %5, i64 0
  ret float %mul
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"clang version 19.1.6 (https://github.com/llvm/llvm-project.git e21dc4bd5474d04b8e62d7331362edcc5648d7e5)"}

llc -mtriple=spirv64 repr_minus_one.ll -o repr_minus_one.spt generates:

; repr_minus_one.spt

        OpCapability Kernel
        OpCapability Addresses
        OpCapability Int8
        OpCapability Int16
        OpCapability Linkage
        OpCapability Int64
        %1 = OpExtInstImport "OpenCL.std"
        OpMemoryModel Physical64 OpenCL
        OpSource OpenCL_CPP 100000
        OpName %16 "s"
        OpName %17 "a"
        OpName %18 "b"
        OpName %19 "fp16mul"
        OpName %25 "shift"
        OpName %27 "mul"
        OpDecorate %16 FuncParamAttr NoAlias
        OpDecorate %17 FuncParamAttr Zext
        OpDecorate %18 FuncParamAttr Zext
        OpDecorate %19 LinkageAttributes "fp16mul" Export
        %2 = OpTypeInt 8 0
        %3 = OpTypeInt 16 0
        %4 = OpTypeFloat 32
        %5 = OpTypePointer Function %2
        %6 = OpTypeFunction %4 %5 %3 %3
        %7 = OpTypeVector %4 2
        %8 = OpTypeInt 32 0
        %9 = OpTypeVector %8 2
        %10 = OpTypeVector %3 2
        %11 = OpTypeInt 64 0
        %12 = OpUndef %10
        %13 = OpConstant %8 16
        %14 = OpConstantComposite %9 %13 %13
        %15 = OpUndef %7
        %19 = OpFunction %4 Pure %6             ; -- Begin function fp16mul
        %16 = OpFunctionParameter %5
        %17 = OpFunctionParameter %3
        %18 = OpFunctionParameter %3
        %28 = OpLabel
        %20 = OpCompositeInsert %10 %17 %12 0
        %21 = OpCompositeInsert %10 %18 %20 1
        %22 = OpUConvert %9 %21
        %23 = OpShiftLeftLogical %9 %22 %14
        %24 = OpBitcast %7 %23
        %25 = OpVectorShuffle %7 %24 %15 1 -1  ;;; <-- here
        %26 = OpFMul %7 %25 %24
        %27 = OpCompositeExtract %4 %26 0
        OpReturnValue %27
        OpFunctionEnd
                                        ; -- End function

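Towards the end, you can see %25 = OpVectorShuffle %7 %24 %15 1 -1. The specification states that each Components literal of the OpVectorShuffle instruction must be an unsigned integer, so -1 is not a valid operand. The second mask element is poison in the LLVM IR (<i32 1, i32 poison> in the shufflevector above), and the SPIR-V specification reserves the literal 0xFFFFFFFF for a result component that has no source and is undefined. I therefore believe the constant 0xFFFFFFFF (4294967295) is supposed to be printed there instead of -1.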

Reproduced on LLVM 19.1.6, x86_64 (see repr_minus_one.ll for details).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants