From 55e6d896ad333f07bb3b1ba487df214fc268a4ab Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Dec 28 2017 21:23:03 +0000 Subject: Vendor import of clang trunk r321530: https://llvm.org/svn/llvm-project/cfe/trunk@321530 --- diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h index 6bd07af..b27dbfa 100644 --- a/include/clang/AST/Stmt.h +++ b/include/clang/AST/Stmt.h @@ -592,15 +592,21 @@ public: }; /// CompoundStmt - This represents a group of statements like { stmt stmt }. -class CompoundStmt : public Stmt { +class CompoundStmt final : public Stmt, + private llvm::TrailingObjects { friend class ASTStmtReader; + friend TrailingObjects; - Stmt** Body = nullptr; SourceLocation LBraceLoc, RBraceLoc; + CompoundStmt(ArrayRef Stmts, SourceLocation LB, SourceLocation RB); + explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {} + + void setStmts(ArrayRef Stmts); + public: - CompoundStmt(const ASTContext &C, ArrayRef Stmts, - SourceLocation LB, SourceLocation RB); + static CompoundStmt *Create(const ASTContext &C, ArrayRef Stmts, + SourceLocation LB, SourceLocation RB); // \brief Build an empty compound statement with a location. explicit CompoundStmt(SourceLocation Loc) @@ -609,11 +615,7 @@ public: } // \brief Build an empty compound statement. - explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) { - CompoundStmtBits.NumStmts = 0; - } - - void setStmts(const ASTContext &C, ArrayRef Stmts); + static CompoundStmt *CreateEmpty(const ASTContext &C, unsigned NumStmts); bool body_empty() const { return CompoundStmtBits.NumStmts == 0; } unsigned size() const { return CompoundStmtBits.NumStmts; } @@ -622,14 +624,16 @@ public: using body_range = llvm::iterator_range; body_range body() { return body_range(body_begin(), body_end()); } - body_iterator body_begin() { return Body; } - body_iterator body_end() { return Body + size(); } - Stmt *body_front() { return !body_empty() ? 
Body[0] : nullptr; } - Stmt *body_back() { return !body_empty() ? Body[size()-1] : nullptr; } + body_iterator body_begin() { return getTrailingObjects(); } + body_iterator body_end() { return body_begin() + size(); } + Stmt *body_front() { return !body_empty() ? body_begin()[0] : nullptr; } + Stmt *body_back() { + return !body_empty() ? body_begin()[size() - 1] : nullptr; + } void setLastStmt(Stmt *S) { assert(!body_empty() && "setLastStmt"); - Body[size()-1] = S; + body_begin()[size() - 1] = S; } using const_body_iterator = Stmt* const *; @@ -639,15 +643,17 @@ public: return body_const_range(body_begin(), body_end()); } - const_body_iterator body_begin() const { return Body; } - const_body_iterator body_end() const { return Body + size(); } + const_body_iterator body_begin() const { + return getTrailingObjects(); + } + const_body_iterator body_end() const { return body_begin() + size(); } const Stmt *body_front() const { - return !body_empty() ? Body[0] : nullptr; + return !body_empty() ? body_begin()[0] : nullptr; } const Stmt *body_back() const { - return !body_empty() ? Body[size() - 1] : nullptr; + return !body_empty() ? body_begin()[size() - 1] : nullptr; } using reverse_body_iterator = std::reverse_iterator; @@ -682,13 +688,10 @@ public: } // Iterators - child_range children() { - return child_range(Body, Body + CompoundStmtBits.NumStmts); - } + child_range children() { return child_range(body_begin(), body_end()); } const_child_range children() const { - return const_child_range(child_iterator(Body), - child_iterator(Body + CompoundStmtBits.NumStmts)); + return const_child_range(body_begin(), body_end()); } }; @@ -875,8 +878,11 @@ public: /// /// Represents an attribute applied to a statement. For example: /// [[omp::for(...)]] for (...) { ... 
} -class AttributedStmt : public Stmt { +class AttributedStmt final + : public Stmt, + private llvm::TrailingObjects { friend class ASTStmtReader; + friend TrailingObjects; Stmt *SubStmt; SourceLocation AttrLoc; @@ -894,11 +900,9 @@ class AttributedStmt : public Stmt { } const Attr *const *getAttrArrayPtr() const { - return reinterpret_cast(this + 1); - } - const Attr **getAttrArrayPtr() { - return reinterpret_cast(this + 1); + return getTrailingObjects(); } + const Attr **getAttrArrayPtr() { return getTrailingObjects(); } public: static AttributedStmt *Create(const ASTContext &C, SourceLocation Loc, diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index d0be484..465551b 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -429,9 +429,34 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "", "aes") TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes") TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes") +// VAES +TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") + +// GFNI +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") 
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gfni") + // CLMUL TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul") +// VPCLMULQDQ +TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq") + // AVX TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx") @@ -954,6 +979,31 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") 
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni") + TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl") @@ -1067,6 +1117,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", 
"avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg") + TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") @@ -1107,6 +1168,12 @@ TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs","","avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi","","avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "", "avx512vl") 
TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "", "avx512vl") @@ -1115,6 +1182,12 @@ TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "", "avx512 TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl") @@ -1147,10 +1220,22 @@ TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2d*V2dUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4d*V4dUc", "", 
"avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLi*V2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLi*V4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "", "avx512vl,avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4f*V4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4i*V4iUc", "", "avx512vl") @@ -1223,6 +1308,65 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "", 
"avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, 
"V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2") 
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2") + TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "", "avx512bw") @@ -1677,14 +1821,20 @@ TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi","","avx51 TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c","","avx512bw") TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f") 
TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f") @@ -1692,6 +1842,8 @@ TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi","","avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi","","avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc","","avx512vl") diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td index a8d6955..7936cdd 100644 --- a/include/clang/Basic/DiagnosticParseKinds.td +++ b/include/clang/Basic/DiagnosticParseKinds.td @@ -587,6 +587,7 @@ def ext_using_attribute_ns : ExtWarn< def err_using_attribute_ns_conflict : Error< "attribute with scope specifier cannot follow default scope specifier">; def err_attributes_not_allowed : Error<"an attribute list cannot appear here">; +def err_attributes_misplaced : Error<"misplaced attributes; expected attributes here">; def err_l_square_l_square_not_attribute : Error< "C++11 only allows consecutive left square brackets when " "introducing an 
attribute">; diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index d36e1a6..09efd7b 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group; def mno_avx2 : Flag<["-"], "mno-avx2">, Group; def mavx512f : Flag<["-"], "mavx512f">, Group; def mno_avx512f : Flag<["-"], "mno-avx512f">, Group; +def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group; +def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group; def mavx512bw : Flag<["-"], "mavx512bw">, Group; def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group; def mavx512cd : Flag<["-"], "mavx512cd">, Group; @@ -2481,8 +2483,12 @@ def mavx512pf : Flag<["-"], "mavx512pf">, Group; def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group; def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group; def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group; +def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group; +def mno_avx512vbmi2 : Flag<["-"], "mno-avx512vbmi2">, Group; def mavx512vl : Flag<["-"], "mavx512vl">, Group; def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group; +def mavx512vnni : Flag<["-"], "mavx512vnni">, Group; +def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group; def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group; def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; def madx : Flag<["-"], "madx">, Group; @@ -2511,6 +2517,8 @@ def mfsgsbase : Flag<["-"], "mfsgsbase">, Group; def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group; def mfxsr : Flag<["-"], "mfxsr">, Group; def mno_fxsr : Flag<["-"], "mno-fxsr">, Group; +def mgfni : Flag<["-"], "mgfni">, Group; +def mno_gfni : Flag<["-"], "mno-gfni">, Group; def mlwp : Flag<["-"], "mlwp">, Group; def mno_lwp : Flag<["-"], "mno-lwp">, Group; def mlzcnt : Flag<["-"], "mlzcnt">, Group; @@ -2543,6 +2551,10 @@ def msha : Flag<["-"], "msha">, Group; def mno_sha : Flag<["-"], "mno-sha">, Group; def mtbm : Flag<["-"], "mtbm">, 
Group; def mno_tbm : Flag<["-"], "mno-tbm">, Group; +def mvaes : Flag<["-"], "mvaes">, Group; +def mno_vaes : Flag<["-"], "mno-vaes">, Group; +def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group; +def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group; def mxop : Flag<["-"], "mxop">, Group; def mno_xop : Flag<["-"], "mno-xop">, Group; def mxsave : Flag<["-"], "mxsave">, Group; diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index 8f5b20c..4a25c70 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -2200,13 +2200,16 @@ private: void stripTypeAttributesOffDeclSpec(ParsedAttributesWithRange &Attrs, DeclSpec &DS, Sema::TagUseKind TUK); - - void ProhibitAttributes(ParsedAttributesWithRange &attrs) { + + // FixItLoc = possible correct location for the attributes + void ProhibitAttributes(ParsedAttributesWithRange &attrs, + SourceLocation FixItLoc = SourceLocation()) { if (!attrs.Range.isValid()) return; - DiagnoseProhibitedAttributes(attrs); + DiagnoseProhibitedAttributes(attrs, FixItLoc); attrs.clear(); } - void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs); + void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, + SourceLocation FixItLoc); // Forbid C++11 and C2x attributes that appear on certain syntactic locations // which standard permits but we don't supported yet, for example, attributes diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp index 0e627f9..0d1d980 100644 --- a/lib/AST/ASTImporter.cpp +++ b/lib/AST/ASTImporter.cpp @@ -134,12 +134,17 @@ namespace clang { bool ImportTemplateArguments(const TemplateArgument *FromArgs, unsigned NumFromArgs, SmallVectorImpl &ToArgs); + template + bool ImportTemplateArgumentListInfo(const InContainerTy &Container, + TemplateArgumentListInfo &ToTAInfo); bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord, bool Complain = true); bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar, bool Complain = true); bool 
IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord); bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC); + bool IsStructuralMatch(FunctionTemplateDecl *From, + FunctionTemplateDecl *To); bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To); bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To); Decl *VisitDecl(Decl *D); @@ -195,6 +200,7 @@ namespace clang { ClassTemplateSpecializationDecl *D); Decl *VisitVarTemplateDecl(VarTemplateDecl *D); Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D); + Decl *VisitFunctionTemplateDecl(FunctionTemplateDecl *D); // Importing statements DeclGroupRef ImportDeclGroup(DeclGroupRef DG); @@ -280,6 +286,7 @@ namespace clang { Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E); Expr *VisitCXXConstructExpr(CXXConstructExpr *E); Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E); + Expr *VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E); Expr *VisitExprWithCleanups(ExprWithCleanups *EWC); Expr *VisitCXXThisExpr(CXXThisExpr *E); Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E); @@ -1247,6 +1254,18 @@ bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs, return false; } +template +bool ASTNodeImporter::ImportTemplateArgumentListInfo( + const InContainerTy &Container, TemplateArgumentListInfo &ToTAInfo) { + for (const auto &FromLoc : Container) { + if (auto ToLoc = ImportTemplateArgumentLoc(FromLoc)) + ToTAInfo.addArgument(*ToLoc); + else + return true; + } + return false; +} + bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord, bool Complain) { // Eliminate a potential failure point where we attempt to re-import @@ -1280,6 +1299,14 @@ bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) { return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum); } +bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From, + FunctionTemplateDecl *To) { + 
StructuralEquivalenceContext Ctx( + Importer.getFromContext(), Importer.getToContext(), + Importer.getNonEquivalentDecls(), false, false); + return Ctx.IsStructurallyEquivalent(From, To); +} + bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC) { @@ -4197,6 +4224,64 @@ Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl( return D2; } +Decl *ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { + DeclContext *DC, *LexicalDC; + DeclarationName Name; + SourceLocation Loc; + NamedDecl *ToD; + + if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc)) + return nullptr; + + if (ToD) + return ToD; + + // Try to find a function in our own ("to") context with the same name, same + // type, and in the same context as the function we're importing. + if (!LexicalDC->isFunctionOrMethod()) { + unsigned IDNS = Decl::IDNS_Ordinary; + SmallVector FoundDecls; + DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls); + for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) { + if (!FoundDecls[I]->isInIdentifierNamespace(IDNS)) + continue; + + if (FunctionTemplateDecl *FoundFunction = + dyn_cast(FoundDecls[I])) { + if (FoundFunction->hasExternalFormalLinkage() && + D->hasExternalFormalLinkage()) { + if (IsStructuralMatch(D, FoundFunction)) { + Importer.Imported(D, FoundFunction); + // FIXME: Actually try to merge the body and other attributes. 
+ return FoundFunction; + } + } + } + } + } + + TemplateParameterList *Params = + ImportTemplateParameterList(D->getTemplateParameters()); + if (!Params) + return nullptr; + + FunctionDecl *TemplatedFD = + cast_or_null(Importer.Import(D->getTemplatedDecl())); + if (!TemplatedFD) + return nullptr; + + FunctionTemplateDecl *ToFunc = FunctionTemplateDecl::Create( + Importer.getToContext(), DC, Loc, Name, Params, TemplatedFD); + + TemplatedFD->setDescribedFunctionTemplate(ToFunc); + ToFunc->setAccess(D->getAccess()); + ToFunc->setLexicalDeclContext(LexicalDC); + Importer.Imported(D, ToFunc); + + LexicalDC->addDeclInternal(ToFunc); + return ToFunc; +} + //---------------------------------------------------------------------------- // Import Statements //---------------------------------------------------------------------------- @@ -4321,9 +4406,8 @@ Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) { SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc()); SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc()); - return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(), - ToStmts, - ToLBraceLoc, ToRBraceLoc); + return CompoundStmt::Create(Importer.getToContext(), ToStmts, ToLBraceLoc, + ToRBraceLoc); } Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) { @@ -5759,6 +5843,47 @@ Expr *ASTNodeImporter::VisitCXXPseudoDestructorExpr( Importer.Import(E->getTildeLoc()), Storage); } +Expr *ASTNodeImporter::VisitCXXDependentScopeMemberExpr( + CXXDependentScopeMemberExpr *E) { + Expr *Base = nullptr; + if (!E->isImplicitAccess()) { + Base = Importer.Import(E->getBase()); + if (!Base) + return nullptr; + } + + QualType BaseType = Importer.Import(E->getBaseType()); + if (BaseType.isNull()) + return nullptr; + + TemplateArgumentListInfo ToTAInfo(Importer.Import(E->getLAngleLoc()), + Importer.Import(E->getRAngleLoc())); + TemplateArgumentListInfo *ResInfo = nullptr; + if (E->hasExplicitTemplateArgs()) { + if 
(ImportTemplateArgumentListInfo(E->template_arguments(), ToTAInfo)) + return nullptr; + ResInfo = &ToTAInfo; + } + + DeclarationName Name = Importer.Import(E->getMember()); + if (!E->getMember().isEmpty() && Name.isEmpty()) + return nullptr; + + DeclarationNameInfo MemberNameInfo(Name, Importer.Import(E->getMemberLoc())); + // Import additional name location/type info. + ImportDeclarationNameLoc(E->getMemberNameInfo(), MemberNameInfo); + auto ToFQ = Importer.Import(E->getFirstQualifierFoundInScope()); + if (!ToFQ && E->getFirstQualifierFoundInScope()) + return nullptr; + + return CXXDependentScopeMemberExpr::Create( + Importer.getToContext(), Base, BaseType, E->isArrow(), + Importer.Import(E->getOperatorLoc()), + Importer.Import(E->getQualifierLoc()), + Importer.Import(E->getTemplateKeywordLoc()), + cast_or_null(ToFQ), MemberNameInfo, ResInfo); +} + Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) { QualType T = Importer.Import(E->getType()); if (T.isNull()) diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 629037b..4c1d591 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -1550,7 +1550,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS, // the enum-specifier. Each scoped enumerator is declared in the // scope of the enumeration. // For the case of unscoped enumerator, do not include in the qualified - // name any information about its enum enclosing scope, as is visibility + // name any information about its enum enclosing scope, as its visibility // is global. 
if (ED->isScoped()) OS << *ED; diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index 8d240c1..982fd45 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -299,31 +299,34 @@ SourceLocation Stmt::getLocEnd() const { llvm_unreachable("unknown statement kind"); } -CompoundStmt::CompoundStmt(const ASTContext &C, ArrayRef Stmts, - SourceLocation LB, SourceLocation RB) - : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { +CompoundStmt::CompoundStmt(ArrayRef Stmts, SourceLocation LB, + SourceLocation RB) + : Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) { CompoundStmtBits.NumStmts = Stmts.size(); + setStmts(Stmts); +} + +void CompoundStmt::setStmts(ArrayRef Stmts) { assert(CompoundStmtBits.NumStmts == Stmts.size() && "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); - if (Stmts.empty()) { - Body = nullptr; - return; - } - - Body = new (C) Stmt*[Stmts.size()]; - std::copy(Stmts.begin(), Stmts.end(), Body); + std::copy(Stmts.begin(), Stmts.end(), body_begin()); } -void CompoundStmt::setStmts(const ASTContext &C, ArrayRef Stmts) { - if (Body) - C.Deallocate(Body); - CompoundStmtBits.NumStmts = Stmts.size(); - assert(CompoundStmtBits.NumStmts == Stmts.size() && - "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); +CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef Stmts, + SourceLocation LB, SourceLocation RB) { + void *Mem = + C.Allocate(totalSizeToAlloc(Stmts.size()), alignof(CompoundStmt)); + return new (Mem) CompoundStmt(Stmts, LB, RB); +} - Body = new (C) Stmt*[Stmts.size()]; - std::copy(Stmts.begin(), Stmts.end(), Body); +CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C, + unsigned NumStmts) { + void *Mem = + C.Allocate(totalSizeToAlloc(NumStmts), alignof(CompoundStmt)); + CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell()); + New->CompoundStmtBits.NumStmts = NumStmts; + return New; } const char *LabelStmt::getName() const { @@ -334,7 +337,7 @@ AttributedStmt *AttributedStmt::Create(const 
ASTContext &C, SourceLocation Loc, ArrayRef Attrs, Stmt *SubStmt) { assert(!Attrs.empty() && "Attrs should not be empty"); - void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * Attrs.size(), + void *Mem = C.Allocate(totalSizeToAlloc(Attrs.size()), alignof(AttributedStmt)); return new (Mem) AttributedStmt(Loc, Attrs, SubStmt); } @@ -342,7 +345,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc, AttributedStmt *AttributedStmt::CreateEmpty(const ASTContext &C, unsigned NumAttrs) { assert(NumAttrs > 0 && "NumAttrs should be greater than zero"); - void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * NumAttrs, + void *Mem = C.Allocate(totalSizeToAlloc(NumAttrs), alignof(AttributedStmt)); return new (Mem) AttributedStmt(EmptyShell(), NumAttrs); } diff --git a/lib/Analysis/BodyFarm.cpp b/lib/Analysis/BodyFarm.cpp index e5d3c5c..89ca848 100644 --- a/lib/Analysis/BodyFarm.cpp +++ b/lib/Analysis/BodyFarm.cpp @@ -133,7 +133,7 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS, } CompoundStmt *ASTMaker::makeCompound(ArrayRef Stmts) { - return new (C) CompoundStmt(C, Stmts, SourceLocation(), SourceLocation()); + return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation()); } DeclRefExpr *ASTMaker::makeDeclRefExpr( diff --git a/lib/Basic/Targets/AArch64.cpp b/lib/Basic/Targets/AArch64.cpp index 6080cef..4d3cd12 100644 --- a/lib/Basic/Targets/AArch64.cpp +++ b/lib/Basic/Targets/AArch64.cpp @@ -159,7 +159,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_FP_FAST", "1"); Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); + Twine(Opts.WCharSize ? Opts.WCharSize : 4)); Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? 
"1" : "4"); diff --git a/lib/Basic/Targets/ARM.cpp b/lib/Basic/Targets/ARM.cpp index fe261b7..6fb0ab4 100644 --- a/lib/Basic/Targets/ARM.cpp +++ b/lib/Basic/Targets/ARM.cpp @@ -582,7 +582,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // ACLE 6.4.4 LDREX/STREX if (LDREX) - Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + llvm::utohexstr(LDREX)); + Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX)); // ACLE 6.4.5 CLZ if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") || @@ -591,7 +591,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // ACLE 6.5.1 Hardware Floating Point if (HW_FP) - Builder.defineMacro("__ARM_FP", "0x" + llvm::utohexstr(HW_FP)); + Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP)); // ACLE predefines. Builder.defineMacro("__ARM_ACLE", "200"); @@ -672,11 +672,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // current AArch32 NEON implementations do not support double-precision // floating-point even when it is present in VFP. Builder.defineMacro("__ARM_NEON_FP", - "0x" + llvm::utohexstr(HW_FP & ~HW_FP_DP)); + "0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP)); } Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4)); + Twine(Opts.WCharSize ? Opts.WCharSize : 4)); Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); diff --git a/lib/Basic/Targets/X86.cpp b/lib/Basic/Targets/X86.cpp index bdf5cdb..3efba26 100644 --- a/lib/Basic/Targets/X86.cpp +++ b/lib/Basic/Targets/X86.cpp @@ -132,7 +132,14 @@ bool X86TargetInfo::initFeatureMap( break; case CK_Icelake: - // TODO: Add icelake features here. 
+ setFeatureEnabledImpl(Features, "vaes", true); + setFeatureEnabledImpl(Features, "gfni", true); + setFeatureEnabledImpl(Features, "vpclmulqdq", true); + setFeatureEnabledImpl(Features, "avx512bitalg", true); + setFeatureEnabledImpl(Features, "avx512vnni", true); + setFeatureEnabledImpl(Features, "avx512vbmi2", true); + setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); + setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); @@ -145,8 +152,10 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); - setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "clwb", true); + if (Kind == CK_SkylakeServer) { + setFeatureEnabledImpl(Features, "pku", true); + setFeatureEnabledImpl(Features, "clwb", true); + } LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); @@ -443,7 +452,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = - false; + Features["gfni"] = false; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; @@ -460,7 +469,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = - Features["xsaveopt"] = false; + Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false; setXOPLevel(Features, FMA4, false); LLVM_FALLTHROUGH; case AVX2: @@ -470,7 +479,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = - Features["avx512ifma"] = 
Features["avx512vpopcntdq"] = false; + Features["avx512ifma"] = Features["avx512vpopcntdq"] = + Features["avx512bitalg"] = Features["avx512vnni"] = + Features["avx512vbmi2"] = false; break; } } @@ -572,9 +583,26 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap &Features, } else if (Name == "aes") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + else + Features["vaes"] = false; + } else if (Name == "vaes") { + if (Enabled) { + setSSELevel(Features, AVX, Enabled); + Features["aes"] = true; + } } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + else + Features["vpclmulqdq"] = false; + } else if (Name == "vpclmulqdq") { + if (Enabled) { + setSSELevel(Features, AVX, Enabled); + Features["pclmul"] = true; + } + } else if (Name == "gfni") { + if (Enabled) + setSSELevel(Features, SSE2, Enabled); } else if (Name == "avx") { setSSELevel(Features, AVX, Enabled); } else if (Name == "avx2") { @@ -584,15 +612,17 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap &Features, } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || - Name == "avx512vpopcntdq") { + Name == "avx512vpopcntdq" || Name == "avx512bitalg" || + Name == "avx512vnni" || Name == "avx512vbmi2") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); - // Enable BWI instruction if VBMI is being enabled. - if (Name == "avx512vbmi" && Enabled) + // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled. + if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; - // Also disable VBMI if BWI is being disabled. + // Also disable VBMI/VBMI2/BITALG if BWI is being disabled. 
if (Name == "avx512bw" && !Enabled) - Features["avx512vbmi"] = false; + Features["avx512vbmi"] = Features["avx512vbmi2"] = + Features["avx512bitalg"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); @@ -636,8 +666,12 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, if (Feature == "+aes") { HasAES = true; + } else if (Feature == "+vaes") { + HasVAES = true; } else if (Feature == "+pclmul") { HasPCLMUL = true; + } else if (Feature == "+vpclmulqdq") { + HasVPCLMULQDQ = true; } else if (Feature == "+lzcnt") { HasLZCNT = true; } else if (Feature == "+rdrnd") { @@ -666,22 +700,30 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasFMA = true; } else if (Feature == "+f16c") { HasF16C = true; + } else if (Feature == "+gfni") { + HasGFNI = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { HasAVX512VPOPCNTDQ = true; + } else if (Feature == "+avx512vnni") { + HasAVX512VNNI = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; + } else if (Feature == "+avx512bitalg") { + HasAVX512BITALG = true; } else if (Feature == "+avx512bw") { HasAVX512BW = true; } else if (Feature == "+avx512vl") { HasAVX512VL = true; } else if (Feature == "+avx512vbmi") { HasAVX512VBMI = true; + } else if (Feature == "+avx512vbmi2") { + HasAVX512VBMI2 = true; } else if (Feature == "+avx512ifma") { HasAVX512IFMA = true; } else if (Feature == "+sha") { @@ -934,9 +976,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasAES) Builder.defineMacro("__AES__"); + if (HasVAES) + Builder.defineMacro("__VAES__"); + if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); + if (HasVPCLMULQDQ) + Builder.defineMacro("__VPCLMULQDQ__"); + if (HasLZCNT) Builder.defineMacro("__LZCNT__"); @@ -996,22 +1044,31 @@ void X86TargetInfo::getTargetDefines(const 
LangOptions &Opts, if (HasF16C) Builder.defineMacro("__F16C__"); + if (HasGFNI) + Builder.defineMacro("__GFNI__"); + if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); + if (HasAVX512VNNI) + Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); + if (HasAVX512BITALG) + Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) Builder.defineMacro("__AVX512VL__"); if (HasAVX512VBMI) Builder.defineMacro("__AVX512VBMI__"); + if (HasAVX512VBMI2) + Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); @@ -1141,12 +1198,15 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) + .Case("avx512vnni", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) + .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) + .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("bmi", true) .Case("bmi2", true) @@ -1159,6 +1219,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) + .Case("gfni", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) @@ -1185,6 +1246,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) + .Case("vaes", true) + .Case("vpclmulqdq", true) .Case("x87", true) .Case("xop", true) .Case("xsave", true) @@ -1203,12 +1266,15 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) + .Case("avx512vnni", HasAVX512VNNI) 
.Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) + .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) + .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) @@ -1221,6 +1287,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) + .Case("gfni", HasGFNI) .Case("ibt", HasIBT) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) @@ -1249,6 +1316,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) + .Case("vaes", HasVAES) + .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) diff --git a/lib/Basic/Targets/X86.h b/lib/Basic/Targets/X86.h index b181159..cbd6a2d 100644 --- a/lib/Basic/Targets/X86.h +++ b/lib/Basic/Targets/X86.h @@ -48,7 +48,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP; bool HasAES = false; + bool HasVAES = false; bool HasPCLMUL = false; + bool HasVPCLMULQDQ = false; + bool HasGFNI = false; bool HasLZCNT = false; bool HasRDRND = false; bool HasFSGSBASE = false; @@ -65,12 +68,15 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasF16C = false; bool HasAVX512CD = false; bool HasAVX512VPOPCNTDQ = false; + bool HasAVX512VNNI = false; bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; + bool HasAVX512BITALG = false; bool HasAVX512BW = false; bool HasAVX512VL = false; bool HasAVX512VBMI = false; + bool HasAVX512VBMI2 = false; bool HasAVX512IFMA = false; bool HasSHA = false; bool HasMPX = false; diff --git a/lib/CodeGen/CGBuiltin.cpp 
b/lib/CodeGen/CGBuiltin.cpp index 609987c..ba54f83 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: case X86::BI__builtin_ia32_vpopcntq_128: + case X86::BI__builtin_ia32_vpopcntw_128: + case X86::BI__builtin_ia32_vpopcntb_256: case X86::BI__builtin_ia32_vpopcntd_256: case X86::BI__builtin_ia32_vpopcntq_256: + case X86::BI__builtin_ia32_vpopcntw_256: + case X86::BI__builtin_ia32_vpopcntb_512: case X86::BI__builtin_ia32_vpopcntd_512: - case X86::BI__builtin_ia32_vpopcntq_512: { + case X86::BI__builtin_ia32_vpopcntq_512: + case X86::BI__builtin_ia32_vpopcntw_512: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); return Builder.CreateCall(F, Ops); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 90eeddf..c7dc833 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -570,7 +570,7 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) { return TCK == TCK_DowncastPointer || TCK == TCK_Upcast || - TCK == TCK_UpcastToVirtualBase; + TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation; } bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { @@ -578,7 +578,7 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { return (RD && RD->hasDefinition() && RD->isDynamicClass()) && (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || - TCK == TCK_UpcastToVirtualBase); + TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation); } bool CodeGenFunction::sanitizePerformTypeCheck() const { diff --git 
a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 0749b0a..c32f1e5 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -2056,6 +2056,15 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, // Get the vtable pointer. Address ThisPtr = CGF.EmitLValue(E).getAddress(); + QualType SrcRecordTy = E->getType(); + + // C++ [class.cdtor]p4: + // If the operand of typeid refers to the object under construction or + // destruction and the static type of the operand is neither the constructor + // or destructor’s class nor one of its bases, the behavior is undefined. + CGF.EmitTypeCheck(CodeGenFunction::TCK_DynamicOperation, E->getExprLoc(), + ThisPtr.getPointer(), SrcRecordTy); + // C++ [expr.typeid]p2: // If the glvalue expression is obtained by applying the unary * operator to // a pointer and the pointer is a null pointer value, the typeid expression @@ -2064,7 +2073,6 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E, // However, this paragraph's intent is not clear. We choose a very generous // interpretation which implores us to consider comma operators, conditional // operators, parentheses and other such constructs. 
- QualType SrcRecordTy = E->getType(); if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked( isGLValueFromPointerDeref(E), SrcRecordTy)) { llvm::BasicBlock *BadTypeidBlock = @@ -2127,10 +2135,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, CGM.EmitExplicitCastExprType(DCE, this); QualType DestTy = DCE->getTypeAsWritten(); - if (DCE->isAlwaysNull()) - if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) - return T; - QualType SrcTy = DCE->getSubExpr()->getType(); // C++ [expr.dynamic.cast]p7: @@ -2151,6 +2155,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, DestRecordTy = DestTy->castAs()->getPointeeType(); } + // C++ [class.cdtor]p5: + // If the operand of the dynamic_cast refers to the object under + // construction or destruction and the static type of the operand is not a + // pointer to or object of the constructor or destructor’s own class or one + // of its bases, the dynamic_cast results in undefined behavior. + EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(), + SrcRecordTy); + + if (DCE->isAlwaysNull()) + if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy)) + return T; + assert(SrcRecordTy->isRecordType() && "source type must be a record type!"); // C++ [expr.dynamic.cast]p4: diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 5db29eb..fa38ee8 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4175,14 +4175,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF, auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); LValue SrcBase; - if (!Data.FirstprivateVars.empty()) { + bool IsTargetTask = + isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); + // For target-based directives skip 3 firstprivate arrays BasePointersArray, + // PointersArray and SizesArray. 
The original variables for these arrays are + // not captured and we get their addresses explicitly. + if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || + (IsTargetTask && Data.FirstprivateVars.size() > 3)) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast(*D.getAssociatedStmt())); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? OMPD_taskloop + : OMPD_task; + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast(FI->getType()->getAsTagDecl())->field_begin(); for (auto &&Pair : Privates) { auto *VD = Pair.second.PrivateCopy; @@ -4192,14 +4201,27 @@ static void emitPrivatesInit(CodeGenFunction &CGF, LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), - LValueBaseInfo(AlignmentSource::Decl), - SharedRefLValue.getTBAAInfo()); + // Check if the variable is the target-based BasePointersArray, + // PointersArray or SizesArray. 
+ LValue SharedRefLValue; QualType Type = OriginalVD->getType(); + if (IsTargetTask && isa(OriginalVD) && + isa(OriginalVD->getDeclContext()) && + cast(OriginalVD->getDeclContext())->getNumParams() == + 0 && + isa( + cast(OriginalVD->getDeclContext()) + ->getDeclContext())) { + SharedRefLValue = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); + } else { + auto *SharedField = CapturesInfo.lookup(OriginalVD); + SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); + } if (Type->isArrayType()) { // Initialize firstprivate array. if (!isa(Init) || CGF.isTrivialInitializer(Init)) { @@ -4400,8 +4422,10 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } KmpTaskTQTy = SavedKmpTaskloopTQTy; } else { - assert(D.getDirectiveKind() == OMPD_task && - "Expected taskloop or task directive"); + assert((D.getDirectiveKind() == OMPD_task || + isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || + isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && + "Expected taskloop, task or target directive"); if (SavedKmpTaskTQTy.isNull()) { SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); @@ -7417,8 +7441,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. - auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&BeginThenGen = [this, &D, Device, &Info, + &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. 
MappableExprsHandler::MapBaseValuesArrayTy BasePointers; MappableExprsHandler::MapValuesArrayTy Pointers; @@ -7454,8 +7478,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), OffloadingArgs); // If device pointer privatization is required, emit the body of the region @@ -7465,7 +7488,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, + PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; @@ -7490,8 +7514,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - auto &RT = CGF.CGM.getOpenMPRuntime(); - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), OffloadingArgs); }; @@ -7543,25 +7566,11 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( isa(D)) && "Expecting either target enter, exit data, or update directives."); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Generate the code for the opening of the data environment. - auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the mapped variables. 
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); - - // Fill up the arrays and create the arguments. - TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, &D, Device, &InputInfo, + &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { // Emit device ID if any. llvm::Value *DeviceID = nullptr; if (Device) { @@ -7572,13 +7581,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Constant *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); - llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; - auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. const bool HasNowait = D.hasClausesOfKind(); @@ -7600,18 +7612,47 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( : OMPRTL__tgt_target_data_update; break; } - CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); + CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); }; - // In the event we get an if clause, we don't have to take any action on the - // else side. 
- auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( + CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; - if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - RegionCodeGenTy ThenGenRCG(ThenGen); - ThenGenRCG(CGF); + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (D.hasClausesOfKind()) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, OMPD_target_update, ThenGen); + }; + + if (IfCond) + emitOMPIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); + else { + RegionCodeGenTy ThenRCG(TargetThenGen); + ThenRCG(CGF); } } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index b5fc8d3..7b2993c 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -33,10 +33,11 @@ enum OpenMPRTLFunctionNVPTX { /// \brief Call to void __kmpc_spmd_kernel_deinit(); 
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, void ***args, kmp_int32 nArgs); + /// *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void - /// ***args); + /// ***args, int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_parallel, /// \brief Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -521,7 +522,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Set up shared arguments Address SharedArgs = CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args"); - llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()}; + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -637,18 +640,21 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, void ***args, kmp_int32 nArgs); + /// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t + /// IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty, + CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void 
**outlined_function, void ***args); + /// Build bool __kmpc_kernel_parallel(void **outlined_function, void + /// ***args, int16_t IsOMPRuntimeInitialized); llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, - CGM.Int8PtrPtrTy->getPointerTo(0)}; + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -949,8 +955,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_args"); llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); + // TODO: Optimize runtime initialization and pass in correct value. llvm::Value *Args[] = {ID, SharedArgsPtr, - Bld.getInt32(CapturedVars.size())}; + Bld.getInt32(CapturedVars.size()), + /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), @@ -970,9 +978,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( Idx++; } } else { - llvm::Value *Args[] = {ID, - llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), - /*nArgs=*/Bld.getInt32(0)}; + // TODO: Optimize runtime initialization and pass in correct value. 
+ llvm::Value *Args[] = { + ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)), + /*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index f04d28e..f986173 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -2907,6 +2907,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, TaskGen(*this, OutlinedFn, Data); } +static ImplicitParamDecl * +createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, + QualType Ty, CapturedDecl *CD) { + auto *OrigVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *OrigRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), Ty, VK_LValue); + auto *PrivateVD = ImplicitParamDecl::Create( + C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other); + auto *PrivateRef = DeclRefExpr::Create( + C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, + /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty, + VK_LValue); + QualType ElemType = C.getBaseElementType(Ty); + auto *InitVD = + ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr, + ElemType, ImplicitParamDecl::Other); + auto *InitRef = + DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, + /*RefersToEnclosingVariableOrCapture=*/false, + SourceLocation(), ElemType, VK_LValue); + PrivateVD->setInitStyle(VarDecl::CInit); + PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, + InitRef, /*BasePath=*/nullptr, + VK_RValue)); + Data.FirstprivateVars.emplace_back(OrigRef); + Data.FirstprivateCopies.emplace_back(PrivateRef); + Data.FirstprivateInits.emplace_back(InitRef); + 
return OrigVD; +} + +void CodeGenFunction::EmitOMPTargetTaskBasedDirective( + const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo) { + // Emit outlined function for task construct. + auto CS = S.getCapturedStmt(OMPD_task); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + auto *I = CS->getCapturedDecl()->param_begin(); + auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + OMPTaskDataTy Data; + // The task is not final. + Data.Final.setInt(/*IntVal=*/false); + // Get list of firstprivate variables. + for (const auto *C : S.getClausesOfKind()) { + auto IRef = C->varlist_begin(); + auto IElemInitRef = C->inits().begin(); + for (auto *IInit : C->private_copies()) { + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); + ++IRef; + ++IElemInitRef; + } + } + OMPPrivateScope TargetScope(*this); + VarDecl *BPVD = nullptr; + VarDecl *PVD = nullptr; + VarDecl *SVD = nullptr; + if (InputInfo.NumberOfTargetItems > 0) { + auto *CD = CapturedDecl::Create( + getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); + llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); + QualType BaseAndPointersType = getContext().getConstantArrayType( + getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + BPVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + PVD = createImplicitFirstprivateForType(getContext(), Data, + BaseAndPointersType, CD); + QualType SizesType = getContext().getConstantArrayType( + getContext().getSizeType(), ArrSize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD); + TargetScope.addPrivate( + BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); + TargetScope.addPrivate(PVD, + 
[&InputInfo]() { return InputInfo.PointersArray; }); + TargetScope.addPrivate(SVD, + [&InputInfo]() { return InputInfo.SizesArray; }); + } + (void)TargetScope.Privatize(); + // Build list of dependences. + for (const auto *C : S.getClausesOfKind()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, + &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { + // Set proper addresses for generated private copies. + OMPPrivateScope Scope(CGF); + if (!Data.FirstprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector, 16> PrivatePtrs; + llvm::SmallVector CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : Data.FirstprivateVars) { + auto *VD = cast(cast(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } + } + // Privatize all private variables except for in_reduction items. 
+ (void)Scope.Privatize(); + InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + + Action.Enter(CGF); + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true, + /*EmitPreInitStmt=*/false); + BodyGen(CGF); + }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, + Data.NumberOfParts); + llvm::APInt TrueOrFalse(32, S.hasClausesOfKind() ? 1 : 0); + IntegerLiteral IfCond(getContext(), TrueOrFalse, + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, &IfCond, Data); +} + void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. 
auto CS = cast(S.getAssociatedStmt()); @@ -4252,14 +4397,8 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } void CodeGenFunction::EmitOMPTargetExitDataDirective( @@ -4279,14 +4418,8 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } static void emitTargetParallelRegion(CodeGenFunction &CGF, @@ -4585,12 +4718,6 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( if (auto *C = S.getSingleClause()) Device = C->getDevice(); - auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, - Device); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, - CodeGen); + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index cd62d00..dd4c2e4 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2371,7 +2371,10 @@ public: /// object within its lifetime. 
TCK_UpcastToVirtualBase, /// Checking the value assigned to a _Nonnull pointer. Must not be null. - TCK_NonnullAssign + TCK_NonnullAssign, + /// Checking the operand of a dynamic_cast or a typeid expression. Must be + /// null or an object within its lifetime. + TCK_DynamicOperation }; /// Determine whether the pointer type check \p TCK permits null pointers. @@ -2820,6 +2823,20 @@ public: void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, OMPTaskDataTy &Data); + struct OMPTargetDataInfo { + Address BasePointersArray = Address::invalid(); + Address PointersArray = Address::invalid(); + Address SizesArray = Address::invalid(); + unsigned NumberOfTargetItems = 0; + explicit OMPTargetDataInfo() = default; + OMPTargetDataInfo(Address BasePointersArray, Address PointersArray, + Address SizesArray, unsigned NumberOfTargetItems) + : BasePointersArray(BasePointersArray), PointersArray(PointersArray), + SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {} + }; + void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + OMPTargetDataInfo &InputInfo); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp index 6ba8892..3c985a1 100644 --- a/lib/Driver/SanitizerArgs.cpp +++ b/lib/Driver/SanitizerArgs.cpp @@ -794,7 +794,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (MsanTrackOrigins) CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" + - llvm::utostr(MsanTrackOrigins))); + Twine(MsanTrackOrigins))); if (MsanUseAfterDtor) CmdArgs.push_back("-fsanitize-memory-use-after-dtor"); @@ -829,7 +829,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (AsanFieldPadding) 
CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" + - llvm::utostr(AsanFieldPadding))); + Twine(AsanFieldPadding))); if (AsanUseAfterScope) CmdArgs.push_back("-fsanitize-address-use-after-scope"); diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp index 7b3f4bc..8b895c4 100644 --- a/lib/Driver/ToolChains/Clang.cpp +++ b/lib/Driver/ToolChains/Clang.cpp @@ -1738,10 +1738,9 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back("-Wreturn-type"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - std::string Opt = std::string("-hexagon-small-data-threshold=") + N; CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString(Opt)); + CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" + + Twine(G.getValue()))); } if (!Args.hasArg(options::OPT_fno_short_enums)) diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp index 60f96d0..f268801 100644 --- a/lib/Driver/ToolChains/CommonArgs.cpp +++ b/lib/Driver/ToolChains/CommonArgs.cpp @@ -419,8 +419,8 @@ void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back("-plugin-opt=thinlto"); if (unsigned Parallelism = getLTOParallelism(Args, D)) - CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") + - llvm::to_string(Parallelism))); + CmdArgs.push_back( + Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism))); // If an explicit debugger tuning argument appeared, pass it along. 
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group, diff --git a/lib/Driver/ToolChains/Darwin.cpp b/lib/Driver/ToolChains/Darwin.cpp index 289f4ed..2250e82 100644 --- a/lib/Driver/ToolChains/Darwin.cpp +++ b/lib/Driver/ToolChains/Darwin.cpp @@ -545,8 +545,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (unsigned Parallelism = getLTOParallelism(Args, getToolChain().getDriver())) { CmdArgs.push_back("-mllvm"); - CmdArgs.push_back( - Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism))); + CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism))); } if (getToolChain().ShouldLinkCXXStdlib(Args)) diff --git a/lib/Driver/ToolChains/Hexagon.cpp b/lib/Driver/ToolChains/Hexagon.cpp index f21af5b..2debf0e 100644 --- a/lib/Driver/ToolChains/Hexagon.cpp +++ b/lib/Driver/ToolChains/Hexagon.cpp @@ -138,16 +138,15 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const Driver &D = HTC.getDriver(); ArgStringList CmdArgs; - std::string MArchString = "-march=hexagon"; - CmdArgs.push_back(Args.MakeArgString(MArchString)); + CmdArgs.push_back("-march=hexagon"); RenderExtraToolArgs(JA, CmdArgs); - std::string AsName = "hexagon-llvm-mc"; - std::string MCpuString = "-mcpu=hexagon" + - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); + const char *AsName = "hexagon-llvm-mc"; CmdArgs.push_back("-filetype=obj"); - CmdArgs.push_back(Args.MakeArgString(MCpuString)); + CmdArgs.push_back(Args.MakeArgString( + "-mcpu=hexagon" + + toolchains::HexagonToolChain::GetTargetCPUVersion(Args))); if (Output.isFilename()) { CmdArgs.push_back("-o"); @@ -158,8 +157,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N)); + CmdArgs.push_back(Args.MakeArgString("-gpsize=" + 
Twine(G.getValue()))); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); @@ -192,7 +190,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, II.getInputArg().render(Args, CmdArgs); } - auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str())); + auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName)); C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } @@ -243,10 +241,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back(Opt.c_str()); CmdArgs.push_back("-march=hexagon"); - std::string CpuVer = - toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str(); - std::string MCpuString = "-mcpu=hexagon" + CpuVer; - CmdArgs.push_back(Args.MakeArgString(MCpuString)); + StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); + CmdArgs.push_back(Args.MakeArgString("-mcpu=hexagon" + CpuVer)); if (IsShared) { CmdArgs.push_back("-shared"); @@ -261,8 +257,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back("-pie"); if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { - std::string N = llvm::utostr(G.getValue()); - CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N)); + CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue()))); UseG0 = G.getValue() == 0; } @@ -291,7 +286,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, //---------------------------------------------------------------------------- // Start Files //---------------------------------------------------------------------------- - const std::string MCpuSuffix = "/" + CpuVer; + const std::string MCpuSuffix = "/" + CpuVer.str(); const std::string MCpuG0Suffix = MCpuSuffix + "/G0"; const std::string RootDir = HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/"; @@ -351,7 +346,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, CmdArgs.push_back("--start-group"); if 
(!IsShared) { - for (const std::string &Lib : OsLibs) + for (StringRef Lib : OsLibs) CmdArgs.push_back(Args.MakeArgString("-l" + Lib)); CmdArgs.push_back("-lc"); } diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 708cfaf..97ba3ed 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -7,6 +7,8 @@ set(files arm64intr.h avx2intrin.h avx512bwintrin.h + avx512bitalgintrin.h + avx512vlbitalgintrin.h avx512cdintrin.h avx512vpopcntdqintrin.h avx512dqintrin.h @@ -17,11 +19,15 @@ set(files avx512pfintrin.h avx512vbmiintrin.h avx512vbmivlintrin.h + avx512vbmi2intrin.h + avx512vlvbmi2intrin.h avx512vlbwintrin.h avx512vlcdintrin.h avx512vldqintrin.h avx512vlintrin.h avx512vpopcntdqvlintrin.h + avx512vnniintrin.h + avx512vlvnniintrin.h avxintrin.h bmi2intrin.h bmiintrin.h @@ -42,6 +48,7 @@ set(files fma4intrin.h fmaintrin.h fxsrintrin.h + gfniintrin.h htmintrin.h htmxlintrin.h ia32intrin.h @@ -82,8 +89,10 @@ set(files tmmintrin.h unwind.h vadefs.h + vaesintrin.h varargs.h vecintrin.h + vpclmulqdqintrin.h wmmintrin.h __wmmintrin_aes.h __wmmintrin_pclmul.h diff --git a/lib/Headers/avx512bitalgintrin.h b/lib/Headers/avx512bitalgintrin.h new file mode 100644 index 0000000..2dd1471 --- /dev/null +++ b/lib/Headers/avx512bitalgintrin.h @@ -0,0 +1,97 @@ +/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512BITALGINTRIN_H +#define __AVX512BITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi16(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, + (__v32hi) _mm512_popcnt_epi16(__B), + (__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) +{ + return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), + __U, + __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi8(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, + (__v64qi) _mm512_popcnt_epi8(__B), + (__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) +{ + return 
_mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), + __U, + __B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, + (__v64qi) __B, + __U); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) +{ + return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vbmi2intrin.h b/lib/Headers/avx512vbmi2intrin.h new file mode 100644 index 0000000..43e97b4 --- /dev/null +++ b/lib/Headers/avx512vbmi2intrin.h @@ -0,0 +1,391 @@ +/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VBMI2INTRIN_H +#define __AVX512VBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) +{ + __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) +{ + __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi16(__mmask32 __U, 
__m512i __D) +{ + return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) +{ + return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, + (__v32hi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, + (__v32hi) _mm512_setzero_hi(), + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, + (__v64qi) __S, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, + (__v64qi) _mm512_setzero_qi(), + __U); +} + +#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ + (__v8di)(B), \ + (int)(I), \ + (__v8di)(S), \ + (__mmask8)(U)); }) + +#define _mm512_maskz_shldi_epi64(U, A, B, I) \ + _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi64(A, B, I) \ + _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + 
(__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ + (__v16si)(B), \ + (int)(I), \ + (__v16si)(S), \ + (__mmask16)(U)); }) + +#define _mm512_maskz_shldi_epi32(U, A, B, I) \ + _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi32(A, B, I) \ + _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ + (__v32hi)(B), \ + (int)(I), \ + (__v32hi)(S), \ + (__mmask32)(U)); }) + +#define _mm512_maskz_shldi_epi16(U, A, B, I) \ + _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shldi_epi16(A, B, I) \ + _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ + (__v8di)(B), \ + (int)(I), \ + (__v8di)(S), \ + (__mmask8)(U)); }) + +#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ + _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi64(A, B, I) \ + _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ + (__v16si)(B), \ + (int)(I), \ + (__v16si)(S), \ + (__mmask16)(U)); }) + +#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ + _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define _mm512_shrdi_epi32(A, B, I) \ + _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) + +#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ + (__v32hi)(B), \ + (int)(I), \ + (__v32hi)(S), \ + (__mmask32)(U)); }) + +#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ + _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + +#define 
_mm512_shrdi_epi16(A, B, I) \ + _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, + (__v32hi) __A, + 
(__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, + (__v8di) __A, + (__v8di) __B, + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS 
+_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, + (__v32hi) __A, + (__v32hi) __B, + (__mmask32) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif + diff --git a/lib/Headers/avx512vlbitalgintrin.h b/lib/Headers/avx512vlbitalgintrin.h new file mode 100644 index 0000000..76eb877 --- /dev/null +++ b/lib/Headers/avx512vlbitalgintrin.h @@ -0,0 +1,157 @@ +/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VLBITALGINTRIN_H +#define __AVX512VLBITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi16(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, + (__v16hi) _mm256_popcnt_epi16(__B), + (__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, + (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi8(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, + (__v32qi) _mm256_popcnt_epi8(__B), + (__v32qi) __A); +} + +static __inline__ __m256i 
__DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, + (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, + (__v32qi) __B, + __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +{ + return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + __A, + __B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, + (__v16qi) __B, + __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +{ + return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vlvbmi2intrin.h b/lib/Headers/avx512vlvbmi2intrin.h new file mode 100644 index 0000000..d1ec497 --- /dev/null +++ b/lib/Headers/avx512vlvbmi2intrin.h @@ -0,0 +1,748 @@ +/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software 
and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VLVBMI2INTRIN_H +#define __AVX512VLVBMI2INTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm128_setzero_hi(void) { + return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) +{ + __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) +{ + __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) +{ + return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, + (__v8hi) _mm128_setzero_hi(), + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) __S, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) +{ + return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, + (__v16qi) _mm128_setzero_hi(), + __U); +} + +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_setzero_hi(void) { + return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_compress_epi8(__m256i __S, 
__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) +{ + __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, + __U); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) +{ + __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) +{ + return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) +{ + return (__m256i) 
__builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, + (__v16hi) _mm256_setzero_hi(), + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) __S, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) +{ + return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, + (__v32qi) _mm256_setzero_hi(), + __U); +} + +#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ + (__v4di)(B), \ + (int)(I), \ + (__v4di)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi64(U, A, B, I) \ + _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi64(A, B, I) \ + _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ + (__v2di)(B), \ + (int)(I), \ + (__v2di)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi64(U, A, B, I) \ + _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi64(A, B, I) \ + _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ + (__v8si)(B), \ + (int)(I), \ + (__v8si)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shldi_epi32(U, A, B, I) \ + _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi32(A, B, I) \ + _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), 
\ + (__v4si)(B), \ + (int)(I), \ + (__v4si)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi32(U, A, B, I) \ + _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi32(A, B, I) \ + _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ + (__v16hi)(B), \ + (int)(I), \ + (__v16hi)(S), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_shldi_epi16(U, A, B, I) \ + _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shldi_epi16(A, B, I) \ + _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ + (__v8hi)(B), \ + (int)(I), \ + (__v8hi)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shldi_epi16(U, A, B, I) \ + _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shldi_epi16(A, B, I) \ + _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ + (__v4di)(B), \ + (int)(I), \ + (__v4di)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ + _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi64(A, B, I) \ + _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ + (__v2di)(B), \ + (int)(I), \ + (__v2di)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi64(U, A, B, I) \ + _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi64(A, B, I) \ + 
_mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ + (__v8si)(B), \ + (int)(I), \ + (__v8si)(S), \ + (__mmask8)(U)); }) + +#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ + _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi32(A, B, I) \ + _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ + (__v4si)(B), \ + (int)(I), \ + (__v4si)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi32(U, A, B, I) \ + _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi32(A, B, I) \ + _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ + (__v16hi)(B), \ + (int)(I), \ + (__v16hi)(S), \ + (__mmask16)(U)); }) + +#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ + _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I)) + +#define _mm256_shrdi_epi16(A, B, I) \ + _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) + +#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ + (__v8hi)(B), \ + (int)(I), \ + (__v8hi)(S), \ + (__mmask8)(U)); }) + +#define _mm128_maskz_shrdi_epi16(U, A, B, I) \ + _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I)) + +#define _mm128_shrdi_epi16(A, B, I) \ + _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask 
((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) 
__builtin_ia32_vpshldvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, + (__v4di) __A, + (__v4di) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, + (__v2di) __A, + (__v2di) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + __U); +} + 
+static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, + (__v16hi) __A, + (__v16hi) __B, + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i 
__A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, + (__v8hi) __A, + (__v8hi) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vlvnniintrin.h b/lib/Headers/avx512vlvnniintrin.h new file mode 100644 index 0000000..745ae8b --- /dev/null +++ b/lib/Headers/avx512vlvnniintrin.h @@ -0,0 +1,254 @@ +/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __AVX512VLVNNIINTRIN_H +#define __AVX512VLVNNIINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ 
__m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, + (__v8si) __A, + (__v8si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, + (__v4si) __A, + (__v4si) __B, + (__mmask8) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/avx512vnniintrin.h b/lib/Headers/avx512vnniintrin.h new file mode 100644 index 0000000..0c6badd --- /dev/null +++ b/lib/Headers/avx512vnniintrin.h @@ -0,0 +1,146 @@ +/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VNNIINTRIN_H +#define __AVX512VNNIINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + 
return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, + (__v16si) __A, + (__v16si) __B, + (__mmask16) -1); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/lib/Headers/gfniintrin.h b/lib/Headers/gfniintrin.h new file mode 100644 index 0000000..20fadcc --- /dev/null +++ b/lib/Headers/gfniintrin.h @@ -0,0 +1,202 @@ +/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __GFNIINTRIN_H +#define __GFNIINTRIN_H + + +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + 
(__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, B, I); }) + +#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + 
(__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, B, I); }) + +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) + +/* Default attributes for VLX forms. */ +#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, + (__v16qi) __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128(__U, + (__v16qi) _mm_gf2p8mul_epi8(__A, __B), + (__v16qi) __S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) +{ + return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), + __U, __A, __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, + (__v32qi) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256(__U, + (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), + (__v32qi) __S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) +{ + return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), + __U, __A, __B); +} + 
+static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, + (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512(__U, + (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), + (__v64qi) __S); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) +{ + return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), + __U, __A, __B); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F +#undef __DEFAULT_FN_ATTRS_VL + +#endif // __GFNIINTRIN_H + diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 64ad6e6..d3421dc 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -118,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a) } #endif /* __AVX2__ */ +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__) #include #endif @@ -146,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) #include #endif @@ -159,11 +167,25 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VNNI__)) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include #endif #if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512BITALG__)) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ 
(defined(__AVX512VL__) && defined(__AVX512BW__)) #include #endif @@ -200,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__) #include #endif @@ -208,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) diff --git a/lib/Headers/vaesintrin.h b/lib/Headers/vaesintrin.h new file mode 100644 index 0000000..efbb8a5 --- /dev/null +++ b/lib/Headers/vaesintrin.h @@ -0,0 +1,98 @@ +/*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __VAESINTRIN_H +#define __VAESINTRIN_H + +/* Default attributes for YMM forms. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenc_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenc_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdec_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdec_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) 
__builtin_ia32_aesdeclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, + (__v8di) __B); +} + + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F + +#endif diff --git a/lib/Headers/vpclmulqdqintrin.h b/lib/Headers/vpclmulqdqintrin.h new file mode 100644 index 0000000..21cda22 --- /dev/null +++ b/lib/Headers/vpclmulqdqintrin.h @@ -0,0 +1,42 @@ +/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __VPCLMULQDQINTRIN_H +#define __VPCLMULQDQINTRIN_H + +#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), \ + (char)(I)); }) + +#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (char)(I)); }) + +#endif // __VPCLMULQDQINTRIN_H + diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 9fe4309..2a99939 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -1548,15 +1548,21 @@ void Parser::DiagnoseMisplacedCXX11Attribute(ParsedAttributesWithRange &Attrs, SourceLocation Loc = Tok.getLocation(); ParseCXX11Attributes(Attrs); CharSourceRange AttrRange(SourceRange(Loc, Attrs.Range.getEnd()), true); - + // FIXME: use err_attributes_misplaced Diag(Loc, diag::err_attributes_not_allowed) << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange) << FixItHint::CreateRemoval(AttrRange); } -void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs) { - Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) - << attrs.Range; +void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs, + const SourceLocation CorrectLocation) { + if (CorrectLocation.isValid()) { + CharSourceRange AttrRange(attrs.Range, true); + Diag(CorrectLocation, diag::err_attributes_misplaced) + << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange) + << FixItHint::CreateRemoval(AttrRange); + } else + Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) << attrs.Range; } void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs, diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 72d6537..8aa50a2 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -930,7 +930,31 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs, // C99 6.7.2.3p6: Handle 
"struct-or-union identifier;", "enum { X };" // declaration-specifiers init-declarator-list[opt] ';' if (Tok.is(tok::semi)) { - ProhibitAttributes(attrs); + auto LengthOfTSTToken = [](DeclSpec::TST TKind) { + assert(DeclSpec::isDeclRep(TKind)); + switch(TKind) { + case DeclSpec::TST_class: + return 5; + case DeclSpec::TST_struct: + return 6; + case DeclSpec::TST_union: + return 5; + case DeclSpec::TST_enum: + return 4; + case DeclSpec::TST_interface: + return 9; + default: + llvm_unreachable("we only expect to get the length of the class/struct/union/enum"); + } + + }; + // Suggest correct location to fix '[[attrib]] struct' to 'struct [[attrib]]' + SourceLocation CorrectLocationForAttributes = + DeclSpec::isDeclRep(DS.getTypeSpecType()) + ? DS.getTypeSpecTypeLoc().getLocWithOffset( + LengthOfTSTToken(DS.getTypeSpecType())) + : SourceLocation(); + ProhibitAttributes(attrs, CorrectLocationForAttributes); ConsumeToken(); RecordDecl *AnonRecord = nullptr; Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index f2fb95c..aa26b37 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -12265,11 +12265,10 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( // Construct the body of the conversion function { return __invoke; }. 
Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(), VK_LValue, Conv->getLocation()).get(); - assert(FunctionRef && "Can't refer to __invoke function?"); - Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); - Conv->setBody(new (Context) CompoundStmt(Context, Return, - Conv->getLocation(), - Conv->getLocation())); + assert(FunctionRef && "Can't refer to __invoke function?"); + Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get(); + Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(), + Conv->getLocation())); Conv->markUsed(Context); Conv->setReferenced(); @@ -12330,9 +12329,8 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( // Set the body of the conversion function. Stmt *ReturnS = Return.get(); - Conv->setBody(new (Context) CompoundStmt(Context, ReturnS, - Conv->getLocation(), - Conv->getLocation())); + Conv->setBody(CompoundStmt::Create(Context, ReturnS, Conv->getLocation(), + Conv->getLocation())); Conv->markUsed(Context); // We're done; notify the mutation listener, if any. diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index 9c842de..cff9fbb 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -6265,9 +6265,8 @@ Stmt *Sema::MaybeCreateStmtWithCleanups(Stmt *SubStmt) { // a StmtExpr; currently this is only used for asm statements. // This is hacky, either create a new CXXStmtWithTemporaries statement or // a new AsmStmtWithTemporaries. 
- CompoundStmt *CompStmt = new (Context) CompoundStmt(Context, SubStmt, - SourceLocation(), - SourceLocation()); + CompoundStmt *CompStmt = CompoundStmt::Create( + Context, SubStmt, SourceLocation(), SourceLocation()); Expr *E = new (Context) StmtExpr(CompStmt, Context.VoidTy, SourceLocation(), SourceLocation()); return MaybeCreateExprWithCleanups(E); diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index d3f91a4..a9db973 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -1589,7 +1589,7 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) { return false; // Find the extra places where we need to look. - llvm::DenseSet &LookupModules = SemaRef.getLookupModules(); + const auto &LookupModules = SemaRef.getLookupModules(); if (LookupModules.empty()) return false; @@ -1604,7 +1604,8 @@ bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) { // Check whether DeclModule is transitively exported to an import of // the lookup set. return std::any_of(LookupModules.begin(), LookupModules.end(), - [&](Module *M) { return M->isModuleVisible(DeclModule); }); + [&](const Module *M) { + return M->isModuleVisible(DeclModule); }); } bool Sema::isVisibleSlow(const NamedDecl *D) { diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 0880b2d..24b58e8 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1297,7 +1297,8 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) { Level, /*NotLastprivate=*/true) && // If the variable is artificial and must be captured by value - try to // capture by value. - !(isa(D) && D->hasAttr()); + !(isa(D) && !D->hasAttr() && + !cast(D)->getInit()->isGLValue()); } // When passing data by copy, we need to make sure it fits the uintptr size @@ -2326,7 +2327,6 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, ASTContext &C = S.getASTContext(); Expr *Init = AsExpression ? 
CaptureExpr : CaptureExpr->IgnoreImpCasts(); QualType Ty = Init->getType(); - Attr *OMPCaptureKind = nullptr; if (CaptureExpr->getObjectKind() == OK_Ordinary && CaptureExpr->isGLValue()) { if (S.getLangOpts().CPlusPlus) { Ty = C.getLValueReferenceType(Ty); @@ -2339,16 +2339,11 @@ static OMPCapturedExprDecl *buildCaptureDecl(Sema &S, IdentifierInfo *Id, Init = Res.get(); } WithInit = true; - } else if (AsExpression) { - // This variable must be captured by value. - OMPCaptureKind = OMPCaptureKindAttr::CreateImplicit(C, OMPC_unknown); } auto *CED = OMPCapturedExprDecl::Create(C, S.CurContext, Id, Ty, CaptureExpr->getLocStart()); if (!WithInit) CED->addAttr(OMPCaptureNoInitAttr::CreateImplicit(C, SourceRange())); - if (OMPCaptureKind) - CED->addAttr(OMPCaptureKind); S.CurContext->addHiddenDecl(CED); S.AddInitializerToDecl(CED, Init, /*DirectInit=*/false); return CED; @@ -7628,6 +7623,11 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_teams_distribute_parallel_for_simd: CaptureRegion = OMPD_teams; break; + case OMPD_target_update: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + CaptureRegion = OMPD_task; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: @@ -7644,9 +7644,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_target_data: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: // Do not capture if-clause expressions. 
break; case OMPD_threadprivate: @@ -8007,15 +8004,17 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( break; case OMPC_device: switch (DKind) { + case OMPD_target_update: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + CaptureRegion = OMPD_task; + break; case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: case OMPD_target_data: - case OMPD_target_enter_data: - case OMPD_target_exit_data: - case OMPD_target_update: case OMPD_target: case OMPD_target_simd: case OMPD_target_parallel: diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp index ff0f4d9..4474d62 100644 --- a/lib/Sema/SemaStmt.cpp +++ b/lib/Sema/SemaStmt.cpp @@ -388,7 +388,7 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R, DiagnoseEmptyLoopBody(Elts[i], Elts[i + 1]); } - return new (Context) CompoundStmt(Context, Elts, L, R); + return CompoundStmt::Create(Context, Elts, L, R); } StmtResult diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1deb863..d8af8f3 100644 --- a/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3932,22 +3932,22 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, TemplateArgs)) return; - if (CXXConstructorDecl *Ctor = dyn_cast(Function)) { - // If this is a constructor, instantiate the member initializers. - InstantiateMemInitializers(Ctor, cast(PatternDecl), - TemplateArgs); - - // If this is an MS ABI dllexport default constructor, instantiate any - // default arguments. 
- if (Context.getTargetInfo().getCXXABI().isMicrosoft() && - Ctor->isDefaultConstructor()) { - InstantiateDefaultCtorDefaultArgs(*this, Ctor); - } - } - if (PatternDecl->hasSkippedBody()) { ActOnSkippedFunctionBody(Function); } else { + if (CXXConstructorDecl *Ctor = dyn_cast(Function)) { + // If this is a constructor, instantiate the member initializers. + InstantiateMemInitializers(Ctor, cast(PatternDecl), + TemplateArgs); + + // If this is an MS ABI dllexport default constructor, instantiate any + // default arguments. + if (Context.getTargetInfo().getCXXABI().isMicrosoft() && + Ctor->isDefaultConstructor()) { + InstantiateDefaultCtorDefaultArgs(*this, Ctor); + } + } + // Instantiate the function body. StmtResult Body = SubstStmt(Pattern, TemplateArgs); diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index 8ef1491..6163b81 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -119,7 +119,7 @@ void ASTStmtReader::VisitCompoundStmt(CompoundStmt *S) { unsigned NumStmts = Record.readInt(); while (NumStmts--) Stmts.push_back(Record.readSubStmt()); - S->setStmts(Record.getContext(), Stmts); + S->setStmts(Stmts); S->LBraceLoc = ReadSourceLocation(); S->RBraceLoc = ReadSourceLocation(); } @@ -3081,7 +3081,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; case STMT_COMPOUND: - S = new (Context) CompoundStmt(Empty); + S = CompoundStmt::CreateEmpty( + Context, /*NumStmts=*/Record[ASTStmtReader::NumStmtFields]); break; case STMT_CASE: diff --git a/test/CodeGen/attr-target-x86.c b/test/CodeGen/attr-target-x86.c index 6ec2d65..9e46de7 100644 --- a/test/CodeGen/attr-target-x86.c +++ b/test/CodeGen/attr-target-x86.c @@ -38,9 +38,9 @@ int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; } // CHECK: lake{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" 
"target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-xop,-xsave,-xsaveopt" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx" 
diff --git a/test/CodeGen/avx512bitalg-builtins.c b/test/CodeGen/avx512bitalg-builtins.c new file mode 100644 index 0000000..5770c66 --- /dev/null +++ b/test/CodeGen/avx512bitalg-builtins.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m512i test_mm512_popcnt_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + return _mm512_popcnt_epi16(__A); +} + +__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}} + return _mm512_mask_popcnt_epi16(__A, __U, __B); +} +__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}} + return _mm512_maskz_popcnt_epi16(__U, __B); +} + +__m512i test_mm512_popcnt_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + return _mm512_popcnt_epi8(__A); +} + +__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_popcnt_epi8(__A, __U, __B); +} +__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_popcnt_epi8(__U, __B); +} + +__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask + // CHECK: 
@llvm.x86.avx512.mask.vpshufbitqmb.512 + return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B); +} + +__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512 + return _mm512_bitshuffle_epi64_mask(__A, __B); +} + diff --git a/test/CodeGen/avx512vbmi2-builtins.c b/test/CodeGen/avx512vbmi2-builtins.c new file mode 100644 index 0000000..4da21e3 --- /dev/null +++ b/test/CodeGen/avx512vbmi2-builtins.c @@ -0,0 +1,304 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m512i test_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.512 + return _mm512_mask_compress_epi16(__S, __U, __D); +} + +__m512i test_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_maskz_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.512 + return _mm512_maskz_compress_epi16(__U, __D); +} + +__m512i test_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.512 + return _mm512_mask_compress_epi8(__S, __U, __D); +} + +__m512i test_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_maskz_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.512 + return _mm512_maskz_compress_epi8(__U, __D); +} + +void test_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.store.w.512 + _mm512_mask_compressstoreu_epi16(__P, __U, __D); +} + +void test_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_compressstoreu_epi8 
+ // CHECK: @llvm.x86.avx512.mask.compress.store.b.512 + _mm512_mask_compressstoreu_epi8(__P, __U, __D); +} + +__m512i test_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.512 + return _mm512_mask_expand_epi16(__S, __U, __D); +} + +__m512i test_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_maskz_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.512 + return _mm512_maskz_expand_epi16(__U, __D); +} + +__m512i test_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_mask_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.512 + return _mm512_mask_expand_epi8(__S, __U, __D); +} + +__m512i test_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { + // CHECK-LABEL: @test_mm512_maskz_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.512 + return _mm512_maskz_expand_epi8(__U, __D); +} + +__m512i test_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const* __P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.512 + return _mm512_mask_expandloadu_epi16(__S, __U, __P); +} + +__m512i test_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const* __P) { + // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.512 + return _mm512_maskz_expandloadu_epi16(__U, __P); +} + +__m512i test_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const* __P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.512 + return _mm512_mask_expandloadu_epi8(__S, __U, __P); +} + +__m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) { + // CHECK-LABEL: @test_mm512_maskz_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.512 + return _mm512_maskz_expandloadu_epi8(__U, __P); +} + +__m512i 
test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); +} + +__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.512 + return _mm512_shldi_epi64(__A, __B, 31); +} + +__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + return _mm512_maskz_shldi_epi32(__U, __A, __B, 63); +} + +__m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.512 + return _mm512_shldi_epi32(__A, __B, 31); +} + +__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + return _mm512_maskz_shldi_epi16(__U, __A, __B, 63); +} + +__m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.512 + return 
_mm512_shldi_epi16(__A, __B, 31); +} + +__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); +} + +__m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.512 + return _mm512_shrdi_epi64(__A, __B, 31); +} + +__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + return _mm512_maskz_shrdi_epi32(__U, __A, __B, 63); +} + +__m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.512 + return _mm512_shrdi_epi32(__A, __B, 31); +} + +__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 127); +} + +__m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + return _mm512_maskz_shrdi_epi16(__U, __A, __B, 63); +} + +__m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdi_epi16 + // 
CHECK: @llvm.x86.avx512.mask.vpshrd.w.512 + return _mm512_shrdi_epi16(__A, __B, 31); +} + +__m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.512 + return _mm512_mask_shldv_epi64(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.512 + return _mm512_maskz_shldv_epi64(__U, __S, __A, __B); +} + +__m512i test_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.512 + return _mm512_shldv_epi64(__S, __A, __B); +} + +__m512i test_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.512 + return _mm512_mask_shldv_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldv_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.512 + return _mm512_maskz_shldv_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.512 + return _mm512_shldv_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.512 + return _mm512_mask_shldv_epi16(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shldv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.512 + return _mm512_maskz_shldv_epi16(__U, __S, __A, 
__B); +} + +__m512i test_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.512 + return _mm512_shldv_epi16(__S, __A, __B); +} + +__m512i test_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.512 + return _mm512_mask_shrdv_epi64(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shrdv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.512 + return _mm512_maskz_shrdv_epi64(__U, __S, __A, __B); +} + +__m512i test_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.512 + return _mm512_shrdv_epi64(__S, __A, __B); +} + +__m512i test_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.512 + return _mm512_mask_shrdv_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shrdv_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.d.512 + return _mm512_maskz_shrdv_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.512 + return _mm512_shrdv_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.512 + return _mm512_mask_shrdv_epi16(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: 
@test_mm512_maskz_shrdv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.512 + return _mm512_maskz_shrdv_epi16(__U, __S, __A, __B); +} + +__m512i test_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.512 + return _mm512_shrdv_epi16(__S, __A, __B); +} + diff --git a/test/CodeGen/avx512vlbitalg-builtins.c b/test/CodeGen/avx512vlbitalg-builtins.c new file mode 100644 index 0000000..9b2a1a4 --- /dev/null +++ b/test/CodeGen/avx512vlbitalg-builtins.c @@ -0,0 +1,104 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m256i test_mm256_popcnt_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + return _mm256_popcnt_epi16(__A); +} + +__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}} + return _mm256_mask_popcnt_epi16(__A, __U, __B); +} +__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}} + return _mm256_maskz_popcnt_epi16(__U, __B); +} + +__m128i test_mm128_popcnt_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm128_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + return _mm128_popcnt_epi16(__A); +} + +__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}} + return _mm128_mask_popcnt_epi16(__A, __U, __B); +} +__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, 
__m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}} + return _mm128_maskz_popcnt_epi16(__U, __B); +} + +__m256i test_mm256_popcnt_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + return _mm256_popcnt_epi8(__A); +} + +__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_popcnt_epi8(__A, __U, __B); +} +__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_popcnt_epi8(__U, __B); +} + +__m128i test_mm128_popcnt_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm128_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + return _mm128_popcnt_epi8(__A); +} + +__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm128_mask_popcnt_epi8(__A, __U, __B); +} +__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm128_maskz_popcnt_epi8(__U, __B); +} + +__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256 + return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B); +} + +__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i 
__A, __m256i __B) { + // CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256 + return _mm256_bitshuffle_epi32_mask(__A, __B); +} + +__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128 + return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B); +} + +__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128 + return _mm128_bitshuffle_epi16_mask(__A, __B); +} + diff --git a/test/CodeGen/avx512vlvbmi2-builtins.c b/test/CodeGen/avx512vlvbmi2-builtins.c new file mode 100644 index 0000000..6edc66d --- /dev/null +++ b/test/CodeGen/avx512vlvbmi2-builtins.c @@ -0,0 +1,604 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vl -target-feature +avx512vbmi2 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m128i test_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.128 + return _mm128_mask_compress_epi16(__S, __U, __D); +} + +__m128i test_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_maskz_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.128 + return _mm128_maskz_compress_epi16(__U, __D); +} + +__m128i test_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.128 + return _mm128_mask_compress_epi8(__S, __U, __D); +} + +__m128i test_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_maskz_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.128 + return _mm128_maskz_compress_epi8(__U, __D); +} + +void 
test_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_compressstoreu_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.store.w.128 + _mm128_mask_compressstoreu_epi16(__P, __U, __D); +} + +void test_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_compressstoreu_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.store.b.128 + _mm128_mask_compressstoreu_epi8(__P, __U, __D); +} + +__m128i test_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.128 + return _mm128_mask_expand_epi16(__S, __U, __D); +} + +__m128i test_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_maskz_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.128 + return _mm128_maskz_expand_epi16(__U, __D); +} + +__m128i test_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_mask_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.128 + return _mm128_mask_expand_epi8(__S, __U, __D); +} + +__m128i test_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D) { + // CHECK-LABEL: @test_mm128_maskz_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.128 + return _mm128_maskz_expand_epi8(__U, __D); +} + +__m128i test_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const* __P) { + // CHECK-LABEL: @test_mm128_mask_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.128 + return _mm128_mask_expandloadu_epi16(__S, __U, __P); +} + +__m128i test_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const* __P) { + // CHECK-LABEL: @test_mm128_maskz_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.128 + return _mm128_maskz_expandloadu_epi16(__U, __P); +} + +__m128i test_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const* __P) { + // CHECK-LABEL: 
@test_mm128_mask_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.128 + return _mm128_mask_expandloadu_epi8(__S, __U, __P); +} + +__m128i test_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const* __P) { + // CHECK-LABEL: @test_mm128_maskz_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.128 + return _mm128_maskz_expandloadu_epi8(__U, __P); +} + +__m256i test_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.256 + return _mm256_mask_compress_epi16(__S, __U, __D); +} + +__m256i test_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_maskz_compress_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.w.256 + return _mm256_maskz_compress_epi16(__U, __D); +} + +__m256i test_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.256 + return _mm256_mask_compress_epi8(__S, __U, __D); +} + +__m256i test_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_maskz_compress_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.b.256 + return _mm256_maskz_compress_epi8(__U, __D); +} + +void test_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi16 + // CHECK: @llvm.x86.avx512.mask.compress.store.w.256 + _mm256_mask_compressstoreu_epi16(__P, __U, __D); +} + +void test_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_compressstoreu_epi8 + // CHECK: @llvm.x86.avx512.mask.compress.store.b.256 + _mm256_mask_compressstoreu_epi8(__P, __U, __D); +} + +__m256i test_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.256 + return 
_mm256_mask_expand_epi16(__S, __U, __D); +} + +__m256i test_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_maskz_expand_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.w.256 + return _mm256_maskz_expand_epi16(__U, __D); +} + +__m256i test_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_mask_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.256 + return _mm256_mask_expand_epi8(__S, __U, __D); +} + +__m256i test_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { + // CHECK-LABEL: @test_mm256_maskz_expand_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.b.256 + return _mm256_maskz_expand_epi8(__U, __D); +} + +__m256i test_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const* __P) { + // CHECK-LABEL: @test_mm256_mask_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.256 + return _mm256_mask_expandloadu_epi16(__S, __U, __P); +} + +__m256i test_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const* __P) { + // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi16 + // CHECK: @llvm.x86.avx512.mask.expand.load.w.256 + return _mm256_maskz_expandloadu_epi16(__U, __P); +} + +__m256i test_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const* __P) { + // CHECK-LABEL: @test_mm256_mask_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.256 + return _mm256_mask_expandloadu_epi8(__S, __U, __P); +} + +__m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) { + // CHECK-LABEL: @test_mm256_maskz_expandloadu_epi8 + // CHECK: @llvm.x86.avx512.mask.expand.load.b.256 + return _mm256_maskz_expandloadu_epi8(__U, __P); +} + +__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i 
__A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); +} + +__m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.256 + return _mm256_shldi_epi64(__A, __B, 31); +} + +__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + return _mm128_mask_shldi_epi64(__S, __U, __A, __B, 127); +} + +__m128i test_mm128_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + return _mm128_maskz_shldi_epi64(__U, __A, __B, 63); +} + +__m128i test_mm128_shldi_epi64(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshld.q.128 + return _mm128_shldi_epi64(__A, __B, 31); +} + +__m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + return _mm256_maskz_shldi_epi32(__U, __A, __B, 63); +} + +__m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.256 + return _mm256_shldi_epi32(__A, __B, 31); +} + +__m128i test_mm128_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + return _mm128_mask_shldi_epi32(__S, __U, __A, __B, 127); +} + +__m128i 
test_mm128_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + return _mm128_maskz_shldi_epi32(__U, __A, __B, 63); +} + +__m128i test_mm128_shldi_epi32(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshld.d.128 + return _mm128_shldi_epi32(__A, __B, 31); +} + +__m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + return _mm256_maskz_shldi_epi16(__U, __A, __B, 63); +} + +__m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.256 + return _mm256_shldi_epi16(__A, __B, 31); +} + +__m128i test_mm128_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + return _mm128_mask_shldi_epi16(__S, __U, __A, __B, 127); +} + +__m128i test_mm128_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + return _mm128_maskz_shldi_epi16(__U, __A, __B, 63); +} + +__m128i test_mm128_shldi_epi16(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshld.w.128 + return _mm128_shldi_epi16(__A, __B, 31); +} + +__m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + return 
_mm256_mask_shrdi_epi64(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); +} + +__m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.256 + return _mm256_shrdi_epi64(__A, __B, 31); +} + +__m128i test_mm128_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + return _mm128_mask_shrdi_epi64(__S, __U, __A, __B, 127); +} + +__m128i test_mm128_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + return _mm128_maskz_shrdi_epi64(__U, __A, __B, 63); +} + +__m128i test_mm128_shrdi_epi64(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdi_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrd.q.128 + return _mm128_shrdi_epi64(__A, __B, 31); +} + +__m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + return _mm256_maskz_shrdi_epi32(__U, __A, __B, 63); +} + +__m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.256 + return _mm256_shrdi_epi32(__A, __B, 31); +} + +__m128i test_mm128_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdi_epi32 + // CHECK: 
@llvm.x86.avx512.mask.vpshrd.d.128 + return _mm128_mask_shrdi_epi32(__S, __U, __A, __B, 127); +} + +__m128i test_mm128_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128 + return _mm128_maskz_shrdi_epi32(__U, __A, __B, 63); +} + +__m128i test_mm128_shrdi_epi32(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdi_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrd.d.128 + return _mm128_shrdi_epi32(__A, __B, 31); +} + +__m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 127); +} + +__m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + return _mm256_maskz_shrdi_epi16(__U, __A, __B, 63); +} + +__m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.256 + return _mm256_shrdi_epi16(__A, __B, 31); +} + +__m128i test_mm128_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + return _mm128_mask_shrdi_epi16(__S, __U, __A, __B, 127); +} + +__m128i test_mm128_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + return _mm128_maskz_shrdi_epi16(__U, __A, __B, 63); +} + +__m128i test_mm128_shrdi_epi16(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdi_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrd.w.128 + return _mm128_shrdi_epi16(__A, __B, 31); +} + +__m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // 
CHECK-LABEL: @test_mm256_mask_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.256 + return _mm256_mask_shldv_epi64(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.256 + return _mm256_maskz_shldv_epi64(__U, __S, __A, __B); +} + +__m256i test_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.256 + return _mm256_shldv_epi64(__S, __A, __B); +} + +__m128i test_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.128 + return _mm128_mask_shldv_epi64(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.q.128 + return _mm128_maskz_shldv_epi64(__U, __S, __A, __B); +} + +__m128i test_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshldv.q.128 + return _mm128_shldv_epi64(__S, __A, __B); +} + +__m256i test_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.256 + return _mm256_mask_shldv_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldv_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.256 + return _mm256_maskz_shldv_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.256 + return 
_mm256_shldv_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.128 + return _mm128_mask_shldv_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldv_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.d.128 + return _mm128_maskz_shldv_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshldv.d.128 + return _mm128_shldv_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.256 + return _mm256_mask_shldv_epi16(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shldv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.256 + return _mm256_maskz_shldv_epi16(__U, __S, __A, __B); +} + +__m256i test_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.256 + return _mm256_shldv_epi16(__S, __A, __B); +} + +__m128i test_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.128 + return _mm128_mask_shldv_epi16(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shldv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshldv.w.128 + return _mm128_maskz_shldv_epi16(__U, __S, __A, __B); +} + +__m128i test_mm128_shldv_epi16(__m128i __S, 
__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shldv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshldv.w.128 + return _mm128_shldv_epi16(__S, __A, __B); +} + +__m256i test_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.256 + return _mm256_mask_shrdv_epi64(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.256 + return _mm256_maskz_shrdv_epi64(__U, __S, __A, __B); +} + +__m256i test_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.256 + return _mm256_shrdv_epi64(__S, __A, __B); +} + +__m128i test_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.128 + return _mm128_mask_shrdv_epi64(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdv_epi64 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.q.128 + return _mm128_maskz_shrdv_epi64(__U, __S, __A, __B); +} + +__m128i test_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdv_epi64 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.q.128 + return _mm128_shrdv_epi64(__S, __A, __B); +} + +__m256i test_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.256 + return _mm256_mask_shrdv_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdv_epi32 + // CHECK: 
@llvm.x86.avx512.maskz.vpshrdv.d.256 + return _mm256_maskz_shrdv_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.256 + return _mm256_shrdv_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.128 + return _mm128_mask_shrdv_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdv_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.d.128 + return _mm128_maskz_shrdv_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdv_epi32 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.d.128 + return _mm128_shrdv_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.256 + return _mm256_mask_shrdv_epi16(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shrdv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.256 + return _mm256_maskz_shrdv_epi16(__U, __S, __A, __B); +} + +__m256i test_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.256 + return _mm256_shrdv_epi16(__S, __A, __B); +} + +__m128i test_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.128 + return _mm128_mask_shrdv_epi16(__S, __U, __A, __B); +} + +__m128i 
test_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_shrdv_epi16 + // CHECK: @llvm.x86.avx512.maskz.vpshrdv.w.128 + return _mm128_maskz_shrdv_epi16(__U, __S, __A, __B); +} + +__m128i test_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_shrdv_epi16 + // CHECK: @llvm.x86.avx512.mask.vpshrdv.w.128 + return _mm128_shrdv_epi16(__S, __A, __B); +} + diff --git a/test/CodeGen/avx512vlvnni-builtins.c b/test/CodeGen/avx512vlvnni-builtins.c new file mode 100644 index 0000000..861b915 --- /dev/null +++ b/test/CodeGen/avx512vlvnni-builtins.c @@ -0,0 +1,148 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> + +__m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256 + return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.256 + return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.256 + return _mm256_dpbusd_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256 + return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.256
+ return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.256 + return _mm256_dpbusds_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256 + return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.256 + return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.256 + return _mm256_dpwssd_epi32(__S, __A, __B); +} + +__m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256 + return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.256 + return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.256 + return _mm256_dpwssds_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128 + return _mm128_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m128i 
test_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.128 + return _mm128_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.128 + return _mm128_dpbusd_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128 + return _mm128_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.128 + return _mm128_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.128 + return _mm128_dpbusds_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128 + return _mm128_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.128 + return _mm128_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.128 + return _mm128_dpwssd_epi32(__S, __A, __B); +} + +__m128i test_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + // 
CHECK-LABEL: @test_mm128_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128 + return _mm128_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m128i test_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.128 + return _mm128_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m128i test_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.128 + return _mm128_dpwssds_epi32(__S, __A, __B); +} + diff --git a/test/CodeGen/avx512vnni-builtins.c b/test/CodeGen/avx512vnni-builtins.c new file mode 100644 index 0000000..d79046a --- /dev/null +++ b/test/CodeGen/avx512vnni-builtins.c @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> + +__m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512 + return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusd.512 + return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpbusd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusd.512 + return _mm512_dpbusd_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512 + return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpbusds_epi32(__mmask16
__U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpbusds.512 + return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpbusds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpbusds.512 + return _mm512_dpbusds_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512 + return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssd.512 + return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpwssd_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssd.512 + return _mm512_dpwssd_epi32(__S, __A, __B); +} + +__m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512 + return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B); +} + +__m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.maskz.vpdpwssds.512 + return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B); +} + +__m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dpwssds_epi32 + // CHECK: @llvm.x86.avx512.mask.vpdpwssds.512 + return _mm512_dpwssds_epi32(__S, __A, __B); +} + diff --git a/test/CodeGen/gfni-builtins.c b/test/CodeGen/gfni-builtins.c new file mode 100644 index 0000000..95cfd4f --- /dev/null 
+++ b/test/CodeGen/gfni-builtins.c @@ -0,0 +1,182 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512 + +#include <immintrin.h> + +__m128i test_mm_gf2p8affineinv_epi64_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8affineinv_epi64_epi8 + // SSE: @llvm.x86.vgf2p8affineinvqb.128 + return _mm_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m128i test_mm_gf2p8affine_epi64_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8affine_epi64_epi8 + // SSE: @llvm.x86.vgf2p8affineqb.128 + return _mm_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m128i test_mm_gf2p8mul_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8mul_epi8 + // SSE: @llvm.x86.vgf2p8mulb.128 + return _mm_gf2p8mul_epi8(A, B); +} + +#if defined(AVX) || defined(AVX512) +__m256i test_mm256_gf2p8affineinv_epi64_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8affineinv_epi64_epi8 + // AVX: @llvm.x86.vgf2p8affineinvqb.256 + return _mm256_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m256i test_mm256_gf2p8affine_epi64_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8affine_epi64_epi8 + // AVX: @llvm.x86.vgf2p8affineqb.256 + return _mm256_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m256i test_mm256_gf2p8mul_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8mul_epi8 + // AVX: @llvm.x86.vgf2p8mulb.256 + return _mm256_gf2p8mul_epi8(A, B); +} +#endif // AVX + +#ifdef AVX512 +__m512i test_mm512_gf2p8affineinv_epi64_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8affineinv_epi64_epi8 + // AVX512:
@llvm.x86.vgf2p8affineinvqb.512 + return _mm512_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineinvqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineinvqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m128i test_mm_mask_gf2p8affineinv_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m128i test_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8 + // 
AVX512: @llvm.x86.vgf2p8affineinvqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + return _mm512_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m512i test_mm512_mask_gf2p8affine_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m512i test_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m128i test_mm_mask_gf2p8affine_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 
x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m128i test_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + return _mm512_gf2p8mul_epi8(A, B); +} + +__m512i test_mm512_mask_gf2p8mul_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8mul_epi8(S, U, A, B); +} + +__m512i test_mm512_maskz_gf2p8mul_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8mul_epi8(U, A, B); +} + +__m256i test_mm256_mask_gf2p8mul_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8mul_epi8 + // AVX256: @llvm.x86.vgf2p8mulb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8mul_epi8(S, U, A, B); +} + +__m256i test_mm256_maskz_gf2p8mul_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8mul_epi8 + // AVX256: @llvm.x86.vgf2p8mulb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8mul_epi8(U, A, B); +} + +__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.128 + 
// AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8mul_epi8(S, U, A, B); +} +#endif // AVX512 diff --git a/test/CodeGen/vaes-builtins.c b/test/CodeGen/vaes-builtins.c new file mode 100644 index 0000000..df160aa --- /dev/null +++ b/test/CodeGen/vaes-builtins.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vaes -emit-llvm -o - | FileCheck %s --check-prefix AVX +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vaes -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512 + +#include <immintrin.h> + +__m256i test_mm256_aesenc_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesenc_epi128 + // AVX: @llvm.x86.aesni.aesenc.256 + return _mm256_aesenc_epi128(__A, __B); +} + +__m256i test_mm256_aesenclast_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesenclast_epi128 + // AVX: @llvm.x86.aesni.aesenclast.256 + return _mm256_aesenclast_epi128(__A, __B); +} + +__m256i test_mm256_aesdec_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesdec_epi128 + // AVX: @llvm.x86.aesni.aesdec.256 + return _mm256_aesdec_epi128(__A, __B); +} + +__m256i test_mm256_aesdeclast_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesdeclast_epi128 + // AVX: @llvm.x86.aesni.aesdeclast.256 + return _mm256_aesdeclast_epi128(__A, __B); +} + +#ifdef AVX512 +__m512i test_mm512_aesenc_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesenc_epi128 + // AVX512: @llvm.x86.aesni.aesenc.512 + return _mm512_aesenc_epi128(__A, __B); +} + +__m512i test_mm512_aesenclast_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesenclast_epi128 + // AVX512: @llvm.x86.aesni.aesenclast.512 + return _mm512_aesenclast_epi128(__A, __B); +} + +__m512i test_mm512_aesdec_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesdec_epi128 + // AVX512:
@llvm.x86.aesni.aesdec.512 + return _mm512_aesdec_epi128(__A, __B); +} + +__m512i test_mm512_aesdeclast_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesdeclast_epi128 + // AVX512: @llvm.x86.aesni.aesdeclast.512 + return _mm512_aesdeclast_epi128(__A, __B); +} +#endif + diff --git a/test/CodeGen/vpclmulqdq-builtins.c b/test/CodeGen/vpclmulqdq-builtins.c new file mode 100644 index 0000000..8c610e2 --- /dev/null +++ b/test/CodeGen/vpclmulqdq-builtins.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vpclmulqdq -emit-llvm -o - | FileCheck %s --check-prefix AVX +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vpclmulqdq -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512 + +#include <immintrin.h> + +__m256i test_mm256_clmulepi64_epi128(__m256i A, __m256i B) { + // AVX: @llvm.x86.pclmulqdq.256 + return _mm256_clmulepi64_epi128(A, B, 0); +} + +#ifdef AVX512 +__m512i test_mm512_clmulepi64_epi128(__m512i A, __m512i B) { + // AVX512: @llvm.x86.pclmulqdq.512 + return _mm512_clmulepi64_epi128(A, B, 0); +} +#endif + diff --git a/test/CodeGenCXX/ubsan-vtable-checks.cpp b/test/CodeGenCXX/ubsan-vtable-checks.cpp index 5e17913..090707c 100644 --- a/test/CodeGenCXX/ubsan-vtable-checks.cpp +++ b/test/CodeGenCXX/ubsan-vtable-checks.cpp @@ -38,3 +38,15 @@ void delete_it(T *t) { // CHECK-VPTR: load {{.*}} (%struct.T*{{.*}})**, {{.*}} (%struct.T*{{.*}})*** delete t; } + +// ITANIUM: define %struct.U* @_Z7dyncastP1T +// MSABI: define %struct.U* @"\01?dyncast +U* dyncast(T *t) { + // First, we check that dynamic_cast is not called before a type check. + // CHECK-VPTR-NOT: call i8* @__{{dynamic_cast|RTDynamicCast}} + // CHECK-VPTR: br i1 {{.*}} label %{{.*}} + // CHECK-VPTR: call void @__ubsan_handle_dynamic_type_cache_miss_abort + // Second, we check that dynamic_cast is actually called once the type check is done.
+ // CHECK-VPTR: call i8* @__{{dynamic_cast|RTDynamicCast}} + return dynamic_cast<U*>(t); +} diff --git a/test/Driver/x86-target-features.c b/test/Driver/x86-target-features.c index c552ef7..1289823 100644 --- a/test/Driver/x86-target-features.c +++ b/test/Driver/x86-target-features.c @@ -20,10 +20,10 @@ // SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes" // NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes" -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX %s -// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX %s -// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512ifma" -// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512ifma" // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq
-mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX %s +// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma" +// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma" // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BMI %s // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BMI %s @@ -94,3 +94,34 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s // CLZERO: "-target-feature" "+clzero" // NO-CLZERO: "-target-feature" "-clzero" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VAES %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VAES %s +// VAES: "-target-feature" "+vaes" +// NO-VAES: "-target-feature" "-vaes" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mgfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=GFNI %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-gfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-GFNI %s 
+// GFNI: "-target-feature" "+gfni" +// NO-GFNI: "-target-feature" "-gfni" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VPCLMULQDQ %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vpclmulqdq %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VPCLMULQDQ %s +// VPCLMULQDQ: "-target-feature" "+vpclmulqdq" +// NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s +// BITALG: "-target-feature" "+avx512bitalg" +// NO-BITALG: "-target-feature" "-avx512bitalg" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VNNI %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vnni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VNNI %s +// VNNI: "-target-feature" "+avx512vnni" +// NO-VNNI: "-target-feature" "-avx512vnni" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VBMI2 %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vbmi2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VBMI2 %s +// VBMI2: "-target-feature" "+avx512vbmi2" +// NO-VBMI2: "-target-feature" "-avx512vbmi2" + diff --git a/test/Index/skipped-bodies-ctors.cpp b/test/Index/skipped-bodies-ctors.cpp new file mode 100644 index 0000000..8a559ee --- /dev/null +++ b/test/Index/skipped-bodies-ctors.cpp @@ -0,0 +1,16 @@ +// RUN: env CINDEXTEST_SKIP_FUNCTION_BODIES=1 c-index-test -test-load-source all %s 2>&1 \ +// RUN: | FileCheck --implicit-check-not "error:" %s + + +template +struct Foo { + template + Foo(int &a) : a(a) { + } + + int &a; +}; + + +int bar = Foo(bar).a + Foo(bar).a; +//
CHECK-NOT: error: constructor for 'Foo' must explicitly initialize the reference diff --git a/test/OpenMP/nvptx_data_sharing.cpp b/test/OpenMP/nvptx_data_sharing.cpp index 65215cd..53bac3c 100644 --- a/test/OpenMP/nvptx_data_sharing.cpp +++ b/test/OpenMP/nvptx_data_sharing.cpp @@ -24,7 +24,7 @@ void test_ds(){ // CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() [[ATTR1:#.*]] { // CK1: [[SHAREDARGS:%.+]] = alloca i8** -// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]]) +// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]], i16 1) // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]] // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]]) @@ -32,7 +32,7 @@ void test_ds(){ // CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() [[ATTR2:#.*]] { // CK1: [[SHAREDARGS1:%.+]] = alloca i8** -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1, i16 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]] // CK1: [[SHAREDVAR:%.+]] = bitcast i32* {{.*}} to i8* diff --git a/test/OpenMP/nvptx_target_teams_codegen.cpp b/test/OpenMP/nvptx_target_teams_codegen.cpp index b79fd18..8b016cc 100644 --- a/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -60,7 +60,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -148,7 +148,7 @@ 
int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args, i16 1) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], diff --git a/test/OpenMP/target_enter_data_depend_codegen.cpp b/test/OpenMP/target_enter_data_depend_codegen.cpp new file mode 100644 index 0000000..469a161 --- /dev/null +++ b/test/OpenMP/target_enter_data_depend_codegen.cpp @@ -0,0 +1,378 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 + +// expected-no-diagnostics +// CK1: [[ST:%.+]] = type { i32, double* } +// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]], +// CK1-SAME: i[[sz]], i8 } +#ifndef HEADER +#define HEADER + +template +struct 
ST { + T a; + double *b; +}; + +ST gb; +double gc[100]; + +// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 32] + +// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33] + +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 32] + +// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] + +// CK1-LABEL: _Z3fooi +void foo(int arg) { + int la; + float lb[arg]; + + // CK1: alloca [1 x %struct.kmp_depend_info], + // CK1: alloca [3 x %struct.kmp_depend_info], + // CK1: alloca [4 x %struct.kmp_depend_info], + // CK1: alloca [5 x %struct.kmp_depend_info], + + // Region 00 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}} + // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) + // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates* + // CK1: 
[[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr 
inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null) + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target enter data if(1+3-5) device(arg) map(alloc:gc) nowait depend(in: arg) + {++arg;} + + // Region 01 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target enter data map(to:la) if(1+3-4) depend(in: la) depend(out: arg) + {++arg;} + + // Region 02 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CK1: [[IFTHEN]] + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32** + // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32** + // CK1: store i32* [[ARG]], i32** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, 
i32 0 + // CK1: [[IF:%.+]] = load i8, i8* %{{.+}} + // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1 + // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8 + // CK1: store i8 [[IF]], i8* [[IF_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] 
{{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 
0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + + // CK1: br label %[[IFEND:[^,]+]] + + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target enter data map(to:arg) if(arg) device(4) depend(inout: arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 03 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float** + // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float** + // CK1: store float* [[VLA]], float** [[P0_BC]], + // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* 
[[S:%.+]], i32 0, i32 0 + // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] 
= bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = 
ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target enter data map(alloc:lb) depend(out: lb, arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 04 + // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: 
[[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST** + // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]], + // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]], + // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double** + // CK1: store double* %{{.+}}, double** [[P1_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void 
@llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] 
[[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds 
%struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target enter data map(to:gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg) + {++arg;} +} + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias) +// CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// 
CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) 
%{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +#endif diff --git a/test/OpenMP/target_exit_data_depend_codegen.cpp b/test/OpenMP/target_exit_data_depend_codegen.cpp new file mode 100644 index 0000000..6f7fba2 --- /dev/null +++ b/test/OpenMP/target_exit_data_depend_codegen.cpp @@ -0,0 +1,378 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple 
i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 + +// expected-no-diagnostics +// CK1: [[ST:%.+]] = type { i32, double* } +// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]], +// CK1-SAME: i[[sz]], i8 } +#ifndef HEADER +#define HEADER + +template +struct ST { + T a; + double *b; +}; + +ST gb; +double gc[100]; + +// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 40] + +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 34, i64 18] + +// CK1-LABEL: _Z3fooi +void foo(int arg) { + int la; + float lb[arg]; + + // CK1: alloca [1 x %struct.kmp_depend_info], + // CK1: alloca [3 x %struct.kmp_depend_info], + // CK1: alloca [4 x %struct.kmp_depend_info], + // CK1: alloca [5 x %struct.kmp_depend_info], + + // Region 00 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], 
i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}} + // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) + // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* 
[[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null) + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target exit data if(1+3-5) device(arg) map(from:gc) nowait depend(in: arg) + {++arg;} + + // Region 01 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target exit data map(release: la) if(1+3-4) depend(in: la) depend(out: arg) + {++arg;} + + // Region 02 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CK1: [[IFTHEN]] + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], 
[1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32** + // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32** + // CK1: store i32* [[ARG]], i32** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[IF:%.+]] = load i8, i8* %{{.+}} + // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1 + // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8 + // CK1: store i8 [[IF]], i8* [[IF_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: 
[[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds 
%struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + + // CK1: br label %[[IFEND:[^,]+]] + + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma 
omp target exit data map(delete: arg) if(arg) device(4) depend(inout: arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 03 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float** + // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float** + // CK1: store float* [[VLA]], float** [[P0_BC]], + // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0 + // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[GEPS0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* 
[[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] 
[[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to 
i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target exit data map(from:lb) depend(out: lb, arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 04 + // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST** + // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]], + // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]], + // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double** + // CK1: store double* %{{.+}}, double** [[P1_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to 
%struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = 
getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target exit data map(from:gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg) + {++arg;} +} + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, 
%struct.kmp_task_t_with_privates* noalias) +// CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) 
%{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) 
%{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +#endif diff --git a/test/OpenMP/target_update_depend_codegen.cpp b/test/OpenMP/target_update_depend_codegen.cpp new file mode 100644 index 0000000..6542aa1 --- /dev/null +++ b/test/OpenMP/target_update_depend_codegen.cpp @@ -0,0 +1,378 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 + +// expected-no-diagnostics +// CK1: [[ST:%.+]] = type { i32, double* } +// CK1: %struct.kmp_depend_info = type { i[[sz:64|32]], +// CK1-SAME: i[[sz]], i8 } +#ifndef HEADER +#define HEADER + +template +struct ST { + T a; + double *b; +}; + +ST gb; +double gc[100]; + +// CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 800] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] +// CK1: [[MTYPE02:@.+]] = 
{{.+}}constant [1 x i64] [i64 33] + +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34] + +// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] + +// CK1-LABEL: _Z3fooi +void foo(int arg) { + int la; + float lb[arg]; + + // CK1: alloca [1 x %struct.kmp_depend_info], + // CK1: alloca [3 x %struct.kmp_depend_info], + // CK1: alloca [4 x %struct.kmp_depend_info], + // CK1: alloca [5 x %struct.kmp_depend_info], + + // Region 00 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to [100 x double]** + // CK1: store [100 x double]* @gc, [100 x double]** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[CAP_DEVICE:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[DEVICE:%.+]] = load i32, i32* %{{.+}} + // CK1: store i32 [[DEVICE]], i32* [[CAP_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* [[TASK_ENTRY0:@.+]] to i32 (i32, i8*)*)) + // CK1: [[BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, 
i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 4, i32 4, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* [[BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t, %struct..kmp_privates.t* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE00]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: = call i32 @__kmpc_omp_task_with_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i8* [[BC]], i32 0, i8* null) + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update if(1+3-5) device(arg) from(gc) nowait depend(in: arg) + {++arg;} + + // Region 01 + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update to(la) if(1+3-4) depend(in: la) depend(out: arg) + {++arg;} + + // Region 02 + // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CK1: [[IFTHEN]] + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to i32** + // CK1: store i32* [[ARG:%.+]], i32** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to i32** + // CK1: store i32* [[ARG]], i32** [[P0_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[IF_DEVICE:%.+]] = getelementptr inbounds %struct.anon{{.+}}, %struct.anon{{.+}}* [[CAPTURES:%.+]], i32 0, i32 0 + // CK1: [[IF:%.+]] = load i8, i8* %{{.+}} + // CK1: [[IF_BOOL:%.+]] = trunc i8 [[IF]] to i1 + // CK1: [[IF:%.+]] = zext i1 [[IF_BOOL]] to i8 + // CK1: store i8 [[IF]], i8* [[IF_DEVICE]], + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] 
[[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[SHAREDS:%.+]] = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* [[TASK_T]], i32 0, i32 0 + // CK1: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS]], + // CK1: [[BC1:%.+]] = bitcast %struct.anon{{.+}}* [[CAPTURES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[SHAREDS_REF]], i8* [[BC1]], i[[sz]] 1, i32 1, i1 false) + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void 
@llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([1 x i[[sz]]]* [[SIZE02]] to i8*), i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr 
inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 3, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY2]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + + // CK1: br label %[[IFEND:[^,]+]] + + // CK1: [[IFELSE]] + // CK1: br label %[[IFEND]] + // CK1: [[IFEND]] + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + #pragma omp target update to(arg) if(arg) device(4) depend(inout: arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 03 + // CK1: [[BP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to float** + // CK1: store float* [[VLA:%.+]], float** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to float** + // CK1: store float* [[VLA]], float** [[P0_BC]], + // CK1: [[S0:%.+]] = getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S:%.+]], i32 0, i32 0 + // CK1: store i[[sz]] {{.+}}, i[[sz]]* [[S0]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[GEPS0:%.+]] = 
getelementptr inbounds [1 x i[[sz]]], [1 x i[[sz]]]* [[S]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] [[sz]], i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast [1 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [1 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: [[BC_SIZES:%.+]] = bitcast i[[sz]]* [[GEPS0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* [[BC_SIZES]], i[[sz]] {{8|4}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr 
inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: 
[[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 3, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 4, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY3]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target update from(lb) depend(out: lb, arg, la, gc) + {++arg;} + + // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 + {++arg;} + + // Region 04 + // CK1: [[BP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CK1: [[BP0_BC:%.+]] = bitcast i8** [[BP0]] to %struct.ST** + // CK1: store %struct.ST* @gb, %struct.ST** [[BP0_BC]], + // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double*** + // CK1: store 
double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]], + // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double*** + // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]], + // CK1: [[P1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CK1: [[P1_BC:%.+]] = bitcast i8** [[P1]] to double** + // CK1: store double* %{{.+}}, double** [[P1_BC]], + // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|44}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*)) + // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}* + // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0 + // CK1: [[PRIVS:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 1 + // CK1: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0 + // CK1: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8* + // CK1: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_BASEPTRS]], i8* [[BC_BASEPTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_PTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1 + // CK1: [[BC_PRIVS_PTRS:%.+]] = bitcast 
[2 x i8*]* [[PRIVS_PTRS]] to i8* + // CK1: [[BC_PTRS:%.+]] = bitcast i8** [[GEPP0]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_PTRS]], i8* [[BC_PTRS]], i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2 + // CK1: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8* + // CK1: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* [[BC_PRIVS_SIZES]], i8* bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i32 {{8|4}}, i1 false) + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP:%.+]], i[[sz]] 0, i[[sz]] 0 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint double* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 1 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 2 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint float* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] %{{.+}}, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 3 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: store i[[sz]] ptrtoint ([100 x double]* @gc to i[[sz]]), i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 800, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i[[sz]] 0, i[[sz]] 4 + // CK1: [[DEP_ADR:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 0 + // CK1: [[BC_ADR:%.+]] = ptrtoint i32* %{{.+}} to i[[sz]] + // CK1: store i[[sz]] [[BC_ADR]], i[[sz]]* [[DEP_ADR]], + // CK1: [[DEP_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, 
%struct.kmp_depend_info* [[DEP]], i32 0, i32 1 + // CK1: store i[[sz]] 4, i[[sz]]* [[DEP_SIZE]], + // CK1: [[DEP_ATTRS:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP]], i32 0, i32 2 + // CK1: store i8 1, i8* [[DEP_ATTRS]] + // CK1: [[DEP:%.+]] = getelementptr inbounds [5 x %struct.kmp_depend_info], [5 x %struct.kmp_depend_info]* [[MAIN_DEP]], i32 0, i32 0 + // CK1: [[BC:%.+]] = bitcast %struct.kmp_depend_info* [[DEP]] to i8* + // CK1: call void @__kmpc_omp_wait_deps(%ident_t* @{{.+}}, i32 %{{.+}}, i32 5, i8* [[BC]], i32 0, i8* null) + // CK1: call void @__kmpc_omp_task_begin_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + // CK1: = call i32 [[TASK_ENTRY4]](i32 %{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]]) + // CK1: call void @__kmpc_omp_task_complete_if0(%ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]]) + #pragma omp target update to(gb.b[:3]) depend(in: gb.b[:3], la, lb, gc, arg) + {++arg;} +} + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY0]](i32{{.*}}, %struct.kmp_task_t_with_privates* noalias) +// CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) 
%{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY2]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY3]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [1 x i8*]*, [1 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [1 x i8*]*, [1 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [1 x i[[sz]]]*, [1 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) 
%{{.+}}(i8* %{{[^,]+}}, [1 x i8*]** [[BP_PRIV]], [1 x i8*]** [[P_PRIV]], [1 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +// CK1: define internal{{.*}} i32 [[TASK_ENTRY4]](i32{{.*}}, %struct.kmp_task_t_with_privates{{.+}}* noalias) +// CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) +// CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] +// CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] +// CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] + +// CK1-DAG: [[BP]] = load [2 x i8*]*, [2 x i8*]** [[BP_PRIV:%.+]], +// CK1-DAG: [[P]] = load [2 x i8*]*, [2 x i8*]** [[P_PRIV:%.+]], +// CK1-DAG: [[S]] = load [2 x i[[sz]]]*, [2 x i[[sz]]]** [[S_PRIV:%.+]], +// CK1-DAG: call void (i8*, ...) %{{.+}}(i8* %{{[^,]+}}, [2 x i8*]** [[BP_PRIV]], [2 x i8*]** [[P_PRIV]], [2 x i[[sz]]]** [[S_PRIV]]) +// CK1-NOT: __tgt_target_data_end +// CK1: ret i32 0 +// CK1: } + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 304cc8b..5aed5b1 100644 --- a/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify 
%s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 2576241..aa7b933 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ 
-std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/test/Parser/c2x-attributes.c b/test/Parser/c2x-attributes.c index 1be69f1..f261dee 100644 --- a/test/Parser/c2x-attributes.c +++ b/test/Parser/c2x-attributes.c @@ -7,7 +7,7 @@ enum [[]] E { }; enum [[]] { Four }; -[[]] enum E2 { Five }; // expected-error {{an attribute list cannot appear here}} +[[]] enum E2 { Five }; // expected-error {{misplaced attributes}} // FIXME: this diagnostic can be improved. enum { [[]] Six }; // expected-error {{expected identifier}} @@ -24,7 +24,7 @@ struct [[]] S1 { int o [[]] : 12; }; -[[]] struct S2 { int a; }; // expected-error {{an attribute list cannot appear here}} +[[]] struct S2 { int a; }; // expected-error {{misplaced attributes}} struct S3 [[]] { int a; }; // expected-error {{an attribute list cannot appear here}} union [[]] U { @@ -32,7 +32,7 @@ union [[]] U { [[]] int i; }; -[[]] union U2 { double d; }; // expected-error {{an attribute list cannot appear here}} +[[]] union U2 { double d; }; // expected-error {{misplaced attributes}} union U3 [[]] { double d; }; // expected-error {{an attribute list cannot appear here}} struct [[]] IncompleteStruct; diff --git a/test/Parser/cxx-decl.cpp b/test/Parser/cxx-decl.cpp index 58ad080..1a24520 100644 --- a/test/Parser/cxx-decl.cpp +++ b/test/Parser/cxx-decl.cpp @@ -199,7 +199,7 @@ namespace PR15017 { // expected-error@-2 {{expected expression}} // expected-error@-3 {{expected unqualified-id}} #else -// expected-error@-5 {{an attribute list cannot appear here}} +// expected-error@-5 {{misplaced attributes}} #endif namespace test7 { diff --git a/test/Parser/cxx0x-attributes.cpp 
b/test/Parser/cxx0x-attributes.cpp index 4e3a2e4..e01491d 100644 --- a/test/Parser/cxx0x-attributes.cpp +++ b/test/Parser/cxx0x-attributes.cpp @@ -64,6 +64,13 @@ struct MemberFnOrder { struct [[]] struct_attr; class [[]] class_attr {}; union [[]] union_attr; +enum [[]] E { }; +namespace test_misplacement { +[[]] struct struct_attr2; //expected-error{{misplaced attributes}} +[[]] class class_attr2; //expected-error{{misplaced attributes}} +[[]] union union_attr2; //expected-error{{misplaced attributes}} +[[]] enum E2 { }; //expected-error{{misplaced attributes}} +} // Checks attributes placed at wrong syntactic locations of class specifiers. class [[]] [[]] @@ -91,7 +98,7 @@ class C final [[deprecated(l]] {}); // expected-error {{use of undeclared identi class D final alignas ([l) {}]{}); // expected-error {{expected ',' or ']' in lambda capture list}} expected-error {{an attribute list cannot appear here}} [[]] struct with_init_declarators {} init_declarator; -[[]] struct no_init_declarators; // expected-error {{an attribute list cannot appear here}} +[[]] struct no_init_declarators; // expected-error {{misplaced attributes}} template [[]] struct no_init_declarators_template; // expected-error {{an attribute list cannot appear here}} void fn_with_structs() { [[]] struct with_init_declarators {} init_declarator; diff --git a/test/Preprocessor/arm-acle-6.4.c b/test/Preprocessor/arm-acle-6.4.c index 3102bd4..9e4253a 100644 --- a/test/Preprocessor/arm-acle-6.4.c +++ b/test/Preprocessor/arm-acle-6.4.c @@ -91,7 +91,7 @@ // RUN: %clang -target arm-none-linux-eabi -march=armv6k -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V6K -// CHECK-V6K: __ARM_FEATURE_LDREX 0xF +// CHECK-V6K: __ARM_FEATURE_LDREX 0xf // RUN: %clang -target arm-none-linux-eabi -march=armv7-a -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V7A @@ -101,7 +101,7 @@ // CHECK-V7A: __ARM_ARCH_PROFILE 'A' // CHECK-V7A: __ARM_FEATURE_CLZ 1 // CHECK-V7A: __ARM_FEATURE_DSP 1 -// CHECK-V7A: 
__ARM_FEATURE_LDREX 0xF +// CHECK-V7A: __ARM_FEATURE_LDREX 0xf // CHECK-V7A: __ARM_FEATURE_QBIT 1 // CHECK-V7A: __ARM_FEATURE_SAT 1 // CHECK-V7A: __ARM_FEATURE_SIMD32 1 @@ -129,7 +129,7 @@ // CHECK-V7VE: __ARM_FEATURE_CLZ 1 // CHECK-V7VE: __ARM_FEATURE_DSP 1 // CHECK-V7VE: __ARM_FEATURE_IDIV 1 -// CHECK-V7VE: __ARM_FEATURE_LDREX 0xF +// CHECK-V7VE: __ARM_FEATURE_LDREX 0xf // CHECK-V7VE: __ARM_FEATURE_QBIT 1 // CHECK-V7VE: __ARM_FEATURE_SAT 1 // CHECK-V7VE: __ARM_FEATURE_SIMD32 1 @@ -143,7 +143,7 @@ // CHECK-V7R: __ARM_ARCH_PROFILE 'R' // CHECK-V7R: __ARM_FEATURE_CLZ 1 // CHECK-V7R: __ARM_FEATURE_DSP 1 -// CHECK-V7R: __ARM_FEATURE_LDREX 0xF +// CHECK-V7R: __ARM_FEATURE_LDREX 0xf // CHECK-V7R: __ARM_FEATURE_QBIT 1 // CHECK-V7R: __ARM_FEATURE_SAT 1 // CHECK-V7R: __ARM_FEATURE_SIMD32 1 @@ -188,7 +188,7 @@ // CHECK-V8A: __ARM_FEATURE_CLZ 1 // CHECK-V8A: __ARM_FEATURE_DSP 1 // CHECK-V8A: __ARM_FEATURE_IDIV 1 -// CHECK-V8A: __ARM_FEATURE_LDREX 0xF +// CHECK-V8A: __ARM_FEATURE_LDREX 0xf // CHECK-V8A: __ARM_FEATURE_QBIT 1 // CHECK-V8A: __ARM_FEATURE_SAT 1 // CHECK-V8A: __ARM_FEATURE_SIMD32 1 diff --git a/test/Preprocessor/arm-acle-6.5.c b/test/Preprocessor/arm-acle-6.5.c index 7ad91bd..143ed75 100644 --- a/test/Preprocessor/arm-acle-6.5.c +++ b/test/Preprocessor/arm-acle-6.5.c @@ -26,7 +26,7 @@ // RUN: %clang -target armv7a-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP // RUN: %clang -target armv7ve-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP -// CHECK-SP-DP: __ARM_FP 0xC +// CHECK-SP-DP: __ARM_FP 0xc // RUN: %clang -target arm-eabi -mfpu=vfpv3-fp16 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP // RUN: %clang -target arm-eabi -mfpu=vfpv3-d16-fp16 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP @@ -40,7 +40,7 @@ // RUN: %clang -target arm-eabi -mfpu=crypto-neon-fp-armv8 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-SP-DP-HP // RUN: %clang -target armv8-eabi -x c -E -dM %s -o - | FileCheck %s 
-check-prefix CHECK-SP-DP-HP -// CHECK-SP-DP-HP: __ARM_FP 0xE +// CHECK-SP-DP-HP: __ARM_FP 0xe // RUN: %clang -target armv4-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-NO-FMA // RUN: %clang -target armv5-eabi -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-NO-FMA diff --git a/test/Preprocessor/arm-target-features.c b/test/Preprocessor/arm-target-features.c index 0067e10..43c9c7c 100644 --- a/test/Preprocessor/arm-target-features.c +++ b/test/Preprocessor/arm-target-features.c @@ -5,7 +5,7 @@ // CHECK-V8A: #define __ARM_FEATURE_CRC32 1 // CHECK-V8A: #define __ARM_FEATURE_DIRECTED_ROUNDING 1 // CHECK-V8A: #define __ARM_FEATURE_NUMERIC_MAXMIN 1 -// CHECK-V8A: #define __ARM_FP 0xE +// CHECK-V8A: #define __ARM_FP 0xe // CHECK-V8A: #define __ARM_FP16_ARGS 1 // CHECK-V8A: #define __ARM_FP16_FORMAT_IEEE 1 @@ -16,7 +16,7 @@ // CHECK-V8R: #define __ARM_FEATURE_CRC32 1 // CHECK-V8R: #define __ARM_FEATURE_DIRECTED_ROUNDING 1 // CHECK-V8R: #define __ARM_FEATURE_NUMERIC_MAXMIN 1 -// CHECK-V8R: #define __ARM_FP 0xE +// CHECK-V8R: #define __ARM_FP 0xe // RUN: %clang -target armv7a-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7 %s // CHECK-V7: #define __ARMEL__ 1 @@ -25,14 +25,14 @@ // CHECK-V7-NOT: __ARM_FEATURE_CRC32 // CHECK-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN // CHECK-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-V7: #define __ARM_FP 0xC +// CHECK-V7: #define __ARM_FP 0xc // RUN: %clang -target armv7ve-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7VE %s // CHECK-V7VE: #define __ARMEL__ 1 // CHECK-V7VE: #define __ARM_ARCH 7 // CHECK-V7VE: #define __ARM_ARCH_7VE__ 1 // CHECK-V7VE: #define __ARM_ARCH_EXT_IDIV__ 1 -// CHECK-V7VE: #define __ARM_FP 0xC +// CHECK-V7VE: #define __ARM_FP 0xc // RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7s -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V7S %s // CHECK-V7S: #define __ARMEL__ 1 @@ -41,7 +41,7 
@@ // CHECK-V7S-NOT: __ARM_FEATURE_CRC32 // CHECK-V7S-NOT: __ARM_FEATURE_NUMERIC_MAXMIN // CHECK-V7S-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-V7S: #define __ARM_FP 0xE +// CHECK-V7S: #define __ARM_FP 0xe // RUN: %clang -target armv8a -mfloat-abi=hard -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF %s // CHECK-V8-BAREHF: #define __ARMEL__ 1 @@ -50,19 +50,19 @@ // CHECK-V8-BAREHF: #define __ARM_FEATURE_CRC32 1 // CHECK-V8-BAREHF: #define __ARM_FEATURE_DIRECTED_ROUNDING 1 // CHECK-V8-BAREHF: #define __ARM_FEATURE_NUMERIC_MAXMIN 1 -// CHECK-V8-BAREHP: #define __ARM_FP 0xE +// CHECK-V8-BAREHP: #define __ARM_FP 0xe // CHECK-V8-BAREHF: #define __ARM_NEON__ 1 // CHECK-V8-BAREHF: #define __ARM_PCS_VFP 1 // CHECK-V8-BAREHF: #define __VFP_FP__ 1 // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-FP %s // CHECK-V8-BAREHF-FP-NOT: __ARM_NEON__ 1 -// CHECK-V8-BAREHP-FP: #define __ARM_FP 0xE +// CHECK-V8-BAREHP-FP: #define __ARM_FP 0xe // CHECK-V8-BAREHF-FP: #define __VFP_FP__ 1 // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=neon-fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-NEON-FP %s // RUN: %clang -target armv8a -mfloat-abi=hard -mfpu=crypto-neon-fp-armv8 -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-V8-BAREHF-NEON-FP %s -// CHECK-V8-BAREHP-NEON-FP: #define __ARM_FP 0xE +// CHECK-V8-BAREHP-NEON-FP: #define __ARM_FP 0xe // CHECK-V8-BAREHF-NEON-FP: #define __ARM_NEON__ 1 // CHECK-V8-BAREHF-NEON-FP: #define __VFP_FP__ 1 @@ -88,7 +88,7 @@ // RUN: %clang -target armv8a-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s // RUN: %clang -target armv8a-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8A %s // V8A:#define __ARM_ARCH_EXT_IDIV__ 1 -// V8A:#define __ARM_FP 0xE +// V8A:#define __ARM_FP 0xe // RUN: %clang -target armv8m.base-none-linux-gnu -x c 
-E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_BASELINE %s // V8M_BASELINE: #define __ARM_ARCH 8 @@ -111,7 +111,7 @@ // V8M_MAINLINE: #define __ARM_ARCH_PROFILE 'M' // V8M_MAINLINE-NOT: __ARM_FEATURE_CRC32 // V8M_MAINLINE-NOT: __ARM_FEATURE_DSP -// V8M_MAINLINE: #define __ARM_FP 0xE +// V8M_MAINLINE: #define __ARM_FP 0xe // V8M_MAINLINE: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 // RUN: %clang -target arm-none-linux-gnu -march=armv8-m.main+dsp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=V8M_MAINLINE_DSP %s @@ -123,7 +123,7 @@ // V8M_MAINLINE_DSP: #define __ARM_ARCH_PROFILE 'M' // V8M_MAINLINE_DSP-NOT: __ARM_FEATURE_CRC32 // V8M_MAINLINE_DSP: #define __ARM_FEATURE_DSP 1 -// V8M_MAINLINE_DSP: #define __ARM_FP 0xE +// V8M_MAINLINE_DSP: #define __ARM_FP 0xe // V8M_MAINLINE_DSP: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 // RUN: %clang -target arm-none-linux-gnu -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DEFS %s @@ -161,7 +161,7 @@ // Check that -mfpu works properly for Cortex-A7 (enabled by default). 
// RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A7 %s -// DEFAULTFPU-A7:#define __ARM_FP 0xE +// DEFAULTFPU-A7:#define __ARM_FP 0xe // DEFAULTFPU-A7:#define __ARM_NEON__ 1 // DEFAULTFPU-A7:#define __ARM_VFPV4__ 1 @@ -173,14 +173,14 @@ // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a7 -mfpu=vfp4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A7 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a7 -mfpu=vfp4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A7 %s -// NONEON-A7:#define __ARM_FP 0xE +// NONEON-A7:#define __ARM_FP 0xe // NONEON-A7-NOT:#define __ARM_NEON__ 1 // NONEON-A7:#define __ARM_VFPV4__ 1 // Check that -mfpu works properly for Cortex-A5 (enabled by default). 
// RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A5 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A5 %s -// DEFAULTFPU-A5:#define __ARM_FP 0xE +// DEFAULTFPU-A5:#define __ARM_FP 0xe // DEFAULTFPU-A5:#define __ARM_NEON__ 1 // DEFAULTFPU-A5:#define __ARM_VFPV4__ 1 @@ -192,7 +192,7 @@ // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a5 -mfpu=vfp4-d16 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A5 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a5 -mfpu=vfp4-d16 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=NONEON-A5 %s -// NONEON-A5:#define __ARM_FP 0xE +// NONEON-A5:#define __ARM_FP 0xe // NONEON-A5-NOT:#define __ARM_NEON__ 1 // NONEON-A5:#define __ARM_VFPV4__ 1 @@ -218,7 +218,7 @@ // A5-NOT: #define __ARM_FEATURE_DIRECTED_ROUNDING // A5:#define __ARM_FEATURE_DSP 1 // A5-NOT: #define __ARM_FEATURE_NUMERIC_MAXMIN -// A5:#define __ARM_FP 0xE +// A5:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting cortex-a7. // RUN: %clang -target armv7k -mcpu=cortex-a7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A7 %s @@ -228,7 +228,7 @@ // A7:#define __ARM_ARCH_PROFILE 'A' // A7-NOT:#define __ARM_DWARF_EH__ 1 // A7:#define __ARM_FEATURE_DSP 1 -// A7:#define __ARM_FP 0xE +// A7:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting cortex-a7. 
// RUN: %clang -target x86_64-apple-darwin -arch armv7k -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV7K %s @@ -237,7 +237,7 @@ // ARMV7K:#define __ARM_ARCH_PROFILE 'A' // ARMV7K:#define __ARM_DWARF_EH__ 1 // ARMV7K:#define __ARM_FEATURE_DSP 1 -// ARMV7K:#define __ARM_FP 0xE +// ARMV7K:#define __ARM_FP 0xe // ARMV7K:#define __ARM_PCS_VFP 1 @@ -246,20 +246,20 @@ // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A8 %s // A8-NOT:#define __ARM_ARCH_EXT_IDIV__ // A8:#define __ARM_FEATURE_DSP 1 -// A8:#define __ARM_FP 0xC +// A8:#define __ARM_FP 0xc // Test whether predefines are as expected when targeting cortex-a9. // RUN: %clang -target armv7 -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9 %s // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a9 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A9 %s // A9-NOT:#define __ARM_ARCH_EXT_IDIV__ // A9:#define __ARM_FEATURE_DSP 1 -// A9:#define __ARM_FP 0xE +// A9:#define __ARM_FP 0xe // Check that -mfpu works properly for Cortex-A12 (enabled by default). // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A12 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a12 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A12 %s -// DEFAULTFPU-A12:#define __ARM_FP 0xE +// DEFAULTFPU-A12:#define __ARM_FP 0xe // DEFAULTFPU-A12:#define __ARM_NEON__ 1 // DEFAULTFPU-A12:#define __ARM_VFPV4__ 1 @@ -277,19 +277,19 @@ // A12:#define __ARM_ARCH_EXT_IDIV__ 1 // A12:#define __ARM_ARCH_PROFILE 'A' // A12:#define __ARM_FEATURE_DSP 1 -// A12:#define __ARM_FP 0xE +// A12:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting cortex-a15. 
// RUN: %clang -target armv7 -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15 %s // RUN: %clang -target armv7 -mthumb -mcpu=cortex-a15 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=A15 %s // A15:#define __ARM_ARCH_EXT_IDIV__ 1 // A15:#define __ARM_FEATURE_DSP 1 -// A15:#define __ARM_FP 0xE +// A15:#define __ARM_FP 0xe // Check that -mfpu works properly for Cortex-A17 (enabled by default). // RUN: %clang -target armv7-none-linux-gnueabi -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A17 %s // RUN: %clang -target armv7-none-linux-gnueabi -mthumb -mcpu=cortex-a17 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=DEFAULTFPU-A17 %s -// DEFAULTFPU-A17:#define __ARM_FP 0xE +// DEFAULTFPU-A17:#define __ARM_FP 0xe // DEFAULTFPU-A17:#define __ARM_NEON__ 1 // DEFAULTFPU-A17:#define __ARM_VFPV4__ 1 @@ -307,14 +307,14 @@ // A17:#define __ARM_ARCH_EXT_IDIV__ 1 // A17:#define __ARM_ARCH_PROFILE 'A' // A17:#define __ARM_FEATURE_DSP 1 -// A17:#define __ARM_FP 0xE +// A17:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting swift. 
// RUN: %clang -target armv7s -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT %s // RUN: %clang -target armv7s -mthumb -mcpu=swift -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=SWIFT %s // SWIFT:#define __ARM_ARCH_EXT_IDIV__ 1 // SWIFT:#define __ARM_FEATURE_DSP 1 -// SWIFT:#define __ARM_FP 0xE +// SWIFT:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting ARMv8-A Cortex implementations // RUN: %clang -target armv8 -mcpu=cortex-a32 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s @@ -331,7 +331,7 @@ // RUN: %clang -target armv8 -mthumb -mcpu=cortex-a73 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=ARMV8 %s // ARMV8:#define __ARM_ARCH_EXT_IDIV__ 1 // ARMV8:#define __ARM_FEATURE_DSP 1 -// ARMV8:#define __ARM_FP 0xE +// ARMV8:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting cortex-r4. // RUN: %clang -target armv7 -mcpu=cortex-r4 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4-ARM %s @@ -348,19 +348,19 @@ // RUN: %clang -target armv7 -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-ARM %s // R4F-ARM-NOT:#define __ARM_ARCH_EXT_IDIV__ // R4F-ARM:#define __ARM_FEATURE_DSP 1 -// R4F-ARM:#define __ARM_FP 0xC +// R4F-ARM:#define __ARM_FP 0xc // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r4f -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R4F-THUMB %s // R4F-THUMB:#define __ARM_ARCH_EXT_IDIV__ 1 // R4F-THUMB:#define __ARM_FEATURE_DSP 1 -// R4F-THUMB:#define __ARM_FP 0xC +// R4F-THUMB:#define __ARM_FP 0xc // Test whether predefines are as expected when targeting cortex-r5. 
// RUN: %clang -target armv7 -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5 %s // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r5 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R5 %s // R5:#define __ARM_ARCH_EXT_IDIV__ 1 // R5:#define __ARM_FEATURE_DSP 1 -// R5:#define __ARM_FP 0xC +// R5:#define __ARM_FP 0xc // Test whether predefines are as expected when targeting cortex-r7 and cortex-r8. // RUN: %clang -target armv7 -mcpu=cortex-r7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s @@ -369,7 +369,7 @@ // RUN: %clang -target armv7 -mthumb -mcpu=cortex-r8 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=R7-R8 %s // R7-R8:#define __ARM_ARCH_EXT_IDIV__ 1 // R7-R8:#define __ARM_FEATURE_DSP 1 -// R7-R8:#define __ARM_FP 0xE +// R7-R8:#define __ARM_FP 0xe // Test whether predefines are as expected when targeting cortex-m0. // RUN: %clang -target armv7 -mthumb -mcpu=cortex-m0 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M0-THUMB %s @@ -397,7 +397,7 @@ // RUN: %clang -target armv7 -mthumb -mcpu=cortex-m7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=M7-THUMB %s // M7-THUMB:#define __ARM_ARCH_EXT_IDIV__ 1 // M7-THUMB:#define __ARM_FEATURE_DSP 1 -// M7-THUMB:#define __ARM_FP 0xE +// M7-THUMB:#define __ARM_FP 0xe // M7-THUMB:#define __ARM_FPV5__ 1 // Test whether predefines are as expected when targeting v8m cores @@ -437,11 +437,11 @@ // CHECK-V81A: #define __ARM_ARCH_8_1A__ 1 // CHECK-V81A: #define __ARM_ARCH_PROFILE 'A' // CHECK-V81A: #define __ARM_FEATURE_QRDMX 1 -// CHECK-V81A: #define __ARM_FP 0xE +// CHECK-V81A: #define __ARM_FP 0xe // RUN: %clang -target armv8.2a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V82A %s // CHECK-V82A: #define __ARM_ARCH 8 // CHECK-V82A: #define __ARM_ARCH_8_2A__ 1 // CHECK-V82A: #define __ARM_ARCH_PROFILE 'A' // CHECK-V82A: #define __ARM_FEATURE_QRDMX 1 -// 
CHECK-V82A: #define __ARM_FP 0xE +// CHECK-V82A: #define __ARM_FP 0xe diff --git a/test/Preprocessor/predefined-arch-macros.c b/test/Preprocessor/predefined-arch-macros.c index f370f3e..dac137c 100644 --- a/test/Preprocessor/predefined-arch-macros.c +++ b/test/Preprocessor/predefined-arch-macros.c @@ -886,6 +886,7 @@ // CHECK_SKX_M32: #define __MMX__ 1 // CHECK_SKX_M32: #define __MPX__ 1 // CHECK_SKX_M32: #define __PCLMUL__ 1 +// CHECK_SKX_M32: #define __PKU__ 1 // CHECK_SKX_M32: #define __POPCNT__ 1 // CHECK_SKX_M32: #define __PRFCHW__ 1 // CHECK_SKX_M32: #define __RDRND__ 1 @@ -929,6 +930,7 @@ // CHECK_SKX_M64: #define __MMX__ 1 // CHECK_SKX_M64: #define __MPX__ 1 // CHECK_SKX_M64: #define __PCLMUL__ 1 +// CHECK_SKX_M64: #define __PKU__ 1 // CHECK_SKX_M64: #define __POPCNT__ 1 // CHECK_SKX_M64: #define __PRFCHW__ 1 // CHECK_SKX_M64: #define __RDRND__ 1 @@ -970,12 +972,14 @@ // CHECK_CNL_M32: #define __BMI2__ 1 // CHECK_CNL_M32: #define __BMI__ 1 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1 +// CHECK_CNL_M32-NOT: #define __CLWB__ 1 // CHECK_CNL_M32: #define __F16C__ 1 // CHECK_CNL_M32: #define __FMA__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 // CHECK_CNL_M32: #define __MMX__ 1 // CHECK_CNL_M32: #define __MPX__ 1 // CHECK_CNL_M32: #define __PCLMUL__ 1 +// CHECK_CNL_M32-NOT: #define __PKU__ 1 // CHECK_CNL_M32: #define __POPCNT__ 1 // CHECK_CNL_M32: #define __PRFCHW__ 1 // CHECK_CNL_M32: #define __RDRND__ 1 @@ -1015,12 +1019,14 @@ // CHECK_CNL_M64: #define __BMI2__ 1 // CHECK_CNL_M64: #define __BMI__ 1 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1 +// CHECK_CNL_M64-NOT: #define __CLWB__ 1 // CHECK_CNL_M64: #define __F16C__ 1 // CHECK_CNL_M64: #define __FMA__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 // CHECK_CNL_M64: #define __MMX__ 1 // CHECK_CNL_M64: #define __MPX__ 1 // CHECK_CNL_M64: #define __PCLMUL__ 1 +// CHECK_CNL_M64-NOT: #define __PKU__ 1 // CHECK_CNL_M64: #define __POPCNT__ 1 // CHECK_CNL_M64: #define __PRFCHW__ 1 // CHECK_CNL_M64: #define __RDRND__ 1 @@ -1050,23 
+1056,30 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32 // CHECK_ICL_M32: #define __AES__ 1 // CHECK_ICL_M32: #define __AVX2__ 1 +// CHECK_ICL_M32: #define __AVX512BITALG__ 1 // CHECK_ICL_M32: #define __AVX512BW__ 1 // CHECK_ICL_M32: #define __AVX512CD__ 1 // CHECK_ICL_M32: #define __AVX512DQ__ 1 // CHECK_ICL_M32: #define __AVX512F__ 1 // CHECK_ICL_M32: #define __AVX512IFMA__ 1 +// CHECK_ICL_M32: #define __AVX512VBMI2__ 1 // CHECK_ICL_M32: #define __AVX512VBMI__ 1 // CHECK_ICL_M32: #define __AVX512VL__ 1 +// CHECK_ICL_M32: #define __AVX512VNNI__ 1 +// CHECK_ICL_M32: #define __AVX512VPOPCNTDQ__ 1 // CHECK_ICL_M32: #define __AVX__ 1 // CHECK_ICL_M32: #define __BMI2__ 1 // CHECK_ICL_M32: #define __BMI__ 1 // CHECK_ICL_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ICL_M32: #define __CLWB__ 1 // CHECK_ICL_M32: #define __F16C__ 1 // CHECK_ICL_M32: #define __FMA__ 1 +// CHECK_ICL_M32: #define __GFNI__ 1 // CHECK_ICL_M32: #define __LZCNT__ 1 // CHECK_ICL_M32: #define __MMX__ 1 // CHECK_ICL_M32: #define __MPX__ 1 // CHECK_ICL_M32: #define __PCLMUL__ 1 +// CHECK_ICL_M32-NOT: #define __PKU__ 1 // CHECK_ICL_M32: #define __POPCNT__ 1 // CHECK_ICL_M32: #define __PRFCHW__ 1 // CHECK_ICL_M32: #define __RDRND__ 1 @@ -1079,6 +1092,8 @@ // CHECK_ICL_M32: #define __SSE4_2__ 1 // CHECK_ICL_M32: #define __SSE__ 1 // CHECK_ICL_M32: #define __SSSE3__ 1 +// CHECK_ICL_M32: #define __VAES__ 1 +// CHECK_ICL_M32: #define __VPCLMULQDQ__ 1 // CHECK_ICL_M32: #define __XSAVEC__ 1 // CHECK_ICL_M32: #define __XSAVEOPT__ 1 // CHECK_ICL_M32: #define __XSAVES__ 1 @@ -1095,23 +1110,30 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64 // CHECK_ICL_M64: #define __AES__ 1 // CHECK_ICL_M64: #define __AVX2__ 1 +// CHECK_ICL_M64: #define __AVX512BITALG__ 1 // CHECK_ICL_M64: #define __AVX512BW__ 1 // CHECK_ICL_M64: #define __AVX512CD__ 1 // CHECK_ICL_M64: #define __AVX512DQ__ 1 // CHECK_ICL_M64: #define __AVX512F__ 1 // CHECK_ICL_M64: #define __AVX512IFMA__ 1 +// 
CHECK_ICL_M64: #define __AVX512VBMI2__ 1 // CHECK_ICL_M64: #define __AVX512VBMI__ 1 // CHECK_ICL_M64: #define __AVX512VL__ 1 +// CHECK_ICL_M64: #define __AVX512VNNI__ 1 +// CHECK_ICL_M64: #define __AVX512VPOPCNTDQ__ 1 // CHECK_ICL_M64: #define __AVX__ 1 // CHECK_ICL_M64: #define __BMI2__ 1 // CHECK_ICL_M64: #define __BMI__ 1 // CHECK_ICL_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ICL_M64: #define __CLWB__ 1 // CHECK_ICL_M64: #define __F16C__ 1 // CHECK_ICL_M64: #define __FMA__ 1 +// CHECK_ICL_M64: #define __GFNI__ 1 // CHECK_ICL_M64: #define __LZCNT__ 1 // CHECK_ICL_M64: #define __MMX__ 1 // CHECK_ICL_M64: #define __MPX__ 1 // CHECK_ICL_M64: #define __PCLMUL__ 1 +// CHECK_ICL_M64-NOT: #define __PKU__ 1 // CHECK_ICL_M64: #define __POPCNT__ 1 // CHECK_ICL_M64: #define __PRFCHW__ 1 // CHECK_ICL_M64: #define __RDRND__ 1 @@ -1124,6 +1146,8 @@ // CHECK_ICL_M64: #define __SSE4_2__ 1 // CHECK_ICL_M64: #define __SSE__ 1 // CHECK_ICL_M64: #define __SSSE3__ 1 +// CHECK_ICL_M64: #define __VAES__ 1 +// CHECK_ICL_M64: #define __VPCLMULQDQ__ 1 // CHECK_ICL_M64: #define __XSAVEC__ 1 // CHECK_ICL_M64: #define __XSAVEOPT__ 1 // CHECK_ICL_M64: #define __XSAVES__ 1 diff --git a/test/Preprocessor/x86_target_features.c b/test/Preprocessor/x86_target_features.c index e2d0e39..542328a 100644 --- a/test/Preprocessor/x86_target_features.c +++ b/test/Preprocessor/x86_target_features.c @@ -209,11 +209,54 @@ // AVX512VBMI: #define __SSE__ 1 // AVX512VBMI: #define __SSSE3__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s + +// AVX512BITALG: #define __AVX2__ 1 +// AVX512BITALG: #define __AVX512BITALG__ 1 +// AVX512BITALG: #define __AVX512BW__ 1 +// AVX512BITALG: #define __AVX512F__ 1 +// AVX512BITALG: #define __AVX__ 1 +// AVX512BITALG: #define __SSE2_MATH__ 1 +// AVX512BITALG: #define __SSE2__ 1 +// AVX512BITALG: #define __SSE3__ 1 +// AVX512BITALG: #define __SSE4_1__ 1 +// AVX512BITALG: 
#define __SSE4_2__ 1 +// AVX512BITALG: #define __SSE_MATH__ 1 +// AVX512BITALG: #define __SSE__ 1 +// AVX512BITALG: #define __SSSE3__ 1 + + // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s // AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1 // AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMI2 %s + +// AVX512VBMI2: #define __AVX2__ 1 +// AVX512VBMI2: #define __AVX512BW__ 1 +// AVX512VBMI2: #define __AVX512F__ 1 +// AVX512VBMI2: #define __AVX512VBMI2__ 1 +// AVX512VBMI2: #define __AVX__ 1 +// AVX512VBMI2: #define __SSE2_MATH__ 1 +// AVX512VBMI2: #define __SSE2__ 1 +// AVX512VBMI2: #define __SSE3__ 1 +// AVX512VBMI2: #define __SSE4_1__ 1 +// AVX512VBMI2: #define __SSE4_2__ 1 +// AVX512VBMI2: #define __SSE_MATH__ 1 +// AVX512VBMI2: #define __SSE__ 1 +// AVX512VBMI2: #define __SSSE3__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi2 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMI2NOAVX512BW %s + +// AVX512VBMI2NOAVX512BW-NOT: #define __AVX512BW__ 1 +// AVX512VBMI2NOAVX512BW-NOT: #define __AVX512VBMI2__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s + +// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1 +// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1 + // RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s // SSE42POPCNT: #define __POPCNT__ 1 @@ -368,3 +411,28 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mclflushopt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CLFLUSHOPT 
%s // CLFLUSHOPT: #define __CLFLUSHOPT__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAES %s + +// VAES: #define __AES__ 1 +// VAES: #define __VAES__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -mno-aes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAESNOAES %s + +// VAESNOAES-NOT: #define __AES__ 1 +// VAESNOAES-NOT: #define __VAES__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mgfni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=GFNI %s + +// GFNI: #define __GFNI__ 1 +// GFNI: #define __SSE2__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQ %s + +// VPCLMULQDQ: #define __PCLMUL__ 1 +// VPCLMULQDQ: #define __VPCLMULQDQ__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvpclmulqdq -mno-pclmul -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VPCLMULQDQNOPCLMUL %s +// VPCLMULQDQNOPCLMUL-NOT: #define __PCLMUL__ 1 +// VPCLMULQDQNOPCLMUL-NOT: #define __VPCLMULQDQ__ 1 + diff --git a/unittests/AST/ASTImporterTest.cpp b/unittests/AST/ASTImporterTest.cpp index 8355657..a6ec304 100644 --- a/unittests/AST/ASTImporterTest.cpp +++ b/unittests/AST/ASTImporterTest.cpp @@ -495,6 +495,36 @@ TEST(ImportType, ImportAtomicType) { has(atomicType()))))))))); } +TEST(ImportDecl, ImportFunctionTemplateDecl) { + MatchVerifier Verifier; + testImport("template void declToImport() { };", Lang_CXX, "", + Lang_CXX, Verifier, functionTemplateDecl()); +} + +const internal::VariadicDynCastAllOfMatcher + cxxDependentScopeMemberExpr; + +TEST(ImportExpr, ImportCXXDependentScopeMemberExpr) { + MatchVerifier Verifier; + testImport("template struct C { T t; };" + "template void declToImport() {" + " C d;" + " d.t;" + "}" + "void instantiate() { declToImport(); }", + Lang_CXX, "", Lang_CXX, Verifier, + 
functionTemplateDecl(has(functionDecl( + has(compoundStmt(has(cxxDependentScopeMemberExpr()))))))); + testImport("template struct C { T t; };" + "template void declToImport() {" + " C d;" + " (&d)->t;" + "}" + "void instantiate() { declToImport(); }", + Lang_CXX, "", Lang_CXX, Verifier, + functionTemplateDecl(has(functionDecl( + has(compoundStmt(has(cxxDependentScopeMemberExpr()))))))); +} TEST(ImportType, ImportTypeAliasTemplate) { MatchVerifier Verifier; diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index 6982bfc..8117d2f 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -2106,7 +2106,7 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, OverloadInfo &OI = I.second; OS << "case NEON::BI__builtin_neon_" << I.first << ": "; - OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL"; + OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL"; if (OI.PtrArgNum >= 0) OS << "; PtrArgNum = " << OI.PtrArgNum; if (OI.HasConstPtr) @@ -2320,7 +2320,7 @@ void NeonEmitter::run(raw_ostream &OS) { Type T2 = T; T2.makeScalar(); - OS << utostr(T.getNumElements()) << "))) "; + OS << T.getNumElements() << "))) "; OS << T2.str(); OS << " " << T.str() << ";\n"; } @@ -2350,7 +2350,7 @@ void NeonEmitter::run(raw_ostream &OS) { Type VT(TS, M); OS << "typedef struct " << VT.str() << " {\n"; OS << " " << T.str() << " val"; - OS << "[" << utostr(NumMembers) << "]"; + OS << "[" << NumMembers << "]"; OS << ";\n} "; OS << VT.str() << ";\n"; OS << "\n";