From 148779df305667b6942fee7e758fdf81a6498f38 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: May 03 2017 20:26:11 +0000
Subject: Vendor import of llvm trunk r302069:

https://llvm.org/svn/llvm-project/llvm/trunk@302069

---
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f5df77..78e2e01 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -530,6 +530,8 @@ if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB)
 endif()
 option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default})
 
+option(LLVM_DYLIB_SYMBOL_VERSIONING "Enable symbol versioning for the LLVM dynamic library" OFF)
+
 option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF)
 if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES)))
   set(LLVM_USE_HOST_TOOLS ON)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index bf4973c..dad99e3 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1539,7 +1539,7 @@ example:
     This function attribute indicates that the function does not have any
     effects besides calculating its result and does not have undefined behavior.
     Note that ``speculatable`` is not enough to conclude that along any
-    particular exection path the number of calls to this function will not be
+    particular execution path the number of calls to this function will not be
     externally observable. This attribute is only valid on functions
     and declarations, not on individual call sites. If a function is
     incorrectly marked as speculatable and really does exhibit
@@ -7915,7 +7915,7 @@ makes sense:
     ; get pointers for 8 elements from array B
     %ptrs = getelementptr double, double* %B, <8 x i32> %C
     ; load 8 elements from array B into A
-    %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
+    %A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
          i32 8, <8 x i1> %mask, <8 x double> %passthru)
 
 Conversion Operations
@@ -12024,9 +12024,9 @@ This is an overloaded intrinsic. The loaded data are multiple scalar values of a
 
 ::
 
-      declare <16 x float> @llvm.masked.gather.v16f32   (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
-      declare <2 x double> @llvm.masked.gather.v2f64    (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
-      declare <8 x float*> @llvm.masked.gather.v8p0f32  (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1>  <mask>, <8 x float*> <passthru>)
+      declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32   (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+      declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64     (<2 x double addrspace(1)*> <ptrs>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
+      declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1>  <mask>, <8 x float*> <passthru>)
 
 Overview:
 """""""""
@@ -12049,7 +12049,7 @@ The semantics of this operation are equivalent to a sequence of conditional scal
 
 ::
 
-       %res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+       %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
 
        ;; The gather with all-true mask is equivalent to the following instruction sequence
        %ptr0 = extractelement <4 x double*> %ptrs, i32 0
@@ -12078,9 +12078,9 @@ This is an overloaded intrinsic. The data stored in memory is a vector of any in
 
 ::
 
-       declare void @llvm.masked.scatter.v8i32   (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
-       declare void @llvm.masked.scatter.v16f32  (<16 x float>  <value>, <16 x float*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
-       declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v8i32.v8p0i32     (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v16f32.v16p1f32   (<16 x float>  <value>, <16 x float addrspace(1)*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
 
 Overview:
 """""""""
@@ -12101,7 +12101,7 @@ The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector
 ::
 
        ;; This instruction unconditionally stores data vector in multiple addresses
-       call @llvm.masked.scatter.v8i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4,  <8 x i1>  <true, true, .. true>)
+       call @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4,  <8 x i1>  <true, true, .. true>)
 
        ;; It is equivalent to a list of scalar stores
        %val0 = extractelement <8 x i32> %value, i32 0
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 6d74f34..63c92c1 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -86,7 +86,7 @@ private:
   union {
     uint64_t VAL;   ///< Used to store the <= 64 bits integer value.
     uint64_t *pVal; ///< Used to store the >64 bits integer value.
-  };
+  } U;
 
   unsigned BitWidth; ///< The number of bits in this APInt.
 
@@ -98,7 +98,9 @@ private:
   ///
   /// This constructor is used only internally for speed of construction of
   /// temporaries. It is unsafe for general use so it is not public.
-  APInt(uint64_t *val, unsigned bits) : pVal(val), BitWidth(bits) {}
+  APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
+    U.pVal = val;
+  }
 
   /// \brief Determine if this APInt just has one word to store value.
   ///
@@ -143,16 +145,16 @@ private:
     // Mask out the high bits.
     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits);
     if (isSingleWord())
-      VAL &= mask;
+      U.VAL &= mask;
     else
-      pVal[getNumWords() - 1] &= mask;
+      U.pVal[getNumWords() - 1] &= mask;
     return *this;
   }
 
   /// \brief Get the word corresponding to a bit position
   /// \returns the corresponding word for the specified bit position.
   uint64_t getWord(unsigned bitPosition) const {
-    return isSingleWord() ? VAL : pVal[whichWord(bitPosition)];
+    return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
   }
 
   /// \brief Convert a char array into an APInt
@@ -258,7 +260,7 @@ public:
       : BitWidth(numBits) {
     assert(BitWidth && "bitwidth too small");
     if (isSingleWord()) {
-      VAL = val;
+      U.VAL = val;
       clearUnusedBits();
     } else {
       initSlowCase(val, isSigned);
@@ -300,20 +302,21 @@ public:
   /// @brief Copy Constructor.
   APInt(const APInt &that) : BitWidth(that.BitWidth) {
     if (isSingleWord())
-      VAL = that.VAL;
+      U.VAL = that.U.VAL;
     else
       initSlowCase(that);
   }
 
   /// \brief Move Constructor.
-  APInt(APInt &&that) : VAL(that.VAL), BitWidth(that.BitWidth) {
+  APInt(APInt &&that) : BitWidth(that.BitWidth) {
+    memcpy(&U, &that.U, sizeof(U));
     that.BitWidth = 0;
   }
 
   /// \brief Destructor.
   ~APInt() {
     if (needsCleanup())
-      delete[] pVal;
+      delete[] U.pVal;
   }
 
   /// \brief Default constructor that creates an uninteresting APInt
@@ -321,7 +324,7 @@ public:
   ///
   /// This is useful for object deserialization (pair this with the static
   ///  method Read).
-  explicit APInt() : VAL(0), BitWidth(1) {}
+  explicit APInt() : BitWidth(1) { U.VAL = 0; }
 
   /// \brief Returns whether this instance allocated memory.
   bool needsCleanup() const { return !isSingleWord(); }
@@ -373,7 +376,7 @@ public:
   /// This checks to see if the value has all bits of the APInt are set or not.
   bool isAllOnesValue() const {
     if (isSingleWord())
-      return VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth);
+      return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth);
     return countPopulationSlowCase() == BitWidth;
   }
 
@@ -428,7 +431,7 @@ public:
   /// \returns true if the argument APInt value is a power of two > 0.
   bool isPowerOf2() const {
     if (isSingleWord())
-      return isPowerOf2_64(VAL);
+      return isPowerOf2_64(U.VAL);
     return countPopulationSlowCase() == 1;
   }
 
@@ -461,7 +464,7 @@ public:
     assert(numBits != 0 && "numBits must be non-zero");
     assert(numBits <= BitWidth && "numBits out of range");
     if (isSingleWord())
-      return VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits));
+      return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits));
     unsigned Ones = countTrailingOnesSlowCase();
     return (numBits == Ones) &&
            ((Ones + countLeadingZerosSlowCase()) == BitWidth);
@@ -472,7 +475,7 @@ public:
   /// Ex. isMask(0x0000FFFFU) == true.
   bool isMask() const {
     if (isSingleWord())
-      return isMask_64(VAL);
+      return isMask_64(U.VAL);
     unsigned Ones = countTrailingOnesSlowCase();
     return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
   }
@@ -481,7 +484,7 @@ public:
   /// the remainder zero.
   bool isShiftedMask() const {
     if (isSingleWord())
-      return isShiftedMask_64(VAL);
+      return isShiftedMask_64(U.VAL);
     unsigned Ones = countPopulationSlowCase();
     unsigned LeadZ = countLeadingZerosSlowCase();
     return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
@@ -639,8 +642,8 @@ public:
   /// conversions.
   const uint64_t *getRawData() const {
     if (isSingleWord())
-      return &VAL;
-    return &pVal[0];
+      return &U.VAL;
+    return &U.pVal[0];
   }
 
   /// @}
@@ -686,7 +689,7 @@ public:
   /// \returns true if *this is zero, false otherwise.
   bool operator!() const {
     if (isSingleWord())
-      return VAL == 0;
+      return U.VAL == 0;
     return countLeadingZerosSlowCase() == BitWidth;
   }
 
@@ -700,7 +703,7 @@ public:
   APInt &operator=(const APInt &RHS) {
     // If the bitwidths are the same, we can avoid mucking with memory
     if (isSingleWord() && RHS.isSingleWord()) {
-      VAL = RHS.VAL;
+      U.VAL = RHS.U.VAL;
       BitWidth = RHS.BitWidth;
       return clearUnusedBits();
     }
@@ -713,11 +716,11 @@ public:
   APInt &operator=(APInt &&that) {
     assert(this != &that && "Self-move not supported");
     if (!isSingleWord())
-      delete[] pVal;
+      delete[] U.pVal;
 
     // Use memcpy so that type based alias analysis sees both VAL and pVal
     // as modified.
-    memcpy(&VAL, &that.VAL, sizeof(uint64_t));
+    memcpy(&U, &that.U, sizeof(U));
 
     BitWidth = that.BitWidth;
     that.BitWidth = 0;
@@ -734,11 +737,11 @@ public:
   /// \returns *this after assignment of RHS value.
   APInt &operator=(uint64_t RHS) {
     if (isSingleWord()) {
-      VAL = RHS;
+      U.VAL = RHS;
       clearUnusedBits();
     } else {
-      pVal[0] = RHS;
-      memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+      U.pVal[0] = RHS;
+      memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
     }
     return *this;
   }
@@ -752,7 +755,7 @@ public:
   APInt &operator&=(const APInt &RHS) {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      VAL &= RHS.VAL;
+      U.VAL &= RHS.U.VAL;
     else
       AndAssignSlowCase(RHS);
     return *this;
@@ -765,11 +768,11 @@ public:
   /// the LHS.
   APInt &operator&=(uint64_t RHS) {
     if (isSingleWord()) {
-      VAL &= RHS;
+      U.VAL &= RHS;
       return *this;
     }
-    pVal[0] &= RHS;
-    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+    U.pVal[0] &= RHS;
+    memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
     return *this;
   }
 
@@ -782,7 +785,7 @@ public:
   APInt &operator|=(const APInt &RHS) {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      VAL |= RHS.VAL;
+      U.VAL |= RHS.U.VAL;
     else
       OrAssignSlowCase(RHS);
     return *this;
@@ -795,10 +798,10 @@ public:
   /// the LHS.
   APInt &operator|=(uint64_t RHS) {
     if (isSingleWord()) {
-      VAL |= RHS;
+      U.VAL |= RHS;
       clearUnusedBits();
     } else {
-      pVal[0] |= RHS;
+      U.pVal[0] |= RHS;
     }
     return *this;
   }
@@ -812,7 +815,7 @@ public:
   APInt &operator^=(const APInt &RHS) {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      VAL ^= RHS.VAL;
+      U.VAL ^= RHS.U.VAL;
     else
       XorAssignSlowCase(RHS);
     return *this;
@@ -825,10 +828,10 @@ public:
   /// the LHS.
   APInt &operator^=(uint64_t RHS) {
     if (isSingleWord()) {
-      VAL ^= RHS;
+      U.VAL ^= RHS;
       clearUnusedBits();
     } else {
-      pVal[0] ^= RHS;
+      U.pVal[0] ^= RHS;
     }
     return *this;
   }
@@ -865,9 +868,9 @@ public:
     assert(ShiftAmt <= BitWidth && "Invalid shift amount");
     if (isSingleWord()) {
       if (ShiftAmt == BitWidth)
-        VAL = 0;
+        U.VAL = 0;
       else
-        VAL <<= ShiftAmt;
+        U.VAL <<= ShiftAmt;
       return clearUnusedBits();
     }
     shlSlowCase(ShiftAmt);
@@ -913,11 +916,11 @@ public:
   void ashrInPlace(unsigned ShiftAmt) {
     assert(ShiftAmt <= BitWidth && "Invalid shift amount");
     if (isSingleWord()) {
-      int64_t SExtVAL = SignExtend64(VAL, BitWidth);
+      int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
       if (ShiftAmt == BitWidth)
-        VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
+        U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
       else
-        VAL = SExtVAL >> ShiftAmt;
+        U.VAL = SExtVAL >> ShiftAmt;
       clearUnusedBits();
       return;
     }
@@ -938,9 +941,9 @@ public:
     assert(ShiftAmt <= BitWidth && "Invalid shift amount");
     if (isSingleWord()) {
       if (ShiftAmt == BitWidth)
-        VAL = 0;
+        U.VAL = 0;
       else
-        VAL >>= ShiftAmt;
+        U.VAL >>= ShiftAmt;
       return;
     }
     lshrSlowCase(ShiftAmt);
@@ -1059,7 +1062,7 @@ public:
   bool operator[](unsigned bitPosition) const {
     assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
     return (maskBit(bitPosition) &
-            (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) !=
+            (isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)])) !=
            0;
   }
 
@@ -1074,7 +1077,7 @@ public:
   bool operator==(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
     if (isSingleWord())
-      return VAL == RHS.VAL;
+      return U.VAL == RHS.U.VAL;
     return EqualSlowCase(RHS);
   }
 
@@ -1265,7 +1268,7 @@ public:
   bool intersects(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      return (VAL & RHS.VAL) != 0;
+      return (U.VAL & RHS.U.VAL) != 0;
     return intersectsSlowCase(RHS);
   }
 
@@ -1273,7 +1276,7 @@ public:
   bool isSubsetOf(const APInt &RHS) const {
     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
     if (isSingleWord())
-      return (VAL & ~RHS.VAL) == 0;
+      return (U.VAL & ~RHS.U.VAL) == 0;
     return isSubsetOfSlowCase(RHS);
   }
 
@@ -1333,10 +1336,10 @@ public:
   /// \brief Set every bit to 1.
   void setAllBits() {
     if (isSingleWord())
-      VAL = WORD_MAX;
+      U.VAL = WORD_MAX;
     else
       // Set all the bits in all the words.
-      memset(pVal, -1, getNumWords() * APINT_WORD_SIZE);
+      memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
     // Clear the unused ones
     clearUnusedBits();
   }
@@ -1348,9 +1351,9 @@ public:
     assert(BitPosition <= BitWidth && "BitPosition out of range");
     WordType Mask = maskBit(BitPosition);
     if (isSingleWord())
-      VAL |= Mask;
+      U.VAL |= Mask;
     else
-      pVal[whichWord(BitPosition)] |= Mask;
+      U.pVal[whichWord(BitPosition)] |= Mask;
   }
 
   /// Set the sign bit to 1.
@@ -1369,9 +1372,9 @@ public:
       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
       mask <<= loBit;
       if (isSingleWord())
-        VAL |= mask;
+        U.VAL |= mask;
       else
-        pVal[0] |= mask;
+        U.pVal[0] |= mask;
     } else {
       setBitsSlowCase(loBit, hiBit);
     }
@@ -1395,9 +1398,9 @@ public:
   /// \brief Set every bit to 0.
   void clearAllBits() {
     if (isSingleWord())
-      VAL = 0;
+      U.VAL = 0;
     else
-      memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
+      memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
   }
 
   /// \brief Set a given bit to 0.
@@ -1407,9 +1410,9 @@ public:
     assert(BitPosition <= BitWidth && "BitPosition out of range");
     WordType Mask = ~maskBit(BitPosition);
     if (isSingleWord())
-      VAL &= Mask;
+      U.VAL &= Mask;
     else
-      pVal[whichWord(BitPosition)] &= Mask;
+      U.pVal[whichWord(BitPosition)] &= Mask;
   }
 
   /// Set the sign bit to 0.
@@ -1420,7 +1423,7 @@ public:
   /// \brief Toggle every bit to its opposite value.
   void flipAllBits() {
     if (isSingleWord()) {
-      VAL ^= WORD_MAX;
+      U.VAL ^= WORD_MAX;
       clearUnusedBits();
     } else {
       flipAllBitsSlowCase();
@@ -1500,9 +1503,9 @@ public:
   /// uint64_t. Otherwise an assertion will result.
   uint64_t getZExtValue() const {
     if (isSingleWord())
-      return VAL;
+      return U.VAL;
     assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
-    return pVal[0];
+    return U.pVal[0];
   }
 
   /// \brief Get sign extended value
@@ -1512,9 +1515,9 @@ public:
   /// int64_t. Otherwise an assertion will result.
   int64_t getSExtValue() const {
     if (isSingleWord())
-      return SignExtend64(VAL, BitWidth);
+      return SignExtend64(U.VAL, BitWidth);
     assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
-    return int64_t(pVal[0]);
+    return int64_t(U.pVal[0]);
   }
 
   /// \brief Get bits required for string value.
@@ -1534,7 +1537,7 @@ public:
   unsigned countLeadingZeros() const {
     if (isSingleWord()) {
       unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
-      return llvm::countLeadingZeros(VAL) - unusedBits;
+      return llvm::countLeadingZeros(U.VAL) - unusedBits;
     }
     return countLeadingZerosSlowCase();
   }
@@ -1575,7 +1578,7 @@ public:
   /// of ones from the least significant bit to the first zero bit.
   unsigned countTrailingOnes() const {
     if (isSingleWord())
-      return llvm::countTrailingOnes(VAL);
+      return llvm::countTrailingOnes(U.VAL);
     return countTrailingOnesSlowCase();
   }
 
@@ -1587,7 +1590,7 @@ public:
   /// \returns 0 if the value is zero, otherwise returns the number of set bits.
   unsigned countPopulation() const {
     if (isSingleWord())
-      return llvm::countPopulation(VAL);
+      return llvm::countPopulation(U.VAL);
     return countPopulationSlowCase();
   }
 
@@ -1646,7 +1649,7 @@ public:
       uint64_t I;
       double D;
     } T;
-    T.I = (isSingleWord() ? VAL : pVal[0]);
+    T.I = (isSingleWord() ? U.VAL : U.pVal[0]);
     return T.D;
   }
 
@@ -1660,7 +1663,7 @@ public:
       unsigned I;
       float F;
     } T;
-    T.I = unsigned((isSingleWord() ? VAL : pVal[0]));
+    T.I = unsigned((isSingleWord() ? U.VAL : U.pVal[0]));
     return T.F;
   }
 
@@ -1718,7 +1721,7 @@ public:
     // get 0. If VAL is 0, we get WORD_MAX which gets truncated to
     // UINT32_MAX.
     if (BitWidth == 1)
-      return VAL - 1;
+      return U.VAL - 1;
 
     // Handle the zero case.
     if (isNullValue())
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 317a5d3..0d89882 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -346,29 +346,21 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
                                          Module &M) {
   for (auto &F : M) {
     auto &Ctx = F.getContext();
-    AttributeList Attrs = F.getAttributes(), NewAttrs;
+    AttributeList Attrs = F.getAttributes();
+    AttrBuilder NewAttrs;
 
     if (!CPU.empty())
-      NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex,
-                                       "target-cpu", CPU);
-
+      NewAttrs.addAttribute("target-cpu", CPU);
     if (!Features.empty())
-      NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex,
-                                       "target-features", Features);
-
+      NewAttrs.addAttribute("target-features", Features);
     if (DisableFPElim.getNumOccurrences() > 0)
-      NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex,
-                                       "no-frame-pointer-elim",
-                                       DisableFPElim ? "true" : "false");
-
+      NewAttrs.addAttribute("no-frame-pointer-elim",
+                            DisableFPElim ? "true" : "false");
     if (DisableTailCalls.getNumOccurrences() > 0)
-      NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex,
-                                       "disable-tail-calls",
-                                       toStringRef(DisableTailCalls));
-
+      NewAttrs.addAttribute("disable-tail-calls",
+                            toStringRef(DisableTailCalls));
     if (StackRealign)
-      NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex,
-                                       "stackrealign");
+      NewAttrs.addAttribute("stackrealign");
 
     if (TrapFuncName.getNumOccurrences() > 0)
       for (auto &B : F)
@@ -382,8 +374,8 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
                     Attribute::get(Ctx, "trap-func-name", TrapFuncName));
 
     // Let NewAttrs override Attrs.
-    NewAttrs = Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs);
-    F.setAttributes(NewAttrs);
+    F.setAttributes(
+        Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
   }
 }
 
diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h
index 086d6df..ac8aaaf 100644
--- a/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -53,7 +53,7 @@ struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
   typedef void ContextType;
 
   static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::CVRecord<Kind> &Item, void *Ctx) {
+                       codeview::CVRecord<Kind> &Item) {
     using namespace codeview;
     const RecordPrefix *Prefix = nullptr;
     BinaryStreamReader Reader(Stream);
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
index a5a3b85..6c08c9a 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h
@@ -21,6 +21,8 @@
 namespace llvm {
 namespace codeview {
 
+class StringTable;
+
 struct FileChecksumEntry {
   uint32_t FileNameOffset;    // Byte offset of filename in global stringtable.
   FileChecksumKind Kind;      // The type of checksum.
@@ -35,7 +37,7 @@ public:
   typedef void ContextType;
 
   static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::FileChecksumEntry &Item, void *Ctx);
+                       codeview::FileChecksumEntry &Item);
 };
 }
 
@@ -55,8 +57,8 @@ public:
 
   Error initialize(BinaryStreamReader Reader);
 
-  Iterator begin() const { return Checksums.begin(); }
-  Iterator end() const { return Checksums.end(); }
+  Iterator begin() { return Checksums.begin(); }
+  Iterator end() { return Checksums.end(); }
 
   const FileChecksumArray &getArray() const { return Checksums; }
 
@@ -66,20 +68,22 @@ private:
 
 class ModuleDebugFileChecksumFragment final : public ModuleDebugFragment {
 public:
-  ModuleDebugFileChecksumFragment();
+  explicit ModuleDebugFileChecksumFragment(StringTable &Strings);
 
   static bool classof(const ModuleDebugFragment *S) {
     return S->kind() == ModuleDebugFragmentKind::FileChecksums;
   }
 
-  void addChecksum(uint32_t StringTableOffset, FileChecksumKind Kind,
+  void addChecksum(StringRef FileName, FileChecksumKind Kind,
                    ArrayRef<uint8_t> Bytes);
 
   uint32_t calculateSerializedLength() override;
   Error commit(BinaryStreamWriter &Writer) override;
-  uint32_t mapChecksumOffset(uint32_t StringTableOffset) const;
+  uint32_t mapChecksumOffset(StringRef FileName) const;
 
 private:
+  StringTable &Strings;
+
   DenseMap<uint32_t, uint32_t> OffsetMap;
   uint32_t SerializedSize = 0;
   llvm::BumpPtrAllocator Storage;
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
index b98c860..f68f21b 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h
@@ -57,8 +57,6 @@ private:
   ModuleDebugFragment &Frag;
 };
 
-typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray;
-
 } // namespace codeview
 
 template <>
@@ -66,13 +64,17 @@ struct VarStreamArrayExtractor<codeview::ModuleDebugFragmentRecord> {
   typedef void ContextType;
 
   static Error extract(BinaryStreamRef Stream, uint32_t &Length,
-                       codeview::ModuleDebugFragmentRecord &Info, void *Ctx) {
+                       codeview::ModuleDebugFragmentRecord &Info) {
     if (auto EC = codeview::ModuleDebugFragmentRecord::initialize(Stream, Info))
       return EC;
     Length = Info.getRecordLength();
     return Error::success();
   }
 };
+
+namespace codeview {
+typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray;
+}
 } // namespace llvm
 
 #endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
index 177367c..348497c 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h
@@ -20,6 +20,8 @@ namespace llvm {
 namespace codeview {
 
 class ModuleDebugInlineeLineFragmentRef;
+class ModuleDebugFileChecksumFragment;
+class StringTable;
 
 enum class InlineeLinesSignature : uint32_t {
   Normal,    // CV_INLINEE_SOURCE_LINE_SIGNATURE
@@ -42,11 +44,10 @@ struct InlineeSourceLine {
 }
 
 template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> {
-  typedef codeview::ModuleDebugInlineeLineFragmentRef ContextType;
+  typedef bool ContextType;
 
   static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::InlineeSourceLine &Item,
-                       ContextType *Fragment);
+                       codeview::InlineeSourceLine &Item, bool HasExtraFiles);
 };
 
 namespace codeview {
@@ -74,7 +75,8 @@ private:
 
 class ModuleDebugInlineeLineFragment final : public ModuleDebugFragment {
 public:
-  explicit ModuleDebugInlineeLineFragment(bool HasExtraFiles);
+  ModuleDebugInlineeLineFragment(ModuleDebugFileChecksumFragment &Checksums,
+                                 bool HasExtraFiles);
 
   static bool classof(const ModuleDebugFragment *S) {
     return S->kind() == ModuleDebugFragmentKind::InlineeLines;
@@ -83,11 +85,12 @@ public:
   Error commit(BinaryStreamWriter &Writer) override;
   uint32_t calculateSerializedLength() override;
 
-  void addInlineSite(TypeIndex FuncId, uint32_t FileOffset,
-                     uint32_t SourceLine);
-  void addExtraFile(uint32_t FileOffset);
+  void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine);
+  void addExtraFile(StringRef FileName);
 
 private:
+  ModuleDebugFileChecksumFragment &Checksums;
+
   bool HasExtraFiles = false;
   uint32_t ExtraFileCount = 0;
 
diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
index dcfe86d..3124236 100644
--- a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
+++ b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h
@@ -19,6 +19,9 @@
 namespace llvm {
 namespace codeview {
 
+class ModuleDebugFileChecksumFragment;
+class StringTable;
+
 // Corresponds to the `CV_DebugSLinesHeader_t` structure.
 struct LineFragmentHeader {
   support::ulittle32_t RelocOffset;  // Code offset of line contribution.
@@ -61,10 +64,10 @@ struct LineColumnEntry {
 
 class LineColumnExtractor {
 public:
-  typedef const LineFragmentHeader ContextType;
+  typedef const LineFragmentHeader *ContextType;
 
   static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       LineColumnEntry &Item, const LineFragmentHeader *Header);
+                       LineColumnEntry &Item, const LineFragmentHeader *Ctx);
 };
 
 class ModuleDebugLineFragmentRef final : public ModuleDebugFragmentRef {
@@ -104,13 +107,14 @@ class ModuleDebugLineFragment final : public ModuleDebugFragment {
   };
 
 public:
-  ModuleDebugLineFragment();
+  ModuleDebugLineFragment(ModuleDebugFileChecksumFragment &Checksums,
+                          StringTable &Strings);
 
   static bool classof(const ModuleDebugFragment *S) {
     return S->kind() == ModuleDebugFragmentKind::Lines;
   }
 
-  void createBlock(uint32_t ChecksumBufferOffset);
+  void createBlock(StringRef FileName);
   void addLineInfo(uint32_t Offset, const LineInfo &Line);
   void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line,
                             uint32_t ColStart, uint32_t ColEnd);
@@ -125,6 +129,8 @@ public:
   bool hasColumnInfo() const;
 
 private:
+  ModuleDebugFileChecksumFragment &Checksums;
+
   uint16_t RelocOffset = 0;
   uint16_t RelocSegment = 0;
   uint32_t CodeSize = 0;
diff --git a/include/llvm/DebugInfo/CodeView/StringTable.h b/include/llvm/DebugInfo/CodeView/StringTable.h
new file mode 100644
index 0000000..05dc02e
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/StringTable.h
@@ -0,0 +1,75 @@
+//===- StringTable.h - CodeView String Table Reader/Writer ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H
+#define LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Error.h"
+
+#include <stdint.h>
+
+namespace llvm {
+
+class BinaryStreamReader;
+class BinaryStreamRef;
+class BinaryStreamWriter;
+
+namespace codeview {
+
+/// Represents a read-only view of a CodeView string table.  This is a very
+/// simple flat buffer consisting of null-terminated strings, where strings
+/// are retrieved by their offset in the buffer.  StringTableRef does not own
+/// the underlying storage for the buffer.
+class StringTableRef {
+public:
+  StringTableRef();
+
+  Error initialize(BinaryStreamRef Contents);
+
+  Expected<StringRef> getString(uint32_t Offset) const;
+
+  bool valid() const { return Stream.valid(); }
+
+private:
+  BinaryStreamRef Stream;
+};
+
+/// Represents a read-write view of a CodeView string table.  StringTable owns
+/// the underlying storage for the table, and is capable of serializing the
+/// string table into a format understood by StringTableRef.
+class StringTable {
+public:
+  // If string S does not exist in the string table, insert it.
+  // Returns the ID for S.
+  uint32_t insert(StringRef S);
+
+  // Return the ID for string S.  Assumes S exists in the table.
+  uint32_t getStringId(StringRef S) const;
+
+  uint32_t calculateSerializedSize() const;
+  Error commit(BinaryStreamWriter &Writer) const;
+
+  uint32_t size() const;
+
+  StringMap<uint32_t>::const_iterator begin() const { return Strings.begin(); }
+
+  StringMap<uint32_t>::const_iterator end() const { return Strings.end(); }
+
+private:
+  StringMap<uint32_t> Strings;
+  uint32_t StringSize = 1;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
index 2bef3f6..96c8a47 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
@@ -19,13 +19,15 @@ class BinaryStreamReader;
 
 namespace codeview {
 
+class StringTableRef;
+
 class SymbolVisitorDelegate {
 public:
   virtual ~SymbolVisitorDelegate() = default;
 
   virtual uint32_t getRecordOffset(BinaryStreamReader Reader) = 0;
   virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0;
-  virtual StringRef getStringTable() = 0;
+  virtual StringTableRef getStringTable() = 0;
 };
 
 } // end namespace codeview
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 3c04c67..b9f3425 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -172,6 +172,9 @@ public:
     return DWOCUs[index].get();
   }
 
+  /// Get a DIE given an exact offset.
+  DWARFDie getDIEForOffset(uint32_t Offset);
+
   const DWARFUnitIndex &getCUIndex();
   DWARFGdbIndex &getGdbIndex();
   const DWARFUnitIndex &getTUIndex();
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index dd0e264..e21245b 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -30,7 +30,7 @@ public:
   struct FileNameEntry {
     FileNameEntry() = default;
 
-    const char *Name = nullptr;
+    StringRef Name = StringRef();
     uint64_t DirIdx = 0;
     uint64_t ModTime = 0;
     uint64_t Length = 0;
@@ -44,6 +44,10 @@ public:
     uint64_t TotalLength;
     /// Version identifier for the statement information format.
     uint16_t Version;
+    /// In v5, size in bytes of an address (or segment offset).
+    uint8_t AddressSize;
+    /// In v5, size in bytes of a segment selector.
+    uint8_t SegSelectorSize;
     /// The number of bytes following the prologue_length field to the beginning
     /// of the first byte of the statement program itself.
     uint64_t PrologueLength;
@@ -63,7 +67,7 @@ public:
     /// The number assigned to the first special opcode.
     uint8_t OpcodeBase;
     std::vector<uint8_t> StandardOpcodeLengths;
-    std::vector<const char *> IncludeDirectories;
+    std::vector<StringRef> IncludeDirectories;
     std::vector<FileNameEntry> FileNames;
 
     bool IsDWARF64;
@@ -100,7 +104,7 @@ public:
     void postAppend();
     void reset(bool DefaultIsStmt);
     void dump(raw_ostream &OS) const;
-
+    static void dumpTableHeader(raw_ostream &OS);
     static bool orderByAddress(const Row &LHS, const Row &RHS) {
       return LHS.Address < RHS.Address;
     }
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index e29ba52..68e541b 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -312,9 +312,9 @@ public:
         [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) {
           return LHS.getOffset() < Offset;
         });
-    if (it == DieArray.end())
-      return DWARFDie();
-    return DWARFDie(this, &*it);
+    if (it != DieArray.end() && it->getOffset() == Offset)
+      return DWARFDie(this, &*it);
+    return DWARFDie();
   }
 
   uint32_t getLineTableOffset() const {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
new file mode 100644
index 0000000..8e12bcd
--- /dev/null
+++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -0,0 +1,98 @@
+//===- DWARFVerifier.h ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H
+#define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H
+
+#include <cstdint>
+#include <map>
+#include <set>
+
+namespace llvm {
+class raw_ostream;
+struct DWARFAttribute;
+class DWARFContext;
+class DWARFDie;
+class DWARFUnit;
+
+/// A class that verifies DWARF debug information given a DWARF Context.
+class DWARFVerifier {
+  raw_ostream &OS;
+  DWARFContext &DCtx;
+  /// A map that tracks all references (converted absolute references) so we
+  /// can verify each reference points to a valid DIE and not an offset that
+  /// lies between two valid DIEs.
+  std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
+  uint32_t NumDebugInfoErrors;
+  uint32_t NumDebugLineErrors;
+
+  /// Verifies the attribute's DWARF attribute and its value.
+  ///
+  /// This function currently checks for:
+  /// - DW_AT_ranges value is a valid .debug_ranges offset
+  /// - DW_AT_stmt_list is a valid .debug_line offset
+  ///
+  /// @param Die          The DWARF DIE that owns the attribute value
+  /// @param AttrValue    The DWARF attribute value to check
+  void verifyDebugInfoAttribute(DWARFDie &Die, DWARFAttribute &AttrValue);
+
+  /// Verifies the attribute's DWARF form.
+  ///
+  /// This function currently checks for:
+  /// - All DW_FORM_ref values that are CU relative have valid CU offsets
+  /// - All DW_FORM_ref_addr values have valid .debug_info offsets
+  /// - All DW_FORM_strp values have valid .debug_str offsets
+  ///
+  /// @param Die          The DWARF DIE that owns the attribute value
+  /// @param AttrValue    The DWARF attribute value to check
+  void verifyDebugInfoForm(DWARFDie &Die, DWARFAttribute &AttrValue);
+
+  /// Verifies all the valid references that were found when iterating through
+  /// all of the DIE attributes.
+  ///
+  /// This function will verify that all references point to DIEs whose DIE
+  /// offset matches. This helps to ensure if a DWARF link phase moved things
+  /// around, that it doesn't create invalid references by failing to relocate
+  /// CU relative and absolute references.
+  void veifyDebugInfoReferences();
+
+  /// Verify the DW_AT_stmt_list encoding and value and ensure that no two
+  /// compile units have the same DW_AT_stmt_list value.
+  void verifyDebugLineStmtOffsets();
+
+  /// Verify that all of the rows in the line table are valid.
+  ///
+  /// This function currently checks for:
+  /// - addresses within a sequence that decrease in value
+  /// - invalid file indexes
+  void verifyDebugLineRows();
+
+public:
+  DWARFVerifier(raw_ostream &S, DWARFContext &D)
+      : OS(S), DCtx(D), NumDebugInfoErrors(0), NumDebugLineErrors(0) {}
+  /// Verify the information in the .debug_info section.
+  ///
+  /// Any errors are reported to the stream that this object was
+  /// constructed with.
+  ///
+  /// @return True if the .debug_info verifies successfully, false otherwise.
+  bool handleDebugInfo();
+
+  /// Verify the information in the .debug_line section.
+  ///
+  /// Any errors are reported to the stream that this object was
+  /// constructed with.
+  ///
+  /// @return True if the .debug_line verifies successfully, false otherwise.
+  bool handleDebugLine();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 879cb42..d1f791b 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -66,7 +66,7 @@ struct ModuleInfoEx {
 template <> struct VarStreamArrayExtractor<pdb::DbiModuleDescriptor> {
   typedef void ContextType;
   static Error extract(BinaryStreamRef Stream, uint32_t &Length,
-                       pdb::DbiModuleDescriptor &Info, void *Ctx) {
+                       pdb::DbiModuleDescriptor &Info) {
     if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info))
       return EC;
     Length = Info.getRecordLength();
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index 84ae57f..08262e4 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -13,9 +13,9 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
 #include "llvm/DebugInfo/PDB/PDBTypes.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/BinaryStreamArray.h"
@@ -91,7 +91,7 @@ private:
   std::unique_ptr<msf::MappedBlockStream> Stream;
 
   std::vector<ModuleInfoEx> ModuleInfos;
-  StringTable ECNames;
+  PDBStringTable ECNames;
 
   BinaryStreamRef ModInfoSubstream;
   BinaryStreamRef SecContrSubstream;
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index fbca62d..3bed671 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -33,7 +33,7 @@ namespace pdb {
 class DbiStream;
 class GlobalsStream;
 class InfoStream;
-class StringTable;
+class PDBStringTable;
 class PDBFileBuilder;
 class PublicsStream;
 class SymbolStream;
@@ -95,7 +95,7 @@ public:
   Expected<TpiStream &> getPDBIpiStream();
   Expected<PublicsStream &> getPDBPublicsStream();
   Expected<SymbolStream &> getPDBSymbolStream();
-  Expected<StringTable &> getStringTable();
+  Expected<PDBStringTable &> getStringTable();
 
   BumpPtrAllocator &getAllocator() { return Allocator; }
 
@@ -106,7 +106,7 @@ public:
   bool hasPDBPublicsStream();
   bool hasPDBSymbolStream();
   bool hasPDBTpiStream() const;
-  bool hasStringTable();
+  bool hasPDBStringTable();
 
 private:
   Expected<std::unique_ptr<msf::MappedBlockStream>>
@@ -131,7 +131,7 @@ private:
   std::unique_ptr<SymbolStream> Symbols;
   std::unique_ptr<msf::MappedBlockStream> DirectoryStream;
   std::unique_ptr<msf::MappedBlockStream> StringTableStream;
-  std::unique_ptr<StringTable> Strings;
+  std::unique_ptr<PDBStringTable> Strings;
 };
 }
 }
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 3898af5..cd7d3b0 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -15,8 +15,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -46,12 +46,14 @@ public:
   DbiStreamBuilder &getDbiBuilder();
   TpiStreamBuilder &getTpiBuilder();
   TpiStreamBuilder &getIpiBuilder();
-  StringTableBuilder &getStringTableBuilder();
+  PDBStringTableBuilder &getStringTableBuilder();
 
   Error commit(StringRef Filename);
 
-private:
+  Expected<uint32_t> getNamedStreamIndex(StringRef Name) const;
   Error addNamedStream(StringRef Name, uint32_t Size);
+
+private:
   Expected<msf::MSFLayout> finalizeMsfLayout();
 
   BumpPtrAllocator &Allocator;
@@ -62,7 +64,7 @@ private:
   std::unique_ptr<TpiStreamBuilder> Tpi;
   std::unique_ptr<TpiStreamBuilder> Ipi;
 
-  StringTableBuilder Strings;
+  PDBStringTableBuilder Strings;
   NamedStreamMap NamedStreams;
 };
 }
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
new file mode 100644
index 0000000..7c7f16b
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -0,0 +1,64 @@
+//===- PDBStringTable.h - PDB String Table ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+class BinaryStreamReader;
+
+namespace msf {
+class MappedBlockStream;
+}
+
+namespace pdb {
+
+struct PDBStringTableHeader;
+
+class PDBStringTable {
+public:
+  Error reload(BinaryStreamReader &Reader);
+
+  uint32_t getByteSize() const;
+  uint32_t getNameCount() const;
+  uint32_t getHashVersion() const;
+  uint32_t getSignature() const;
+
+  Expected<StringRef> getStringForID(uint32_t ID) const;
+  Expected<uint32_t> getIDForString(StringRef Str) const;
+
+  FixedStreamArray<support::ulittle32_t> name_ids() const;
+
+private:
+  Error readHeader(BinaryStreamReader &Reader);
+  Error readStrings(BinaryStreamReader &Reader);
+  Error readHashTable(BinaryStreamReader &Reader);
+  Error readEpilogue(BinaryStreamReader &Reader);
+
+  const PDBStringTableHeader *Header = nullptr;
+  codeview::StringTableRef Strings;
+  FixedStreamArray<support::ulittle32_t> IDs;
+  uint32_t ByteSize = 0;
+  uint32_t NameCount = 0;
+};
+
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
new file mode 100644
index 0000000..6f85e7a
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
@@ -0,0 +1,60 @@
+//===- PDBStringTableBuilder.h - PDB String Table Builder -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file creates the "/names" stream.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+class BinaryStreamWriter;
+class WritableBinaryStreamRef;
+
+namespace msf {
+struct MSFLayout;
+}
+
+namespace pdb {
+
+class PDBFileBuilder;
+
+class PDBStringTableBuilder {
+public:
+  // If string S does not exist in the string table, insert it.
+  // Returns the ID for S.
+  uint32_t insert(StringRef S);
+
+  uint32_t calculateSerializedSize() const;
+  Error commit(BinaryStreamWriter &Writer) const;
+
+  codeview::StringTable &getStrings() { return Strings; }
+  const codeview::StringTable &getStrings() const { return Strings; }
+
+private:
+  uint32_t calculateHashTableSize() const;
+  Error writeHeader(BinaryStreamWriter &Writer) const;
+  Error writeStrings(BinaryStreamWriter &Writer) const;
+  Error writeHashTable(BinaryStreamWriter &Writer) const;
+  Error writeEpilogue(BinaryStreamWriter &Writer) const;
+
+  codeview::StringTable Strings;
+};
+
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H
diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index e1c6cf0..93622d0 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -307,13 +307,13 @@ struct InfoStreamHeader {
 };
 
 /// The header preceeding the /names stream.
-struct StringTableHeader {
-  support::ulittle32_t Signature;
-  support::ulittle32_t HashVersion;
-  support::ulittle32_t ByteSize;
+struct PDBStringTableHeader {
+  support::ulittle32_t Signature;   // PDBStringTableSignature
+  support::ulittle32_t HashVersion; // 1 or 2
+  support::ulittle32_t ByteSize;    // Number of bytes of names buffer.
 };
 
-const uint32_t StringTableSignature = 0xEFFEEFFE;
+const uint32_t PDBStringTableSignature = 0xEFFEEFFE;
 
 } // namespace pdb
 } // namespace llvm
diff --git a/include/llvm/DebugInfo/PDB/Native/StringTable.h b/include/llvm/DebugInfo/PDB/Native/StringTable.h
deleted file mode 100644
index dd5e30e..0000000
--- a/include/llvm/DebugInfo/PDB/Native/StringTable.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- StringTable.h - PDB String Table -------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H
-#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/BinaryStreamArray.h"
-#include "llvm/Support/BinaryStreamRef.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <vector>
-
-namespace llvm {
-class BinaryStreamReader;
-
-namespace pdb {
-
-class StringTable {
-public:
-  StringTable();
-
-  Error load(BinaryStreamReader &Stream);
-
-  uint32_t getByteSize() const;
-
-  uint32_t getNameCount() const { return NameCount; }
-  uint32_t getHashVersion() const { return HashVersion; }
-  uint32_t getSignature() const { return Signature; }
-
-  StringRef getStringForID(uint32_t ID) const;
-  uint32_t getIDForString(StringRef Str) const;
-
-  FixedStreamArray<support::ulittle32_t> name_ids() const;
-
-private:
-  BinaryStreamRef NamesBuffer;
-  FixedStreamArray<support::ulittle32_t> IDs;
-  uint32_t ByteSize = 0;
-  uint32_t Signature = 0;
-  uint32_t HashVersion = 0;
-  uint32_t NameCount = 0;
-};
-
-} // end namespace pdb
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H
diff --git a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h
deleted file mode 100644
index 9c4b12e..0000000
--- a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h
+++ /dev/null
@@ -1,45 +0,0 @@
-//===- StringTableBuilder.h - PDB String Table Builder ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file creates the "/names" stream.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H
-#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-#include <vector>
-
-namespace llvm {
-class BinaryStreamWriter;
-
-namespace pdb {
-
-class StringTableBuilder {
-public:
-  // If string S does not exist in the string table, insert it.
-  // Returns the ID for S.
-  uint32_t insert(StringRef S);
-  uint32_t getStringIndex(StringRef S);
-
-  uint32_t finalize();
-  Error commit(BinaryStreamWriter &Writer) const;
-
-private:
-  DenseMap<StringRef, uint32_t> Strings;
-  uint32_t StringSize = 1;
-};
-
-} // end namespace pdb
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index af46034..adcb726 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -285,7 +285,8 @@ class AttributeList {
 public:
   enum AttrIndex : unsigned {
     ReturnIndex = 0U,
-    FunctionIndex = ~0U
+    FunctionIndex = ~0U,
+    FirstArgIndex = 1,
   };
 
 private:
@@ -336,6 +337,13 @@ public:
   static AttributeList get(LLVMContext &C, unsigned Index,
                            const AttrBuilder &B);
 
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo,
+                                  Attribute::AttrKind Kind) const {
+    return addAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
   /// \brief Add an attribute to the attribute set at the given index. Because
   /// attribute sets are immutable, this returns a new set.
   AttributeList addAttribute(LLVMContext &C, unsigned Index,
@@ -354,9 +362,6 @@ public:
   /// \brief Add attributes to the attribute set at the given index. Because
   /// attribute sets are immutable, this returns a new set.
   AttributeList addAttributes(LLVMContext &C, unsigned Index,
-                              AttributeList Attrs) const;
-
-  AttributeList addAttributes(LLVMContext &C, unsigned Index,
                               const AttrBuilder &B) const;
 
   /// \brief Remove the specified attribute at the specified index from this
@@ -375,13 +380,7 @@ public:
   /// attribute list. Because attribute lists are immutable, this returns the
   /// new list.
   AttributeList removeAttributes(LLVMContext &C, unsigned Index,
-                                 AttributeList Attrs) const;
-
-  /// \brief Remove the specified attributes at the specified index from this
-  /// attribute list. Because attribute lists are immutable, this returns the
-  /// new list.
-  AttributeList removeAttributes(LLVMContext &C, unsigned Index,
-                                 const AttrBuilder &Attrs) const;
+                                 const AttrBuilder &AttrsToRemove) const;
 
   /// \brief Remove all attributes at the specified index from this
   /// attribute list. Because attribute lists are immutable, this returns the
@@ -442,7 +441,7 @@ public:
   /// may be faster.
   bool hasFnAttribute(StringRef Kind) const;
 
-  /// \brief Equivalent to hasAttribute(ArgNo + 1, Kind).
+  /// \brief Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
   bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
 
   /// \brief Return true if the specified attribute is set for at least one
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index bad1d4e..d61431a 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -339,6 +339,10 @@ public:
     CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr));
   }
 
+  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind));
+  }
+
   void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
     CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
   }
@@ -347,6 +351,10 @@ public:
     CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
   }
 
+  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind));
+  }
+
   /// Return true if this function has the given attribute.
   bool hasFnAttr(Attribute::AttrKind Kind) const {
     CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
@@ -408,11 +416,9 @@ public:
     CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i));
   }
 
-  /// Determine if the parameter or return value is marked with NoAlias
-  /// attribute.
-  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
-  bool doesNotAlias(unsigned n) const {
-    CALLSITE_DELEGATE_GETTER(doesNotAlias(n));
+  /// Determine if the return value is marked with NoAlias attribute.
+  bool returnDoesNotAlias() const {
+    CALLSITE_DELEGATE_GETTER(returnDoesNotAlias());
   }
 
   /// Return true if the call should not be treated as a call to a builtin.
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 9e723f9..f9582f5 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -204,6 +204,10 @@ public:
     addAttribute(AttributeList::FunctionIndex, Attr);
   }
 
+  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+  }
+
   /// @brief Remove function attributes from this function.
   void removeFnAttr(Attribute::AttrKind Kind) {
     removeAttribute(AttributeList::FunctionIndex, Kind);
@@ -211,10 +215,14 @@ public:
 
   /// @brief Remove function attribute from this function.
   void removeFnAttr(StringRef Kind) {
-    setAttributes(AttributeSets.removeAttribute(
+    setAttributes(getAttributes().removeAttribute(
         getContext(), AttributeList::FunctionIndex, Kind));
   }
 
+  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+    removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+  }
+
   /// \brief Set the entry count for this function.
   ///
   /// Entry count is the number of times this function was executed based on
@@ -279,7 +287,7 @@ public:
   void addAttribute(unsigned i, Attribute Attr);
 
   /// @brief adds the attributes to the list of attributes.
-  void addAttributes(unsigned i, AttributeList Attrs);
+  void addAttributes(unsigned i, const AttrBuilder &Attrs);
 
   /// @brief removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, Attribute::AttrKind Kind);
@@ -288,7 +296,7 @@ public:
   void removeAttribute(unsigned i, StringRef Kind);
 
   /// @brief removes the attributes from the list of attributes.
-  void removeAttributes(unsigned i, AttributeList Attrs);
+  void removeAttributes(unsigned i, const AttrBuilder &Attrs);
 
   /// @brief check if an attributes is in the list of attributes.
   bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
@@ -459,35 +467,12 @@ public:
   /// @brief Determine if the parameter or return value is marked with NoAlias
   /// attribute.
   /// @param n The parameter to check. 1 is the first parameter, 0 is the return
-  bool doesNotAlias(unsigned n) const {
-    return AttributeSets.hasAttribute(n, Attribute::NoAlias);
-  }
-  void setDoesNotAlias(unsigned n) {
-    addAttribute(n, Attribute::NoAlias);
-  }
-
-  /// @brief Determine if the parameter can be captured.
-  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
-  bool doesNotCapture(unsigned n) const {
-    return AttributeSets.hasAttribute(n, Attribute::NoCapture);
-  }
-  void setDoesNotCapture(unsigned n) {
-    addAttribute(n, Attribute::NoCapture);
-  }
-
-  bool doesNotAccessMemory(unsigned n) const {
-    return AttributeSets.hasAttribute(n, Attribute::ReadNone);
-  }
-  void setDoesNotAccessMemory(unsigned n) {
-    addAttribute(n, Attribute::ReadNone);
-  }
-
-  bool onlyReadsMemory(unsigned n) const {
-    return doesNotAccessMemory(n) ||
-      AttributeSets.hasAttribute(n, Attribute::ReadOnly);
+  bool returnDoesNotAlias() const {
+    return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
+                                      Attribute::NoAlias);
   }
-  void setOnlyReadsMemory(unsigned n) {
-    addAttribute(n, Attribute::ReadOnly);
+  void setReturnDoesNotAlias() {
+    addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
   }
 
   /// Optimize this function for minimum size (-Oz).
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 4d3f1dc..844a727 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -1658,12 +1658,18 @@ public:
   /// adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attribute Attr);
 
+  /// Adds the attribute to the indicated argument
+  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+
   /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, Attribute::AttrKind Kind);
 
   /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, StringRef Kind);
 
+  /// Removes the attribute from the given argument
+  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+
   /// adds the dereferenceable attribute to the list of attributes.
   void addDereferenceableAttr(unsigned i, uint64_t Bytes);
 
@@ -1734,11 +1740,9 @@ public:
     return Attrs.getDereferenceableOrNullBytes(i);
   }
 
-  /// @brief Determine if the parameter or return value is marked with NoAlias
-  /// attribute.
-  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
-  bool doesNotAlias(unsigned n) const {
-    return Attrs.hasAttribute(n, Attribute::NoAlias);
+  /// @brief Determine if the return value is marked with NoAlias attribute.
+  bool returnDoesNotAlias() const {
+    return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
   }
 
   /// Return true if the call should not be treated as a call to a
@@ -3750,12 +3754,18 @@ public:
   /// adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attribute Attr);
 
+  /// Adds the attribute to the indicated argument
+  void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+
   /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, Attribute::AttrKind Kind);
 
   /// removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, StringRef Kind);
 
+  /// Removes the attribute from the given argument
+  void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
+
   /// adds the dereferenceable attribute to the list of attributes.
   void addDereferenceableAttr(unsigned i, uint64_t Bytes);
 
@@ -3827,11 +3837,9 @@ public:
     return Attrs.getDereferenceableOrNullBytes(i);
   }
 
-  /// @brief Determine if the parameter or return value is marked with NoAlias
-  /// attribute.
-  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
-  bool doesNotAlias(unsigned n) const {
-    return Attrs.hasAttribute(n, Attribute::NoAlias);
+  /// @brief Determine if the return value is marked with NoAlias attribute.
+  bool returnDoesNotAlias() const {
+    return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
   }
 
   /// Return true if the call should not be treated as a call to a
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index 2f6bdf8..fc79da7 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -100,7 +100,7 @@ namespace Intrinsic {
       Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
-      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt
+      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
     } Kind;
 
     union {
@@ -119,25 +119,43 @@ namespace Intrinsic {
       AK_AnyVector,
       AK_AnyPointer
     };
+
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
              Kind == SameVecWidthArgument || Kind == PtrToArgument ||
-             Kind == PtrToElt || Kind == VecOfPtrsToElt);
+             Kind == PtrToElt);
       return Argument_Info >> 3;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument || Kind == PtrToArgument ||
-             Kind == VecOfPtrsToElt);
+             Kind == SameVecWidthArgument || Kind == PtrToArgument);
       return (ArgKind)(Argument_Info & 7);
     }
 
+    // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
+    // and a reference argument (for matching vector width and element types)
+    unsigned getOverloadArgNumber() const {
+      assert(Kind == VecOfAnyPtrsToElt);
+      return Argument_Info >> 16;
+    }
+    unsigned getRefArgNumber() const {
+      assert(Kind == VecOfAnyPtrsToElt);
+      return Argument_Info & 0xFFFF;
+    }
+
     static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
       IITDescriptor Result = { K, { Field } };
       return Result;
     }
+
+    static IITDescriptor get(IITDescriptorKind K, unsigned short Hi,
+                             unsigned short Lo) {
+      unsigned Field = Hi << 16 | Lo;
+      IITDescriptor Result = {K, {Field}};
+      return Result;
+    }
   };
 
   /// Return the IIT table descriptor for the specified intrinsic into an array
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 39b992c..cf7e5d8 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -155,7 +155,7 @@ class LLVMVectorSameWidth<int num, LLVMType elty>
 }
 class LLVMPointerTo<int num> : LLVMMatchType<num>;
 class LLVMPointerToElt<int num> : LLVMMatchType<num>;
-class LLVMVectorOfPointersToElt<int num> : LLVMMatchType<num>;
+class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -404,7 +404,7 @@ def int_memset  : Intrinsic<[],
 // FIXME: Add version of these floating point intrinsics which allow non-default
 // rounding modes and FP exception handling.
 
-let IntrProperties = [IntrNoMem] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_fma  : Intrinsic<[llvm_anyfloat_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>,
                             LLVMMatchType<0>]>;
@@ -440,10 +440,12 @@ let IntrProperties = [IntrNoMem] in {
 }
 
 def int_minnum : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative]
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, Commutative]
 >;
 def int_maxnum : Intrinsic<[llvm_anyfloat_ty],
-  [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative]
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, Commutative]
 >;
 
 // NOTE: these are internal interfaces.
@@ -455,7 +457,7 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
 // Internal interface for object size checking
 def int_objectsize : Intrinsic<[llvm_anyint_ty],
                                [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty],
-                               [IntrNoMem]>,
+                               [IntrNoMem, IntrSpeculatable]>,
                                GCCBuiltin<"__builtin_object_size">;
 
 //===--------------- Constrained Floating Point Intrinsics ----------------===//
@@ -500,7 +502,7 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
 //
 
 // None of these intrinsics accesses memory at all.
-let IntrProperties = [IntrNoMem] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
@@ -511,10 +513,11 @@ let IntrProperties = [IntrNoMem] in {
 //===------------------------ Debugger Intrinsics -------------------------===//
 //
 
-// None of these intrinsics accesses memory at all...but that doesn't mean the
-// optimizers can change them aggressively.  Special handling needed in a few
-// places.
-let IntrProperties = [IntrNoMem] in {
+// None of these intrinsics accesses memory at all...but that doesn't
+// mean the optimizers can change them aggressively.  Special handling
+// needed in a few places. These synthetic intrinsics have no
+// side-effects and just mark information about their operands.
+let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
   def int_dbg_declare      : Intrinsic<[],
                                        [llvm_metadata_ty,
                                        llvm_metadata_ty,
@@ -592,24 +595,24 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
 // Expose the carry flag from add operations on two integrals.
 def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 
 def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 
 def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>],
-                                       [IntrNoMem]>;
+                                       [IntrNoMem, IntrSpeculatable]>;
 
 //===------------------------- Memory Use Markers -------------------------===//
 //
@@ -633,7 +636,7 @@ def int_invariant_end   : Intrinsic<[],
 // it can be CSE only if memory didn't change between 2 barriers call,
 // which is valid.
 // The argument also can't be marked with 'returned' attribute, because
-// it would remove barrier. 
+// it would remove barrier.
 def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
                                             [llvm_ptr_ty],
                                             [IntrReadMem, IntrArgMemOnly]>;
@@ -758,14 +761,14 @@ def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
 def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
-                                 [LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                 [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
                                   LLVMVectorSameWidth<0, llvm_i1_ty>,
                                   LLVMMatchType<0>],
                                  [IntrReadMem]>;
 
 def int_masked_scatter: Intrinsic<[],
                                   [llvm_anyvector_ty,
-                                   LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                   LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
                                    LLVMVectorSameWidth<0, llvm_i1_ty>]>;
 
 def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 97c756c..1c466e7 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -3221,6 +3221,29 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 
 //===----------------------------------------------------------------------===//
+// LWP
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_llwpcb :
+              GCCBuiltin<"__builtin_ia32_llwpcb">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_slwpcb :
+              GCCBuiltin<"__builtin_ia32_slwpcb">,
+              Intrinsic<[llvm_ptr_ty], [], []>;
+  def int_x86_lwpins32 :
+              GCCBuiltin<"__builtin_ia32_lwpins32">,
+              Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_lwpins64 :
+              GCCBuiltin<"__builtin_ia32_lwpins64">,
+              Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_lwpval32 :
+              GCCBuiltin<"__builtin_ia32_lwpval32">,
+              Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_x86_lwpval64 :
+              GCCBuiltin<"__builtin_ia32_lwpval64">,
+              Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
 // MMX
 
 // Empty MMX state op.
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 748a62b..f141c30 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -42,99 +42,34 @@ namespace llvm {
 /// having to specify a second template argument to VarStreamArray (documented
 /// below).
 template <typename T> struct VarStreamArrayExtractor {
-  typedef void Context;
+  struct ContextType {};
 
   // Method intentionally deleted.  You must provide an explicit specialization
-  // with the following method implemented.
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item,
-                       Context *Ctx) = delete;
-};
-
-/// VarStreamArray represents an array of variable length records backed by a
-/// stream.  This could be a contiguous sequence of bytes in memory, it could
-/// be a file on disk, or it could be a PDB stream where bytes are stored as
-/// discontiguous blocks in a file.  Usually it is desirable to treat arrays
-/// as contiguous blocks of memory, but doing so with large PDB files, for
-/// example, could mean allocating huge amounts of memory just to allow
-/// re-ordering of stream data to be contiguous before iterating over it.  By
-/// abstracting this out, we need not duplicate this memory, and we can
-/// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
-/// lazily on iteration, so there is no upfront cost associated with building
-/// or copying a VarStreamArray, no matter how large it may be.
-///
-/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
-/// If you do not specify an Extractor type, you are expected to specialize
-/// VarStreamArrayExtractor<T> for your ValueType.
-///
-/// The default extractor type is stateless, but by specializing
-/// VarStreamArrayExtractor or defining your own custom extractor type and
-/// adding the appropriate ContextType typedef to the class, you can pass a
-/// context field during construction of the VarStreamArray that will be
-/// passed to each call to extract.
-///
-template <typename ValueType, typename ExtractorType>
-class VarStreamArrayIterator;
-
-template <typename ValueType,
-          typename ExtractorType = VarStreamArrayExtractor<ValueType>>
-class VarStreamArray {
-public:
-  typedef typename ExtractorType::ContextType ContextType;
-  typedef VarStreamArrayIterator<ValueType, ExtractorType> Iterator;
-  friend Iterator;
-
-  VarStreamArray() = default;
-
-  explicit VarStreamArray(BinaryStreamRef Stream,
-                          ContextType *Context = nullptr)
-      : Stream(Stream), Context(Context) {}
-
-  VarStreamArray(const VarStreamArray<ValueType, ExtractorType> &Other)
-      : Stream(Other.Stream), Context(Other.Context) {}
-
-  Iterator begin(bool *HadError = nullptr) const {
-    if (empty())
-      return end();
-
-    return Iterator(*this, Context, HadError);
-  }
-
-  Iterator end() const { return Iterator(); }
-
-  bool empty() const { return Stream.getLength() == 0; }
-
-  /// \brief given an offset into the array's underlying stream, return an
-  /// iterator to the record at that offset.  This is considered unsafe
-  /// since the behavior is undefined if \p Offset does not refer to the
-  /// beginning of a valid record.
-  Iterator at(uint32_t Offset) const {
-    return Iterator(*this, Context, Stream.drop_front(Offset), nullptr);
-  }
-
-  BinaryStreamRef getUnderlyingStream() const { return Stream; }
+  // with one of the following two methods implemented.
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item) = delete;
 
-private:
-  BinaryStreamRef Stream;
-  ContextType *Context = nullptr;
+  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item,
+                       const ContextType &Ctx) = delete;
 };
 
-template <typename ValueType, typename ExtractorType>
+template <typename ArrayType, typename Value, typename Extractor,
+          typename WrappedCtx>
 class VarStreamArrayIterator
     : public iterator_facade_base<
-          VarStreamArrayIterator<ValueType, ExtractorType>,
-          std::forward_iterator_tag, ValueType> {
-  typedef typename ExtractorType::ContextType ContextType;
-  typedef VarStreamArrayIterator<ValueType, ExtractorType> IterType;
-  typedef VarStreamArray<ValueType, ExtractorType> ArrayType;
+          VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx>,
+          std::forward_iterator_tag, Value> {
+  typedef VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx>
+      IterType;
 
 public:
-  VarStreamArrayIterator(const ArrayType &Array, ContextType *Context,
+  VarStreamArrayIterator() = default;
+  VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx,
                          BinaryStreamRef Stream, bool *HadError = nullptr)
-      : IterRef(Stream), Context(Context), Array(&Array), HadError(HadError) {
+      : IterRef(Stream), Ctx(&Ctx), Array(&Array), HadError(HadError) {
     if (IterRef.getLength() == 0)
       moveToEnd();
     else {
-      auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context);
+      auto EC = Ctx.template invoke<Extractor>(IterRef, ThisLen, ThisValue);
       if (EC) {
         consumeError(std::move(EC));
         markError();
@@ -142,11 +77,13 @@ public:
     }
   }
 
-  VarStreamArrayIterator(const ArrayType &Array, ContextType *Context,
+  VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx,
                          bool *HadError = nullptr)
-      : VarStreamArrayIterator(Array, Context, Array.Stream, HadError) {}
+      : VarStreamArrayIterator(Array, Ctx, Array.Stream, HadError) {}
+
+  VarStreamArrayIterator(const WrappedCtx &Ctx) : Ctx(&Ctx) {}
+  VarStreamArrayIterator(const VarStreamArrayIterator &Other) = default;
 
-  VarStreamArrayIterator() = default;
   ~VarStreamArrayIterator() = default;
 
   bool operator==(const IterType &R) const {
@@ -164,12 +101,12 @@ public:
     return false;
   }
 
-  const ValueType &operator*() const {
+  const Value &operator*() const {
     assert(Array && !HasError);
     return ThisValue;
   }
 
-  ValueType &operator*() {
+  Value &operator*() {
     assert(Array && !HasError);
     return ThisValue;
   }
@@ -185,7 +122,7 @@ public:
         moveToEnd();
       } else {
         // There is some data after the current record.
-        auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context);
+        auto EC = Ctx->template invoke<Extractor>(IterRef, ThisLen, ThisValue);
         if (EC) {
           consumeError(std::move(EC));
           markError();
@@ -210,15 +147,136 @@ private:
       *HadError = true;
   }
 
-  ValueType ThisValue;
+  Value ThisValue;
   BinaryStreamRef IterRef;
-  ContextType *Context{nullptr};
+  const WrappedCtx *Ctx{nullptr};
   const ArrayType *Array{nullptr};
   uint32_t ThisLen{0};
   bool HasError{false};
   bool *HadError{nullptr};
 };
 
+template <typename T, typename Context> struct ContextWrapper {
+  ContextWrapper() = default;
+  // Forward so that an rvalue context is moved into the wrapper, not copied.
+  explicit ContextWrapper(Context &&Ctx) : Ctx(std::forward<Context>(Ctx)) {}
+
+  template <typename Extractor>
+  Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const {
+    return Extractor::extract(Stream, Len, Item, Ctx);
+  }
+
+  Context Ctx;
+};
+
+template <typename T> struct ContextWrapper<T, void> {
+  ContextWrapper() = default;
+
+  template <typename Extractor>
+  Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const {
+    return Extractor::extract(Stream, Len, Item);
+  }
+};
+
+/// VarStreamArray represents an array of variable length records backed by a
+/// stream.  This could be a contiguous sequence of bytes in memory, it could
+/// be a file on disk, or it could be a PDB stream where bytes are stored as
+/// discontiguous blocks in a file.  Usually it is desirable to treat arrays
+/// as contiguous blocks of memory, but doing so with large PDB files, for
+/// example, could mean allocating huge amounts of memory just to allow
+/// re-ordering of stream data to be contiguous before iterating over it.  By
+/// abstracting this out, we need not duplicate this memory, and we can
+/// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
+/// lazily on iteration, so there is no upfront cost associated with building
+/// or copying a VarStreamArray, no matter how large it may be.
+///
+/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
+/// If you do not specify an Extractor type, you are expected to specialize
+/// VarStreamArrayExtractor<T> for your ValueType.
+///
+/// The default extractor type is stateless, but by specializing
+/// VarStreamArrayExtractor or defining your own custom extractor type and
+/// adding the appropriate ContextType typedef to the class, you can pass a
+/// context field during construction of the VarStreamArray that will be
+/// passed to each call to extract.
+///
+template <typename Value, typename Extractor, typename WrappedCtx>
+class VarStreamArrayBase {
+  typedef VarStreamArrayBase<Value, Extractor, WrappedCtx> MyType;
+
+public:
+  typedef VarStreamArrayIterator<MyType, Value, Extractor, WrappedCtx> Iterator;
+  friend Iterator;
+
+  VarStreamArrayBase() = default;
+
+  VarStreamArrayBase(BinaryStreamRef Stream, const WrappedCtx &Ctx)
+      : Stream(Stream), Ctx(Ctx) {}
+
+  VarStreamArrayBase(const MyType &Other)
+      : Stream(Other.Stream), Ctx(Other.Ctx) {}
+
+  Iterator begin(bool *HadError = nullptr) const {
+    if (empty())
+      return end();
+
+    return Iterator(*this, Ctx, Stream, HadError);
+  }
+
+  bool valid() const { return Stream.valid(); }
+
+  Iterator end() const { return Iterator(Ctx); }
+
+  bool empty() const { return Stream.getLength() == 0; }
+
+  /// \brief given an offset into the array's underlying stream, return an
+  /// iterator to the record at that offset.  This is considered unsafe
+  /// since the behavior is undefined if \p Offset does not refer to the
+  /// beginning of a valid record.
+  Iterator at(uint32_t Offset) const {
+    return Iterator(*this, Ctx, Stream.drop_front(Offset), nullptr);
+  }
+
+  BinaryStreamRef getUnderlyingStream() const { return Stream; }
+
+private:
+  BinaryStreamRef Stream;
+  WrappedCtx Ctx;
+};
+
+template <typename Value, typename Extractor, typename Context>
+class VarStreamArrayImpl
+    : public VarStreamArrayBase<Value, Extractor,
+                                ContextWrapper<Value, Context>> {
+  typedef ContextWrapper<Value, Context> WrappedContext;
+  typedef VarStreamArrayImpl<Value, Extractor, Context> MyType;
+  typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType;
+
+public:
+  typedef Context ContextType;
+
+  VarStreamArrayImpl() = default;
+  VarStreamArrayImpl(BinaryStreamRef Stream, Context &&Ctx)
+      : BaseType(Stream, WrappedContext(std::forward<Context>(Ctx))) {}
+};
+
+template <typename Value, typename Extractor>
+class VarStreamArrayImpl<Value, Extractor, void>
+    : public VarStreamArrayBase<Value, Extractor, ContextWrapper<Value, void>> {
+  typedef ContextWrapper<Value, void> WrappedContext;
+  typedef VarStreamArrayImpl<Value, Extractor, void> MyType;
+  typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType;
+
+public:
+  VarStreamArrayImpl() = default;
+  VarStreamArrayImpl(BinaryStreamRef Stream)
+      : BaseType(Stream, WrappedContext()) {}
+};
+
+template <typename Value, typename Extractor = VarStreamArrayExtractor<Value>>
+using VarStreamArray =
+    VarStreamArrayImpl<Value, Extractor, typename Extractor::ContextType>;
+
 template <typename T> class FixedStreamArrayIterator;
 
 /// FixedStreamArray is similar to VarStreamArray, except with each record
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index f30d82d..7773807 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -31,6 +31,7 @@ namespace llvm {
 /// are overridable.
 class BinaryStreamReader {
 public:
+  BinaryStreamReader() = default;
   explicit BinaryStreamReader(BinaryStreamRef Stream);
   virtual ~BinaryStreamReader() {}
 
@@ -172,13 +173,29 @@ public:
   /// \returns a success error code if the data was successfully read, otherwise
   /// returns an appropriate error code.
   template <typename T, typename U>
-  Error
-  readArray(VarStreamArray<T, U> &Array, uint32_t Size,
-            typename VarStreamArray<T, U>::ContextType *Context = nullptr) {
+  Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) {
     BinaryStreamRef S;
     if (auto EC = readStreamRef(S, Size))
       return EC;
-    Array = VarStreamArray<T, U>(S, Context);
+    Array = VarStreamArray<T, U>(S);
+    return Error::success();
+  }
+
+  /// Read a VarStreamArray of size \p Size bytes and store the result into
+  /// \p Array.  The array gets its own context built from \p Context: an
+  /// lvalue is copied, an rvalue is moved.  Updates the stream's offset to
+  /// point after the newly read array.  Never causes a copy of stream data
+  /// (although iterating the VarStreamArray may, depending upon the stream).
+  ///
+  /// \returns a success error code on success, otherwise an appropriate error.
+  template <typename T, typename U, typename ContextType>
+  Error readArray(VarStreamArray<T, U> &Array, uint32_t Size,
+                  ContextType &&Context) {
+    BinaryStreamRef S;
+    if (auto EC = readStreamRef(S, Size))
+      return EC;
+    typename U::ContextType Ctx(std::forward<ContextType>(Context));
+    Array = VarStreamArray<T, U>(S, std::move(Ctx));
     return Error::success();
   }
 
@@ -227,6 +244,9 @@ public:
   /// \returns the next byte in the stream.
   uint8_t peek() const;
 
+  std::pair<BinaryStreamReader, BinaryStreamReader>
+  split(uint32_t Offset) const;
+
 private:
   BinaryStreamRef Stream;
   uint32_t Offset;
diff --git a/include/llvm/Support/BinaryStreamRef.h b/include/llvm/Support/BinaryStreamRef.h
index 23ce02f..465e724 100644
--- a/include/llvm/Support/BinaryStreamRef.h
+++ b/include/llvm/Support/BinaryStreamRef.h
@@ -98,6 +98,9 @@ public:
   BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset,
                   uint32_t Length) = delete;
 
+  /// Check if a Stream is valid.
+  bool valid() const { return Stream != nullptr; }
+
   /// Given an Offset into this StreamRef and a Size, return a reference to a
   /// buffer owned by the stream.
   ///
diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h
index 6734a79..1b61c32 100644
--- a/include/llvm/Support/BinaryStreamWriter.h
+++ b/include/llvm/Support/BinaryStreamWriter.h
@@ -20,6 +20,7 @@
 #include "llvm/Support/Error.h"
 #include <cstdint>
 #include <type_traits>
+#include <utility>
 
 namespace llvm {
 
@@ -30,8 +31,6 @@ namespace llvm {
 /// although no methods are overridable.
 class BinaryStreamWriter {
 public:
-  // FIXME: We should be able to slice and drop_front etc on Writers / Readers.
-
   BinaryStreamWriter() = default;
   explicit BinaryStreamWriter(WritableBinaryStreamRef Stream);
   virtual ~BinaryStreamWriter() {}
@@ -152,6 +151,9 @@ public:
     return writeStreamRef(Array.getUnderlyingStream());
   }
 
+  /// Splits the Writer into two Writers at a given offset.
+  std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint32_t Off) const;
+
   void setOffset(uint32_t Off) { Offset = Off; }
   uint32_t getOffset() const { return Offset; }
   uint32_t getLength() const { return Stream.getLength(); }
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index 2d1180c..380b628 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -58,6 +58,28 @@ public:
   ///     NULL will be returned.
   const char *getCStr(uint32_t *offset_ptr) const;
 
+  /// Extract a C string from \a *OffsetPtr.
+  ///
+  /// Returns a StringRef for the C String from the data at the offset
+  /// pointed to by \a OffsetPtr. A variable length NULL terminated C
+  /// string will be extracted and the \a OffsetPtr will be
+  /// updated with the offset of the byte that follows the NULL
+  /// terminator byte.
+  ///
+  /// \param[in,out] OffsetPtr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by the appropriate number of bytes if the value is extracted
+  ///     correctly. If the offset is out of bounds or there are not
+  ///     enough bytes to extract this value, the offset will be left
+  ///     unmodified.
+  ///
+  /// \return
+  ///     A StringRef for the C string value in the data. If the offset
+  ///     pointed to by \a OffsetPtr is out of bounds, or if the
+  ///     offset plus the length of the C string is out of bounds,
+  ///     a default-initialized StringRef will be returned.
+  StringRef getCStrRef(uint32_t *OffsetPtr) const;
+
   /// Extract an unsigned integer of size \a byte_size from \a
   /// *offset_ptr.
   ///
diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/Support/ELFRelocs/Hexagon.def
index 74e1d40..5021e2b 100644
--- a/include/llvm/Support/ELFRelocs/Hexagon.def
+++ b/include/llvm/Support/ELFRelocs/Hexagon.def
@@ -99,3 +99,8 @@ ELF_RELOC(R_HEX_LD_GOT_32_6_X,       91)
 ELF_RELOC(R_HEX_LD_GOT_16_X,         92)
 ELF_RELOC(R_HEX_LD_GOT_11_X,         93)
 ELF_RELOC(R_HEX_23_REG,              94)
+ELF_RELOC(R_HEX_GD_PLT_B22_PCREL_X,  95)
+ELF_RELOC(R_HEX_GD_PLT_B32_PCREL_X,  96)
+ELF_RELOC(R_HEX_LD_PLT_B22_PCREL_X,  97)
+ELF_RELOC(R_HEX_LD_PLT_B32_PCREL_X,  98)
+ELF_RELOC(R_HEX_27_REG,              99)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index aa92300..ced1838 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2061,6 +2061,14 @@ public:
     return false;
   }
 
+  /// Return true if the \p Inst (SHL, SRA or SRL) that performs a << b actually
+  /// performs a << (b % (sizeof(a) * 8)).  The default returns false.
+  virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
+    assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+           "Expect a shift instruction");
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 06410bf..a8fb12b 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -429,7 +429,7 @@ template <typename CFLAA> class CFLGraphBuilder {
 
       if (Inst->getType()->isPointerTy()) {
         auto *Fn = CS.getCalledFunction();
-        if (Fn == nullptr || !Fn->doesNotAlias(AttributeList::ReturnIndex))
+        if (Fn == nullptr || !Fn->returnDoesNotAlias())
           // No need to call addNode() since we've added Inst at the
           // beginning of this function and we know it is not a global.
           Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2f25a11..7aa6abf 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -4056,13 +4056,20 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
   unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
   unsigned InVecNumElts = InVecTy->getVectorNumElements();
 
+  auto *Op0Const = dyn_cast<Constant>(Op0);
+  auto *Op1Const = dyn_cast<Constant>(Op1);
+
+  // If all operands are constant, constant fold the shuffle.
+  if (Op0Const && Op1Const)
+    return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
+
   SmallVector<int, 32> Indices;
   ShuffleVectorInst::getShuffleMask(Mask, Indices);
   assert(MaskNumElts == Indices.size() &&
          "Size of Indices not same as number of mask elements?");
 
-  // Canonicalization: If mask does not select elements from an input vector,
-  // replace that input vector with undef.
+  // If only one of the operands is constant, constant fold the shuffle if the
+  // mask does not select elements from the variable operand.
   bool MaskSelects0 = false, MaskSelects1 = false;
   for (unsigned i = 0; i != MaskNumElts; ++i) {
     if (Indices[i] == -1)
@@ -4072,39 +4079,23 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     else
       MaskSelects1 = true;
   }
-  if (!MaskSelects0)
-    Op0 = UndefValue::get(InVecTy);
-  if (!MaskSelects1)
-    Op1 = UndefValue::get(InVecTy);
-
-  auto *Op0Const = dyn_cast<Constant>(Op0);
-  auto *Op1Const = dyn_cast<Constant>(Op1);
-
-  // If all operands are constant, constant fold the shuffle.
-  if (Op0Const && Op1Const)
-    return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
-
-  // Canonicalization: if only one input vector is constant, it shall be the
-  // second one.
-  if (Op0Const && !Op1Const) {
-    std::swap(Op0, Op1);
-    for (auto &Idx : Indices) {
-      if (Idx == -1)
-        continue;
-      Idx = Idx < (int)MaskNumElts ? Idx + MaskNumElts : Idx - MaskNumElts;
-    }
-    Mask = ConstantDataVector::get(
-        Mask->getContext(),
-        makeArrayRef(reinterpret_cast<uint32_t *>(Indices.data()),
-                     MaskNumElts));
-  }
+  if (!MaskSelects0 && Op1Const)
+    return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy),
+                                                Op1Const, Mask);
+  if (!MaskSelects1 && Op0Const)
+    return ConstantFoldShuffleVectorInstruction(Op0Const,
+                                                UndefValue::get(InVecTy), Mask);
 
   // A shuffle of a splat is always the splat itself. Legal if the shuffle's
   // value type is same as the input vectors' type.
   if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
-    if (isa<UndefValue>(Op1) && RetTy == InVecTy &&
+    if (!MaskSelects1 && RetTy == InVecTy &&
         OpShuf->getMask()->getSplatValue())
       return Op0;
+  if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op1))
+    if (!MaskSelects0 && RetTy == InVecTy &&
+        OpShuf->getMask()->getSplatValue())
+      return Op1;
 
   // Don't fold a shuffle with undef mask elements. This may get folded in a
   // better way using demanded bits or other analysis.
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index f6d9a73..a834125 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -451,12 +451,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
     auto &Summary = GlobalList.second[0];
     bool AllRefsCanBeExternallyReferenced =
         llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
-          // If a global value definition references an unnamed global,
-          // be conservative. They're valid IR so we don't want to crash
-          // when we encounter any of them but they're infrequent enough
-          // that we don't bother optimizing them.
-          if (!VI.getValue()->hasName())
-            return false;
           return !CantBePromoted.count(VI.getValue()->getGUID());
         });
     if (!AllRefsCanBeExternallyReferenced) {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dc151f2..6ec175f 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -3320,67 +3320,10 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   case Instruction::Call: {
     auto *CI = cast<const CallInst>(Inst);
     const Function *Callee = CI->getCalledFunction();
-    if (Callee && Callee->isSpeculatable())
-      return true;
-    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-      switch (II->getIntrinsicID()) {
-      // These synthetic intrinsics have no side-effects and just mark
-      // information about their operands.
-      // FIXME: There are other no-op synthetic instructions that potentially
-      // should be considered at least *safe* to speculate...
-      // FIXME: The speculatable attribute should be added to all these
-      // intrinsics and this case statement should be removed.
-      case Intrinsic::dbg_declare:
-      case Intrinsic::dbg_value:
-        return true;
 
-      case Intrinsic::bitreverse:
-      case Intrinsic::bswap:
-      case Intrinsic::ctlz:
-      case Intrinsic::ctpop:
-      case Intrinsic::cttz:
-      case Intrinsic::objectsize:
-      case Intrinsic::sadd_with_overflow:
-      case Intrinsic::smul_with_overflow:
-      case Intrinsic::ssub_with_overflow:
-      case Intrinsic::uadd_with_overflow:
-      case Intrinsic::umul_with_overflow:
-      case Intrinsic::usub_with_overflow:
-        return true;
-      // These intrinsics are defined to have the same behavior as libm
-      // functions except for setting errno.
-      case Intrinsic::sqrt:
-      case Intrinsic::fma:
-      case Intrinsic::fmuladd:
-        return true;
-      // These intrinsics are defined to have the same behavior as libm
-      // functions, and the corresponding libm functions never set errno.
-      case Intrinsic::trunc:
-      case Intrinsic::copysign:
-      case Intrinsic::fabs:
-      case Intrinsic::minnum:
-      case Intrinsic::maxnum:
-        return true;
-      // These intrinsics are defined to have the same behavior as libm
-      // functions, which never overflow when operating on the IEEE754 types
-      // that we support, and never set errno otherwise.
-      case Intrinsic::ceil:
-      case Intrinsic::floor:
-      case Intrinsic::nearbyint:
-      case Intrinsic::rint:
-      case Intrinsic::round:
-        return true;
-      // These intrinsics do not correspond to any libm function, and
-      // do not set errno.
-      case Intrinsic::powi:
-        return true;
-      // TODO: are convert_{from,to}_fp16 safe?
-      // TODO: can we list target-specific intrinsics here?
-      default: break;
-      }
-    }
-    return false; // The called function could have undefined behavior or
-                  // side-effects, even if marked readnone nounwind.
+    // The called function could have undefined behavior or side-effects, even
+    // if marked readnone nounwind.
+    return Callee && Callee->isSpeculatable();
   }
   case Instruction::VAArg:
   case Instruction::Alloca:
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8aa7d0d..485d9b6 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -340,146 +340,28 @@ public:
     // in writing out the call graph edges. Save the mapping from GUID
     // to the new global value id to use when writing those edges, which
     // are currently saved in the index in terms of GUID.
-    for (const auto &I : *this)
+    forEachSummary([&](GVInfo I) {
       GUIDToValueIdMap[I.first] = ++GlobalValueId;
+    });
   }
 
   /// The below iterator returns the GUID and associated summary.
   typedef std::pair<GlobalValue::GUID, GlobalValueSummary *> GVInfo;
 
-  /// Iterator over the value GUID and summaries to be written to bitcode,
-  /// hides the details of whether they are being pulled from the entire
-  /// index or just those in a provided ModuleToSummariesForIndex map.
-  class iterator
-      : public llvm::iterator_facade_base<iterator, std::forward_iterator_tag,
-                                          GVInfo> {
-    /// Enables access to parent class.
-    const IndexBitcodeWriter &Writer;
-
-    // Iterators used when writing only those summaries in a provided
-    // ModuleToSummariesForIndex map:
-
-    /// Points to the last element in outer ModuleToSummariesForIndex map.
-    std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesBack;
-    /// Iterator on outer ModuleToSummariesForIndex map.
-    std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesIter;
-    /// Iterator on an inner global variable summary map.
-    GVSummaryMapTy::const_iterator ModuleGVSummariesIter;
-
-    // Iterators used when writing all summaries in the index:
-
-    /// Points to the last element in the Index outer GlobalValueMap.
-    const_gvsummary_iterator IndexSummariesBack;
-    /// Iterator on outer GlobalValueMap.
-    const_gvsummary_iterator IndexSummariesIter;
-    /// Iterator on an inner GlobalValueSummaryList.
-    GlobalValueSummaryList::const_iterator IndexGVSummariesIter;
-
-  public:
-    /// Construct iterator from parent \p Writer and indicate if we are
-    /// constructing the end iterator.
-    iterator(const IndexBitcodeWriter &Writer, bool IsAtEnd) : Writer(Writer) {
-      // Set up the appropriate set of iterators given whether we are writing
-      // the full index or just a subset.
-      // Can't setup the Back or inner iterators if the corresponding map
-      // is empty. This will be handled specially in operator== as well.
-      if (Writer.ModuleToSummariesForIndex &&
-          !Writer.ModuleToSummariesForIndex->empty()) {
-        for (ModuleSummariesBack = Writer.ModuleToSummariesForIndex->begin();
-             std::next(ModuleSummariesBack) !=
-             Writer.ModuleToSummariesForIndex->end();
-             ModuleSummariesBack++)
-          ;
-        ModuleSummariesIter = !IsAtEnd
-                                  ? Writer.ModuleToSummariesForIndex->begin()
-                                  : ModuleSummariesBack;
-        ModuleGVSummariesIter = !IsAtEnd ? ModuleSummariesIter->second.begin()
-                                         : ModuleSummariesBack->second.end();
-      } else if (!Writer.ModuleToSummariesForIndex &&
-                 Writer.Index.begin() != Writer.Index.end()) {
-        for (IndexSummariesBack = Writer.Index.begin();
-             std::next(IndexSummariesBack) != Writer.Index.end();
-             IndexSummariesBack++)
-          ;
-        IndexSummariesIter =
-            !IsAtEnd ? Writer.Index.begin() : IndexSummariesBack;
-        IndexGVSummariesIter = !IsAtEnd ? IndexSummariesIter->second.begin()
-                                        : IndexSummariesBack->second.end();
-      }
-    }
-
-    /// Increment the appropriate set of iterators.
-    iterator &operator++() {
-      // First the inner iterator is incremented, then if it is at the end
-      // and there are more outer iterations to go, the inner is reset to
-      // the start of the next inner list.
-      if (Writer.ModuleToSummariesForIndex) {
-        ++ModuleGVSummariesIter;
-        if (ModuleGVSummariesIter == ModuleSummariesIter->second.end() &&
-            ModuleSummariesIter != ModuleSummariesBack) {
-          ++ModuleSummariesIter;
-          ModuleGVSummariesIter = ModuleSummariesIter->second.begin();
-        }
-      } else {
-        ++IndexGVSummariesIter;
-        if (IndexGVSummariesIter == IndexSummariesIter->second.end() &&
-            IndexSummariesIter != IndexSummariesBack) {
-          ++IndexSummariesIter;
-          IndexGVSummariesIter = IndexSummariesIter->second.begin();
-        }
-      }
-      return *this;
-    }
-
-    /// Access the <GUID,GlobalValueSummary*> pair corresponding to the current
-    /// outer and inner iterator positions.
-    GVInfo operator*() {
-      if (Writer.ModuleToSummariesForIndex)
-        return std::make_pair(ModuleGVSummariesIter->first,
-                              ModuleGVSummariesIter->second);
-      return std::make_pair(IndexSummariesIter->first,
-                            IndexGVSummariesIter->get());
-    }
-
-    /// Checks if the iterators are equal, with special handling for empty
-    /// indexes.
-    bool operator==(const iterator &RHS) const {
-      if (Writer.ModuleToSummariesForIndex) {
-        // First ensure that both are writing the same subset.
-        if (Writer.ModuleToSummariesForIndex !=
-            RHS.Writer.ModuleToSummariesForIndex)
-          return false;
-        // Already determined above that maps are the same, so if one is
-        // empty, they both are.
-        if (Writer.ModuleToSummariesForIndex->empty())
-          return true;
-        // Ensure the ModuleGVSummariesIter are iterating over the same
-        // container before checking them below.
-        if (ModuleSummariesIter != RHS.ModuleSummariesIter)
-          return false;
-        return ModuleGVSummariesIter == RHS.ModuleGVSummariesIter;
-      }
-      // First ensure RHS also writing the full index, and that both are
-      // writing the same full index.
-      if (RHS.Writer.ModuleToSummariesForIndex ||
-          &Writer.Index != &RHS.Writer.Index)
-        return false;
-      // Already determined above that maps are the same, so if one is
-      // empty, they both are.
-      if (Writer.Index.begin() == Writer.Index.end())
-        return true;
-      // Ensure the IndexGVSummariesIter are iterating over the same
-      // container before checking them below.
-      if (IndexSummariesIter != RHS.IndexSummariesIter)
-        return false;
-      return IndexGVSummariesIter == RHS.IndexGVSummariesIter;
+  /// Calls the callback for each value GUID and summary to be written to
+  /// bitcode. This hides the details of whether they are being pulled from the
+  /// entire index or just those in a provided ModuleToSummariesForIndex map.
+  void forEachSummary(std::function<void(GVInfo)> Callback) {
+    if (ModuleToSummariesForIndex) {
+      for (auto &M : *ModuleToSummariesForIndex)
+        for (auto &Summary : M.second)
+          Callback(Summary);
+    } else {
+      for (auto &Summaries : Index)
+        for (auto &Summary : Summaries.second)
+          Callback({Summaries.first, Summary.get()});
     }
-  };
-
-  /// Obtain the start iterator over the summaries to be written.
-  iterator begin() { return iterator(*this, /*IsAtEnd=*/false); }
-  /// Obtain the end iterator over the summaries to be written.
-  iterator end() { return iterator(*this, /*IsAtEnd=*/true); }
+  }
 
   /// Main entry point for writing a combined index to bitcode.
   void write();
@@ -3528,16 +3410,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
 
   // Create value IDs for undefined references.
-  for (const auto &I : *this) {
+  forEachSummary([&](GVInfo I) {
     if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) {
       for (auto &RI : VS->refs())
         assignValueId(RI.getGUID());
-      continue;
+      return;
     }
 
     auto *FS = dyn_cast<FunctionSummary>(I.second);
     if (!FS)
-      continue;
+      return;
     for (auto &RI : FS->refs())
       assignValueId(RI.getGUID());
 
@@ -3553,7 +3435,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
       }
       assignValueId(GUID);
     }
-  }
+  });
 
   for (const auto &GVI : valueIds()) {
     Stream.EmitRecord(bitc::FS_VALUE_GUID,
@@ -3624,7 +3506,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
-  for (const auto &I : *this) {
+  forEachSummary([&](GVInfo I) {
     GlobalValueSummary *S = I.second;
     assert(S);
 
@@ -3636,7 +3518,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
       // Will process aliases as a post-pass because the reader wants all
       // global to be loaded first.
       Aliases.push_back(AS);
-      continue;
+      return;
     }
 
     if (auto *VS = dyn_cast<GlobalVarSummary>(S)) {
@@ -3652,7 +3534,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
                         FSModRefsAbbrev);
       NameVals.clear();
       MaybeEmitOriginalName(*S);
-      continue;
+      return;
     }
 
     auto *FS = cast<FunctionSummary>(S);
@@ -3700,7 +3582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     Stream.EmitRecord(Code, NameVals, FSAbbrev);
     NameVals.clear();
     MaybeEmitOriginalName(*S);
-  }
+  });
 
   for (auto *AS : Aliases) {
     auto AliasValueId = SummaryToValueIdMap[AS];
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index ebfe6cb..be0c5c2 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -37,7 +37,7 @@ bool CallLowering::lowerCall(
   for (auto &Arg : CS.args()) {
     ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
                     i < NumFixedArgs};
-    setArgFlags(OrigArg, i + 1, DL, CS);
+    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
     OrigArgs.push_back(OrigArg);
     ++i;
   }
@@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
-    if (FuncInfo.getParamAlignment(OpIdx - 1))
-      FrameAlign = FuncInfo.getParamAlignment(OpIdx - 1);
+    if (FuncInfo.getParamAlignment(OpIdx - 2))
+      FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2);
     else
       FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
     Arg.Flags.setByValAlign(FrameAlign);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 570a0cd..549f07e 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -761,6 +761,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
     // Be careful about underflow in comparisons agains MinCSFrameIndex.
     for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+      if (MFI.isDeadObjectIndex(i))
+        continue;
+
       unsigned Align = MFI.getObjectAlignment(i);
       // Adjust to alignment boundary
       Offset = alignTo(Offset, Align, Skew);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc0276d..03698ac 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -242,6 +242,7 @@ namespace {
     SDValue visitUSUBO(SDNode *N);
     SDValue visitADDE(SDNode *N);
     SDValue visitADDCARRY(SDNode *N);
+    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
     SDValue visitSUBE(SDNode *N);
     SDValue visitSUBCARRY(SDNode *N);
     SDValue visitMUL(SDNode *N);
@@ -2142,6 +2143,24 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
   if (isNullConstant(CarryIn))
     return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1);
 
+  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+    return Combined;
+
+  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+    return Combined;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+                                       SDNode *N) {
+  // Iff the flag result is dead:
+  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) &&
+      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
+    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+                       N0.getOperand(0), N0.getOperand(1), CarryIn);
+
   return SDValue();
 }
 
@@ -5294,6 +5313,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     }
   }
 
+  // If the target supports masking y in (shl, y),
+  // fold (shl x, (and y, (numbits(x) - 1))) -> (shl x, y)
+  if (TLI.isOperationLegal(ISD::SHL, VT) &&
+      TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // fold (shl c1, c2) -> c1<<c2
@@ -5492,6 +5522,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
+  // If the target supports masking y in (sra, y),
+  // fold (sra x, (and y, (numbits(x) - 1))) -> (sra x, y)
+  if (TLI.isOperationLegal(ISD::SRA, VT) &&
+      TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   // Arithmetic shifting an all-sign-bit value is a no-op.
   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
     return N0;
@@ -5650,6 +5691,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
+  // If the target supports masking y in (srl, y),
+  // fold (srl x, (and y, (numbits(x) - 1))) -> (srl x, y)
+  if (TLI.isOperationLegal(ISD::SRL, VT) &&
+      TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   // fold vector ops
   if (VT.isVector())
     if (SDValue FoldedVOp = SimplifyVBinOp(N))
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 421f22c..410d5a3 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMDebugInfoCodeView
   ModuleDebugLineFragment.cpp
   ModuleDebugUnknownFragment.cpp
   RecordSerialization.cpp
+  StringTable.cpp
   SymbolRecordMapping.cpp
   SymbolDumper.cpp
   SymbolSerializer.cpp
diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
index c349e7e..42f0afc 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
+++ b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp
@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
 
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
 #include "llvm/Support/BinaryStreamReader.h"
 
 using namespace llvm;
@@ -25,7 +26,7 @@ struct FileChecksumEntryHeader {
 };
 
 Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
-    BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item, void *Ctx) {
+    BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
   BinaryStreamReader Reader(Stream);
 
   const FileChecksumEntryHeader *Header;
@@ -49,10 +50,12 @@ Error ModuleDebugFileChecksumFragmentRef::initialize(
   return Error::success();
 }
 
-ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment()
-    : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums) {}
+ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment(
+    StringTable &Strings)
+    : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums),
+      Strings(Strings) {}
 
-void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset,
+void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName,
                                                   FileChecksumKind Kind,
                                                   ArrayRef<uint8_t> Bytes) {
   FileChecksumEntry Entry;
@@ -61,13 +64,14 @@ void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset,
     ::memcpy(Copy, Bytes.data(), Bytes.size());
     Entry.Checksum = makeArrayRef(Copy, Bytes.size());
   }
-  Entry.FileNameOffset = StringTableOffset;
+
+  Entry.FileNameOffset = Strings.insert(FileName);
   Entry.Kind = Kind;
   Checksums.push_back(Entry);
 
   // This maps the offset of this string in the string table to the offset
   // of this checksum entry in the checksum buffer.
-  OffsetMap[StringTableOffset] = SerializedSize;
+  OffsetMap[Entry.FileNameOffset] = SerializedSize;
   assert(SerializedSize % 4 == 0);
 
   uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4);
@@ -94,9 +98,10 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) {
   return Error::success();
 }
 
-uint32_t ModuleDebugFileChecksumFragment::mapChecksumOffset(
-    uint32_t StringTableOffset) const {
-  auto Iter = OffsetMap.find(StringTableOffset);
+uint32_t
+ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const {
+  uint32_t Offset = Strings.getStringId(FileName);
+  auto Iter = OffsetMap.find(Offset);
   assert(Iter != OffsetMap.end());
   return Iter->second;
 }
diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
index 483f7cb..cb6a847 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
+++ b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp
@@ -10,20 +10,22 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
 
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
 #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
 
 Error VarStreamArrayExtractor<InlineeSourceLine>::extract(
     BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item,
-    ContextType *Fragment) {
+    bool HasExtraFiles) {
   BinaryStreamReader Reader(Stream);
 
   if (auto EC = Reader.readObject(Item.Header))
     return EC;
 
-  if (Fragment->hasExtraFiles()) {
+  if (HasExtraFiles) {
     uint32_t ExtraFileCount;
     if (auto EC = Reader.readInteger(ExtraFileCount))
       return EC;
@@ -42,7 +44,8 @@ Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) {
   if (auto EC = Reader.readEnum(Signature))
     return EC;
 
-  if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining(), this))
+  if (auto EC =
+          Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles()))
     return EC;
 
   assert(Reader.bytesRemaining() == 0);
@@ -54,9 +57,9 @@ bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const {
 }
 
 ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment(
-    bool HasExtraFiles)
+    ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles)
     : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines),
-      HasExtraFiles(HasExtraFiles) {}
+      Checksums(Checksums), HasExtraFiles(HasExtraFiles) {}
 
 uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() {
   // 4 bytes for the signature
@@ -99,18 +102,22 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) {
   return Error::success();
 }
 
-void ModuleDebugInlineeLineFragment::addExtraFile(uint32_t FileOffset) {
+void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) {
+  uint32_t Offset = Checksums.mapChecksumOffset(FileName);
+
   auto &Entry = Entries.back();
-  Entry.ExtraFiles.push_back(ulittle32_t(FileOffset));
+  Entry.ExtraFiles.push_back(ulittle32_t(Offset));
   ++ExtraFileCount;
 }
 
 void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId,
-                                                   uint32_t FileOffset,
+                                                   StringRef FileName,
                                                    uint32_t SourceLine) {
+  uint32_t Offset = Checksums.mapChecksumOffset(FileName);
+
   Entries.emplace_back();
   auto &Entry = Entries.back();
-  Entry.Header.FileID = FileOffset;
+  Entry.Header.FileID = Offset;
   Entry.Header.SourceLineNum = SourceLine;
   Entry.Header.Inlinee = FuncId;
 }
diff --git a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
index 103010c..e0ee934 100644
--- a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
+++ b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp
@@ -10,7 +10,9 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
 
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h"
 #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -65,11 +67,15 @@ bool ModuleDebugLineFragmentRef::hasColumnInfo() const {
   return !!(Header->Flags & LF_HaveColumns);
 }
 
-ModuleDebugLineFragment::ModuleDebugLineFragment()
-    : ModuleDebugFragment(ModuleDebugFragmentKind::Lines) {}
+ModuleDebugLineFragment::ModuleDebugLineFragment(
+    ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings)
+    : ModuleDebugFragment(ModuleDebugFragmentKind::Lines),
+      Checksums(Checksums) {}
 
-void ModuleDebugLineFragment::createBlock(uint32_t ChecksumBufferOffset) {
-  Blocks.emplace_back(ChecksumBufferOffset);
+void ModuleDebugLineFragment::createBlock(StringRef FileName) {
+  uint32_t Offset = Checksums.mapChecksumOffset(FileName);
+
+  Blocks.emplace_back(Offset);
 }
 
 void ModuleDebugLineFragment::addLineInfo(uint32_t Offset,
diff --git a/lib/DebugInfo/CodeView/StringTable.cpp b/lib/DebugInfo/CodeView/StringTable.cpp
new file mode 100644
index 0000000..21f1120
--- /dev/null
+++ b/lib/DebugInfo/CodeView/StringTable.cpp
@@ -0,0 +1,71 @@
+//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/StringTable.h"
+
+#include "llvm/Support/BinaryStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+StringTableRef::StringTableRef() {}
+
+Error StringTableRef::initialize(BinaryStreamRef Contents) {
+  Stream = Contents;
+  return Error::success();
+}
+
+Expected<StringRef> StringTableRef::getString(uint32_t Offset) const {
+  BinaryStreamReader Reader(Stream);
+  Reader.setOffset(Offset);
+  StringRef Result;
+  if (auto EC = Reader.readCString(Result))
+    return std::move(EC);
+  return Result;
+}
+
+uint32_t StringTable::insert(StringRef S) {
+  auto P = Strings.insert({S, StringSize});
+
+  // If a given string didn't exist in the string table, we want to increment
+  // the string table size.
+  if (P.second)
+    StringSize += S.size() + 1; // +1 for '\0'
+  return P.first->second;
+}
+
+uint32_t StringTable::calculateSerializedSize() const { return StringSize; }
+
+Error StringTable::commit(BinaryStreamWriter &Writer) const {
+  assert(Writer.bytesRemaining() == StringSize);
+  uint32_t MaxOffset = 1;
+
+  for (auto &Pair : Strings) {
+    StringRef S = Pair.getKey();
+    uint32_t Offset = Pair.getValue();
+    Writer.setOffset(Offset);
+    if (auto EC = Writer.writeCString(S))
+      return EC;
+    MaxOffset = std::max<uint32_t>(MaxOffset, Offset + S.size() + 1);
+  }
+
+  Writer.setOffset(MaxOffset);
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+uint32_t StringTable::size() const { return Strings.size(); }
+
+uint32_t StringTable::getStringId(StringRef S) const {
+  auto P = Strings.find(S);
+  assert(P != Strings.end());
+  return P->second;
+}
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 134471e..5395e43 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -13,6 +13,7 @@
 #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
 #include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
 #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -369,14 +370,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(
   DictScope S(W, "DefRangeSubfield");
 
   if (ObjDelegate) {
-    StringRef StringTable = ObjDelegate->getStringTable();
-    auto ProgramStringTableOffset = DefRangeSubfield.Program;
-    if (ProgramStringTableOffset >= StringTable.size())
+    StringTableRef Strings = ObjDelegate->getStringTable();
+    auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program);
+    if (!ExpectedProgram) {
+      consumeError(ExpectedProgram.takeError());
       return llvm::make_error<CodeViewError>(
           "String table offset outside of bounds of String Table!");
-    StringRef Program =
-        StringTable.drop_front(ProgramStringTableOffset).split('\0').first;
-    W.printString("Program", Program);
+    }
+    W.printString("Program", *ExpectedProgram);
   }
   W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent);
   printLocalVariableAddrRange(DefRangeSubfield.Range,
@@ -390,14 +391,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
   DictScope S(W, "DefRange");
 
   if (ObjDelegate) {
-    StringRef StringTable = ObjDelegate->getStringTable();
-    auto ProgramStringTableOffset = DefRange.Program;
-    if (ProgramStringTableOffset >= StringTable.size())
+    StringTableRef Strings = ObjDelegate->getStringTable();
+    auto ExpectedProgram = Strings.getString(DefRange.Program);
+    if (!ExpectedProgram) {
+      consumeError(ExpectedProgram.takeError());
       return llvm::make_error<CodeViewError>(
           "String table offset outside of bounds of String Table!");
-    StringRef Program =
-        StringTable.drop_front(ProgramStringTableOffset).split('\0').first;
-    W.printString("Program", Program);
+    }
+    W.printString("Program", *ExpectedProgram);
   }
   printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset());
   printLocalVariableAddrGap(DefRange.Gaps);
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index 495edb7..6ca6e64 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMDebugInfoDWARF
   DWARFTypeUnit.cpp
   DWARFUnitIndex.cpp
   DWARFUnit.cpp
+  DWARFVerifier.cpp
   SyntaxHighlighting.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index b4ecbf8..573d37d 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
@@ -29,6 +29,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
 #include "llvm/Object/Decompressor.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -42,6 +43,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstdint>
+#include <map>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
@@ -284,11 +287,30 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
                      getStringSection(), isLittleEndian());
 }
 
-bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
-  bool Success = true;
-  if (DumpType == DIDT_All || DumpType == DIDT_Info) {
+DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) {
+  parseCompileUnits();
+  if (auto *CU = CUs.getUnitForOffset(Offset))
+    return CU->getDIEForOffset(Offset);
+  return DWARFDie();
+}
+
+namespace {
+  
+class Verifier {
+  raw_ostream &OS;
+  DWARFContext &DCtx;
+public:
+  Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {}
+  
+  bool HandleDebugInfo() {
+    bool Success = true;
+    // A map that tracks all references (converted absolute references) so we
+    // can verify each reference points to a valid DIE and not an offset that
+    // lies between two valid DIEs.
+    std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
+
     OS << "Verifying .debug_info...\n";
-    for (const auto &CU : compile_units()) {
+    for (const auto &CU : DCtx.compile_units()) {
       unsigned NumDies = CU->getNumDIEs();
       for (unsigned I = 0; I < NumDies; ++I) {
         auto Die = CU->getDIEAtIndex(I);
@@ -299,101 +321,231 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
           const auto Attr = AttrValue.Attr;
           const auto Form = AttrValue.Value.getForm();
           switch (Attr) {
-          case DW_AT_ranges:
-            // Make sure the offset in the DW_AT_ranges attribute is valid.
-            if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
-              if (*SectionOffset >= getRangeSection().Data.size()) {
+            case DW_AT_ranges:
+              // Make sure the offset in the DW_AT_ranges attribute is valid.
+              if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+                if (*SectionOffset >= DCtx.getRangeSection().Data.size()) {
+                  Success = false;
+                  OS << "error: DW_AT_ranges offset is beyond .debug_ranges "
+                  "bounds:\n";
+                  Die.dump(OS, 0);
+                  OS << "\n";
+                }
+              } else {
                 Success = false;
-                OS << "error: DW_AT_ranges offset is beyond .debug_ranges "
-                      "bounds:\n";
+                OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
                 Die.dump(OS, 0);
                 OS << "\n";
               }
-            } else {
-              Success = false;
-              OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
-              Die.dump(OS, 0);
-              OS << "\n";
-            }
-            break;
-          case DW_AT_stmt_list:
-            // Make sure the offset in the DW_AT_stmt_list attribute is valid.
-            if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
-              if (*SectionOffset >= getLineSection().Data.size()) {
+              break;
+            case DW_AT_stmt_list:
+              // Make sure the offset in the DW_AT_stmt_list attribute is valid.
+              if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+                if (*SectionOffset >= DCtx.getLineSection().Data.size()) {
+                  Success = false;
+                  OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
+                  "bounds: "
+                  << format("0x%08" PRIx32, *SectionOffset) << "\n";
+                  CU->getUnitDIE().dump(OS, 0);
+                  OS << "\n";
+                }
+              } else {
                 Success = false;
-                OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
-                      "bounds: "
-                   << format("0x%08" PRIx32, *SectionOffset) << "\n";
-                CU->getUnitDIE().dump(OS, 0);
+                OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
+                Die.dump(OS, 0);
                 OS << "\n";
               }
-            } else {
-              Success = false;
-              OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
-              Die.dump(OS, 0);
-              OS << "\n";
-            }
-            break;
-
-          default:
-            break;
+              break;
+              
+            default:
+              break;
           }
           switch (Form) {
-          case DW_FORM_ref1:
-          case DW_FORM_ref2:
-          case DW_FORM_ref4:
-          case DW_FORM_ref8:
-          case DW_FORM_ref_udata: {
-            // Verify all CU relative references are valid CU offsets.
-            Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
-            assert(RefVal);
-            if (RefVal) {
-              auto DieCU = Die.getDwarfUnit();
-              auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
-              auto CUOffset = AttrValue.Value.getRawUValue();
-              if (CUOffset >= CUSize) {
+            case DW_FORM_ref1:
+            case DW_FORM_ref2:
+            case DW_FORM_ref4:
+            case DW_FORM_ref8:
+            case DW_FORM_ref_udata: {
+              // Verify all CU relative references are valid CU offsets.
+              Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+              assert(RefVal);
+              if (RefVal) {
+                auto DieCU = Die.getDwarfUnit();
+                auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
+                auto CUOffset = AttrValue.Value.getRawUValue();
+                if (CUOffset >= CUSize) {
+                  Success = false;
+                  OS << "error: " << FormEncodingString(Form) << " CU offset "
+                  << format("0x%08" PRIx32, CUOffset)
+                  << " is invalid (must be less than CU size of "
+                  << format("0x%08" PRIx32, CUSize) << "):\n";
+                  Die.dump(OS, 0);
+                  OS << "\n";
+                } else {
+                  // Valid reference, but we will verify it points to an actual
+                  // DIE later.
+                  ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+                }
+              }
+              break;
+            }
+            case DW_FORM_ref_addr: {
+              // Verify all absolute DIE references have valid offsets in the
+              // .debug_info section.
+              Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+              assert(RefVal);
+              if (RefVal) {
+                if(*RefVal >= DCtx.getInfoSection().Data.size()) {
+                  Success = false;
+                  OS << "error: DW_FORM_ref_addr offset beyond .debug_info "
+                        "bounds:\n";
+                  Die.dump(OS, 0);
+                  OS << "\n";
+                } else {
+                  // Valid reference, but we will verify it points to an actual
+                  // DIE later.
+                  ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+                }
+              }
+              break;
+            }
+            case DW_FORM_strp: {
+              auto SecOffset = AttrValue.Value.getAsSectionOffset();
+              assert(SecOffset); // DW_FORM_strp is a section offset.
+              if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) {
                 Success = false;
-                OS << "error: " << FormEncodingString(Form) << " CU offset "
-                   << format("0x%08" PRIx32, CUOffset)
-                   << " is invalid (must be less than CU size of "
-                   << format("0x%08" PRIx32, CUSize) << "):\n";
+                OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
                 Die.dump(OS, 0);
                 OS << "\n";
               }
+              break;
             }
-            break;
+            default:
+              break;
           }
-          case DW_FORM_ref_addr: {
-            // Verify all absolute DIE references have valid offsets in the
-            // .debug_info section.
-            Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
-            assert(RefVal);
-            if (RefVal && *RefVal >= getInfoSection().Data.size()) {
-              Success = false;
-              OS << "error: DW_FORM_ref_addr offset beyond .debug_info "
-                    "bounds:\n";
-              Die.dump(OS, 0);
-              OS << "\n";
-            }
-            break;
-          }
-          case DW_FORM_strp: {
-            auto SecOffset = AttrValue.Value.getAsSectionOffset();
-            assert(SecOffset); // DW_FORM_strp is a section offset.
-            if (SecOffset && *SecOffset >= getStringSection().size()) {
-              Success = false;
-              OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
-              Die.dump(OS, 0);
-              OS << "\n";
-            }
-            break;
-          }
-          default:
-            break;
+        }
+      }
+    }
+
+    // Take all references and make sure they point to an actual DIE by
+    // getting the DIE by offset and emitting an error
+    OS << "Verifying .debug_info references...\n";
+    for (auto Pair: ReferenceToDIEOffsets) {
+      auto Die = DCtx.getDIEForOffset(Pair.first);
+      if (Die)
+        continue;
+      Success = false;
+      OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
+         << ". Offset is in between DIEs:\n";
+      for (auto Offset: Pair.second) {
+        auto ReferencingDie = DCtx.getDIEForOffset(Offset);
+        ReferencingDie.dump(OS, 0);
+        OS << "\n";
+      }
+      OS << "\n";
+    }
+    return Success;
+  }
+
+  bool HandleDebugLine() {
+    std::map<uint64_t, DWARFDie> StmtListToDie;
+    bool Success = true;
+    OS << "Verifying .debug_line...\n";
+    for (const auto &CU : DCtx.compile_units()) {
+      uint32_t LineTableOffset = 0;
+      auto CUDie = CU->getUnitDIE();
+      auto StmtFormValue = CUDie.find(DW_AT_stmt_list);
+      if (!StmtFormValue) {
+        // No line table for this compile unit.
+        continue;
+      }
+      // Get the attribute value as a section offset. No need to produce an
+      // error here if the encoding isn't correct because we validate this in
+      // the .debug_info verifier.
+      if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) {
+        LineTableOffset = *StmtSectionOffset;
+        if (LineTableOffset >= DCtx.getLineSection().Data.size()) {
+          // Make sure we don't get a valid line table back if the offset
+          // is wrong.
+          assert(DCtx.getLineTableForUnit(CU.get()) == nullptr);
+          // Skip this line table as it isn't valid. No need to create an error
+          // here because we validate this in the .debug_info verifier.
+          continue;
+        } else {
+          auto Iter = StmtListToDie.find(LineTableOffset);
+          if (Iter != StmtListToDie.end()) {
+            Success = false;
+            OS << "error: two compile unit DIEs, "
+               << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
+               << format("0x%08" PRIx32, CUDie.getOffset())
+               << ", have the same DW_AT_stmt_list section offset:\n";
+            Iter->second.dump(OS, 0);
+            CUDie.dump(OS, 0);
+            OS << '\n';
+            // Already verified this line table before, no need to do it again.
+            continue;
           }
+          StmtListToDie[LineTableOffset] = CUDie;
         }
       }
+      auto LineTable = DCtx.getLineTableForUnit(CU.get());
+      if (!LineTable) {
+        Success = false;
+        OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+           << "] was not able to be parsed for CU:\n";
+        CUDie.dump(OS, 0);
+        OS << '\n';
+        continue;
+      }
+      uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
+      uint64_t PrevAddress = 0;
+      uint32_t RowIndex = 0;
+      for (const auto &Row : LineTable->Rows) {
+        if (Row.Address < PrevAddress) {
+          Success = false;
+          OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+             << "] row[" << RowIndex
+             << "] decreases in address from previous row:\n";
+
+          DWARFDebugLine::Row::dumpTableHeader(OS);
+          if (RowIndex > 0)
+            LineTable->Rows[RowIndex - 1].dump(OS);
+          Row.dump(OS);
+          OS << '\n';
+        }
+
+        if (Row.File > MaxFileIndex) {
+          Success = false;
+          OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+             << "][" << RowIndex << "] has invalid file index " << Row.File
+             << " (valid values are [1," << MaxFileIndex << "]):\n";
+          DWARFDebugLine::Row::dumpTableHeader(OS);
+          Row.dump(OS);
+          OS << '\n';
+        }
+        if (Row.EndSequence)
+          PrevAddress = 0;
+        else
+          PrevAddress = Row.Address;
+        ++RowIndex;
+      }
     }
+    return Success;
+  }
+};
+  
+} // anonymous namespace
+
+bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) {
+  bool Success = true;
+  DWARFVerifier verifier(OS, *this);
+  if (DumpType == DIDT_All || DumpType == DIDT_Info) {
+    if (!verifier.handleDebugInfo())
+      Success = false;
+  }
+  if (DumpType == DIDT_All || DumpType == DIDT_Line) {
+    if (!verifier.handleDebugLine())
+      Success = false;
   }
   return Success;
 }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 77f3c00..f32e8fe 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -26,11 +27,19 @@ using namespace llvm;
 using namespace dwarf;
 
 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+namespace {
+struct ContentDescriptor { // One (content type, form) pair of an entry format.
+  dwarf::LineNumberEntryFormat Type; // Which field this is (e.g. DW_LNCT_path).
+  dwarf::Form Form;                  // DWARF form used to encode that field.
+};
+typedef SmallVector<ContentDescriptor, 4> ContentDescriptors;
+} // end anonymous namespace
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
 void DWARFDebugLine::Prologue::clear() {
   TotalLength = Version = PrologueLength = 0;
+  AddressSize = SegSelectorSize = 0;
   MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
   OpcodeBase = 0;
   IsDWARF64 = false;
@@ -43,6 +52,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
   OS << "Line table prologue:\n"
      << format("    total_length: 0x%8.8" PRIx64 "\n", TotalLength)
      << format("         version: %u\n", Version)
+     << format(Version >= 5 ? "    address_size: %u\n" : "", AddressSize)
+     << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize)
      << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength)
      << format(" min_inst_length: %u\n", MinInstLength)
      << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst)
@@ -74,6 +85,125 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
   }
 }
 
+// Parse v2-v4 directory and file tables; an empty string ends each table.
+static void
+parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr,
+                     uint64_t EndPrologueOffset,
+                     std::vector<StringRef> &IncludeDirectories,
+                     std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
+  while (*OffsetPtr < EndPrologueOffset) { // Never read past the prologue.
+    StringRef S = DebugLineData.getCStrRef(OffsetPtr);
+    if (S.empty()) // Empty string: end of the directory table.
+      break;
+    IncludeDirectories.push_back(S);
+  }
+
+  while (*OffsetPtr < EndPrologueOffset) {
+    StringRef Name = DebugLineData.getCStrRef(OffsetPtr);
+    if (Name.empty()) // Empty name: end of the file table.
+      break;
+    DWARFDebugLine::FileNameEntry FileEntry;
+    FileEntry.Name = Name;
+    FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); // Three ULEB128s
+    FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); // follow each
+    FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); // file name.
+    FileNames.push_back(FileEntry);
+  }
+}
+
+// Parse v5 directory/file entry content descriptions.
+// Returns the descriptors, or an empty vector if we did not find a path or
+// ran off the end of the prologue.
+static ContentDescriptors
+parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr,
+                   uint64_t EndPrologueOffset) {
+  ContentDescriptors Descriptors;
+  int FormatCount = DebugLineData.getU8(OffsetPtr); // One-byte format count.
+  bool HasPath = false; // Set once a DW_LNCT_path descriptor is seen.
+  for (int I = 0; I != FormatCount; ++I) {
+    if (*OffsetPtr >= EndPrologueOffset) // Ran off the end of the prologue.
+      return ContentDescriptors();
+    ContentDescriptor Descriptor;
+    Descriptor.Type = // Each descriptor is a (type, form) pair of ULEB128s.
+      dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr));
+    Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr));
+    if (Descriptor.Type == dwarf::DW_LNCT_path)
+      HasPath = true;
+    Descriptors.push_back(Descriptor);
+  }
+  return HasPath ? Descriptors : ContentDescriptors(); // A path is mandatory.
+}
+
+// Parse the v5 directory and file tables; returns false if either is invalid.
+static bool parseV5DirFileTables(
+    DataExtractor DebugLineData, uint32_t *OffsetPtr,
+    uint64_t EndPrologueOffset, std::vector<StringRef> &IncludeDirectories,
+    std::vector<DWARFDebugLine::FileNameEntry> &FileNames) {
+  // Get the directory entry description.
+  ContentDescriptors DirDescriptors =
+    parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset);
+  if (DirDescriptors.empty())
+    return false;
+
+  // Get the directory entries; DWARF v5 stores their count as a ULEB128.
+  uint64_t DirEntryCount = DebugLineData.getULEB128(OffsetPtr);
+  for (uint64_t I = 0; I != DirEntryCount; ++I) {
+    if (*OffsetPtr >= EndPrologueOffset)
+      return false;
+    for (auto Descriptor : DirDescriptors) {
+      DWARFFormValue Value(Descriptor.Form);
+      switch (Descriptor.Type) {
+      case DW_LNCT_path:
+        if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr))
+          return false;
+        IncludeDirectories.push_back(Value.getAsCString().getValue());
+        break;
+      default: // Only the path is kept for directories; skip other fields.
+        if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr))
+          return false;
+      }
+    }
+  }
+
+  // Get the file entry description.
+  ContentDescriptors FileDescriptors =
+    parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset);
+  if (FileDescriptors.empty())
+    return false;
+
+  // Get the file entries; file_names_count is likewise a ULEB128.
+  uint64_t FileEntryCount = DebugLineData.getULEB128(OffsetPtr);
+  for (uint64_t I = 0; I != FileEntryCount; ++I) {
+    if (*OffsetPtr >= EndPrologueOffset)
+      return false;
+    DWARFDebugLine::FileNameEntry FileEntry;
+    for (auto Descriptor : FileDescriptors) {
+      DWARFFormValue Value(Descriptor.Form);
+      if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr))
+        return false;
+      switch (Descriptor.Type) {
+      case DW_LNCT_path:
+        FileEntry.Name = Value.getAsCString().getValue();
+        break;
+      case DW_LNCT_directory_index:
+        FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue();
+        break;
+      case DW_LNCT_timestamp:
+        FileEntry.ModTime = Value.getAsUnsignedConstant().getValue();
+        break;
+      case DW_LNCT_size:
+        FileEntry.Length = Value.getAsUnsignedConstant().getValue();
+        break;
+      // FIXME: Add MD5
+      default:
+        break;
+      }
+    }
+    FileNames.push_back(FileEntry);
+  }
+  return true;
+}
+
 bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
                                      uint32_t *OffsetPtr) {
   const uint64_t PrologueOffset = *OffsetPtr;
@@ -90,6 +220,11 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
   if (Version < 2)
     return false;
 
+  if (Version >= 5) {
+    AddressSize = DebugLineData.getU8(OffsetPtr);
+    SegSelectorSize = DebugLineData.getU8(OffsetPtr);
+  }
+
   PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
   const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
   MinInstLength = DebugLineData.getU8(OffsetPtr);
@@ -106,27 +241,18 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData,
     StandardOpcodeLengths.push_back(OpLen);
   }
 
-  while (*OffsetPtr < EndPrologueOffset) {
-    const char *S = DebugLineData.getCStr(OffsetPtr);
-    if (S && S[0])
-      IncludeDirectories.push_back(S);
-    else
-      break;
-  }
-
-  while (*OffsetPtr < EndPrologueOffset) {
-    const char *Name = DebugLineData.getCStr(OffsetPtr);
-    if (Name && Name[0]) {
-      FileNameEntry FileEntry;
-      FileEntry.Name = Name;
-      FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
-      FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
-      FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
-      FileNames.push_back(FileEntry);
-    } else {
-      break;
+  if (Version >= 5) {
+    if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
+                              IncludeDirectories, FileNames)) {
+      fprintf(stderr,
+              "warning: parsing line table prologue at 0x%8.8" PRIx64
+              " found an invalid directory or file table description at"
+              " 0x%8.8" PRIx64 "\n", PrologueOffset, (uint64_t)*OffsetPtr);
+      return false;
     }
-  }
+  } else
+    parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset,
+                         IncludeDirectories, FileNames);
 
   if (*OffsetPtr != EndPrologueOffset) {
     fprintf(stderr,
@@ -161,6 +287,12 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
   EpilogueBegin = false;
 }
 
+void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) {
+  OS << "Address            Line   Column File   ISA Discriminator Flags\n"
+     << "------------------ ------ ------ ------ --- ------------- "
+        "-------------\n"; // Column titles, then a dashed rule beneath them.
+}
+
 void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
   OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
      << format(" %6u %3u %13u ", File, Isa, Discriminator)
@@ -187,9 +319,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
   OS << '\n';
 
   if (!Rows.empty()) {
-    OS << "Address            Line   Column File   ISA Discriminator Flags\n"
-       << "------------------ ------ ------ ------ --- ------------- "
-          "-------------\n";
+    Row::dumpTableHeader(OS);
     for (const Row &R : Rows) {
       R.dump(OS);
     }
@@ -637,7 +767,7 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
   const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
-  const char *FileName = Entry.Name;
+  StringRef FileName = Entry.Name;
   if (Kind != FileLineInfoKind::AbsoluteFilePath ||
       sys::path::is_absolute(FileName)) {
     Result = FileName;
@@ -646,7 +776,7 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
 
   SmallString<16> FilePath;
   uint64_t IncludeDirIndex = Entry.DirIdx;
-  const char *IncludeDir = "";
+  StringRef IncludeDir;
   // Be defensive about the contents of Entry.
   if (IncludeDirIndex > 0 &&
       IncludeDirIndex <= Prologue.IncludeDirectories.size())
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
new file mode 100644
index 0000000..9494e87
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -0,0 +1,277 @@
+//===- DWARFVerifier.cpp --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+using namespace dwarf;
+using namespace object;
+
+// Check the section offsets carried by DW_AT_ranges and DW_AT_stmt_list.
+void DWARFVerifier::verifyDebugInfoAttribute(DWARFDie &Die,
+                                             DWARFAttribute &AttrValue) {
+  const auto Attr = AttrValue.Attr;
+  switch (Attr) {
+  case DW_AT_ranges:
+    // Make sure the offset in the DW_AT_ranges attribute is valid.
+    if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+      if (*SectionOffset >= DCtx.getRangeSection().Data.size()) {
+        ++NumDebugInfoErrors;
+        OS << "error: DW_AT_ranges offset is beyond .debug_ranges bounds:\n";
+        Die.dump(OS, 0);
+        OS << "\n";
+      }
+    } else {
+      ++NumDebugInfoErrors;
+      OS << "error: DIE has invalid DW_AT_ranges encoding:\n";
+      Die.dump(OS, 0);
+      OS << "\n";
+    }
+    break;
+  case DW_AT_stmt_list:
+    // Make sure the offset in the DW_AT_stmt_list attribute is valid.
+    if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) {
+      if (*SectionOffset >= DCtx.getLineSection().Data.size()) {
+        ++NumDebugInfoErrors;
+        // *SectionOffset is 64 bits wide, so it must be printed with PRIx64.
+        OS << "error: DW_AT_stmt_list offset is beyond .debug_line bounds: "
+           << format("0x%08" PRIx64, *SectionOffset) << "\n";
+        Die.dump(OS, 0);
+        OS << "\n";
+      }
+    } else {
+      ++NumDebugInfoErrors;
+      OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n";
+      Die.dump(OS, 0);
+      OS << "\n";
+    }
+    break;
+
+  default:
+    break;
+  }
+}
+
+// Bounds-check reference and string forms; record references for later checks.
+void DWARFVerifier::verifyDebugInfoForm(DWARFDie &Die,
+                                        DWARFAttribute &AttrValue) {
+  const auto Form = AttrValue.Value.getForm();
+  switch (Form) {
+  case DW_FORM_ref1:
+  case DW_FORM_ref2:
+  case DW_FORM_ref4:
+  case DW_FORM_ref8:
+  case DW_FORM_ref_udata: {
+    // Verify all CU relative references are valid CU offsets.
+    Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+    assert(RefVal);
+    if (RefVal) {
+      auto DieCU = Die.getDwarfUnit();
+      auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset();
+      auto CUOffset = AttrValue.Value.getRawUValue(); // Raw 64-bit value.
+      if (CUOffset >= CUSize) {
+        ++NumDebugInfoErrors;
+        OS << "error: " << FormEncodingString(Form) << " CU offset "
+           << format("0x%08" PRIx64, CUOffset) // 64-bit, hence PRIx64.
+           << " is invalid (must be less than CU size of "
+           << format("0x%08" PRIx32, CUSize) << "):\n";
+        Die.dump(OS, 0);
+        OS << "\n";
+      } else {
+        // Valid reference, but we will verify it points to an actual
+        // DIE later.
+        ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+      }
+    }
+    break;
+  }
+  case DW_FORM_ref_addr: {
+    // Verify all absolute DIE references have valid offsets in the
+    // .debug_info section.
+    Optional<uint64_t> RefVal = AttrValue.Value.getAsReference();
+    assert(RefVal);
+    if (RefVal) {
+      if (*RefVal >= DCtx.getInfoSection().Data.size()) {
+        ++NumDebugInfoErrors;
+        OS << "error: DW_FORM_ref_addr offset beyond .debug_info bounds:\n";
+        Die.dump(OS, 0);
+        OS << "\n";
+      } else {
+        // Valid reference, but we will verify it points to an actual
+        // DIE later.
+        ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+      }
+    }
+    break;
+  }
+  case DW_FORM_strp: {
+    auto SecOffset = AttrValue.Value.getAsSectionOffset();
+    assert(SecOffset); // DW_FORM_strp is a section offset.
+    if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) {
+      ++NumDebugInfoErrors;
+      OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n";
+      Die.dump(OS, 0);
+      OS << "\n";
+    }
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+void DWARFVerifier::veifyDebugInfoReferences() {
+  // Check that every recorded reference resolves to a DIE; when the lookup
+  // fails, report the offset and dump each DIE that refers to it.
+  OS << "Verifying .debug_info references...\n";
+  for (const auto &Pair : ReferenceToDIEOffsets) { // No per-entry copy.
+    auto Die = DCtx.getDIEForOffset(Pair.first);
+    if (Die)
+      continue;
+    ++NumDebugInfoErrors;
+    OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
+       << ". Offset is in between DIEs:\n";
+    for (auto Offset : Pair.second) {
+      auto ReferencingDie = DCtx.getDIEForOffset(Offset);
+      ReferencingDie.dump(OS, 0);
+      OS << "\n";
+    }
+    OS << "\n";
+  }
+}
+
+bool DWARFVerifier::handleDebugInfo() {
+  NumDebugInfoErrors = 0; // Restart the error count for this run.
+  OS << "Verifying .debug_info...\n";
+  for (const auto &CU : DCtx.compile_units()) {
+    unsigned NumDies = CU->getNumDIEs();
+    for (unsigned I = 0; I < NumDies; ++I) {
+      auto Die = CU->getDIEAtIndex(I);
+      const auto Tag = Die.getTag();
+      if (Tag == DW_TAG_null) // DW_TAG_null entries are not checked.
+        continue;
+      for (auto AttrValue : Die.attributes()) {
+        verifyDebugInfoAttribute(Die, AttrValue);
+        verifyDebugInfoForm(Die, AttrValue);
+      }
+    }
+  }
+  veifyDebugInfoReferences(); // Runs once every reference has been recorded.
+  return NumDebugInfoErrors == 0; // True when no error was counted.
+}
+
+void DWARFVerifier::verifyDebugLineStmtOffsets() {
+  std::map<uint64_t, DWARFDie> StmtListToDie; // Offset -> first unit DIE seen.
+  for (const auto &CU : DCtx.compile_units()) {
+    auto Die = CU->getUnitDIE();
+    // Get the attribute value as a section offset. No need to produce an
+    // error here if the encoding isn't correct because we validate this in
+    // the .debug_info verifier.
+    auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list));
+    if (!StmtSectionOffset)
+      continue;
+    const uint32_t LineTableOffset = *StmtSectionOffset; // Printed as PRIx32.
+    auto LineTable = DCtx.getLineTableForUnit(CU.get());
+    if (LineTableOffset < DCtx.getLineSection().Data.size()) {
+      if (!LineTable) { // In-bounds offset, yet no table could be obtained.
+        ++NumDebugLineErrors;
+        OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset)
+           << "] was not able to be parsed for CU:\n";
+        Die.dump(OS, 0);
+        OS << '\n';
+        continue;
+      }
+    } else {
+      // Make sure we don't get a valid line table back if the offset is wrong.
+      assert(LineTable == nullptr);
+      // Skip this line table as it isn't valid. No need to create an error
+      // here because we validate this in the .debug_info verifier.
+      continue;
+    }
+    auto Iter = StmtListToDie.find(LineTableOffset);
+    if (Iter != StmtListToDie.end()) {
+      ++NumDebugLineErrors;
+      OS << "error: two compile unit DIEs, "
+         << format("0x%08" PRIx32, Iter->second.getOffset()) << " and "
+         << format("0x%08" PRIx32, Die.getOffset())
+         << ", have the same DW_AT_stmt_list section offset:\n";
+      Iter->second.dump(OS, 0);
+      Die.dump(OS, 0);
+      OS << '\n';
+      // Already verified this line table before, no need to do it again.
+      continue;
+    }
+    StmtListToDie[LineTableOffset] = Die; // Remember the first user of it.
+  }
+}
+
+// Check every line table row for a decreasing address or a bad file index.
+void DWARFVerifier::verifyDebugLineRows() {
+  for (const auto &CU : DCtx.compile_units()) {
+    auto Die = CU->getUnitDIE();
+    auto LineTable = DCtx.getLineTableForUnit(CU.get());
+    // If there is no line table we will have created an error in the
+    // .debug_info verifier or in verifyDebugLineStmtOffsets().
+    if (!LineTable)
+      continue;
+    uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
+    uint64_t PrevAddress = 0;
+    uint32_t RowIndex = 0;
+    for (const auto &Row : LineTable->Rows) {
+      if (Row.Address < PrevAddress) {
+        ++NumDebugLineErrors;
+        OS << "error: .debug_line["
+           << format("0x%08" PRIx64, // The section offset is a 64-bit value.
+                     *toSectionOffset(Die.find(DW_AT_stmt_list)))
+           << "] row[" << RowIndex
+           << "] decreases in address from previous row:\n";
+        DWARFDebugLine::Row::dumpTableHeader(OS);
+        if (RowIndex > 0)
+          LineTable->Rows[RowIndex - 1].dump(OS);
+        Row.dump(OS);
+        OS << '\n';
+      }
+
+      if (Row.File > MaxFileIndex) {
+        ++NumDebugLineErrors;
+        OS << "error: .debug_line["
+           << format("0x%08" PRIx64, // The section offset is a 64-bit value.
+                     *toSectionOffset(Die.find(DW_AT_stmt_list)))
+           << "][" << RowIndex << "] has invalid file index " << Row.File
+           << " (valid values are [1," << MaxFileIndex << "]):\n";
+        DWARFDebugLine::Row::dumpTableHeader(OS);
+        Row.dump(OS);
+        OS << '\n';
+      }
+      if (Row.EndSequence) // Addresses are only compared within a sequence.
+        PrevAddress = 0;
+      else
+        PrevAddress = Row.Address;
+      ++RowIndex;
+    }
+  }
+}
+
+bool DWARFVerifier::handleDebugLine() {
+  NumDebugLineErrors = 0; // Restart the error count for this run.
+  OS << "Verifying .debug_line...\n";
+  verifyDebugLineStmtOffsets(); // Checks each unit's DW_AT_stmt_list offset.
+  verifyDebugLineRows();        // Checks row addresses and file indexes.
+  return NumDebugLineErrors == 0; // True when no error was counted.
+}
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index bd35efb..e175301 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -48,11 +48,11 @@ add_pdb_impl_folder(Native
   Native/NativeSession.cpp
   Native/PDBFile.cpp
   Native/PDBFileBuilder.cpp
+  Native/PDBStringTable.cpp
+  Native/PDBStringTableBuilder.cpp
   Native/PDBTypeServerHandler.cpp
   Native/PublicsStream.cpp
   Native/RawError.cpp
-  Native/StringTable.cpp
-  Native/StringTableBuilder.cpp
   Native/SymbolStream.cpp
   Native/TpiHashing.cpp
   Native/TpiStream.cpp
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index f994b45..867864e 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -89,6 +89,14 @@ uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const {
   return alignTo(L + M + O, sizeof(uint32_t));
 }
 
+template <typename T> struct Foo {
+  explicit Foo(T &&Answer) : Answer(std::forward<T>(Answer)) {}
+  T Answer; // Holds a reference when T is deduced as an lvalue reference.
+};
+template <typename T> Foo<T> makeFoo(T &&t) {
+  return Foo<T>(std::forward<T>(t)); // Forward: t may bind to an lvalue.
+}
+
 void DbiModuleDescriptorBuilder::finalize() {
   Layout.FileNameOffs = 0; // TODO: Fix this
   Layout.Flags = 0;        // TODO: Fix this
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 4802cc6..db70380 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -146,7 +146,7 @@ Error DbiStream::reload() {
 
   if (ECSubstream.getLength() > 0) {
     BinaryStreamReader ECReader(ECSubstream);
-    if (auto EC = ECNames.load(ECReader))
+    if (auto EC = ECNames.reload(ECReader))
       return EC;
   }
 
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 943e7fa..859295d 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -15,9 +15,9 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/Support/BinaryStream.h"
@@ -337,8 +337,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
   return *Symbols;
 }
 
-Expected<StringTable &> PDBFile::getStringTable() {
-  if (!Strings || !StringTableStream) {
+Expected<PDBStringTable &> PDBFile::getStringTable() {
+  if (!Strings) {
     auto IS = getPDBInfoStream();
     if (!IS)
       return IS.takeError();
@@ -350,12 +350,13 @@ Expected<StringTable &> PDBFile::getStringTable() {
     if (!NS)
       return NS.takeError();
 
+    auto N = llvm::make_unique<PDBStringTable>();
     BinaryStreamReader Reader(**NS);
-    auto N = llvm::make_unique<StringTable>();
-    if (auto EC = N->load(Reader))
+    if (auto EC = N->reload(Reader))
       return std::move(EC);
-    Strings = std::move(N);
+    assert(Reader.bytesRemaining() == 0);
     StringTableStream = std::move(*NS);
+    Strings = std::move(N);
   }
   return *Strings;
 }
@@ -389,7 +390,7 @@ bool PDBFile::hasPDBSymbolStream() {
 
 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
 
-bool PDBFile::hasStringTable() {
+bool PDBFile::hasPDBStringTable() {
   auto IS = getPDBInfoStream();
   if (!IS)
     return false;
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index b3c8490..4dd965c 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -17,8 +17,8 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
 #include "llvm/Support/BinaryStream.h"
@@ -67,7 +67,9 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() {
   return *Ipi;
 }
 
-StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; }
+PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() {
+  return Strings;
+}
 
 Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
   auto ExpectedStream = Msf->addStream(Size);
@@ -78,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
 }
 
 Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
-  uint32_t StringTableSize = Strings.finalize();
+  uint32_t StringsLen = Strings.calculateSerializedSize();
 
-  if (auto EC = addNamedStream("/names", StringTableSize))
+  if (auto EC = addNamedStream("/names", StringsLen))
     return std::move(EC);
   if (auto EC = addNamedStream("/LinkInfo", 0))
     return std::move(EC);
@@ -107,6 +109,13 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
   return Msf->build();
 }
 
+Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const {
+  uint32_t SN = 0;
+  if (!NamedStreams.get(Name, SN))
+    return llvm::make_error<pdb::RawError>(raw_error_code::no_stream);
+  return SN;
+}
+
 Error PDBFileBuilder::commit(StringRef Filename) {
   auto ExpectedLayout = finalizeMsfLayout();
   if (!ExpectedLayout)
@@ -144,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) {
       return EC;
   }
 
-  uint32_t StringTableStreamNo = 0;
-  if (!NamedStreams.get("/names", StringTableStreamNo))
-    return llvm::make_error<pdb::RawError>(raw_error_code::no_stream);
+  auto ExpectedSN = getNamedStreamIndex("/names");
+  if (!ExpectedSN)
+    return ExpectedSN.takeError();
 
   auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
-                                                           StringTableStreamNo);
+                                                           *ExpectedSN);
   BinaryStreamWriter NSWriter(*NS);
   if (auto EC = Strings.commit(NSWriter))
     return EC;
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
new file mode 100644
index 0000000..e84573f
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -0,0 +1,134 @@
+//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+uint32_t PDBStringTable::getByteSize() const { return ByteSize; }
+uint32_t PDBStringTable::getNameCount() const { return NameCount; }
+uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; }
+uint32_t PDBStringTable::getSignature() const { return Header->Signature; }
+
+Error PDBStringTable::readHeader(BinaryStreamReader &Reader) {
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  if (Header->Signature != PDBStringTableSignature)
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid hash table signature");
+  if (Header->HashVersion != 1 && Header->HashVersion != 2)
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Unsupported hash version");
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTable::readStrings(BinaryStreamReader &Reader) {
+  BinaryStreamRef Stream;
+  if (auto EC = Reader.readStreamRef(Stream))
+    return EC;
+
+  if (auto EC = Strings.initialize(Stream)) {
+    return joinErrors(std::move(EC),
+                      make_error<RawError>(raw_error_code::corrupt_file,
+                                           "Invalid hash table byte length"));
+  }
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) {
+  const support::ulittle32_t *HashCount;
+  if (auto EC = Reader.readObject(HashCount))
+    return EC;
+
+  if (auto EC = Reader.readArray(IDs, *HashCount)) {
+    return joinErrors(std::move(EC),
+                      make_error<RawError>(raw_error_code::corrupt_file,
+                                           "Could not read bucket array"));
+  }
+
+  return Error::success();
+}
+
+Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) {
+  if (auto EC = Reader.readInteger(NameCount))
+    return EC;
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTable::reload(BinaryStreamReader &Reader) {
+  // Parse the sections in order: header, strings, hash table, epilogue.
+  BinaryStreamReader SectionReader;
+
+  std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader));
+  if (auto EC = readHeader(SectionReader))
+    return EC;
+
+  std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize);
+  if (auto EC = readStrings(SectionReader))
+    return EC;
+
+  // We don't know how long the hash table is until we parse it, so let the
+  // function responsible for doing that figure it out.
+  if (auto EC = readHashTable(Reader))
+    return EC;
+
+  std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t));
+  if (auto EC = readEpilogue(SectionReader))
+    return EC;
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Expected<StringRef> PDBStringTable::getStringForID(uint32_t ID) const {
+  return Strings.getString(ID);
+}
+
+Expected<uint32_t> PDBStringTable::getIDForString(StringRef Str) const {
+  uint32_t Hash =
+      (Header->HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str);
+  size_t Count = IDs.size();
+  // An empty bucket array must not reach the modulo (division by zero);
+  // with Count == 0 the loop below is skipped and no_entry is returned.
+  uint32_t Start = Count ? Hash % Count : 0;
+  for (size_t I = 0; I < Count; ++I) {
+    // The hash is just a starting point for the search, but if it
+    // doesn't work we should find the string no matter what, because
+    // we iterate the entire array.
+    uint32_t Index = (Start + I) % Count;
+    uint32_t ID = IDs[Index];
+    auto ExpectedStr = getStringForID(ID);
+    if (!ExpectedStr)
+      return ExpectedStr.takeError();
+    if (*ExpectedStr == Str)
+      return ID;
+  }
+  return make_error<RawError>(raw_error_code::no_entry);
+}
+
+FixedStreamArray<support::ulittle32_t> PDBStringTable::name_ids() const {
+  return IDs;
+}
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
new file mode 100644
index 0000000..a472181
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -0,0 +1,133 @@
+//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace llvm::pdb;
+
+uint32_t PDBStringTableBuilder::insert(StringRef S) {
+  return Strings.insert(S);
+}
+
+static uint32_t computeBucketCount(uint32_t NumStrings) {
+  // The /names stream is basically an on-disk open-addressing hash table.
+  // Hash collisions are resolved by linear probing. We cannot make
+  // utilization 100% because it will make the linear probing extremely
+  // slow. But lower utilization wastes disk space. As a reasonable
+  // load factor, we choose 80%. We need +1 because slot 0 is reserved.
+  return (NumStrings + 1) * 1.25;
+}
+
+uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
+  uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
+  Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
+
+  return Size;
+}
+
+uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
+  uint32_t Size = 0;
+  Size += sizeof(PDBStringTableHeader);
+  Size += Strings.calculateSerializedSize();
+  Size += calculateHashTableSize();
+  Size += sizeof(uint32_t); // The /names stream ends with the string count.
+  return Size;
+}
+
+Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
+  // Write a header
+  PDBStringTableHeader H;
+  H.Signature = PDBStringTableSignature;
+  H.HashVersion = 1;
+  H.ByteSize = Strings.calculateSerializedSize();
+  if (auto EC = Writer.writeObject(H))
+    return EC;
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
+  if (auto EC = Strings.commit(Writer))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
+  // Write a hash table.
+  uint32_t BucketCount = computeBucketCount(Strings.size());
+  if (auto EC = Writer.writeInteger(BucketCount))
+    return EC;
+  std::vector<ulittle32_t> Buckets(BucketCount);
+
+  for (auto &Pair : Strings) {
+    StringRef S = Pair.getKey();
+    uint32_t Offset = Pair.getValue();
+    uint32_t Hash = hashStringV1(S);
+
+    for (uint32_t I = 0; I != BucketCount; ++I) {
+      uint32_t Slot = (Hash + I) % BucketCount;
+      if (Slot == 0)
+        continue; // Skip reserved slot
+      if (Buckets[Slot] != 0)
+        continue;
+      Buckets[Slot] = Offset;
+      break;
+    }
+  }
+
+  if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
+    return EC;
+
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
+  if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
+    return EC;
+  assert(Writer.bytesRemaining() == 0);
+  return Error::success();
+}
+
+Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+  BinaryStreamWriter SectionWriter;
+
+  std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
+  if (auto EC = writeHeader(SectionWriter))
+    return EC;
+
+  std::tie(SectionWriter, Writer) =
+      Writer.split(Strings.calculateSerializedSize());
+  if (auto EC = writeStrings(SectionWriter))
+    return EC;
+
+  std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
+  if (auto EC = writeHashTable(SectionWriter))
+    return EC;
+
+  std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
+  if (auto EC = writeEpilogue(SectionWriter))
+    return EC;
+
+  return Error::success();
+}
diff --git a/lib/DebugInfo/PDB/Native/StringTable.cpp b/lib/DebugInfo/PDB/Native/StringTable.cpp
deleted file mode 100644
index 7e28389..0000000
--- a/lib/DebugInfo/PDB/Native/StringTable.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/PDB/Native/Hash.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/Endian.h"
-
-using namespace llvm;
-using namespace llvm::support;
-using namespace llvm::pdb;
-
-StringTable::StringTable() {}
-
-Error StringTable::load(BinaryStreamReader &Stream) {
-  ByteSize = Stream.getLength();
-
-  const StringTableHeader *H;
-  if (auto EC = Stream.readObject(H))
-    return EC;
-
-  if (H->Signature != StringTableSignature)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Invalid hash table signature");
-  if (H->HashVersion != 1 && H->HashVersion != 2)
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Unsupported hash version");
-
-  Signature = H->Signature;
-  HashVersion = H->HashVersion;
-  if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Invalid hash table byte length"));
-
-  const support::ulittle32_t *HashCount;
-  if (auto EC = Stream.readObject(HashCount))
-    return EC;
-
-  if (auto EC = Stream.readArray(IDs, *HashCount))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Could not read bucket array"));
-
-  if (Stream.bytesRemaining() < sizeof(support::ulittle32_t))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Missing name count");
-
-  if (auto EC = Stream.readInteger(NameCount))
-    return EC;
-
-  if (Stream.bytesRemaining() > 0)
-    return make_error<RawError>(raw_error_code::stream_too_long,
-      "Unexpected bytes found in string table");
-
-  return Error::success();
-}
-
-uint32_t StringTable::getByteSize() const {
-  return ByteSize;
-}
-
-StringRef StringTable::getStringForID(uint32_t ID) const {
-  if (ID == IDs[0])
-    return StringRef();
-
-  // NamesBuffer is a buffer of null terminated strings back to back.  ID is
-  // the starting offset of the string we're looking for.  So just seek into
-  // the desired offset and a read a null terminated stream from that offset.
-  StringRef Result;
-  BinaryStreamReader NameReader(NamesBuffer);
-  NameReader.setOffset(ID);
-  if (auto EC = NameReader.readCString(Result))
-    consumeError(std::move(EC));
-  return Result;
-}
-
-uint32_t StringTable::getIDForString(StringRef Str) const {
-  uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str);
-  size_t Count = IDs.size();
-  uint32_t Start = Hash % Count;
-  for (size_t I = 0; I < Count; ++I) {
-    // The hash is just a starting point for the search, but if it
-    // doesn't work we should find the string no matter what, because
-    // we iterate the entire array.
-    uint32_t Index = (Start + I) % Count;
-
-    uint32_t ID = IDs[Index];
-    StringRef S = getStringForID(ID);
-    if (S == Str)
-      return ID;
-  }
-  // IDs[0] contains the ID of the "invalid" entry.
-  return IDs[0];
-}
-
-FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const {
-  return IDs;
-}
diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
deleted file mode 100644
index 40dc8e1..0000000
--- a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/PDB/Native/Hash.h"
-#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
-#include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/Endian.h"
-
-using namespace llvm;
-using namespace llvm::support;
-using namespace llvm::support::endian;
-using namespace llvm::pdb;
-
-uint32_t StringTableBuilder::insert(StringRef S) {
-  auto P = Strings.insert({S, StringSize});
-
-  // If a given string didn't exist in the string table, we want to increment
-  // the string table size.
-  if (P.second)
-    StringSize += S.size() + 1; // +1 for '\0'
-  return P.first->second;
-}
-
-uint32_t StringTableBuilder::getStringIndex(StringRef S) {
-  auto Iter = Strings.find(S);
-  assert(Iter != Strings.end());
-  return Iter->second;
-}
-
-static uint32_t computeBucketCount(uint32_t NumStrings) {
-  // The /names stream is basically an on-disk open-addressing hash table.
-  // Hash collisions are resolved by linear probing. We cannot make
-  // utilization 100% because it will make the linear probing extremely
-  // slow. But lower utilization wastes disk space. As a reasonable
-  // load factor, we choose 80%. We need +1 because slot 0 is reserved.
-  return (NumStrings + 1) * 1.25;
-}
-
-uint32_t StringTableBuilder::finalize() {
-  uint32_t Size = 0;
-  Size += sizeof(StringTableHeader);
-  Size += StringSize;
-  Size += sizeof(uint32_t); // Hash table begins with 4-byte size field.
-
-  uint32_t BucketCount = computeBucketCount(Strings.size());
-  Size += BucketCount * sizeof(uint32_t);
-
-  Size +=
-      sizeof(uint32_t); // The /names stream ends with the number of strings.
-  return Size;
-}
-
-Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const {
-  // Write a header
-  StringTableHeader H;
-  H.Signature = StringTableSignature;
-  H.HashVersion = 1;
-  H.ByteSize = StringSize;
-  if (auto EC = Writer.writeObject(H))
-    return EC;
-
-  // Write a string table.
-  uint32_t StringStart = Writer.getOffset();
-  for (auto Pair : Strings) {
-    StringRef S = Pair.first;
-    uint32_t Offset = Pair.second;
-    Writer.setOffset(StringStart + Offset);
-    if (auto EC = Writer.writeCString(S))
-      return EC;
-  }
-  Writer.setOffset(StringStart + StringSize);
-
-  // Write a hash table.
-  uint32_t BucketCount = computeBucketCount(Strings.size());
-  if (auto EC = Writer.writeInteger(BucketCount))
-    return EC;
-  std::vector<ulittle32_t> Buckets(BucketCount);
-
-  for (auto Pair : Strings) {
-    StringRef S = Pair.first;
-    uint32_t Offset = Pair.second;
-    uint32_t Hash = hashStringV1(S);
-
-    for (uint32_t I = 0; I != BucketCount; ++I) {
-      uint32_t Slot = (Hash + I) % BucketCount;
-      if (Slot == 0)
-        continue; // Skip reserved slot
-      if (Buckets[Slot] != 0)
-        continue;
-      Buckets[Slot] = Offset;
-      break;
-    }
-  }
-
-  if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
-    return EC;
-  if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size())))
-    return EC;
-  return Error::success();
-}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index f780137..50f63fb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -819,6 +819,34 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
   }
 }
 
+void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section,
+                                          uint64_t Offset, uint64_t Value,
+                                          uint32_t Type, int64_t Addend) {
+  bool isBE = Arch == Triple::bpfeb;
+
+  switch (Type) {
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+    break;
+  case ELF::R_BPF_NONE:
+    break;
+  case ELF::R_BPF_64_64: {
+    write(isBE, Section.getAddressWithOffset(Offset), Value + Addend);
+    DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at "
+                 << format("%p\n", Section.getAddressWithOffset(Offset)));
+    break;
+  }
+  case ELF::R_BPF_64_32: {
+    Value += Addend;
+    assert(Value <= UINT32_MAX);
+    write(isBE, Section.getAddressWithOffset(Offset), static_cast<uint32_t>(Value));
+    DEBUG(dbgs() << "Writing " << format("%p", Value) << " at "
+                 << format("%p\n", Section.getAddressWithOffset(Offset)));
+    break;
+  }
+  }
+}
+
 // The target location for the relocation is described by RE.SectionID and
 // RE.Offset.  RE.SectionID can be used to find the SectionEntry.  Each
 // SectionEntry has three members describing its location.
@@ -879,6 +907,10 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
   case Triple::systemz:
     resolveSystemZRelocation(Section, Offset, Value, Type, Addend);
     break;
+  case Triple::bpfel:
+  case Triple::bpfeb:
+    resolveBPFRelocation(Section, Offset, Value, Type, Addend);
+    break;
   default:
     llvm_unreachable("Unsupported CPU type!");
   }
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 4989797..84dd810 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -58,6 +58,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
   void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset,
                                 uint64_t Value, uint32_t Type, int64_t Addend);
 
+  void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset,
+                            uint64_t Value, uint32_t Type, int64_t Addend);
+
   unsigned getMaxStubSize() override {
     if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
       return 20; // movz; movk; movk; movk; br
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 62f127b..3b1140a 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -936,7 +936,9 @@ AttributeList AttributeList::get(LLVMContext &C,
 AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
                                           Attribute::AttrKind Kind) const {
   if (hasAttribute(Index, Kind)) return *this;
-  return addAttributes(C, Index, AttributeList::get(C, Index, Kind));
+  AttrBuilder B;
+  B.addAttribute(Kind);
+  return addAttributes(C, Index, B);
 }
 
 AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
@@ -944,7 +946,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
                                           StringRef Value) const {
   AttrBuilder B;
   B.addAttribute(Kind, Value);
-  return addAttributes(C, Index, AttributeList::get(C, Index, B));
+  return addAttributes(C, Index, B);
 }
 
 AttributeList AttributeList::addAttribute(LLVMContext &C,
@@ -978,14 +980,6 @@ AttributeList AttributeList::addAttribute(LLVMContext &C,
 }
 
 AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
-                                           AttributeList Attrs) const {
-  if (!pImpl) return Attrs;
-  if (!Attrs.pImpl) return *this;
-
-  return addAttributes(C, Index, Attrs.getAttributes(Index));
-}
-
-AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
                                            const AttrBuilder &B) const {
   if (!B.hasAttributes())
     return *this;
@@ -1034,18 +1028,17 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
 AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
                                              Attribute::AttrKind Kind) const {
   if (!hasAttribute(Index, Kind)) return *this;
-  return removeAttributes(C, Index, AttributeList::get(C, Index, Kind));
+  AttrBuilder B;
+  B.addAttribute(Kind);
+  return removeAttributes(C, Index, B);
 }
 
 AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
                                              StringRef Kind) const {
   if (!hasAttribute(Index, Kind)) return *this;
-  return removeAttributes(C, Index, AttributeList::get(C, Index, Kind));
-}
-
-AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
-                                              AttributeList Attrs) const {
-  return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index)));
+  AttrBuilder B;
+  B.addAttribute(Kind);
+  return removeAttributes(C, Index, B);
 }
 
 AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
@@ -1103,7 +1096,7 @@ AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C,
                                                     uint64_t Bytes) const {
   AttrBuilder B;
   B.addDereferenceableAttr(Bytes);
-  return addAttributes(C, Index, AttributeList::get(C, Index, B));
+  return addAttributes(C, Index, B);
 }
 
 AttributeList
@@ -1111,7 +1104,7 @@ AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
                                             uint64_t Bytes) const {
   AttrBuilder B;
   B.addDereferenceableOrNullAttr(Bytes);
-  return addAttributes(C, Index, AttributeList::get(C, Index, B));
+  return addAttributes(C, Index, B);
 }
 
 AttributeList
@@ -1120,7 +1113,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index,
                                 const Optional<unsigned> &NumElemsArg) {
   AttrBuilder B;
   B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
-  return addAttributes(C, Index, AttributeList::get(C, Index, B));
+  return addAttributes(C, Index, B);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1130,7 +1123,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index,
 LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); }
 
 AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const {
-  return getAttributes(ArgNo + 1);
+  return getAttributes(ArgNo + FirstArgIndex);
 }
 
 AttributeSet AttributeList::getRetAttributes() const {
@@ -1196,7 +1189,7 @@ unsigned AttributeList::getRetAlignment() const {
 }
 
 unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
-  return getAttributes(ArgNo + 1).getAlignment();
+  return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
@@ -1610,12 +1603,10 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
   // If upgrading the SSP attribute, clear out the old SSP Attributes first.
   // Having multiple SSP attributes doesn't actually hurt, but it adds useless
   // clutter to the IR.
-  AttrBuilder B;
-  B.addAttribute(Attribute::StackProtect)
-    .addAttribute(Attribute::StackProtectStrong)
-    .addAttribute(Attribute::StackProtectReq);
-  AttributeList OldSSPAttr =
-      AttributeList::get(Caller.getContext(), AttributeList::FunctionIndex, B);
+  AttrBuilder OldSSPAttr;
+  OldSSPAttr.addAttribute(Attribute::StackProtect)
+      .addAttribute(Attribute::StackProtectStrong)
+      .addAttribute(Attribute::StackProtectReq);
 
   if (Callee.hasFnAttribute(Attribute::StackProtectReq)) {
     Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 2897434..8bcba76 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -467,6 +467,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         return true;
       }
     }
+    // Rename gather/scatter intrinsics that lack the address-space overload
+    // to the new overload, which includes an address space.
+    if (Name.startswith("masked.gather.")) {
+      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
+        rename(F);
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          Intrinsic::masked_gather, Tys);
+        return true;
+      }
+    }
+    if (Name.startswith("masked.scatter.")) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {Args[0], Args[1]};
+      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
+        rename(F);
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          Intrinsic::masked_scatter, Tys);
+        return true;
+      }
+    }
     break;
   }
   case 'n': {
@@ -2072,7 +2093,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::invariant_start:
   case Intrinsic::invariant_end:
   case Intrinsic::masked_load:
-  case Intrinsic::masked_store: {
+  case Intrinsic::masked_store:
+  case Intrinsic::masked_gather:
+  case Intrinsic::masked_scatter: {
     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                  CI->arg_operands().end());
     NewCall = Builder.CreateCall(NewFn, Args);
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index fc61ba7..58c0605 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -90,13 +90,15 @@ unsigned Argument::getParamAlignment() const {
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
-  return getParent()->getDereferenceableBytes(getArgNo()+1);
+  return getParent()->getDereferenceableBytes(getArgNo() +
+                                              AttributeList::FirstArgIndex);
 }
 
 uint64_t Argument::getDereferenceableOrNullBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
-  return getParent()->getDereferenceableOrNullBytes(getArgNo()+1);
+  return getParent()->getDereferenceableOrNullBytes(
+      getArgNo() + AttributeList::FirstArgIndex);
 }
 
 bool Argument::hasNestAttr() const {
@@ -139,20 +141,21 @@ bool Argument::onlyReadsMemory() const {
 
 void Argument::addAttrs(AttrBuilder &B) {
   AttributeList AL = getParent()->getAttributes();
-  AL = AL.addAttributes(Parent->getContext(), getArgNo() + 1, B);
+  AL = AL.addAttributes(Parent->getContext(),
+                        getArgNo() + AttributeList::FirstArgIndex, B);
   getParent()->setAttributes(AL);
 }
 
 void Argument::addAttr(Attribute::AttrKind Kind) {
-  getParent()->addAttribute(getArgNo() + 1, Kind);
+  getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind);
 }
 
 void Argument::addAttr(Attribute Attr) {
-  getParent()->addAttribute(getArgNo() + 1, Attr);
+  getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Attr);
 }
 
 void Argument::removeAttr(Attribute::AttrKind Kind) {
-  getParent()->removeAttribute(getArgNo() + 1, Kind);
+  getParent()->removeAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind);
 }
 
 bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
@@ -328,7 +331,7 @@ void Function::addAttribute(unsigned i, Attribute Attr) {
   setAttributes(PAL);
 }
 
-void Function::addAttributes(unsigned i, AttributeList Attrs) {
+void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) {
   AttributeList PAL = getAttributes();
   PAL = PAL.addAttributes(getContext(), i, Attrs);
   setAttributes(PAL);
@@ -346,7 +349,7 @@ void Function::removeAttribute(unsigned i, StringRef Kind) {
   setAttributes(PAL);
 }
 
-void Function::removeAttributes(unsigned i, AttributeList Attrs) {
+void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) {
   AttributeList PAL = getAttributes();
   PAL = PAL.removeAttributes(getContext(), i, Attrs);
   setAttributes(PAL);
@@ -574,13 +577,12 @@ enum IIT_Info {
   IIT_SAME_VEC_WIDTH_ARG = 31,
   IIT_PTR_TO_ARG = 32,
   IIT_PTR_TO_ELT = 33,
-  IIT_VEC_OF_PTRS_TO_ELT = 34,
+  IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
   IIT_I128 = 35,
   IIT_V512 = 36,
   IIT_V1024 = 37
 };
 
-
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
                       SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
   IIT_Info Info = IIT_Info(Infos[NextElt++]);
@@ -716,10 +718,11 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
     return;
   }
-  case IIT_VEC_OF_PTRS_TO_ELT: {
-    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
-    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
-                                             ArgInfo));
+  case IIT_VEC_OF_ANYPTRS_TO_ELT: {
+    unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(
+        IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo));
     return;
   }
   case IIT_EMPTYSTRUCT:
@@ -808,7 +811,6 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
       Elts[i] = DecodeFixedType(Infos, Tys, Context);
     return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements));
   }
-
   case IITDescriptor::Argument:
     return Tys[D.getArgumentNumber()];
   case IITDescriptor::ExtendArgument: {
@@ -850,15 +852,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
     Type *EltTy = VTy->getVectorElementType();
     return PointerType::getUnqual(EltTy);
   }
-  case IITDescriptor::VecOfPtrsToElt: {
-    Type *Ty = Tys[D.getArgumentNumber()];
-    VectorType *VTy = dyn_cast<VectorType>(Ty);
-    if (!VTy)
-      llvm_unreachable("Expected an argument of Vector Type");
-    Type *EltTy = VTy->getVectorElementType();
-    return VectorType::get(PointerType::getUnqual(EltTy),
-                           VTy->getNumElements());
-  }
+  case IITDescriptor::VecOfAnyPtrsToElt:
+    // Return the overloaded type (which determines the pointers' address space)
+    return Tys[D.getOverloadArgNumber()];
  }
   llvm_unreachable("unhandled");
 }
@@ -1054,11 +1050,22 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> 
       return (!ThisArgType || !ReferenceType ||
               ThisArgType->getElementType() != ReferenceType->getElementType());
     }
-    case IITDescriptor::VecOfPtrsToElt: {
-      if (D.getArgumentNumber() >= ArgTys.size())
+    case IITDescriptor::VecOfAnyPtrsToElt: {
+      unsigned RefArgNumber = D.getRefArgNumber();
+
+      // This may only be used when referring to a previous argument.
+      if (RefArgNumber >= ArgTys.size())
         return true;
-      VectorType * ReferenceType =
-              dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
+
+      // Record the overloaded type
+      assert(D.getOverloadArgNumber() == ArgTys.size() &&
+             "Table consistency error");
+      ArgTys.push_back(Ty);
+
+      // Verify the overloaded type "matches" the Ref type,
+      // i.e. Ty is a vector with the same width as Ref,
+      // composed of pointers to the same element type as Ref.
+      VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
       VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
       if (!ThisArgVecTy || !ReferenceType ||
           (ReferenceType->getVectorNumElements() !=
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index fd5ae71..e265a82 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -293,11 +293,13 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align,
     Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
                                      NumElts));
 
+  Type *OverloadedTypes[] = {DataTy, PtrsTy};
   Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
-  return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name);
+  return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes,
+                               Name);
 }
 
 /// \brief Create a call to a Masked Scatter intrinsic.
@@ -323,11 +325,13 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
   if (!Mask)
     Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
                                      NumElts));
+
+  Type *OverloadedTypes[] = {DataTy, PtrsTy};
   Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
-  return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy });
+  return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes);
 }
 
 template <typename T0, typename T1, typename T2, typename T3>
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 5950099..a60cc37 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -335,12 +335,12 @@ Value *CallInst::getReturnedArgOperand() const {
   unsigned Index;
 
   if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
-    return getArgOperand(Index-1);
+    return getArgOperand(Index - AttributeList::FirstArgIndex);
   if (const Function *F = getCalledFunction())
     if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
         Index)
-      return getArgOperand(Index-1);
-      
+      return getArgOperand(Index - AttributeList::FirstArgIndex);
+
   return nullptr;
 }
 
@@ -356,6 +356,10 @@ void CallInst::addAttribute(unsigned i, Attribute Attr) {
   setAttributes(PAL);
 }
 
+void CallInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+  addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+}
+
 void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
   AttributeList PAL = getAttributes();
   PAL = PAL.removeAttribute(getContext(), i, Kind);
@@ -368,6 +372,10 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) {
   setAttributes(PAL);
 }
 
+void CallInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+  removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+}
+
 void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
   AttributeList PAL = getAttributes();
   PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
@@ -501,8 +509,8 @@ static Instruction *createMalloc(Instruction *InsertBefore,
   MCall->setTailCall();
   if (Function *F = dyn_cast<Function>(MallocFunc)) {
     MCall->setCallingConv(F->getCallingConv());
-    if (!F->doesNotAlias(AttributeList::ReturnIndex))
-      F->setDoesNotAlias(AttributeList::ReturnIndex);
+    if (!F->returnDoesNotAlias())
+      F->setReturnDoesNotAlias();
   }
   assert(!MCall->getType()->isVoidTy() && "Malloc has void return type");
 
@@ -695,12 +703,12 @@ Value *InvokeInst::getReturnedArgOperand() const {
   unsigned Index;
 
   if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
-    return getArgOperand(Index-1);
+    return getArgOperand(Index - AttributeList::FirstArgIndex);
   if (const Function *F = getCalledFunction())
     if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
         Index)
-      return getArgOperand(Index-1);
-      
+      return getArgOperand(Index - AttributeList::FirstArgIndex);
+
   return nullptr;
 }
 
@@ -756,6 +764,10 @@ void InvokeInst::addAttribute(unsigned i, Attribute Attr) {
   setAttributes(PAL);
 }
 
+void InvokeInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+  addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+}
+
 void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
   AttributeList PAL = getAttributes();
   PAL = PAL.removeAttribute(getContext(), i, Kind);
@@ -768,6 +780,10 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) {
   setAttributes(PAL);
 }
 
+void InvokeInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
+  removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind);
+}
+
 void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
   AttributeList PAL = getAttributes();
   PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 7185736..9db30da 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -52,12 +52,12 @@ class Value;
 struct DenseMapAPIntKeyInfo {
   static inline APInt getEmptyKey() {
     APInt V(nullptr, 0);
-    V.VAL = 0;
+    V.U.VAL = 0;
     return V;
   }
   static inline APInt getTombstoneKey() {
     APInt V(nullptr, 0);
-    V.VAL = 1;
+    V.U.VAL = 1;
     return V;
   }
   static unsigned getHashValue(const APInt &Key) {
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 9f94264..b685790 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -286,6 +286,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
                      ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8
                                                     : dwarf::DW_EH_PE_sdata4);
     break;
+  case Triple::bpfel:
+  case Triple::bpfeb:
+    FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
+    break;
   default:
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
     break;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index b6c8cbe..fa81b28 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -76,34 +76,31 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) {
 
 
 void APInt::initSlowCase(uint64_t val, bool isSigned) {
-  VAL = 0;
-  pVal = getClearedMemory(getNumWords());
-  pVal[0] = val;
+  U.pVal = getClearedMemory(getNumWords());
+  U.pVal[0] = val;
   if (isSigned && int64_t(val) < 0)
     for (unsigned i = 1; i < getNumWords(); ++i)
-      pVal[i] = WORD_MAX;
+      U.pVal[i] = WORD_MAX;
   clearUnusedBits();
 }
 
 void APInt::initSlowCase(const APInt& that) {
-  VAL = 0;
-  pVal = getMemory(getNumWords());
-  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
+  U.pVal = getMemory(getNumWords());
+  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
 }
 
 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
   assert(BitWidth && "Bitwidth too small");
   assert(bigVal.data() && "Null pointer detected!");
   if (isSingleWord())
-    VAL = bigVal[0];
+    U.VAL = bigVal[0];
   else {
     // Get memory, cleared to 0
-    VAL = 0;
-    pVal = getClearedMemory(getNumWords());
+    U.pVal = getClearedMemory(getNumWords());
     // Calculate the number of words to copy
     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
     // Copy the words from bigVal to pVal
-    memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
+    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
   }
   // Make sure unused high bits are cleared
   clearUnusedBits();
@@ -120,7 +117,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 }
 
 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
-  : VAL(0), BitWidth(numbits) {
+  : BitWidth(numbits) {
   assert(BitWidth && "Bitwidth too small");
   fromString(numbits, Str, radix);
 }
@@ -133,25 +130,24 @@ void APInt::AssignSlowCase(const APInt& RHS) {
   if (BitWidth == RHS.getBitWidth()) {
     // assume same bit-width single-word case is already handled
     assert(!isSingleWord());
-    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
+    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
     return;
   }
 
   if (isSingleWord()) {
     // assume case where both are single words is already handled
     assert(!RHS.isSingleWord());
-    VAL = 0;
-    pVal = getMemory(RHS.getNumWords());
-    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+    U.pVal = getMemory(RHS.getNumWords());
+    memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
   } else if (getNumWords() == RHS.getNumWords())
-    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+    memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
   else if (RHS.isSingleWord()) {
-    delete [] pVal;
-    VAL = RHS.VAL;
+    delete [] U.pVal;
+    U.VAL = RHS.U.VAL;
   } else {
-    delete [] pVal;
-    pVal = getMemory(RHS.getNumWords());
-    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
+    delete [] U.pVal;
+    U.pVal = getMemory(RHS.getNumWords());
+    memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
   }
   BitWidth = RHS.BitWidth;
   clearUnusedBits();
@@ -162,30 +158,30 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
   ID.AddInteger(BitWidth);
 
   if (isSingleWord()) {
-    ID.AddInteger(VAL);
+    ID.AddInteger(U.VAL);
     return;
   }
 
   unsigned NumWords = getNumWords();
   for (unsigned i = 0; i < NumWords; ++i)
-    ID.AddInteger(pVal[i]);
+    ID.AddInteger(U.pVal[i]);
 }
 
 /// @brief Prefix increment operator. Increments the APInt by one.
 APInt& APInt::operator++() {
   if (isSingleWord())
-    ++VAL;
+    ++U.VAL;
   else
-    tcIncrement(pVal, getNumWords());
+    tcIncrement(U.pVal, getNumWords());
   return clearUnusedBits();
 }
 
 /// @brief Prefix decrement operator. Decrements the APInt by one.
 APInt& APInt::operator--() {
   if (isSingleWord())
-    --VAL;
+    --U.VAL;
   else
-    tcDecrement(pVal, getNumWords());
+    tcDecrement(U.pVal, getNumWords());
   return clearUnusedBits();
 }
 
@@ -195,17 +191,17 @@ APInt& APInt::operator--() {
 APInt& APInt::operator+=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
-    VAL += RHS.VAL;
+    U.VAL += RHS.U.VAL;
   else
-    tcAdd(pVal, RHS.pVal, 0, getNumWords());
+    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
   return clearUnusedBits();
 }
 
 APInt& APInt::operator+=(uint64_t RHS) {
   if (isSingleWord())
-    VAL += RHS;
+    U.VAL += RHS;
   else
-    tcAddPart(pVal, RHS, getNumWords());
+    tcAddPart(U.pVal, RHS, getNumWords());
   return clearUnusedBits();
 }
 
@@ -215,17 +211,17 @@ APInt& APInt::operator+=(uint64_t RHS) {
 APInt& APInt::operator-=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
-    VAL -= RHS.VAL;
+    U.VAL -= RHS.U.VAL;
   else
-    tcSubtract(pVal, RHS.pVal, 0, getNumWords());
+    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
   return clearUnusedBits();
 }
 
 APInt& APInt::operator-=(uint64_t RHS) {
   if (isSingleWord())
-    VAL -= RHS;
+    U.VAL -= RHS;
   else
-    tcSubtractPart(pVal, RHS, getNumWords());
+    tcSubtractPart(U.pVal, RHS, getNumWords());
   return clearUnusedBits();
 }
 
@@ -300,7 +296,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 APInt& APInt::operator*=(const APInt& RHS) {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    VAL *= RHS.VAL;
+    U.VAL *= RHS.U.VAL;
     clearUnusedBits();
     return *this;
   }
@@ -326,12 +322,12 @@ APInt& APInt::operator*=(const APInt& RHS) {
   uint64_t *dest = getMemory(destWords);
 
   // Perform the long multiply
-  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+  mul(dest, U.pVal, lhsWords, RHS.U.pVal, rhsWords);
 
   // Copy result back into *this
   clearAllBits();
   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
-  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+  memcpy(U.pVal, dest, wordsToCopy * APINT_WORD_SIZE);
   clearUnusedBits();
 
   // delete dest array and return
@@ -340,43 +336,43 @@ APInt& APInt::operator*=(const APInt& RHS) {
 }
 
 void APInt::AndAssignSlowCase(const APInt& RHS) {
-  tcAnd(pVal, RHS.pVal, getNumWords());
+  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 }
 
 void APInt::OrAssignSlowCase(const APInt& RHS) {
-  tcOr(pVal, RHS.pVal, getNumWords());
+  tcOr(U.pVal, RHS.U.pVal, getNumWords());
 }
 
 void APInt::XorAssignSlowCase(const APInt& RHS) {
-  tcXor(pVal, RHS.pVal, getNumWords());
+  tcXor(U.pVal, RHS.U.pVal, getNumWords());
 }
 
 APInt APInt::operator*(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
-    return APInt(BitWidth, VAL * RHS.VAL);
+    return APInt(BitWidth, U.VAL * RHS.U.VAL);
   APInt Result(*this);
   Result *= RHS;
   return Result;
 }
 
 bool APInt::EqualSlowCase(const APInt& RHS) const {
-  return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
+  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 }
 
 int APInt::compare(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
   if (isSingleWord())
-    return VAL < RHS.VAL ? -1 : VAL > RHS.VAL;
+    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 
-  return tcCompare(pVal, RHS.pVal, getNumWords());
+  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 }
 
 int APInt::compareSigned(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
   if (isSingleWord()) {
-    int64_t lhsSext = SignExtend64(VAL, BitWidth);
-    int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
+    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
+    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
   }
 
@@ -389,7 +385,7 @@ int APInt::compareSigned(const APInt& RHS) const {
 
   // Otherwise we can just use an unsigned comparison, because even negative
   // numbers compare correctly this way if both have the same signed-ness.
-  return tcCompare(pVal, RHS.pVal, getNumWords());
+  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 }
 
 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
@@ -409,19 +405,19 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
     if (hiWord == loWord)
       loMask &= hiMask;
     else
-      pVal[hiWord] |= hiMask;
+      U.pVal[hiWord] |= hiMask;
   }
   // Apply the mask to the low word.
-  pVal[loWord] |= loMask;
+  U.pVal[loWord] |= loMask;
 
   // Fill any words between loWord and hiWord with all ones.
   for (unsigned word = loWord + 1; word < hiWord; ++word)
-    pVal[word] = WORD_MAX;
+    U.pVal[word] = WORD_MAX;
 }
 
 /// @brief Toggle every bit to its opposite value.
 void APInt::flipAllBitsSlowCase() {
-  tcComplement(pVal, getNumWords());
+  tcComplement(U.pVal, getNumWords());
   clearUnusedBits();
 }
 
@@ -448,8 +444,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
   // Single word result can be done as a direct bitmask.
   if (isSingleWord()) {
     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
-    VAL &= ~(mask << bitPosition);
-    VAL |= (subBits.VAL << bitPosition);
+    U.VAL &= ~(mask << bitPosition);
+    U.VAL |= (subBits.U.VAL << bitPosition);
     return;
   }
 
@@ -460,8 +456,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
   // Insertion within a single word can be done as a direct bitmask.
   if (loWord == hi1Word) {
     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
-    pVal[loWord] &= ~(mask << loBit);
-    pVal[loWord] |= (subBits.VAL << loBit);
+    U.pVal[loWord] &= ~(mask << loBit);
+    U.pVal[loWord] |= (subBits.U.VAL << loBit);
     return;
   }
 
@@ -469,15 +465,15 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
   if (loBit == 0) {
     // Direct copy whole words.
     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
-    memcpy(pVal + loWord, subBits.getRawData(),
+    memcpy(U.pVal + loWord, subBits.getRawData(),
            numWholeSubWords * APINT_WORD_SIZE);
 
     // Mask+insert remaining bits.
     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
     if (remainingBits != 0) {
       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
-      pVal[hi1Word] &= ~mask;
-      pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
+      U.pVal[hi1Word] &= ~mask;
+      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
     }
     return;
   }
@@ -499,7 +495,7 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
          "Illegal bit extraction");
 
   if (isSingleWord())
-    return APInt(numBits, VAL >> bitPosition);
+    return APInt(numBits, U.VAL >> bitPosition);
 
   unsigned loBit = whichBit(bitPosition);
   unsigned loWord = whichWord(bitPosition);
@@ -507,12 +503,12 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 
   // Single word result extracting bits from a single word source.
   if (loWord == hiWord)
-    return APInt(numBits, pVal[loWord] >> loBit);
+    return APInt(numBits, U.pVal[loWord] >> loBit);
 
   // Extracting bits that start on a source word boundary can be done
   // as a fast memory copy.
   if (loBit == 0)
-    return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
+    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 
   // General case - shift + copy source words directly into place.
   APInt Result(numBits, 0);
@@ -520,10 +516,10 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
   unsigned NumDstWords = Result.getNumWords();
 
   for (unsigned word = 0; word < NumDstWords; ++word) {
-    uint64_t w0 = pVal[loWord + word];
+    uint64_t w0 = U.pVal[loWord + word];
     uint64_t w1 =
-        (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
-    Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
+        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
+    Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
   }
 
   return Result.clearUnusedBits();
@@ -584,9 +580,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 
 hash_code llvm::hash_value(const APInt &Arg) {
   if (Arg.isSingleWord())
-    return hash_combine(Arg.VAL);
+    return hash_combine(Arg.U.VAL);
 
-  return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
+  return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 }
 
 bool APInt::isSplat(unsigned SplatSizeInBits) const {
@@ -623,7 +619,7 @@ APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 unsigned APInt::countLeadingZerosSlowCase() const {
   unsigned Count = 0;
   for (int i = getNumWords()-1; i >= 0; --i) {
-    uint64_t V = pVal[i];
+    uint64_t V = U.pVal[i];
     if (V == 0)
       Count += APINT_BITS_PER_WORD;
     else {
@@ -639,7 +635,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
 
 unsigned APInt::countLeadingOnes() const {
   if (isSingleWord())
-    return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
+    return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
 
   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
   unsigned shift;
@@ -650,13 +646,13 @@ unsigned APInt::countLeadingOnes() const {
     shift = APINT_BITS_PER_WORD - highWordBits;
   }
   int i = getNumWords() - 1;
-  unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
+  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
   if (Count == highWordBits) {
     for (i--; i >= 0; --i) {
-      if (pVal[i] == WORD_MAX)
+      if (U.pVal[i] == WORD_MAX)
         Count += APINT_BITS_PER_WORD;
       else {
-        Count += llvm::countLeadingOnes(pVal[i]);
+        Count += llvm::countLeadingOnes(U.pVal[i]);
         break;
       }
     }
@@ -666,23 +662,23 @@ unsigned APInt::countLeadingOnes() const {
 
 unsigned APInt::countTrailingZeros() const {
   if (isSingleWord())
-    return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
+    return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
   unsigned Count = 0;
   unsigned i = 0;
-  for (; i < getNumWords() && pVal[i] == 0; ++i)
+  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
     Count += APINT_BITS_PER_WORD;
   if (i < getNumWords())
-    Count += llvm::countTrailingZeros(pVal[i]);
+    Count += llvm::countTrailingZeros(U.pVal[i]);
   return std::min(Count, BitWidth);
 }
 
 unsigned APInt::countTrailingOnesSlowCase() const {
   unsigned Count = 0;
   unsigned i = 0;
-  for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i)
+  for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
     Count += APINT_BITS_PER_WORD;
   if (i < getNumWords())
-    Count += llvm::countTrailingOnes(pVal[i]);
+    Count += llvm::countTrailingOnes(U.pVal[i]);
   assert(Count <= BitWidth);
   return Count;
 }
@@ -690,13 +686,13 @@ unsigned APInt::countTrailingOnesSlowCase() const {
 unsigned APInt::countPopulationSlowCase() const {
   unsigned Count = 0;
   for (unsigned i = 0; i < getNumWords(); ++i)
-    Count += llvm::countPopulation(pVal[i]);
+    Count += llvm::countPopulation(U.pVal[i]);
   return Count;
 }
 
 bool APInt::intersectsSlowCase(const APInt &RHS) const {
   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
-    if ((pVal[i] & RHS.pVal[i]) != 0)
+    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
       return true;
 
   return false;
@@ -704,7 +700,7 @@ bool APInt::intersectsSlowCase(const APInt &RHS) const {
 
 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
-    if ((pVal[i] & ~RHS.pVal[i]) != 0)
+    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
       return false;
 
   return true;
@@ -713,22 +709,22 @@ bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 APInt APInt::byteSwap() const {
   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
   if (BitWidth == 16)
-    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
+    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
   if (BitWidth == 32)
-    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
+    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
   if (BitWidth == 48) {
-    unsigned Tmp1 = unsigned(VAL >> 16);
+    unsigned Tmp1 = unsigned(U.VAL >> 16);
     Tmp1 = ByteSwap_32(Tmp1);
-    uint16_t Tmp2 = uint16_t(VAL);
+    uint16_t Tmp2 = uint16_t(U.VAL);
     Tmp2 = ByteSwap_16(Tmp2);
     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
   }
   if (BitWidth == 64)
-    return APInt(BitWidth, ByteSwap_64(VAL));
+    return APInt(BitWidth, ByteSwap_64(U.VAL));
 
   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
-    Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
+    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
   if (Result.BitWidth != BitWidth) {
     Result.lshrInPlace(Result.BitWidth - BitWidth);
     Result.BitWidth = BitWidth;
@@ -739,13 +735,13 @@ APInt APInt::byteSwap() const {
 APInt APInt::reverseBits() const {
   switch (BitWidth) {
   case 64:
-    return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
   case 32:
-    return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
   case 16:
-    return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
   case 8:
-    return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
+    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
   default:
     break;
   }
@@ -890,13 +886,13 @@ double APInt::roundToDouble(bool isSigned) const {
   uint64_t mantissa;
   unsigned hiWord = whichWord(n-1);
   if (hiWord == 0) {
-    mantissa = Tmp.pVal[0];
+    mantissa = Tmp.U.pVal[0];
     if (n > 52)
       mantissa >>= n - 52; // shift down, we want the top 52 bits.
   } else {
     assert(hiWord > 0 && "huh?");
-    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
-    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
     mantissa = hibits | lobits;
   }
 
@@ -923,12 +919,12 @@ APInt APInt::trunc(unsigned width) const {
   // Copy full words.
   unsigned i;
   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
-    Result.pVal[i] = pVal[i];
+    Result.U.pVal[i] = U.pVal[i];
 
   // Truncate and copy any partial word.
   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
   if (bits != 0)
-    Result.pVal[i] = pVal[i] << bits >> bits;
+    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 
   return Result;
 }
@@ -938,20 +934,20 @@ APInt APInt::sext(unsigned Width) const {
   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 
   if (Width <= APINT_BITS_PER_WORD)
-    return APInt(Width, SignExtend64(VAL, BitWidth));
+    return APInt(Width, SignExtend64(U.VAL, BitWidth));
 
   APInt Result(getMemory(getNumWords(Width)), Width);
 
   // Copy words.
-  std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
+  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 
   // Sign extend the last word since there may be unused bits in the input.
-  Result.pVal[getNumWords() - 1] =
-      SignExtend64(Result.pVal[getNumWords() - 1],
+  Result.U.pVal[getNumWords() - 1] =
+      SignExtend64(Result.U.pVal[getNumWords() - 1],
                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 
   // Fill with sign bits.
-  std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0,
+  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
   Result.clearUnusedBits();
   return Result;
@@ -962,15 +958,15 @@ APInt APInt::zext(unsigned width) const {
   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 
   if (width <= APINT_BITS_PER_WORD)
-    return APInt(width, VAL);
+    return APInt(width, U.VAL);
 
   APInt Result(getMemory(getNumWords(width)), width);
 
   // Copy words.
-  std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
+  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 
   // Zero remaining words.
-  std::memset(Result.pVal + getNumWords(), 0,
+  std::memset(Result.U.pVal + getNumWords(), 0,
               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 
   return Result;
@@ -1027,28 +1023,28 @@ void APInt::ashrSlowCase(unsigned ShiftAmt) {
   unsigned WordsToMove = getNumWords() - WordShift;
   if (WordsToMove != 0) {
     // Sign extend the last word to fill in the unused bits.
-    pVal[getNumWords() - 1] = SignExtend64(
-        pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
+    U.pVal[getNumWords() - 1] = SignExtend64(
+        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 
     // Fastpath for moving by whole words.
     if (BitShift == 0) {
-      std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
+      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
     } else {
       // Move the words containing significant bits.
       for (unsigned i = 0; i != WordsToMove - 1; ++i)
-        pVal[i] = (pVal[i + WordShift] >> BitShift) |
-                  (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
+        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
+                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
 
       // Handle the last word which has no high bits to copy.
-      pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift;
+      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
       // Sign extend one more time.
-      pVal[WordsToMove - 1] =
-          SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
+      U.pVal[WordsToMove - 1] =
+          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
     }
   }
 
   // Fill in the remainder based on the original sign.
-  std::memset(pVal + WordsToMove, Negative ? -1 : 0,
+  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
               WordShift * APINT_WORD_SIZE);
   clearUnusedBits();
 }
@@ -1062,7 +1058,7 @@ void APInt::lshrInPlace(const APInt &shiftAmt) {
 /// Logical right-shift this APInt by shiftAmt.
 /// @brief Logical right-shift function.
 void APInt::lshrSlowCase(unsigned ShiftAmt) {
-  tcShiftRight(pVal, getNumWords(), ShiftAmt);
+  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
 }
 
 /// Left-shift this APInt by shiftAmt.
@@ -1074,7 +1070,7 @@ APInt &APInt::operator<<=(const APInt &shiftAmt) {
 }
 
 void APInt::shlSlowCase(unsigned ShiftAmt) {
-  tcShiftLeft(pVal, getNumWords(), ShiftAmt);
+  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
   clearUnusedBits();
 }
 
@@ -1137,7 +1133,7 @@ APInt APInt::sqrt() const {
       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       /*    31 */ 6
     };
-    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
+    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
   }
 
   // If the magnitude of the value fits in less than 52 bits (the precision of
@@ -1146,7 +1142,8 @@ APInt APInt::sqrt() const {
   // This should be faster than the algorithm below.
   if (magnitude < 52) {
     return APInt(BitWidth,
-                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
+                                                               : U.pVal[0])))));
   }
 
   // Okay, all the short cuts are exhausted. We must compute it. The following
@@ -1524,7 +1521,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
   // Initialize the dividend
   memset(U, 0, (m+n+1)*sizeof(unsigned));
   for (unsigned i = 0; i < lhsWords; ++i) {
-    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
+    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.U.VAL : LHS.U.pVal[i]);
     U[i * 2] = (unsigned)(tmp & mask);
     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
   }
@@ -1533,7 +1530,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
   // Initialize the divisor
   memset(V, 0, (n)*sizeof(unsigned));
   for (unsigned i = 0; i < rhsWords; ++i) {
-    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
+    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.U.VAL : RHS.U.pVal[i]);
     V[i * 2] = (unsigned)(tmp & mask);
     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
   }
@@ -1593,12 +1590,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
     // Set up the Quotient value's memory.
     if (Quotient->BitWidth != LHS.BitWidth) {
       if (Quotient->isSingleWord())
-        Quotient->VAL = 0;
+        Quotient->U.VAL = 0;
       else
-        delete [] Quotient->pVal;
+        delete [] Quotient->U.pVal;
       Quotient->BitWidth = LHS.BitWidth;
       if (!Quotient->isSingleWord())
-        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
+        Quotient->U.pVal = getClearedMemory(Quotient->getNumWords());
     } else
       Quotient->clearAllBits();
 
@@ -1610,13 +1607,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
       uint64_t tmp =
         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
       if (Quotient->isSingleWord())
-        Quotient->VAL = tmp;
+        Quotient->U.VAL = tmp;
       else
-        Quotient->pVal[0] = tmp;
+        Quotient->U.pVal[0] = tmp;
     } else {
       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
       for (unsigned i = 0; i < lhsWords; ++i)
-        Quotient->pVal[i] =
+        Quotient->U.pVal[i] =
           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
     }
   }
@@ -1626,12 +1623,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
     // Set up the Remainder value's memory.
     if (Remainder->BitWidth != RHS.BitWidth) {
       if (Remainder->isSingleWord())
-        Remainder->VAL = 0;
+        Remainder->U.VAL = 0;
       else
-        delete [] Remainder->pVal;
+        delete [] Remainder->U.pVal;
       Remainder->BitWidth = RHS.BitWidth;
       if (!Remainder->isSingleWord())
-        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
+        Remainder->U.pVal = getClearedMemory(Remainder->getNumWords());
     } else
       Remainder->clearAllBits();
 
@@ -1641,13 +1638,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
       uint64_t tmp =
         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
       if (Remainder->isSingleWord())
-        Remainder->VAL = tmp;
+        Remainder->U.VAL = tmp;
       else
-        Remainder->pVal[0] = tmp;
+        Remainder->U.pVal[0] = tmp;
     } else {
       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
       for (unsigned i = 0; i < rhsWords; ++i)
-        Remainder->pVal[i] =
+        Remainder->U.pVal[i] =
           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
     }
   }
@@ -1666,8 +1663,8 @@ APInt APInt::udiv(const APInt& RHS) const {
 
   // First, deal with the easy case
   if (isSingleWord()) {
-    assert(RHS.VAL != 0 && "Divide by zero?");
-    return APInt(BitWidth, VAL / RHS.VAL);
+    assert(RHS.U.VAL != 0 && "Divide by zero?");
+    return APInt(BitWidth, U.VAL / RHS.U.VAL);
   }
 
   // Get some facts about the LHS and RHS number of bits and words
@@ -1689,7 +1686,7 @@ APInt APInt::udiv(const APInt& RHS) const {
     return APInt(BitWidth, 1);
   } else if (lhsWords == 1 && rhsWords == 1) {
     // All high words are zero, just use native divide
-    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
+    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
   }
 
   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
@@ -1712,8 +1709,8 @@ APInt APInt::sdiv(const APInt &RHS) const {
 APInt APInt::urem(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord()) {
-    assert(RHS.VAL != 0 && "Remainder by zero?");
-    return APInt(BitWidth, VAL % RHS.VAL);
+    assert(RHS.U.VAL != 0 && "Remainder by zero?");
+    return APInt(BitWidth, U.VAL % RHS.U.VAL);
   }
 
   // Get some facts about the LHS
@@ -1737,7 +1734,7 @@ APInt APInt::urem(const APInt& RHS) const {
     return APInt(BitWidth, 0);
   } else if (lhsWords == 1) {
     // All high words are zero, just use native remainder
-    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
+    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
   }
 
   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
@@ -1763,9 +1760,9 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
 
   // First, deal with the easy case
   if (LHS.isSingleWord()) {
-    assert(RHS.VAL != 0 && "Divide by zero?");
-    uint64_t QuotVal = LHS.VAL / RHS.VAL;
-    uint64_t RemVal = LHS.VAL % RHS.VAL;
+    assert(RHS.U.VAL != 0 && "Divide by zero?");
+    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
+    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
     Quotient = APInt(LHS.BitWidth, QuotVal);
     Remainder = APInt(LHS.BitWidth, RemVal);
     return;
@@ -1798,8 +1795,8 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
 
   if (lhsWords == 1 && rhsWords == 1) {
     // There is only one word to consider so use the native versions.
-    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
-    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
+    uint64_t lhsValue = LHS.isSingleWord() ? LHS.U.VAL : LHS.U.pVal[0];
+    uint64_t rhsValue = RHS.isSingleWord() ? RHS.U.VAL : RHS.U.pVal[0];
     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
     return;
@@ -1926,9 +1923,11 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
          "Insufficient bit width");
 
-  // Allocate memory
-  if (!isSingleWord())
-    pVal = getClearedMemory(getNumWords());
+  // Allocate memory if needed
+  if (isSingleWord())
+    U.VAL = 0;
+  else
+    U.pVal = getClearedMemory(getNumWords());
 
   // Figure out if we can shift instead of multiply
   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index c7a2e0d..702d987 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -93,3 +93,19 @@ uint8_t BinaryStreamReader::peek() const {
   llvm::consumeError(std::move(EC));
   return Buffer[0];
 }
+
+std::pair<BinaryStreamReader, BinaryStreamReader>
+BinaryStreamReader::split(uint32_t Off) const {
+  assert(getLength() >= Off);
+
+  // View of the stream starting at the current read offset.
+  BinaryStreamRef First = Stream.drop_front(Offset);
+
+  // Second takes everything past the first Off bytes; First keeps just those
+  // Off bytes.
+  BinaryStreamRef Second = First.drop_front(Off);
+  First = First.keep_front(Off);
+  BinaryStreamReader R1{First};
+  BinaryStreamReader R2{Second};
+  return std::make_pair(R1, R2);
+}
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
index d60b756..d78dbc6 100644
--- a/lib/Support/BinaryStreamWriter.cpp
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -59,6 +59,22 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
   return Error::success();
 }
 
+std::pair<BinaryStreamWriter, BinaryStreamWriter>
+BinaryStreamWriter::split(uint32_t Off) const {
+  assert(getLength() >= Off);
+
+  // View of the stream starting at the current write offset.
+  WritableBinaryStreamRef Head = Stream.drop_front(Offset);
+
+  // Carve the tail off before trimming the head down to its first Off bytes.
+  WritableBinaryStreamRef Tail = Head.drop_front(Off);
+  Head = Head.keep_front(Off);
+
+  BinaryStreamWriter HeadWriter{Head};
+  BinaryStreamWriter TailWriter{Tail};
+  return std::make_pair(HeadWriter, TailWriter);
+}
+
 Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
   uint32_t NewOffset = alignTo(Offset, Align);
   if (NewOffset > getLength())
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 5d6d60a..53c10bc 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -128,6 +128,18 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
   return nullptr;
 }
 
+StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const {
+  const uint32_t Begin = *OffsetPtr;
+  const StringRef::size_type Nul = Data.find('\0', Begin);
+  // No terminator at or after Begin: leave *OffsetPtr untouched.
+  if (Nul == StringRef::npos)
+    return StringRef();
+
+  // Step past the terminator; the returned string excludes it.
+  *OffsetPtr = Nul + 1;
+  return StringRef(Data.data() + Begin, Nul - Begin);
+}
+
 uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
   uint64_t result = 0;
   if (Data.empty())
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 970ecfd..6a0b64f 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1363,6 +1363,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
   Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
   Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
+  Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
   Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
   Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
   Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index f3a654d..eb81089 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -459,7 +459,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("kfreebsd", Triple::KFreeBSD)
     .StartsWith("linux", Triple::Linux)
     .StartsWith("lv2", Triple::Lv2)
-    .StartsWith("macosx", Triple::MacOSX)
+    .StartsWith("macos", Triple::MacOSX)
     .StartsWith("netbsd", Triple::NetBSD)
     .StartsWith("openbsd", Triple::OpenBSD)
     .StartsWith("solaris", Triple::Solaris)
@@ -984,6 +984,8 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
   StringRef OSTypeName = getOSTypeName(getOS());
   if (OSName.startswith(OSTypeName))
     OSName = OSName.substr(OSTypeName.size());
+  else if (getOS() == MacOSX)
+    OSName.consume_front("macos");
 
   parseVersionFromName(OSName, Major, Minor, Micro);
 }
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index b2f55a7..ff3e4c4 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -247,7 +247,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   unsigned i = 0;
   for (auto &Arg : F.args()) {
     ArgInfo OrigArg{VRegs[i], Arg.getType()};
-    setArgFlags(OrigArg, i + 1, DL, F);
+    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
     bool Split = false;
     LLT Ty = MRI.getType(VRegs[i]);
     unsigned Dst = VRegs[i];
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 4fb262c..36dcc69 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -677,12 +677,19 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
   }
 
   const Function &ContainingFunction = *I.getParent()->getParent();
+  CallingConv::ID CC = ContainingFunction.getCallingConv();
 
   // Don't promote the alloca to LDS for shader calling conventions as the work
   // item ID intrinsics are not supported for these calling conventions.
   // Furthermore not all LDS is available for some of the stages.
-  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+    break;
+  default:
+    DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
     return;
+  }
 
   const AMDGPUSubtarget &ST =
     TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 005b74a..46fd1f7 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -577,6 +577,7 @@ def : Processor<"cortex-m0plus",    ARMV6Itineraries,   [ARMv6m]>;
 def : Processor<"cortex-m1",        ARMV6Itineraries,   [ARMv6m]>;
 def : Processor<"sc000",            ARMV6Itineraries,   [ARMv6m]>;
 
+def : Processor<"arm1176j-s",       ARMV6Itineraries,   [ARMv6kz]>;
 def : Processor<"arm1176jz-s",      ARMV6Itineraries,   [ARMv6kz]>;
 def : Processor<"arm1176jzf-s",     ARMV6Itineraries,   [ARMv6kz,
                                                          FeatureVFP2,
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a818841..9178c67 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -354,7 +354,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   unsigned Idx = 0;
   for (auto &Arg : F.args()) {
     ArgInfo AInfo(VRegs[Idx], Arg.getType());
-    setArgFlags(AInfo, Idx + 1, DL, F);
+    setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
     splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo());
     Idx++;
   }
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index e9df944..7f9fe55 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -740,7 +740,9 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     unsigned PowerOfTwo = 0;
     SDValue NewMulConst;
     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+      HandleSDNode Handle(Offset);
       replaceDAGValue(Offset.getOperand(1), NewMulConst);
+      Offset = Handle.getValue();
       ShAmt = PowerOfTwo;
       ShOpcVal = ARM_AM::lsl;
     }
@@ -1420,7 +1422,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
     unsigned PowerOfTwo = 0;
     SDValue NewMulConst;
     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+      HandleSDNode Handle(OffReg);
       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+      OffReg = Handle.getValue();
       ShAmt = PowerOfTwo;
     }
   }
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index 25232d2..c297865 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -228,9 +228,11 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
 bool AVRFrameLowering::hasFP(const MachineFunction &MF) const {
   const AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
 
-  // TODO: We do not always need a frame pointer.
-  // This can be optimised.
-  return true;
+  // A frame pointer is reported only when the function info records spills,
+  // allocas, or stack arguments.
+  if (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas())
+    return true;
+  return FuncInfo->getHasStackArgs();
 }
 
 bool AVRFrameLowering::spillCalleeSavedRegisters(
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 5cc7eaf..2813e24 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -65,12 +65,18 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(AVR::SPH);
   Reserved.set(AVR::SP);
 
-  // Reserve the frame pointer registers r28 and r29 if the function requires one.
-  if (TFI->hasFP(MF)) {
-    Reserved.set(AVR::R28);
-    Reserved.set(AVR::R29);
-    Reserved.set(AVR::R29R28);
-  }
+  // We tentatively reserve the frame pointer register r29:r28 because the
+  // function may require one, but we cannot tell until register allocation
+  // is complete, which can be too late.
+  //
+  // Instead we just unconditionally reserve the Y register.
+  //
+  // TODO: Write a pass to enumerate functions which reserved the Y register
+  //       but didn't end up needing a frame pointer. In these, we can
+  //       convert one or two of the spills inside to use the Y register.
+  Reserved.set(AVR::R28);
+  Reserved.set(AVR::R29);
+  Reserved.set(AVR::R29R28);
 
   return Reserved;
 }
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 42ff9cc..b0b2644 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -307,7 +307,7 @@ public:
   bool iss31_1Imm() const { return true; }
   bool iss30_2Imm() const { return true; }
   bool iss29_3Imm() const { return true; }
-  bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); }
+  bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); }
   bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
   bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
   bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
@@ -1292,13 +1292,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
   case Hexagon::A2_iconst: {
     Inst.setOpcode(Hexagon::A2_addi);
     MCOperand Reg = Inst.getOperand(0);
-    MCOperand S16 = Inst.getOperand(1);
-    HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr());
-    HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr());
+    MCOperand S27 = Inst.getOperand(1);
+    HexagonMCInstrInfo::setMustNotExtend(*S27.getExpr());
+    HexagonMCInstrInfo::setS27_2_reloc(*S27.getExpr());
     Inst.clear();
     Inst.addOperand(Reg);
     Inst.addOperand(MCOperand::createReg(Hexagon::R0));
-    Inst.addOperand(S16);
+    Inst.addOperand(S27);
     break;
   }
   case Hexagon::M4_mpyrr_addr:
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index c8483f7..bb5128e 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -298,7 +298,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
     MCOperand Reg = Inst.getOperand(0);
     MCOperand S16 = Inst.getOperand(1);
     HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr());
-    HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr());
+    HexagonMCInstrInfo::setS27_2_reloc(*S16.getExpr());
     Inst.clear();
     Inst.addOperand(Reg);
     Inst.addOperand(MCOperand::createReg(Hexagon::R0));
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index e5eb059..861af94 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1720,8 +1720,13 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
   InFlag = Chain.getValue(1);
 
+  unsigned Flags =
+      static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
+          ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
+          : HexagonII::MO_GDPLT;
+
   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
-                           Hexagon::R0, HexagonII::MO_GDPLT);
+                           Hexagon::R0, Flags);
 }
 
 //
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 7d1da5c..709d645 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -7,16 +7,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Maintain list of valid subtargets for each instruction.
-class SubTarget<bits<6> value> {
-  bits<6> Value = value;
-}
-
-def HasAnySubT    : SubTarget<0x3f>;  // 111111
-def HasV5SubT     : SubTarget<0x3e>;  // 111110
-def HasV55SubT    : SubTarget<0x3c>;  // 111100
-def HasV60SubT    : SubTarget<0x38>;  // 111000
-
 // Addressing modes for load/store instructions
 class AddrModeType<bits<3> value> {
   bits<3> Value = value;
@@ -131,12 +121,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
   bits<2> opExtentAlign = 0;
   let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
 
-  // If an instruction is valid on a subtarget, set the corresponding
-  // bit from validSubTargets.
-  // By default, instruction is valid on all subtargets.
-  SubTarget validSubTargets = HasAnySubT;
-  let TSFlags{40-35} = validSubTargets.Value;
-
   // Addressing mode for load/store instructions.
   AddrModeType addrMode = NoAddrMode;
   let TSFlags{43-41} = addrMode.Value;
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index 1c46ae7..b913727 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -15,8 +15,6 @@
 //                         Instruction Classes Definitions +
 //----------------------------------------------------------------------------//
 
-let validSubTargets = HasV60SubT in
-{
 class CVI_VA_Resource<dag outs, dag ins, string asmstr,
                        list<dag> pattern = [], string cstr = "",
                        InstrItinClass itin = CVI_VA>
@@ -184,10 +182,7 @@ class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
                         InstrItinClass itin = CVI_HIST>
    : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
      OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
-}
 
-let validSubTargets = HasV60SubT in
-{
 class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
                        list<dag> pattern = [], string cstr = "",
                        InstrItinClass itin = CVI_VA>
@@ -205,6 +200,3 @@ class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
                         InstrItinClass itin = CVI_HIST>
    : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
      Requires<[HasV60T, UseHVX]>;
-}
-
-
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index b265a88..852bfb1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -869,6 +869,9 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   unsigned KillFlag = getKillRegState(isKill);
+  bool HasAlloca = MFI.hasVarSizedObjects();
+  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  const HexagonFrameLowering &HFI = *HST.getFrameLowering();
 
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
@@ -899,24 +902,36 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B
                                : Hexagon::V6_vS32b_ai_128B;
     BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai
                               : Hexagon::V6_vS32b_ai;
     BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai
                               : Hexagon::PS_vstorerw_ai;
     BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B
                                : Hexagon::PS_vstorerw_ai_128B;
     BuildMI(MBB, I, DL, get(Opc))
@@ -935,6 +950,9 @@ void HexagonInstrInfo::loadRegFromStackSlot(
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
+  bool HasAlloca = MFI.hasVarSizedObjects();
+  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  const HexagonFrameLowering &HFI = *HST.getFrameLowering();
 
   MachineMemOperand *MMO = MF.getMachineMemOperand(
       MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
@@ -959,21 +977,33 @@ void HexagonInstrInfo::loadRegFromStackSlot(
     BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B
                                : Hexagon::PS_vloadrw_ai_128B;
     BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B
                                : Hexagon::V6_vL32b_ai_128B;
     BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai
                               : Hexagon::V6_vL32b_ai;
     BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
+    // If there are variable-sized objects, spills will not be aligned.
+    if (HasAlloca)
+      Align = HFI.getStackAlignment();
     unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai
                               : Hexagon::PS_vloadrw_ai;
     BuildMI(MBB, I, DL, get(Opc), DestReg)
@@ -1110,8 +1140,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
       MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
                                      HRI.getSubReg(DstReg, Hexagon::vsub_lo))
-                                 .add(MI.getOperand(1))
-                                 .addImm(MI.getOperand(2).getImm());
+              .add(MI.getOperand(1))
+              .addImm(MI.getOperand(2).getImm())
+              .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
       MI1New->getOperand(1).setIsKill(false);
       BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
           .add(MI.getOperand(1))
@@ -1940,7 +1971,7 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
   case Hexagon::L4_return_fnew_pnt :
   case Hexagon::L4_return_tnew_pt :
   case Hexagon::L4_return_fnew_pt :
-   return true;
+    return true;
   }
   return false;
 }
@@ -1967,12 +1998,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
       if (RegA == RegB)
         return true;
 
-      if (Hexagon::DoubleRegsRegClass.contains(RegA))
+      if (TargetRegisterInfo::isPhysicalRegister(RegA))
         for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
           if (RegB == *SubRegs)
             return true;
 
-      if (Hexagon::DoubleRegsRegClass.contains(RegB))
+      if (TargetRegisterInfo::isPhysicalRegister(RegB))
         for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
           if (RegA == *SubRegs)
             return true;
@@ -2139,7 +2170,7 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
 bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI,
       unsigned offset) const {
   // This selection of jump instructions matches to that what
-  // AnalyzeBranch can parse, plus NVJ.
+  // analyzeBranch can parse, plus NVJ.
   if (isNewValueJump(MI)) // r9:2
     return isInt<11>(offset);
 
@@ -2666,6 +2697,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
   case Hexagon::L2_loadrh_io:
   case Hexagon::L2_loadruh_io:
   case Hexagon::S2_storerh_io:
+  case Hexagon::S2_storerf_io:
     return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
       (Offset <= Hexagon_MEMH_OFFSET_MAX);
 
@@ -2876,6 +2908,11 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
 /// \brief Can these instructions execute at the same time in a bundle.
 bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
       const MachineInstr &Second) const {
+  if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) {
+    const MachineOperand &Op = Second.getOperand(0);
+    if (Op.isReg() && Op.isUse() && Op.getReg() == Hexagon::R29)
+      return true;
+  }
   if (DisableNVSchedule)
     return false;
   if (mayBeNewStore(Second)) {
@@ -3000,13 +3037,9 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
   MachineBasicBlock::const_instr_iterator MII = BII;
   MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end();
 
-  if (!MII->isBundle()) {
+  if (!(*MII).isBundle()) {
     const MachineInstr &J = *MII;
-    if (!isV60VectorInstruction(J))
-      return false;
-    else if (isVecUsableNextPacket(J, MI))
-      return false;
-    return true;
+    return producesStall(J, MI);
   }
 
   for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) {
@@ -3034,12 +3067,14 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
 }
 
 bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
-  return (Opcode == Hexagon::J2_jumpt)      ||
-         (Opcode == Hexagon::J2_jumpf)      ||
-         (Opcode == Hexagon::J2_jumptnew)   ||
-         (Opcode == Hexagon::J2_jumpfnew)   ||
-         (Opcode == Hexagon::J2_jumptnewpt) ||
-         (Opcode == Hexagon::J2_jumpfnewpt);
+  return Opcode == Hexagon::J2_jumpt      ||
+         Opcode == Hexagon::J2_jumptpt    ||
+         Opcode == Hexagon::J2_jumpf      ||
+         Opcode == Hexagon::J2_jumpfpt    ||
+         Opcode == Hexagon::J2_jumptnew   ||
+         Opcode == Hexagon::J2_jumpfnew   ||
+         Opcode == Hexagon::J2_jumptnewpt ||
+         Opcode == Hexagon::J2_jumpfnewpt;
 }
 
 bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
@@ -3341,9 +3376,30 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
   return 0;
 }
 
+// Map MI's .cur opcode to its regular (non-.cur) counterpart.
+int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  default: llvm_unreachable("Unknown .cur type");
+  case Hexagon::V6_vL32b_cur_pi:
+    return Hexagon::V6_vL32b_pi;
+  case Hexagon::V6_vL32b_cur_ai:
+    return Hexagon::V6_vL32b_ai;
+  // The same two opcodes in their _128B variants.
+  case Hexagon::V6_vL32b_cur_pi_128B:
+    return Hexagon::V6_vL32b_pi_128B;
+  case Hexagon::V6_vL32b_cur_ai_128B:
+    return Hexagon::V6_vL32b_ai_128B;
+  }
+  return 0; // Every switch path above returns or is llvm_unreachable.
+}
+
+
 // The diagram below shows the steps involved in the conversion of a predicated
 // store instruction to its .new predicated new-value form.
 //
+// Note: It doesn't include conditional new-value stores as they can't be
+// converted to .new predicate.
+//
 //               p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
 //                ^           ^
 //               /             \ (not OK. it will cause new-value store to be
@@ -3564,11 +3620,11 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
 }
 
 int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
   int NewOp = MI.getOpcode();
   if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
     NewOp = Hexagon::getPredOldOpcode(NewOp);
-    const MachineFunction &MF = *MI.getParent()->getParent();
-    const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
     // All Hexagon architectures have prediction bits on dot-new branches,
     // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure
     // to pick the right opcode when converting back to dot-old.
@@ -3596,6 +3652,21 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
     NewOp = Hexagon::getNonNVStore(NewOp);
     assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
   }
+
+  if (HST.hasV60TOps())
+    return NewOp;
+
+  // Subtargets prior to V60 didn't support 'taken' forms of predicated jumps.
+  switch (NewOp) {
+  case Hexagon::J2_jumpfpt:
+    return Hexagon::J2_jumpf;
+  case Hexagon::J2_jumptpt:
+    return Hexagon::J2_jumpt;
+  case Hexagon::J2_jumprfpt:
+    return Hexagon::J2_jumprf;
+  case Hexagon::J2_jumprtpt:
+    return Hexagon::J2_jumprt;
+  }
   return NewOp;
 }
 
@@ -3947,18 +4018,6 @@ short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const {
   return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real);
 }
 
-// Return first non-debug instruction in the basic block.
-MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
-      const {
-  for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
-    MachineInstr &MI = *MII;
-    if (MI.isDebugValue())
-      continue;
-    return &MI;
-  }
-  return nullptr;
-}
-
 unsigned HexagonInstrInfo::getInstrTimingClassLatency(
       const InstrItineraryData *ItinData, const MachineInstr &MI) const {
   // Default to one cycle for no itinerary. However, an "empty" itinerary may
@@ -4139,11 +4198,6 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
   return IS.getUnits();
 }
 
-unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
-  const uint64_t F = get(Opcode).TSFlags;
-  return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
-}
-
 // Calculate size of the basic block without debug instructions.
 unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
   return nonDbgMICount(BB->instr_begin(), BB->instr_end());
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index b268c7a..21b4f73 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -399,6 +399,7 @@ public:
                              const MachineInstr &GB) const;
   int getCondOpcode(int Opc, bool sense) const;
   int getDotCurOp(const MachineInstr &MI) const;
+  int getNonDotCurOp(const MachineInstr &MI) const;
   int getDotNewOp(const MachineInstr &MI) const;
   int getDotNewPredJumpOp(const MachineInstr &MI,
                           const MachineBranchProbabilityInfo *MBPI) const;
@@ -424,7 +425,6 @@ public:
   unsigned getSize(const MachineInstr &MI) const;
   uint64_t getType(const MachineInstr &MI) const;
   unsigned getUnits(const MachineInstr &MI) const;
-  unsigned getValidSubTargets(const unsigned Opcode) const;
 
   /// getInstrTimingClassLatency - Compute the instruction latency of a given
   /// instruction using Timing Class information, if available.
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 7189b5a..072501d 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -39,7 +39,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
   // Populate the relocation type based on Hexagon target flags
   // set on an operand
   MCSymbolRefExpr::VariantKind RelocationType;
-  switch (MO.getTargetFlags()) {
+  switch (MO.getTargetFlags() & ~HexagonII::HMOTF_ConstExtended) {
   default:
     RelocationType = MCSymbolRefExpr::VK_None;
     break;
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index f87a1b8..f80e0ef 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -14,8 +14,8 @@ def f64Imm : Operand<f64> { let ParserMatchClass = f64ImmOperand; }
 def s8_0Imm64Pred  : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>;
 def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; }
 def s9_0Imm : Operand<i32> { let ParserMatchClass = s9_0ImmOperand; }
-def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; }
-def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
+def s27_2ImmOperand : AsmOperandClass { let Name = "s27_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s27_2Imm : Operand<i32> { let ParserMatchClass = s27_2ImmOperand; }
 def r32_0ImmPred  : PatLeaf<(i32 imm), [{
   int64_t v = (int64_t)N->getSExtValue();
   return isInt<32>(v);
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index 5a720e7..2e8def5 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -14,8 +14,11 @@ let PrintMethod = "printGlobalOperand" in {
 
 let isPseudo = 1 in {
 let isCodeGenOnly = 0 in
-def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">;
-def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>;
+def A2_iconst : Pseudo<(outs IntRegs:$Rd32),
+    (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">;
+
+def DUPLEX_Pseudo : InstHexagon<(outs),
+    (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>;
 }
 
 let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
@@ -321,7 +324,7 @@ def LDriw_mod : LDInst<(outs ModRegs:$dst),
 
 // Vector load
 let Predicates = [HasV60T, UseHVX] in
-let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+let mayLoad = 1, hasSideEffects = 0 in
   class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
                   string cstr = "", InstrItinClass itin = CVI_VM_LD,
                   IType type = TypeCVI_VM_LD>
@@ -329,7 +332,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
 
 // Vector store
 let Predicates = [HasV60T, UseHVX] in
-let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+let mayStore = 1, hasSideEffects = 0 in
 class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
                 string cstr = "", InstrItinClass itin = CVI_VM_ST,
                 IType type = TypeCVI_VM_ST>
@@ -415,7 +418,7 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
 
 // Vector load/store pseudos
 
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+let isPseudo = 1, isCodeGenOnly = 1 in
 class STrivv_template<RegisterClass RC>
   : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
 
@@ -429,7 +432,7 @@ def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
       Requires<[HasV60T,UseHVXDbl]>;
 
 
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+let isPseudo = 1, isCodeGenOnly = 1 in
 class LDrivv_template<RegisterClass RC>
   : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
 
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 3a789a5..bf1dce6 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -356,7 +356,7 @@ void HexagonPacketizerList::cleanUpDotCur() {
   MachineInstr *MI = nullptr;
   for (auto BI : CurrentPacketMIs) {
     DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
-    if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+    if (HII->isDotCurInst(*BI)) {
       MI = BI;
       continue;
     }
@@ -369,7 +369,7 @@ void HexagonPacketizerList::cleanUpDotCur() {
   if (!MI)
     return;
   // We did not find a use of the CUR, so de-cur it.
-  MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+  MI->setDesc(HII->get(HII->getNonDotCurOp(*MI)));
   DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
 }
 
@@ -1579,14 +1579,13 @@ MachineBasicBlock::iterator
 HexagonPacketizerList::addToPacket(MachineInstr &MI) {
   MachineBasicBlock::iterator MII = MI.getIterator();
   MachineBasicBlock *MBB = MI.getParent();
-  if (MI.isImplicitDef()) {
-    unsigned R = MI.getOperand(0).getReg();
-    if (Hexagon::IntRegsRegClass.contains(R)) {
-      MCSuperRegIterator S(R, HRI, false);
-      MI.addOperand(MachineOperand::CreateReg(*S, true, true));
-    }
+
+  if (CurrentPacketMIs.size() == 0)
+    PacketStalls = false;
+  PacketStalls |= producesStall(MI);
+
+  if (MI.isImplicitDef())
     return MII;
-  }
   assert(ResourceTracker->canReserveResources(MI));
 
   bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
@@ -1677,6 +1676,11 @@ static bool isDependent(const MachineInstr &ProdMI,
 
 // V60 forward scheduling.
 bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
+  // If the packet already stalls, then ignore the stall from a subsequent
+  // instruction in the same packet.
+  if (PacketStalls)
+    return false;
+
   // Check whether the previous packet is in a different loop. If this is the
   // case, there is little point in trying to avoid a stall because that would
   // favor the rare case (loop entry) over the common case (loop iteration).
@@ -1699,6 +1703,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
       if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I))
         return true;
     }
+
     return false;
   }
 
@@ -1721,6 +1726,16 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
     }
   }
 
+  // Check if the latency is greater than one between this instruction and any
+  // instruction in the previous packet.
+  SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
+  for (auto J : OldPacketMIs) {
+    SUnit *SUJ = MIToSUnit[J];
+    for (auto &Pred : SUI->Preds)
+      if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
+        return true;
+  }
+
   return false;
 }
 
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 3f28dc5..adb92b6 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -34,6 +34,10 @@ class HexagonPacketizerList : public VLIWPacketizerList {
   // Track MIs with ignored dependence.
   std::vector<MachineInstr*> IgnoreDepMIs;
 
+  // Set to true if the packet contains an instruction that stalls with an
+  // instruction from the previous packet.
+  bool PacketStalls = false;
+
 protected:
   /// \brief A handle to the branch probability pass.
   const MachineBranchProbabilityInfo *MBPI;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index c3b6eb1..9044035 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -184,7 +184,11 @@ public:
       { "fixup_Hexagon_IE_GOT_11_X",    0,      32,     0 },
       { "fixup_Hexagon_TPREL_32_6_X",   0,      32,     0 },
       { "fixup_Hexagon_TPREL_16_X",     0,      32,     0 },
-      { "fixup_Hexagon_TPREL_11_X",     0,      32,     0 }
+      { "fixup_Hexagon_TPREL_11_X",     0,      32,     0 },
+      { "fixup_Hexagon_GD_PLT_B22_PCREL_X",0,     32,     MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_Hexagon_GD_PLT_B32_PCREL_X",0,     32,     MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_Hexagon_LD_PLT_B22_PCREL_X",0,     32,     MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_Hexagon_LD_PLT_B32_PCREL_X",0,     32,     MCFixupKindInfo::FKF_IsPCRel }
     };
 
     if (Kind < FirstTargetFixupKind)
@@ -291,6 +295,11 @@ public:
       case fixup_Hexagon_32_PCREL:
       case fixup_Hexagon_6_PCREL_X:
       case fixup_Hexagon_23_REG:
+      case fixup_Hexagon_27_REG:
+      case fixup_Hexagon_GD_PLT_B22_PCREL_X:
+      case fixup_Hexagon_GD_PLT_B32_PCREL_X:
+      case fixup_Hexagon_LD_PLT_B22_PCREL_X:
+      case fixup_Hexagon_LD_PLT_B32_PCREL_X:
         // These relocations should always have a relocation recorded
         IsResolved = false;
         return;
@@ -347,6 +356,8 @@ public:
       case fixup_Hexagon_B9_PCREL_X:
       case fixup_Hexagon_B7_PCREL:
       case fixup_Hexagon_B7_PCREL_X:
+      case fixup_Hexagon_GD_PLT_B32_PCREL_X:
+      case fixup_Hexagon_LD_PLT_B32_PCREL_X:
         return 4;
     }
   }
@@ -374,6 +385,8 @@ public:
         break;
 
       case fixup_Hexagon_B32_PCREL_X:
+      case fixup_Hexagon_GD_PLT_B32_PCREL_X:
+      case fixup_Hexagon_LD_PLT_B32_PCREL_X:
         Value >>= 6;
         break;
     }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 53d8b04..adb546d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -128,10 +128,6 @@ namespace HexagonII {
     ExtentAlignPos  = 33,
     ExtentAlignMask = 0x3,
 
-    // Valid subtargets
-    validSubTargetPos  = 35,
-    validSubTargetMask = 0x3f,
-
     // Addressing mode for load/store instructions.
     AddrModePos  = 41,
     AddrModeMask = 0x7,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 944e235..b975e31 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -284,6 +284,16 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_HEX_TPREL_11_X;
   case fixup_Hexagon_23_REG:
     return ELF::R_HEX_23_REG;
+  case fixup_Hexagon_27_REG:
+    return ELF::R_HEX_27_REG;
+  case fixup_Hexagon_GD_PLT_B22_PCREL_X:
+    return ELF::R_HEX_GD_PLT_B22_PCREL_X;
+  case fixup_Hexagon_GD_PLT_B32_PCREL_X:
+    return ELF::R_HEX_GD_PLT_B32_PCREL_X;
+  case fixup_Hexagon_LD_PLT_B22_PCREL_X:
+    return ELF::R_HEX_LD_PLT_B22_PCREL_X;
+  case fixup_Hexagon_LD_PLT_B32_PCREL_X:
+    return ELF::R_HEX_LD_PLT_B32_PCREL_X;
   }
 }
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
index 4c97ebb..3473276 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -111,6 +111,11 @@ enum Fixups {
   fixup_Hexagon_TPREL_16_X,
   fixup_Hexagon_TPREL_11_X,
   fixup_Hexagon_23_REG,
+  fixup_Hexagon_27_REG,
+  fixup_Hexagon_GD_PLT_B22_PCREL_X,
+  fixup_Hexagon_GD_PLT_B32_PCREL_X,
+  fixup_Hexagon_LD_PLT_B22_PCREL_X,
+  fixup_Hexagon_LD_PLT_B32_PCREL_X,
 
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 33d73f1..3bb658b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -113,7 +113,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
         // The instruction table models the USR.OVF flag, which can be
         // implicitly modified more than once, but cannot be modified in the
         // same packet with an instruction that modifies is explicitly. Deal
-        // with such situ- ations individually.
+        // with such situations individually.
         SoftDefs.insert(R);
       else if (isPredicateRegister(R) &&
                HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
@@ -159,12 +159,6 @@ void HexagonMCChecker::init(MCInst const &MCI) {
                isPredicateRegister(*SRI))
         // Some insns produce predicates too late to be used in the same packet.
         LatePreds.insert(*SRI);
-      else if (i == 0 && HexagonMCInstrInfo::isCVINew(MCII, MCI) &&
-               MCID.mayLoad())
-        // Current loads should be used in the same packet.
-        // TODO: relies on the impossibility of a current and a temporary loads
-        // in the same packet.
-        CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue));
       else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) ==
                              HexagonII::TypeCVI_VM_TMP_LD)
         // Temporary loads should be used in the same packet, but don't commit
@@ -202,9 +196,8 @@ void HexagonMCChecker::init(MCInst const &MCI) {
     if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) {
       unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg();
 
-      for (MCRegAliasIterator SRI(R2, &RI,
-                                  !MCSubRegIterator(R2, &RI).isValid());
-           SRI.isValid(); ++SRI)
+      bool HasSubRegs = MCSubRegIterator(R2, &RI).isValid();
+      for (MCRegAliasIterator SRI(R2, &RI, !HasSubRegs); SRI.isValid(); ++SRI)
         if (!MCSubRegIterator(*SRI, &RI).isValid())
           NewDefs[*SRI].push_back(NewSense::Def(
               PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
@@ -252,6 +245,8 @@ bool HexagonMCChecker::check(bool FullCheck) {
   bool chkNV = checkNewValues();
   bool chkR = checkRegisters();
   bool chkRRO = checkRegistersReadOnly();
+  bool chkELB = checkEndloopBranches();
+  checkRegisterCurDefs();
   bool chkS = checkSolo();
   bool chkSh = true;
   if (FullCheck)
@@ -259,11 +254,106 @@ bool HexagonMCChecker::check(bool FullCheck) {
   bool chkSl = true;
   if (FullCheck)
     chkSl = checkSlots();
-  bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl;
+  bool chkAXOK = checkAXOK();
+  bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkELB && chkS &&
+             chkSh && chkSl && chkAXOK;
 
   return chk;
 }
 
+bool HexagonMCChecker::checkEndloopBranches() {
+  bool const Inner = HexagonMCInstrInfo::isInnerLoop(MCB);
+  if (!Inner && !HexagonMCInstrInfo::isOuterLoop(MCB))
+    return true; // Not a loop-end packet, so there is nothing to check.
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+    MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I);
+    if (!Desc.isBranch() && !Desc.isCall())
+      continue;
+    reportError(I.getLoc(),
+                llvm::Twine("packet marked with `:endloop") +
+                    (Inner ? "0" : "1") + "' " +
+                    "cannot contain instructions that modify register " +
+                    "`" + llvm::Twine(RI.getName(Hexagon::PC)) + "'");
+    return false;
+  }
+  return true;
+}
+
+namespace {
+// True when Opcode is one of the SA1_* duplex sub-instructions listed below.
+bool isDuplexAGroup(unsigned Opcode) {
+  switch (Opcode) {
+  case Hexagon::SA1_addi:
+  case Hexagon::SA1_addrx:
+  case Hexagon::SA1_addsp:
+  case Hexagon::SA1_and1:
+  case Hexagon::SA1_clrf:
+  case Hexagon::SA1_clrfnew:
+  case Hexagon::SA1_clrt:
+  case Hexagon::SA1_clrtnew:
+  case Hexagon::SA1_cmpeqi:
+  case Hexagon::SA1_combine0i:
+  case Hexagon::SA1_combine1i:
+  case Hexagon::SA1_combine2i:
+  case Hexagon::SA1_combine3i:
+  case Hexagon::SA1_combinerz:
+  case Hexagon::SA1_combinezr:
+  case Hexagon::SA1_dec:
+  case Hexagon::SA1_inc:
+  case Hexagon::SA1_seti:
+  case Hexagon::SA1_setin1:
+  case Hexagon::SA1_sxtb:
+  case Hexagon::SA1_sxth:
+  case Hexagon::SA1_tfr:
+  case Hexagon::SA1_zxtb:
+  case Hexagon::SA1_zxth:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool isNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
+  unsigned const Type = HexagonMCInstrInfo::getType(MCII, ID);
+  if (Type == HexagonII::TypeDUPLEX) {
+    unsigned const Sub0 = ID.getOperand(0).getInst()->getOpcode();
+    unsigned const Sub1 = ID.getOperand(1).getInst()->getOpcode();
+    return !(isDuplexAGroup(Sub0) && isDuplexAGroup(Sub1)); // both must match
+  }
+  bool const IsALU32 = Type == HexagonII::TypeALU32_2op ||
+                       Type == HexagonII::TypeALU32_3op ||
+                       Type == HexagonII::TypeALU32_ADDI;
+  bool const IsSType =
+      Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op;
+  if (IsALU32 || IsSType)
+    return false; // These instruction types are always accepted.
+  return Type != HexagonII::TypeALU64 || HexagonMCInstrInfo::isFloat(MCII, ID);
+}
+} // namespace
+
+bool HexagonMCChecker::checkAXOK() {
+  // Remember the last solo-AX instruction in the packet, if there is one.
+  MCInst const *SoloAX = nullptr;
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB))
+    if (HexagonMCInstrInfo::isSoloAX(MCII, I))
+      SoloAX = &I;
+  if (SoloAX == nullptr)
+    return true;
+  // Report the first other instruction that isNeitherAnorX flags.
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+    if (&I == SoloAX || !isNeitherAnorX(MCII, I))
+      continue;
+    reportError(
+        SoloAX->getLoc(),
+        llvm::Twine("Instruction can only be in a packet with ALU or "
+                    "non-FPU XTYPE instructions"));
+    reportError(I.getLoc(),
+                llvm::Twine("Not an ALU or non-FPU XTYPE instruction"));
+    return false;
+  }
+  return true;
+}
+
 bool HexagonMCChecker::checkSlots() {
   unsigned slotsUsed = 0;
   for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
@@ -309,16 +399,6 @@ bool HexagonMCChecker::checkBranches() {
       }
     }
 
-    if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
-      if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
-          HexagonMCInstrInfo::isOuterLoop(MCB)) {
-        // Error out if there's any branch in a loop-end packet.
-        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
-        reportError("packet marked with `:endloop" + N + "' " +
-                    "cannot contain instructions that modify register " + "`" +
-                    llvm::Twine(RI.getName(Hexagon::PC)) + "'");
-        return false;
-      }
     if (Branches > 1)
       if (!hasConditional || Conditional > Unconditional) {
         // Error out if more than one unconditional branch or
@@ -396,6 +476,31 @@ bool HexagonMCChecker::checkRegistersReadOnly() {
   return true;
 }
 
+bool HexagonMCChecker::registerUsed(unsigned Register) {
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+    unsigned const NumOps = I.getNumOperands();
+    // Start past the descriptor's defs; only later operands are inspected.
+    for (unsigned Idx = HexagonMCInstrInfo::getDesc(MCII, I).getNumDefs();
+         Idx < NumOps; ++Idx)
+      if (I.getOperand(Idx).isReg() && I.getOperand(Idx).getReg() == Register)
+        return true;
+  }
+  return false;
+}
+
+void HexagonMCChecker::checkRegisterCurDefs() {
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+    if (!HexagonMCInstrInfo::isCVINew(MCII, I) ||
+        !HexagonMCInstrInfo::getDesc(MCII, I).mayLoad())
+      continue; // Only loads flagged as CVI-new are checked.
+    unsigned const CurReg = I.getOperand(0).getReg();
+    if (!registerUsed(CurReg))
+      reportWarning("Register `" + llvm::Twine(RI.getName(CurReg)) +
+                    "' used with `.cur' "
+                    "but not used in the same packet");
+  }
+}
+
 // Check for legal register uses and definitions.
 bool HexagonMCChecker::checkRegisters() {
   // Check for proper register definitions.
@@ -447,8 +552,7 @@ bool HexagonMCChecker::checkRegisters() {
         if (PM.count(P) && PM.size() > 2) {
           // Error out on conditional changes based on the same predicate
           // multiple times
-          // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =...
-          // }").
+          // (e.g., "if (p0) r0 =...; if (!p0) r0 =...; if (!p0) r0 =...").
           reportErrorRegisters(R);
           return false;
         }
@@ -456,19 +560,6 @@ bool HexagonMCChecker::checkRegisters() {
     }
   }
 
-  // Check for use of current definitions.
-  for (const auto &I : CurDefs) {
-    unsigned R = I;
-
-    if (!Uses.count(R)) {
-      // Warn on an unused current definition.
-      reportWarning("register `" + llvm::Twine(RI.getName(R)) +
-                    "' used with `.cur' "
-                    "but not used in the same packet");
-      return true;
-    }
-  }
-
   // Check for use of temporary definitions.
   for (const auto &I : TmpDefs) {
     unsigned R = I;
@@ -499,12 +590,11 @@ bool HexagonMCChecker::checkRegisters() {
 // Check for legal use of solo insns.
 bool HexagonMCChecker::checkSolo() {
   if (HexagonMCInstrInfo::bundleSize(MCB) > 1)
-    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
-      if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
-        SMLoc Loc = I.getInst()->getLoc();
-        reportError(Loc, "Instruction is marked `isSolo' and "
-                         "cannot have other instructions in "
-                         "the same packet");
+    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) {
+      if (llvm::HexagonMCInstrInfo::isSolo(MCII, I)) {
+        reportError(I.getLoc(), "Instruction is marked `isSolo' and "
+                                "cannot have other instructions in "
+                                "the same packet");
         return false;
       }
     }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index d023869..027f78b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -78,10 +78,6 @@ class HexagonMCChecker {
   typedef std::set<unsigned>::iterator SoftDefsIterator;
   std::set<unsigned> SoftDefs;
 
-  /// Set of current definitions committed to the register file.
-  typedef std::set<unsigned>::iterator CurDefsIterator;
-  std::set<unsigned> CurDefs;
-
   /// Set of temporary definitions not committed to the register file.
   typedef std::set<unsigned>::iterator TmpDefsIterator;
   std::set<unsigned> TmpDefs;
@@ -110,15 +106,20 @@ class HexagonMCChecker {
   void init(MCInst const &);
   void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
 
+  bool registerUsed(unsigned Register);
+
   // Checks performed.
   bool checkBranches();
   bool checkPredicates();
   bool checkNewValues();
   bool checkRegisters();
   bool checkRegistersReadOnly();
+  bool checkEndloopBranches();
+  void checkRegisterCurDefs();
   bool checkSolo();
   bool checkShuffle();
   bool checkSlots();
+  bool checkAXOK();
 
   static void compoundRegisterMap(unsigned &);
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index c095652..dfb5f4c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -199,6 +199,11 @@ Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits(
       return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
     case MCSymbolRefExpr::VK_Hexagon_PCREL:
       return Hexagon::fixup_Hexagon_B32_PCREL_X;
+    case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+      return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X;
+    case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+      return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X;
+
     case MCSymbolRefExpr::VK_None: {
       auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
       for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) {
@@ -318,6 +323,8 @@ namespace {
     case fixup_Hexagon_PLT_B22_PCREL:
     case fixup_Hexagon_GD_PLT_B22_PCREL:
     case fixup_Hexagon_LD_PLT_B22_PCREL:
+    case fixup_Hexagon_GD_PLT_B22_PCREL_X:
+    case fixup_Hexagon_LD_PLT_B22_PCREL_X:
     case fixup_Hexagon_6_PCREL_X:
       return true;
     default:
@@ -414,10 +421,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
   case 22:
     switch (kind) {
     case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
-      FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
+      FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X
+                            : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
       break;
     case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
-      FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
+      FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X
+                            : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
       break;
     case MCSymbolRefExpr::VK_None:
       FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X
@@ -467,8 +476,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
     } else
       switch (kind) {
       case MCSymbolRefExpr::VK_None: {
-        if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr()))
-          FixupKind = Hexagon::fixup_Hexagon_23_REG;
+        if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr()))
+          FixupKind = Hexagon::fixup_Hexagon_27_REG;
         else
           if (MCID.mayStore() || MCID.mayLoad()) {
             for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
@@ -593,6 +602,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
       case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
         FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X;
         break;
+      case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+        FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X;
+        break;
+      case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+        FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X;
+        break;
       case MCSymbolRefExpr::VK_None:
         FixupKind = Hexagon::fixup_Hexagon_11_X;
         break;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index 14300ed..9fbe299 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -94,9 +94,9 @@ void HexagonMCExpr::setMustNotExtend(bool Val) {
 }
 bool HexagonMCExpr::mustNotExtend() const { return MustNotExtend; }
 
-bool HexagonMCExpr::s23_2_reloc() const { return S23_2_reloc; }
-void HexagonMCExpr::setS23_2_reloc(bool Val) {
-  S23_2_reloc = Val;
+bool HexagonMCExpr::s27_2_reloc() const { return S27_2_reloc; }
+void HexagonMCExpr::setS27_2_reloc(bool Val) {
+  S27_2_reloc = Val;
 }
 
 bool HexagonMCExpr::classof(MCExpr const *E) {
@@ -104,7 +104,7 @@ bool HexagonMCExpr::classof(MCExpr const *E) {
 }
 
 HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr)
-    : Expr(Expr), MustNotExtend(false), MustExtend(false), S23_2_reloc(false),
+    : Expr(Expr), MustNotExtend(false), MustExtend(false), S27_2_reloc(false),
       SignMismatch(false) {}
 
 void HexagonMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
index bca40cf..acfd996 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -29,8 +29,8 @@ public:
   bool mustExtend() const;
   void setMustNotExtend(bool Val = true);
   bool mustNotExtend() const;
-  void setS23_2_reloc(bool Val = true);
-  bool s23_2_reloc() const;
+  void setS27_2_reloc(bool Val = true);
+  bool s27_2_reloc() const;
   void setSignMismatch(bool Val = true);
   bool signMismatch() const;
 
@@ -39,7 +39,7 @@ private:
   MCExpr const *Expr;
   bool MustNotExtend;
   bool MustExtend;
-  bool S23_2_reloc;
+  bool S27_2_reloc;
   bool SignMismatch;
 };
 } // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 30a811a..5fe638a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -22,6 +22,49 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 
 namespace llvm {
+
+Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII,
+                                        MCInst const &Inst)
+    : MCII(MCII), BundleCurrent(Inst.begin() +
+                                HexagonMCInstrInfo::bundleInstructionsOffset),
+      BundleEnd(Inst.end()), DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {}
+
+Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII,
+                                        MCInst const &Inst, std::nullptr_t)
+    : MCII(MCII), BundleCurrent(Inst.end()), BundleEnd(Inst.end()),
+      DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {}
+
+Hexagon::PacketIterator &Hexagon::PacketIterator::operator++() {
+  if (DuplexCurrent != DuplexEnd) {
+    // Inside a duplex: advance to the next sub-instruction. Once exhausted,
+    // the duplex cursors collapse to BundleEnd; BundleCurrent is not advanced.
+    if (++DuplexCurrent == DuplexEnd)
+      DuplexCurrent = DuplexEnd = BundleEnd;
+    return *this;
+  }
+  ++BundleCurrent;
+  if (BundleCurrent == BundleEnd)
+    return *this;
+  // Landing on a duplex opens iteration over its sub-instructions.
+  MCInst const &Next = *BundleCurrent->getInst();
+  if (HexagonMCInstrInfo::isDuplex(MCII, Next)) {
+    DuplexCurrent = Next.begin();
+    DuplexEnd = Next.end();
+  }
+  return *this;
+}
+
+MCInst const &Hexagon::PacketIterator::operator*() const {
+  // While a duplex is open, its current sub-instruction is the one yielded.
+  auto Pos = DuplexCurrent != DuplexEnd ? DuplexCurrent : BundleCurrent;
+  return *Pos->getInst();
+}
+
+bool Hexagon::PacketIterator::operator==(PacketIterator const &Other) const {
+  return BundleCurrent == Other.BundleCurrent && BundleEnd == Other.BundleEnd &&
+         DuplexCurrent == Other.DuplexCurrent && DuplexEnd == Other.DuplexEnd;
+}
+
 void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
                                      MCContext &Context) {
   MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
@@ -41,6 +84,14 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
   MCB.addOperand(MCOperand::createInst(XMCI));
 }
 
+iterator_range<Hexagon::PacketIterator>
+HexagonMCInstrInfo::bundleInstructions(MCInstrInfo const &MCII,
+                                       MCInst const &MCI) {
+  assert(isBundle(MCI));
+  return make_range(Hexagon::PacketIterator(MCII, MCI),
+                    Hexagon::PacketIterator(MCII, MCI, nullptr));
+}
+
 iterator_range<MCInst::const_iterator>
 HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
   assert(isBundle(MCI));
@@ -292,7 +343,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
 }
 
 StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
-                                        MCInst const &MCI) {
+                                      MCInst const &MCI) {
   return MCII.getName(MCI.getOpcode());
 }
 
@@ -339,25 +390,6 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
   return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
 }
 
-int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
-                                     MCInst const &MCI) {
-  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
-
-  HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>(
-      (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask);
-
-  switch (Target) {
-  default:
-    return Hexagon::ArchV4;
-  case HexagonII::HasV5SubT:
-    return Hexagon::ArchV5;
-  case HexagonII::HasV55SubT:
-    return Hexagon::ArchV55;
-  case HexagonII::HasV60SubT:
-    return Hexagon::ArchV60;
-  }
-}
-
 /// Return the slots this instruction can execute out of
 unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
                                       MCSubtargetInfo const &STI,
@@ -397,9 +429,8 @@ bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
   if (!HexagonMCInstrInfo::isBundle(MCI))
     return false;
 
-  for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
-    auto MI = I.getInst();
-    if (HexagonMCInstrInfo::isDuplex(MCII, *MI))
+  for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCI)) {
+    if (HexagonMCInstrInfo::isDuplex(MCII, I))
       return true;
   }
 
@@ -410,13 +441,12 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) {
   return extenderForIndex(MCB, Index) != nullptr;
 }
 
-bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+bool HexagonMCInstrInfo::hasImmExt( MCInst const &MCI) {
   if (!HexagonMCInstrInfo::isBundle(MCI))
     return false;
 
   for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
-    auto MI = I.getInst();
-    if (isImmext(*MI))
+    if (isImmext(*I.getInst()))
       return true;
   }
 
@@ -737,16 +767,16 @@ bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) {
   HexagonMCExpr const &HExpr = cast<HexagonMCExpr>(Expr);
   return HExpr.mustNotExtend();
 }
-void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) {
+void HexagonMCInstrInfo::setS27_2_reloc(MCExpr const &Expr, bool Val) {
   HexagonMCExpr &HExpr =
       const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr));
-  HExpr.setS23_2_reloc(Val);
+  HExpr.setS27_2_reloc(Val);
 }
-bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) {
+bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) {
   HexagonMCExpr const *HExpr = llvm::dyn_cast<HexagonMCExpr>(&Expr);
   if (!HExpr)
     return false;
-  return HExpr->s23_2_reloc();
+  return HExpr->s27_2_reloc();
 }
 
 void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) {
@@ -818,4 +848,4 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer,
     return 0x1;
   return 0;
 }
-}
+} // namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 4d2df4d..ca44c3a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -31,6 +31,25 @@ public:
   DuplexCandidate(unsigned i, unsigned j, unsigned iClass)
       : packetIndexI(i), packetIndexJ(j), iClass(iClass) {}
 };
+namespace Hexagon {
+// Iterator type of the range returned by bundleInstructions(MCII, MCI).
+class PacketIterator {
+  MCInstrInfo const &MCII;
+  MCInst::const_iterator BundleCurrent;
+  MCInst::const_iterator BundleEnd;
+  MCInst::const_iterator DuplexCurrent;
+  MCInst::const_iterator DuplexEnd;
+
+public:
+  PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst);
+  // The std::nullptr_t overload builds the end-of-range iterator.
+  PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst, std::nullptr_t);
+  PacketIterator &operator++();
+  MCInst const &operator*() const;
+  bool operator==(PacketIterator const &Other) const;
+  bool operator!=(PacketIterator const &O) const { return !(*this == O); }
+};
+} // namespace Hexagon
 namespace HexagonMCInstrInfo {
 size_t const innerLoopOffset = 0;
 int64_t const innerLoopMask = 1 << innerLoopOffset;
@@ -54,6 +73,8 @@ void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
                       MCInst const &MCI);
 
 // Returns a iterator range of instructions in this bundle
+iterator_range<Hexagon::PacketIterator>
+bundleInstructions(MCInstrInfo const &MCII, MCInst const &MCI);
 iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
 
 // Returns the number of instructions in the bundle
@@ -131,7 +152,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
 unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
 MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
                                      MCInst const &MCI);
-int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the Hexagon ISA class for the insn.
 unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -263,14 +283,14 @@ bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
 // Replace the instructions inside MCB, represented by Candidate
 void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate);
 
-bool s23_2_reloc(MCExpr const &Expr);
+bool s27_2_reloc(MCExpr const &Expr);
 // Marks a bundle as endloop0
 void setInnerLoop(MCInst &MCI);
 void setMemReorderDisabled(MCInst &MCI);
 void setMemStoreReorderEnabled(MCInst &MCI);
 void setMustExtend(MCExpr const &Expr, bool Val = true);
 void setMustNotExtend(MCExpr const &Expr, bool Val = true);
-void setS23_2_reloc(MCExpr const &Expr, bool Val = true);
+void setS27_2_reloc(MCExpr const &Expr, bool Val = true);
 
 // Marks a bundle as endloop1
 void setOuterLoop(MCInst &MCI);
@@ -283,7 +303,7 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
 // Attempt to find and replace compound pairs
 void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
                  MCContext &Context, MCInst &MCI);
-}
-}
+} // namespace HexagonMCInstrInfo
+} // namespace llvm
 
 #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index eb30346..a5afa1d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -205,64 +205,12 @@ static struct {
 } jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}};
 #define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0]))
 
-namespace {
-bool isDuplexAGroup(unsigned Opcode) {
-  switch (Opcode) {
-  case Hexagon::SA1_addi:
-  case Hexagon::SA1_addrx:
-  case Hexagon::SA1_addsp:
-  case Hexagon::SA1_and1:
-  case Hexagon::SA1_clrf:
-  case Hexagon::SA1_clrfnew:
-  case Hexagon::SA1_clrt:
-  case Hexagon::SA1_clrtnew:
-  case Hexagon::SA1_cmpeqi:
-  case Hexagon::SA1_combine0i:
-  case Hexagon::SA1_combine1i:
-  case Hexagon::SA1_combine2i:
-  case Hexagon::SA1_combine3i:
-  case Hexagon::SA1_combinerz:
-  case Hexagon::SA1_combinezr:
-  case Hexagon::SA1_dec:
-  case Hexagon::SA1_inc:
-  case Hexagon::SA1_seti:
-  case Hexagon::SA1_setin1:
-  case Hexagon::SA1_sxtb:
-  case Hexagon::SA1_sxth:
-  case Hexagon::SA1_tfr:
-  case Hexagon::SA1_zxtb:
-  case Hexagon::SA1_zxth:
-    return true;
-    break;
-  default:
-    return false;
-  }
-}
-
-unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
-  unsigned Result = 0;
-  unsigned Type = HexagonMCInstrInfo::getType(MCII, ID);
-  if (Type == HexagonII::TypeDUPLEX) {
-    unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode();
-    unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode();
-    Result += !isDuplexAGroup(subInst0Opcode);
-    Result += !isDuplexAGroup(subInst1Opcode);
-  } else
-    Result +=
-        Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op &&
-        Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op &&
-        Type != HexagonII::TypeS_3op && Type != HexagonII::TypeALU64 &&
-        (Type != HexagonII::TypeM || HexagonMCInstrInfo::isFloat(MCII, ID));
-  return Result;
-}
-} // namespace
-
 /// Check that the packet is legal and enforce relative insn order.
 bool HexagonShuffler::check() {
   // Descriptive slot masks.
   const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
                  slotThree = 0x8, // slotFirstJump = 0x8,
-      slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
+                 slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
   // Highest slots for branches and stores used to keep their original order.
   // unsigned slotJump = slotFirstJump;
   unsigned slotLoadStore = slotFirstLoadStore;
@@ -271,18 +219,12 @@ bool HexagonShuffler::check() {
   // Number of memory operations, loads, solo loads, stores, solo stores, single
   // stores.
   unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
-  // Number of HVX loads, HVX stores.
-  unsigned CVIloads = 0, CVIstores = 0;
-  // Number of duplex insns, solo insns.
-  unsigned duplex = 0, solo = 0;
-  // Number of insns restricting other insns in the packet to A and X types,
-  // which is neither A or X types.
-  unsigned onlyAX = 0, neitherAnorX = 0;
+  // Number of duplex insns
+  unsigned duplex = 0;
   // Number of insns restricting other insns in slot #1 to A type.
   unsigned onlyAin1 = 0;
   // Number of insns restricting any insn in slot #1, except A2_nop.
   unsigned onlyNo1 = 0;
-  unsigned xtypeFloat = 0;
   unsigned pSlot3Cnt = 0;
   unsigned nvstores = 0;
   unsigned memops = 0;
@@ -295,13 +237,8 @@ bool HexagonShuffler::check() {
   for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
     MCInst const &ID = ISJ->getDesc();
 
-    if (HexagonMCInstrInfo::isSolo(MCII, ID))
-      solo++;
-    else if (HexagonMCInstrInfo::isSoloAX(MCII, ID))
-      onlyAX++;
-    else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID))
-      onlyAin1++;
-    neitherAnorX += countNeitherAnorX(MCII, ID);
+    if (HexagonMCInstrInfo::isSoloAin1(MCII, ID))
+      ++onlyAin1;
     if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) {
       ++pSlot3Cnt;
       slot3ISJ = ISJ;
@@ -314,8 +251,6 @@ bool HexagonShuffler::check() {
     case HexagonII::TypeS_2op:
     case HexagonII::TypeS_3op:
     case HexagonII::TypeALU64:
-      if (HexagonMCInstrInfo::isFloat(MCII, ID))
-        ++xtypeFloat;
       break;
     case HexagonII::TypeJ:
       ++jumps;
@@ -325,7 +260,6 @@ bool HexagonShuffler::check() {
       ++onlyNo1;
     case HexagonII::TypeCVI_VM_LD:
     case HexagonII::TypeCVI_VM_TMP_LD:
-      ++CVIloads;
     case HexagonII::TypeLD:
       ++loads;
       ++memory;
@@ -341,7 +275,6 @@ bool HexagonShuffler::check() {
       ++onlyNo1;
     case HexagonII::TypeCVI_VM_ST:
     case HexagonII::TypeCVI_VM_NEW_ST:
-      ++CVIstores;
     case HexagonII::TypeST:
       ++stores;
       ++memory;
@@ -403,15 +336,22 @@ bool HexagonShuffler::check() {
         ++jumps;
         foundBranches.push_back(ISJ);
       }
+      if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) {
+        ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+        foundBranches.push_back(ISJ);
+      }
+      if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) {
+        ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+        foundBranches.push_back(ISJ);
+      }
       break;
     }
     }
   }
 
   // Check if the packet is legal.
-  if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
-      (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
-      (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
+  if ((load0 > 1 || store0 > 1) ||
+      (duplex > 1 || (duplex && memory))) {
     reportError(llvm::Twine("invalid instruction packet"));
     return false;
   }
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index bd31c7b..10a9590 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -105,8 +105,8 @@ class HexagonInstr {
 public:
   HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
                MCInstrInfo const &MCII, MCInst const *id,
-               MCInst const *Extender, unsigned s, bool x = false)
-      : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}
+               MCInst const *Extender, unsigned s)
+      : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {};
 
   MCInst const &getDesc() const { return *ID; };
 
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index a71b161..5a394fe 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -490,15 +490,14 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
 // remove the use-soft-float attribute
 //
 static void removeUseSoftFloat(Function &F) {
-  AttributeList A;
+  AttrBuilder B;
   DEBUG(errs() << "removing -use-soft-float\n");
-  A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex,
-                     "use-soft-float", "false");
-  F.removeAttributes(AttributeList::FunctionIndex, A);
+  B.addAttribute("use-soft-float", "false");
+  F.removeAttributes(AttributeList::FunctionIndex, B);
   if (F.hasFnAttribute("use-soft-float")) {
     DEBUG(errs() << "still has -use-soft-float\n");
   }
-  F.addAttributes(AttributeList::FunctionIndex, A);
+  F.addAttributes(AttributeList::FunctionIndex, B);
 }
 
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 5645fdc..3266109 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -1017,6 +1017,14 @@ namespace llvm {
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
+
+    bool supportsModuloShift(ISD::NodeType Inst,
+                             EVT ReturnType) const override {
+      assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+             "Expect a shift instruction");
+      assert(isOperationLegal(Inst, ReturnType));
+      return ReturnType.isVector();
+    }
   };
 
   namespace PPC {
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index d286158..f56b238 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -530,9 +530,10 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
   if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
     OpTy = CI->getOperand(0)->getType();
   else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
-    if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
-      if (isa<CmpInst>(LogicI->getOperand(1)))
-        OpTy = CI0->getOperand(0)->getType();
+    if (LogicI->getNumOperands() == 2)
+      if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+        if (isa<CmpInst>(LogicI->getOperand(1)))
+          OpTy = CI0->getOperand(0)->getType();
 
   if (OpTy != nullptr) {
     if (VF == 1) {
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index d2f650c..784c3a6 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -170,6 +170,8 @@ def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                       [FeatureSSE2]>;
 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
                                       "Enable TBM instructions">;
+def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
+                                      "Enable LWP instructions">;
 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                       "Support MOVBE instruction">;
 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
@@ -691,6 +693,7 @@ def : Proc<"bdver1", [
   FeatureLZCNT,
   FeaturePOPCNT,
   FeatureXSAVE,
+  FeatureLWP,
   FeatureSlowSHLD,
   FeatureLAHFSAHF
 ]>;
@@ -713,6 +716,7 @@ def : Proc<"bdver2", [
   FeatureXSAVE,
   FeatureBMI,
   FeatureTBM,
+  FeatureLWP,
   FeatureFMA,
   FeatureSlowSHLD,
   FeatureLAHFSAHF
@@ -737,6 +741,7 @@ def : Proc<"bdver3", [
   FeatureXSAVE,
   FeatureBMI,
   FeatureTBM,
+  FeatureLWP,
   FeatureFMA,
   FeatureXSAVEOPT,
   FeatureSlowSHLD,
@@ -763,6 +768,7 @@ def : Proc<"bdver4", [
   FeatureBMI,
   FeatureBMI2,
   FeatureTBM,
+  FeatureLWP,
   FeatureFMA,
   FeatureXSAVEOPT,
   FeatureSlowSHLD,
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index fd11b67..ebd179e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -3181,6 +3181,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool Is64Bit        = Subtarget->is64Bit();
   bool IsWin64        = Subtarget->isCallingConvWin64(CC);
 
+  const CallInst *CI =
+      CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
+  const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
+
+  // Functions with no_caller_saved_registers that need special handling.
+  if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
+      (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
+    return false;
+
   // Handle only C, fastcc, and webkit_js calling conventions for now.
   switch (CC) {
   default: return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6092fd2..83542aa 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2180,6 +2180,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   MachineFunction &MF = DAG.getMachineFunction();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
 
+  // In some cases we need to disable registers from the default CSR list.
+  // For example, when they are used for argument passing.
+  bool ShouldDisableCalleeSavedRegister =
+      CallConv == CallingConv::X86_RegCall ||
+      MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
+
   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
     report_fatal_error("X86 interrupts may not return any value");
 
@@ -2201,7 +2207,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     // Add the register to the CalleeSaveDisableRegs list.
-    if (CallConv == CallingConv::X86_RegCall)
+    if (ShouldDisableCalleeSavedRegister)
       MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
 
     SDValue ValToCopy = OutVals[OutsIndex];
@@ -2280,7 +2286,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
              "Expecting two registers after Pass64BitArgInRegs");
 
       // Add the second register to the CalleeSaveDisableRegs list.
-      if (CallConv == CallingConv::X86_RegCall)
+      if (ShouldDisableCalleeSavedRegister)
         MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
     } else {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
@@ -2340,7 +2346,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
 
     // Add the returned register to the CalleeSaveDisableRegs list.
-    if (CallConv == CallingConv::X86_RegCall)
+    if (ShouldDisableCalleeSavedRegister)
       MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
   }
 
@@ -2540,7 +2546,7 @@ SDValue X86TargetLowering::LowerCallResult(
 
     // In some calling conventions we need to remove the used registers
     // from the register mask.
-    if (RegMask && CallConv == CallingConv::X86_RegCall) {
+    if (RegMask) {
       for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
            SubRegs.isValid(); ++SubRegs)
         RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
@@ -3237,7 +3243,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
     }
   }
 
-  if (CallConv == CallingConv::X86_RegCall) {
+  if (CallConv == CallingConv::X86_RegCall ||
+      Fn->hasFnAttribute("no_caller_saved_registers")) {
     const MachineRegisterInfo &MRI = MF.getRegInfo();
     for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
       MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
@@ -3329,6 +3336,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool IsSibcall      = false;
   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+  const CallInst *CI =
+      CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
+  const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
+  bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
+                 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
 
   if (CallConv == CallingConv::X86_INTR)
     report_fatal_error("X86 interrupts may not be called directly");
@@ -3741,7 +3753,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                   RegsToPass[i].second.getValueType()));
 
   // Add a register mask operand representing the call-preserved registers.
-  const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
+  // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
+  // set X86_INTR calling convention because it has the same CSR mask
+  // (same preserved registers).
+  const uint32_t *Mask = RegInfo->getCallPreservedMask(
+      MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
 
   // If this is an invoke in a 32-bit function using a funclet-based
@@ -3764,7 +3780,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // In some calling conventions we need to remove the used physical registers
   // from the reg mask.
-  if (CallConv == CallingConv::X86_RegCall) {
+  if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 
     // Allocate a new Reg Mask and copy Mask.
@@ -19044,8 +19060,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
   if (Op.getOpcode() == X86ISD::FSETCCM ||
       Op.getOpcode() == X86ISD::FSETCCM_RND)
     return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
-  if (Op.getOpcode() == X86ISD::VFPCLASS ||
-      Op.getOpcode() == X86ISD::VFPCLASSS)
+  if (Op.getOpcode() == X86ISD::VFPCLASSS)
     return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
 
   if (PreservedSrc.isUndef())
@@ -20284,16 +20299,17 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 
-  const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
+  const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
   if (!IntrData) {
-    if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
+    switch (IntNo) {
+    case llvm::Intrinsic::x86_seh_ehregnode:
       return MarkEHRegistrationNode(Op, DAG);
-    if (IntNo == llvm::Intrinsic::x86_seh_ehguard)
+    case llvm::Intrinsic::x86_seh_ehguard:
       return MarkEHGuard(Op, DAG);
-    if (IntNo == llvm::Intrinsic::x86_flags_read_u32 ||
-        IntNo == llvm::Intrinsic::x86_flags_read_u64 ||
-        IntNo == llvm::Intrinsic::x86_flags_write_u32 ||
-        IntNo == llvm::Intrinsic::x86_flags_write_u64) {
+    case llvm::Intrinsic::x86_flags_read_u32:
+    case llvm::Intrinsic::x86_flags_read_u64:
+    case llvm::Intrinsic::x86_flags_write_u32:
+    case llvm::Intrinsic::x86_flags_write_u64: {
       // We need a frame pointer because this will get lowered to a PUSH/POP
       // sequence.
       MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -20302,6 +20318,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
       // during ExpandISelPseudos in EmitInstrWithCustomInserter.
       return SDValue();
     }
+    case Intrinsic::x86_lwpins32:
+    case Intrinsic::x86_lwpins64: {
+      SDLoc dl(Op);
+      SDValue Chain = Op->getOperand(0);
+      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+      SDValue LwpIns =
+          DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2),
+                      Op->getOperand(3), Op->getOperand(4));
+      SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG);
+      SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
+      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+                         LwpIns.getValue(1));
+    }
+    }
     return SDValue();
   }
 
@@ -24477,6 +24507,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CVTP2UI_RND:        return "X86ISD::CVTP2UI_RND";
   case X86ISD::CVTS2SI_RND:        return "X86ISD::CVTS2SI_RND";
   case X86ISD::CVTS2UI_RND:        return "X86ISD::CVTS2UI_RND";
+  case X86ISD::LWPINS:             return "X86ISD::LWPINS";
   }
   return nullptr;
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 46dc587..18106c2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -559,6 +559,9 @@ namespace llvm {
       // Conversions between float and half-float.
       CVTPS2PH, CVTPH2PS,
 
+      // LWP insert record.
+      LWPINS,
+
       // Compare and swap.
       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LCMPXCHG8_DAG,
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 26444dd..888daa2 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -821,6 +821,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPSHLQrr,           X86::VPSHLQmr,         0 },
     { X86::VPSHLWrr,           X86::VPSHLWmr,         0 },
 
+    // LWP foldable instructions
+    { X86::LWPINS32rri,        X86::LWPINS32rmi,      0 },
+    { X86::LWPINS64rri,        X86::LWPINS64rmi,      0 },
+    { X86::LWPVAL32rri,        X86::LWPVAL32rmi,      0 },
+    { X86::LWPVAL64rri,        X86::LWPVAL64rmi,      0 },
+
     // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
     { X86::BEXTR32rr,       X86::BEXTR32rm,           0 },
     { X86::BEXTR64rr,       X86::BEXTR64rm,           0 },
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index ce08764..cdf7ce1 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -283,6 +283,11 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
 def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
+def X86lwpins : SDNode<"X86ISD::LWPINS",
+                       SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>,
+                                            SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
+                       [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>;
+
 //===----------------------------------------------------------------------===//
 // X86 Operand Definitions.
 //
@@ -836,6 +841,7 @@ def HasFMA       : Predicate<"Subtarget->hasFMA()">;
 def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
 def HasXOP       : Predicate<"Subtarget->hasXOP()">;
 def HasTBM       : Predicate<"Subtarget->hasTBM()">;
+def HasLWP       : Predicate<"Subtarget->hasLWP()">;
 def HasMOVBE     : Predicate<"Subtarget->hasMOVBE()">;
 def HasRDRAND    : Predicate<"Subtarget->hasRDRAND()">;
 def HasF16C      : Predicate<"Subtarget->hasF16C()">;
@@ -2444,6 +2450,59 @@ defm TZMSK   : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>;
 } // HasTBM, EFLAGS
 
 //===----------------------------------------------------------------------===//
+// Lightweight Profiling Instructions
+
+let Predicates = [HasLWP] in {
+
+def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src",
+               [(int_x86_llwpcb GR32:$src)], IIC_LWP>,
+               XOP, XOP9, Requires<[Not64BitMode]>;
+def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst",
+               [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>,
+               XOP, XOP9, Requires<[Not64BitMode]>;
+
+def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src",
+                 [(int_x86_llwpcb GR64:$src)], IIC_LWP>,
+                 XOP, XOP9, VEX_W, Requires<[In64BitMode]>;
+def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst",
+                 [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>,
+                 XOP, XOP9, VEX_W, Requires<[In64BitMode]>;
+
+multiclass lwpins_intr<RegisterClass RC> {
+  def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
+                 "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+                 [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>,
+                 XOP_4V, XOPA;
+  let mayLoad = 1 in
+  def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
+                 "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+                 [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>,
+                 XOP_4V, XOPA;
+}
+
+let Defs = [EFLAGS] in {
+  defm LWPINS32 : lwpins_intr<GR32>;
+  defm LWPINS64 : lwpins_intr<GR64>, VEX_W;
+} // EFLAGS
+
+multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
+  def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
+                 "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+                 [(Int RC:$src0, GR32:$src1, imm:$cntl)], IIC_LWP>,
+                 XOP_4V, XOPA;
+  let mayLoad = 1 in
+  def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
+                 "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+                 [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)], IIC_LWP>,
+                 XOP_4V, XOPA;
+}
+
+defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>;
+defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;
+
+} // HasLWP
+
+//===----------------------------------------------------------------------===//
 // MONITORX/MWAITX Instructions
 //
 let SchedRW = [ WriteSystem ] in {
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1f16f3c..cf2ceef 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -276,7 +276,14 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool HasAVX512 = Subtarget.hasAVX512();
   bool CallsEHReturn = MF->callsEHReturn();
 
-  switch (MF->getFunction()->getCallingConv()) {
+  CallingConv::ID CC = MF->getFunction()->getCallingConv();
+
+  // If the no_caller_saved_registers attribute is present, switch on the
+  // X86_INTR calling convention because it has the same CSR list.
+  if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers"))
+    CC = CallingConv::X86_INTR;
+
+  switch (CC) {
   case CallingConv::GHC:
   case CallingConv::HiPE:
     return CSR_NoRegs_SaveList;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 7f7efd7..4eae6ca 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -497,6 +497,7 @@ def IIC_IN_RI : InstrItinClass;
 def IIC_OUT_RR : InstrItinClass;
 def IIC_OUT_IR : InstrItinClass;
 def IIC_INS : InstrItinClass;
+def IIC_LWP : InstrItinClass;
 def IIC_MOV_REG_DR : InstrItinClass;
 def IIC_MOV_DR_REG : InstrItinClass;
 def IIC_MOV_REG_CR : InstrItinClass;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 82ff436..9ab751e 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -265,6 +265,7 @@ void X86Subtarget::initializeEnvironment() {
   HasFMA4 = false;
   HasXOP = false;
   HasTBM = false;
+  HasLWP = false;
   HasMOVBE = false;
   HasRDRAND = false;
   HasF16C = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8568cf0..de15142 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -124,6 +124,9 @@ protected:
   /// Target has TBM instructions.
   bool HasTBM;
 
+  /// Target has LWP instructions.
+  bool HasLWP;
+
   /// True if the processor has the MOVBE instruction.
   bool HasMOVBE;
 
@@ -447,6 +450,7 @@ public:
   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasXOP() const { return HasXOP; }
   bool hasTBM() const { return HasTBM; }
+  bool hasLWP() const { return HasLWP; }
   bool hasMOVBE() const { return HasMOVBE; }
   bool hasRDRAND() const { return HasRDRAND; }
   bool hasF16C() const { return HasF16C; }
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index bc14630..500b26b 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -412,7 +412,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
   // Can't use musttail due to prototype mismatch, but we can use tail.
   Call->setTailCall(true);
   // Set inreg so we pass it in EAX.
-  Call->addAttribute(1, Attribute::InReg);
+  Call->addParamAttr(0, Attribute::InReg);
   Builder.CreateRet(Call);
   return Trampoline;
 }
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index ab648f8..12eb167 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -216,8 +216,8 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
   Function *NewF =
       Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
                        F.getName() + Suffix, M);
-  NewF->addAttribute(1, Attribute::NonNull);
-  NewF->addAttribute(1, Attribute::NoAlias);
+  NewF->addParamAttr(0, Attribute::NonNull);
+  NewF->addParamAttr(0, Attribute::NoAlias);
 
   ValueToValueMapTy VMap;
   // Replace all args with undefs. The buildCoroutineFrame algorithm already
@@ -245,9 +245,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
   // Remove old return attributes.
   NewF->removeAttributes(
       AttributeList::ReturnIndex,
-      AttributeList::get(
-          NewF->getContext(), AttributeList::ReturnIndex,
-          AttributeFuncs::typeIncompatible(NewF->getReturnType())));
+      AttributeFuncs::typeIncompatible(NewF->getReturnType()));
 
   // Make AllocaSpillBlock the new entry block.
   auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]);
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 25db0ef..6408cad 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -839,12 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
     // avoiding a register copy.
     if (PtrArg->hasStructRetAttr()) {
       unsigned ArgNo = PtrArg->getArgNo();
-      F->removeAttribute(ArgNo + 1, Attribute::StructRet);
-      F->addAttribute(ArgNo + 1, Attribute::NoAlias);
+      F->removeParamAttr(ArgNo, Attribute::StructRet);
+      F->addParamAttr(ArgNo, Attribute::NoAlias);
       for (Use &U : F->uses()) {
         CallSite CS(U.getUser());
-        CS.removeAttribute(ArgNo + 1, Attribute::StructRet);
-        CS.addAttribute(ArgNo + 1, Attribute::NoAlias);
+        CS.removeParamAttr(ArgNo, Attribute::StructRet);
+        CS.addParamAttr(ArgNo, Attribute::NoAlias);
       }
     }
 
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 031c3d8..28cc81c 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -835,7 +835,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
   // pointers.
   for (Function *F : SCCNodes) {
     // Already noalias.
-    if (F->doesNotAlias(0))
+    if (F->returnDoesNotAlias())
       continue;
 
     // We can infer and propagate function attributes only when we know that the
@@ -855,11 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
 
   bool MadeChange = false;
   for (Function *F : SCCNodes) {
-    if (F->doesNotAlias(AttributeList::ReturnIndex) ||
+    if (F->returnDoesNotAlias() ||
         !F->getReturnType()->isPointerTy())
       continue;
 
-    F->setDoesNotAlias(AttributeList::ReturnIndex);
+    F->setReturnDoesNotAlias();
     ++NumNoAlias;
     MadeChange = true;
   }
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 1bb9d65..2db47b3 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -337,6 +337,16 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
   if (F->hasAddressTaken())
     return nullptr;
 
+  // Let inliner handle it
+  if (F->hasFnAttribute(Attribute::AlwaysInline))
+    return nullptr;
+
+  if (F->hasFnAttribute(Attribute::NoInline))
+    return nullptr;
+
+  if (PSI->isFunctionEntryCold(F))
+    return nullptr;
+
   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
       computeOutliningInfo(F);
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e9286b1..4fd90d7 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3845,7 +3845,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     if (V->getType()->isPointerTy() &&
         !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
         isKnownNonNullAt(V, CS.getInstruction(), &DT))
-      Indices.push_back(ArgNo + 1);
+      Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
     ArgNo++;
   }
 
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 4e454f0..8786781 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass {
   MDNode *ColdCallWeights;
   DFSanABIList ABIList;
   DenseMap<Value *, Function *> UnwrappedFnMap;
-  AttributeList ReadOnlyNoneAttrs;
+  AttrBuilder ReadOnlyNoneAttrs;
   bool DFSanRuntimeShadowMask;
 
   Value *getShadowAddress(Value *Addr, Instruction *Pos);
@@ -544,16 +544,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
   NewF->copyAttributesFrom(F);
   NewF->removeAttributes(
       AttributeList::ReturnIndex,
-      AttributeList::get(
-          F->getContext(), AttributeList::ReturnIndex,
-          AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
+      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
 
   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
   if (F->isVarArg()) {
-    NewF->removeAttributes(
-        AttributeList::FunctionIndex,
-        AttributeList().addAttribute(*Ctx, AttributeList::FunctionIndex,
-                                     "split-stack"));
+    NewF->removeAttributes(AttributeList::FunctionIndex,
+                           AttrBuilder().addAttribute("split-stack"));
     CallInst::Create(DFSanVarargWrapperFn,
                      IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                      BB);
@@ -629,16 +625,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
     F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
     F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
     F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    F->addAttribute(1, Attribute::ZExt);
-    F->addAttribute(2, Attribute::ZExt);
+    F->addParamAttr(0, Attribute::ZExt);
+    F->addParamAttr(1, Attribute::ZExt);
   }
   DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
   if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
     F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
     F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
     F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    F->addAttribute(1, Attribute::ZExt);
-    F->addAttribute(2, Attribute::ZExt);
+    F->addParamAttr(0, Attribute::ZExt);
+    F->addParamAttr(1, Attribute::ZExt);
   }
   DFSanUnionLoadFn =
       Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
@@ -652,7 +648,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
   DFSanSetLabelFn =
       Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
   if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
-    F->addAttribute(1, Attribute::ZExt);
+    F->addParamAttr(0, Attribute::ZExt);
   }
   DFSanNonzeroLabelFn =
       Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
@@ -698,9 +694,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
     }
   }
 
-  AttrBuilder B;
-  B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
-  ReadOnlyNoneAttrs = AttributeList::get(*Ctx, AttributeList::FunctionIndex, B);
+  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
+      .addAttribute(Attribute::ReadNone);
 
   // First, change the ABI of every function in the module.  ABI-listed
   // functions keep their original ABI and get a wrapper function.
@@ -722,9 +717,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
         NewF->copyAttributesFrom(&F);
         NewF->removeAttributes(
             AttributeList::ReturnIndex,
-            AttributeList::get(
-                NewF->getContext(), AttributeList::ReturnIndex,
-                AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
+            AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
         for (Function::arg_iterator FArg = F.arg_begin(),
                                     NewFArg = NewF->arg_begin(),
                                     FArgEnd = F.arg_end();
@@ -989,8 +982,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
   if (AvoidNewBlocks) {
     CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
     Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    Call->addAttribute(1, Attribute::ZExt);
-    Call->addAttribute(2, Attribute::ZExt);
+    Call->addParamAttr(0, Attribute::ZExt);
+    Call->addParamAttr(1, Attribute::ZExt);
 
     CCS.Block = Pos->getParent();
     CCS.Shadow = Call;
@@ -1002,8 +995,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
     IRBuilder<> ThenIRB(BI);
     CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
     Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
-    Call->addAttribute(1, Attribute::ZExt);
-    Call->addAttribute(2, Attribute::ZExt);
+    Call->addParamAttr(0, Attribute::ZExt);
+    Call->addParamAttr(1, Attribute::ZExt);
 
     BasicBlock *Tail = BI->getSuccessor(0);
     PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index d91ac6a..9a82532 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -241,7 +241,7 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
 
   if (Function *FunRes = dyn_cast<Function>(Res)) {
     if (auto AK = TLI.getExtAttrForI32Param(false))
-      FunRes->addAttribute(3, AK);
+      FunRes->addParamAttr(2, AK);
   }
   return Res;
 }
@@ -292,7 +292,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
         Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
   }
   if (auto AK = TLI->getExtAttrForI32Param(false))
-    Call->addAttribute(3, AK);
+    Call->addParamAttr(2, AK);
   Ind->replaceAllUsesWith(Call);
   Ind->eraseFromParent();
 }
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 3e480a6..15333a5 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2607,10 +2607,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         AttrBuilder B;
         B.addAttribute(Attribute::ReadOnly)
           .addAttribute(Attribute::ReadNone);
-        Func->removeAttributes(AttributeList::FunctionIndex,
-                               AttributeList::get(Func->getContext(),
-                                                  AttributeList::FunctionIndex,
-                                                  B));
+        Func->removeAttributes(AttributeList::FunctionIndex, B);
       }
 
       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
@@ -3659,9 +3656,7 @@ bool MemorySanitizer::runOnFunction(Function &F) {
   AttrBuilder B;
   B.addAttribute(Attribute::ReadOnly)
     .addAttribute(Attribute::ReadNone);
-  F.removeAttributes(
-      AttributeList::FunctionIndex,
-      AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B));
+  F.removeAttributes(AttributeList::FunctionIndex, B);
 
   return Visitor.runOnFunction();
 }
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index c541fa4..cb3b575 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -163,7 +163,7 @@ private:
 
     AttributeList Attr = AttributeList().addAttribute(
         C, AttributeList::FunctionIndex, Attribute::NoUnwind);
-    Attr = Attr.addAttribute(C, 1, Attribute::NoCapture);
+    Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture);
 
     FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
                                           /*isVarArg=*/false);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index dc864f4..3f1a77b 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -318,7 +318,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
         LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                             ConstantPointerNull::get(Type),
                             CS.getInstruction()) == LazyValueInfo::False)
-      Indices.push_back(ArgNo + 1);
+      Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
     ArgNo++;
   }
 
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 48eda09..198d2b2 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -613,16 +613,16 @@ bool GuardWideningImpl::combineRangeChecks(
     // We have a series of f+1 checks as:
     //
     //   I+k_0 u< L   ... Chk_0
-    //   I_k_1 u< L   ... Chk_1
+    //   I+k_1 u< L   ... Chk_1
     //   ...
-    //   I_k_f u< L   ... Chk_(f+1)
+    //   I+k_f u< L   ... Chk_f
     //
-    //     with forall i in [0,f): k_f-k_i u< k_f-k_0  ... Precond_0
+    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
     //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
     //          k_f != k_0                             ... Precond_2
     //
     // Claim:
-    //   Chk_0 AND Chk_(f+1)  implies all the other checks
+    //   Chk_0 AND Chk_f  implies all the other checks
     //
     // Informal proof sketch:
     //
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 73e8ce0..3151ccd 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
@@ -29,6 +30,21 @@ using namespace llvm;
 
 STATISTIC(NumDeleted, "Number of loops deleted");
 
+/// This function deletes dead loops. The caller of this function needs to
+/// guarantee that the loop is in fact dead. Here we handle two kinds of dead
+/// loop. The first kind (\p isLoopDead) is where only invariant values from
+/// within the loop are used outside of it. The second kind (\p
+/// isLoopNeverExecuted) is where the loop is provably never executed. We can
+/// always remove never executed loops since they will not cause any
+/// difference to program behaviour.
+///
+/// This also updates the relevant analysis information in \p DT, \p SE, and \p
+/// LI. It also updates the loop PM if an updater struct is provided.
+// TODO: This function will be used by loop-simplifyCFG as well. So, move this
+// to LoopUtils.cpp
+static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                           LoopInfo &LI, bool LoopIsNeverExecuted,
+                           LPMUpdater *Updater = nullptr);
 /// Determines if a loop is dead.
 ///
 /// This assumes that we've already checked for unique exit and exiting blocks,
@@ -84,12 +100,44 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
   return true;
 }
 
+/// Returns true if there is no viable path from the entry block to the
+/// header of \p L. To save compile time this is only a local search: just the
+/// terminators of the preheader's direct predecessors are inspected.
+static bool isLoopNeverExecuted(Loop *L) {
+  using namespace PatternMatch;
+
+  auto *Preheader = L->getLoopPreheader();
+  // TODO: We can relax this constraint, since we just need a loop
+  // predecessor.
+  assert(Preheader && "Needs preheader!");
+
+  if (Preheader == &Preheader->getParent()->getEntryBlock())
+    return false; // The preheader is the function's entry block.
+  // Every predecessor of the preheader must end in a conditional branch on a
+  // constant, with the loop's preheader as the not-taken successor.
+  for (auto *Pred: predecessors(Preheader)) {
+    BasicBlock *Taken, *NotTaken;
+    ConstantInt *Cond;
+    if (!match(Pred->getTerminator(),
+               m_Br(m_ConstantInt(Cond), Taken, NotTaken)))
+      return false;
+    if (!Cond->getZExtValue())
+      std::swap(Taken, NotTaken); // A zero condition takes the other edge.
+    if (Taken == Preheader)
+      return false;
+  }
+  assert(!pred_empty(Preheader) &&
+         "Preheader should have predecessors at this point!");
+  // No predecessor branches to the preheader on its taken edge.
+  return true;
+}
+
 /// Remove a loop if it is dead.
 ///
 /// A loop is considered dead if it does not impact the observable behavior of
 /// the program other than finite running time. This never removes a loop that
-/// might be infinite, as doing so could change the halting/non-halting nature
-/// of a program.
+/// might be infinite (unless it is never executed), as doing so could change
+/// the halting/non-halting nature of a program.
 ///
 /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
 /// order to make various safety checks work.
@@ -97,9 +145,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
 /// \returns true if any changes were made. This may mutate the loop even if it
 /// is unable to delete it due to hoisting trivially loop invariant
 /// instructions out of the loop.
-///
-/// This also updates the relevant analysis information in \p DT, \p SE, and \p
-/// LI. It also updates the loop PM if an updater struct is provided.
 static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                              LoopInfo &LI, LPMUpdater *Updater = nullptr) {
   assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
@@ -119,6 +164,17 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   if (L->begin() != L->end())
     return false;
 
+
+  BasicBlock *ExitBlock = L->getUniqueExitBlock();
+
+  if (ExitBlock && isLoopNeverExecuted(L)) {
+    deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater);
+    ++NumDeleted;
+    return true;
+  }
+
+  // The remaining checks below are for a loop being dead because all statements
+  // in the loop are invariant.
   SmallVector<BasicBlock *, 4> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
 
@@ -126,7 +182,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   // be in the situation of needing to be able to solve statically which exit
   // block will be branched to, or trying to preserve the branching logic in
   // a loop invariant manner.
-  BasicBlock *ExitBlock = L->getUniqueExitBlock();
   if (!ExitBlock)
     return false;
 
@@ -141,6 +196,19 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   if (isa<SCEVCouldNotCompute>(S))
     return Changed;
 
+  deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater);
+  ++NumDeleted;
+
+  return true;
+}
+
+static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+                           LoopInfo &LI, bool LoopIsNeverExecuted,
+                           LPMUpdater *Updater) {
+  assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+  auto *Preheader = L->getLoopPreheader();
+  assert(Preheader && "Preheader should exist!");
+
   // Now that we know the removal is safe, remove the loop by changing the
   // branch from the preheader to go to the single exit block.
   //
@@ -156,17 +224,29 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
   // to determine what it needs to clean up.
   SE.forgetLoop(L);
 
+  auto *ExitBlock = L->getUniqueExitBlock();
+  assert(ExitBlock && "Should have a unique exit block!");
+
   // Connect the preheader directly to the exit block.
-  TerminatorInst *TI = Preheader->getTerminator();
-  TI->replaceUsesOfWith(L->getHeader(), ExitBlock);
+  // Even when the loop is never executed, we cannot remove the edge from the
+  // source block to the exit block. Consider the case where the unexecuted loop
+  // branches back to an outer loop. If we deleted the loop and removed the edge
+  // coming to this inner loop, this will break the outer loop structure (by
+  // deleting the backedge of the outer loop). If the outer loop is indeed a
+  // non-loop, it will be deleted in a future iteration of loop deletion pass.
+  Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock);
 
-  // Rewrite phis in the exit block to get their inputs from
-  // the preheader instead of the exiting block.
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  // Rewrite phis in the exit block to get their inputs from the Preheader
+  // instead of the exiting block.
   BasicBlock *ExitingBlock = ExitingBlocks[0];
   BasicBlock::iterator BI = ExitBlock->begin();
   while (PHINode *P = dyn_cast<PHINode>(BI)) {
     int j = P->getBasicBlockIndex(ExitingBlock);
     assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+    if (LoopIsNeverExecuted)
+      P->setIncomingValue(j, UndefValue::get(P->getType()));
     P->setIncomingBlock(j, Preheader);
     for (unsigned i = 1; i < ExitingBlocks.size(); ++i)
       P->removeIncomingValue(ExitingBlocks[i]);
@@ -211,9 +291,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
 
   // The last step is to update LoopInfo now that we've eliminated this loop.
   LI.markAsRemoved(L);
-  ++NumDeleted;
-
-  return true;
 }
 
 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -254,7 +331,6 @@ Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); }
 bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) {
   if (skipLoop(L))
     return false;
-
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 162d91b..62b5d80 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1440,18 +1440,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
   // True if one of the incoming phi edges is a backedge.
   bool HasBackedge = false;
   // All constant tracks the state of whether all the *original* phi operands
-  // were constant.
-  // This is really shorthand for "this phi cannot cycle due to forward
-  // propagation", as any
-  // change in value of the phi is guaranteed not to later change the value of
-  // the phi.
+  // were constant. This is really shorthand for "this phi cannot cycle due
+  // to forward propagation", as any change in value of the phi is guaranteed
+  // not to later change the value of the phi.
   // IE it can't be v = phi(undef, v+1)
   bool AllConstant = true;
   auto *E =
       cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
   // We match the semantics of SimplifyPhiNode from InstructionSimplify here.
-
-  // See if all arguaments are the same.
+  // See if all arguments are the same.
   // We track if any were undef because they need special handling.
   bool HasUndef = false;
   auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index c11247c..77b2bd8 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2286,12 +2286,11 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
   if (AH.getDereferenceableOrNullBytes(Index))
     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
                                   AH.getDereferenceableOrNullBytes(Index)));
-  if (AH.doesNotAlias(Index))
+  if (AH.getAttributes().hasAttribute(Index, Attribute::NoAlias))
     R.addAttribute(Attribute::NoAlias);
 
   if (!R.empty())
-    AH.setAttributes(AH.getAttributes().removeAttributes(
-        Ctx, Index, AttributeList::get(Ctx, Index, R)));
+    AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
 }
 
 void
@@ -2300,7 +2299,8 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
 
   for (Argument &A : F.args())
     if (isa<PointerType>(A.getType()))
-      RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+      RemoveNonValidAttrAtIndex(Ctx, F,
+                                A.getArgNo() + AttributeList::FirstArgIndex);
 
   if (isa<PointerType>(F.getReturnType()))
     RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
@@ -2336,7 +2336,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
     if (CallSite CS = CallSite(&I)) {
       for (int i = 0, e = CS.arg_size(); i != e; i++)
         if (isa<PointerType>(CS.getArgument(i)->getType()))
-          RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
+          RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex);
       if (isa<PointerType>(CS.getType()))
         RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
     }
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index a7c308b..a0fc966 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -208,47 +208,6 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) {
   return false;
 }
 
-static unsigned ComputeSpeculationCost(const Instruction *I,
-                                       const TargetTransformInfo &TTI) {
-  switch (Operator::getOpcode(I)) {
-    case Instruction::GetElementPtr:
-    case Instruction::Add:
-    case Instruction::Mul:
-    case Instruction::And:
-    case Instruction::Or:
-    case Instruction::Select:
-    case Instruction::Shl:
-    case Instruction::Sub:
-    case Instruction::LShr:
-    case Instruction::AShr:
-    case Instruction::Xor:
-    case Instruction::ZExt:
-    case Instruction::SExt:
-    case Instruction::Call:
-    case Instruction::BitCast:
-    case Instruction::PtrToInt:
-    case Instruction::IntToPtr:
-    case Instruction::AddrSpaceCast:
-    case Instruction::FPToUI:
-    case Instruction::FPToSI:
-    case Instruction::UIToFP:
-    case Instruction::SIToFP:
-    case Instruction::FPExt:
-    case Instruction::FPTrunc:
-    case Instruction::FAdd:
-    case Instruction::FSub:
-    case Instruction::FMul:
-    case Instruction::FDiv:
-    case Instruction::FRem:
-    case Instruction::ICmp:
-    case Instruction::FCmp:
-      return TTI.getUserCost(I);
-
-    default:
-      return UINT_MAX; // Disallow anything not whitelisted.
-  }
-}
-
 bool SpeculativeExecutionPass::considerHoistingFromTo(
     BasicBlock &FromBlock, BasicBlock &ToBlock) {
   SmallSet<const Instruction *, 8> NotHoisted;
@@ -264,7 +223,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
 
   unsigned TotalSpeculationCost = 0;
   for (auto& I : FromBlock) {
-    const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
+    const unsigned Cost = TTI->getUserCost(&I);
     if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
         AllPrecedingUsesFromBlockHoisted(&I)) {
       TotalSpeculationCost += Cost;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 6cd9f16..1956697 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -58,7 +58,7 @@ static bool setOnlyReadsMemory(Function &F) {
 static bool setOnlyAccessesArgMemory(Function &F) {
   if (F.onlyAccessesArgMemory())
     return false;
-  F.setOnlyAccessesArgMemory ();
+  F.setOnlyAccessesArgMemory();
   ++NumArgMemOnly;
   return true;
 }
@@ -71,37 +71,36 @@ static bool setDoesNotThrow(Function &F) {
   return true;
 }
 
-static bool setDoesNotCapture(Function &F, unsigned n) {
-  if (F.doesNotCapture(n))
+static bool setRetDoesNotAlias(Function &F) {
+  if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias))
     return false;
-  F.setDoesNotCapture(n);
-  ++NumNoCapture;
+  F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+  ++NumNoAlias;
   return true;
 }
 
-static bool setOnlyReadsMemory(Function &F, unsigned n) {
-  if (F.onlyReadsMemory(n))
+static bool setDoesNotCapture(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::NoCapture))
     return false;
-  F.setOnlyReadsMemory(n);
-  ++NumReadOnlyArg;
+  F.addParamAttr(ArgNo, Attribute::NoCapture);
+  ++NumNoCapture;
   return true;
 }
 
-static bool setDoesNotAlias(Function &F, unsigned n) {
-  if (F.doesNotAlias(n))
+static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) {
+  if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly))
     return false;
-  F.setDoesNotAlias(n);
-  ++NumNoAlias;
+  F.addParamAttr(ArgNo, Attribute::ReadOnly);
+  ++NumReadOnlyArg;
   return true;
 }
 
-static bool setNonNull(Function &F, unsigned n) {
-  assert(
-      (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) &&
-      "nonnull applies only to pointers");
-  if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
+static bool setRetNonNull(Function &F) {
+  assert(F.getReturnType()->isPointerTy() &&
+         "nonnull applies only to pointers");
+  if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
     return false;
-  F.addAttribute(n, Attribute::NonNull);
+  F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
   ++NumNonNull;
   return true;
 }
@@ -116,7 +115,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strlen:
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_strchr:
   case LibFunc_strrchr:
@@ -131,8 +130,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strtold:
   case LibFunc_strtoull:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_strcpy:
   case LibFunc_stpcpy:
@@ -141,14 +140,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strncpy:
   case LibFunc_stpncpy:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_strxfrm:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_strcmp:      // 0,1
   case LibFunc_strspn:      // 0,1
@@ -159,84 +158,84 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_strncasecmp: //
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_strstr:
   case LibFunc_strpbrk:
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_strtok:
   case LibFunc_strtok_r:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_scanf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_setbuf:
   case LibFunc_setvbuf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_strdup:
   case LibFunc_strndup:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_stat:
   case LibFunc_statvfs:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_sscanf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_sprintf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_snprintf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 3);
-    Changed |= setOnlyReadsMemory(F, 3);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_setitimer:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 1);
     Changed |= setDoesNotCapture(F, 2);
-    Changed |= setDoesNotCapture(F, 3);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_system:
     // May throw; "system" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_malloc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_memcmp:
     Changed |= setOnlyReadsMemory(F);
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_memchr:
   case LibFunc_memrchr:
@@ -247,100 +246,100 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_modff:
   case LibFunc_modfl:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_memcpy:
   case LibFunc_mempcpy:
   case LibFunc_memccpy:
   case LibFunc_memmove:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_memcpy_chk:
     Changed |= setDoesNotThrow(F);
     return Changed;
   case LibFunc_memalign:
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_mkdir:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_mktime:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_realloc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_read:
     // May throw; "read" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_rewind:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_rmdir:
   case LibFunc_remove:
   case LibFunc_realpath:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_rename:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_readlink:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_write:
     // May throw; "write" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_bcopy:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_bcmp:
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyReadsMemory(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_bzero:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_calloc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_chmod:
   case LibFunc_chown:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_ctermid:
   case LibFunc_clearerr:
   case LibFunc_closedir:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_atoi:
   case LibFunc_atol:
@@ -348,26 +347,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_atoll:
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyReadsMemory(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_access:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_fopen:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_fdopen:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_feof:
   case LibFunc_free:
@@ -384,11 +383,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_funlockfile:
   case LibFunc_ftrylockfile:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_ferror:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setOnlyReadsMemory(F);
     return Changed;
   case LibFunc_fputc:
@@ -398,51 +397,51 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_frexpl:
   case LibFunc_fstatvfs:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_fgets:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 3);
+    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_fread:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 4);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotCapture(F, 3);
     return Changed;
   case LibFunc_fwrite:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 4);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotCapture(F, 3);
     // FIXME: readonly #1?
     return Changed;
   case LibFunc_fputs:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_fscanf:
   case LibFunc_fprintf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_fgetpos:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_getc:
   case LibFunc_getlogin_r:
   case LibFunc_getc_unlocked:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_getenv:
     Changed |= setDoesNotThrow(F);
     Changed |= setOnlyReadsMemory(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_gets:
   case LibFunc_getchar:
@@ -450,132 +449,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_getitimer:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_getpwnam:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_ungetc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_uname:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_unlink:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_unsetenv:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_utime:
   case LibFunc_utimes:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_putc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_puts:
   case LibFunc_printf:
   case LibFunc_perror:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_pread:
     // May throw; "pread" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_pwrite:
     // May throw; "pwrite" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_putchar:
     Changed |= setDoesNotThrow(F);
     return Changed;
   case LibFunc_popen:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_pclose:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_vscanf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_vsscanf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_vfscanf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_valloc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_vprintf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_vfprintf:
   case LibFunc_vsprintf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_vsnprintf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 3);
-    Changed |= setOnlyReadsMemory(F, 3);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_open:
     // May throw; "open" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_opendir:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_tmpfile:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_times:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_htonl:
   case LibFunc_htons:
@@ -586,93 +585,93 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     return Changed;
   case LibFunc_lstat:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_lchown:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_qsort:
     // May throw; places call through function pointer.
-    Changed |= setDoesNotCapture(F, 4);
+    Changed |= setDoesNotCapture(F, 3);
     return Changed;
   case LibFunc_dunder_strdup:
   case LibFunc_dunder_strndup:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_dunder_strtok_r:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   case LibFunc_under_IO_getc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_under_IO_putc:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_dunder_isoc99_scanf:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_stat64:
   case LibFunc_lstat64:
   case LibFunc_statvfs64:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_dunder_isoc99_sscanf:
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_fopen64:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setOnlyReadsMemory(F, 0);
     Changed |= setOnlyReadsMemory(F, 1);
-    Changed |= setOnlyReadsMemory(F, 2);
     return Changed;
   case LibFunc_fseeko64:
   case LibFunc_ftello64:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
     return Changed;
   case LibFunc_tmpfile64:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotAlias(F, 0);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   case LibFunc_fstat64:
   case LibFunc_fstatvfs64:
     Changed |= setDoesNotThrow(F);
-    Changed |= setDoesNotCapture(F, 2);
+    Changed |= setDoesNotCapture(F, 1);
     return Changed;
   case LibFunc_open64:
     // May throw; "open" is a valid pthread cancellation point.
-    Changed |= setDoesNotCapture(F, 1);
-    Changed |= setOnlyReadsMemory(F, 1);
+    Changed |= setDoesNotCapture(F, 0);
+    Changed |= setOnlyReadsMemory(F, 0);
     return Changed;
   case LibFunc_gettimeofday:
     // Currently some platforms have the restrict keyword on the arguments to
     // gettimeofday. To be conservative, do not add noalias to gettimeofday's
     // arguments.
     Changed |= setDoesNotThrow(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
     return Changed;
   case LibFunc_Znwj: // new(unsigned int)
   case LibFunc_Znwm: // new(unsigned long)
@@ -683,17 +682,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
   case LibFunc_msvc_new_array_int: // new[](unsigned int)
   case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
     // Operator new always returns a nonnull noalias pointer
-    Changed |= setNonNull(F, AttributeList::ReturnIndex);
-    Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex);
+    Changed |= setRetNonNull(F);
+    Changed |= setRetDoesNotAlias(F);
     return Changed;
   //TODO: add LibFunc entries for:
   //case LibFunc_memset_pattern4:
   //case LibFunc_memset_pattern8:
   case LibFunc_memset_pattern16:
     Changed |= setOnlyAccessesArgMemory(F);
+    Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
-    Changed |= setDoesNotCapture(F, 2);
-    Changed |= setOnlyReadsMemory(F, 2);
+    Changed |= setOnlyReadsMemory(F, 1);
     return Changed;
   // int __nvvm_reflect(const char *)
   case LibFunc_nvvm_reflect:
@@ -889,7 +888,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
   Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
                                          Op->getType());
   CallInst *CI = B.CreateCall(Callee, Op, Name);
-  CI->setAttributes(Attrs);
+
+  // The incoming attribute set may have come from a speculatable intrinsic, but
+  // is being replaced with a library call which is not allowed to be
+  // speculatable.
+  CI->setAttributes(Attrs.removeAttribute(B.getContext(),
+                                          AttributeList::FunctionIndex,
+                                          Attribute::Speculatable));
   if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
 
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 85db734..391fde3 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -512,6 +512,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
 
   BasicBlock *Latch = L->getLoopLatch();
 
+  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
+  // targets of the Latch be the single exit block out of the loop. This needs
+  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
+  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
+  assert(
+      (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) &&
+      "one of the loop latch successors should be "
+      "the exit block!");
+  // Avoid warning of unused `LatchBR` variable in release builds.
+  (void)LatchBR;
   // Loop structure is the following:
   //
   // PreHeader
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 2c1c304..9e71d74 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -537,7 +537,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) {
   if (isa<ConstantPointerNull>(EndPtr)) {
     // With a null EndPtr, this function won't capture the main argument.
     // It would be readonly too, except that it still may write to errno.
-    CI->addAttribute(1, Attribute::NoCapture);
+    CI->addParamAttr(0, Attribute::NoCapture);
   }
 
   return nullptr;
diff --git a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index f2d3f3f..b673399 100644
--- a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -78,10 +78,10 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x 
 ; SKX-LABEL: test_gather_2f64
 ; SKX: Found an estimated cost of 7 {{.*}}.gather
 
-%res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
 
 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
 
@@ -94,7 +94,7 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %
 ; SKX-LABEL: test_gather_4i32
 ; SKX: Found an estimated cost of 6 {{.*}}.gather
 
-%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
   ret <4 x i32> %res
 }
 
@@ -109,10 +109,10 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0)
 ; SKX-LABEL: test_gather_4i32_const_mask
 ; SKX: Found an estimated cost of 6 {{.*}}.gather
 
-%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
   ret <4 x i32> %res
 }
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
 
 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
 
@@ -128,7 +128,7 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind)
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -146,7 +146,7 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -164,7 +164,7 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -185,7 +185,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -204,7 +204,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
   ret void
 }
 
@@ -218,11 +218,11 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
 ; SKX-LABEL: test_scatter_8i32
 ; SKX: Found an estimated cost of 10 {{.*}}.scatter
 
-  call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
   ret void
 }
 
-declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
 
 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; AVX2-LABEL: test_scatter_4i32
@@ -234,7 +234,7 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; SKX-LABEL: test_scatter_4i32
 ; SKX: Found an estimated cost of 6 {{.*}}.scatter
 
-  call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
   ret void
 }
 
@@ -252,7 +252,7 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask)
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
 
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
   ret <4 x float>%res
 }
 
@@ -270,14 +270,14 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
 
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
   ret <4 x float>%res
 }
 
-declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
-declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
-declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
-declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
+declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
 
 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
diff --git a/test/Analysis/CostModel/X86/vector_gep.ll b/test/Analysis/CostModel/X86/vector_gep.ll
index e49f258..17f70df 100644
--- a/test/Analysis/CostModel/X86/vector_gep.ll
+++ b/test/Analysis/CostModel/X86/vector_gep.ll
@@ -3,7 +3,7 @@
 %struct.S = type { [1000 x i32] }
 
 
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 
 define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
   %temp = insertelement <4 x i64> undef, i64 %base, i32 0
@@ -12,6 +12,6 @@ define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
   %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
 ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
   %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
-  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
   ret <4 x i32> %res
 }
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
index d00fe58..87ad371 100644
--- a/test/Assembler/auto_upgrade_intrinsics.ll
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -85,6 +85,23 @@ define void @tests.masked.store(<2 x double>* %ptr, <2 x i1> %mask, <2 x double>
   ret void
 }
 
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+
+define <2 x double> @tests.masked.gather(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %passthru)  {
+; CHECK-LABEL: @tests.masked.gather(
+; CHECK: @llvm.masked.gather.v2f64.v2p0f64
+  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+
+define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val)  {
+; CHECK-LABEL: @tests.masked.scatter(
+; CHECK: @llvm.masked.scatter.v2f64.v2p0f64
+  call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask)
+  ret void
+}
 
 declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
 declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
new file mode 100644
index 0000000..a95e9f8
--- /dev/null
+++ b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
+
+; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+; IR: alloca [5 x i32]
+; ASM-LABEL: {{^}}promote_alloca_shaders:
+; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
+
+define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %tmp0 = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+  %tmp2 = load i32, i32* %arrayidx4, align 4
+  store i32 %tmp2, i32 addrspace(1)* %out, align 4
+  %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+  %tmp3 = load i32, i32* %arrayidx5
+  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx6
+  ret void
+}
+
+; OPT-LABEL: @promote_to_vector_call_c(
+; OPT-NOT: alloca
+; OPT: extractelement <2 x i32> %{{[0-9]+}}, i32 %in
+; ASM-NOT: LDSByteSize
+define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
+  %tmp4 = load i32, i32* %tmp3
+  %tmp5 = load volatile i32, i32 addrspace(1)* undef
+  %tmp6 = add i32 %tmp4, %tmp5
+  store i32 %tmp6, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @no_promote_to_lds(
+; OPT: alloca
+; ASM-NOT: LDSByteSize
+define void @no_promote_to_lds(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %0 = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+  %2 = load i32, i32* %arrayidx10, align 4
+  store i32 %2, i32 addrspace(1)* %out, align 4
+  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+  %3 = load i32, i32* %arrayidx12
+  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
+  store i32 %3, i32 addrspace(1)* %arrayidx13
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll b/test/CodeGen/AMDGPU/promote-alloca-shaders.ll
deleted file mode 100644
index d40fca9..0000000
--- a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
-
-; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
-; IR: alloca [5 x i32]
-; ASM-LABEL: {{^}}promote_alloca_shaders:
-; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
-
-define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
-entry:
-  %stack = alloca [5 x i32], align 4
-  %tmp0 = load i32, i32 addrspace(1)* %in, align 4
-  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
-  store i32 4, i32* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
-  store i32 5, i32* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
-  %tmp2 = load i32, i32* %arrayidx4, align 4
-  store i32 %tmp2, i32 addrspace(1)* %out, align 4
-  %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
-  %tmp3 = load i32, i32* %arrayidx5
-  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
-  store i32 %tmp3, i32 addrspace(1)* %arrayidx6
-  ret void
-}
-
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 699ef6e..bef7bbe 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -199,7 +199,8 @@
 ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
 ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
 ; ARMv6k
-; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s 2> %t | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: FileCheck %s < %t --allow-empty --check-prefix=CPU-SUPPORTED
 ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
 ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
 ; ARMv6m
@@ -222,6 +223,8 @@
 ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
 ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
 
+; CPU-SUPPORTED-NOT: is not a recognized processor for this target
+
 ; XSCALE:      .eabi_attribute 6, 5
 ; XSCALE:      .eabi_attribute 8, 1
 ; XSCALE:      .eabi_attribute 9, 1
diff --git a/test/CodeGen/ARM/load-arm.ll b/test/CodeGen/ARM/load-arm.ll
new file mode 100644
index 0000000..3807424
--- /dev/null
+++ b/test/CodeGen/ARM/load-arm.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck %s
+
+; We ended up feeding a deleted node back to TableGen when we converted e.g.
+; "Off * 410" into "(Off * 205) << 1" where the multiplication already existed
+; in the DAG. Below, 277288 == 34661 << 3, and "%count * 34661" is also live.
+
+; CHECK-LABEL: addrmode_cse_mutation:
+; CHECK: {{mul|muls}}    [[OFFSET:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: {{ldrb|ldrb.w}} {{r[0-9]+}}, [r0, [[OFFSET]], lsl #3]
+define i32 @addrmode_cse_mutation(i8* %base, i32 %count) {
+  %offset = mul i32 %count, 277288
+  %ptr = getelementptr i8, i8* %base, i32 %offset
+  %val = load volatile i8, i8* %ptr
+  %res = mul i32 %count, 34661
+  ret i32 %res
+}
+
+; CHECK-LABEL: addrmode_cse_multi_use:
+; CHECK-NOT: {{ldrb|ldrb.w}} {{r[0-9]+}}, [{{r[0-9]+}}, {{r[0-9]+}}, lsl #3]
+define i32 @addrmode_cse_multi_use(i8* %base, i32 %count) {
+  %offset = mul i32 %count, 277288
+  %ptr = getelementptr i8, i8* %base, i32 %offset
+  %val = load volatile i8, i8* %ptr
+  %res = mul i32 %count, 34661
+  %res.1 = add i32 %res, %offset
+  ret i32 %res.1
+}
diff --git a/test/CodeGen/AVR/brind.ll b/test/CodeGen/AVR/brind.ll
index f92038d..ec8262e 100644
--- a/test/CodeGen/AVR/brind.ll
+++ b/test/CodeGen/AVR/brind.ll
@@ -4,8 +4,6 @@
 
 define i8 @brind(i8 %p) {
 ; CHECK-LABEL: brind:
-; CHECK: ld r30
-; CHECK: ldd r31
 ; CHECK: ijmp
 entry:
   %idxprom = sext i8 %p to i16
diff --git a/test/CodeGen/AVR/dynalloca.ll b/test/CodeGen/AVR/dynalloca.ll
index 13f5030..6aa776e 100644
--- a/test/CodeGen/AVR/dynalloca.ll
+++ b/test/CodeGen/AVR/dynalloca.ll
@@ -69,9 +69,9 @@ define void @dynalloca2(i16 %x) {
 ; SP restore
 ; CHECK: in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: out 62, r29
+; CHECK-NEXT: out 62, r7
 ; CHECK-NEXT: out 63, r0
-; CHECK-NEXT: out 61, r28
+; CHECK-NEXT: out 61, r6
   %vla = alloca i16, i16 %x
   call void @foo2(i16* %vla, i64 0, i64 0, i64 0)
   ret void
diff --git a/test/CodeGen/AVR/inline-asm/inline-asm.ll b/test/CodeGen/AVR/inline-asm/inline-asm.ll
index 88d0c3a..26f9080 100644
--- a/test/CodeGen/AVR/inline-asm/inline-asm.ll
+++ b/test/CodeGen/AVR/inline-asm/inline-asm.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=avr -mattr=movw -no-integrated-as | FileCheck %s
+; XFAIL: *
 
 ; CHECK-LABEL: no_operands:
 define void @no_operands() {
diff --git a/test/CodeGen/BPF/reloc.ll b/test/CodeGen/BPF/reloc.ll
new file mode 100644
index 0000000..75dbebf
--- /dev/null
+++ b/test/CodeGen/BPF/reloc.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=bpfel -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+%struct.bpf_context = type { i64, i64, i64, i64, i64, i64, i64 }
+%struct.sk_buff = type { i64, i64, i64, i64, i64, i64, i64 }
+%struct.net_device = type { i64, i64, i64, i64, i64, i64, i64 }
+
+@bpf_prog1.devname = private unnamed_addr constant [3 x i8] c"lo\00", align 1
+@bpf_prog1.fmt = private unnamed_addr constant [15 x i8] c"skb %x dev %x\0A\00", align 1
+
+; Function Attrs: norecurse
+define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/net/netif_receive_skb" {
+  %devname = alloca [3 x i8], align 1
+  %fmt = alloca [15 x i8], align 1
+  %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false)
+  %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0
+  %3 = load i64, i64* %2, align 8
+  %4 = inttoptr i64 %3 to %struct.sk_buff*
+  %5 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %4, i64 0, i32 2
+  %6 = bitcast i64* %5 to i8*
+  %7 = call i8* inttoptr (i64 4 to i8* (i8*)*)(i8* %6) #1
+  %8 = call i32 inttoptr (i64 9 to i32 (i8*, i8*, i32)*)(i8* %7, i8* %1, i32 2) #1
+  %9 = icmp eq i32 %8, 0
+  br i1 %9, label %10, label %13
+
+; <label>:10                                      ; preds = %0
+  %11 = getelementptr inbounds [15 x i8], [15 x i8]* %fmt, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %11, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @bpf_prog1.fmt, i64 0, i64 0), i64 15, i32 1, i1 false)
+  %12 = call i32 (i8*, i32, ...) inttoptr (i64 11 to i32 (i8*, i32, ...)*)(i8* %11, i32 15, %struct.sk_buff* %4, i8* %7) #1
+  br label %13
+
+; <label>:13                                      ; preds = %10, %0
+  ret i32 0
+
+; CHECK-RELOC: file format ELF64-BPF
+; CHECK-RELOC: RELOCATION RECORDS FOR [.rel.eh_frame]:
+; CHECK-RELOC: R_BPF_64_64 events/net/netif_receive_skb
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #1
+
+attributes #0 = { norecurse }
diff --git a/test/CodeGen/Hexagon/adjust-latency-stackST.ll b/test/CodeGen/Hexagon/adjust-latency-stackST.ll
new file mode 100644
index 0000000..915db91
--- /dev/null
+++ b/test/CodeGen/Hexagon/adjust-latency-stackST.ll
@@ -0,0 +1,81 @@
+; RUN: llc -march=hexagon -disable-post-ra < %s | FileCheck %s
+
+; Make sure that if there's only one store to the stack, it gets packetized
+; with allocframe as there's a latency of 2 cycles between allocframe and
+; the following store if not in the same packet.
+
+; CHECK: {
+; CHECK: memd(r29
+; CHECK-NOT: {
+; CHECK: allocframe
+; CHECK: }
+; CHECK: = memw(gp+#G)
+
+%struct.0 = type { %struct.0*, i32, %struct.2 } ; self-referential: field 0 points to another %struct.0
+%struct.1 = type { i32, i32, [31 x i8] }
+%struct.2 = type { %struct.1 } ; wrapper around a single %struct.1
+
+@G = common global %struct.0* null, align 4 ; global pointer that @test loads through bitcasts; the last pattern above names it in a gp-relative load
+
+define i32 @test(%struct.0* nocapture %a0) #0 { ; function under test: one alloca-backed slot (%v2) plus a mix of loads, stores, memcpy and calls
+b1:
+  %v2 = alloca %struct.0*, align 4 ; stack slot; its address is later passed to @f0
+  %v3 = bitcast %struct.0** %v2 to i8*
+  %v4 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 0
+  %v5 = load %struct.0*, %struct.0** %v4, align 4
+  store %struct.0* %v5, %struct.0** %v2, align 4 ; the only IR store whose address is the alloca itself
+  %v6 = bitcast %struct.0* %v5 to i8*
+  %v7 = load i8*, i8** bitcast (%struct.0** @G to i8**), align 4
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %v6, i8* %v7, i32 48, i32 4, i1 false) ; 48-byte, 4-aligned copy from the pointer held in @G
+  %v8 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 2, i32 0, i32 1
+  store i32 5, i32* %v8, align 4
+  %v9 = getelementptr inbounds %struct.0, %struct.0* %v5, i32 0, i32 2, i32 0, i32 1
+  store i32 5, i32* %v9, align 4
+  %v10 = bitcast %struct.0* %a0 to i32*
+  %v11 = load i32, i32* %v10, align 4
+  %v12 = bitcast %struct.0* %v5 to i32*
+  store i32 %v11, i32* %v12, align 4
+  %v13 = call i32 bitcast (i32 (...)* @f0 to i32 (%struct.0**)*)(%struct.0** nonnull %v2) ; varargs declaration called through a fixed-signature bitcast; receives the slot's address
+  %v14 = load %struct.0*, %struct.0** %v2, align 4 ; reload the slot after the call
+  %v15 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 1
+  %v16 = load i32, i32* %v15, align 4
+  %v17 = icmp eq i32 %v16, 0
+  br i1 %v17, label %b18, label %b32
+
+b18:                                              ; preds = %b1
+  %v19 = bitcast %struct.0** %v2 to i32**
+  %v20 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 2, i32 0, i32 1
+  store i32 6, i32* %v20, align 4
+  %v21 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 2, i32 0, i32 0
+  %v22 = load i32, i32* %v21, align 4
+  %v23 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 2, i32 0, i32 0
+  %v24 = call i32 bitcast (i32 (...)* @f1 to i32 (i32, i32*)*)(i32 %v22, i32* %v23)
+  %v25 = load i32*, i32** bitcast (%struct.0** @G to i32**), align 4
+  %v26 = load i32, i32* %v25, align 4
+  %v27 = load i32*, i32** %v19, align 4 ; reads the slot's current pointer value, viewed as i32*
+  store i32 %v26, i32* %v27, align 4 ; stores through that pointer, not into the slot
+  %v28 = load %struct.0*, %struct.0** %v2, align 4
+  %v29 = getelementptr inbounds %struct.0, %struct.0* %v28, i32 0, i32 2, i32 0, i32 1
+  %v30 = load i32, i32* %v29, align 4
+  %v31 = call i32 bitcast (i32 (...)* @f2 to i32 (i32, i32, i32*)*)(i32 %v30, i32 10, i32* %v29)
+  br label %b36
+
+b32:                                              ; preds = %b1
+  %v33 = bitcast %struct.0* %a0 to i8**
+  %v34 = load i8*, i8** %v33, align 4
+  %v35 = bitcast %struct.0* %a0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %v35, i8* %v34, i32 48, i32 4, i1 false) ; 48-byte copy into %a0 from the pointer stored in its first field
+  br label %b36
+
+b36:                                              ; preds = %b32, %b18
+  ret i32 undef ; the returned value is left unspecified
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #1 ; memcpy intrinsic used by both 48-byte copies in @test
+
+declare i32 @f0(...) #0 ; @f0, @f1 and @f2 are varargs declarations; @test calls each through a bitcast to a fixed signature
+declare i32 @f1(...) #0
+declare i32 @f2(...) #0
+
+attributes #0 = { nounwind } ; used by @test and by @f0, @f1, @f2
+attributes #1 = { argmemonly nounwind } ; used by the memcpy declaration
diff --git a/test/CodeGen/Hexagon/multi-cycle.ll b/test/CodeGen/Hexagon/multi-cycle.ll
new file mode 100644
index 0000000..fc02182
--- /dev/null
+++ b/test/CodeGen/Hexagon/multi-cycle.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
+
+; CHECK: v{{[0-9]+}}.h{{ *}}={{ *}}vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
+; CHECK: }
+; CHECK: {
+; CHECK: v{{[0-9]+}}{{ *}}={{ *}}valign(v{{[0-9]+}},v{{[0-9]+}},r{{[0-9]+}})
+; CHECK: }
+; CHECK: {
+; CHECK: v{{[0-9]+}}{{ *}}={{ *}}valign(v{{[0-9]+}},v{{[0-9]+}},r{{[0-9]+}})
+
+target triple = "hexagon"
+
+@ZERO = global <16 x i32> zeroinitializer, align 64 ; 64-byte-aligned all-zero vector; loaded once in the entry block of @fred
+
+define void @fred(i16* nocapture readonly %a0, i32 %a1, i32 %a2, i16* nocapture %a3) #0 { ; vector kernel: entry loads, a guarded prologue (b18) and a single loop (b32) of HVX intrinsic calls
+b4:
+  %v5 = bitcast i16* %a0 to <16 x i32>*
+  %v6 = getelementptr inbounds i16, i16* %a0, i32 %a1
+  %v7 = bitcast i16* %v6 to <16 x i32>*
+  %v8 = mul nsw i32 %a1, 2
+  %v9 = getelementptr inbounds i16, i16* %a0, i32 %v8
+  %v10 = bitcast i16* %v9 to <16 x i32>*
+  %v11 = load <16 x i32>, <16 x i32>* %v5, align 64, !tbaa !1 ; vector at a0
+  %v12 = load <16 x i32>, <16 x i32>* %v7, align 64, !tbaa !1 ; vector at a0 + a1 elements
+  %v13 = load <16 x i32>, <16 x i32>* %v10, align 64, !tbaa !1 ; vector at a0 + 2*a1 elements
+  %v14 = load <16 x i32>, <16 x i32>* @ZERO, align 64, !tbaa !1
+  %v15 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v14, <16 x i32> %v14) ; subtracts the @ZERO load from itself; seeds two loop-carried phis below
+  %v16 = sdiv i32 %a2, 32 ; loop bound compared against the counter at the bottom of b32
+  %v17 = icmp sgt i32 %a2, 31
+  br i1 %v17, label %b18, label %b66 ; skip everything unless a2 is at least 32
+
+b18:                                              ; preds = %b4
+  %v19 = add i32 %v8, 32
+  %v20 = add i32 %a1, 32
+  %v21 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v12, <16 x i32> %v12)
+  %v22 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v11, <16 x i32> %v13)
+  %v23 = getelementptr inbounds i16, i16* %a0, i32 %v19
+  %v24 = getelementptr inbounds i16, i16* %a0, i32 %v20
+  %v25 = getelementptr inbounds i16, i16* %a0, i32 32
+  %v26 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v11, <16 x i32> %v13)
+  %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v22, <16 x i32> %v21)
+  %v28 = bitcast i16* %v23 to <16 x i32>*
+  %v29 = bitcast i16* %v24 to <16 x i32>*
+  %v30 = bitcast i16* %v25 to <16 x i32>*
+  %v31 = bitcast i16* %a3 to <16 x i32>*
+  br label %b32
+
+b32:                                              ; preds = %b32, %b18
+  %v33 = phi i32 [ 0, %b18 ], [ %v63, %b32 ] ; iteration counter
+  %v34 = phi <16 x i32>* [ %v31, %b18 ], [ %v62, %b32 ] ; output pointer, advanced by two vectors per iteration
+  %v35 = phi <16 x i32>* [ %v28, %b18 ], [ %v46, %b32 ]
+  %v36 = phi <16 x i32>* [ %v29, %b18 ], [ %v44, %b32 ]
+  %v37 = phi <16 x i32>* [ %v30, %b18 ], [ %v42, %b32 ]
+  %v38 = phi <16 x i32> [ %v15, %b18 ], [ %v39, %b32 ] ; previous iteration's %v39
+  %v39 = phi <16 x i32> [ %v26, %b18 ], [ %v56, %b32 ]
+  %v40 = phi <16 x i32> [ %v27, %b18 ], [ %v51, %b32 ]
+  %v41 = phi <16 x i32> [ %v15, %b18 ], [ %v40, %b32 ] ; previous iteration's %v40
+  %v42 = getelementptr inbounds <16 x i32>, <16 x i32>* %v37, i32 1
+  %v43 = load <16 x i32>, <16 x i32>* %v37, align 64, !tbaa !1
+  %v44 = getelementptr inbounds <16 x i32>, <16 x i32>* %v36, i32 1
+  %v45 = load <16 x i32>, <16 x i32>* %v36, align 64, !tbaa !1
+  %v46 = getelementptr inbounds <16 x i32>, <16 x i32>* %v35, i32 1
+  %v47 = load <16 x i32>, <16 x i32>* %v35, align 64, !tbaa !1
+  %v48 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v43, <16 x i32> %v47)
+  %v49 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v45, <16 x i32> %v45)
+  %v50 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v40, <16 x i32> %v41, i32 62)
+  %v51 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v48, <16 x i32> %v49)
+  %v52 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v51, <16 x i32> %v40, i32 2)
+  %v53 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32> %v50, <16 x i32> %v52)
+  %v54 = getelementptr inbounds <16 x i32>, <16 x i32>* %v34, i32 1
+  store <16 x i32> %v53, <16 x i32>* %v34, align 64, !tbaa !1 ; first output vector of this iteration
+  %v55 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v39, <16 x i32> %v38, i32 62)
+  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v43, <16 x i32> %v47)
+  %v57 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v56, <16 x i32> %v39, i32 2)
+  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v39, <16 x i32> %v39)
+  %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v58, <16 x i32> %v55)
+  %v60 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v59, <16 x i32> %v57)
+  %v61 = tail call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %v60)
+  %v62 = getelementptr inbounds <16 x i32>, <16 x i32>* %v34, i32 2
+  store <16 x i32> %v61, <16 x i32>* %v54, align 64, !tbaa !1 ; second output vector of this iteration
+  %v63 = add nsw i32 %v33, 1
+  %v64 = icmp slt i32 %v63, %v16
+  br i1 %v64, label %b32, label %b65 ; loop while counter + 1 < a2 / 32
+
+b65:                                              ; preds = %b32
+  br label %b66
+
+b66:                                              ; preds = %b65, %b4
+  ret void
+}
+
+declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1 ; the five Hexagon V6 intrinsics called by @fred
+declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
+declare <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } ; applied to @fred: selects hexagonv60 with the hvx feature
+attributes #1 = { nounwind readnone } ; applied to every intrinsic declaration above
+
+!1 = !{!2, !2, i64 0} ; TBAA tag attached to every load and store in @fred
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/plt-rel.ll b/test/CodeGen/Hexagon/plt-rel.ll
new file mode 100644
index 0000000..1d38cf3
--- /dev/null
+++ b/test/CodeGen/Hexagon/plt-rel.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=hexagon -relocation-model=pic -mattr=+long-calls < %s | FileCheck --check-prefix=CHECK-LONG %s
+; RUN: llc -march=hexagon -relocation-model=pic < %s | FileCheck %s
+
+; CHECK-LONG: call ##_ZL13g_usr1_called@GDPLT
+; CHECK-LONG-NOT: call _ZL13g_usr1_called@GDPLT
+; CHECK: call _ZL13g_usr1_called@GDPLT
+; CHECK-NOT: call ##_ZL13g_usr1_called@GDPLT
+
+
+target triple = "hexagon"
+
+@_ZL13g_usr1_called = internal thread_local global i32 0, align 4 ; thread-local flag; the patterns above expect it to be reached through a GDPLT call
+
+; Function Attrs: norecurse nounwind
+define void @_Z14SigUsr1Handleri(i32) local_unnamed_addr #0 { ; the unnamed i32 parameter is not used in the body
+entry:
+  store volatile i32 1, i32* @_ZL13g_usr1_called, align 4 ; volatile write of 1 to the thread-local flag
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define zeroext i1 @_Z27CheckForMonitorCancellationv() local_unnamed_addr #0 { ; returns true and clears the thread-local flag when it was non-zero, otherwise returns false
+entry:
+  %0 = load volatile i32, i32* @_ZL13g_usr1_called, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %return, label %if.then ; flag == 0 goes straight to the return block
+
+if.then:                                          ; preds = %entry
+  store volatile i32 0, i32* @_ZL13g_usr1_called, align 4 ; reset the flag
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %.sink = phi i1 [ true, %if.then ], [ false, %entry ]
+  ret i1 %.sink
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } ; shared by both function definitions in this file
diff --git a/test/CodeGen/PowerPC/shift_mask.ll b/test/CodeGen/PowerPC/shift_mask.ll
index 91226a3..e9ca9b0 100644
--- a/test/CodeGen/PowerPC/shift_mask.ll
+++ b/test/CodeGen/PowerPC/shift_mask.ll
@@ -49,8 +49,6 @@ define i64 @test003(i64 %a, i64 %b) {
 define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test010:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisb 4, 7
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vslb 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -61,8 +59,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test011:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltish 4, 15
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vslh 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -73,10 +69,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test012:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisw 4, -16
-; CHECK-NEXT:    vspltisw 5, 15
-; CHECK-NEXT:    vsubuwm 4, 5, 4
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vslw 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -87,11 +79,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test013:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 36, 0
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsld 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -148,8 +135,6 @@ define i64 @test103(i64 %a, i64 %b) {
 define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test110:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisb 4, 7
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrb 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -160,8 +145,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test111:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltish 4, 15
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrh 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -172,10 +155,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test112:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisw 4, -16
-; CHECK-NEXT:    vspltisw 5, 15
-; CHECK-NEXT:    vsubuwm 4, 5, 4
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrw 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -186,11 +165,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test113:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI15_0@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 36, 0
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrd 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -247,8 +221,6 @@ define i64 @test203(i64 %a, i64 %b) {
 define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test210:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisb 4, 7
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrab 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -259,8 +231,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test211:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltish 4, 15
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrah 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -271,10 +241,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test212:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vspltisw 4, -16
-; CHECK-NEXT:    vspltisw 5, 15
-; CHECK-NEXT:    vsubuwm 4, 5, 4
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsraw 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -285,11 +251,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test213:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI23_0@toc@l
-; CHECK-NEXT:    lxvd2x 0, 0, 3
-; CHECK-NEXT:    xxswapd 36, 0
-; CHECK-NEXT:    xxland 35, 35, 36
 ; CHECK-NEXT:    vsrad 2, 2, 3
 ; CHECK-NEXT:    blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
diff --git a/test/CodeGen/X86/addcarry.ll b/test/CodeGen/X86/addcarry.ll
index 6fc07cd..5e95cd8 100644
--- a/test/CodeGen/X86/addcarry.ll
+++ b/test/CodeGen/X86/addcarry.ll
@@ -190,9 +190,9 @@ entry:
 define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) {
 ; CHECK-LABEL: shiftadd:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    leaq (%rdx,%rcx), %rax
 ; CHECK-NEXT:    addq %rsi, %rdi
-; CHECK-NEXT:    adcq $0, %rax
+; CHECK-NEXT:    adcq %rcx, %rdx
+; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
 entry:
   %0 = zext i64 %a to i128
diff --git a/test/CodeGen/X86/lwp-intrinsics-x86_64.ll b/test/CodeGen/X86/lwp-intrinsics-x86_64.ll
new file mode 100644
index 0000000..9ee9526
--- /dev/null
+++ b/test/CodeGen/X86/lwp-intrinsics-x86_64.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X64
+
+define i8 @test_lwpins64_rri(i64 %a0, i32 %a1) nounwind { ; lwpins64 with both non-immediate operands passed as function arguments
+; X64-LABEL: test_lwpins64_rri:
+; X64:       # BB#0:
+; X64-NEXT:    lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+  %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967) ; 2309737967 is 0x89ABCDEF; the expected assembly prints it as the signed value -1985229329
+  ret i8 %1
+}
+
+define i8 @test_lwpins64_rmi(i64 %a0, i32 *%p1) nounwind { ; lwpins64 whose i32 operand comes from a load
+; X64-LABEL: test_lwpins64_rmi:
+; X64:       # BB#0:
+; X64-NEXT:    lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+  %a1 = load i32, i32 *%p1 ; the expected assembly above uses the memory operand directly, with no separate load
+  %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328)
+  ret i8 %1
+}
+
+define void @test_lwpval64_rri(i64 %a0, i32 %a1) nounwind { ; lwpval64 with both non-immediate operands passed as function arguments
+; X64-LABEL: test_lwpval64_rri:
+; X64:       # BB#0:
+; X64-NEXT:    lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
+; X64-NEXT:    retq
+  tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552) ; 4275878552 is 0xFEDCBA98; the expected assembly prints it as the signed value -19088744
+  ret void
+}
+
+define void @test_lwpval64_rmi(i64 %a0, i32 *%p1) nounwind { ; lwpval64 whose i32 operand comes from a load
+; X64-LABEL: test_lwpval64_rmi:
+; X64:       # BB#0:
+; X64-NEXT:    lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
+; X64-NEXT:    retq
+  %a1 = load i32, i32 *%p1 ; the expected assembly above uses the memory operand directly, with no separate load
+  tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896)
+  ret void
+}
+
+declare i8 @llvm.x86.lwpins64(i64, i32, i32) nounwind ; returns i8; the tests above expect that result to be produced by setb
+declare void @llvm.x86.lwpval64(i64, i32, i32) nounwind ; same operand list as lwpins64 but no result
diff --git a/test/CodeGen/X86/lwp-intrinsics.ll b/test/CodeGen/X86/lwp-intrinsics.ll
new file mode 100644
index 0000000..c949bc8
--- /dev/null
+++ b/test/CodeGen/X86/lwp-intrinsics.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+lwp | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X64
+
+define void @test_llwpcb(i8 *%a0) nounwind { ; passes the pointer argument straight to the llwpcb intrinsic
+; X86-LABEL: test_llwpcb:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    llwpcb %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_llwpcb:
+; X64:       # BB#0:
+; X64-NEXT:    llwpcb %rdi
+; X64-NEXT:    retq
+  tail call void @llvm.x86.llwpcb(i8 *%a0)
+  ret void
+}
+
+define i8* @test_slwpcb(i8 *%a0) nounwind { ; %a0 is unused: the slwpcb intrinsic takes no arguments
+; X86-LABEL: test_slwpcb:
+; X86:       # BB#0:
+; X86-NEXT:    slwpcb %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_slwpcb:
+; X64:       # BB#0:
+; X64-NEXT:    slwpcb %rax
+; X64-NEXT:    retq
+  %1 = tail call i8* @llvm.x86.slwpcb()
+  ret i8 *%1 ; the intrinsic's pointer result is returned unchanged
+}
+
+define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind { ; lwpins32 with register operands; the second operand is a computed value
+; X86-LABEL: test_lwpins32_rri:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl %ecx, %ecx
+; X86-NEXT:    lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_lwpins32_rri:
+; X64:       # BB#0:
+; X64-NEXT:    addl %esi, %esi
+; X64-NEXT:    lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+  %1 = add i32 %a1, %a1 ; a1 + a1; shows up as the addl in the expected assembly
+  %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967) ; 2309737967 is 0x89ABCDEF; the expected assembly prints it as the signed value -1985229329
+  ret i8 %2
+}
+
+define i8 @test_lwpins32_rmi(i32 %a0, i32 *%p1) nounwind { ; lwpins32 whose second operand comes from a load
+; X86-LABEL: test_lwpins32_rmi:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    lwpins $1985229328, (%eax), %ecx # imm = 0x76543210
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_lwpins32_rmi:
+; X64:       # BB#0:
+; X64-NEXT:    lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+  %a1 = load i32, i32 *%p1 ; the expected assembly above uses the memory operand directly, with no separate load
+  %1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328)
+  ret i8 %1
+}
+
+define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind { ; lwpval32 with register operands; the second operand is a computed value
+; X86-LABEL: test_lwpval32_rri:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    addl %ecx, %ecx
+; X86-NEXT:    lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_lwpval32_rri:
+; X64:       # BB#0:
+; X64-NEXT:    addl %esi, %esi
+; X64-NEXT:    lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
+; X64-NEXT:    retq
+  %1 = add i32 %a1, %a1 ; a1 + a1; shows up as the addl in the expected assembly
+  tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552) ; 4275878552 is 0xFEDCBA98; the expected assembly prints it as the signed value -19088744
+  ret void
+}
+
+define void @test_lwpval32_rmi(i32 %a0, i32 *%p1) nounwind { ; lwpval32 whose second operand comes from a load
+; X86-LABEL: test_lwpval32_rmi:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    lwpval $305419896, (%eax), %ecx # imm = 0x12345678
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_lwpval32_rmi:
+; X64:       # BB#0:
+; X64-NEXT:    lwpval $305419896, (%rsi), %edi # imm = 0x12345678
+; X64-NEXT:    retq
+  %a1 = load i32, i32 *%p1 ; the expected assembly above uses the memory operand directly, with no separate load
+  tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896)
+  ret void
+}
+
+declare void @llvm.x86.llwpcb(i8*) nounwind ; takes one pointer, returns nothing
+declare i8* @llvm.x86.slwpcb() nounwind ; takes nothing, returns a pointer
+declare i8 @llvm.x86.lwpins32(i32, i32, i32) nounwind ; returns i8; the tests above expect that result to be produced by setb
+declare void @llvm.x86.lwpval32(i32, i32, i32) nounwind ; same operand list as lwpins32 but no result
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 1a15cab..29a662f 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -54,13 +54,13 @@ define <16 x float> @test1(float* %base, <16 x i32> %ind) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
-declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
 
 
 ; SCALAR-LABEL: test2
@@ -111,7 +111,7 @@ define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
   ret <16 x float> %res
 }
 
@@ -152,7 +152,7 @@ define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
   ret <16 x i32> %res
 }
 
@@ -205,8 +205,8 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
-  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
   %res = add <16 x i32> %gt1, %gt2
   ret <16 x i32> %res
 }
@@ -270,13 +270,13 @@ define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
   ret void
 }
 
-declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
-declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
+declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
 
 
 ; SCALAR-LABEL: test6
@@ -326,9 +326,9 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
 ; SKX_32-NEXT:    vmovdqa %ymm2, %ymm0
 ; SKX_32-NEXT:    retl
 
-  %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
 
-  call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
   ret <8 x i32>%a
 }
 
@@ -384,8 +384,8 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
 
   %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
   %imask = bitcast i8 %mask to <8 x i1>
-  %gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
-  %gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
+  %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
+  %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
   %res = add <8 x i32> %gt1, %gt2
   ret <8 x i32> %res
 }
@@ -444,8 +444,8 @@ define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
 ; SKX_32-NEXT:    retl
 
   %imask = bitcast i16 %mask to <16 x i1>
-  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
-  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
   %res = add <16 x i32> %gt1, %gt2
   ret <16 x i32> %res
 }
@@ -522,7 +522,7 @@ entry:
   %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
 
   %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
-  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
   ret <8 x i32> %res
 }
 
@@ -591,7 +591,7 @@ entry:
   %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
 
   %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
-  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
   ret <8 x i32> %res
 }
 
@@ -632,7 +632,7 @@ define <16 x float> @test11(float* %base, i32 %ind) {
 
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -671,7 +671,7 @@ define <16 x float> @test12(float* %base, <16 x i32> %ind) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -710,7 +710,7 @@ define <16 x float> @test13(float* %base, <16 x i32> %ind) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -772,13 +772,13 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
 
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
-declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
 
 ; Gather smaller than existing instruction
 define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
@@ -831,7 +831,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
 
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
   ret <4 x float>%res
 }
 
@@ -890,7 +890,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x 
 
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
-  %res = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
+  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
   ret <4 x double>%res
 }
 
@@ -942,15 +942,15 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x 
 
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
-  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
   ret <2 x double>%res
 }
 
-declare void @llvm.masked.scatter.v4i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
-declare void @llvm.masked.scatter.v4f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
-declare void @llvm.masked.scatter.v2i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare void @llvm.masked.scatter.v2f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
 
 define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ;
@@ -995,7 +995,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; SKX_32-NEXT:    vptestmd %xmm2, %xmm2, %k1
 ; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
   ret void
 }
 
@@ -1049,7 +1049,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
   %gep = getelementptr double, double* %ptr, <4 x i64> %ind
-  call void @llvm.masked.scatter.v4f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
   ret void
 }
 
@@ -1103,7 +1103,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
 ; SKX_32-NEXT:    kshiftrb $6, %k0, %k1
 ; SKX_32-NEXT:    vscatterdps %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
+  call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
   ret void
 }
 
@@ -1157,12 +1157,12 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
 ; SKX_32-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
   ret void
 }
 
 ; The result type requires widening
-declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
 
 define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
 ;
@@ -1222,12 +1222,12 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
-  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
+  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
   ret <2 x float>%res
 }
 
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
 
 define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
 ;
@@ -1276,7 +1276,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
-  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
+  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
   ret <2 x i32>%res
 }
 
@@ -1320,7 +1320,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
-  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
   ret <2 x i32>%res
 }
 
@@ -1371,7 +1371,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
-  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
+  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
   ret <2 x i64>%res
 }
 
@@ -1418,7 +1418,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
-  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
+  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
   ret <2 x i64>%res
 }
 
@@ -1466,7 +1466,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
-  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
+  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
   ret <2 x float>%res
 }
 
@@ -1515,7 +1515,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
 ; SKX_32-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
   ret void
 }
 
@@ -1568,23 +1568,23 @@ define <16 x float> @test29(float* %base, <16 x i32> %ind) {
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
   ret <16 x float>%res
 }
 
 ; Check non-power-of-2 case. It should be scalarized.
-declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
+declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
 define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
 ; ALL-LABEL: test30:
 ; ALL-NOT:       gather
 
   %sext_ind = sext <3 x i32> %ind to <3 x i64>
   %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
-  %res = call <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
+  %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
   ret <3 x i32>%res
 }
 
-declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
+declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
 
 ; KNL-LABEL: test31
 ; KNL: vpgatherqq
@@ -1626,7 +1626,7 @@ define <16 x float*> @test31(<16 x float**> %ptrs) {
 ; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; SKX_32-NEXT:    retl
 
-  %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
+  %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
   ret <16 x float*>%res
 }
 
@@ -1672,7 +1672,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i
 ; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
-  %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
   ret <16 x i32> %res
 }
 define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
@@ -1749,10 +1749,10 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %res = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
+  %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
   ret <16 x i64> %res
 }
-declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
+declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
 define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
 ; KNL_64-LABEL: test_gather_16f32:
 ; KNL_64:       # BB#0:
@@ -1795,7 +1795,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16
 ; SKX_32-NEXT:    vgatherdps (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovaps %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
   ret <16 x float> %res
 }
 define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
@@ -1872,10 +1872,10 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %res = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
+  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
   ret <16 x double> %res
 }
-declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
+declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
 define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0)  {
 ; KNL_64-LABEL: test_scatter_16i32:
 ; KNL_64:       # BB#0:
@@ -1918,7 +1918,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %
 ; SKX_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
 define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
@@ -1993,10 +1993,10 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
 define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
 ; KNL_64-LABEL: test_scatter_16f32:
 ; KNL_64:       # BB#0:
@@ -2039,10 +2039,10 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa
 ; SKX_32-NEXT:    vscatterdps %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
 define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
 ; KNL_64-LABEL: test_scatter_16f64:
 ; KNL_64:       # BB#0:
@@ -2115,10 +2115,10 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
 
 define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) {
 ; KNL_64-LABEL: test_pr28312:
@@ -2193,11 +2193,11 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %g1 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
-  %g2 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
-  %g3 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
   %a = add <4 x i64> %g1, %g2
   %b = add <4 x i64> %a, %g3
   ret <4 x i64> %b
 }
-declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
diff --git a/test/CodeGen/X86/stack-folding-lwp.ll b/test/CodeGen/X86/stack-folding-lwp.ll
new file mode 100644
index 0000000..edf2798
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-lwp.ll
@@ -0,0 +1,49 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+lwp < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
+define i8 @stack_fold_lwpins_u32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: stack_fold_lwpins_u32
+; CHECK:       # BB#0:
+; CHECK:       lwpins $2814, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+  %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 2814)
+  ret i8 %2
+}
+declare i8 @llvm.x86.lwpins32(i32, i32, i32)
+
+define i8 @stack_fold_lwpins_u64(i64 %a0, i32 %a1) {
+; CHECK-LABEL: stack_fold_lwpins_u64
+; CHECK:       # BB#0:
+; CHECK:       lwpins $2814, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
+  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+  %2 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2814)
+  ret i8 %2
+}
+declare i8 @llvm.x86.lwpins64(i64, i32, i32)
+
+define void @stack_fold_lwpval_u32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: stack_fold_lwpval_u32
+; CHECK:       # BB#0:
+; CHECK:       lwpval $2814, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+  tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 2814)
+  ret void
+}
+declare void @llvm.x86.lwpval32(i32, i32, i32)
+
+define void @stack_fold_lwpval_u64(i64 %a0, i32 %a1) {
+; CHECK-LABEL: stack_fold_lwpval_u64
+; CHECK:       # BB#0:
+; CHECK:       lwpval $2814, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
+  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+  tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 2814)
+  ret void
+}
+declare void @llvm.x86.lwpval64(i64, i32, i32)
diff --git a/test/CodeGen/X86/version_directive.ll b/test/CodeGen/X86/version_directive.ll
index 8e4e6dc..ac5eda7 100644
--- a/test/CodeGen/X86/version_directive.ll
+++ b/test/CodeGen/X86/version_directive.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple x86_64-apple-darwin15.0.0 -o - /dev/null | FileCheck %s
 ; RUN: llc -mtriple x86_64-apple-macosx10.11.0 -o - /dev/null | FileCheck %s
+; RUN: llc -mtriple x86_64-apple-macos10.11.0 -o - /dev/null | FileCheck %s
 
 ; CHECK: .macosx_version_min 10, 11
diff --git a/test/CodeGen/X86/x86-32-intrcc.ll b/test/CodeGen/X86/x86-32-intrcc.ll
index 9794f2c..ac0e7e1 100644
--- a/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/test/CodeGen/X86/x86-32-intrcc.ll
@@ -57,23 +57,23 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %eco
 define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {
   call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
   ; CHECK-LABEL: test_isr_clobbers
-  ; CHECK-SSE-NEXT: pushl %ebp
-  ; CHECK-SSE-NEXT: pushl %ebx
-  ; CHECK-SSE-NEXT; pushl %eax
-  ; CHECK-SSE-NEXT: popl %eax
-  ; CHECK-SSE-NEXT: popl %ebx
-  ; CHECK-SSE-NEXT: popl %ebp
-  ; CHECK-SSE-NEXT: addl $4, %esp
-  ; CHECK-SSE-NEXT: iretl
+  ; CHECK: pushl %ebp
+  ; CHECK: pushl %ebx
+  ; CHECK: pushl %eax
+  ; CHECK: popl %eax
+  ; CHECK: popl %ebx
+  ; CHECK: popl %ebp
+  ; CHECK: addl $4, %esp
+  ; CHECK: iretl
   ; CHECK0-LABEL: test_isr_clobbers
-  ; CHECK0-SSE-NEXT: pushl %ebp
-  ; CHECK0-SSE-NEXT: pushl %ebx
-  ; CHECK0-SSE-NEXT; pushl %eax
-  ; CHECK0-SSE-NEXT: popl %eax
-  ; CHECK0-SSE-NEXT: popl %ebx
-  ; CHECK0-SSE-NEXT: popl %ebp
-  ; CHECK0-SSE-NEXT: addl $4, %esp
-  ; CHECK0-SSE-NEXT: iretl
+  ; CHECK0: pushl %ebp
+  ; CHECK0: pushl %ebx
+  ; CHECK0: pushl %eax
+  ; CHECK0: popl %eax
+  ; CHECK0: popl %ebx
+  ; CHECK0: popl %ebp
+  ; CHECK0: addl $4, %esp
+  ; CHECK0: iretl
   ret void
 }
 
diff --git a/test/CodeGen/X86/x86-64-intrcc.ll b/test/CodeGen/X86/x86-64-intrcc.ll
index c8bc9e7..75ca1af 100644
--- a/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/test/CodeGen/X86/x86-64-intrcc.ll
@@ -59,32 +59,33 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %eco
 define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {
   call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"()
   ; CHECK-LABEL: test_isr_clobbers
-  ; CHECK-SSE-NEXT: pushq %rax
-  ; CHECK-SSE-NEXT: pushq %rax
-  ; CHECK-SSE-NEXT; pushq %r11
-  ; CHECK-SSE-NEXT: pushq %rbp
-  ; CHECK-SSE-NEXT: pushq %rbx
-  ; CHECK-SSE-NEXT: movaps %xmm0
-  ; CHECK-SSE-NEXT: movaps %xmm0
-  ; CHECK-SSE-NEXT: popq %rbx
-  ; CHECK-SSE-NEXT: popq %rbp
-  ; CHECK-SSE-NEXT: popq %r11
-  ; CHECK-SSE-NEXT: popq %rax
-  ; CHECK-SSE-NEXT: addq $8, %rsp
-  ; CHECK-SSE-NEXT: iretq
+
+  ; CHECK: pushq %rax
+  ; CHECK: pushq %rbp
+  ; CHECK: pushq %r11
+  ; CHECK: pushq %rbx
+  ; CHECK: movaps %xmm0
+  ; CHECK: movaps {{.*}}, %xmm0
+  ; CHECK: popq %rbx
+  ; CHECK: popq %r11
+  ; CHECK: popq %rbp
+  ; CHECK: popq %rax
+  ; CHECK: addq $16, %rsp
+  ; CHECK: iretq
   ; CHECK0-LABEL: test_isr_clobbers
-  ; CHECK0-SSE-NEXT: pushq %rax
-  ; CHECK0-SSE-NEXT; pushq %r11
-  ; CHECK0-SSE-NEXT: pushq %rbp
-  ; CHECK0-SSE-NEXT: pushq %rbx
-  ; CHECK0-SSE-NEXT: movaps %xmm0
-  ; CHECK0-SSE-NEXT: movaps %xmm0
-  ; CHECK0-SSE-NEXT: popq %rbx
-  ; CHECK0-SSE-NEXT: popq %rbp
-  ; CHECK0-SSE-NEXT: popq %r11
-  ; CHECK0-SSE-NEXT: popq %rax
-  ; CHECK0-SSE-NEXT: addq $16, %rsp
-  ; CHECK0-SSE-NEXT: iretq
+
+  ; CHECK0: pushq %rax
+  ; CHECK0: pushq %rbp
+  ; CHECK0: pushq %r11
+  ; CHECK0: pushq %rbx
+  ; CHECK0: movaps %xmm0
+  ; CHECK0: movaps {{.*}}, %xmm0
+  ; CHECK0: popq %rbx
+  ; CHECK0: popq %r11
+  ; CHECK0: popq %rbp
+  ; CHECK0: popq %rax
+  ; CHECK0: addq $16, %rsp
+  ; CHECK0: iretq
   ret void
 }
 
diff --git a/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll b/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
new file mode 100644
index 0000000..7e370c2
--- /dev/null
+++ b/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py for function "bar"
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+;; In functions with 'no_caller_saved_registers' attribute, all registers should
+;; be preserved except for registers used for passing/returning arguments.
+;; In the following function registers %RDI, %RSI and %XMM0 are used to store
+;; arguments %a0, %a1 and %b0 respectively. The value is returned in %RAX.
+;; The above registers should not be preserved, however other registers
+;; (that are modified by the function) should be preserved (%RDX and %XMM1).
+define x86_64_sysvcc i32 @bar(i32 %a0, i32 %a1, float %b0) #0 {
+; CHECK-LABEL: bar:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushq %rdx
+; CHECK-NEXT:  .Lcfi0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT:  .Lcfi1:
+; CHECK-NEXT:    .cfi_offset %rdx, -16
+; CHECK-NEXT:  .Lcfi2:
+; CHECK-NEXT:    .cfi_offset %xmm1, -32
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl $4, %eax
+; CHECK-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    popq %rdx
+; CHECK-NEXT:    retq
+  call void asm sideeffect "", "~{rax},~{rdx},~{xmm1},~{rdi},~{rsi},~{xmm0}"()
+  ret i32 4
+}
+
+;; Because "bar" has 'no_caller_saved_registers' attribute, function "foo"
+;; doesn't need to preserve registers except for the arguments passed 
+;; to "bar" (%ESI, %EDI and %XMM0).
+define x86_64_sysvcc float @foo(i32 %a0, i32 %a1, float %b0) {
+; CHECK-LABEL: foo
+; CHECK:       movaps  %xmm0, %xmm1
+; CHECK-NEXT:  movl  %esi, %ecx
+; CHECK-NEXT:  movl  %edi, %edx
+; CHECK-NEXT:  callq bar
+; CHECK-NEXT:  addl  %edx, %eax
+; CHECK-NEXT:  addl  %ecx, %eax
+; CHECK-NEXT:  xorps %xmm0, %xmm0
+; CHECK-NEXT:  cvtsi2ssl %eax, %xmm0
+; CHECK-NEXT:  addss %xmm0, %xmm1
+; CHECK:       retq
+	%call = call i32 @bar(i32 %a0, i32 %a1, float %b0) #0
+	%c0   = add i32 %a0, %call
+	%c1   = add i32 %c0, %a1
+	%c2 = sitofp i32 %c1 to float
+	%c3 = fadd float %c2, %b0
+	ret float %c3
+}
+
+attributes #0 = { "no_caller_saved_registers" }
diff --git a/test/CodeGen/X86/x86-no_caller_saved_registers.ll b/test/CodeGen/X86/x86-no_caller_saved_registers.ll
new file mode 100644
index 0000000..9c62e3e
--- /dev/null
+++ b/test/CodeGen/X86/x86-no_caller_saved_registers.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse2 -O0 < %s | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; In functions with 'no_caller_saved_registers' attribute, all registers should
+;; be preserved except for registers used for passing/returning arguments.
+;; The test checks that function "bar" preserves xmm0 register.
+;; It also checks that caller function "foo" does not store registers for callee 
+;; "bar". For example, there is no store/load/access to xmm registers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define i32 @bar(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) #0 {
+; CHECK-LABEL: bar
+; CHECK:       mov{{.*}}  %xmm0
+; CHECK:       mov{{.*}} {{.*}}, %xmm0
+; CHECK:       ret
+  call void asm sideeffect "", "~{xmm0}"()
+  ret i32 1
+}
+
+define x86_intrcc void @foo(i8* nocapture readnone %c) {
+; CHECK-LABEL: foo
+; CHECK-NOT: xmm
+entry:
+  tail call i32 @bar(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8) #0
+  ret void
+}
+
+attributes #0 = { "no_caller_saved_registers" }
diff --git a/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64
index 4478134..21c1eac 100644
Binary files a/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 and b/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-header.s b/test/DebugInfo/Inputs/dwarfdump-header.s
index ce51e98..c5cf485 100644
--- a/test/DebugInfo/Inputs/dwarfdump-header.s
+++ b/test/DebugInfo/Inputs/dwarfdump-header.s
@@ -1,5 +1,6 @@
-# Test object to verify dwarfdump handles v4 and v5 CU/TU headers.
+# Test object to verify dwarfdump handles v4 and v5 CU/TU/line headers.
 # We have a representative set of units: v4 CU, v5 CU, v4 TU, v5 split TU.
+# We have v4 and v5 line-table headers.
 #
 # To generate the test object:
 # llvm-mc -triple x86_64-unknown-linux dwarfdump-header.s -filetype=obj \
@@ -28,6 +29,8 @@ dwo_TU_5:
         .byte 0x0e  # DW_FORM_strp
         .byte 0x03  # DW_AT_name
         .byte 0x0e  # DW_FORM_strp
+        .byte 0x10  # DW_AT_stmt_list
+        .byte 0x17  # DW_FORM_sec_offset
         .byte 0x00  # EOM(1)
         .byte 0x00  # EOM(2)
         .byte 0x02  # Abbrev code
@@ -81,10 +84,11 @@ CU_4_version:
         .short 4               # DWARF version number
         .long .debug_abbrev    # Offset Into Abbrev. Section
         .byte 8                # Address Size (in bytes)
-# The compile-unit DIE, which has just DW_AT_producer and DW_AT_name.
+# The compile-unit DIE, with DW_AT_producer, DW_AT_name, DW_AT_stmt_list.
         .byte 1
         .long str_producer
         .long str_CU_4
+        .long LH_4_start
         .byte 0 # NULL
 CU_4_end:
 
@@ -95,10 +99,11 @@ CU_5_version:
         .byte 1                # DWARF Unit Type
         .byte 8                # Address Size (in bytes)
         .long .debug_abbrev    # Offset Into Abbrev. Section
-# The compile-unit DIE, which has just DW_AT_producer and DW_AT_name.
+# The compile-unit DIE, with DW_AT_producer, DW_AT_name, DW_AT_stmt_list.
         .byte 1
         .long str_producer
         .long str_CU_5
+        .long LH_5_start
         .byte 0 # NULL
 CU_5_end:
 
@@ -147,3 +152,106 @@ TU_split_5_type:
         .byte 0 # NULL
         .byte 0 # NULL
 TU_split_5_end:
+
+        .section .debug_line,"",@progbits
+# DWARF v4 line-table header, referenced by the v4 CU's DW_AT_stmt_list.
+LH_4_start:
+        .long   LH_4_end-LH_4_version   # Length of Unit
+LH_4_version:
+        .short  4               # DWARF version number
+        .long   LH_4_header_end-LH_4_params     # Length of Prologue
+LH_4_params:
+        .byte   1               # Minimum Instruction Length
+        .byte   1               # Maximum Operations per Instruction
+        .byte   1               # Default is_stmt
+        .byte   -5              # Line Base
+        .byte   14              # Line Range
+        .byte   13              # Opcode Base
+        .byte   0               # Standard Opcode Lengths (12 entries)
+        .byte   1
+        .byte   1
+        .byte   1
+        .byte   1
+        .byte   0
+        .byte   0
+        .byte   0
+        .byte   1
+        .byte   0
+        .byte   0
+        .byte   1
+        # Directory table
+        .asciz  "Directory4a"   # Directory 1
+        .asciz  "Directory4b"   # Directory 2
+        .byte   0               # End of list
+        # File table
+        .asciz  "File4a"        # File name 1
+        .byte   1               # Directory index 1
+        .byte   0x41            # Timestamp 1
+        .byte   0x42            # File Size 1
+        .asciz  "File4b"        # File name 2
+        .byte   0               # Directory index 2
+        .byte   0x43            # Timestamp 2
+        .byte   0x44            # File Size 2
+        .byte   0               # End of list
+LH_4_header_end:
+        # Line number program, which is empty.
+LH_4_end:
+
+# DWARF v5 line-table header, referenced by the v5 CU's DW_AT_stmt_list.
+LH_5_start:
+        .long   LH_5_end-LH_5_version   # Length of Unit
+LH_5_version:
+        .short  5               # DWARF version number
+        .byte   8               # Address Size
+        .byte   0               # Segment Selector Size
+        .long   LH_5_header_end-LH_5_params     # Length of Prologue
+LH_5_params:
+        .byte   1               # Minimum Instruction Length
+        .byte   1               # Maximum Operations per Instruction
+        .byte   1               # Default is_stmt
+        .byte   -5              # Line Base
+        .byte   14              # Line Range
+        .byte   13              # Opcode Base
+        .byte   0               # Standard Opcode Lengths (12 entries)
+        .byte   1
+        .byte   1
+        .byte   1
+        .byte   1
+        .byte   0
+        .byte   0
+        .byte   0
+        .byte   1
+        .byte   0
+        .byte   0
+        .byte   1
+        # Directory table format
+        .byte   1               # One element per directory entry
+        .byte   1               # DW_LNCT_path
+        .byte   0x08            # DW_FORM_string
+        # Directory table entries
+        .byte   2               # Two directories
+        .asciz "Directory5a"    # DW_LNCT_path
+        .asciz "Directory5b"    # DW_LNCT_path
+        # File table format
+        .byte   4               # Four elements per file entry
+        .byte   1               # DW_LNCT_path
+        .byte   0x08            # DW_FORM_string
+        .byte   2               # DW_LNCT_directory_index
+        .byte   0x0b            # DW_FORM_data1
+        .byte   3               # DW_LNCT_timestamp
+        .byte   0x0f            # DW_FORM_udata
+        .byte   4               # DW_LNCT_size
+        .byte   0x0f            # DW_FORM_udata
+        # File table entries
+        .byte   2               # Two files
+        .asciz "File5a"         # DW_LNCT_path
+        .byte   1               # DW_LNCT_directory_index
+        .byte   0x51            # DW_LNCT_timestamp
+        .byte   0x52            # DW_LNCT_size
+        .asciz "File5b"         # DW_LNCT_path
+        .byte   2               # DW_LNCT_directory_index
+        .byte   0x53            # DW_LNCT_timestamp
+        .byte   0x54            # DW_LNCT_size
+LH_5_header_end:
+        # Line number program, which is empty.
+LH_5_end:
diff --git a/test/DebugInfo/dwarfdump-header.test b/test/DebugInfo/dwarfdump-header.test
index 3947c8b..222e506 100644
--- a/test/DebugInfo/dwarfdump-header.test
+++ b/test/DebugInfo/dwarfdump-header.test
@@ -7,13 +7,13 @@ CHECK-LABEL: .debug_info contents:
 
 The v4 CU header.
 
-CHECK: 0x00000000: Compile Unit: length = 0x00000011 version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000015)
+CHECK: 0x00000000: Compile Unit: length = 0x00000015 version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000019)
 CHECK: 0x0000000b: DW_TAG_compile_unit
 
 The v5 normal CU header.
 
-CHECK: 0x00000015: Compile Unit: length = 0x00000012 version = 0x0005 unit_type = DW_UT_compile abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x0000002b)
-CHECK: 0x00000021: DW_TAG_compile_unit
+CHECK: 0x00000019: Compile Unit: length = 0x00000016 version = 0x0005 unit_type = DW_UT_compile abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000033)
+CHECK: 0x00000025: DW_TAG_compile_unit
 
 CHECK-LABEL: .debug_types contents:
 
@@ -27,3 +27,33 @@ CHECK: .debug_types.dwo contents:
 
 CHECK: 0x00000000: Type Unit: length = 0x00000020 version = 0x0005 unit_type = DW_UT_split_type abbr_offset = 0x0000 addr_size = 0x08 name = 'V5_split_type_unit' type_signature = 0x8899aabbccddeeff type_offset = 0x001d (next unit at 0x00000024)
 CHECK: 0x00000018: DW_TAG_type_unit
+
+CHECK-LABEL: .debug_line contents:
+
+The v4 line table header.
+
+CHECK: Line table prologue:
+CHECK: version: 4
+CHECK-NOT: address_size
+CHECK-NOT: seg_select_size
+CHECK: max_ops_per_inst: 1
+CHECK: include_directories[  1] = 'Directory4a'
+CHECK: include_directories[  2] = 'Directory4b'
+CHECK-NOT: include_directories
+CHECK: file_names[  1]    1 0x00000041 0x00000042 File4a{{$}}
+CHECK: file_names[  2]    0 0x00000043 0x00000044 File4b{{$}}
+CHECK-NOT: file_names
+
+The v5 line table header.
+
+CHECK: Line table prologue:
+CHECK: version: 5
+CHECK: address_size: 8
+CHECK: seg_select_size: 0
+CHECK: max_ops_per_inst: 1
+CHECK: include_directories[  1] = 'Directory5a'
+CHECK: include_directories[  2] = 'Directory5b'
+CHECK-NOT: include_directories
+CHECK: file_names[  1]    1 0x00000051 0x00000052 File5a{{$}}
+CHECK: file_names[  2]    2 0x00000053 0x00000054 File5b{{$}}
+CHECK-NOT: file_names
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 278cb95..bbf30d3 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -69,5 +69,5 @@ define void @trap() {
   ret void
 }
 
-; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #0 = { nounwind readnone speculatable }
 ; CHECK: attributes #1 = { noreturn nounwind }
diff --git a/test/MC/AArch64/arm32-large-relocs.s b/test/MC/AArch64/arm32-large-relocs.s
deleted file mode 100644
index 1ac86c0..0000000
--- a/test/MC/AArch64/arm32-large-relocs.s
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding -o - \
-// RUN:   %s \
-// RUN:   | FileCheck %s
-// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding \
-// RUN:   -filetype=obj -o - %s \
-// RUN:   | llvm-objdump -r - \
-// RUN:   | FileCheck --check-prefix=CHECK-OBJ %s
-
-        movz x2, #:abs_g0:sym
-        movk w3, #:abs_g0_nc:sym
-        movz x13, #:abs_g0_s:sym
-        movn x17, #:abs_g0_s:sym
-// CHECK:   movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw
-// CHECK:   movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw
-// CHECK:   movz x13, #:abs_g0_s:sym // encoding: [0bAAA01101,A,0b100AAAAA,0xd2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
-// CHECK:   movn x17, #:abs_g0_s:sym // encoding: [0bAAA10001,A,0b100AAAAA,0x92]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw
-
-// CHECK-OBJ: 0 R_AARCH64_P32_MOVW_UABS_G0 sym
-// CHECK-OBJ: 4 R_AARCH64_P32_MOVW_UABS_G0_NC sym
-// CHECK-OBJ: 8 R_AARCH64_P32_MOVW_SABS_G0 sym
-// CHECK-OBJ: c R_AARCH64_P32_MOVW_SABS_G0 sym
-
-        movz x4, #:abs_g1:sym
-// CHECK:   movz x4, #:abs_g1:sym    // encoding: [0bAAA00100,A,0b101AAAAA,0xd2]
-// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw
-
-// CHECK-OBJ: 10 R_AARCH64_P32_MOVW_UABS_G1 sym
diff --git a/test/MC/AArch64/arm32-tls-relocs.s b/test/MC/AArch64/arm32-tls-relocs.s
deleted file mode 100644
index 390da05..0000000
--- a/test/MC/AArch64/arm32-tls-relocs.s
+++ /dev/null
@@ -1,290 +0,0 @@
-// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \
-// RUN:   -show-encoding < %s | FileCheck --check-prefix=CHECK-ILP32 %s
-// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \
-// RUN:   -filetype=obj < %s -o - | \
-// RUN:   llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF-ILP32 %s
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS initial-exec forms
-////////////////////////////////////////////////////////////////////////////////
-
-        adrp x11, :gottprel:var
-        ldr w10, [x0, #:gottprel_lo12:var]
-        ldr w9, :gottprel:var
-// CHECK-ILP32: adrp x11, :gottprel:var      // encoding: [0x0b'A',A,A,0x90'A']
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21
-// CHECK-ILP32: ldr  w10, [x0, :gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
-// CHECK-ILP32: ldr     w9, :gottprel:var       // encoding: [0bAAA01001,A,A,0x18]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19
-
-// CHECK-ELF-ILP32:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM:[^ ]+]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
-
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS local-exec forms
-////////////////////////////////////////////////////////////////////////////////
-
-        movz x5, #:tprel_g1:var
-        movn x6, #:tprel_g1:var
-        movz w7, #:tprel_g1:var
-// CHECK-ILP32: movz    x5, #:tprel_g1:var      // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movn    x6, #:tprel_g1:var      // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movz    w7, #:tprel_g1:var      // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
-
-
-        movz x11, #:tprel_g0:var
-        movn x12, #:tprel_g0:var
-        movz w13, #:tprel_g0:var
-// CHECK-ILP32: movz    x11, #:tprel_g0:var     // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movn    x12, #:tprel_g0:var     // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movz    w13, #:tprel_g0:var     // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
-
-
-        movk w15, #:tprel_g0_nc:var
-        movk w16, #:tprel_g0_nc:var
-// CHECK-ILP32: movk    w15, #:tprel_g0_nc:var  // encoding: [0bAAA01111,A,0b100AAAAA,0x72]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movk    w16, #:tprel_g0_nc:var  // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
-
-
-        add x21, x22, #:tprel_lo12:var
-// CHECK-ILP32: add     x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
-
-
-        add x25, x26, #:tprel_lo12_nc:var
-// CHECK-ILP32: add     x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
-
-
-        ldrb w29, [x30, #:tprel_lo12:var]
-        ldrsb x29, [x28, #:tprel_lo12_nc:var]
-// CHECK-ILP32: ldrb    w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
-// CHECK-ILP32: ldrsb   x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
-
-
-        strh w27, [x26, #:tprel_lo12:var]
-        ldrsh x25, [x24, #:tprel_lo12_nc:var]
-// CHECK-ILP32: strh    w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
-// CHECK-ILP32: ldrsh   x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
-
-
-        ldr w23, [x22, #:tprel_lo12:var]
-        ldrsw x21, [x20, #:tprel_lo12_nc:var]
-// CHECK-ILP32: ldr     w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
-// CHECK-ILP32: ldrsw   x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
-
-        ldr x19, [x18, #:tprel_lo12:var]
-        str x17, [x16, #:tprel_lo12_nc:var]
-// CHECK-ILP32: ldr     x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
-// CHECK-ILP32: str     x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
-
-
-   ldr q24, [x23, :tprel_lo12:var]
-   str q22, [x21, :tprel_lo12_nc:var]
-// CHECK-ILP32: ldr     q24, [x23, :tprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
-// CHECK-ILP32: str     q22, [x21, :tprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC [[VARSYM]]
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS local-dynamic forms
-////////////////////////////////////////////////////////////////////////////////
-
-        movz x5, #:dtprel_g1:var
-        movn x6, #:dtprel_g1:var
-        movz w7, #:dtprel_g1:var
-// CHECK-ILP32: movz    x5, #:dtprel_g1:var      // encoding: [0bAAA00101,A,0b101AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movn    x6, #:dtprel_g1:var      // encoding: [0bAAA00110,A,0b101AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movz    w7, #:dtprel_g1:var      // encoding: [0bAAA00111,A,0b101AAAAA,0x12]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
-
-
-        movz x11, #:dtprel_g0:var
-        movn x12, #:dtprel_g0:var
-        movz w13, #:dtprel_g0:var
-// CHECK-ILP32: movz    x11, #:dtprel_g0:var     // encoding: [0bAAA01011,A,0b100AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movn    x12, #:dtprel_g0:var     // encoding: [0bAAA01100,A,0b100AAAAA,0x92]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movz    w13, #:dtprel_g0:var     // encoding: [0bAAA01101,A,0b100AAAAA,0x12]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
-
-
-        movk x15, #:dtprel_g0_nc:var
-        movk w16, #:dtprel_g0_nc:var
-// CHECK-ILP32: movk    x15, #:dtprel_g0_nc:var  // encoding: [0bAAA01111,A,0b100AAAAA,0xf2]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
-// CHECK-ILP32: movk    w16, #:dtprel_g0_nc:var  // encoding: [0bAAA10000,A,0b100AAAAA,0x72]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
-
-
-        add x21, x22, #:dtprel_lo12:var
-// CHECK-ILP32: add     x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
-
-
-        add x25, x26, #:dtprel_lo12_nc:var
-// CHECK-ILP32: add     x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
-
-
-	add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12
-	add x0, x0, #:tprel_hi12:var_tlsle, lsl #12
-
-// CHECK-ELF-ILP32: R_AARCH64_P32_TLSLD_ADD_DTPREL_HI12 var_tlsld
-// CHECK-ELF-ILP32: R_AARCH64_P32_TLSLE_ADD_TPREL_HI12 var_tlsle
-
-
-        ldrb w29, [x30, #:dtprel_lo12:var]
-        ldrsb x29, [x28, #:dtprel_lo12_nc:var]
-// CHECK-ILP32: ldrb    w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1
-// CHECK-ILP32: ldrsb   x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
-
-
-        strh w27, [x26, #:dtprel_lo12:var]
-        ldrsh x25, [x24, #:dtprel_lo12_nc:var]
-// CHECK-ILP32: strh    w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2
-// CHECK-ILP32: ldrsh   x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
-
-
-        ldr w23, [x22, #:dtprel_lo12:var]
-        ldrsw x21, [x20, #:dtprel_lo12_nc:var]
-// CHECK-ILP32: ldr     w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
-// CHECK-ILP32: ldrsw   x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
-
-        ldr x19, [x18, #:dtprel_lo12:var]
-        str x17, [x16, #:dtprel_lo12_nc:var]
-// CHECK-ILP32: ldr     x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8
-// CHECK-ILP32: str     x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
-
-        ldr q24, [x23, #:dtprel_lo12:var]
-        str q22, [x21, #:dtprel_lo12_nc:var]
-// CHECK-ILP32: ldr     q24, [x23, :dtprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16
-// CHECK-ILP32: str     q22, [x21, :dtprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC [[VARSYM]]
-
-////////////////////////////////////////////////////////////////////////////////
-// TLS descriptor forms
-////////////////////////////////////////////////////////////////////////////////
-
-        adrp x8, :tlsdesc:var
-        ldr w7, [x6, #:tlsdesc_lo12:var]
-        add x5, x4, #:tlsdesc_lo12:var
-        .tlsdesccall var
-        blr x3
-
-// CHECK-ILP32: adrp    x8, :tlsdesc:var        // encoding: [0x08'A',A,A,0x90'A']
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21
-// CHECK-ILP32: ldr     w7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xb9]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4
-// CHECK-ILP32: add     x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91]
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12
-// CHECK-ILP32: .tlsdesccall var                // encoding: []
-// CHECK-ILP32-NEXT:                                 //   fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call
-// CHECK-ILP32: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
-
-
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADR_PAGE21 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_LD32_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADD_LO12 [[VARSYM]]
-// CHECK-ELF-ILP32-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_CALL [[VARSYM]]
-
-        // Make sure symbol 5 has type STT_TLS:
-
-// CHECK-ELF-ILP32:      Symbols [
-// CHECK-ELF-ILP32:        Symbol {
-// CHECK-ELF-ILP32:          Name: var
-// CHECK-ELF-ILP32-NEXT:     Value:
-// CHECK-ELF-ILP32-NEXT:     Size:
-// CHECK-ELF-ILP32-NEXT:     Binding: Global
-// CHECK-ELF-ILP32-NEXT:     Type: TLS
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s b/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s
deleted file mode 100644
index c08192e..0000000
--- a/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \
-// RUN:   -filetype=obj %s -o - | \
-// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s
-        adr x2, some_label
-        adrp x5, some_label
-
-        adrp x5, :got:some_label
-        ldr w0, [x5, #:got_lo12:some_label]
-
-// OBJ-ILP32:      Relocations [
-// OBJ-ILP32-NEXT:   Section {{.*}} .rela.text {
-// OBJ-ILP32-NEXT:     0x0 R_AARCH64_P32_ADR_PREL_LO21    some_label 0x0
-// OBJ-ILP32-NEXT:     0x4 R_AARCH64_P32_ADR_PREL_PG_HI21 some_label 0x0
-// OBJ-ILP32-NEXT:     0x8 R_AARCH64_P32_ADR_GOT_PAGE     some_label 0x0
-// OBJ-ILP32-NEXT:     0xC R_AARCH64_P32_LD32_GOT_LO12_NC some_label 0x0
-// OBJ-ILP32-NEXT:   }
-// OBJ-ILP32-NEXT: ]
diff --git a/test/MC/AArch64/lp64-diagnostics.s b/test/MC/AArch64/lp64-diagnostics.s
deleted file mode 100644
index 942923f..0000000
--- a/test/MC/AArch64/lp64-diagnostics.s
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t2 -filetype=obj \
-// RUN:   >/dev/null
-// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t2
-
-   ldr w24, [x23, :tlsdesc_lo12:sym]
-   ldr s22, [x21, :tlsdesc_lo12:sym]
-
-// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12)
-// CHECK-ERROR:   ldr w24, [x23, :tlsdesc_lo12:sym]
-// CHECK-ERROR:   ^
-// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12)
-// CHECK-ERROR:   ldr s22, [x21, :tlsdesc_lo12:sym]
-// CHECK-ERROR:   ^
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 9dd49e5..1b865d3 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -773,3 +773,21 @@
 
 #CHECK: getsec
 0x0f 0x37
+
+#CHECK: llwpcb %ecx
+0x8f 0xe9 0x78 0x12 0xc1
+
+#CHECK: slwpcb %ecx
+0x8f 0xe9 0x78 0x12 0xc9
+
+# CHECK: lwpins $305419896, %ebx, %eax
+0x8f 0xea 0x78 0x12 0xc3 0x78 0x56 0x34 0x12
+
+# CHECK: lwpins $591751049, (%esp), %edx
+0x8f 0xea 0x68 0x12 0x04 0x24 0x89 0x67 0x45 0x23
+
+# CHECK: lwpval $1737075661, %ebx, %eax
+0x8f 0xea 0x78 0x12 0xcb 0xcd 0xab 0x89 0x67
+
+# CHECK: lwpval $2309737967, (%esp), %edx
+0x8f 0xea 0x68 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 1511347..659ad90 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -456,3 +456,27 @@
 
 # CHECK: callq -32769
 0xe8 0xff 0x7f 0xff 0xff
+
+# CHECK: llwpcb %rax
+0x8f 0xe9 0xf8 0x12 0xc0
+
+# CHECK: slwpcb %rax
+0x8f 0xe9 0xf8 0x12 0xc8
+
+# CHECK: lwpins $305419896, %ebx, %rax
+0x8f 0xea 0xf8 0x12 0xc3 0x78 0x56 0x34 0x12
+
+# CHECK: lwpins $591751049, (%rsp), %rdx
+0x8f 0xea 0xe8 0x12 0x04 0x24 0x89 0x67 0x45 0x23
+
+# CHECK: lwpins $591751049, (%esp), %edx
+0x67 0x8f 0xea 0x68 0x12 0x04 0x24 0x89 0x67 0x45 0x23
+
+# CHECK: lwpval $1737075661, %ebx, %rax
+0x8f 0xea 0xf8 0x12 0xcb 0xcd 0xab 0x89 0x67
+
+# CHECK: lwpval $2309737967, (%rsp), %rdx
+0x8f 0xea 0xe8 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89
+
+# CHECK: lwpval $2309737967, (%esp), %edx
+0x67 0x8f 0xea 0x68 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89
diff --git a/test/MC/Hexagon/PacketRules/endloop_branches.s b/test/MC/Hexagon/PacketRules/endloop_branches.s
new file mode 100644
index 0000000..fbaa246
--- /dev/null
+++ b/test/MC/Hexagon/PacketRules/endloop_branches.s
@@ -0,0 +1,12 @@
+# RUN: not llvm-mc -triple=hexagon -filetype=asm %s 2>&1 | FileCheck %s
+
+# Check that a branch in an end-loop packet is caught.
+
+{ jump unknown
+}:endloop0
+# CHECK: 5:3: error: packet marked with `:endloop0' cannot contain instructions that modify register
+
+{ jump unknown
+}:endloop1
+
+# CHECK: 9:3: error: packet marked with `:endloop1' cannot contain instructions that modify register
diff --git a/test/MC/Hexagon/PacketRules/restrict_ax.s b/test/MC/Hexagon/PacketRules/restrict_ax.s
new file mode 100644
index 0000000..b8f7a1f
--- /dev/null
+++ b/test/MC/Hexagon/PacketRules/restrict_ax.s
@@ -0,0 +1,4 @@
+{ r0=memw_locked(r0)
+  r1=-mpyi(r0,#0) }
+# RUN: not llvm-mc -arch=hexagon -filetype=asm %s 2>%t; FileCheck %s --check-prefix=CHECK00 <%t
+# CHECK00: 1:3: error: Instruction can only be in a packet with ALU or non-FPU XTYPE instructions
diff --git a/test/MC/Hexagon/dealloc-return-jump.s b/test/MC/Hexagon/dealloc-return-jump.s
new file mode 100644
index 0000000..0d480be
--- /dev/null
+++ b/test/MC/Hexagon/dealloc-return-jump.s
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv62 -filetype=obj -o - %s
+# Check that a duplex involving dealloc_return is correctly checked
+# dealloc_return cannot be involved in a double jump packet
+
+{ r0=add(r0,#-1)
+  p0=cmp.eq(r0,r0); if (p0.new) jump:nt 0
+  if (p0) dealloc_return }
diff --git a/test/MC/Hexagon/endloop.s b/test/MC/Hexagon/endloop.s
deleted file mode 100644
index d537eb0..0000000
--- a/test/MC/Hexagon/endloop.s
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llvm-mc -triple=hexagon -filetype=asm %s 2>&1 | FileCheck %s
-
-# Check that a branch in an end-loop packet is caught.
-
-1:
-{
-	r0 = #1
-	p0 = cmp.eq (r1, r2)
-	if (p0) jump 1b
-}:endloop0
-
-2:
-{
-        r0 = #1
-        p0 = cmp.eq (r1, r2)
-        if (p0) jump 2b
-}:endloop1
-
-# CHECK: rror: packet marked with `:endloop{{.}}' cannot contain instructions that modify register
diff --git a/test/MC/Hexagon/iconst.s b/test/MC/Hexagon/iconst.s
index 917cc64..156d1ab 100644
--- a/test/MC/Hexagon/iconst.s
+++ b/test/MC/Hexagon/iconst.s
@@ -2,5 +2,5 @@
 
 a:
 # CHECK: r0 = add(r0,#0)
-# CHECK: R_HEX_23_REG
+# CHECK: R_HEX_27_REG
 r0 = iconst(#a)
diff --git a/test/MC/Hexagon/plt-rel.s b/test/MC/Hexagon/plt-rel.s
new file mode 100644
index 0000000..cba3d7e
--- /dev/null
+++ b/test/MC/Hexagon/plt-rel.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -arch=hexagon -filetype=obj %s | llvm-objdump -d -r - | FileCheck %s
+
+call foo@GDPLT
+# CHECK: R_HEX_GD_PLT_B22_PCREL
+call ##foo@GDPLT
+# CHECK:  R_HEX_GD_PLT_B32_PCREL_X
+# CHECK-NEXT: R_HEX_GD_PLT_B22_PCREL_X
+
+call foo@LDPLT
+# CHECK:  R_HEX_LD_PLT_B22_PCREL
+call ##foo@LDPLT
+# CHECK:  R_HEX_LD_PLT_B32_PCREL_X
+# CHECK-NEXT:  R_HEX_LD_PLT_B22_PCREL_X
diff --git a/test/MC/Hexagon/solo-axok.s b/test/MC/Hexagon/solo-axok.s
new file mode 100644
index 0000000..2df5796
--- /dev/null
+++ b/test/MC/Hexagon/solo-axok.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc -arch=hexagon -filetype=asm -mcpu=hexagonv55 %s 2>%t; FileCheck %s < %t
+#
+{
+  sp=asrh(r6)
+  l2fetch(fp,r23:22)
+  p2=r7
+  p1=dfclass(r31:30,#6)
+}
+# CHECK: rror: Instruction can only
diff --git a/test/MC/X86/lwp-x86_64.s b/test/MC/X86/lwp-x86_64.s
new file mode 100644
index 0000000..92f1596
--- /dev/null
+++ b/test/MC/X86/lwp-x86_64.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK
+
+llwpcb %rcx
+# CHECK: llwpcb %rcx
+# CHECK: encoding: [0x8f,0xe9,0xf8,0x12,0xc1]
+
+slwpcb %rax
+# CHECK: slwpcb %rax
+# CHECK: encoding: [0x8f,0xe9,0xf8,0x12,0xc8]
+
+lwpins $305419896, %ebx, %rax
+# CHECK: lwpins $305419896, %ebx, %rax
+# CHECK: encoding: [0x8f,0xea,0xf8,0x12,0xc3,0x78,0x56,0x34,0x12]
+
+lwpins $591751049, (%rsp), %rdx
+# CHECK: lwpins $591751049, (%rsp), %rdx
+# CHECK: encoding: [0x8f,0xea,0xe8,0x12,0x04,0x24,0x89,0x67,0x45,0x23]
+
+lwpval $1737075661, %ebx, %rax
+# CHECK: lwpval $1737075661, %ebx, %rax
+# CHECK: encoding: [0x8f,0xea,0xf8,0x12,0xcb,0xcd,0xab,0x89,0x67]
+
+lwpval $2309737967, (%rsp), %rdx
+# CHECK: lwpval $2309737967, (%rsp), %rdx
+# CHECK: encoding: [0x8f,0xea,0xe8,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89]
diff --git a/test/MC/X86/lwp.s b/test/MC/X86/lwp.s
new file mode 100644
index 0000000..43d6f2c
--- /dev/null
+++ b/test/MC/X86/lwp.s
@@ -0,0 +1,32 @@
+# RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X86
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X64
+
+llwpcb %ecx
+# CHECK: llwpcb %ecx
+# CHECK-X86: encoding: [0x8f,0xe9,0x78,0x12,0xc1]
+# CHECK-X64: encoding: [0x8f,0xe9,0x78,0x12,0xc1]
+
+slwpcb %eax
+# CHECK: slwpcb %eax
+# CHECK-X86: encoding: [0x8f,0xe9,0x78,0x12,0xc8]
+# CHECK-X64: encoding: [0x8f,0xe9,0x78,0x12,0xc8]
+
+lwpins $305419896, %ebx, %eax
+# CHECK: lwpins $305419896, %ebx, %eax
+# CHECK-X86: encoding: [0x8f,0xea,0x78,0x12,0xc3,0x78,0x56,0x34,0x12]
+# CHECK-X64: encoding: [0x8f,0xea,0x78,0x12,0xc3,0x78,0x56,0x34,0x12]
+
+lwpins $591751049, (%esp), %edx
+# CHECK: lwpins $591751049, (%esp), %edx
+# CHECK-X86: encoding: [0x8f,0xea,0x68,0x12,0x04,0x24,0x89,0x67,0x45,0x23]
+# CHECK-X64: encoding: [0x67,0x8f,0xea,0x68,0x12,0x04,0x24,0x89,0x67,0x45,0x23]
+
+lwpval $1737075661, %ebx, %eax
+# CHECK: lwpval $1737075661, %ebx, %eax
+# CHECK-X86: encoding: [0x8f,0xea,0x78,0x12,0xcb,0xcd,0xab,0x89,0x67]
+# CHECK-X64: encoding: [0x8f,0xea,0x78,0x12,0xcb,0xcd,0xab,0x89,0x67]
+
+lwpval $2309737967, (%esp), %edx
+# CHECK: lwpval $2309737967, (%esp), %edx
+# CHECK-X86: encoding: [0x8f,0xea,0x68,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89]
+# CHECK-X64: encoding: [0x67,0x8f,0xea,0x68,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89]
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index 6b50e2b..b7f87fe 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -503,4 +503,4 @@ define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
 ; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0
 ; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
 ; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0
-; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #0 = { nounwind readnone speculatable }
diff --git a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
index c880818..b2442b8 100644
--- a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
+++ b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
@@ -64,6 +64,22 @@ bb2:                                              ; preds = %bb1, %bb
   ret i32 %tmp3, !dbg !19
 }
 
+define i32 @bar_cold(i32 %arg) local_unnamed_addr #3 !dbg !5 {
+bb:
+  %tmp = icmp slt i32 %arg, 0, !dbg !7
+  br i1 %tmp, label %bb1, label %bb2, !dbg !8
+
+bb1:                                              ; preds = %bb
+  tail call void (...) @foo() #0, !dbg !9
+  tail call void (...) @foo() #0, !dbg !10
+  tail call void (...) @foo() #0, !dbg !11
+  br label %bb2, !dbg !18
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp3, !dbg !19
+}
+
 ; Function Attrs: nounwind
 declare void @foo(...) local_unnamed_addr #0
 
@@ -73,16 +89,19 @@ bb:
 ; CHECK:remark{{.*}}bar partially inlined into dummy_caller
 ; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller
 ; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller
+; CHECK-NOT:remark{{.*}}bar_cold partially inlined into dummy_caller
 ; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller
   %tmp = tail call i32 @bar(i32 %arg), !dbg !21
   %tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21
   %tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21
+  %tmp4 = tail call i32 @bar_cold(i32 %arg), !dbg !21
   ret i32 %tmp, !dbg !22
 }
 
 attributes #0 = { nounwind }
 attributes #1 = { noinline nounwind }
 attributes #2 = { alwaysinline nounwind }
+attributes #3 = { cold nounwind }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index dfa999e..aec00e8 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -39,7 +39,7 @@ bb2:                                              ; preds = %bb1, %bb
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { nounwind ssp }
-; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readnone speculatable }
 ; CHECK: attributes #2 = { noinline nounwind ssp }
 ; CHECK: attributes [[NUW]] = { nounwind }
 
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 988557e..3728a71 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s
 @x = global i32 0
@@ -68,22 +69,22 @@ entry:
 }
 
 ; CHECK: declare void @llvm.masked.scatter
-declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
 
 ; CHECK-NOT: readnone
 ; CHECK-NOT: readonly
 ; CHECK: define void @test9
 define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
-  call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
   ret void
 }
 
 ; CHECK: declare <4 x i32> @llvm.masked.gather
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 ; CHECK: readonly
 ; CHECK: define <4 x i32> @test10
 define <4 x i32> @test10(<4 x i32*> %ptrs) {
-  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
+  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
   ret <4 x i32> %res
 }
 
diff --git a/test/Transforms/FunctionImport/unnamed-globals.ll b/test/Transforms/FunctionImport/unnamed-globals.ll
deleted file mode 100644
index 167fad2..0000000
--- a/test/Transforms/FunctionImport/unnamed-globals.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure we don't crash when referencing an unnamed global.
-; RUN: opt %s -module-summary-analysis -S
-
-@0 = external global [1 x { i64 }]
-
-define internal void @tinkywinky() {
-  call void @patatino(i64 ptrtoint ([1 x { i64 }]* @0 to i64), i64 4)
-  ret void
-}
-declare void @patatino(i64, i64)
diff --git a/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
index 3f8fdcc..5b10a1b 100644
--- a/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
+++ b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
@@ -20,18 +20,18 @@ entry:
   %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
   %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
   ; Read from in1 and in2
-  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
-  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in1 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in1 from the allocas
   ; This gather should alias the scatter we just saw
-  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in2 from the allocas
   ; This gather should alias the scatter we just saw, and not be eliminated
-  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to out for good measure
   %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
   %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index b9e2084..66ab7f4 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -284,7 +284,7 @@ define i32 @cttz(i32 %a) {
 define i1 @cttz_knownbits(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #0
+; CHECK-NEXT:    [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[CNT]], 4
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
@@ -307,7 +307,7 @@ define i8 @ctlz(i8 %a) {
 define i1 @ctlz_knownbits(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits(
 ; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #0
+; CHECK-NEXT:    [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
 ; CHECK-NEXT:    [[RES:%.*]] = icmp eq i8 [[CNT]], 4
 ; CHECK-NEXT:    ret i1 [[RES]]
 ;
diff --git a/test/Transforms/InstCombine/masked_intrinsics.ll b/test/Transforms/InstCombine/masked_intrinsics.ll
index ce79ce5..d5403d1 100644
--- a/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -2,8 +2,8 @@
 
 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
-declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
+declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
 
 define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru)  {
   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
@@ -49,7 +49,7 @@ define void @store_onemask(<2 x double>* %ptr, <2 x double> %val)  {
 }
 
 define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru)  {
-  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
   ret <2 x double> %res
 
 ; CHECK-LABEL: @gather_zeromask(
@@ -57,7 +57,7 @@ define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru
 }
 
 define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val)  {
-  call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
+  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
   ret void
 
 ; CHECK-LABEL: @scatter_zeromask(
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
index 52175f1..82db192 100644
--- a/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -6,7 +6,7 @@ define double @pow_half(double %x) {
 }
 
 ; CHECK-LABEL: define double @pow_half(
-; CHECK-NEXT:  %sqrt = call fast double @sqrt(double %x)
+; CHECK-NEXT:  %sqrt = call fast double @sqrt(double %x) #1
 ; CHECK-NEXT:  ret double %sqrt
 
 define double @pow_neghalf(double %x) {
@@ -15,8 +15,11 @@ define double @pow_neghalf(double %x) {
 }
 
 ; CHECK-LABEL: define double @pow_neghalf(
-; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #0
+; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #1
 ; CHECK-NEXT: %sqrtrecip = fdiv fast double 1.000000e+00, %sqrt
 ; CHECK-NEXT: ret double %sqrtrecip
 
-declare double @llvm.pow.f64(double, double)
+declare double @llvm.pow.f64(double, double) #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index 812305d..adcca84 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -27,7 +27,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 
 define i32 @test2(i32 %x) nounwind {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) #0
+; CHECK-NEXT:    [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
 ; CHECK-NEXT:    [[SUB:%.*]] = xor i32 [[COUNT]], 31
 ; CHECK-NEXT:    ret i32 [[SUB]]
 ;
diff --git a/test/Transforms/LoopDeletion/unreachable-loops.ll b/test/Transforms/LoopDeletion/unreachable-loops.ll
new file mode 100644
index 0000000..147a856
--- /dev/null
+++ b/test/Transforms/LoopDeletion/unreachable-loops.ll
@@ -0,0 +1,336 @@
+; RUN: opt < %s -loop-deletion -verify-dom-info -S | FileCheck %s
+
+; Checking that we can delete loops that are never executed.
+; We do not change the constant conditional branch statement (where the not-taken target
+; is the loop) to an unconditional one.
+
+; delete the infinite loop because it is never executed.
+define void @test1(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: test1
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br i1 true, label %return, label %bb.preheader
+; CHECK-NOT: bb:
+entry:
+  br i1 true, label %return, label %bb
+
+bb:
+  %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  %t3 = icmp sgt i64 %x.0, %m
+  %t4 = and i1 %t1, %t3
+  br i1 true, label %bb, label %return
+
+return:
+  ret void
+}
+
+; FIXME: We can delete this infinite loop. Currently we do not,
+; because the infinite loop has no exit block.
+define void @test2(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: test2
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br i1 true, label %return, label %bb.preheader
+; CHECK-LABEL: bb:
+; CHECK: br label %bb
+entry:
+  br i1 true, label %return, label %bb
+
+bb:
+  %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  %t3 = icmp sgt i64 %x.0, %m
+  %t4 = and i1 %t1, %t3
+  br label %bb
+
+return:
+  ret void
+}
+
+; There are multiple exiting blocks and a single exit block. 
+; Since it is a never executed loop, we do not care about the values
+; from different exiting paths and we can
+; delete the loop.
+define i64 @test3(i64 %n, i64 %m, i64 %maybe_zero) nounwind {
+
+; CHECK-NOT: bb:
+; CHECK-NOT: bb2:
+; CHECK-NOT: bb3:
+; CHECK-LABEL: return.loopexit:
+; CHECK-NEXT: %x.lcssa.ph = phi i64 [ undef, %bb.preheader ]
+; CHECK-NEXT: br label %return
+; CHECK-LABEL: return:
+; CHECK-NEXT: %x.lcssa = phi i64 [ 20, %entry ], [ %x.lcssa.ph, %return.loopexit ]
+; CHECK-NEXT: ret i64 %x.lcssa
+entry:
+  br i1 false, label %bb, label %return
+
+bb:
+  %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  br i1 %t1, label %bb2, label %return
+
+bb2:
+  %t2 = icmp slt i64 %x.0, %m
+  %unused1 = udiv i64 42, %maybe_zero
+  br i1 %t2, label %bb3, label %return
+
+bb3:
+  %t3 = icmp slt i64 %x.0, %m
+  %unused2 = sdiv i64 42, %maybe_zero
+  br i1 %t3, label %bb, label %return
+
+return:
+; the only valid value for x.lcssa is 20.
+  %x.lcssa = phi i64 [ 12, %bb ], [ 14, %bb2 ], [ 16, %bb3 ], [20, %entry ]
+  ret i64 %x.lcssa
+}
+
+; Cannot delete the loop, since it may be executed at runtime.
+define void @test4(i64 %n, i64 %m, i1 %cond) {
+; CHECK-LABEL: test4
+; CHECK-LABEL: bb:
+entry:
+  br i1 %cond, label %looppred1, label %looppred2
+
+looppred1:
+  br i1 true, label %return, label %bb
+
+looppred2:
+  br i1 false, label %return, label %bb
+
+bb:
+  %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  %t3 = icmp sgt i64 %x.0, %m
+  %t4 = and i1 %t1, %t3
+  br i1 true, label %bb, label %return
+
+return:
+  ret void
+}
+
+; multiple constant conditional branches with loop not-taken in all cases.
+define void @test5(i64 %n, i64 %m, i1 %cond) nounwind {
+; CHECK-LABEL: test5
+; CHECK-LABEL: looppred1:
+; CHECK-NEXT: br i1 true, label %return, label %bb.preheader
+; CHECK-LABEL: looppred2:
+; CHECK-NEXT: br i1 true, label %return, label %bb.preheader
+; CHECK-NOT: bb:
+entry:
+  br i1 %cond, label %looppred1, label %looppred2
+
+looppred1:
+  br i1 true, label %return, label %bb
+
+looppred2:
+  br i1 true, label %return, label %bb
+
+bb:
+  %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  %t3 = icmp sgt i64 %x.0, %m
+  %t4 = and i1 %t1, %t3
+  br i1 true, label %bb, label %return
+
+return:
+  ret void
+}
+
+; Don't delete this infinite loop because the loop 
+; is executable at runtime.
+define void @test6(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: test6
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br i1 true, label %bb.preheader, label %bb.preheader
+; CHECK: bb:
+entry:
+  br i1 true, label %bb, label %bb
+
+bb:
+  %x.0 = phi i64 [ 0, %entry ], [ 0, %entry ], [ %t0, %bb ]
+  %t0 = add i64 %x.0, 1
+  %t1 = icmp slt i64 %x.0, %n
+  %t3 = icmp sgt i64 %x.0, %m
+  %t4 = and i1 %t1, %t3
+  br i1 true, label %bb, label %return
+
+return:
+  ret void
+}
+
+declare i64 @foo(i64)
+; The loop L2 is never executed and is a subloop, with an 
+; exit block that branches back to parent loop.
+; Here we can delete loop L2, while L1 still exists.
+define i64 @test7(i64 %n) {
+; CHECK-LABEL: test7
+; CHECK-LABEL: L1:
+; CHECK: br i1 true, label %L1Latch, label %L2.preheader
+; CHECK-LABEL: L2.preheader:
+; CHECK-NEXT: br label %L1Latch.loopexit
+; CHECK-LABEL: L1Latch.loopexit:
+; CHECK: br label %L1Latch
+; CHECK-LABEL: L1Latch:
+; CHECK-NEXT: %y = phi i64 [ %y.next, %L1 ], [ %y.L2.lcssa, %L1Latch.loopexit ]
+; CHECK: br i1 %cond2, label %exit, label %L1
+entry: 
+  br label %L1
+
+L1:
+  %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ]
+  br i1 true, label %L1Latch, label %L2
+
+L2:
+  %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ]
+  %x.next = add i64 %x, 1
+  %y.L2 = call i64 @foo(i64 %x.next)
+  %cond = icmp slt i64 %x.next, %n
+  br i1 %cond, label %L2, label %L1Latch
+
+L1Latch:
+ %y = phi i64 [ %y.next, %L1 ], [ %y.L2, %L2 ]
+ %y.add = add i64 %y, %n
+ %cond2 = icmp eq i64 %y.add, 42
+ br i1 %cond2, label %exit, label %L1
+
+exit:
+ ret i64 %y.add
+}
+
+
+; Show recursive deletion of loops. Since we start with subloops and progress outward 
+; to parent loop, we first delete the loop L2. Now loop L1 becomes a non-loop since its backedge
+; from L2's preheader to L1's exit block is never taken. So, L1 gets deleted as well.
+define void @test8(i64 %n) {
+; CHECK-LABEL: test8
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: ret void
+entry: 
+  br label %L1
+
+L1:
+  br i1 true, label %exit, label %L2
+
+L2:
+  %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ]
+  %x.next = add i64 %x, 1
+  %y.L2 = call i64 @foo(i64 %x.next)
+  %cond = icmp slt i64 %x.next, %n
+  br i1 %cond, label %L2, label %L1
+
+exit:
+ ret void
+}
+
+
+; Delete a loop (L2) which has subloop (L3).
+; Here we delete loop L2, but leave L3 as is.
+; FIXME: Can delete L3 as well, by iteratively going backward through the single
+; predecessor of L3 until we reach L1's block that guarantees L3 is never
+; executed.
+define void @test9(i64 %n) {
+; CHECK-LABEL: test9
+; CHECK-LABEL: L2.preheader:
+; CHECK-NEXT: br label %L3.preheader
+; CHECK-NOT: L2:
+; CHECK-LABEL: L3.preheader:
+; CHECK-NEXT: %y.L2.lcssa = phi i64 [ undef, %L2.preheader ]
+; CHECK-NEXT: br label %L3
+; CHECK-LABEL: L3:
+; CHECK: br i1 %cond2, label %L3, label %L1.loopexit
+entry: 
+  br label %L1
+
+L1:
+  br i1 true, label %exit, label %L2
+
+L2:
+  %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ]
+  %x.next = add i64 %x, 1
+  %y.L2 = call i64 @foo(i64 %x.next)
+  %cond = icmp slt i64 %x.next, %n
+  br i1 %cond, label %L2, label %L3
+
+L3: 
+  %cond2 = icmp slt i64 %y.L2, %n
+  br i1 %cond2, label %L3, label %L1
+
+exit:
+ ret void
+}
+
+; We cannot delete L3 because of the call within it.
+; Since L3 is not deleted, and entirely contained within L2, L2 is also not
+; deleted.
+; FIXME: We can delete unexecutable loops having
+; subloops contained entirely within them.
+define void @test10(i64 %n) {
+; CHECK-LABEL: test10
+; CHECK: L2:
+; CHECK: L3:
+entry: 
+  br label %L1
+
+L1:
+  br i1 true, label %exit, label %L2
+
+L2:
+  %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ]
+  %x.next = add i64 %x, 1
+  %y.L2 = call i64 @foo(i64 %x.next)
+  %cond = icmp slt i64 %x.next, %n
+  br i1 %cond, label %L1, label %L3
+
+L3:
+  %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ]
+  %y.L3.next = add i64 %y.L3, 1
+  %dummy = call i64 @foo(i64 %y.L3.next)
+  %cond2 = icmp slt i64 %y.L3, %n
+  br i1 %cond2, label %L3, label %L2
+
+exit:
+ ret void
+}
+
+; same as test10, but L3 does not contain a call.
+; So, in the first iteration, all statements of L3 are made invariant, and L3 is
+; deleted.
+; In the next iteration, since L2 is never executed and has no subloops, we delete
+; L2 as well. Finally, the outermost loop L1 is deleted.
+define void @test11(i64 %n) {
+; CHECK-LABEL: test11
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: ret void
+entry: 
+  br label %L1
+
+L1:
+  br i1 true, label %exit, label %L2
+
+L2:
+  %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ]
+  %x.next = add i64 %x, 1
+  %y.L2 = call i64 @foo(i64 %x.next)
+  %cond = icmp slt i64 %x.next, %n
+  br i1 %cond, label %L1, label %L3
+
+L3: 
+  %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ]
+  %y.L3.next = add i64 %y.L3, 1
+  %cond2 = icmp slt i64 %y.L3, %n
+  br i1 %cond2, label %L3, label %L2
+
+exit:
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
index 82f2e06..e18159f 100644
--- a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
+++ b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -36,7 +36,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT:    [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80>
 ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
diff --git a/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 2ce3575..8ef5961 100644
--- a/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux"
 ;}
 
 ;AVX512-LABEL: @foo1
-;AVX512: llvm.masked.load.v16i32
-;AVX512: llvm.masked.gather.v16f32
-;AVX512: llvm.masked.store.v16f32
+;AVX512: llvm.masked.load.v16i32.p0v16i32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.store.v16f32.p0v16f32
 ;AVX512: ret void
 
 ; Function Attrs: nounwind uwtable
@@ -96,8 +96,8 @@ for.end:                                          ; preds = %for.cond
 
 ;AVX512-LABEL: @foo2
 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.gather.v16f32
-;AVX512: llvm.masked.scatter.v16f32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
 ;AVX512: ret void
 define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
 entry:
@@ -171,10 +171,10 @@ for.end:                                          ; preds = %for.cond
 
 ;AVX512-LABEL: @foo3
 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
 ;AVX512: fadd <16 x float>
 ;AVX512: getelementptr inbounds %struct.Out, %struct.Out* %out, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.scatter.v16f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
 ;AVX512: ret void
 
 %struct.Out = type { float, float }
@@ -233,4 +233,194 @@ for.inc:                                          ; preds = %if.end
 for.end:                                          ; preds = %for.cond
   ret void
 }
-declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+
+; The same as @foo2 but scatter/gather argument is a vector of ptrs with addresspace 1
+
+;AVX512-LABEL: @foo2_addrspace
+;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p1f32
+;AVX512: llvm.masked.scatter.v16f32.v16p1f32
+;AVX512: ret void
+define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
+entry:
+  %in.addr = alloca %struct.In addrspace(1)*, align 8
+  %out.addr = alloca float addrspace(1)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
+  store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(1)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
+  store float %add, float addrspace(1)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Same as foo2_addrspace but here only the input has the non-default address space.
+
+;AVX512-LABEL: @foo2_addrspace2
+;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p1f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
+;AVX512: ret void
+define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspace(0)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+entry:
+  %in.addr = alloca %struct.In addrspace(1)*, align 8
+  %out.addr = alloca float addrspace(0)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
+  store float addrspace(0)* %out, float addrspace(0)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(1)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(0)*, float addrspace(0)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(0)* %8, i64 %idxprom4
+  store float %add, float addrspace(0)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Same as foo2_addrspace but here only the output has the non-default address space.
+
+;AVX512-LABEL: @foo2_addrspace3
+;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.scatter.v16f32.v16p1f32
+;AVX512: ret void
+
+define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+entry:
+  %in.addr = alloca %struct.In addrspace(0)*, align 8
+  %out.addr = alloca float addrspace(1)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(0)* %in, %struct.In addrspace(0)** %in.addr, align 8
+  store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(0)*, %struct.In addrspace(0)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(0)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
+  store float %add, float addrspace(1)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index bda4b24..aff372b 100755
--- a/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -23,11 +23,11 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 ; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
diff --git a/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll b/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
index a3511c3..b3087c1 100644
--- a/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
+++ b/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
@@ -1,8 +1,8 @@
 ; XFAIL: *
 ; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
 
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
@@ -21,18 +21,18 @@ entry:
   %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
   %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
   ; Read from in1 and in2
-  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
-  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in1 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in1 from the allocas
   ; This gather should alias the scatter we just saw
-  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in2 from the allocas
   ; This gather should alias the scatter we just saw, and not be eliminated
-  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to out for good measure
   %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
   %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index c10c3b1..ad44f9d 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -3049,6 +3049,6 @@ define void @test67(i8* %x) {
 !4 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
 !5 = !{i32 2, !"Debug Info Version", i32 3}
 
-; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes #0 = { nounwind readnone speculatable }
 ; CHECK: attributes [[NUW]] = { nounwind }
 ; CHECK: ![[RELEASE]] = !{}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index c856706..93a12a9 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -105,7 +105,7 @@ declare void @NSLog(i8*, ...)
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { ssp uwtable }
-; CHECK: attributes #1 = { nounwind readnone }
+; CHECK: attributes #1 = { nounwind readnone speculatable }
 ; CHECK: attributes #2 = { nonlazybind }
 ; CHECK: attributes #3 = { noinline ssp uwtable }
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll
index 923cbe7..03b1e83 100644
--- a/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/test/Transforms/SLPVectorizer/X86/call.ll
@@ -147,5 +147,5 @@ entry:
 ; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
 ; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]
 
-; CHECK: attributes [[ATTR0]] = { nounwind readnone }
+; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable }
 
diff --git a/test/Transforms/SpeculativeExecution/spec-other.ll b/test/Transforms/SpeculativeExecution/spec-other.ll
new file mode 100644
index 0000000..65e14b6
--- /dev/null
+++ b/test/Transforms/SpeculativeExecution/spec-other.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -S -speculative-execution \
+; RUN:   -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \
+; RUN:   | FileCheck %s
+
+; CHECK-LABEL: @ifThen_extractvalue(
+; CHECK: extractvalue
+; CHECK: br i1 true
+define void @ifThen_extractvalue() {
+  br i1 true, label %a, label %b
+
+a:
+  %x = extractvalue { i32, i32 } undef, 0
+  br label %b
+
+b:
+  ret void
+}
+
+; CHECK-LABEL: @ifThen_insertvalue(
+; CHECK: insertvalue
+; CHECK: br i1 true
+define void @ifThen_insertvalue() {
+  br i1 true, label %a, label %b
+
+a:
+  %x = insertvalue { i32, i32 } undef, i32 undef, 0
+  br label %b
+
+b:
+  ret void
+}
+
diff --git a/test/Transforms/SpeculativeExecution/spec-vector.ll b/test/Transforms/SpeculativeExecution/spec-vector.ll
new file mode 100644
index 0000000..9c64f1f
--- /dev/null
+++ b/test/Transforms/SpeculativeExecution/spec-vector.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -S -speculative-execution \
+; RUN:   -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \
+; RUN:   | FileCheck %s
+
+; CHECK-LABEL: @ifThen_extractelement_constindex(
+; CHECK: extractelement
+; CHECK: br i1 true
+define void @ifThen_extractelement_constindex() {
+  br i1 true, label %a, label %b
+
+a:
+  %x = extractelement <4 x i32> undef, i32 0
+  br label %b
+
+b:
+  ret void
+}
+
+; CHECK-LABEL: @ifThen_extractelement_varindex(
+; CHECK: extractelement
+; CHECK: br i1 true
+define void @ifThen_extractelement_varindex(i32 %idx) {
+  br i1 true, label %a, label %b
+
+a:
+  %x = extractelement <4 x i32> undef, i32 %idx
+  br label %b
+
+b:
+  ret void
+}
+
+; CHECK-LABEL: @ifThen_insertelement_constindex(
+; CHECK: insertelement
+; CHECK: br i1 true
+define void @ifThen_insertelement_constindex() {
+  br i1 true, label %a, label %b
+
+a:
+  %x = insertelement <4 x i32> undef, i32 undef, i32 0
+  br label %b
+
+b:
+  ret void
+}
+
+; CHECK-LABEL: @ifThen_insertelement_varindex(
+; CHECK: insertelement
+; CHECK: br i1 true
+define void @ifThen_insertelement_varindex(i32 %idx) {
+  br i1 true, label %a, label %b
+
+a:
+  %x = insertelement <4 x i32> undef, i32 undef, i32 %idx
+  br label %b
+
+b:
+  ret void
+}
+
+; CHECK-LABEL: @ifThen_shufflevector(
+; CHECK: shufflevector
+; CHECK: br i1 true
+define void @ifThen_shufflevector() {
+  br i1 true, label %a, label %b
+
+a:
+  %x = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> undef
+  br label %b
+
+b:
+  ret void
+}
diff --git a/test/Verifier/scatter_gather.ll b/test/Verifier/scatter_gather.ll
new file mode 100644
index 0000000..3b1b0ee
--- /dev/null
+++ b/test/Verifier/scatter_gather.ll
@@ -0,0 +1,122 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; Mask is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define <16 x float> @gather2(<16 x float*> %ptrs, <16 x i1>* %mask, <16 x float> %passthru) {
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1>* %mask, <16 x float> %passthru)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>*, <16 x float>)
+
+; Mask length != return length
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather3(<8 x float*> %ptrs, <16 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <16 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <16 x i1>, <8 x float>)
+
+; Return type is not a vector
+; CHECK: Intrinsic has incorrect return type!
+define <8 x float>* @gather4(<8 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float>* %res
+}
+declare <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
+
+; ptrs is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather5(<8 x float*>* %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>* %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>*, i32, <8 x i1>, <8 x float>)
+
+; Value type is not a vector of pointers
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather6(<8 x float> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float>, i32, <8 x i1>, <8 x float>)
+
+; Value element type != vector of pointers element
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather7(<8 x double*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x float>)
+
+; Value length != vector of pointers length
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather8(<16 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*>, i32, <8 x i1>, <8 x float>)
+
+; Passthru type doesn't match return type
+; CHECK: Intrinsic has incorrect argument type!
+define <16 x i32> @gather9(<16 x i32*> %ptrs, <16 x i1> %mask, <8 x i32> %passthru) {
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <8 x i32> %passthru)
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <8 x i32>)
+
+; Mask is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter2(<16 x float> %value, <16 x float*> %ptrs, <16 x i1>* %mask) {
+  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %value, <16 x float*> %ptrs, i32 4, <16 x i1>* %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>*)
+
+; Mask length != value length
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter3(<8 x float> %value, <8 x float*> %ptrs, <16 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 4, <16 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <16 x i1>)
+
+; Value type is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter4(<8 x float>* %value, <8 x float*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>* %value, <8 x float*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>*, <8 x float*>, i32, <8 x i1>)
+
+; ptrs is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter5(<8 x float> %value, <8 x float*>* %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float> %value, <8 x float*>* %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float>, <8 x float*>*, i32, <8 x i1>)
+
+; Value type is not a vector of pointers
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter6(<8 x float> %value, <8 x float> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8f32(<8 x float> %value, <8 x float> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8f32(<8 x float>, <8 x float>, i32, <8 x i1>)
+
+; Value element type != vector of pointers element
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter7(<8 x float> %value, <8 x double*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float> %value, <8 x double*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float>, <8 x double*>, i32, <8 x i1>)
+
+; Value length != vector of pointers length
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter8(<8 x float> %value, <16 x float*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float> %value, <16 x float*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float>, <16 x float*>, i32, <8 x i1>)
+
diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
index 7c680eb..b38b365 100644
--- a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
+++ b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
@@ -13,8 +13,8 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
 #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
diff --git a/tools/llvm-pdbdump/Diff.cpp b/tools/llvm-pdbdump/Diff.cpp
index 8c02d36..418c236 100644
--- a/tools/llvm-pdbdump/Diff.cpp
+++ b/tools/llvm-pdbdump/Diff.cpp
@@ -15,8 +15,8 @@
 #include "llvm/DebugInfo/PDB/Native/Formatters.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
 
 #include "llvm/Support/FormatAdapters.h"
 #include "llvm/Support/FormatProviders.h"
@@ -394,11 +394,17 @@ Error DiffStyle::diffStringTable() {
       StringRef S1, S2;
       if (I < IdList1.size()) {
         Id1 = IdList1[I];
-        S1 = ST1.getStringForID(*Id1);
+        if (auto Result = ST1.getStringForID(*Id1))
+          S1 = *Result;
+        else
+          return Result.takeError();
       }
       if (I < IdList2.size()) {
         Id2 = IdList2[I];
-        S2 = ST2.getStringForID(*Id2);
+        if (auto Result = ST2.getStringForID(*Id2))
+          S2 = *Result;
+        else
+          return Result.takeError();
       }
       if (Id1 == Id2 && S1 == S2)
         continue;
@@ -418,10 +424,18 @@ Error DiffStyle::diffStringTable() {
     std::vector<StringRef> Strings1, Strings2;
     Strings1.reserve(IdList1.size());
     Strings2.reserve(IdList2.size());
-    for (auto ID : IdList1)
-      Strings1.push_back(ST1.getStringForID(ID));
-    for (auto ID : IdList2)
-      Strings2.push_back(ST2.getStringForID(ID));
+    for (auto ID : IdList1) {
+      auto S = ST1.getStringForID(ID);
+      if (!S)
+        return S.takeError();
+      Strings1.push_back(*S);
+    }
+    for (auto ID : IdList2) {
+      auto S = ST2.getStringForID(ID);
+      if (!S)
+        return S.takeError();
+      Strings2.push_back(*S);
+    }
 
     SmallVector<StringRef, 64> OnlyP;
     SmallVector<StringRef, 64> OnlyQ;
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
index f3e28e0..ec1325f 100644
--- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -525,14 +525,17 @@ Error LLVMOutputStyle::dumpStringTable() {
 
   DictScope D(P, "String Table");
   for (uint32_t I : IS->name_ids()) {
-    StringRef S = IS->getStringForID(I);
-    if (!S.empty()) {
-      llvm::SmallString<32> Str;
-      Str.append("'");
-      Str.append(S);
-      Str.append("'");
-      P.printString(Str);
-    }
+    auto ES = IS->getStringForID(I);
+    if (!ES)
+      return ES.takeError();
+
+    if (ES->empty())
+      continue;
+    llvm::SmallString<32> Str;
+    Str.append("'");
+    Str.append(*ES);
+    Str.append("'");
+    P.printString(Str);
   }
   return Error::success();
 }
@@ -688,8 +691,11 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
     const auto &ST = *ExpectedST;
     for (const auto &E : Tpi->getHashAdjusters()) {
       DictScope DHA(P);
-      StringRef Name = ST.getStringForID(E.first);
-      P.printString("Type", Name);
+      auto Name = ST.getStringForID(E.first);
+      if (!Name)
+        return Name.takeError();
+
+      P.printString("Type", *Name);
       P.printHex("TI", E.second);
     }
   }
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
index 807d7f8..b94b5a4 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
@@ -233,9 +233,12 @@ Error YAMLOutputStyle::dumpStringTable() {
 
   const auto &ST = ExpectedST.get();
   for (auto ID : ST.name_ids()) {
-    StringRef S = ST.getStringForID(ID);
-    if (!S.empty())
-      Obj.StringTable->push_back(S);
+    auto S = ST.getStringForID(ID);
+    if (!S)
+      return S.takeError();
+    if (S->empty())
+      continue;
+    Obj.StringTable->push_back(*S);
   }
   return Error::success();
 }
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp
index 642e169..4cdd876 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -47,9 +47,9 @@
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
-#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
 #include "llvm/DebugInfo/PDB/PDB.h"
@@ -424,21 +424,6 @@ cl::list<std::string> InputFilename(cl::Positional,
 
 static ExitOnError ExitOnErr;
 
-static uint32_t
-getFileChecksumOffset(StringRef FileName,
-                      ModuleDebugFileChecksumFragment &Checksums,
-                      StringTableBuilder &Strings) {
-  // The offset in the line info record is the offset of the checksum
-  // entry for the corresponding file.  That entry then contains an
-  // offset into the global string table of the file name.  So to
-  // compute the proper offset to write into the line info record, we
-  // must first get its offset in the global string table, then ask the
-  // checksum builder to find the offset in its serialized buffer that
-  // it mapped that filename string table offset to.
-  uint32_t StringOffset = Strings.insert(FileName);
-  return Checksums.mapChecksumOffset(StringOffset);
-}
-
 static void yamlToPdb(StringRef Path) {
   BumpPtrAllocator Allocator;
   ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
@@ -490,6 +475,8 @@ static void yamlToPdb(StringRef Path) {
   for (auto F : Info.Features)
     InfoBuilder.addFeature(F);
 
+  auto &Strings = Builder.getStringTableBuilder().getStrings();
+
   const auto &Dbi = YamlObj.DbiStream.getValueOr(DefaultDbiStream);
   auto &DbiBuilder = Builder.getDbiBuilder();
   DbiBuilder.setAge(Dbi.Age);
@@ -516,35 +503,24 @@ static void yamlToPdb(StringRef Path) {
       // File Checksums must be emitted before line information, because line
       // info records use offsets into the checksum buffer to reference a file's
       // source file name.
-      auto Checksums = llvm::make_unique<ModuleDebugFileChecksumFragment>();
+      auto Checksums =
+          llvm::make_unique<ModuleDebugFileChecksumFragment>(Strings);
       auto &ChecksumRef = *Checksums;
       if (!FLI.FileChecksums.empty()) {
-        auto &Strings = Builder.getStringTableBuilder();
-        for (auto &FC : FLI.FileChecksums) {
-          uint32_t STOffset = Strings.insert(FC.FileName);
-          Checksums->addChecksum(STOffset, FC.Kind, FC.ChecksumBytes.Bytes);
-        }
+        for (auto &FC : FLI.FileChecksums)
+          Checksums->addChecksum(FC.FileName, FC.Kind, FC.ChecksumBytes.Bytes);
       }
       ModiBuilder.setC13FileChecksums(std::move(Checksums));
 
-      // FIXME: StringTable / StringTableBuilder should really be in
-      // DebugInfoCodeView.  This would allow us to construct the
-      // ModuleDebugLineFragment with a reference to the string table,
-      // and we could just pass strings around rather than having to
-      // remember how to calculate the right offset.
-      auto &Strings = Builder.getStringTableBuilder();
-
       for (const auto &Fragment : FLI.LineFragments) {
-        auto Lines = llvm::make_unique<ModuleDebugLineFragment>();
+        auto Lines =
+            llvm::make_unique<ModuleDebugLineFragment>(ChecksumRef, Strings);
         Lines->setCodeSize(Fragment.CodeSize);
         Lines->setRelocationAddress(Fragment.RelocSegment,
                                     Fragment.RelocOffset);
         Lines->setFlags(Fragment.Flags);
         for (const auto &LC : Fragment.Blocks) {
-          uint32_t ChecksumOffset =
-              getFileChecksumOffset(LC.FileName, ChecksumRef, Strings);
-
-          Lines->createBlock(ChecksumOffset);
+          Lines->createBlock(LC.FileName);
           if (Lines->hasColumnInfo()) {
             for (const auto &Item : zip(LC.Lines, LC.Columns)) {
               auto &L = std::get<0>(Item);
@@ -567,18 +543,15 @@ static void yamlToPdb(StringRef Path) {
 
       for (const auto &Inlinee : FLI.Inlinees) {
         auto Inlinees = llvm::make_unique<ModuleDebugInlineeLineFragment>(
-            Inlinee.HasExtraFiles);
+            ChecksumRef, Inlinee.HasExtraFiles);
         for (const auto &Site : Inlinee.Sites) {
-          uint32_t FileOff =
-              getFileChecksumOffset(Site.FileName, ChecksumRef, Strings);
-
-          Inlinees->addInlineSite(Site.Inlinee, FileOff, Site.SourceLineNum);
+          Inlinees->addInlineSite(Site.Inlinee, Site.FileName,
+                                  Site.SourceLineNum);
           if (!Inlinee.HasExtraFiles)
             continue;
 
           for (auto EF : Site.ExtraFiles) {
-            FileOff = getFileChecksumOffset(EF, ChecksumRef, Strings);
-            Inlinees->addExtraFile(FileOff);
+            Inlinees->addExtraFile(EF);
           }
         }
         ModiBuilder.addC13Fragment(std::move(Inlinees));
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index a7088c1..0438687 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -29,6 +29,7 @@
 #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h"
 #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h"
 #include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/StringTable.h"
 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
 #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
 #include "llvm/DebugInfo/CodeView/SymbolDumper.h"
@@ -124,7 +125,7 @@ private:
                                   StringRef SectionContents, StringRef Block);
 
   /// Given a .debug$S section, find the string table and file checksum table.
-  void initializeFileAndStringTables(StringRef Data);
+  void initializeFileAndStringTables(BinaryStreamReader &Reader);
 
   void cacheRelocations();
 
@@ -145,8 +146,12 @@ private:
   const llvm::object::COFFObjectFile *Obj;
   bool RelocCached = false;
   RelocMapTy RelocMap;
-  StringRef CVFileChecksumTable;
-  StringRef CVStringTable;
+
+  BinaryByteStream ChecksumContents;
+  VarStreamArray<FileChecksumEntry> CVFileChecksumTable;
+
+  BinaryByteStream StringTableContents;
+  StringTableRef CVStringTable;
 
   ScopedPrinter &Writer;
   TypeDatabase TypeDB;
@@ -186,7 +191,7 @@ public:
     return CD.getFileNameForFileOffset(FileOffset);
   }
 
-  StringRef getStringTable() override { return CD.CVStringTable; }
+  StringTableRef getStringTable() override { return CD.CVStringTable; }
 
 private:
   COFFDumper &CD;
@@ -725,30 +730,35 @@ void COFFDumper::printCodeViewDebugInfo() {
   }
 }
 
-void COFFDumper::initializeFileAndStringTables(StringRef Data) {
-  while (!Data.empty() && (CVFileChecksumTable.data() == nullptr ||
-                           CVStringTable.data() == nullptr)) {
+void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) {
+  while (Reader.bytesRemaining() > 0 &&
+         (!CVFileChecksumTable.valid() || !CVStringTable.valid())) {
     // The section consists of a number of subsection in the following format:
     // |SubSectionType|SubSectionSize|Contents...|
     uint32_t SubType, SubSectionSize;
-    error(consume(Data, SubType));
-    error(consume(Data, SubSectionSize));
-    if (SubSectionSize > Data.size())
-      return error(object_error::parse_failed);
+    error(Reader.readInteger(SubType));
+    error(Reader.readInteger(SubSectionSize));
+
+    StringRef Contents;
+    error(Reader.readFixedString(Contents, SubSectionSize));
+
     switch (ModuleDebugFragmentKind(SubType)) {
-    case ModuleDebugFragmentKind::FileChecksums:
-      CVFileChecksumTable = Data.substr(0, SubSectionSize);
-      break;
-    case ModuleDebugFragmentKind::StringTable:
-      CVStringTable = Data.substr(0, SubSectionSize);
+    case ModuleDebugFragmentKind::FileChecksums: {
+      ChecksumContents = BinaryByteStream(Contents, support::little);
+      BinaryStreamReader CSR(ChecksumContents);
+      error(CSR.readArray(CVFileChecksumTable, CSR.getLength()));
       break;
+    }
+    case ModuleDebugFragmentKind::StringTable: {
+      StringTableContents = BinaryByteStream(Contents, support::little);
+      error(CVStringTable.initialize(StringTableContents));
+    } break;
     default:
       break;
     }
+
     uint32_t PaddedSize = alignTo(SubSectionSize, 4);
-    if (PaddedSize > Data.size())
-      error(object_error::parse_failed);
-    Data = Data.drop_front(PaddedSize);
+    error(Reader.skip(PaddedSize - SubSectionSize));
   }
 }
 
@@ -771,7 +781,9 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
   if (Magic != COFF::DEBUG_SECTION_MAGIC)
     return error(object_error::parse_failed);
 
-  initializeFileAndStringTables(Data);
+  BinaryByteStream FileAndStrings(Data, support::little);
+  BinaryStreamReader FSReader(FileAndStrings);
+  initializeFileAndStringTables(FSReader);
 
   // TODO: Convert this over to using ModuleSubstreamVisitor.
   while (!Data.empty()) {
@@ -861,11 +873,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
         const FrameData *FD;
         error(SR.readObject(FD));
 
-        if (FD->FrameFunc >= CVStringTable.size())
-          error(object_error::parse_failed);
-
-        StringRef FrameFunc =
-            CVStringTable.drop_front(FD->FrameFunc).split('\0').first;
+        StringRef FrameFunc = error(CVStringTable.getString(FD->FrameFunc));
 
         DictScope S(W, "FrameData");
         W.printHex("RvaStart", FD->RvaStart);
@@ -971,10 +979,7 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
   for (auto &FC : Checksums) {
     DictScope S(W, "FileChecksum");
 
-    if (FC.FileNameOffset >= CVStringTable.size())
-      error(object_error::parse_failed);
-    StringRef Filename =
-        CVStringTable.drop_front(FC.FileNameOffset).split('\0').first;
+    StringRef Filename = error(CVStringTable.getString(FC.FileNameOffset));
     W.printHex("Filename", Filename, FC.FileNameOffset);
     W.printHex("ChecksumSize", FC.Checksum.size());
     W.printEnum("ChecksumKind", uint8_t(FC.Kind),
@@ -1008,23 +1013,16 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
 
 StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) {
   // The file checksum subsection should precede all references to it.
-  if (!CVFileChecksumTable.data() || !CVStringTable.data())
-    error(object_error::parse_failed);
-  // Check if the file checksum table offset is valid.
-  if (FileOffset >= CVFileChecksumTable.size())
+  if (!CVFileChecksumTable.valid() || !CVStringTable.valid())
     error(object_error::parse_failed);
 
-  // The string table offset comes first before the file checksum.
-  StringRef Data = CVFileChecksumTable.drop_front(FileOffset);
-  uint32_t StringOffset;
-  error(consume(Data, StringOffset));
+  auto Iter = CVFileChecksumTable.at(FileOffset);
 
-  // Check if the string table offset is valid.
-  if (StringOffset >= CVStringTable.size())
+  // Check if the file checksum table offset is valid.
+  if (Iter == CVFileChecksumTable.end())
     error(object_error::parse_failed);
 
-  // Return the null-terminated string.
-  return CVStringTable.drop_front(StringOffset).split('\0').first;
+  return error(CVStringTable.getString(Iter->FileNameOffset));
 }
 
 void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index 0156920..840ddba 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -25,6 +25,11 @@ namespace llvm {
   LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
   void error(std::error_code EC);
   void error(llvm::Error EC);
+  template <typename T> T error(llvm::Expected<T> &&E) {
+    error(E.takeError());
+    return std::move(*E);
+  }
+
   template <class T> T unwrapOrError(ErrorOr<T> EO) {
     if (EO)
       return *EO;
diff --git a/tools/llvm-shlib/CMakeLists.txt b/tools/llvm-shlib/CMakeLists.txt
index 2781586..3ebede0 100644
--- a/tools/llvm-shlib/CMakeLists.txt
+++ b/tools/llvm-shlib/CMakeLists.txt
@@ -50,6 +50,10 @@ endif()
 
 target_link_libraries(LLVM PRIVATE ${LIB_NAMES})
 
+if (LLVM_DYLIB_SYMBOL_VERSIONING)
+  set_property(TARGET LLVM APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--default-symver")
+endif()
+
 if (APPLE)
   set_property(TARGET LLVM APPEND_STRING PROPERTY
               LINK_FLAGS
diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 3f0e3da..a9d0d9e 100644
--- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -10,6 +10,7 @@
 #include "DwarfGenerator.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Config/llvm-config.h"
@@ -18,8 +19,8 @@
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/ObjectYAML/DWARFYAML.h"
 #include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1191,10 +1192,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) {
 
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
-
-  auto &DebugSections = *ErrOrSections;
-
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
 
   // Verify the number of compile units is correct.
   uint32_t NumCUs = DwarfContext.getNumCompileUnits();
@@ -1667,6 +1665,13 @@ TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) {
   EXPECT_EQ(DIEs.find(Val2)->second, AbbrevPtrVal2);
 }
 
+void VerifyError(DWARFContext &DwarfContext, StringRef Error) {
+  SmallString<1024> Str;
+  raw_svector_ostream Strm(Str);
+  EXPECT_FALSE(DwarfContext.verify(Strm, DIDT_All));
+  EXPECT_TRUE(Str.str().contains(Error));
+}
+
 TEST(DWARFDebugInfo, TestDwarfVerifyInvalidCURef) {
   // Create a single compile unit with a single function that has a DW_AT_type
   // that is CU relative. The CU offset is not valid becuase it is larger than
@@ -1711,17 +1716,10 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidCURef) {
   )";
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
-
-  auto &DebugSections = *ErrOrSections;
-
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
-
-  std::string str;
-  raw_string_ostream strm(str);
-  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
-  const char *err = "error: DW_FORM_ref4 CU offset 0x00001234 is invalid "
-                    "(must be less than CU size of 0x0000001a):";
-  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext, "error: DW_FORM_ref4 CU offset 0x00001234 is "
+                            "invalid (must be less than CU size of "
+                            "0x0000001a):");
 }
 
 TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddr) {
@@ -1766,17 +1764,9 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddr) {
   )";
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
-
-  auto &DebugSections = *ErrOrSections;
-
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
-
-  std::string str;
-  raw_string_ostream strm(str);
-  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
-  strm.flush();
-  const char *err = "error: DW_FORM_ref_addr offset beyond .debug_info bounds:";
-  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext,
+              "error: DW_FORM_ref_addr offset beyond .debug_info bounds:");
 }
 
 TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) {
@@ -1810,18 +1800,9 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) {
   )";
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
-
-  auto &DebugSections = *ErrOrSections;
-
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
-
-  std::string str;
-  raw_string_ostream strm(str);
-  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
-  strm.flush();
-  const char *err = "error: DW_AT_ranges offset is beyond .debug_ranges "
-                    "bounds:";
-  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext,
+              "error: DW_AT_ranges offset is beyond .debug_ranges bounds:");
 }
 
 TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) {
@@ -1855,18 +1836,10 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) {
   )";
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
-
-  auto &DebugSections = *ErrOrSections;
-
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
-
-  std::string str;
-  raw_string_ostream strm(str);
-  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
-  strm.flush();
-  const char *err = "error: DW_AT_stmt_list offset is beyond .debug_line "
-                    "bounds: 0x00001000";
-  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(
+      DwarfContext,
+      "error: DW_AT_stmt_list offset is beyond .debug_line bounds: 0x00001000");
 }
 
 TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStrp) {
@@ -1895,17 +1868,278 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStrp) {
   )";
   auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
   ASSERT_TRUE((bool)ErrOrSections);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext,
+              "error: DW_FORM_strp offset beyond .debug_str bounds:");
+}
+
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddrBetween) {
+  // Create a single compile unit with a single function that has a DW_AT_type
+  // with a valid .debug_info offset, but the offset is between two DIEs.
+  const char *yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+      - main
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_yes
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+      - Code:            0x00000002
+        Tag:             DW_TAG_subprogram
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_type
+            Form:            DW_FORM_ref_addr
+    debug_info:
+      - Length:
+          TotalLength:     22
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+          - AbbrCode:        0x00000002
+            Values:
+              - Value:           0x000000000000000D
+              - Value:           0x0000000000000011
+          - AbbrCode:        0x00000000
+            Values:
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata));
+  ASSERT_TRUE((bool)ErrOrSections);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(
+      DwarfContext,
+      "error: invalid DIE reference 0x00000011. Offset is in between DIEs:");
+}
 
-  auto &DebugSections = *ErrOrSections;
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidLineSequence) {
+  // Create a single compile unit whose line table has a sequence in it where
+  // the address decreases.
+  StringRef yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_stmt_list
+            Form:            DW_FORM_sec_offset
+    debug_info:
+      - Length:
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+              - Value:           0x0000000000000000
+    debug_line:
+      - Length:
+          TotalLength:     68
+        Version:         2
+        PrologueLength:  34
+        MinInstLength:   1
+        DefaultIsStmt:   1
+        LineBase:        251
+        LineRange:       14
+        OpcodeBase:      13
+        StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+        IncludeDirs:
+          - /tmp
+        Files:
+          - Name:            main.c
+            DirIdx:          1
+            ModTime:         0
+            Length:          0
+        Opcodes:
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          9
+            SubOpcode:       DW_LNE_set_address
+            Data:            4112
+          - Opcode:          DW_LNS_advance_line
+            SData:           9
+            Data:            4112
+          - Opcode:          DW_LNS_copy
+            Data:            4112
+          - Opcode:          DW_LNS_advance_pc
+            Data:            18446744073709551600
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          1
+            SubOpcode:       DW_LNE_end_sequence
+            Data:            18446744073709551600
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata);
+  ASSERT_TRUE((bool)ErrOrSections);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext, "error: .debug_line[0x00000000] row[1] decreases "
+                            "in address from previous row:");
+}
 
-  DWARFContextInMemory DwarfContext(DebugSections, 8);
+TEST(DWARFDebugInfo, TestDwarfVerifyInvalidLineFileIndex) {
+  // Create a single compile unit whose line table has a line table row with
+  // an invalid file index.
+  StringRef yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+    debug_abbrev:
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_stmt_list
+            Form:            DW_FORM_sec_offset
+    debug_info:
+      - Length:
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:
+          - AbbrCode:        0x00000001
+            Values:
+              - Value:           0x0000000000000001
+              - Value:           0x0000000000000000
+    debug_line:
+      - Length:
+          TotalLength:     61
+        Version:         2
+        PrologueLength:  34
+        MinInstLength:   1
+        DefaultIsStmt:   1
+        LineBase:        251
+        LineRange:       14
+        OpcodeBase:      13
+        StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+        IncludeDirs:
+          - /tmp
+        Files:
+          - Name:            main.c
+            DirIdx:          1
+            ModTime:         0
+            Length:          0
+        Opcodes:
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          9
+            SubOpcode:       DW_LNE_set_address
+            Data:            4096
+          - Opcode:          DW_LNS_advance_line
+            SData:           9
+            Data:            4096
+          - Opcode:          DW_LNS_copy
+            Data:            4096
+          - Opcode:          DW_LNS_advance_pc
+            Data:            16
+          - Opcode:          DW_LNS_set_file
+            Data:            5
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          1
+            SubOpcode:       DW_LNE_end_sequence
+            Data:            5
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata);
+  ASSERT_TRUE((bool)ErrOrSections);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext, "error: .debug_line[0x00000000][1] has invalid "
+                            "file index 5 (valid values are [1,1]):");
+}
 
-  std::string str;
-  raw_string_ostream strm(str);
-  EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All));
-  strm.flush();
-  const char *err = "error: DW_FORM_strp offset beyond .debug_str bounds:";
-  EXPECT_TRUE(strm.str().find(err) != std::string::npos);
+TEST(DWARFDebugInfo, TestDwarfVerifyCUDontShareLineTable) {
+  // Create two compile units where both compile units share the same
+  // DW_AT_stmt_list value and verify we report the error correctly.
+  StringRef yamldata = R"(
+    debug_str:
+      - ''
+      - /tmp/main.c
+      - /tmp/foo.c
+    debug_abbrev:    
+      - Code:            0x00000001
+        Tag:             DW_TAG_compile_unit
+        Children:        DW_CHILDREN_no
+        Attributes:      
+          - Attribute:       DW_AT_name
+            Form:            DW_FORM_strp
+          - Attribute:       DW_AT_stmt_list
+            Form:            DW_FORM_sec_offset
+    debug_info:      
+      - Length:          
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:         
+          - AbbrCode:        0x00000001
+            Values:          
+              - Value:           0x0000000000000001
+              - Value:           0x0000000000000000
+      - Length:          
+          TotalLength:     16
+        Version:         4
+        AbbrOffset:      0
+        AddrSize:        8
+        Entries:         
+          - AbbrCode:        0x00000001
+            Values:          
+              - Value:           0x000000000000000D
+              - Value:           0x0000000000000000
+    debug_line:      
+      - Length:          
+          TotalLength:     60
+        Version:         2
+        PrologueLength:  34
+        MinInstLength:   1
+        DefaultIsStmt:   1
+        LineBase:        251
+        LineRange:       14
+        OpcodeBase:      13
+        StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+        IncludeDirs:     
+          - /tmp
+        Files:           
+          - Name:            main.c
+            DirIdx:          1
+            ModTime:         0
+            Length:          0
+        Opcodes:         
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          9
+            SubOpcode:       DW_LNE_set_address
+            Data:            4096
+          - Opcode:          DW_LNS_advance_line
+            SData:           9
+            Data:            4096
+          - Opcode:          DW_LNS_copy
+            Data:            4096
+          - Opcode:          DW_LNS_advance_pc
+            Data:            256
+          - Opcode:          DW_LNS_extended_op
+            ExtLen:          1
+            SubOpcode:       DW_LNE_end_sequence
+            Data:            256
+  )";
+  auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata);
+  ASSERT_TRUE((bool)ErrOrSections);
+  DWARFContextInMemory DwarfContext(*ErrOrSections, 8);
+  VerifyError(DwarfContext, "error: two compile unit DIEs, 0x0000000b and "
+                            "0x0000001f, have the same DW_AT_stmt_list section "
+                            "offset:");
 }
 
 } // end anonymous namespace
diff --git a/unittests/DebugInfo/PDB/ErrorChecking.h b/unittests/DebugInfo/PDB/ErrorChecking.h
index 6d4a7de..f284bfd 100644
--- a/unittests/DebugInfo/PDB/ErrorChecking.h
+++ b/unittests/DebugInfo/PDB/ErrorChecking.h
@@ -36,6 +36,18 @@
     }                                                                          \
   }
 
+#define EXPECT_EXPECTED_EQ(Val, Exp)                                           \
+  {                                                                            \
+    auto Result = Exp;                                                         \
+    auto E = Result.takeError();                                               \
+    EXPECT_FALSE(static_cast<bool>(E));                                        \
+    if (E) {                                                                   \
+      consumeError(std::move(E));                                              \
+      return;                                                                  \
+    }                                                                          \
+    EXPECT_EQ(Val, *Result);                                                   \
+  }
+
 #define EXPECT_UNEXPECTED(Exp)                                                 \
   {                                                                            \
     auto E = Exp.takeError();                                                  \
diff --git a/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp b/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp
index 7c48387..249bc4a 100644
--- a/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp
+++ b/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp
@@ -9,8 +9,8 @@
 
 #include "ErrorChecking.h"
 
-#include "llvm/DebugInfo/PDB/Native/StringTable.h"
-#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamWriter.h"
@@ -27,13 +27,13 @@ class StringTableBuilderTest : public ::testing::Test {};
 
 TEST_F(StringTableBuilderTest, Simple) {
   // Create /names table contents.
-  StringTableBuilder Builder;
+  PDBStringTableBuilder Builder;
   EXPECT_EQ(1U, Builder.insert("foo"));
   EXPECT_EQ(5U, Builder.insert("bar"));
   EXPECT_EQ(1U, Builder.insert("foo"));
   EXPECT_EQ(9U, Builder.insert("baz"));
 
-  std::vector<uint8_t> Buffer(Builder.finalize());
+  std::vector<uint8_t> Buffer(Builder.calculateSerializedSize());
   MutableBinaryByteStream OutStream(Buffer, little);
   BinaryStreamWriter Writer(OutStream);
   EXPECT_NO_ERROR(Builder.commit(Writer));
@@ -41,15 +41,16 @@ TEST_F(StringTableBuilderTest, Simple) {
   // Reads the contents back.
   BinaryByteStream InStream(Buffer, little);
   BinaryStreamReader Reader(InStream);
-  StringTable Table;
-  EXPECT_NO_ERROR(Table.load(Reader));
+  PDBStringTable Table;
+  EXPECT_NO_ERROR(Table.reload(Reader));
 
   EXPECT_EQ(3U, Table.getNameCount());
   EXPECT_EQ(1U, Table.getHashVersion());
-  EXPECT_EQ("foo", Table.getStringForID(1));
-  EXPECT_EQ("bar", Table.getStringForID(5));
-  EXPECT_EQ("baz", Table.getStringForID(9));
-  EXPECT_EQ(1U, Table.getIDForString("foo"));
-  EXPECT_EQ(5U, Table.getIDForString("bar"));
-  EXPECT_EQ(9U, Table.getIDForString("baz"));
+
+  EXPECT_EXPECTED_EQ("foo", Table.getStringForID(1));
+  EXPECT_EXPECTED_EQ("bar", Table.getStringForID(5));
+  EXPECT_EXPECTED_EQ("baz", Table.getStringForID(9));
+  EXPECT_EXPECTED_EQ(1U, Table.getIDForString("foo"));
+  EXPECT_EXPECTED_EQ(5U, Table.getIDForString("bar"));
+  EXPECT_EXPECTED_EQ(9U, Table.getIDForString("baz"));
 }
diff --git a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
index 4af3aa7..ab43c4a 100644
--- a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
+++ b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
@@ -20,17 +20,15 @@ TEST(IndirectionUtilsTest, MakeStub) {
   LLVMContext Context;
   ModuleBuilder MB(Context, "x86_64-apple-macosx10.10", "");
   Function *F = MB.createFunctionDecl<void(DummyStruct, DummyStruct)>("");
-  SmallVector<AttributeList, 4> Attrs;
-  Attrs.push_back(
-      AttributeList::get(MB.getModule()->getContext(), 1U,
-                         AttrBuilder().addAttribute(Attribute::StructRet)));
-  Attrs.push_back(
-      AttributeList::get(MB.getModule()->getContext(), 2U,
-                         AttrBuilder().addAttribute(Attribute::ByVal)));
-  Attrs.push_back(
-      AttributeList::get(MB.getModule()->getContext(), ~0U,
-                         AttrBuilder().addAttribute(Attribute::NoUnwind)));
-  F->setAttributes(AttributeList::get(MB.getModule()->getContext(), Attrs));
+  AttributeSet FnAttrs = AttributeSet::get(
+      Context, AttrBuilder().addAttribute(Attribute::NoUnwind));
+  AttributeSet RetAttrs; // None
+  AttributeSet ArgAttrs[2] = {
+      AttributeSet::get(Context,
+                        AttrBuilder().addAttribute(Attribute::StructRet)),
+      AttributeSet::get(Context, AttrBuilder().addAttribute(Attribute::ByVal)),
+  };
+  F->setAttributes(AttributeList::get(Context, FnAttrs, RetAttrs, ArgAttrs));
 
   auto ImplPtr = orc::createImplPointer(*F->getType(), *MB.getModule(), "", nullptr);
   orc::makeStub(*F, *ImplPtr);
diff --git a/unittests/IR/AttributesTest.cpp b/unittests/IR/AttributesTest.cpp
index 7c3df2e..0df7a84 100644
--- a/unittests/IR/AttributesTest.cpp
+++ b/unittests/IR/AttributesTest.cpp
@@ -45,7 +45,7 @@ TEST(Attributes, Ordering) {
                          AttributeList::get(C, 1, Attribute::SExt)};
 
   AttributeList SetA = AttributeList::get(C, ASs);
-  AttributeList SetB = SetA.removeAttributes(C, 1, ASs[1]);
+  AttributeList SetB = SetA.removeAttributes(C, 1, ASs[1].getAttributes(1));
   EXPECT_NE(SetA, SetB);
 }
 
diff --git a/unittests/Support/BinaryStreamTest.cpp b/unittests/Support/BinaryStreamTest.cpp
index 74c51e3..41567da 100644
--- a/unittests/Support/BinaryStreamTest.cpp
+++ b/unittests/Support/BinaryStreamTest.cpp
@@ -358,14 +358,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
 
   struct StringExtractor {
   public:
-    typedef uint32_t ContextType;
+    typedef uint32_t &ContextType;
     static Error extract(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item,
-                         uint32_t *Index) {
-      if (*Index == 0)
+                         uint32_t &Index) {
+      if (Index == 0)
         Len = strlen("1. Test");
-      else if (*Index == 1)
+      else if (Index == 1)
         Len = strlen("2. Longer Test");
-      else if (*Index == 2)
+      else if (Index == 2)
         Len = strlen("3. Really Long Test");
       else
         Len = strlen("4. Super Extra Longest Test Of All");
@@ -374,14 +374,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
         return EC;
       Item =
           StringRef(reinterpret_cast<const char *>(Bytes.data()), Bytes.size());
-      ++(*Index);
+      ++Index;
       return Error::success();
     }
   };
 
   for (auto &Stream : Streams) {
     uint32_t Context = 0;
-    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, &Context);
+    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, Context);
     auto Iter = Array.begin();
     ASSERT_EQ("1. Test", *Iter++);
     ASSERT_EQ("2. Longer Test", *Iter++);
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 1fc18a5..caa52d2 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -211,13 +211,12 @@ enum IIT_Info {
   IIT_SAME_VEC_WIDTH_ARG = 31,
   IIT_PTR_TO_ARG = 32,
   IIT_PTR_TO_ELT = 33,
-  IIT_VEC_OF_PTRS_TO_ELT = 34,
+  IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
   IIT_I128 = 35,
   IIT_V512 = 36,
   IIT_V1024 = 37
 };
 
-
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
                                  std::vector<unsigned char> &Sig) {
   if (MVT(VT).isInteger()) {
@@ -273,9 +272,16 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
     }
     else if (R->isSubClassOf("LLVMPointerTo"))
       Sig.push_back(IIT_PTR_TO_ARG);
-    else if (R->isSubClassOf("LLVMVectorOfPointersToElt"))
-      Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT);
-    else if (R->isSubClassOf("LLVMPointerToElt"))
+    else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
+      Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
+      unsigned ArgNo = ArgCodes.size();
+      ArgCodes.push_back(3 /*vAny*/);
+      // Encode overloaded ArgNo
+      Sig.push_back(ArgNo);
+      // Encode LLVMMatchType<Number> ArgNo
+      Sig.push_back(Number);
+      return;
+    } else if (R->isSubClassOf("LLVMPointerToElt"))
       Sig.push_back(IIT_PTR_TO_ELT);
     else
       Sig.push_back(IIT_ARG);
@@ -557,8 +563,9 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
     if (ae) {
       while (ai != ae) {
         unsigned argNo = intrinsic.ArgumentAttributes[ai].first;
+        unsigned attrIdx = argNo + 1; // Must match AttributeList::FirstArgIndex
 
-        OS <<  "      const Attribute::AttrKind AttrParam" << argNo + 1 <<"[]= {";
+        OS << "      const Attribute::AttrKind AttrParam" << attrIdx << "[]= {";
         bool addComma = false;
 
         do {
@@ -599,7 +606,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
         } while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo);
         OS << "};\n";
         OS << "      AS[" << numAttrs++ << "] = AttributeList::get(C, "
-           << argNo + 1 << ", AttrParam" << argNo + 1 << ");\n";
+           << attrIdx << ", AttrParam" << attrIdx << ");\n";
       }
     }
 
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 689a2d5..10cd777 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -161,7 +161,11 @@ def main(builtinParameters = {}):
         main_with_tmp(builtinParameters)
     finally:
         if lit_tmp:
-            shutil.rmtree(lit_tmp)
+            try:
+                shutil.rmtree(lit_tmp)
+            except:
+                # FIXME: Re-try after timeout on Windows.
+                pass
 
 def main_with_tmp(builtinParameters):
     parser = argparse.ArgumentParser()
diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py
index 14d8ec9..27c7a9e 100644
--- a/utils/lit/lit/run.py
+++ b/utils/lit/lit/run.py
@@ -20,6 +20,14 @@ except ImportError:
 
 import lit.Test
 
+def abort_now():
+    """Abort immediately, skipping exception teardown (without win32api: kills the whole process group)"""
+    sys.stdout.flush()
+    if win32api:
+        win32api.TerminateProcess(win32api.GetCurrentProcess(), 3)
+    else:
+        os.kill(0, 9)
+
 ###
 # Test Execution Implementation
 
@@ -91,8 +99,7 @@ class Tester(object):
             # This is a sad hack. Unfortunately subprocess goes
             # bonkers with ctrl-c and we start forking merrily.
             print('\nCtrl-C detected, goodbye.')
-            sys.stdout.flush()
-            os.kill(0,9)
+            abort_now()
         self.consumer.update(test_index, test)
 
 class ThreadResultsConsumer(object):
@@ -353,7 +360,7 @@ class Run(object):
                 print('\nCtrl-C detected, terminating.')
                 pool.terminate()
                 pool.join()
-                os.kill(0,9)
+                abort_now()
                 return True
             win32api.SetConsoleCtrlHandler(console_ctrl_handler, True)
 
@@ -368,6 +375,10 @@ class Run(object):
             deadline = time.time() + max_time
 
         # Start a process pool. Copy over the data shared between all test runs.
+        # FIXME: Find a way to capture the worker process stderr. If the user
+        # interrupts the workers before we make it into our task callback, they
+        # will each raise a KeyboardInterrupt exception and print to stderr at
+        # the same time.
         pool = multiprocessing.Pool(jobs, worker_initializer,
                                     (self.lit_config,
                                      self.parallelism_semaphores))
@@ -379,6 +390,7 @@ class Run(object):
                                               args=(test_index, test),
                                               callback=self.consume_test_result)
                              for test_index, test in enumerate(self.tests)]
+            pool.close()
 
             # Wait for all results to come in. The callback that runs in the
             # parent process will update the display.
@@ -395,10 +407,12 @@ class Run(object):
                     a.get() # Exceptions raised here come from the worker.
                 if self.hit_max_failures:
                     break
-        finally:
+        except:
             # Stop the workers and wait for any straggling results to come in
             # if we exited without waiting on every async result.
             pool.terminate()
+            raise
+        finally:
             pool.join()
 
         # Mark any tests that weren't run as UNRESOLVED.
@@ -463,11 +477,7 @@ def worker_run_one_test(test_index, test):
         execute_test(test, child_lit_config, child_parallelism_semaphores)
         return (test_index, test)
     except KeyboardInterrupt as e:
-        # This is a sad hack. Unfortunately subprocess goes
-        # bonkers with ctrl-c and we start forking merrily.
-        print('\nCtrl-C detected, goodbye.')
-        traceback.print_exc()
-        sys.stdout.flush()
-        os.kill(0,9)
+        # If a worker process gets an interrupt, abort it immediately.
+        abort_now()
     except:
         traceback.print_exc()