system/xen: Updated for version 4.12.1.

Signed-off-by: Mario Preksavec <mario@slackware.hr>
author: Mario Preksavec <mario@slackware.hr> 2019-11-19 13:17:56 +0100
committer: Willy Sudiarto Raharjo <willysr@slackbuilds.org> 2019-11-23 16:02:01 +0700
commit: 903c02712d4cf39ae8218eb47149258dfa8c7d8a (patch)
tree: 61c869ed4904270e41898b9bcf74090822e28418 /system/xen
parent: 604be6a3da8dc95e2d89a426877c7f4021eb91df (diff)
34 files changed, 4956 insertions, 18 deletions
diff --git a/system/xen/dom0/README.dom0 b/system/xen/dom0/README.dom0
index a624587c3d717..728e3bae79bdf 100644
--- a/system/xen/dom0/README.dom0
+++ b/system/xen/dom0/README.dom0
@@ -46,7 +46,7 @@ Xen EFI binary.
 
 To make things a bit easier, a copy of Xen EFI binary can be found here:
 
-  http://slackware.hr/~mario/xen/xen-4.12.0.efi.gz
+  http://slackware.hr/~mario/xen/xen-4.12.1.efi.gz
 
 If an automatic boot to Xen kernel is desired, the binary should be renamed and
 copied to the following location: /boot/efi/EFI/BOOT/bootx64.efi
diff --git a/system/xen/dom0/config-4.4.172-xen.i686 b/system/xen/dom0/config-4.4.202-xen.i686
index 0cf848c1c95dd..b31c558e377e3 100644
--- a/system/xen/dom0/config-4.4.172-xen.i686
+++ b/system/xen/dom0/config-4.4.202-xen.i686
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.4.172 Kernel Configuration
+# Linux/x86 4.4.202 Kernel Configuration
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
@@ -545,6 +545,9 @@ CONFIG_ARCH_USES_PG_UNCACHED=y
 CONFIG_ARCH_RANDOM=y
 CONFIG_X86_SMAP=y
 # CONFIG_X86_INTEL_MPX is not set
+CONFIG_X86_INTEL_TSX_MODE_OFF=y
+# CONFIG_X86_INTEL_TSX_MODE_ON is not set
+# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_EFI=y
 # CONFIG_EFI_STUB is not set
 CONFIG_SECCOMP=y
@@ -3473,6 +3476,7 @@ CONFIG_N_HDLC=m
 CONFIG_N_GSM=m
 CONFIG_TRACE_ROUTER=m
 CONFIG_TRACE_SINK=m
+CONFIG_LDISC_AUTOLOAD=y
 CONFIG_DEVMEM=y
 CONFIG_DEVKMEM=y
 
@@ -3553,7 +3557,6 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_HW_RANDOM_TPM=m
 CONFIG_NVRAM=m
 CONFIG_DTLK=m
-CONFIG_R3964=m
 CONFIG_APPLICOM=m
 CONFIG_SONYPI=m
 
@@ -5666,7 +5669,6 @@ CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
 CONFIG_USB_ADUTUX=m
 CONFIG_USB_SEVSEG=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_LED=m
@@ -6669,7 +6671,7 @@ CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY_USER=y
 CONFIG_FANOTIFY=y
-# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
@@ -7081,6 +7083,7 @@ CONFIG_TEST_HEXDUMP=m
 # CONFIG_TEST_KSTRTOX is not set
 # CONFIG_TEST_PRINTF is not set
 # CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_HASH is not set
 # CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
 # CONFIG_DMA_API_DEBUG is not set
 CONFIG_TEST_LKM=m
@@ -7226,7 +7229,7 @@ CONFIG_CRYPTO_VMAC=m
 # Digest
 #
 CONFIG_CRYPTO_CRC32C=y
-CONFIG_CRYPTO_CRC32C_INTEL=m
+CONFIG_CRYPTO_CRC32C_INTEL=y
 CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_CRC32_PCLMUL=m
 CONFIG_CRYPTO_CRCT10DIF=y
diff --git a/system/xen/dom0/config-4.4.172-xen.x86_64 b/system/xen/dom0/config-4.4.202-xen.x86_64
index 9dcff9e02418d..f81a31b9ab97d 100644
--- a/system/xen/dom0/config-4.4.172-xen.x86_64
+++ b/system/xen/dom0/config-4.4.202-xen.x86_64
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/x86 4.4.172 Kernel Configuration
+# Linux/x86 4.4.202 Kernel Configuration
 #
 CONFIG_64BIT=y
 CONFIG_X86_64=y
@@ -525,6 +525,9 @@ CONFIG_ARCH_USES_PG_UNCACHED=y
 CONFIG_ARCH_RANDOM=y
 CONFIG_X86_SMAP=y
 # CONFIG_X86_INTEL_MPX is not set
+CONFIG_X86_INTEL_TSX_MODE_OFF=y
+# CONFIG_X86_INTEL_TSX_MODE_ON is not set
+# CONFIG_X86_INTEL_TSX_MODE_AUTO is not set
 CONFIG_EFI=y
 CONFIG_EFI_STUB=y
 CONFIG_EFI_MIXED=y
@@ -3378,6 +3381,7 @@ CONFIG_N_HDLC=m
 CONFIG_N_GSM=m
 CONFIG_TRACE_ROUTER=m
 CONFIG_TRACE_SINK=m
+CONFIG_LDISC_AUTOLOAD=y
 CONFIG_DEVMEM=y
 CONFIG_DEVKMEM=y
 
@@ -3449,7 +3453,6 @@ CONFIG_HW_RANDOM_VIA=m
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_HW_RANDOM_TPM=m
 CONFIG_NVRAM=m
-CONFIG_R3964=m
 CONFIG_APPLICOM=m
 
 #
@@ -5468,7 +5471,6 @@ CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
 CONFIG_USB_ADUTUX=m
 CONFIG_USB_SEVSEG=m
-CONFIG_USB_RIO500=m
 CONFIG_USB_LEGOTOWER=m
 CONFIG_USB_LCD=m
 CONFIG_USB_LED=m
@@ -6477,7 +6479,7 @@ CONFIG_FSNOTIFY=y
 CONFIG_DNOTIFY=y
 CONFIG_INOTIFY_USER=y
 CONFIG_FANOTIFY=y
-# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
@@ -6892,6 +6894,7 @@ CONFIG_TEST_HEXDUMP=m
 # CONFIG_TEST_KSTRTOX is not set
 # CONFIG_TEST_PRINTF is not set
 # CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_HASH is not set
 # CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set
 # CONFIG_DMA_API_DEBUG is not set
 CONFIG_TEST_LKM=m
@@ -7040,7 +7043,7 @@ CONFIG_CRYPTO_VMAC=m
 # Digest
 #
 CONFIG_CRYPTO_CRC32C=y
-CONFIG_CRYPTO_CRC32C_INTEL=m
+CONFIG_CRYPTO_CRC32C_INTEL=y
 CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_CRC32_PCLMUL=m
 CONFIG_CRYPTO_CRCT10DIF=y
diff --git a/system/xen/dom0/kernel-xen.sh b/system/xen/dom0/kernel-xen.sh
index 7067c04b79949..7228dcc541470 100644
--- a/system/xen/dom0/kernel-xen.sh
+++ b/system/xen/dom0/kernel-xen.sh
@@ -5,8 +5,8 @@
 # Written by Chris Abela <chris.abela@maltats.com>, 20100515
 # Modified by Mario Preksavec <mario@slackware.hr>
 
-KERNEL=${KERNEL:-4.4.172}
-XEN=${XEN:-4.12.0}
+KERNEL=${KERNEL:-4.4.202}
+XEN=${XEN:-4.12.1}
 BOOTLOADER=${BOOTLOADER:-lilo}
 
 ROOTMOD=${ROOTMOD:-ext4}
diff --git a/system/xen/domU/domU.sh b/system/xen/domU/domU.sh
index c471242392e6c..f017c1bd23737 100644
--- a/system/xen/domU/domU.sh
+++ b/system/xen/domU/domU.sh
@@ -7,7 +7,7 @@
 
 set -e
 
-KERNEL=${KERNEL:-4.4.172}
+KERNEL=${KERNEL:-4.4.202}
 
 # Build an image for the root file system and another for the swap
 # Default values : 8GB and 500MB resepectively.
diff --git a/system/xen/xen.SlackBuild b/system/xen/xen.SlackBuild
index 506d100a4c1fb..52bec8230e342 100644
--- a/system/xen/xen.SlackBuild
+++ b/system/xen/xen.SlackBuild
@@ -23,7 +23,7 @@
 #  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 PRGNAM=xen
-VERSION=${VERSION:-4.12.0}
+VERSION=${VERSION:-4.12.1}
 BUILD=${BUILD:-1}
 TAG=${TAG:-_SBo}
 
diff --git a/system/xen/xen.info b/system/xen/xen.info
index ae94f715cdf14..e0ee8e6f20056 100644
--- a/system/xen/xen.info
+++ b/system/xen/xen.info
@@ -1,7 +1,7 @@
 PRGNAM="xen"
-VERSION="4.12.0"
+VERSION="4.12.1"
 HOMEPAGE="http://www.xenproject.org/"
-DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.12.0.tar.gz \
+DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.12.1.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-d2063b7693e0e35db97b2264aa987eb6341ae779.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/lwip-1.3.0.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/zlib-1.2.3.tar.gz \
@@ -13,7 +13,7 @@ DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.12.0.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/tpm_emulator-0.7.4.tar.gz \
           http://mirror.slackware.hr/sources/xen-seabios/seabios-1.12.0.tar.gz \
           http://mirror.slackware.hr/sources/xen-ovmf/xen-ovmf-20180725_ef529e6ab7.tar.bz2"
-MD5SUM="7d24d4541e3025421e02384cabc3528b \
+MD5SUM="3f96ae93a5d6a3dd89bdf1398e30895e \
         0de05da7aec358881bb1dff815ecca14 \
         36cc57650cffda9a0269493be2a169bb \
         debc62758716a169df9f62e6ab2bc634 \
diff --git a/system/xen/xsa/xsa296.patch b/system/xen/xsa/xsa296.patch
new file mode 100644
index 0000000000000..e71ea7f790f27
--- /dev/null
+++ b/system/xen/xsa/xsa296.patch
@@ -0,0 +1,195 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: xen/hypercall: Don't use BUG() for parameter checking in hypercall_create_continuation()
+
+Since c/s 1d429034 "hypercall: update vcpu_op to take an unsigned vcpuid",
+which incorrectly swapped 'i' for 'u' in the parameter type list, guests have
+been able to hit the BUG() in next_arg()'s default case.
+
+Correct these back to 'i'.
+
+In addition, make adjustments to prevent this class of issue from occurring in
+the future - crashing Xen is not an appropriate form of parameter checking.
+
+Capitalise NEXT_ARG() to catch all uses, to highlight that it is a macro doing
+non-function-like things behind the scenes, and undef it when appropriate.
+Implement a bad_fmt: block which prints an error, asserts unreachable, and
+crashes the guest.
+
+On the ARM side, drop all parameter checking of p.  It is asymmetric with the
+x86 side, and akin to expecting memcpy() or sprintf() to check their src/fmt
+parameter before use.  A caller passing "" or something other than a string
+literal will be obvious during code review.
+
+This is XSA-296.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Julien Grall <julien.grall@arm.com>
+
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index 941bbff4fe..a3da8e9c08 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -383,14 +383,15 @@ void sync_vcpu_execstate(struct vcpu *v)
+     /* Nothing to do -- no lazy switching */
+ }
+ 
+-#define next_arg(fmt, args) ({                                              \
++#define NEXT_ARG(fmt, args)                                                 \
++({                                                                          \
+     unsigned long __arg;                                                    \
+     switch ( *(fmt)++ )                                                     \
+     {                                                                       \
+     case 'i': __arg = (unsigned long)va_arg(args, unsigned int);  break;    \
+     case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break;    \
+     case 'h': __arg = (unsigned long)va_arg(args, void *);        break;    \
+-    default:  __arg = 0; BUG();                                             \
++    default:  goto bad_fmt;                                                 \
+     }                                                                       \
+     __arg;                                                                  \
+ })
+@@ -405,9 +406,6 @@ unsigned long hypercall_create_continuation(
+     unsigned int i;
+     va_list args;
+ 
+-    /* All hypercalls take at least one argument */
+-    BUG_ON( !p || *p == '\0' );
+-
+     current->hcall_preempted = true;
+ 
+     va_start(args, format);
+@@ -415,7 +413,7 @@ unsigned long hypercall_create_continuation(
+     if ( mcs->flags & MCSF_in_multicall )
+     {
+         for ( i = 0; *p != '\0'; i++ )
+-            mcs->call.args[i] = next_arg(p, args);
++            mcs->call.args[i] = NEXT_ARG(p, args);
+ 
+         /* Return value gets written back to mcs->call.result */
+         rc = mcs->call.result;
+@@ -431,7 +429,7 @@ unsigned long hypercall_create_continuation(
+ 
+             for ( i = 0; *p != '\0'; i++ )
+             {
+-                arg = next_arg(p, args);
++                arg = NEXT_ARG(p, args);
+ 
+                 switch ( i )
+                 {
+@@ -454,7 +452,7 @@ unsigned long hypercall_create_continuation(
+ 
+             for ( i = 0; *p != '\0'; i++ )
+             {
+-                arg = next_arg(p, args);
++                arg = NEXT_ARG(p, args);
+ 
+                 switch ( i )
+                 {
+@@ -475,8 +473,16 @@ unsigned long hypercall_create_continuation(
+     va_end(args);
+ 
+     return rc;
++
++ bad_fmt:
++    gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p);
++    ASSERT_UNREACHABLE();
++    domain_crash(current->domain);
++    return 0;
+ }
+ 
++#undef NEXT_ARG
++
+ void startup_cpu_idle_loop(void)
+ {
+     struct vcpu *v = current;
+diff --git a/xen/arch/x86/hypercall.c b/xen/arch/x86/hypercall.c
+index d483dbaa6b..4643e5eb43 100644
+--- a/xen/arch/x86/hypercall.c
++++ b/xen/arch/x86/hypercall.c
+@@ -80,14 +80,15 @@ const hypercall_args_t hypercall_args_table[NR_hypercalls] =
+ #undef COMP
+ #undef ARGS
+ 
+-#define next_arg(fmt, args) ({                                              \
++#define NEXT_ARG(fmt, args)                                                 \
++({                                                                          \
+     unsigned long __arg;                                                    \
+     switch ( *(fmt)++ )                                                     \
+     {                                                                       \
+     case 'i': __arg = (unsigned long)va_arg(args, unsigned int);  break;    \
+     case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break;    \
+     case 'h': __arg = (unsigned long)va_arg(args, void *);        break;    \
+-    default:  __arg = 0; BUG();                                             \
++    default:  goto bad_fmt;                                                 \
+     }                                                                       \
+     __arg;                                                                  \
+ })
+@@ -109,7 +110,7 @@ unsigned long hypercall_create_continuation(
+     if ( mcs->flags & MCSF_in_multicall )
+     {
+         for ( i = 0; *p != '\0'; i++ )
+-            mcs->call.args[i] = next_arg(p, args);
++            mcs->call.args[i] = NEXT_ARG(p, args);
+     }
+     else
+     {
+@@ -121,7 +122,7 @@ unsigned long hypercall_create_continuation(
+         {
+             for ( i = 0; *p != '\0'; i++ )
+             {
+-                arg = next_arg(p, args);
++                arg = NEXT_ARG(p, args);
+                 switch ( i )
+                 {
+                 case 0: regs->rdi = arg; break;
+@@ -137,7 +138,7 @@ unsigned long hypercall_create_continuation(
+         {
+             for ( i = 0; *p != '\0'; i++ )
+             {
+-                arg = next_arg(p, args);
++                arg = NEXT_ARG(p, args);
+                 switch ( i )
+                 {
+                 case 0: regs->rbx = arg; break;
+@@ -154,8 +155,16 @@ unsigned long hypercall_create_continuation(
+     va_end(args);
+ 
+     return op;
++
++ bad_fmt:
++    gprintk(XENLOG_ERR, "Bad hypercall continuation format '%c'\n", *p);
++    ASSERT_UNREACHABLE();
++    domain_crash(curr->domain);
++    return 0;
+ }
+ 
++#undef NEXT_ARG
++
+ int hypercall_xlat_continuation(unsigned int *id, unsigned int nr,
+                                 unsigned int mask, ...)
+ {
+diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c
+index 39877b3ab2..2531fa7421 100644
+--- a/xen/common/compat/domain.c
++++ b/xen/common/compat/domain.c
+@@ -81,7 +81,7 @@ int compat_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) ar
+         }
+ 
+         if ( rc == -ERESTART )
+-            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
++            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
+                                                cmd, vcpuid, arg);
+ 
+         break;
+diff --git a/xen/common/domain.c b/xen/common/domain.c
+index 2308588052..65bcd85e34 100644
+--- a/xen/common/domain.c
++++ b/xen/common/domain.c
+@@ -1411,7 +1411,7 @@ long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
+ 
+         rc = arch_initialise_vcpu(v, arg);
+         if ( rc == -ERESTART )
+-            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
++            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
+                                                cmd, vcpuid, arg);
+ 
+         break;
diff --git a/system/xen/xsa/xsa298.patch b/system/xen/xsa/xsa298.patch
new file mode 100644
index 0000000000000..aa39042be56bd
--- /dev/null
+++ b/system/xen/xsa/xsa298.patch
@@ -0,0 +1,89 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/PV: check GDT/LDT limits during emulation
+
+Accesses beyond the LDT limit originating from emulation would trigger
+the ASSERT() in pv_map_ldt_shadow_page(). On production builds such
+accesses would cause an attempt to promote the touched page (offset from
+the present LDT base address) to a segment descriptor one. If this
+happens to succeed, guest user mode would be able to elevate its
+privileges to that of the guest kernel. This is particularly easy when
+there's no LDT at all, in which case the LDT base stored internally to
+Xen is simply zero.
+
+Also adjust the ASSERT() that was triggering: It was off by one to
+begin with, and for production builds we also better use
+ASSERT_UNREACHABLE() instead with suitable recovery code afterwards.
+
+This is XSA-298.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+---
+v2: Correct 64-bit-only limit check (by folding into the common one).
+
+--- a/xen/arch/x86/pv/emul-gate-op.c
++++ b/xen/arch/x86/pv/emul-gate-op.c
+@@ -51,7 +51,13 @@ static int read_gate_descriptor(unsigned
+     const seg_desc_t *pdesc = gdt_ldt_desc_ptr(gate_sel);
+ 
+     if ( (gate_sel < 4) ||
+-         ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
++         /*
++          * We're interested in call gates only, which occupy a single
++          * seg_desc_t for 32-bit and a consecutive pair of them for 64-bit.
++          */
++         ((gate_sel >> 3) + !is_pv_32bit_vcpu(v) >=
++          (gate_sel & 4 ? v->arch.pv.ldt_ents
++                        : v->arch.pv.gdt_ents)) ||
+          __get_user(desc, pdesc) )
+         return 0;
+ 
+@@ -70,7 +76,7 @@ static int read_gate_descriptor(unsigned
+     if ( !is_pv_32bit_vcpu(v) )
+     {
+         if ( (*ar & 0x1f00) != 0x0c00 ||
+-             (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
++             /* Limit check done above already. */
+              __get_user(desc, pdesc + 1) ||
+              (desc.b & 0x1f00) )
+             return 0;
+--- a/xen/arch/x86/pv/emulate.c
++++ b/xen/arch/x86/pv/emulate.c
+@@ -31,7 +31,14 @@ int pv_emul_read_descriptor(unsigned int
+ {
+     seg_desc_t desc;
+ 
+-    if ( sel < 4)
++    if ( sel < 4 ||
++         /*
++          * Don't apply the GDT limit here, as the selector may be a Xen
++          * provided one. __get_user() will fail (without taking further
++          * action) for ones falling in the gap between guest populated
++          * and Xen ones.
++          */
++         ((sel & 4) && (sel >> 3) >= v->arch.pv.ldt_ents) )
+         desc.b = desc.a = 0;
+     else if ( __get_user(desc, gdt_ldt_desc_ptr(sel)) )
+         return 0;
+--- a/xen/arch/x86/pv/mm.c
++++ b/xen/arch/x86/pv/mm.c
+@@ -92,12 +92,16 @@ bool pv_map_ldt_shadow_page(unsigned int
+     BUG_ON(unlikely(in_irq()));
+ 
+     /*
+-     * Hardware limit checking should guarantee this property.  NB. This is
++     * Prior limit checking should guarantee this property.  NB. This is
+      * safe as updates to the LDT can only be made by MMUEXT_SET_LDT to the
+      * current vcpu, and vcpu_reset() will block until this vcpu has been
+      * descheduled before continuing.
+      */
+-    ASSERT((offset >> 3) <= curr->arch.pv.ldt_ents);
++    if ( unlikely((offset >> 3) >= curr->arch.pv.ldt_ents) )
++    {
++        ASSERT_UNREACHABLE();
++        return false;
++    }
+ 
+     if ( is_pv_32bit_domain(currd) )
+         linear = (uint32_t)linear;
diff --git a/system/xen/xsa/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch b/system/xen/xsa/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
new file mode 100644
index 0000000000000..fbb9d8086b6f8
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0001-x86-mm-L1TF-checks-don-t-leave-a-partial-entry.patch
@@ -0,0 +1,94 @@
+From 33d051917d5ef38f678b507a3c832afde48b9b49 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 01/11] x86/mm: L1TF checks don't leave a partial entry
+
+On detection of a potential L1TF issue, most validation code returns
+-ERESTART to allow the switch to shadow mode to happen and cause the
+original operation to be restarted.
+
+However, in the validation code, the return value -ERESTART has been
+repurposed to indicate 1) the function has partially completed
+something which needs to be undone, and 2) calling put_page_type()
+should cleanly undo it.  This causes problems in several places.
+
+For L1 tables, on receiving an -ERESTART return from alloc_l1_table(),
+alloc_page_type() will set PGT_partial on the page.  If for some
+reason the original operation never restarts, then on domain
+destruction, relinquish_memory() will call free_page_type() on the
+page.
+
+Unfortunately, alloc_ and free_l1_table() aren't set up to deal with
+PGT_partial.  When returning a failure, alloc_l1_table() always
+de-validates whatever it's validated so far, and free_l1_table()
+always devalidates the whole page.  This means that if
+relinquish_memory() calls free_page_type() on an L1 that didn't
+complete due to an L1TF, it will call put_page_from_l1e() on "page
+entries" that have never been validated.
+
+For L2+ tables, setting rc to ERESTART causes the rest of the
+alloc_lN_table() function to *think* that the entry in question will
+have PGT_partial set.  This will cause it to set partial_pte = 1.  If
+relinquish_memory() then calls free_page_type() on one of those pages,
+then free_lN_table() will call put_page_from_lNe() on the entry when
+it shouldn't.
+
+Rather than indicating -ERESTART, indicate -EINTR.  This is the code
+to indicate that nothing has changed from when you started the call
+(which is effectively how alloc_l1_table() handles errors).
+
+mod_lN_entry() shouldn't have any of these types of problems, so leave
+potential changes there for a clean-up patch later.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 3557cd1178..a1b55c10ff 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1409,7 +1409,7 @@ static int alloc_l1_table(struct page_info *page)
+     {
+         if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
+         {
+-            ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -ERESTART : 0;
++            ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -EINTR : 0;
+             if ( ret )
+                 goto out;
+         }
+@@ -1517,7 +1517,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+         {
+             if ( !pv_l1tf_check_l2e(d, l2e) )
+                 continue;
+-            rc = -ERESTART;
++            rc = -EINTR;
+         }
+         else
+             rc = get_page_from_l2e(l2e, pfn, d, partial);
+@@ -1603,7 +1603,7 @@ static int alloc_l3_table(struct page_info *page)
+         {
+             if ( !pv_l1tf_check_l3e(d, l3e) )
+                 continue;
+-            rc = -ERESTART;
++            rc = -EINTR;
+         }
+         else
+             rc = get_page_from_l3e(l3e, pfn, d, partial);
+@@ -1783,7 +1783,7 @@ static int alloc_l4_table(struct page_info *page)
+         {
+             if ( !pv_l1tf_check_l4e(d, l4e) )
+                 continue;
+-            rc = -ERESTART;
++            rc = -EINTR;
+         }
+         else
+             rc = get_page_from_l4e(l4e, pfn, d, partial);
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch b/system/xen/xsa/xsa299-4.12-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
new file mode 100644
index 0000000000000..a74598e597fad
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0002-x86-mm-Don-t-re-set-PGT_pinned-on-a-partially-de-val.patch
@@ -0,0 +1,99 @@
+From b490792c18f74b76ec8161721c1e07f810e36309 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 02/11] x86/mm: Don't re-set PGT_pinned on a partially
+ de-validated page
+
+When unpinning pagetables, if an operation is interrupted,
+relinquish_memory() re-sets PGT_pinned so that the un-pin will be
+picked up again when the hypercall restarts.
+
+This is appropriate when put_page_and_type_preemptible() returns
+-EINTR, which indicates that the page is back in its initial state
+(i.e., completely validated).  However, for -ERESTART, this leads to a
+state where a page has both PGT_pinned and PGT_partial set.
+
+This happens to work at the moment, although it's not really a
+"canonical" state; but in subsequent patches, where we need to make a
+distinction in handling between PGT_validated and PGT_partial pages,
+this causes issues.
+
+Move to a "canonical" state by:
+- Only re-setting PGT_pinned on -EINTR
+- Re-dropping the refcount held by PGT_pinned on -ERESTART
+
+In the latter case, the PGT_partial bit will be cleared further down
+with the rest of the other PGT_partial pages.
+
+While here, clean up some trailing whitespace.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/domain.c | 31 ++++++++++++++++++++++++++++---
+ 1 file changed, 28 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 2585327834..59df8a6d8d 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -114,7 +114,7 @@ static void play_dead(void)
+      * this case, heap corruption or #PF can occur (when heap debugging is
+      * enabled). For example, even printk() can involve tasklet scheduling,
+      * which touches per-cpu vars.
+-     * 
++     *
+      * Consider very carefully when adding code to *dead_idle. Most hypervisor
+      * subsystems are unsafe to call.
+      */
+@@ -1909,9 +1909,34 @@ static int relinquish_memory(
+             break;
+         case -ERESTART:
+         case -EINTR:
++            /*
++             * -EINTR means PGT_validated has been re-set; re-set
++             * PGT_pinned again so that it gets picked up next time
++             * around.
++             *
++             * -ERESTART, OTOH, means PGT_partial is set instead.  Put
++             * it back on the list, but don't set PGT_pinned; the
++             * section below will finish off de-validation.  But we do
++             * need to drop the general ref associated with
++             * PGT_pinned, since put_page_and_type_preemptible()
++             * didn't do it.
++             *
++             * NB we can do an ASSERT for PGT_validated, since we
++             * "own" the type ref; but theoretically, the PGT_partial
++             * could be cleared by someone else.
++             */
++            if ( ret == -EINTR )
++            {
++                ASSERT(page->u.inuse.type_info & PGT_validated);
++                set_bit(_PGT_pinned, &page->u.inuse.type_info);
++            }
++            else
++                put_page(page);
++
+             ret = -ERESTART;
++
++            /* Put the page back on the list and drop the ref we grabbed above */
+             page_list_add(page, list);
+-            set_bit(_PGT_pinned, &page->u.inuse.type_info);
+             put_page(page);
+             goto out;
+         default:
+@@ -2161,7 +2186,7 @@ void vcpu_kick(struct vcpu *v)
+      * pending flag. These values may fluctuate (after all, we hold no
+      * locks) but the key insight is that each change will cause
+      * evtchn_upcall_pending to be polled.
+-     * 
++     *
+      * NB2. We save the running flag across the unblock to avoid a needless
+      * IPI for domains that we IPI'd to unblock.
+      */
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch b/system/xen/xsa/xsa299-4.12-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
new file mode 100644
index 0000000000000..226e5487b1579
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0003-x86-mm-Separate-out-partial_pte-tristate-into-indivi.patch
@@ -0,0 +1,618 @@
+From 0f9f61e5737fdd346550ec6e30161fa99e4653fa Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 03/11] x86/mm: Separate out partial_pte tristate into
+ individual flags
+
+At the moment, partial_pte is a tri-state that contains two distinct bits
+of information:
+
+1. If zero, the pte at index [nr_validated_ptes] is un-validated.  If
+   non-zero, the pte was last seen with PGT_partial set.
+
+2. If positive, the pte at index [nr_validated_ptes] does not hold a
+   general reference count.  If negative, it does.
+
+To make future patches more clear, separate out this functionality
+into two distinct, named bits: PTF_partial_set (for #1) and
+PTF_partial_general_ref (for #2).
+
+Additionally, a number of functions which need this information also
+take other flags to control behavior (such as `preemptible` and
+`defer`).  These are hard to read in the caller (since you only see
+'true' or 'false'), and ugly when many are added together.  In
+preparation for adding yet another flag in a future patch, collapse
+all of these into a single `flags` variable.
+
+NB that this does mean checking for what was previously the '-1'
+condition is a bit more ugly in the put_page_from_lNe functions (since
+you have to check for both partial_set and general ref); but this
+clause will go away in a future patch.
+
+Also note that the original comment had an off-by-one error:
+partial_flags (like partial_pte before it) concerns
+plNe[nr_validated_ptes], not plNe[nr_validated_ptes+1].
+
+No functional change intended.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c        | 165 ++++++++++++++++++++++++---------------
+ xen/include/asm-x86/mm.h |  41 +++++++---
+ 2 files changed, 128 insertions(+), 78 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a1b55c10ff..3f6f8cc9b8 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1094,20 +1094,35 @@ get_page_from_l1e(
+ }
+ 
+ #ifdef CONFIG_PV
++
++/*
++ * The following flags are used to specify behavior of various get and
++ * put commands.  The first two are also stored in page->partial_flags
++ * to indicate the state of the page pointed to by
++ * page->pte[page->nr_validated_entries].  See the comment in mm.h for
++ * more information.
++ */
++#define PTF_partial_set         (1 << 0)
++#define PTF_partial_general_ref (1 << 1)
++#define PTF_preemptible         (1 << 2)
++#define PTF_defer               (1 << 3)
++
+ static int get_page_and_type_from_mfn(
+     mfn_t mfn, unsigned long type, struct domain *d,
+-    int partial, int preemptible)
++    unsigned int flags)
+ {
+     struct page_info *page = mfn_to_page(mfn);
+     int rc;
++    bool preemptible = flags & PTF_preemptible,
++         partial_ref = flags & PTF_partial_general_ref;
+ 
+-    if ( likely(partial >= 0) &&
++    if ( likely(!partial_ref) &&
+          unlikely(!get_page_from_mfn(mfn, d)) )
+         return -EINVAL;
+ 
+     rc = _get_page_type(page, type, preemptible);
+ 
+-    if ( unlikely(rc) && partial >= 0 &&
++    if ( unlikely(rc) && !partial_ref &&
+          (!preemptible || page != current->arch.old_guest_table) )
+         put_page(page);
+ 
+@@ -1117,7 +1132,7 @@ static int get_page_and_type_from_mfn(
+ define_get_linear_pagetable(l2);
+ static int
+ get_page_from_l2e(
+-    l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial)
++    l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned int flags)
+ {
+     unsigned long mfn = l2e_get_pfn(l2e);
+     int rc;
+@@ -1129,8 +1144,9 @@ get_page_from_l2e(
+         return -EINVAL;
+     }
+ 
+-    rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d,
+-                                    partial, false);
++    ASSERT(!(flags & PTF_preemptible));
++
++    rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, flags);
+     if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+         rc = 0;
+ 
+@@ -1140,7 +1156,7 @@ get_page_from_l2e(
+ define_get_linear_pagetable(l3);
+ static int
+ get_page_from_l3e(
+-    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial)
++    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned int flags)
+ {
+     int rc;
+ 
+@@ -1152,7 +1168,7 @@ get_page_from_l3e(
+     }
+ 
+     rc = get_page_and_type_from_mfn(
+-        l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1);
++        l3e_get_mfn(l3e), PGT_l2_page_table, d, flags | PTF_preemptible);
+     if ( unlikely(rc == -EINVAL) &&
+          !is_pv_32bit_domain(d) &&
+          get_l3_linear_pagetable(l3e, pfn, d) )
+@@ -1164,7 +1180,7 @@ get_page_from_l3e(
+ define_get_linear_pagetable(l4);
+ static int
+ get_page_from_l4e(
+-    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial)
++    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned int flags)
+ {
+     int rc;
+ 
+@@ -1176,7 +1192,7 @@ get_page_from_l4e(
+     }
+ 
+     rc = get_page_and_type_from_mfn(
+-        l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1);
++        l4e_get_mfn(l4e), PGT_l3_page_table, d, flags | PTF_preemptible);
+     if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
+         rc = 0;
+ 
+@@ -1277,7 +1293,7 @@ static void put_data_page(struct page_info *page, bool writeable)
+  * Note also that this automatically deals correctly with linear p.t.'s.
+  */
+ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+-                             int partial, bool defer)
++                             unsigned int flags)
+ {
+     int rc = 0;
+ 
+@@ -1300,12 +1316,13 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+         struct page_info *pg = l2e_get_page(l2e);
+         struct page_info *ptpg = mfn_to_page(_mfn(pfn));
+ 
+-        if ( unlikely(partial > 0) )
++        if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
++              PTF_partial_set )
+         {
+-            ASSERT(!defer);
++            ASSERT(!(flags & PTF_defer));
+             rc = _put_page_type(pg, true, ptpg);
+         }
+-        else if ( defer )
++        else if ( flags & PTF_defer )
+         {
+             current->arch.old_guest_ptpg = ptpg;
+             current->arch.old_guest_table = pg;
+@@ -1322,7 +1339,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+ }
+ 
+ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+-                             int partial, bool defer)
++                             unsigned int flags)
+ {
+     struct page_info *pg;
+     int rc;
+@@ -1345,13 +1362,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ 
+     pg = l3e_get_page(l3e);
+ 
+-    if ( unlikely(partial > 0) )
++    if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
++         PTF_partial_set )
+     {
+-        ASSERT(!defer);
++        ASSERT(!(flags & PTF_defer));
+         return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+     }
+ 
+-    if ( defer )
++    if ( flags & PTF_defer )
+     {
+         current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+         current->arch.old_guest_table = pg;
+@@ -1366,7 +1384,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ }
+ 
+ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+-                             int partial, bool defer)
++                             unsigned int flags)
+ {
+     int rc = 1;
+ 
+@@ -1375,13 +1393,14 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+     {
+         struct page_info *pg = l4e_get_page(l4e);
+ 
+-        if ( unlikely(partial > 0) )
++        if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
++              PTF_partial_set )
+         {
+-            ASSERT(!defer);
++            ASSERT(!(flags & PTF_defer));
+             return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
+         }
+ 
+-        if ( defer )
++        if ( flags & PTF_defer )
+         {
+             current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+             current->arch.old_guest_table = pg;
+@@ -1492,12 +1511,13 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+     unsigned long  pfn = mfn_x(page_to_mfn(page));
+     l2_pgentry_t  *pl2e;
+     unsigned int   i;
+-    int            rc = 0, partial = page->partial_pte;
++    int            rc = 0;
++    unsigned int   partial_flags = page->partial_flags;
+ 
+     pl2e = map_domain_page(_mfn(pfn));
+ 
+     for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
+-          i++, partial = 0 )
++          i++, partial_flags = 0 )
+     {
+         l2_pgentry_t l2e;
+ 
+@@ -1520,17 +1540,18 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+             rc = -EINTR;
+         }
+         else
+-            rc = get_page_from_l2e(l2e, pfn, d, partial);
++            rc = get_page_from_l2e(l2e, pfn, d, partial_flags);
+ 
+         if ( rc == -ERESTART )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = partial ?: 1;
++            /* Set 'set', retain 'general ref' */
++            page->partial_flags = partial_flags | PTF_partial_set;
+         }
+         else if ( rc == -EINTR && i )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = 0;
++            page->partial_flags = 0;
+             rc = -ERESTART;
+         }
+         else if ( rc < 0 && rc != -EINTR )
+@@ -1539,7 +1560,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+-                page->partial_pte = 0;
++                page->partial_flags = 0;
+                 current->arch.old_guest_ptpg = NULL;
+                 current->arch.old_guest_table = page;
+             }
+@@ -1563,7 +1584,8 @@ static int alloc_l3_table(struct page_info *page)
+     unsigned long  pfn = mfn_x(page_to_mfn(page));
+     l3_pgentry_t  *pl3e;
+     unsigned int   i;
+-    int            rc = 0, partial = page->partial_pte;
++    int            rc = 0;
++    unsigned int   partial_flags = page->partial_flags;
+ 
+     pl3e = map_domain_page(_mfn(pfn));
+ 
+@@ -1578,7 +1600,7 @@ static int alloc_l3_table(struct page_info *page)
+         memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
+ 
+     for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+-          i++, partial = 0 )
++          i++, partial_flags = 0 )
+     {
+         l3_pgentry_t l3e = pl3e[i];
+ 
+@@ -1597,7 +1619,8 @@ static int alloc_l3_table(struct page_info *page)
+             else
+                 rc = get_page_and_type_from_mfn(
+                     l3e_get_mfn(l3e),
+-                    PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1);
++                    PGT_l2_page_table | PGT_pae_xen_l2, d,
++                    partial_flags | PTF_preemptible);
+         }
+         else if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+         {
+@@ -1606,17 +1629,18 @@ static int alloc_l3_table(struct page_info *page)
+             rc = -EINTR;
+         }
+         else
+-            rc = get_page_from_l3e(l3e, pfn, d, partial);
++            rc = get_page_from_l3e(l3e, pfn, d, partial_flags);
+ 
+         if ( rc == -ERESTART )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = partial ?: 1;
++            /* Set 'set', leave 'general ref' set if this entry was set */
++            page->partial_flags = partial_flags | PTF_partial_set;
+         }
+         else if ( rc == -EINTR && i )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = 0;
++            page->partial_flags = 0;
+             rc = -ERESTART;
+         }
+         if ( rc < 0 )
+@@ -1633,7 +1657,7 @@ static int alloc_l3_table(struct page_info *page)
+         if ( i )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = 0;
++            page->partial_flags = 0;
+             current->arch.old_guest_ptpg = NULL;
+             current->arch.old_guest_table = page;
+         }
+@@ -1767,10 +1791,11 @@ static int alloc_l4_table(struct page_info *page)
+     unsigned long  pfn = mfn_x(page_to_mfn(page));
+     l4_pgentry_t  *pl4e = map_domain_page(_mfn(pfn));
+     unsigned int   i;
+-    int            rc = 0, partial = page->partial_pte;
++    int            rc = 0;
++    unsigned int   partial_flags = page->partial_flags;
+ 
+     for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+-          i++, partial = 0 )
++          i++, partial_flags = 0 )
+     {
+         l4_pgentry_t l4e;
+ 
+@@ -1786,12 +1811,13 @@ static int alloc_l4_table(struct page_info *page)
+             rc = -EINTR;
+         }
+         else
+-            rc = get_page_from_l4e(l4e, pfn, d, partial);
++            rc = get_page_from_l4e(l4e, pfn, d, partial_flags);
+ 
+         if ( rc == -ERESTART )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_pte = partial ?: 1;
++            /* Set 'set', leave 'general ref' set if this entry was set */
++            page->partial_flags = partial_flags | PTF_partial_set;
+         }
+         else if ( rc < 0 )
+         {
+@@ -1801,7 +1827,7 @@ static int alloc_l4_table(struct page_info *page)
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+-                page->partial_pte = 0;
++                page->partial_flags = 0;
+                 if ( rc == -EINTR )
+                     rc = -ERESTART;
+                 else
+@@ -1853,19 +1879,20 @@ static int free_l2_table(struct page_info *page)
+     struct domain *d = page_get_owner(page);
+     unsigned long pfn = mfn_x(page_to_mfn(page));
+     l2_pgentry_t *pl2e;
+-    int rc = 0, partial = page->partial_pte;
+-    unsigned int i = page->nr_validated_ptes - !partial;
++    int rc = 0;
++    unsigned int partial_flags = page->partial_flags,
++        i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
+ 
+     pl2e = map_domain_page(_mfn(pfn));
+ 
+     for ( ; ; )
+     {
+         if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
+-            rc = put_page_from_l2e(pl2e[i], pfn, partial, false);
++            rc = put_page_from_l2e(pl2e[i], pfn, partial_flags);
+         if ( rc < 0 )
+             break;
+ 
+-        partial = 0;
++        partial_flags = 0;
+ 
+         if ( !i-- )
+             break;
+@@ -1887,12 +1914,14 @@ static int free_l2_table(struct page_info *page)
+     else if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_pte = partial ?: -1;
++        page->partial_flags = (partial_flags & PTF_partial_set) ?
++            partial_flags :
++            (PTF_partial_set | PTF_partial_general_ref);
+     }
+     else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
+     {
+         page->nr_validated_ptes = i + 1;
+-        page->partial_pte = 0;
++        page->partial_flags = 0;
+         rc = -ERESTART;
+     }
+ 
+@@ -1904,18 +1933,19 @@ static int free_l3_table(struct page_info *page)
+     struct domain *d = page_get_owner(page);
+     unsigned long pfn = mfn_x(page_to_mfn(page));
+     l3_pgentry_t *pl3e;
+-    int rc = 0, partial = page->partial_pte;
+-    unsigned int  i = page->nr_validated_ptes - !partial;
++    int rc = 0;
++    unsigned int partial_flags = page->partial_flags,
++        i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
+ 
+     pl3e = map_domain_page(_mfn(pfn));
+ 
+     for ( ; ; )
+     {
+-        rc = put_page_from_l3e(pl3e[i], pfn, partial, 0);
++        rc = put_page_from_l3e(pl3e[i], pfn, partial_flags);
+         if ( rc < 0 )
+             break;
+ 
+-        partial = 0;
++        partial_flags = 0;
+         if ( rc == 0 )
+             pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
+ 
+@@ -1934,12 +1964,14 @@ static int free_l3_table(struct page_info *page)
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_pte = partial ?: -1;
++        page->partial_flags = (partial_flags & PTF_partial_set) ?
++            partial_flags :
++            (PTF_partial_set | PTF_partial_general_ref);
+     }
+     else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+     {
+         page->nr_validated_ptes = i + 1;
+-        page->partial_pte = 0;
++        page->partial_flags = 0;
+         rc = -ERESTART;
+     }
+     return rc > 0 ? 0 : rc;
+@@ -1950,26 +1982,29 @@ static int free_l4_table(struct page_info *page)
+     struct domain *d = page_get_owner(page);
+     unsigned long pfn = mfn_x(page_to_mfn(page));
+     l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn));
+-    int rc = 0, partial = page->partial_pte;
+-    unsigned int  i = page->nr_validated_ptes - !partial;
++    int rc = 0;
++    unsigned partial_flags = page->partial_flags,
++        i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set);
+ 
+     do {
+         if ( is_guest_l4_slot(d, i) )
+-            rc = put_page_from_l4e(pl4e[i], pfn, partial, 0);
++            rc = put_page_from_l4e(pl4e[i], pfn, partial_flags);
+         if ( rc < 0 )
+             break;
+-        partial = 0;
++        partial_flags = 0;
+     } while ( i-- );
+ 
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_pte = partial ?: -1;
++        page->partial_flags = (partial_flags & PTF_partial_set) ?
++            partial_flags :
++            (PTF_partial_set | PTF_partial_general_ref);
+     }
+     else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+     {
+         page->nr_validated_ptes = i + 1;
+-        page->partial_pte = 0;
++        page->partial_flags = 0;
+         rc = -ERESTART;
+     }
+ 
+@@ -2247,7 +2282,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
+         return -EBUSY;
+     }
+ 
+-    put_page_from_l2e(ol2e, pfn, 0, true);
++    put_page_from_l2e(ol2e, pfn, PTF_defer);
+ 
+     return rc;
+ }
+@@ -2315,7 +2350,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
+         if ( !create_pae_xen_mappings(d, pl3e) )
+             BUG();
+ 
+-    put_page_from_l3e(ol3e, pfn, 0, 1);
++    put_page_from_l3e(ol3e, pfn, PTF_defer);
+     return rc;
+ }
+ 
+@@ -2378,7 +2413,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
+         return -EFAULT;
+     }
+ 
+-    put_page_from_l4e(ol4e, pfn, 0, 1);
++    put_page_from_l4e(ol4e, pfn, PTF_defer);
+     return rc;
+ }
+ #endif /* CONFIG_PV */
+@@ -2649,7 +2684,7 @@ int free_page_type(struct page_info *page, unsigned long type,
+     if ( !(type & PGT_partial) )
+     {
+         page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
+-        page->partial_pte = 0;
++        page->partial_flags = 0;
+     }
+ 
+     switch ( type & PGT_type_mask )
+@@ -2946,7 +2981,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+         if ( !(x & PGT_partial) )
+         {
+             page->nr_validated_ptes = 0;
+-            page->partial_pte = 0;
++            page->partial_flags = 0;
+         }
+         page->linear_pt_count = 0;
+         rc = alloc_page_type(page, type, preemptible);
+@@ -3122,7 +3157,7 @@ int new_guest_cr3(mfn_t mfn)
+         return 0;
+     }
+ 
+-    rc = get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, 0, 1);
++    rc = get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, PTF_preemptible);
+     switch ( rc )
+     {
+     case 0:
+@@ -3473,7 +3508,7 @@ long do_mmuext_op(
+             if ( op.arg1.mfn != 0 )
+             {
+                 rc = get_page_and_type_from_mfn(
+-                    _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1);
++                    _mfn(op.arg1.mfn), PGT_root_page_table, currd, PTF_preemptible);
+ 
+                 if ( unlikely(rc) )
+                 {
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 6faa563167..8406ac3c37 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -228,19 +228,34 @@ struct page_info
+          * setting the flag must not drop that reference, whereas the instance
+          * clearing it will have to.
+          *
+-         * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
+-         * been partially validated. This implies that the general reference
+-         * to the page (acquired from get_page_from_lNe()) would be dropped
+-         * (again due to the apparent failure) and hence must be re-acquired
+-         * when resuming the validation, but must not be dropped when picking
+-         * up the page for invalidation.
++         * If partial_flags & PTF_partial_set is set, then the page at
++         * at @nr_validated_ptes had PGT_partial set as a result of an
++         * operation on the current page.  (That page may or may not
++         * still have PGT_partial set.)
+          *
+-         * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
+-         * been partially invalidated. This is basically the opposite case of
+-         * above, i.e. the general reference to the page was not dropped in
+-         * put_page_from_lNe() (due to the apparent failure), and hence it
+-         * must be dropped when the put operation is resumed (and completes),
+-         * but it must not be acquired if picking up the page for validation.
++         * If PTF_partial_general_ref is set, then the PTE at
++         * @nr_validated_ptef holds a general reference count for the
++         * page.
++         *
++         * This happens:
++         * - During de-validation, if de-validation of the page was
++         *   interrupted
++         * - During validation, if an invalid entry is encountered and
++         *   validation is preemptible
++         * - During validation, if PTF_partial_general_ref was set on
++         *   this entry to begin with (perhaps because we're picking
++         *   up from a partial de-validation).
++         *
++         * When resuming validation, if PTF_partial_general_ref is clear,
++         * then a general reference must be re-acquired; if it is set, no
++         * reference should be acquired.
++         *
++         * When resuming de-validation, if PTF_partial_general_ref is
++         * clear, no reference should be dropped; if it is set, a
++         * reference should be dropped.
++         *
++         * NB that PTF_partial_set and PTF_partial_general_ref are
++         * defined in mm.c, the only place where they are used.
+          *
+          * The 3rd field, @linear_pt_count, indicates
+          * - by a positive value, how many same-level page table entries a page
+@@ -251,7 +266,7 @@ struct page_info
+         struct {
+             u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
+             u16 :16 - PAGETABLE_ORDER - 1 - 2;
+-            s16 partial_pte:2;
++            u16 partial_flags:2;
+             s16 linear_pt_count;
+         };
+ 
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch b/system/xen/xsa/xsa299-4.12-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
new file mode 100644
index 0000000000000..d07c233225664
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0004-x86-mm-Use-flags-for-_put_page_type-rather-than-a-bo.patch
@@ -0,0 +1,140 @@
+From db1d801aa8dcb918a27486a6e8d9cf5d7307dec3 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 04/11] x86/mm: Use flags for _put_page_type rather than a
+ boolean
+
+This is mainly in preparation for _put_page_type taking the
+partial_flags value in the future.  It also makes it easier to read in
+the caller (since you see a flag name rather than `true` or `false`).
+
+No functional change intended.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 3f6f8cc9b8..0740b61af8 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1200,7 +1200,7 @@ get_page_from_l4e(
+ }
+ #endif /* CONFIG_PV */
+ 
+-static int _put_page_type(struct page_info *page, bool preemptible,
++static int _put_page_type(struct page_info *page, unsigned int flags,
+                           struct page_info *ptpg);
+ 
+ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
+@@ -1320,7 +1320,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+               PTF_partial_set )
+         {
+             ASSERT(!(flags & PTF_defer));
+-            rc = _put_page_type(pg, true, ptpg);
++            rc = _put_page_type(pg, PTF_preemptible, ptpg);
+         }
+         else if ( flags & PTF_defer )
+         {
+@@ -1329,7 +1329,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+         }
+         else
+         {
+-            rc = _put_page_type(pg, true, ptpg);
++            rc = _put_page_type(pg, PTF_preemptible, ptpg);
+             if ( likely(!rc) )
+                 put_page(pg);
+         }
+@@ -1366,7 +1366,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+          PTF_partial_set )
+     {
+         ASSERT(!(flags & PTF_defer));
+-        return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
++        return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
+     }
+ 
+     if ( flags & PTF_defer )
+@@ -1376,7 +1376,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+         return 0;
+     }
+ 
+-    rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
++    rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
+     if ( likely(!rc) )
+         put_page(pg);
+ 
+@@ -1397,7 +1397,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+               PTF_partial_set )
+         {
+             ASSERT(!(flags & PTF_defer));
+-            return _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
++            return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
+         }
+ 
+         if ( flags & PTF_defer )
+@@ -1407,7 +1407,7 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+             return 0;
+         }
+ 
+-        rc = _put_page_type(pg, true, mfn_to_page(_mfn(pfn)));
++        rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
+         if ( likely(!rc) )
+             put_page(pg);
+     }
+@@ -2757,10 +2757,11 @@ static int _put_final_page_type(struct page_info *page, unsigned long type,
+ }
+ 
+ 
+-static int _put_page_type(struct page_info *page, bool preemptible,
++static int _put_page_type(struct page_info *page, unsigned int flags,
+                           struct page_info *ptpg)
+ {
+     unsigned long nx, x, y = page->u.inuse.type_info;
++    bool preemptible = flags & PTF_preemptible;
+ 
+     ASSERT(current_locked_page_ne_check(page));
+ 
+@@ -2969,7 +2970,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ 
+             if ( unlikely(iommu_ret) )
+             {
+-                _put_page_type(page, false, NULL);
++                _put_page_type(page, 0, NULL);
+                 rc = iommu_ret;
+                 goto out;
+             }
+@@ -2996,7 +2997,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ 
+ void put_page_type(struct page_info *page)
+ {
+-    int rc = _put_page_type(page, false, NULL);
++    int rc = _put_page_type(page, 0, NULL);
+     ASSERT(rc == 0);
+     (void)rc;
+ }
+@@ -3013,7 +3014,7 @@ int get_page_type(struct page_info *page, unsigned long type)
+ 
+ int put_page_type_preemptible(struct page_info *page)
+ {
+-    return _put_page_type(page, true, NULL);
++    return _put_page_type(page, PTF_preemptible, NULL);
+ }
+ 
+ int get_page_type_preemptible(struct page_info *page, unsigned long type)
+@@ -3030,7 +3031,7 @@ int put_old_guest_table(struct vcpu *v)
+     if ( !v->arch.old_guest_table )
+         return 0;
+ 
+-    switch ( rc = _put_page_type(v->arch.old_guest_table, true,
++    switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible,
+                                  v->arch.old_guest_ptpg) )
+     {
+     case -EINTR:
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch b/system/xen/xsa/xsa299-4.12-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
new file mode 100644
index 0000000000000..9cfbb739079b5
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0005-x86-mm-Rework-get_page_and_type_from_mfn-conditional.patch
@@ -0,0 +1,79 @@
+From 6f257854c8778774210281c5c21028c4b7739b44 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 05/11] x86/mm: Rework get_page_and_type_from_mfn conditional
+
+Make it easier to read by declaring the conditions in which we will
+retain the ref, rather than the conditions under which we release it.
+
+The only way (page == current->arch.old_guest_table) can be true is if
+preemptible is true; so remove this from the query itself, and add an
+ASSERT() to that effect on the opposite path.
+
+No functional change intended.
+
+NB that alloc_lN_table() mishandles the "linear pt failure" situation
+described in the comment; this will be addressed in a future patch.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 0740b61af8..0a4d39a2c3 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1122,8 +1122,43 @@ static int get_page_and_type_from_mfn(
+ 
+     rc = _get_page_type(page, type, preemptible);
+ 
+-    if ( unlikely(rc) && !partial_ref &&
+-         (!preemptible || page != current->arch.old_guest_table) )
++    /*
++     * Retain the refcount if:
++     * - page is fully validated (rc == 0)
++     * - page is not validated (rc < 0) but:
++     *   - We came in with a reference (partial_ref)
++     *   - page is partially validated but there's been an error
++     *     (page == current->arch.old_guest_table)
++     *
++     * The partial_ref-on-error clause is worth an explanation.  There
++     * are two scenarios where partial_ref might be true coming in:
++     * - mfn has been partially demoted as type `type`; i.e. has
++     *   PGT_partial set
++     * - mfn has been partially demoted as L(type+1) (i.e., a linear
++     *   page; e.g. we're being called from get_page_from_l2e with
++     *   type == PGT_l1_table, but the mfn is PGT_l2_table)
++     *
++     * If there's an error, in the first case, _get_page_type will
++     * either return -ERESTART, in which case we want to retain the
++     * ref (as the caller will consider it retained), or -EINVAL, in
++     * which case old_guest_table will be set; in both cases, we need
++     * to retain the ref.
++     *
++     * In the second case, if there's an error, _get_page_type() can
++     * *only* return -EINVAL, and *never* set old_guest_table.  In
++     * that case we also want to retain the reference, to allow the
++     * page to continue to be torn down (i.e., PGT_partial cleared)
++     * safely.
++     *
++     * Also note that we shouldn't be able to leave with the reference
++     * count retained unless we succeeded, or the operation was
++     * preemptible.
++     */
++    if ( likely(!rc) || partial_ref )
++        /* nothing */;
++    else if ( page == current->arch.old_guest_table )
++        ASSERT(preemptible);
++    else
+         put_page(page);
+ 
+     return rc;
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch b/system/xen/xsa/xsa299-4.12-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
new file mode 100644
index 0000000000000..72ee3eac9e4e5
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0006-x86-mm-Have-alloc_l-23-_table-clear-partial_flags-wh.patch
@@ -0,0 +1,111 @@
+From 4ad70553611a7a4e4494d5a3b51b5cc295a488e0 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 06/11] x86/mm: Have alloc_l[23]_table clear partial_flags when
+ preempting
+
+In order to allow recursive pagetable promotions and demotions to be
+interrupted, Xen must keep track of the state of the sub-pages
+promoted or demoted.  This is stored in two elements in the page
+struct: nr_entries_validated and partial_flags.
+
+The rule is that entries [0, nr_entries_validated) should always be
+validated and hold a general reference count.  If partial_flags is
+zero, then [nr_entries_validated] is not validated and no reference
+count is held.  If PTF_partial_set is set, then [nr_entries_validated]
+is partially validated.
+
+At the moment, a distinction is made between promotion and demotion
+with regard to whether the entry itself "holds" a general reference
+count: when entry promotion is interrupted (i.e., returns -ERESTART),
+the entry is not considered to hold a reference; when entry demotion
+is interrupted, the entry is still considered to hold a general
+reference.
+
+PTF_partial_general_ref is used to distinguish between these cases.
+If clear, it's a partial promotion => no general reference count held
+by the entry; if set, it's partial demotion, so a general reference
+count held.  Because promotions and demotions can be interleaved, this
+value is passed to get_page_and_type_from_mfn and put_page_from_l*e,
+to be able to properly handle reference counts.
+
+Unfortunately, when alloc_l[23]_table check hypercall_preempt_check()
+and return -ERESTART, they set nr_entries_validated, but don't clear
+partial_flags.
+
+If we were picking up from a previously-interrupted promotion, that
+means that PTF_partial_set would be set even though
+[nr_entries_validated] was not partially validated.  This means that
+if the page in this state were de-validated, put_page_type() would
+erroneously be called on that entry.
+
+Perhaps worse, if we were racing with a de-validation, then we might
+leave both PTF_partial_set and PTF_partial_general_ref; and when
+de-validation picked up again, both the type and the general ref would
+be erroneously dropped from [nr_entries_validated].
+
+In a sense, the real issue here is code duplication.  Rather than
+duplicate the interruption code, set rc to -EINTR and fall through to
+the code which already handles that case correctly.
+
+Given the logic at this point, it should be impossible for
+partial_flags to be non-zero; add an ASSERT() to catch any changes.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 25 ++++++-------------------
+ 1 file changed, 6 insertions(+), 19 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 0a4d39a2c3..bbd29a68f4 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1554,21 +1554,13 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+     for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
+           i++, partial_flags = 0 )
+     {
+-        l2_pgentry_t l2e;
++        l2_pgentry_t l2e = pl2e[i];
+ 
+         if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
+-        {
+-            page->nr_validated_ptes = i;
+-            rc = -ERESTART;
+-            break;
+-        }
+-
+-        if ( !is_guest_l2_slot(d, type, i) )
++            rc = -EINTR;
++        else if ( !is_guest_l2_slot(d, type, i) )
+             continue;
+-
+-        l2e = pl2e[i];
+-
+-        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
++        else if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+         {
+             if ( !pv_l1tf_check_l2e(d, l2e) )
+                 continue;
+@@ -1640,13 +1632,8 @@ static int alloc_l3_table(struct page_info *page)
+         l3_pgentry_t l3e = pl3e[i];
+ 
+         if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
+-        {
+-            page->nr_validated_ptes = i;
+-            rc = -ERESTART;
+-            break;
+-        }
+-
+-        if ( is_pv_32bit_domain(d) && (i == 3) )
++            rc = -EINTR;
++        else if ( is_pv_32bit_domain(d) && (i == 3) )
+         {
+             if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
+                  (l3e_get_flags(l3e) & l3_disallow_mask(d)) )
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch b/system/xen/xsa/xsa299-4.12-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
new file mode 100644
index 0000000000000..ef390e2b137db
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0007-x86-mm-Always-retain-a-general-ref-on-partial.patch
@@ -0,0 +1,378 @@
+From 51fe4e67d954649fcf103116be6206a769f0db1e Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 07/11] x86/mm: Always retain a general ref on partial
+
+In order to allow recursive pagetable promotions and demotions to be
+interrupted, Xen must keep track of the state of the sub-pages
+promoted or demoted.  This is stored in two elements in the page struct:
+nr_entries_validated and partial_flags.
+
+The rule is that entries [0, nr_entries_validated) should always be
+validated and hold a general reference count.  If partial_flags is
+zero, then [nr_entries_validated] is not validated and no reference
+count is held.  If PTF_partial_set is set, then [nr_entries_validated]
+is partially validated.
+
+At the moment, a distinction is made between promotion and demotion
+with regard to whether the entry itself "holds" a general reference
+count: when entry promotion is interrupted (i.e., returns -ERESTART),
+the entry is not considered to hold a reference; when entry demotion
+is interrupted, the entry is still considered to hold a general
+reference.
+
+PTF_partial_general_ref is used to distinguish between these cases.
+If clear, it's a partial promotion => no general reference count held
+by the entry; if set, it's partial demotion, so a general reference
+count held.  Because promotions and demotions can be interleaved, this
+value is passed to get_page_and_type_from_mfn and put_page_from_l*e,
+to be able to properly handle reference counts.
+
+Unfortunately, because a refcount is not held, it is possible to
+engineer a situation where PTF_partial_set is set but the page in
+question has been assigned to another domain.  A sketch is provided in
+the appendix.
+
+Fix this by having the parent page table entry hold a general
+reference count whenever PTF_partial_set is set.  (For clarity of
+change, keep two separate flags.  These will be collapsed in a
+subsequent changeset.)
+
+This has two basic implications.  On the put_page_from_lNe() side,
+this means that the (partial_set && !partial_ref) case can never happen,
+and no longer needs to be special-cased.
+
+Secondly, because both flags are set together, there's no need to carry over
+existing bits from partial_flags.
+
+(NB there is still another issue with calling _put_page_type() on a
+page which had PGT_partial set; that will be handled in a subsequent
+patch.)
+
+On the get_page_and_type_from_mfn() side, we need to distinguish
+between callers which hold a reference on partial (i.e.,
+alloc_lN_table()), and those which do not (new_cr3, PIN_LN_TABLE, and
+so on): pass a flag if the type should be retained on interruption.
+
+NB that since l1 promotion can't be preempted, get_page_from_l2e
+can't return -ERESTART.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+-----
+* Appendix: Engineering PTF_partial_set while a page belongs to a
+  foreign domain
+
+Suppose A is a page which can be promoted to an l3, and B is a page
+which can be promoted to an l2, and A[x] points to B.  B has
+PGC_allocated set but no other general references.
+
+V1:  PIN_L3 A.
+  A is validated, B is validated.
+  A.type_count = 1 | PGT_validated | PGT_pinned
+  B.type_count = 1 | PGT_validated
+  B.count = 2 | PGC_allocated (A[x] holds a general ref)
+
+V1: UNPIN A.
+  A begins de-validation.
+  Arrange to be interrupted when i < x
+  V1->old_guest_table = A
+  V1->old_guest_table_ref_held = false
+  A.type_count = 1 | PGT_partial
+  A.nr_validated_entries = i < x
+  B.type_count = 0
+  B.count = 1 | PGC_allocated
+
+V2: MOD_L4_ENTRY to point some l4e to A.
+  Picks up re-validation of A.
+  Arrange to be interrupted halfway through B's validation
+  B.type_count = 1 | PGT_partial
+  B.count = 2 | PGC_allocated (PGT_partial holds a general ref)
+  A.type_count = 1 | PGT_partial
+  A.nr_validated_entries = x
+  A.partial_flags = PTF_partial_set
+
+V3: MOD_L3_ENTRY to point some other l3e (not in A) to B.
+  Validates B.
+  B.type_count = 1 | PGT_validated
+  B.count = 2 | PGC_allocated ("other l3e" holds a general ref)
+
+V3: MOD_L3_ENTRY to clear l3e pointing to B.
+  Devalidates B.
+  B.type_count = 0
+  B.count = 1 | PGC_allocated
+
+V3: decrease_reservation(B)
+  Clears PGC_allocated
+  B.count = 0 => B is freed
+
+B gets assigned to a different domain
+
+V1: Restarts UNPIN of A
+  put_old_guest_table(A)
+    ...
+      free_l3_table(A)
+
+Now since A.partial_flags has PTF_partial_set, free_l3_table() will
+call put_page_from_l3e() on A[x], which points to B, while B is owned
+by another domain.
+
+If A[x] held a general refcount for B on partial validation, as it does
+for partial de-validation, then B would still have a reference count of
+1 after PGC_allocated was freed; so B wouldn't be freed until after
+put_page_from_l3e() had happened on A[x].
+---
+ xen/arch/x86/mm.c        | 84 +++++++++++++++++++++++-----------------
+ xen/include/asm-x86/mm.h | 15 ++++---
+ 2 files changed, 58 insertions(+), 41 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index bbd29a68f4..4d3ebf341d 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1102,10 +1102,11 @@ get_page_from_l1e(
+  * page->pte[page->nr_validated_entries].  See the comment in mm.h for
+  * more information.
+  */
+-#define PTF_partial_set         (1 << 0)
+-#define PTF_partial_general_ref (1 << 1)
+-#define PTF_preemptible         (1 << 2)
+-#define PTF_defer               (1 << 3)
++#define PTF_partial_set           (1 << 0)
++#define PTF_partial_general_ref   (1 << 1)
++#define PTF_preemptible           (1 << 2)
++#define PTF_defer                 (1 << 3)
++#define PTF_retain_ref_on_restart (1 << 4)
+ 
+ static int get_page_and_type_from_mfn(
+     mfn_t mfn, unsigned long type, struct domain *d,
+@@ -1114,7 +1115,11 @@ static int get_page_and_type_from_mfn(
+     struct page_info *page = mfn_to_page(mfn);
+     int rc;
+     bool preemptible = flags & PTF_preemptible,
+-         partial_ref = flags & PTF_partial_general_ref;
++         partial_ref = flags & PTF_partial_general_ref,
++         partial_set = flags & PTF_partial_set,
++         retain_ref  = flags & PTF_retain_ref_on_restart;
++
++    ASSERT(partial_ref == partial_set);
+ 
+     if ( likely(!partial_ref) &&
+          unlikely(!get_page_from_mfn(mfn, d)) )
+@@ -1127,13 +1132,15 @@ static int get_page_and_type_from_mfn(
+      * - page is fully validated (rc == 0)
+      * - page is not validated (rc < 0) but:
+      *   - We came in with a reference (partial_ref)
++     *   - page is partially validated (rc == -ERESTART), and the
++     *     caller has asked the ref to be retained in that case
+      *   - page is partially validated but there's been an error
+      *     (page == current->arch.old_guest_table)
+      *
+      * The partial_ref-on-error clause is worth an explanation.  There
+      * are two scenarios where partial_ref might be true coming in:
+-     * - mfn has been partially demoted as type `type`; i.e. has
+-     *   PGT_partial set
++     * - mfn has been partially promoted / demoted as type `type`;
++     *   i.e. has PGT_partial set
+      * - mfn has been partially demoted as L(type+1) (i.e., a linear
+      *   page; e.g. we're being called from get_page_from_l2e with
+      *   type == PGT_l1_table, but the mfn is PGT_l2_table)
+@@ -1156,7 +1163,8 @@ static int get_page_and_type_from_mfn(
+      */
+     if ( likely(!rc) || partial_ref )
+         /* nothing */;
+-    else if ( page == current->arch.old_guest_table )
++    else if ( page == current->arch.old_guest_table ||
++              (retain_ref && rc == -ERESTART) )
+         ASSERT(preemptible);
+     else
+         put_page(page);
+@@ -1354,8 +1362,8 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+         if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+               PTF_partial_set )
+         {
+-            ASSERT(!(flags & PTF_defer));
+-            rc = _put_page_type(pg, PTF_preemptible, ptpg);
++            /* partial_set should always imply partial_ref */
++            BUG();
+         }
+         else if ( flags & PTF_defer )
+         {
+@@ -1400,8 +1408,8 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+     if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+          PTF_partial_set )
+     {
+-        ASSERT(!(flags & PTF_defer));
+-        return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
++        /* partial_set should always imply partial_ref */
++        BUG();
+     }
+ 
+     if ( flags & PTF_defer )
+@@ -1431,8 +1439,8 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+         if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+               PTF_partial_set )
+         {
+-            ASSERT(!(flags & PTF_defer));
+-            return _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
++            /* partial_set should always imply partial_ref */
++            BUG();
+         }
+ 
+         if ( flags & PTF_defer )
+@@ -1569,13 +1577,22 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+         else
+             rc = get_page_from_l2e(l2e, pfn, d, partial_flags);
+ 
+-        if ( rc == -ERESTART )
+-        {
+-            page->nr_validated_ptes = i;
+-            /* Set 'set', retain 'general ref' */
+-            page->partial_flags = partial_flags | PTF_partial_set;
+-        }
+-        else if ( rc == -EINTR && i )
++        /*
++         * It shouldn't be possible for get_page_from_l2e to return
++         * -ERESTART, since we never call this with PTF_preemptible.
++         * (alloc_l1_table may return -EINTR on an L1TF-vulnerable
++         * entry.)
++         *
++         * NB that while on a "clean" promotion, we can never get
++         * PGT_partial.  It is possible to arrange for an l2e to
++         * contain a partially-devalidated l2; but in that case, both
++         * of the following functions will fail anyway (the first
++         * because the page in question is not an l1; the second
++         * because the page is not fully validated).
++         */
++        ASSERT(rc != -ERESTART);
++
++        if ( rc == -EINTR && i )
+         {
+             page->nr_validated_ptes = i;
+             page->partial_flags = 0;
+@@ -1584,6 +1601,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+         else if ( rc < 0 && rc != -EINTR )
+         {
+             gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i);
++            ASSERT(current->arch.old_guest_table == NULL);
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+@@ -1642,7 +1660,7 @@ static int alloc_l3_table(struct page_info *page)
+                 rc = get_page_and_type_from_mfn(
+                     l3e_get_mfn(l3e),
+                     PGT_l2_page_table | PGT_pae_xen_l2, d,
+-                    partial_flags | PTF_preemptible);
++                    partial_flags | PTF_preemptible | PTF_retain_ref_on_restart);
+         }
+         else if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+         {
+@@ -1651,13 +1669,14 @@ static int alloc_l3_table(struct page_info *page)
+             rc = -EINTR;
+         }
+         else
+-            rc = get_page_from_l3e(l3e, pfn, d, partial_flags);
++            rc = get_page_from_l3e(l3e, pfn, d,
++                                   partial_flags | PTF_retain_ref_on_restart);
+ 
+         if ( rc == -ERESTART )
+         {
+             page->nr_validated_ptes = i;
+             /* Set 'set', leave 'general ref' set if this entry was set */
+-            page->partial_flags = partial_flags | PTF_partial_set;
++            page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
+         }
+         else if ( rc == -EINTR && i )
+         {
+@@ -1833,13 +1852,14 @@ static int alloc_l4_table(struct page_info *page)
+             rc = -EINTR;
+         }
+         else
+-            rc = get_page_from_l4e(l4e, pfn, d, partial_flags);
++            rc = get_page_from_l4e(l4e, pfn, d,
++                                   partial_flags | PTF_retain_ref_on_restart);
+ 
+         if ( rc == -ERESTART )
+         {
+             page->nr_validated_ptes = i;
+             /* Set 'set', leave 'general ref' set if this entry was set */
+-            page->partial_flags = partial_flags | PTF_partial_set;
++            page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
+         }
+         else if ( rc < 0 )
+         {
+@@ -1936,9 +1956,7 @@ static int free_l2_table(struct page_info *page)
+     else if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = (partial_flags & PTF_partial_set) ?
+-            partial_flags :
+-            (PTF_partial_set | PTF_partial_general_ref);
++        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
+     }
+     else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
+     {
+@@ -1986,9 +2004,7 @@ static int free_l3_table(struct page_info *page)
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = (partial_flags & PTF_partial_set) ?
+-            partial_flags :
+-            (PTF_partial_set | PTF_partial_general_ref);
++        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
+     }
+     else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+     {
+@@ -2019,9 +2035,7 @@ static int free_l4_table(struct page_info *page)
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = (partial_flags & PTF_partial_set) ?
+-            partial_flags :
+-            (PTF_partial_set | PTF_partial_general_ref);
++        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
+     }
+     else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+     {
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 8406ac3c37..02079e1324 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -238,22 +238,25 @@ struct page_info
+          * page.
+          *
+          * This happens:
+-         * - During de-validation, if de-validation of the page was
++         * - During validation or de-validation, if the operation was
+          *   interrupted
+          * - During validation, if an invalid entry is encountered and
+          *   validation is preemptible
+          * - During validation, if PTF_partial_general_ref was set on
+-         *   this entry to begin with (perhaps because we're picking
+-         *   up from a partial de-validation).
++         *   this entry to begin with (perhaps because it picked up a
++         *   previous operation)
+          *
+-         * When resuming validation, if PTF_partial_general_ref is clear,
+-         * then a general reference must be re-acquired; if it is set, no
+-         * reference should be acquired.
++         * When resuming validation, if PTF_partial_general_ref is
++         * clear, then a general reference must be re-acquired; if it
++         * is set, no reference should be acquired.
+          *
+          * When resuming de-validation, if PTF_partial_general_ref is
+          * clear, no reference should be dropped; if it is set, a
+          * reference should be dropped.
+          *
++         * NB at the moment, PTF_partial_set should be set if and only if
++         * PTF_partial_general_ref is set.
++         *
+          * NB that PTF_partial_set and PTF_partial_general_ref are
+          * defined in mm.c, the only place where they are used.
+          *
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch b/system/xen/xsa/xsa299-4.12-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
new file mode 100644
index 0000000000000..6cf41d1cd6941
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0008-x86-mm-Collapse-PTF_partial_set-and-PTF_partial_gene.patch
@@ -0,0 +1,227 @@
+From 8a8d836f7f7418e659d37817a66cd7a6b115042b Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 08/11] x86/mm: Collapse PTF_partial_set and
+ PTF_partial_general_ref into one
+
+...now that they are equivalent.  No functional change intended.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c        | 50 +++++++++++-----------------------------
+ xen/include/asm-x86/mm.h | 29 +++++++++++------------
+ 2 files changed, 26 insertions(+), 53 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 4d3ebf341d..886e93b8aa 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1097,13 +1097,12 @@ get_page_from_l1e(
+ 
+ /*
+  * The following flags are used to specify behavior of various get and
+- * put commands.  The first two are also stored in page->partial_flags
+- * to indicate the state of the page pointed to by
++ * put commands.  The first is also stored in page->partial_flags to
++ * indicate the state of the page pointed to by
+  * page->pte[page->nr_validated_entries].  See the comment in mm.h for
+  * more information.
+  */
+ #define PTF_partial_set           (1 << 0)
+-#define PTF_partial_general_ref   (1 << 1)
+ #define PTF_preemptible           (1 << 2)
+ #define PTF_defer                 (1 << 3)
+ #define PTF_retain_ref_on_restart (1 << 4)
+@@ -1115,13 +1114,10 @@ static int get_page_and_type_from_mfn(
+     struct page_info *page = mfn_to_page(mfn);
+     int rc;
+     bool preemptible = flags & PTF_preemptible,
+-         partial_ref = flags & PTF_partial_general_ref,
+          partial_set = flags & PTF_partial_set,
+          retain_ref  = flags & PTF_retain_ref_on_restart;
+ 
+-    ASSERT(partial_ref == partial_set);
+-
+-    if ( likely(!partial_ref) &&
++    if ( likely(!partial_set) &&
+          unlikely(!get_page_from_mfn(mfn, d)) )
+         return -EINVAL;
+ 
+@@ -1131,14 +1127,14 @@ static int get_page_and_type_from_mfn(
+      * Retain the refcount if:
+      * - page is fully validated (rc == 0)
+      * - page is not validated (rc < 0) but:
+-     *   - We came in with a reference (partial_ref)
++     *   - We came in with a reference (partial_set)
+      *   - page is partially validated (rc == -ERESTART), and the
+      *     caller has asked the ref to be retained in that case
+      *   - page is partially validated but there's been an error
+      *     (page == current->arch.old_guest_table)
+      *
+-     * The partial_ref-on-error clause is worth an explanation.  There
+-     * are two scenarios where partial_ref might be true coming in:
++     * The partial_set-on-error clause is worth an explanation.  There
++     * are two scenarios where partial_set might be true coming in:
+      * - mfn has been partially promoted / demoted as type `type`;
+      *   i.e. has PGT_partial set
+      * - mfn has been partially demoted as L(type+1) (i.e., a linear
+@@ -1161,7 +1157,7 @@ static int get_page_and_type_from_mfn(
+      * count retained unless we succeeded, or the operation was
+      * preemptible.
+      */
+-    if ( likely(!rc) || partial_ref )
++    if ( likely(!rc) || partial_set )
+         /* nothing */;
+     else if ( page == current->arch.old_guest_table ||
+               (retain_ref && rc == -ERESTART) )
+@@ -1359,13 +1355,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+         struct page_info *pg = l2e_get_page(l2e);
+         struct page_info *ptpg = mfn_to_page(_mfn(pfn));
+ 
+-        if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+-              PTF_partial_set )
+-        {
+-            /* partial_set should always imply partial_ref */
+-            BUG();
+-        }
+-        else if ( flags & PTF_defer )
++        if ( flags & PTF_defer )
+         {
+             current->arch.old_guest_ptpg = ptpg;
+             current->arch.old_guest_table = pg;
+@@ -1405,13 +1395,6 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ 
+     pg = l3e_get_page(l3e);
+ 
+-    if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+-         PTF_partial_set )
+-    {
+-        /* partial_set should always imply partial_ref */
+-        BUG();
+-    }
+-
+     if ( flags & PTF_defer )
+     {
+         current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+@@ -1436,13 +1419,6 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+     {
+         struct page_info *pg = l4e_get_page(l4e);
+ 
+-        if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) ==
+-              PTF_partial_set )
+-        {
+-            /* partial_set should always imply partial_ref */
+-            BUG();
+-        }
+-
+         if ( flags & PTF_defer )
+         {
+             current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+@@ -1676,7 +1652,7 @@ static int alloc_l3_table(struct page_info *page)
+         {
+             page->nr_validated_ptes = i;
+             /* Set 'set', leave 'general ref' set if this entry was set */
+-            page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
++            page->partial_flags = PTF_partial_set;
+         }
+         else if ( rc == -EINTR && i )
+         {
+@@ -1859,7 +1835,7 @@ static int alloc_l4_table(struct page_info *page)
+         {
+             page->nr_validated_ptes = i;
+             /* Set 'set', leave 'general ref' set if this entry was set */
+-            page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
++            page->partial_flags = PTF_partial_set;
+         }
+         else if ( rc < 0 )
+         {
+@@ -1956,7 +1932,7 @@ static int free_l2_table(struct page_info *page)
+     else if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
++        page->partial_flags = PTF_partial_set;
+     }
+     else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 )
+     {
+@@ -2004,7 +1980,7 @@ static int free_l3_table(struct page_info *page)
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
++        page->partial_flags = PTF_partial_set;
+     }
+     else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
+     {
+@@ -2035,7 +2011,7 @@ static int free_l4_table(struct page_info *page)
+     if ( rc == -ERESTART )
+     {
+         page->nr_validated_ptes = i;
+-        page->partial_flags = PTF_partial_set | PTF_partial_general_ref;
++        page->partial_flags = PTF_partial_set;
+     }
+     else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
+     {
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 02079e1324..f0fd35bf6b 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -233,7 +233,7 @@ struct page_info
+          * operation on the current page.  (That page may or may not
+          * still have PGT_partial set.)
+          *
+-         * If PTF_partial_general_ref is set, then the PTE at
++         * Additionally, if PTF_partial_set is set, then the PTE at
+          * @nr_validated_ptef holds a general reference count for the
+          * page.
+          *
+@@ -242,23 +242,20 @@ struct page_info
+          *   interrupted
+          * - During validation, if an invalid entry is encountered and
+          *   validation is preemptible
+-         * - During validation, if PTF_partial_general_ref was set on
+-         *   this entry to begin with (perhaps because it picked up a
++         * - During validation, if PTF_partial_set was set on this
++         *   entry to begin with (perhaps because it picked up a
+          *   previous operation)
+          *
+-         * When resuming validation, if PTF_partial_general_ref is
+-         * clear, then a general reference must be re-acquired; if it
+-         * is set, no reference should be acquired.
++         * When resuming validation, if PTF_partial_set is clear, then
++         * a general reference must be re-acquired; if it is set, no
++         * reference should be acquired.
+          *
+-         * When resuming de-validation, if PTF_partial_general_ref is
+-         * clear, no reference should be dropped; if it is set, a
+-         * reference should be dropped.
++         * When resuming de-validation, if PTF_partial_set is clear,
++         * no reference should be dropped; if it is set, a reference
++         * should be dropped.
+          *
+-         * NB at the moment, PTF_partial_set should be set if and only if
+-         * PTF_partial_general_ref is set.
+-         *
+-         * NB that PTF_partial_set and PTF_partial_general_ref are
+-         * defined in mm.c, the only place where they are used.
++         * NB that PTF_partial_set is defined in mm.c, the only place
++         * where it is used.
+          *
+          * The 3rd field, @linear_pt_count, indicates
+          * - by a positive value, how many same-level page table entries a page
+@@ -268,8 +265,8 @@ struct page_info
+          */
+         struct {
+             u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
+-            u16 :16 - PAGETABLE_ORDER - 1 - 2;
+-            u16 partial_flags:2;
++            u16 :16 - PAGETABLE_ORDER - 1 - 1;
++            u16 partial_flags:1;
+             s16 linear_pt_count;
+         };
+ 
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch b/system/xen/xsa/xsa299-4.12-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
new file mode 100644
index 0000000000000..bbaba794fc7bc
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0009-x86-mm-Properly-handle-linear-pagetable-promotion-fa.patch
@@ -0,0 +1,106 @@
+From da3d1d258e54fe600f7f75287183b74d957ec63b Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 09/11] x86/mm: Properly handle linear pagetable promotion
+ failures
+
+In order to allow recursive pagetable promotions and demotions to be
+interrupted, Xen must keep track of the state of the sub-pages
+promoted or demoted.  This is stored in two elements in the page
+struct: nr_entries_validated and partial_flags.
+
+The rule is that entries [0, nr_entries_validated) should always be
+validated and hold a general reference count.  If partial_flags is
+zero, then [nr_entries_validated] is not validated and no reference
+count is held.  If PTF_partial_set is set, then [nr_entries_validated]
+is partially validated, and a general reference count is held.
+
+Unfortunately, in cases where an entry began with PTF_partial_set set,
+and get_page_from_lNe() returns -EINVAL, the PTF_partial_set bit is
+erroneously dropped.  (This scenario can be engineered mainly by the
+use of interleaving of promoting and demoting a page which has "linear
+pagetable" entries; see the appendix for a sketch.)  This means that
+we will "leak" a general reference count on the page in question,
+preventing the page from being freed.
+
+Fix this by setting page->partial_flags to the partial_flags local
+variable.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+-----
+Appendix
+
+Suppose A and B can both be promoted to L2 pages, and A[x] points to B.
+
+V1: PIN_L2 B.
+  B.type_count = 1 | PGT_validated
+  B.count = 2 | PGC_allocated
+
+V1: MOD_L3_ENTRY pointing something to A.
+  In the process of validating A[x], grab an extra type / ref on B:
+  B.type_count = 2 | PGT_validated
+  B.count = 3 | PGC_allocated
+  A.type_count = 1 | PGT_validated
+  A.count = 2 | PGC_allocated
+
+V1: UNPIN B.
+  B.type_count = 1 | PGT_validated
+  B.count = 2 | PGC_allocated
+
+V1: MOD_L3_ENTRY removing the reference to A.
+  De-validate A, down to A[x], which points to B.
+  Drop the final type on B.  Arrange to be interrupted.
+  B.type_count = 1 | PGT_partial
+  B.count = 2 | PGC_allocated
+  A.type_count = 1 | PGT_partial
+  A.nr_validated_entries = x
+  A.partial_pte = -1
+
+V2: MOD_L3_ENTRY adds a reference to A.
+
+At this point, get_page_from_l2e(A[x]) tries
+get_page_and_type_from_mfn(), which fails because it's the wrong type;
+and get_l2_linear_pagetable() also fails, because B isn't validated as
+an l2 anymore.
+---
+ xen/arch/x86/mm.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 886e93b8aa..0a094291da 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1581,7 +1581,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+-                page->partial_flags = 0;
++                page->partial_flags = partial_flags;
+                 current->arch.old_guest_ptpg = NULL;
+                 current->arch.old_guest_table = page;
+             }
+@@ -1674,7 +1674,7 @@ static int alloc_l3_table(struct page_info *page)
+         if ( i )
+         {
+             page->nr_validated_ptes = i;
+-            page->partial_flags = 0;
++            page->partial_flags = partial_flags;
+             current->arch.old_guest_ptpg = NULL;
+             current->arch.old_guest_table = page;
+         }
+@@ -1845,7 +1845,7 @@ static int alloc_l4_table(struct page_info *page)
+             if ( i )
+             {
+                 page->nr_validated_ptes = i;
+-                page->partial_flags = 0;
++                page->partial_flags = partial_flags;
+                 if ( rc == -EINTR )
+                     rc = -ERESTART;
+                 else
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0010-x86-mm-Fix-nested-de-validation-on-error.patch b/system/xen/xsa/xsa299-4.12-0010-x86-mm-Fix-nested-de-validation-on-error.patch
new file mode 100644
index 0000000000000..7d5f022e892c4
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0010-x86-mm-Fix-nested-de-validation-on-error.patch
@@ -0,0 +1,166 @@
+From b3e169dc8daeae85b0b51c25fdb142e2e552ec7f Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:49 +0100
+Subject: [PATCH 10/11] x86/mm: Fix nested de-validation on error
+
+If an invalid entry is discovered when validating a page-table tree,
+the entire tree which has so far been validated must be de-validated.
+Since this may take a long time, alloc_l[2-4]_table() set current
+vcpu's old_guest_table immediately; put_old_guest_table() will make
+sure that put_page_type() will be called to finish off the
+de-validation before any other MMU operations can happen on the vcpu.
+
+The invariant for partial pages should be:
+
+* Entries [0, nr_validated_ptes) should be completely validated;
+  put_page_type() will de-validate these.
+
+* If [nr_validated_ptes] is partially validated, partial_flags should
+  set PTF_partial_set.  put_page_type() will be called on this page to
+  finish off devalidation, and the appropriate refcount adjustments
+  will be done.
+
+alloc_l[2-3]_table() indicates partial validation to its callers by
+setting current->old_guest_table.
+
+Unfortunately, this is mishandled.
+
+Take the case where validating lNe[x] returns an error.
+
+First, alloc_l3_table() doesn't check old_guest_table at all; as a
+result, partial_flags is not set when it should be.  nr_validated_ptes
+is set to x; and since PTF_partial_set is clear, de-validation resumes at
+nr_validated_ptes-1.  This means that the l2 page at pl3e[x] will not
+have put_page_type() called on it when de-validating the rest of the
+l3: it will be stuck in the PGT_partial state until the domain is
+destroyed, or until it is re-used as an l2.  (Any other page type will
+fail.)
+
+Worse, alloc_l4_table(), rather than setting PTF_partial_set as it
+should, sets nr_validated_ptes to x+1.  When de-validating, since
+partial is 0, this will correctly resume calling put_page_type at [x];
+but, if the put_page_type() is never called, but instead
+get_page_type() is called, validation will pick up at [x+1],
+neglecting to validate [x].  If the rest of the validation succeeds,
+the l4 will be validated even though [x] is invalid.
+
+Fix this in both cases by setting PTF_partial_set if old_guest_table
+is set.
+
+While here, add some safety catches:
+- old_guest_table must point to the page contained in
+  [nr_validated_ptes].
+- alloc_l1_table() shouldn't set old_guest_table
+
+If we experience one of these situations in production builds, it's
+safer to avoid calling put_page_type for the pages in question.  If
+they have PGT_partial set, they will be cleaned up on domain
+destruction; if not, we have no idea whether a type count is safe to
+drop.  Retaining an extra type ref that should have been dropped may
+trigger a BUG() on the free_domain_page() path, but dropping a type
+count that shouldn't be dropped may cause a privilege escalation.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ xen/arch/x86/mm.c | 53 +++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 51 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 0a094291da..a432e69c74 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1580,6 +1580,20 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+             ASSERT(current->arch.old_guest_table == NULL);
+             if ( i )
+             {
++                /*
++                 * alloc_l1_table() doesn't set old_guest_table; it does
++                 * its own tear-down immediately on failure.  If it
++                 * did we'd need to check it and set partial_flags as we
++                 * do in alloc_l[34]_table().
++                 *
++                 * Note on the use of ASSERT: if it's non-null and
++                 * hasn't been cleaned up yet, it should have
++                 * PGT_partial set; and so the type will be cleaned up
++                 * on domain destruction.  Unfortunately, we would
++                 * leak the general ref held by old_guest_table; but
++                 * leaking a page is less bad than a host crash.
++                 */
++                ASSERT(current->arch.old_guest_table == NULL);
+                 page->nr_validated_ptes = i;
+                 page->partial_flags = partial_flags;
+                 current->arch.old_guest_ptpg = NULL;
+@@ -1607,6 +1621,7 @@ static int alloc_l3_table(struct page_info *page)
+     unsigned int   i;
+     int            rc = 0;
+     unsigned int   partial_flags = page->partial_flags;
++    l3_pgentry_t   l3e = l3e_empty();
+ 
+     pl3e = map_domain_page(_mfn(pfn));
+ 
+@@ -1623,7 +1638,7 @@ static int alloc_l3_table(struct page_info *page)
+     for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+           i++, partial_flags = 0 )
+     {
+-        l3_pgentry_t l3e = pl3e[i];
++        l3e = pl3e[i];
+ 
+         if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
+             rc = -EINTR;
+@@ -1675,6 +1690,24 @@ static int alloc_l3_table(struct page_info *page)
+         {
+             page->nr_validated_ptes = i;
+             page->partial_flags = partial_flags;
++            if ( current->arch.old_guest_table )
++            {
++                /*
++                 * We've experienced a validation failure.  If
++                 * old_guest_table is set, "transfer" the general
++                 * reference count to pl3e[nr_validated_ptes] by
++                 * setting PTF_partial_set.
++                 *
++                 * As a precaution, check that old_guest_table is the
++                 * page pointed to by pl3e[nr_validated_ptes].  If
++                 * not, it's safer to leak a type ref on production
++                 * builds.
++                 */
++                if ( current->arch.old_guest_table == l3e_get_page(l3e) )
++                    page->partial_flags = PTF_partial_set;
++                else
++                    ASSERT_UNREACHABLE();
++            }
+             current->arch.old_guest_ptpg = NULL;
+             current->arch.old_guest_table = page;
+         }
+@@ -1851,7 +1884,23 @@ static int alloc_l4_table(struct page_info *page)
+                 else
+                 {
+                     if ( current->arch.old_guest_table )
+-                        page->nr_validated_ptes++;
++                    {
++                        /*
++                         * We've experienced a validation failure.  If
++                         * old_guest_table is set, "transfer" the general
+                         * reference count to pl4e[nr_validated_ptes] by
++                         * setting PTF_partial_set.
++                         *
++                         * As a precaution, check that old_guest_table is the
++                         * page pointed to by pl4e[nr_validated_ptes].  If
++                         * not, it's safer to leak a type ref on production
++                         * builds.
++                         */
++                        if ( current->arch.old_guest_table == l4e_get_page(l4e) )
++                            page->partial_flags = PTF_partial_set;
++                        else
++                            ASSERT_UNREACHABLE();
++                    }
+                     current->arch.old_guest_ptpg = NULL;
+                     current->arch.old_guest_table = page;
+                 }
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch b/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
new file mode 100644
index 0000000000000..ad7e6fee1b9d9
--- /dev/null
+++ b/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch
@@ -0,0 +1,413 @@
+From ea3dc624c5e6325a9c2f079e52a85965d4ab6ce8 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Thu, 10 Oct 2019 17:57:50 +0100
+Subject: [PATCH 11/11] x86/mm: Don't drop a type ref unless you held a ref to
+ begin with
+
+Validation and de-validation of pagetable trees may take arbitrarily
+large amounts of time, and so must be preemptible.  This is indicated
+by setting the PGT_partial bit in the type_info, and setting
+nr_validated_entries and partial_flags appropriately.  Specifically,
+if the entry at [nr_validated_entries] is partially validated,
+partial_flags should have the PTF_partial_set bit set, and the entry
+should hold a general reference count.  During de-validation,
+put_page_type() is called on partially validated entries.
+
+Unfortunately, there are a number of issues with the current algorithm.
+
+First, doing a "normal" put_page_type() is not safe when no type ref
+is held: there is nothing to stop another vcpu from coming along and
+picking up validation again: at which point the put_page_type may drop
+the only page ref on an in-use page.  Some examples are listed in the
+appendix.
+
+The core issue is that put_page_type() is being called both to clean
+up PGT_partial, and to drop a type count; and has no way of knowing
+which is which; and so if in between, PGT_partial is cleared,
+put_page_type() will drop the type ref erroneously.
+
+What is needed is to distinguish between two states:
+- Dropping a type ref which is held
+- Cleaning up a page which has been partially de/validated
+
+Fix this by telling put_page_type() which of the two activities you
+intend.
+
+When cleaning up a partial de/validation, take no action unless you
+find a page partially validated.
+
+If put_page_type() is called without PTF_partial_set, and finds the
+page in a PGT_partial state anyway, then there's certainly been a
+misaccounting somewhere, and carrying on would almost certainly cause
+a security issue, so crash the host instead.
+
+In put_page_from_lNe, pass partial_flags on to _put_page_type().
+
+old_guest_table may be set either with a fully validated page (when
+using the "deferred put" pattern), or with a partially validated page
+(when a normal "de-validation" is interrupted, or when a validation
+fails part-way through due to invalid entries).  Add a flag,
+old_guest_table_partial, to indicate which of these it is, and use
+that to pass the appropriate flag to _put_page_type().
+
+While here, delete stray trailing whitespace.
+
+This is part of XSA-299.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+-----
+Appendix:
+
+Suppose page A, when interpreted as an l3 pagetable, contains all
+valid entries; and suppose A[x] points to page B, which when
+interpreted as an l2 pagetable, contains all valid entries.
+
+P1: PIN_L3_TABLE
+  A -> PGT_l3_table | 1 | valid
+  B -> PGT_l2_table | 1 | valid
+
+P1: UNPIN_TABLE
+  > Arrange to interrupt after B has been de-validated
+  B:
+    type_info -> PGT_l2_table | 0
+  A:
+    type_info -> PGT_l3_table | 1 | partial
+    nr_validated_entries -> (less than x)
+
+P2: mod_l4_entry to point to A
+  > Arrange for this to be interrupted while B is being validated
+  B:
+    type_info -> PGT_l2_table | 1 | partial
+    (nr_validated_entries &c set as appropriate)
+  A:
+    type_info -> PGT_l3_table | 1 | partial
+    nr_validated_entries -> x
+    partial_pte = 1
+
+P3: mod_l3_entry some other unrelated l3 to point to B:
+  B:
+    type_info -> PGT_l2_table | 1
+
+P1: Restart UNPIN_TABLE
+
+At this point, since A.nr_validated_entries == x and A.partial_pte !=
+0, free_l3_table() will call put_page_from_l3e() on pl3e[x], dropping
+its type count to 0 while it's still being pointed to by some other l3.
+
+A similar issue arises with old_guest_table.  Consider the following
+scenario:
+
+Suppose A is a page which, when interpreted as an l2, has valid entries
+until entry x, which is invalid.
+
+V1:  PIN_L2_TABLE(A)
+  <Validate until we try to validate [x], get -EINVAL>
+  A -> PGT_l2_table | 1 | PGT_partial
+  V1 -> old_guest_table = A
+  <delayed>
+
+V2: PIN_L2_TABLE(A)
+  <Pick up where V1 left off, try to re-validate [x], get -EINVAL>
+  A -> PGT_l2_table | 1 | PGT_partial
+  V2 -> old_guest_table = A
+  <restart>
+  put_old_guest_table()
+    _put_page_type(A)
+      A -> PGT_l2_table | 0
+
+V1: <restart>
+  put_old_guest_table()
+    _put_page_type(A) # UNDERFLOW
+
+Indeed, it is possible to engineer for old_guest_table for every vcpu
+a guest has to point to the same page.
+---
+ xen/arch/x86/domain.c        |  6 +++
+ xen/arch/x86/mm.c            | 99 +++++++++++++++++++++++++++++++-----
+ xen/include/asm-x86/domain.h |  4 +-
+ 3 files changed, 95 insertions(+), 14 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 59df8a6d8d..f1ae5f89f5 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1104,9 +1104,15 @@ int arch_set_info_guest(
+                     rc = -ERESTART;
+                     /* Fallthrough */
+                 case -ERESTART:
++                    /*
++                     * NB that we're putting the kernel-mode table
++                     * here, which we've already successfully
++                     * validated above; hence partial = false;
++                     */
+                     v->arch.old_guest_ptpg = NULL;
+                     v->arch.old_guest_table =
+                         pagetable_get_page(v->arch.guest_table);
++                    v->arch.old_guest_table_partial = false;
+                     v->arch.guest_table = pagetable_null();
+                     break;
+                 default:
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a432e69c74..81774368a0 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -1359,10 +1359,11 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn,
+         {
+             current->arch.old_guest_ptpg = ptpg;
+             current->arch.old_guest_table = pg;
++            current->arch.old_guest_table_partial = false;
+         }
+         else
+         {
+-            rc = _put_page_type(pg, PTF_preemptible, ptpg);
++            rc = _put_page_type(pg, flags | PTF_preemptible, ptpg);
+             if ( likely(!rc) )
+                 put_page(pg);
+         }
+@@ -1385,6 +1386,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+         unsigned long mfn = l3e_get_pfn(l3e);
+         bool writeable = l3e_get_flags(l3e) & _PAGE_RW;
+ 
++        ASSERT(!(flags & PTF_partial_set));
+         ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+         do {
+             put_data_page(mfn_to_page(_mfn(mfn)), writeable);
+@@ -1397,12 +1399,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
+ 
+     if ( flags & PTF_defer )
+     {
++        ASSERT(!(flags & PTF_partial_set));
+         current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+         current->arch.old_guest_table = pg;
++        current->arch.old_guest_table_partial = false;
+         return 0;
+     }
+ 
+-    rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
++    rc = _put_page_type(pg, flags | PTF_preemptible, mfn_to_page(_mfn(pfn)));
+     if ( likely(!rc) )
+         put_page(pg);
+ 
+@@ -1421,12 +1425,15 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
+ 
+         if ( flags & PTF_defer )
+         {
++            ASSERT(!(flags & PTF_partial_set));
+             current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn));
+             current->arch.old_guest_table = pg;
++            current->arch.old_guest_table_partial = false;
+             return 0;
+         }
+ 
+-        rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn)));
++        rc = _put_page_type(pg, flags | PTF_preemptible,
++                            mfn_to_page(_mfn(pfn)));
+         if ( likely(!rc) )
+             put_page(pg);
+     }
+@@ -1535,6 +1542,14 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+ 
+     pl2e = map_domain_page(_mfn(pfn));
+ 
++    /*
++     * NB that alloc_l2_table will never set partial_pte on an l2; but
++     * free_l2_table might if a linear_pagetable entry is interrupted
++     * partway through de-validation.  In that circumstance,
++     * get_page_from_l2e() will always return -EINVAL; and we must
++     * retain the type ref by doing the normal partial_flags tracking.
++     */
++
+     for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES;
+           i++, partial_flags = 0 )
+     {
+@@ -1598,6 +1613,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
+                 page->partial_flags = partial_flags;
+                 current->arch.old_guest_ptpg = NULL;
+                 current->arch.old_guest_table = page;
++                current->arch.old_guest_table_partial = true;
+             }
+         }
+         if ( rc < 0 )
+@@ -1704,12 +1720,16 @@ static int alloc_l3_table(struct page_info *page)
+                  * builds.
+                  */
+                 if ( current->arch.old_guest_table == l3e_get_page(l3e) )
++                {
++                    ASSERT(current->arch.old_guest_table_partial);
+                     page->partial_flags = PTF_partial_set;
++                }
+                 else
+                     ASSERT_UNREACHABLE();
+             }
+             current->arch.old_guest_ptpg = NULL;
+             current->arch.old_guest_table = page;
++            current->arch.old_guest_table_partial = true;
+         }
+         while ( i-- > 0 )
+             pl3e[i] = unadjust_guest_l3e(pl3e[i], d);
+@@ -1897,12 +1917,16 @@ static int alloc_l4_table(struct page_info *page)
+                          * builds.
+                          */
+                         if ( current->arch.old_guest_table == l4e_get_page(l4e) )
++                        {
++                            ASSERT(current->arch.old_guest_table_partial);
+                             page->partial_flags = PTF_partial_set;
++                        }
+                         else
+                             ASSERT_UNREACHABLE();
+                     }
+                     current->arch.old_guest_ptpg = NULL;
+                     current->arch.old_guest_table = page;
++                    current->arch.old_guest_table_partial = true;
+                 }
+             }
+         }
+@@ -2831,6 +2855,28 @@ static int _put_page_type(struct page_info *page, unsigned int flags,
+         x  = y;
+         nx = x - 1;
+ 
++        /*
++         * Is this expected to do a full reference drop, or only
++         * cleanup partial validation / devalidation?
++         *
++         * If the former, the caller must hold a "full" type ref;
++         * which means the page must be validated.  If the page is
++         * *not* fully validated, continuing would almost certainly
++         * open up a security hole.  An exception to this is during
++         * domain destruction, where PGT_validated can be dropped
++         * without dropping a type ref.
++         *
++         * If the latter, do nothing unless type PGT_partial is set.
++         * If it is set, the type count must be 1.
++         */
++        if ( !(flags & PTF_partial_set) )
++            BUG_ON((x & PGT_partial) ||
++                   !((x & PGT_validated) || page_get_owner(page)->is_dying));
++        else if ( !(x & PGT_partial) )
++            return 0;
++        else
++            BUG_ON((x & PGT_count_mask) != 1);
++
+         ASSERT((x & PGT_count_mask) != 0);
+ 
+         switch ( nx & (PGT_locked | PGT_count_mask) )
+@@ -3092,17 +3138,34 @@ int put_old_guest_table(struct vcpu *v)
+     if ( !v->arch.old_guest_table )
+         return 0;
+ 
+-    switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible,
+-                                 v->arch.old_guest_ptpg) )
++    rc = _put_page_type(v->arch.old_guest_table,
++                        PTF_preemptible |
++                        ( v->arch.old_guest_table_partial ?
++                          PTF_partial_set : 0 ),
++                        v->arch.old_guest_ptpg);
++
++    if ( rc == -ERESTART || rc == -EINTR )
+     {
+-    case -EINTR:
+-    case -ERESTART:
++        v->arch.old_guest_table_partial = (rc == -ERESTART);
+         return -ERESTART;
+-    case 0:
+-        put_page(v->arch.old_guest_table);
+     }
+ 
++    /*
++     * It shouldn't be possible for _put_page_type() to return
++     * anything else at the moment; but if it does happen in
++     * production, leaking the type ref is probably the best thing to
++     * do.  Either way, drop the general ref held by old_guest_table.
++     */
++    ASSERT(rc == 0);
++
++    put_page(v->arch.old_guest_table);
+     v->arch.old_guest_table = NULL;
++    v->arch.old_guest_ptpg = NULL;
++    /*
++     * Safest default if someone sets old_guest_table without
++     * explicitly setting old_guest_table_partial.
++     */
++    v->arch.old_guest_table_partial = true;
+ 
+     return rc;
+ }
+@@ -3253,11 +3316,11 @@ int new_guest_cr3(mfn_t mfn)
+             switch ( rc = put_page_and_type_preemptible(page) )
+             {
+             case -EINTR:
+-                rc = -ERESTART;
+-                /* fallthrough */
+             case -ERESTART:
+                 curr->arch.old_guest_ptpg = NULL;
+                 curr->arch.old_guest_table = page;
++                curr->arch.old_guest_table_partial = (rc == -ERESTART);
++                rc = -ERESTART;
+                 break;
+             default:
+                 BUG_ON(rc);
+@@ -3494,6 +3557,7 @@ long do_mmuext_op(
+                     {
+                         curr->arch.old_guest_ptpg = NULL;
+                         curr->arch.old_guest_table = page;
++                        curr->arch.old_guest_table_partial = false;
+                     }
+                 }
+             }
+@@ -3528,6 +3592,11 @@ long do_mmuext_op(
+             case -ERESTART:
+                 curr->arch.old_guest_ptpg = NULL;
+                 curr->arch.old_guest_table = page;
++                /*
++                 * EINTR means we still hold the type ref; ERESTART
++                 * means PGT_partial holds the type ref
++                 */
++                curr->arch.old_guest_table_partial = (rc == -ERESTART);
+                 rc = 0;
+                 break;
+             default:
+@@ -3596,11 +3665,15 @@ long do_mmuext_op(
+                 switch ( rc = put_page_and_type_preemptible(page) )
+                 {
+                 case -EINTR:
+-                    rc = -ERESTART;
+-                    /* fallthrough */
+                 case -ERESTART:
+                     curr->arch.old_guest_ptpg = NULL;
+                     curr->arch.old_guest_table = page;
++                    /*
++                     * EINTR means we still hold the type ref;
++                     * ERESTART means PGT_partial holds the ref
++                     */
++                    curr->arch.old_guest_table_partial = (rc == -ERESTART);
++                    rc = -ERESTART;
+                     break;
+                 default:
+                     BUG_ON(rc);
+diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
+index 214e44ce1c..2cfce7b36b 100644
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -307,7 +307,7 @@ struct arch_domain
+ 
+     struct paging_domain paging;
+     struct p2m_domain *p2m;
+-    /* To enforce lock ordering in the pod code wrt the 
++    /* To enforce lock ordering in the pod code wrt the
+      * page_alloc lock */
+     int page_alloc_unlock_level;
+ 
+@@ -581,6 +581,8 @@ struct arch_vcpu
+     struct page_info *old_guest_table;  /* partially destructed pagetable */
+     struct page_info *old_guest_ptpg;   /* containing page table of the */
+                                         /* former, if any */
++    bool old_guest_table_partial;       /* Are we dropping a type ref, or just
++                                         * finishing up a partial de-validation? */
+     /* guest_table holds a ref to the page, and also a type-count unless
+      * shadow refcounts are in use */
+     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa301-master-1.patch b/system/xen/xsa/xsa301-master-1.patch
new file mode 100644
index 0000000000000..54cce2ce28456
--- /dev/null
+++ b/system/xen/xsa/xsa301-master-1.patch
@@ -0,0 +1,80 @@
+From 19d6330f142cb941b6340a88592e8a294de0ff8c Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Tue, 15 Oct 2019 17:10:40 +0100
+Subject: [PATCH 1/3] xen/arm: p2m: Avoid aliasing guest physical frame
+
+The P2M helpers implementation is quite lax and will end up ignoring
+the unused top bits of a guest physical frame.
+
+This effectively means that p2m_set_entry() will create a mapping for a
+different frame (it is always equal to gfn & (mask unused bits)). Yet
+p2m->max_mapped_gfn will be updated using the original frame.
+
+At the moment, p2m_get_entry() and p2m_resolve_translation_fault()
+assume that p2m_get_root_pointer() will always return a non-NULL pointer
+when the GFN is smaller than p2m->max_mapped_gfn.
+
+Unfortunately, because of the aliasing described above, it would be
+possible to set p2m->max_mapped_gfn high enough so it covers frame that
+would lead p2m_get_root_pointer() to return NULL.
+
+As we don't sanity check the guest physical frame provided by a guest, a
+malicious guest could craft a series of hypercalls that will hit the
+BUG_ON() and therefore DoS Xen.
+
+To prevent aliasing, the function p2m_get_root_pointer() is now reworked
+to return NULL if any of the unused top bits are not zero. The caller
+can then decide what's the appropriate action to do. Since the two paths
+(i.e. P2M_ROOT_PAGES == 1 and P2M_ROOT_PAGES != 1) are now very
+similar, take the opportunity to consolidate them, making the code a
+bit simpler.
+
+With this change, p2m_get_entry() will not try to insert a mapping as
+the root pointer is invalid.
+
+Note that root_table is now switched to unsigned long as unsigned int is
+not enough to hold part of a GFN.
+
+This is part of XSA-301.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+---
+ xen/arch/arm/p2m.c | 17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index a2749d9b6f..d0045a8b28 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -229,21 +229,14 @@ void p2m_tlb_flush_sync(struct p2m_domain *p2m)
+ static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m,
+                                     gfn_t gfn)
+ {
+-    unsigned int root_table;
+-
+-    if ( P2M_ROOT_PAGES == 1 )
+-        return __map_domain_page(p2m->root);
++    unsigned long root_table;
+ 
+     /*
+-     * Concatenated root-level tables. The table number will be the
+-     * offset at the previous level. It is not possible to
+-     * concatenate a level-0 root.
++     * While the root table index is the offset from the previous level,
++     * we can't use (P2M_ROOT_LEVEL - 1) because the root level might be
++     * 0. Yet we still want to check if all the unused bits are zeroed.
+      */
+-    ASSERT(P2M_ROOT_LEVEL > 0);
+-
+-    root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL - 1]);
+-    root_table &= LPAE_ENTRY_MASK;
+-
++    root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL] + LPAE_SHIFT);
+     if ( root_table >= P2M_ROOT_PAGES )
+         return NULL;
+ 
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa301-master-2.patch b/system/xen/xsa/xsa301-master-2.patch
new file mode 100644
index 0000000000000..baedc9c29751f
--- /dev/null
+++ b/system/xen/xsa/xsa301-master-2.patch
@@ -0,0 +1,92 @@
+From 3b896936f7505e929dd869d14afcb185d0ee75f8 Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Tue, 15 Oct 2019 17:10:41 +0100
+Subject: [PATCH 2/3] xen/arm: p2m: Avoid off-by-one check on
+ p2m->max_mapped_gfn
+
+The code base uses the field p2m->max_mapped_gfn inconsistently.
+Some of the users expect p2m->max_mapped_gfn to contain the highest
+mapped GFN while others expect highest + 1.
+
+p2m->max_mapped_gfn is set as highest + 1; because of that, the sanity
+check on the GFN in p2m_resolve_translation_fault() and
+p2m_get_entry() can be bypassed when GFN == p2m->max_mapped_gfn.
+
+p2m_get_root_pointer(p2m->max_mapped_gfn) may return NULL if it is
+outside of the supported address range and therefore the BUG_ON() could
+be hit.
+
+The current value held in p2m->max_mapped_gfn is inconsistent with the
+expectation of the common code (see domain_get_maximum_gpfn()) and also
+the documentation of the field.
+
+Rather than changing the check in p2m_resolve_translation_fault() and
+p2m_get_entry(), p2m->max_mapped_gfn now contains the highest
+mapped GFN and the callers assuming "highest + 1" are now adjusted.
+
+Take the opportunity to use 1UL rather than 1 as page_order could
+theoretically be big enough to overflow a 32-bit integer.
+
+Lastly, update the documentation of the field max_mapped_gfn to reflect
+how it is computed.
+
+This is part of XSA-301.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+---
+ xen/arch/arm/p2m.c        | 6 +++---
+ xen/include/asm-arm/p2m.h | 5 +----
+ 2 files changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index d0045a8b28..8d20d27961 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1041,7 +1041,7 @@ static int __p2m_set_entry(struct p2m_domain *p2m,
+         p2m_write_pte(entry, pte, p2m->clean_pte);
+ 
+         p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
+-                                      gfn_add(sgfn, 1 << page_order));
++                                      gfn_add(sgfn, (1UL << page_order) - 1));
+         p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
+     }
+ 
+@@ -1572,7 +1572,7 @@ int relinquish_p2m_mapping(struct domain *d)
+     p2m_write_lock(p2m);
+ 
+     start = p2m->lowest_mapped_gfn;
+-    end = p2m->max_mapped_gfn;
++    end = gfn_add(p2m->max_mapped_gfn, 1);
+ 
+     for ( ; gfn_x(start) < gfn_x(end);
+           start = gfn_next_boundary(start, order) )
+@@ -1641,7 +1641,7 @@ int p2m_cache_flush_range(struct domain *d, gfn_t *pstart, gfn_t end)
+     p2m_read_lock(p2m);
+ 
+     start = gfn_max(start, p2m->lowest_mapped_gfn);
+-    end = gfn_min(end, p2m->max_mapped_gfn);
++    end = gfn_min(end, gfn_add(p2m->max_mapped_gfn, 1));
+ 
+     next_block_gfn = start;
+ 
+diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
+index 89f82df380..5fdb6e8183 100644
+--- a/xen/include/asm-arm/p2m.h
++++ b/xen/include/asm-arm/p2m.h
+@@ -36,10 +36,7 @@ struct p2m_domain {
+     /* Current Translation Table Base Register for the p2m */
+     uint64_t vttbr;
+ 
+-    /*
+-     * Highest guest frame that's ever been mapped in the p2m
+-     * Only takes into account ram and foreign mapping
+-     */
++    /* Highest guest frame that's ever been mapped in the p2m */
+     gfn_t max_mapped_gfn;
+ 
+     /*
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa301-master-3.patch b/system/xen/xsa/xsa301-master-3.patch
new file mode 100644
index 0000000000000..9f137b89f6ef2
--- /dev/null
+++ b/system/xen/xsa/xsa301-master-3.patch
@@ -0,0 +1,67 @@
+From 060c2dd3b7c2674a019d94afb2b4ebf3663f6c6e Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Tue, 15 Oct 2019 17:10:42 +0100
+Subject: [PATCH 3/3] xen/arm: p2m: Don't check the return of
+ p2m_get_root_pointer() with BUG_ON()
+
+It turns out that the BUG_ON() was actually reachable with well-crafted
+hypercalls. The BUG_ON() is here to catch logical errors, so
+crashing Xen is a bit over the top.
+
+While all the holes should now be fixed, it would be better to downgrade
+the BUG_ON() to something less fatal to prevent any more DoS.
+
+The BUG_ON() in p2m_get_entry() is now replaced by ASSERT_UNREACHABLE()
+to catch mistakes in debug builds and return INVALID_MFN for production
+builds. The interface also requires page_order to be set to give an idea
+of the size of the "hole". So 'level' is now set so that we report a hole
+the size of an entry of the root page-table. This stays in line with what
+happens when the GFN is higher than p2m->max_mapped_gfn.
+
+The BUG_ON() in p2m_resolve_translation_fault() is now replaced by
+ASSERT_UNREACHABLE() to catch mistakes in debug builds and just report a
+fault for production builds.
+
+This is part of XSA-301.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+---
+ xen/arch/arm/p2m.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 8d20d27961..ce59f2b503 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -395,7 +395,12 @@ mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
+      * the table should always be non-NULL because the gfn is below
+      * p2m->max_mapped_gfn and the root table pages are always present.
+      */
+-    BUG_ON(table == NULL);
++    if ( !table )
++    {
++        ASSERT_UNREACHABLE();
++        level = P2M_ROOT_LEVEL;
++        goto out;
++    }
+ 
+     for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
+     {
+@@ -1196,7 +1201,11 @@ bool p2m_resolve_translation_fault(struct domain *d, gfn_t gfn)
+      * The table should always be non-NULL because the gfn is below
+      * p2m->max_mapped_gfn and the root table pages are always present.
+      */
+-    BUG_ON(table == NULL);
++    if ( !table )
++    {
++        ASSERT_UNREACHABLE();
++        goto out;
++    }
+ 
+     /*
+      * Go down the page-tables until an entry has the valid bit unset or
+-- 
+2.23.0
+
diff --git a/system/xen/xsa/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch b/system/xen/xsa/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch
new file mode 100644
index 0000000000000..5d52163406f0c
--- /dev/null
+++ b/system/xen/xsa/xsa302-4.12-0001-IOMMU-add-missing-HVM-check.patch
@@ -0,0 +1,37 @@
+From 0c9c0fbb356e3210cb77b3d738be50981b26058a Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 2 Oct 2019 13:36:59 +0200
+Subject: [PATCH 1/2] IOMMU: add missing HVM check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fix an unguarded d->arch.hvm access in assign_device().
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+(cherry picked from commit 41fd1009cd7416b73d745a77c24b4e8d1a296fe6)
+Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
+---
+ xen/drivers/passthrough/pci.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index 8108ed5f9a..d7420bd8bf 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -1452,7 +1452,8 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+ 
+     /* Prevent device assign if mem paging or mem sharing have been 
+      * enabled for this domain */
+-    if ( unlikely(d->arch.hvm.mem_sharing_enabled ||
++    if ( unlikely((is_hvm_domain(d) &&
++                   d->arch.hvm.mem_sharing_enabled) ||
+                   vm_event_check_ring(d->vm_event_paging) ||
+                   p2m_get_hostp2m(d)->global_logdirty) )
+         return -EXDEV;
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch b/system/xen/xsa/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch
new file mode 100644
index 0000000000000..181ece3bb72a2
--- /dev/null
+++ b/system/xen/xsa/xsa302-4.12-0002-passthrough-quarantine-PCI-devices.patch
@@ -0,0 +1,499 @@
+From 278d8e585a9f110a1af0bd92a9fc43733c9c7227 Mon Sep 17 00:00:00 2001
+From: Paul Durrant <paul.durrant@citrix.com>
+Date: Mon, 14 Oct 2019 17:52:59 +0100
+Subject: [PATCH 2/2] passthrough: quarantine PCI devices
+
+When a PCI device is assigned to an untrusted domain, it is possible for
+that domain to program the device to DMA to an arbitrary address. The
+IOMMU is used to protect the host from malicious DMA by making sure that
+the device addresses can only target memory assigned to the guest. However,
+when the guest domain is torn down the device is assigned back to dom0,
+thus allowing any in-flight DMA to potentially target critical host data.
+
+This patch introduces a 'quarantine' for PCI devices using dom_io. When
+the toolstack makes a device assignable (by binding it to pciback), it
+will now also assign it to DOMID_IO and the device will only be assigned
+back to dom0 when the device is made unassignable again. Whilst the device is
+assignable it will only ever transfer between dom_io and guest domains.
+dom_io is actually only used as a sentinel domain for quarantining purposes;
+it is not configured with any IOMMU mappings. Assignment to dom_io simply
+means that the device's initiator (requestor) identifier is not present in
+the IOMMU's device table and thus any DMA transactions issued will be
+terminated with a fault condition.
+
+In addition, a fix to assignment handling is made for VT-d.  Failure
+during the assignment step should not lead to a device still being
+associated with its prior owner. Hand the device to DomIO temporarily,
+until the assignment step has completed successfully.  Remove the PI
+hooks from the source domain then earlier as well.
+
+Failure of the recovery reassign_device_ownership() must not go silent:
+There may, e.g., still be left-over RMRR mappings in the domain to which
+assignment has failed, and hence we can't allow that domain to continue
+executing.
+
+NOTE: This patch also includes one printk() cleanup; the
+      "XEN_DOMCTL_assign_device: " tag is dropped in iommu_do_pci_domctl(),
+      since similar printk()-s elsewhere also don't log such a tag.
+
+This is XSA-302.
+
+Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
+(cherry picked from commit ec99857f59f7f06236f11ca8b0b2303e5e745cc4)
+---
+ tools/libxl/libxl_pci.c                     | 25 +++++++++++-
+ xen/arch/x86/mm.c                           |  2 +
+ xen/common/domctl.c                         | 14 ++++++-
+ xen/drivers/passthrough/amd/pci_amd_iommu.c | 10 ++++-
+ xen/drivers/passthrough/iommu.c             |  9 +++++
+ xen/drivers/passthrough/pci.c               | 59 ++++++++++++++++++++++-------
+ xen/drivers/passthrough/vtd/iommu.c         | 40 ++++++++++++++++---
+ xen/include/xen/pci.h                       |  3 ++
+ 8 files changed, 138 insertions(+), 24 deletions(-)
+
+diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
+index 88c324ea23..d6a23fb5f8 100644
+--- a/tools/libxl/libxl_pci.c
++++ b/tools/libxl/libxl_pci.c
+@@ -754,6 +754,7 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc,
+                                             libxl_device_pci *pcidev,
+                                             int rebind)
+ {
++    libxl_ctx *ctx = libxl__gc_owner(gc);
+     unsigned dom, bus, dev, func;
+     char *spath, *driver_path = NULL;
+     int rc;
+@@ -779,7 +780,7 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc,
+     }
+     if ( rc ) {
+         LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
+-        return 0;
++        goto quarantine;
+     }
+ 
+     /* Check to see if there's already a driver that we need to unbind from */
+@@ -810,6 +811,19 @@ static int libxl__device_pci_assignable_add(libxl__gc *gc,
+         return ERROR_FAIL;
+     }
+ 
++quarantine:
++    /*
++     * DOMID_IO is just a sentinel domain, without any actual mappings,
++     * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being
++     * unnecessarily denied.
++     */
++    rc = xc_assign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev),
++                          XEN_DOMCTL_DEV_RDM_RELAXED);
++    if ( rc < 0 ) {
++        LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func);
++        return ERROR_FAIL;
++    }
++
+     return 0;
+ }
+ 
+@@ -817,9 +831,18 @@ static int libxl__device_pci_assignable_remove(libxl__gc *gc,
+                                                libxl_device_pci *pcidev,
+                                                int rebind)
+ {
++    libxl_ctx *ctx = libxl__gc_owner(gc);
+     int rc;
+     char *driver_path;
+ 
++    /* De-quarantine */
++    rc = xc_deassign_device(ctx->xch, DOMID_IO, pcidev_encode_bdf(pcidev));
++    if ( rc < 0 ) {
++        LOG(ERROR, "failed to de-quarantine "PCI_BDF, pcidev->domain, pcidev->bus,
++            pcidev->dev, pcidev->func);
++        return ERROR_FAIL;
++    }
++
+     /* Unbind from pciback */
+     if ( (rc=pciback_dev_is_assigned(gc, pcidev)) < 0 ) {
+         return ERROR_FAIL;
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 3557cd1178..11d753d8d2 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -295,9 +295,11 @@ void __init arch_init_memory(void)
+      * Initialise our DOMID_IO domain.
+      * This domain owns I/O pages that are within the range of the page_info
+      * array. Mappings occur at the priv of the caller.
++     * Quarantined PCI devices will be associated with this domain.
+      */
+     dom_io = domain_create(DOMID_IO, NULL, false);
+     BUG_ON(IS_ERR(dom_io));
++    INIT_LIST_HEAD(&dom_io->arch.pdev_list);
+ 
+     /*
+      * Initialise our COW domain.
+diff --git a/xen/common/domctl.c b/xen/common/domctl.c
+index d08b6274e2..e3c4be2b48 100644
+--- a/xen/common/domctl.c
++++ b/xen/common/domctl.c
+@@ -391,6 +391,16 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ 
+     switch ( op->cmd )
+     {
++    case XEN_DOMCTL_assign_device:
++    case XEN_DOMCTL_deassign_device:
++        if ( op->domain == DOMID_IO )
++        {
++            d = dom_io;
++            break;
++        }
++        else if ( op->domain == DOMID_INVALID )
++            return -ESRCH;
++        /* fall through */
+     case XEN_DOMCTL_test_assign_device:
+         if ( op->domain == DOMID_INVALID )
+         {
+@@ -412,7 +422,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ 
+     if ( !domctl_lock_acquire() )
+     {
+-        if ( d )
++        if ( d && d != dom_io )
+             rcu_unlock_domain(d);
+         return hypercall_create_continuation(
+             __HYPERVISOR_domctl, "h", u_domctl);
+@@ -1074,7 +1084,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+     domctl_lock_release();
+ 
+  domctl_out_unlock_domonly:
+-    if ( d )
++    if ( d && d != dom_io )
+         rcu_unlock_domain(d);
+ 
+     if ( copyback && __copy_to_guest(u_domctl, op, 1) )
+diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+index 33a3798f36..15c13e1163 100644
+--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+@@ -120,6 +120,10 @@ static void amd_iommu_setup_domain_device(
+     u8 bus = pdev->bus;
+     const struct domain_iommu *hd = dom_iommu(domain);
+ 
++    /* dom_io is used as a sentinel for quarantined devices */
++    if ( domain == dom_io )
++        return;
++
+     BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode ||
+             !iommu->dev_table.buffer );
+ 
+@@ -277,6 +281,10 @@ void amd_iommu_disable_domain_device(struct domain *domain,
+     int req_id;
+     u8 bus = pdev->bus;
+ 
++    /* dom_io is used as a sentinel for quarantined devices */
++    if ( domain == dom_io )
++        return;
++
+     BUG_ON ( iommu->dev_table.buffer == NULL );
+     req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
+     dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+@@ -363,7 +371,7 @@ static int amd_iommu_assign_device(struct domain *d, u8 devfn,
+             ivrs_mappings[req_id].read_permission);
+     }
+ 
+-    return reassign_device(hardware_domain, d, devfn, pdev);
++    return reassign_device(pdev->domain, d, devfn, pdev);
+ }
+ 
+ static void deallocate_next_page_table(struct page_info *pg, int level)
+diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
+index a6697d58fb..2762e1342f 100644
+--- a/xen/drivers/passthrough/iommu.c
++++ b/xen/drivers/passthrough/iommu.c
+@@ -232,6 +232,9 @@ void iommu_teardown(struct domain *d)
+ {
+     struct domain_iommu *hd = dom_iommu(d);
+ 
++    if ( d == dom_io )
++        return;
++
+     hd->status = IOMMU_STATUS_disabled;
+     hd->platform_ops->teardown(d);
+     tasklet_schedule(&iommu_pt_cleanup_tasklet);
+@@ -241,6 +244,9 @@ int iommu_construct(struct domain *d)
+ {
+     struct domain_iommu *hd = dom_iommu(d);
+ 
++    if ( d == dom_io )
++        return 0;
++
+     if ( hd->status == IOMMU_STATUS_initialized )
+         return 0;
+ 
+@@ -521,6 +527,9 @@ int __init iommu_setup(void)
+     printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
+     if ( iommu_enabled )
+     {
++        if ( iommu_domain_init(dom_io) )
++            panic("Could not set up quarantine\n");
++
+         printk(" - Dom0 mode: %s\n",
+                iommu_hwdom_passthrough ? "Passthrough" :
+                iommu_hwdom_strict ? "Strict" : "Relaxed");
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index d7420bd8bf..d66a8a1daf 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -1426,19 +1426,29 @@ static int iommu_remove_device(struct pci_dev *pdev)
+     return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
+ }
+ 
+-/*
+- * If the device isn't owned by the hardware domain, it means it already
+- * has been assigned to other domain, or it doesn't exist.
+- */
+ static int device_assigned(u16 seg, u8 bus, u8 devfn)
+ {
+     struct pci_dev *pdev;
++    int rc = 0;
+ 
+     pcidevs_lock();
+-    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
++
++    pdev = pci_get_pdev(seg, bus, devfn);
++
++    if ( !pdev )
++        rc = -ENODEV;
++    /*
++     * If the device exists and it is not owned by either the hardware
++     * domain or dom_io then it must be assigned to a guest, or be
++     * hidden (owned by dom_xen).
++     */
++    else if ( pdev->domain != hardware_domain &&
++              pdev->domain != dom_io )
++        rc = -EBUSY;
++
+     pcidevs_unlock();
+ 
+-    return pdev ? 0 : -EBUSY;
++    return rc;
+ }
+ 
+ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+@@ -1452,7 +1462,8 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+ 
+     /* Prevent device assign if mem paging or mem sharing have been 
+      * enabled for this domain */
+-    if ( unlikely((is_hvm_domain(d) &&
++    if ( d != dom_io &&
++         unlikely((is_hvm_domain(d) &&
+                    d->arch.hvm.mem_sharing_enabled) ||
+                   vm_event_check_ring(d->vm_event_paging) ||
+                   p2m_get_hostp2m(d)->global_logdirty) )
+@@ -1468,12 +1479,20 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+         return rc;
+     }
+ 
+-    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
++    pdev = pci_get_pdev(seg, bus, devfn);
++
++    rc = -ENODEV;
+     if ( !pdev )
+-    {
+-        rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
+         goto done;
+-    }
++
++    rc = 0;
++    if ( d == pdev->domain )
++        goto done;
++
++    rc = -EBUSY;
++    if ( pdev->domain != hardware_domain &&
++         pdev->domain != dom_io )
++        goto done;
+ 
+     if ( pdev->msix )
+         msixtbl_init(d);
+@@ -1496,6 +1515,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
+     }
+ 
+  done:
++    /* The device is assigned to dom_io so mark it as quarantined */
++    if ( !rc && d == dom_io )
++        pdev->quarantine = true;
++
+     if ( !has_arch_pdevs(d) && has_iommu_pt(d) )
+         iommu_teardown(d);
+     pcidevs_unlock();
+@@ -1508,6 +1531,7 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+ {
+     const struct domain_iommu *hd = dom_iommu(d);
+     struct pci_dev *pdev = NULL;
++    struct domain *target;
+     int ret = 0;
+ 
+     if ( !iommu_enabled || !hd->platform_ops )
+@@ -1518,12 +1542,16 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+     if ( !pdev )
+         return -ENODEV;
+ 
++    /* De-assignment from dom_io should de-quarantine the device */
++    target = (pdev->quarantine && pdev->domain != dom_io) ?
++        dom_io : hardware_domain;
++
+     while ( pdev->phantom_stride )
+     {
+         devfn += pdev->phantom_stride;
+         if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
+             break;
+-        ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
++        ret = hd->platform_ops->reassign_device(d, target, devfn,
+                                                 pci_to_dev(pdev));
+         if ( !ret )
+             continue;
+@@ -1534,7 +1562,7 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+     }
+ 
+     devfn = pdev->devfn;
+-    ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
++    ret = hd->platform_ops->reassign_device(d, target, devfn,
+                                             pci_to_dev(pdev));
+     if ( ret )
+     {
+@@ -1544,6 +1572,9 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+         return ret;
+     }
+ 
++    if ( pdev->domain == hardware_domain  )
++        pdev->quarantine = false;
++
+     pdev->fault.count = 0;
+ 
+     if ( !has_arch_pdevs(d) && has_iommu_pt(d) )
+@@ -1722,7 +1753,7 @@ int iommu_do_pci_domctl(
+             ret = hypercall_create_continuation(__HYPERVISOR_domctl,
+                                                 "h", u_domctl);
+         else if ( ret )
+-            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
++            printk(XENLOG_G_ERR
+                    "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
+                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                    d->domain_id, ret);
+diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
+index 1db1cd9f2d..a8d1baa064 100644
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -1338,6 +1338,10 @@ int domain_context_mapping_one(
+     int agaw, rc, ret;
+     bool_t flush_dev_iotlb;
+ 
++    /* dom_io is used as a sentinel for quarantined devices */
++    if ( domain == dom_io )
++        return 0;
++
+     ASSERT(pcidevs_locked());
+     spin_lock(&iommu->lock);
+     maddr = bus_to_context_maddr(iommu, bus);
+@@ -1573,6 +1577,10 @@ int domain_context_unmap_one(
+     int iommu_domid, rc, ret;
+     bool_t flush_dev_iotlb;
+ 
++    /* dom_io is used as a sentinel for quarantined devices */
++    if ( domain == dom_io )
++        return 0;
++
+     ASSERT(pcidevs_locked());
+     spin_lock(&iommu->lock);
+ 
+@@ -1705,6 +1713,10 @@ static int domain_context_unmap(struct domain *domain, u8 devfn,
+         goto out;
+     }
+ 
++    /* dom_io is used as a sentinel for quarantined devices */
++    if ( domain == dom_io )
++        goto out;
++
+     /*
+      * if no other devices under the same iommu owned by this domain,
+      * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp
+@@ -2441,6 +2453,15 @@ static int reassign_device_ownership(
+     if ( ret )
+         return ret;
+ 
++    if ( devfn == pdev->devfn )
++    {
++        list_move(&pdev->domain_list, &dom_io->arch.pdev_list);
++        pdev->domain = dom_io;
++    }
++
++    if ( !has_arch_pdevs(source) )
++        vmx_pi_hooks_deassign(source);
++
+     if ( !has_arch_pdevs(target) )
+         vmx_pi_hooks_assign(target);
+ 
+@@ -2459,15 +2480,13 @@ static int reassign_device_ownership(
+         pdev->domain = target;
+     }
+ 
+-    if ( !has_arch_pdevs(source) )
+-        vmx_pi_hooks_deassign(source);
+-
+     return ret;
+ }
+ 
+ static int intel_iommu_assign_device(
+     struct domain *d, u8 devfn, struct pci_dev *pdev, u32 flag)
+ {
++    struct domain *s = pdev->domain;
+     struct acpi_rmrr_unit *rmrr;
+     int ret = 0, i;
+     u16 bdf, seg;
+@@ -2510,8 +2529,8 @@ static int intel_iommu_assign_device(
+         }
+     }
+ 
+-    ret = reassign_device_ownership(hardware_domain, d, devfn, pdev);
+-    if ( ret )
++    ret = reassign_device_ownership(s, d, devfn, pdev);
++    if ( ret || d == dom_io )
+         return ret;
+ 
+     /* Setup rmrr identity mapping */
+@@ -2524,11 +2543,20 @@ static int intel_iommu_assign_device(
+             ret = rmrr_identity_mapping(d, 1, rmrr, flag);
+             if ( ret )
+             {
+-                reassign_device_ownership(d, hardware_domain, devfn, pdev);
++                int rc;
++
++                rc = reassign_device_ownership(d, s, devfn, pdev);
+                 printk(XENLOG_G_ERR VTDPREFIX
+                        " cannot map reserved region (%"PRIx64",%"PRIx64"] for Dom%d (%d)\n",
+                        rmrr->base_address, rmrr->end_address,
+                        d->domain_id, ret);
++                if ( rc )
++                {
++                    printk(XENLOG_ERR VTDPREFIX
++                           " failed to reclaim %04x:%02x:%02x.%u from %pd (%d)\n",
++                           seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), d, rc);
++                    domain_crash(d);
++                }
+                 break;
+             }
+         }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index 8b21e8dc84..a031fd6020 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -88,6 +88,9 @@ struct pci_dev {
+ 
+     nodeid_t node; /* NUMA node */
+ 
++    /* Device to be quarantined, don't automatically re-assign to dom0 */
++    bool quarantine;
++
+     /* Device with errata, ignore the BARs. */
+     bool ignore_bars;
+ 
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch b/system/xen/xsa/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
new file mode 100644
index 0000000000000..afb1096c1d30b
--- /dev/null
+++ b/system/xen/xsa/xsa303-0001-xen-arm32-entry-Split-__DEFINE_ENTRY_TRAP-in-two.patch
@@ -0,0 +1,74 @@
+From c8cb33fa64c9ccbfa2a494a9dad2e0a763c09176 Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Tue, 1 Oct 2019 13:07:53 +0100
+Subject: [PATCH 1/4] xen/arm32: entry: Split __DEFINE_ENTRY_TRAP in two
+
+The preprocessing macro __DEFINE_ENTRY_TRAP is used to generate trap
+entry functions. While the macro is fairly small today, follow-up patches
+will increase the size significantly.
+
+In general, assembly macros are more readable as they allow you to name
+parameters and avoid '\'. So the actual implementation of the trap is
+now switched to an assembly macro.
+
+This is part of XSA-303.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Andre Przywara <andre.przywara@arm.com>
+---
+ xen/arch/arm/arm32/entry.S | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
+index 0b4cd19abd..4a762e04f1 100644
+--- a/xen/arch/arm/arm32/entry.S
++++ b/xen/arch/arm/arm32/entry.S
+@@ -126,24 +126,28 @@ abort_guest_exit_end:
+ skip_check:
+         mov pc, lr
+ 
+-/*
+- * Macro to define trap entry. The iflags corresponds to the list of
+- * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
+- */
++        /*
++         * Macro to define trap entry. The iflags corresponds to the list of
++         * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
++         */
++        .macro vector trap, iflags
++        SAVE_ALL
++        cpsie   \iflags
++        adr     lr, return_from_trap
++        mov     r0, sp
++        /*
++         * Save the stack pointer in r11. It will be restored after the
++         * trap has been handled (see return_from_trap).
++         */
++        mov     r11, sp
++        bic     sp, #7      /* Align the stack pointer (noop on guest trap) */
++        b       do_trap_\trap
++        .endm
++
+ #define __DEFINE_TRAP_ENTRY(trap, iflags)                               \
+         ALIGN;                                                          \
+ trap_##trap:                                                            \
+-        SAVE_ALL;                                                       \
+-        cpsie iflags;                                                   \
+-        adr lr, return_from_trap;                                       \
+-        mov r0, sp;                                                     \
+-        /*                                                              \
+-         * Save the stack pointer in r11. It will be restored after the \
+-         * trap has been handled (see return_from_trap).                \
+-         */                                                             \
+-        mov r11, sp;                                                    \
+-        bic sp, #7; /* Align the stack pointer (noop on guest trap) */  \
+-        b do_trap_##trap
++        vector trap, iflags
+ 
+ /* Trap handler which unmask IRQ/Abort, keep FIQ masked */
+ #define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai)
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch b/system/xen/xsa/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
new file mode 100644
index 0000000000000..35f9c0475e62c
--- /dev/null
+++ b/system/xen/xsa/xsa303-0002-xen-arm32-entry-Fold-the-macro-SAVE_ALL-in-the-macro.patch
@@ -0,0 +1,97 @@
+From be7379207c83fa74f8a6c22a8ea213f02714776f Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Tue, 1 Oct 2019 13:15:48 +0100
+Subject: [PATCH 2/4] xen/arm32: entry: Fold the macro SAVE_ALL in the macro
+ vector
+
+Follow-up rework will require the macro vector to distinguish between
+a trap from a guest vs while in the hypervisor.
+
+The macro SAVE_ALL already has code to distinguish between the two and
+it is only called by the vector macro. So fold the former into the
+latter. This will help to avoid duplicating the check.
+
+This is part of XSA-303.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Andre Przywara <andre.przywara@arm.com>
+---
+ xen/arch/arm/arm32/entry.S | 46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
+index 4a762e04f1..150cbc0b4b 100644
+--- a/xen/arch/arm/arm32/entry.S
++++ b/xen/arch/arm/arm32/entry.S
+@@ -13,27 +13,6 @@
+ #define RESTORE_BANKED(mode) \
+         RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
+ 
+-#define SAVE_ALL                                                        \
+-        sub sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */      \
+-        push {r0-r12}; /* Save R0-R12 */                                \
+-                                                                        \
+-        mrs r11, ELR_hyp;               /* ELR_hyp is return address. */\
+-        str r11, [sp, #UREGS_pc];                                       \
+-                                                                        \
+-        str lr, [sp, #UREGS_lr];                                        \
+-                                                                        \
+-        add r11, sp, #UREGS_kernel_sizeof+4;                            \
+-        str r11, [sp, #UREGS_sp];                                       \
+-                                                                        \
+-        mrc CP32(r11, HSR);             /* Save exception syndrome */   \
+-        str r11, [sp, #UREGS_hsr];                                      \
+-                                                                        \
+-        mrs r11, SPSR_hyp;                                              \
+-        str r11, [sp, #UREGS_cpsr];                                     \
+-        and r11, #PSR_MODE_MASK;                                        \
+-        cmp r11, #PSR_MODE_HYP;                                         \
+-        blne save_guest_regs
+-
+ save_guest_regs:
+ #ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+         /*
+@@ -52,7 +31,7 @@ save_guest_regs:
+         ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
+         str r11, [sp, #UREGS_sp]
+         SAVE_ONE_BANKED(SP_usr)
+-        /* LR_usr is the same physical register as lr and is saved in SAVE_ALL */
++        /* LR_usr is the same physical register as lr and is saved by the caller */
+         SAVE_BANKED(svc)
+         SAVE_BANKED(abt)
+         SAVE_BANKED(und)
+@@ -131,7 +110,28 @@ skip_check:
+          * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
+          */
+         .macro vector trap, iflags
+-        SAVE_ALL
++        /* Save registers in the stack */
++        sub     sp, #(UREGS_SP_usr - UREGS_sp) /* SP, LR, SPSR, PC */
++        push    {r0-r12}                       /* Save R0-R12 */
++        mrs     r11, ELR_hyp                   /* ELR_hyp is return address */
++        str     r11, [sp, #UREGS_pc]
++
++        str     lr, [sp, #UREGS_lr]
++
++        add     r11, sp, #(UREGS_kernel_sizeof + 4)
++
++        str     r11, [sp, #UREGS_sp]
++
++        mrc     CP32(r11, HSR)                 /* Save exception syndrome */
++        str     r11, [sp, #UREGS_hsr]
++
++        mrs     r11, SPSR_hyp
++        str     r11, [sp, #UREGS_cpsr]
++        and     r11, #PSR_MODE_MASK
++        cmp     r11, #PSR_MODE_HYP
++        blne    save_guest_regs
++
++        /* We are ready to handle the trap, setup the registers and jump. */
+         cpsie   \iflags
+         adr     lr, return_from_trap
+         mov     r0, sp
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch b/system/xen/xsa/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
new file mode 100644
index 0000000000000..516845214880c
--- /dev/null
+++ b/system/xen/xsa/xsa303-0003-xen-arm32-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
@@ -0,0 +1,226 @@
+From 098fe877967870ffda2dfd9629a5fd272f6aacdc Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Fri, 11 Oct 2019 17:49:28 +0100
+Subject: [PATCH 3/4] xen/arm32: Don't blindly unmask interrupts on trap
+ without a change of level
+
+Exception vectors will unmask interrupts regardless of their state in
+the interrupted context.
+
+One of the consequences is that IRQ will be unmasked when receiving an
+undefined instruction exception (used by WARN*) from the hypervisor.
+This could result in unexpected behavior such as deadlock (if a lock was
+shared with interrupts).
+
+In a nutshell, interrupts should only be unmasked when it is safe to do so.
+Xen only unmasks IRQ and Abort interrupts, so the logic can stay simple.
+
+As exception vectors may be shared between guest and hypervisor, we now
+need to have a different policy for the interrupts.
+
+On exception from hypervisor, each vector will select the list of
+interrupts to inherit from the interrupted context. Any interrupts not
+listed will be kept masked.
+
+On exception from the guest, the Abort and IRQ will be unmasked
+depending on the exact vector.
+
+The interrupts will be kept masked when the vector cannot be used by
+either guest or hypervisor.
+
+Note that each vector is not anymore preceded by ALIGN. This is fine
+because the alignment is already bigger than what we need.
+
+This is part of XSA-303.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Andre Przywara <andre.przywara@arm.com>
+---
+ xen/arch/arm/arm32/entry.S | 138 +++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 109 insertions(+), 29 deletions(-)
+
+diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
+index 150cbc0b4b..ec90cca093 100644
+--- a/xen/arch/arm/arm32/entry.S
++++ b/xen/arch/arm/arm32/entry.S
+@@ -4,6 +4,17 @@
+ #include <asm/alternative.h>
+ #include <public/xen.h>
+ 
++/*
++ * Short-hands to define the interrupts (A, I, F)
++ *
++ * _ means the interrupt state will not change
++ * X means the state of interrupt X will change
++ *
++ * To be used with msr cpsr_* only
++ */
++#define IFLAGS_AIF      PSR_ABT_MASK | PSR_IRQ_MASK | PSR_FIQ_MASK
++#define IFLAGS_A_F      PSR_ABT_MASK | PSR_FIQ_MASK
++
+ #define SAVE_ONE_BANKED(reg)    mrs r11, reg; str r11, [sp, #UREGS_##reg]
+ #define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
+ 
+@@ -106,10 +117,18 @@ skip_check:
+         mov pc, lr
+ 
+         /*
+-         * Macro to define trap entry. The iflags corresponds to the list of
+-         * interrupts (Asynchronous Abort, IRQ, FIQ) to unmask.
++         * Macro to define a trap entry.
++         *
++         *  @guest_iflags: Optional list of interrupts to unmask when
++         *      entering from guest context. As this is used with cpsie,
++         *      the letter (a, i, f) should be used.
++         *
++         *  @hyp_iflags: Optional list of interrupts to inherit when
++         *      entering from hypervisor context. Any interrupts not
++         *      listed will be kept unchanged. As this is used with cpsr_*,
++         *      IFLAGS_* short-hands should be used.
+          */
+-        .macro vector trap, iflags
++        .macro vector trap, guest_iflags=n, hyp_iflags=0
+         /* Save registers in the stack */
+         sub     sp, #(UREGS_SP_usr - UREGS_sp) /* SP, LR, SPSR, PC */
+         push    {r0-r12}                       /* Save R0-R12 */
+@@ -127,12 +146,39 @@ skip_check:
+ 
+         mrs     r11, SPSR_hyp
+         str     r11, [sp, #UREGS_cpsr]
+-        and     r11, #PSR_MODE_MASK
+-        cmp     r11, #PSR_MODE_HYP
+-        blne    save_guest_regs
+ 
++        /*
++         * We need to distinguish whether we came from guest or
++         * hypervisor context.
++         */
++        and     r0, r11, #PSR_MODE_MASK
++        cmp     r0, #PSR_MODE_HYP
++
++        bne     1f
++        /*
++         * Trap from the hypervisor
++         *
++         * Inherit the state of the interrupts from the hypervisor
++         * context. For that we need to use SPSR (stored in r11) and
++         * modify CPSR accordingly.
++         *
++         * CPSR = (CPSR & ~hyp_iflags) | (SPSR & hyp_iflags)
++         */
++        mrs     r10, cpsr
++        bic     r10, r10, #\hyp_iflags
++        and     r11, r11, #\hyp_iflags
++        orr     r10, r10, r11
++        msr     cpsr_cx, r10
++        b       2f
++
++1:
++        /* Trap from the guest */
++        bl      save_guest_regs
++        .if     \guest_iflags != n
++        cpsie   \guest_iflags
++        .endif
++2:
+         /* We are ready to handle the trap, setup the registers and jump. */
+-        cpsie   \iflags
+         adr     lr, return_from_trap
+         mov     r0, sp
+         /*
+@@ -144,20 +190,6 @@ skip_check:
+         b       do_trap_\trap
+         .endm
+ 
+-#define __DEFINE_TRAP_ENTRY(trap, iflags)                               \
+-        ALIGN;                                                          \
+-trap_##trap:                                                            \
+-        vector trap, iflags
+-
+-/* Trap handler which unmask IRQ/Abort, keep FIQ masked */
+-#define DEFINE_TRAP_ENTRY(trap) __DEFINE_TRAP_ENTRY(trap, ai)
+-
+-/* Trap handler which unmask Abort, keep IRQ/FIQ masked */
+-#define DEFINE_TRAP_ENTRY_NOIRQ(trap) __DEFINE_TRAP_ENTRY(trap, a)
+-
+-/* Trap handler which unmask IRQ, keep Abort/FIQ masked */
+-#define DEFINE_TRAP_ENTRY_NOABORT(trap) __DEFINE_TRAP_ENTRY(trap, i)
+-
+         .align 5
+ GLOBAL(hyp_traps_vector)
+         b trap_reset                    /* 0x00 - Reset */
+@@ -228,14 +260,62 @@ decode_vectors:
+ 
+ #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+ 
+-DEFINE_TRAP_ENTRY(reset)
+-DEFINE_TRAP_ENTRY(undefined_instruction)
+-DEFINE_TRAP_ENTRY(hypervisor_call)
+-DEFINE_TRAP_ENTRY(prefetch_abort)
+-DEFINE_TRAP_ENTRY(guest_sync)
+-DEFINE_TRAP_ENTRY_NOIRQ(irq)
+-DEFINE_TRAP_ENTRY_NOIRQ(fiq)
+-DEFINE_TRAP_ENTRY_NOABORT(data_abort)
++/* Vector not used by the Hypervisor. */
++trap_reset:
++        vector reset
++
++/*
++ * Vector only used by the Hypervisor.
++ *
++ * While the exception can be executed with all the interrupts (e.g.
++ * IRQ) unmasked, the interrupted context may have purposefully masked
++ * some of them. So we want to inherit the state from the interrupted
++ * context.
++ */
++trap_undefined_instruction:
++        vector undefined_instruction, hyp_iflags=IFLAGS_AIF
++
++/* We should never reach this trap */
++trap_hypervisor_call:
++        vector hypervisor_call
++
++/*
++ * Vector only used by the hypervisor.
++ *
++ * While the exception can be executed with all the interrupts (e.g.
++ * IRQ) unmasked, the interrupted context may have purposefully masked
++ * some of them. So we want to inherit the state from the interrupted
++ * context.
++ */
++trap_prefetch_abort:
++       vector prefetch_abort, hyp_iflags=IFLAGS_AIF
++
++/*
++ * Vector only used by the hypervisor.
++ *
++ * Data Abort should be rare and most likely fatal. It is best to not
++ * unmask any interrupts to limit the amount of code that can run before
++ * the Data Abort is treated.
++ */
++trap_data_abort:
++        vector data_abort
++
++/* Vector only used by the guest. We can unmask Abort/IRQ. */
++trap_guest_sync:
++        vector guest_sync, guest_iflags=ai
++
++
++/* Vector used by the hypervisor and the guest. */
++trap_irq:
++        vector irq, guest_iflags=a, hyp_iflags=IFLAGS_A_F
++
++/*
++ * Vector used by the hypervisor and the guest.
++ *
++ * FIQ are not meant to happen, so we don't unmask any interrupts.
++ */
++trap_fiq:
++        vector fiq
+ 
+ return_from_trap:
+         /*
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch b/system/xen/xsa/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
new file mode 100644
index 0000000000000..106cbf98f1771
--- /dev/null
+++ b/system/xen/xsa/xsa303-0004-xen-arm64-Don-t-blindly-unmask-interrupts-on-trap-wi.patch
@@ -0,0 +1,114 @@
+From c6d290ce157a044dec417fdda8db71e41a37d744 Mon Sep 17 00:00:00 2001
+From: Julien Grall <julien.grall@arm.com>
+Date: Mon, 7 Oct 2019 18:10:56 +0100
+Subject: [PATCH 4/4] xen/arm64: Don't blindly unmask interrupts on trap
+ without a change of level
+
+Some of the traps without a change of the level (i.e. hypervisor ->
+hypervisor) will unmask interrupts regardless of their state in the
+interrupted context.
+
+One of the consequences is that IRQ will be unmasked when receiving a
+synchronous exception (used by WARN*()). This could result in unexpected
+behavior such as deadlock (if a lock was shared with interrupts).
+
+In a nutshell, interrupts should only be unmasked when it is safe to
+do so. Xen only unmasks IRQ and Abort interrupts, so the logic can stay
+simple:
+    - hyp_error: All the interrupts are now kept masked. SError should
+      be pretty rare and, if one ever happens, we most likely want to
+      avoid any other interrupts being generated. The potential main
+      "caller" is during virtual SError synchronization on the exit
+      path from the guest (see check_pending_vserror).
+
+    - hyp_sync: The interrupts state is inherited from the interrupted
+      context.
+
+    - hyp_irq: All the interrupts but IRQ state are inherited from the
+      interrupted context. IRQ is kept masked.
+
+This is part of XSA-303.
+
+Reported-by: Julien Grall <Julien.Grall@arm.com>
+Signed-off-by: Julien Grall <julien.grall@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Andre Przywara <andre.przywara@arm.com>
+---
+ xen/arch/arm/arm64/entry.S | 47 ++++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 43 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
+index 2d9a2713a1..3e41ba65b6 100644
+--- a/xen/arch/arm/arm64/entry.S
++++ b/xen/arch/arm/arm64/entry.S
+@@ -188,24 +188,63 @@ hyp_error_invalid:
+         entry   hyp=1
+         invalid BAD_ERROR
+ 
++/*
++ * SError received while running in the hypervisor mode.
++ *
++ * Technically, we could unmask the IRQ if it were unmasked in the
++ * interrupted context. However, this requires checking the PSTATE. For
++ * simplicity, as SError should be rare and potentially fatal,
++ * all interrupts are kept masked.
++ */
+ hyp_error:
+         entry   hyp=1
+-        msr     daifclr, #2
+         mov     x0, sp
+         bl      do_trap_hyp_serror
+         exit    hyp=1
+ 
+-/* Traps taken in Current EL with SP_ELx */
++/*
++ * Synchronous exception received while running in the hypervisor mode.
++ *
++ * While the exception could be executed with all the interrupts (e.g.
++ * IRQ) unmasked, the interrupted context may have purposefully masked
++ * some of them. So we want to inherit the state from the interrupted
++ * context.
++ */
+ hyp_sync:
+         entry   hyp=1
+-        msr     daifclr, #6
++
++        /* Inherit interrupts */
++        mrs     x0, SPSR_el2
++        and     x0, x0, #(PSR_DBG_MASK | PSR_ABT_MASK | PSR_IRQ_MASK | PSR_FIQ_MASK)
++        msr     daif, x0
++
+         mov     x0, sp
+         bl      do_trap_hyp_sync
+         exit    hyp=1
+ 
++/*
++ * IRQ received while running in the hypervisor mode.
++ *
++ * While the exception could be executed with all the interrupts but IRQ
++ * unmasked, the interrupted context may have purposefully masked some
++ * of them. So we want to inherit the state from the interrupted context
++ * and keep IRQ masked.
++ *
++ * XXX: We may want to consider an ordering between interrupts (e.g. if
++ * SError are masked, then IRQ should be masked too). However, this
++ * would require some rework in some paths (e.g. panic, livepatch) to
++ * ensure the ordering is enforced everywhere.
++ */
+ hyp_irq:
+         entry   hyp=1
+-        msr     daifclr, #4
++
++        /* Inherit D, A, F interrupts and keep I masked */
++        mrs     x0, SPSR_el2
++        mov     x1, #(PSR_DBG_MASK | PSR_ABT_MASK | PSR_FIQ_MASK)
++        and     x0, x0, x1
++        orr     x0, x0, #PSR_IRQ_MASK
++        msr     daif, x0
++
+         mov     x0, sp
+         bl      do_trap_irq
+         exit    hyp=1
+-- 
+2.11.0
+
diff --git a/system/xen/xsa/xsa304-4.12-1.patch b/system/xen/xsa/xsa304-4.12-1.patch
new file mode 100644
index 0000000000000..c2ed2c2ced89f
--- /dev/null
+++ b/system/xen/xsa/xsa304-4.12-1.patch
@@ -0,0 +1,71 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/vtd: Hide superpage support for SandyBridge IOMMUs
+
+Something causes SandyBridge IOMMUs to choke when sharing EPT pagetables, and
+an EPT superpage gets shattered.  The root cause is still under investigation,
+but the end result is unusable in combination with CVE-2018-12207 protections.
+
+This is part of XSA-304 / CVE-2018-12207
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
+index 16eada9fa2..a71c8b0f84 100644
+--- a/xen/drivers/passthrough/vtd/extern.h
++++ b/xen/drivers/passthrough/vtd/extern.h
+@@ -97,6 +97,8 @@ void vtd_ops_postamble_quirk(struct iommu* iommu);
+ int __must_check me_wifi_quirk(struct domain *domain,
+                                u8 bus, u8 devfn, int map);
+ void pci_vtd_quirk(const struct pci_dev *);
++void quirk_iommu_caps(struct iommu *iommu);
++
+ bool_t platform_supports_intremap(void);
+ bool_t platform_supports_x2apic(void);
+ 
+diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
+index b3664ecbe0..5d34f75306 100644
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -1215,6 +1215,8 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
+     if ( !(iommu->cap + 1) || !(iommu->ecap + 1) )
+         return -ENODEV;
+ 
++    quirk_iommu_caps(iommu);
++
+     if ( cap_fault_reg_offset(iommu->cap) +
+          cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE ||
+          ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE )
+diff --git a/xen/drivers/passthrough/vtd/quirks.c b/xen/drivers/passthrough/vtd/quirks.c
+index d6db862678..b02688e316 100644
+--- a/xen/drivers/passthrough/vtd/quirks.c
++++ b/xen/drivers/passthrough/vtd/quirks.c
+@@ -540,3 +540,28 @@ void pci_vtd_quirk(const struct pci_dev *pdev)
+         break;
+     }
+ }
++
++void __init quirk_iommu_caps(struct iommu *iommu)
++{
++    /*
++     * IOMMU Quirks:
++     *
++     * SandyBridge IOMMUs claim support for 2M and 1G superpages, but don't
++     * implement superpages internally.
++     *
++     * There are issues changing the walk length under in-flight DMA, which
++     * has manifested as incompatibility between EPT/IOMMU sharing and the
++     * workaround for CVE-2018-12207 / XSA-304.  Hide the superpages
++     * capabilities in the IOMMU, which will prevent Xen from sharing the EPT
++     * and IOMMU pagetables.
++     *
++     * Detection of SandyBridge unfortunately has to be done by processor
++     * model because the client parts don't expose their IOMMUs as PCI devices
++     * we could match with a Device ID.
++     */
++    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
++         boot_cpu_data.x86 == 6 &&
++         (boot_cpu_data.x86_model == 0x2a ||
++          boot_cpu_data.x86_model == 0x2d) )
++        iommu->cap &= ~(0xful << 34);
++}
diff --git a/system/xen/xsa/xsa304-4.12-2.patch b/system/xen/xsa/xsa304-4.12-2.patch
new file mode 100644
index 0000000000000..66d4301838e6e
--- /dev/null
+++ b/system/xen/xsa/xsa304-4.12-2.patch
@@ -0,0 +1,272 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/vtx: Disable executable EPT superpages to work around
+ CVE-2018-12207
+
+CVE-2018-12207 covers a set of errata on various Intel processors, whereby a
+machine check exception can be generated in a corner case when an executable
+mapping changes size or cacheability without TLB invalidation.  HVM guest
+kernels can trigger this to DoS the host.
+
+To mitigate, in affected hardware, all EPT superpages are marked NX.  When an
+instruction fetch violation is observed against the superpage, the superpage
+is shattered to 4k and has execute permissions restored.  This prevents the
+guest kernel from being able to create the necessary preconditions in the iTLB
+to exploit the vulnerability.
+
+This does come with a workload-dependent performance overhead, caused by
+increased TLB pressure.  Performance can be restored, if guest kernels are
+trusted not to mount an attack, by specifying ept=exec-sp on the command line.
+
+This is part of XSA-304 / CVE-2018-12207
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 85081fdc94..e283017015 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -895,7 +895,7 @@ Controls for interacting with the system Extended Firmware Interface.
+     uncacheable.
+ 
+ ### ept
+-> `= List of [ ad=<bool>, pml=<bool> ]`
++> `= List of [ ad=<bool>, pml=<bool>, exec-sp=<bool> ]`
+ 
+ > Applicability: Intel
+ 
+@@ -926,6 +926,16 @@ introduced with the Nehalem architecture.
+     disable PML.  `pml=0` can be used to prevent the use of PML on otherwise
+     capable hardware.
+ 
++*   The `exec-sp` boolean controls whether EPT superpages with execute
++    permissions are permitted.  In general this is good for performance.
++
++    However, on processors vulnerable to CVE-2018-12207, HVM guest kernels can
++    use executable superpages to crash the host.  By default, executable
++    superpages are disabled on affected hardware.
++
++    If HVM guest kernels are trusted not to mount a DoS against the system,
++    this option can enabled to regain performance.
++
+ ### extra_guest_irqs
+ > `= [<domU number>][,<dom0 number>]`
+ 
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index 2089a77270..84191d4e4b 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -1814,6 +1814,24 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
+             break;
+         }
+ 
++        /*
++         * Workaround for XSA-304 / CVE-2018-12207.  If we take an execution
++         * fault against a non-executable superpage, shatter it to regain
++         * execute permissions.
++         */
++        if ( page_order > 0 && npfec.insn_fetch && npfec.present && !violation )
++        {
++            int res = p2m_set_entry(p2m, _gfn(gfn), mfn, PAGE_ORDER_4K,
++                                    p2mt, p2ma);
++
++            if ( res )
++                printk(XENLOG_ERR "Failed to shatter gfn %"PRI_gfn": %d\n",
++                       gfn, res);
++
++            rc = !res;
++            goto out_put_gfn;
++        }
++
+         if ( violation )
+         {
+             /* Should #VE be emulated for this fault? */
+diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
+index 56519fee84..ec5ab860ad 100644
+--- a/xen/arch/x86/hvm/vmx/vmcs.c
++++ b/xen/arch/x86/hvm/vmx/vmcs.c
+@@ -67,6 +67,7 @@ integer_param("ple_window", ple_window);
+ 
+ static bool __read_mostly opt_ept_pml = true;
+ static s8 __read_mostly opt_ept_ad = -1;
++int8_t __read_mostly opt_ept_exec_sp = -1;
+ 
+ static int __init parse_ept_param(const char *s)
+ {
+@@ -82,6 +83,8 @@ static int __init parse_ept_param(const char *s)
+             opt_ept_ad = val;
+         else if ( (val = parse_boolean("pml", s, ss)) >= 0 )
+             opt_ept_pml = val;
++        else if ( (val = parse_boolean("exec-sp", s, ss)) >= 0 )
++            opt_ept_exec_sp = val;
+         else
+             rc = -EINVAL;
+ 
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 26b7ddb5fe..28cba8ec28 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -2445,6 +2445,102 @@ static void pi_notification_interrupt(struct cpu_user_regs *regs)
+ static void __init lbr_tsx_fixup_check(void);
+ static void __init bdw_erratum_bdf14_fixup_check(void);
+ 
++/*
++ * Calculate whether the CPU is vulnerable to Instruction Fetch page
++ * size-change MCEs.
++ */
++static bool __init has_if_pschange_mc(void)
++{
++    uint64_t caps = 0;
++
++    /*
++     * If we are virtualised, there is nothing we can do.  Our EPT tables are
++     * shadowed by our hypervisor, and not walked by hardware.
++     */
++    if ( cpu_has_hypervisor )
++        return false;
++
++    if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
++        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
++
++    if ( caps & ARCH_CAPS_IF_PSCHANGE_MC_NO )
++        return false;
++
++    /*
++     * IF_PSCHANGE_MC is only known to affect Intel Family 6 processors at
++     * this time.
++     */
++    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
++         boot_cpu_data.x86 != 6 )
++        return false;
++
++    switch ( boot_cpu_data.x86_model )
++    {
++        /*
++         * Core processors since at least Nehalem are vulnerable.
++         */
++    case 0x1f: /* Auburndale / Havendale */
++    case 0x1e: /* Nehalem */
++    case 0x1a: /* Nehalem EP */
++    case 0x2e: /* Nehalem EX */
++    case 0x25: /* Westmere */
++    case 0x2c: /* Westmere EP */
++    case 0x2f: /* Westmere EX */
++    case 0x2a: /* SandyBridge */
++    case 0x2d: /* SandyBridge EP/EX */
++    case 0x3a: /* IvyBridge */
++    case 0x3e: /* IvyBridge EP/EX */
++    case 0x3c: /* Haswell */
++    case 0x3f: /* Haswell EX/EP */
++    case 0x45: /* Haswell D */
++    case 0x46: /* Haswell H */
++    case 0x3d: /* Broadwell */
++    case 0x47: /* Broadwell H */
++    case 0x4f: /* Broadwell EP/EX */
++    case 0x56: /* Broadwell D */
++    case 0x4e: /* Skylake M */
++    case 0x5e: /* Skylake D */
++    case 0x55: /* Skylake-X / Cascade Lake */
++    case 0x8e: /* Kaby / Coffee / Whiskey Lake M */
++    case 0x9e: /* Kaby / Coffee / Whiskey Lake D */
++        return true;
++
++        /*
++         * Atom processors are not vulnerable.
++         */
++    case 0x1c: /* Pineview */
++    case 0x26: /* Lincroft */
++    case 0x27: /* Penwell */
++    case 0x35: /* Cloverview */
++    case 0x36: /* Cedarview */
++    case 0x37: /* Baytrail / Valleyview (Silvermont) */
++    case 0x4d: /* Avoton / Rangely (Silvermont) */
++    case 0x4c: /* Cherrytrail / Braswell */
++    case 0x4a: /* Merrifield */
++    case 0x5a: /* Moorefield */
++    case 0x5c: /* Goldmont */
++    case 0x5d: /* SoFIA 3G Granite/ES2.1 */
++    case 0x65: /* SoFIA LTE AOSP */
++    case 0x5f: /* Denverton */
++    case 0x6e: /* Cougar Mountain */
++    case 0x75: /* Lightning Mountain */
++    case 0x7a: /* Gemini Lake */
++    case 0x86: /* Jacobsville */
++
++        /*
++         * Knights processors are not vulnerable.
++         */
++    case 0x57: /* Knights Landing */
++    case 0x85: /* Knights Mill */
++        return false;
++
++    default:
++        printk("Unrecognised CPU model %#x - assuming vulnerable to IF_PSCHANGE_MC\n",
++               boot_cpu_data.x86_model);
++        return true;
++    }
++}
++
+ const struct hvm_function_table * __init start_vmx(void)
+ {
+     set_in_cr4(X86_CR4_VMXE);
+@@ -2465,6 +2561,17 @@ const struct hvm_function_table * __init start_vmx(void)
+      */
+     if ( cpu_has_vmx_ept && (cpu_has_vmx_pat || opt_force_ept) )
+     {
++        bool cpu_has_bug_pschange_mc = has_if_pschange_mc();
++
++        if ( opt_ept_exec_sp == -1 )
++        {
++            /* Default to non-executable superpages on vulnerable hardware. */
++            opt_ept_exec_sp = !cpu_has_bug_pschange_mc;
++
++            if ( cpu_has_bug_pschange_mc )
++                printk("VMX: Disabling executable EPT superpages due to CVE-2018-12207\n");
++        }
++
+         vmx_function_table.hap_supported = 1;
+         vmx_function_table.altp2m_supported = 1;
+ 
+diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
+index 952ebad82f..834d4798c8 100644
+--- a/xen/arch/x86/mm/p2m-ept.c
++++ b/xen/arch/x86/mm/p2m-ept.c
+@@ -174,6 +174,12 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
+             break;
+     }
+     
++    /*
++     * Don't create executable superpages if we need to shatter them to
++     * protect against CVE-2018-12207.
++     */
++    if ( !opt_ept_exec_sp && is_epte_superpage(entry) )
++        entry->x = 0;
+ }
+ 
+ #define GUEST_TABLE_MAP_FAILED  0
+diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
+index ebaa74449b..371b912887 100644
+--- a/xen/include/asm-x86/hvm/vmx/vmx.h
++++ b/xen/include/asm-x86/hvm/vmx/vmx.h
+@@ -28,6 +28,8 @@
+ #include <asm/hvm/trace.h>
+ #include <asm/hvm/vmx/vmcs.h>
+ 
++extern int8_t opt_ept_exec_sp;
++
+ typedef union {
+     struct {
+         u64 r       :   1,  /* bit 0 - Read permission */
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 637259bd1f..32746aa8ae 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -52,6 +52,7 @@
+ #define ARCH_CAPS_SKIP_L1DFL		(_AC(1, ULL) << 3)
+ #define ARCH_CAPS_SSB_NO		(_AC(1, ULL) << 4)
+ #define ARCH_CAPS_MDS_NO		(_AC(1, ULL) << 5)
++#define ARCH_CAPS_IF_PSCHANGE_MC_NO	(_AC(1, ULL) << 6)
+ 
+ #define MSR_FLUSH_CMD			0x0000010b
+ #define FLUSH_CMD_L1D			(_AC(1, ULL) << 0)
diff --git a/system/xen/xsa/xsa304-4.12-3.patch b/system/xen/xsa/xsa304-4.12-3.patch
new file mode 100644
index 0000000000000..04b4c454f24fa
--- /dev/null
+++ b/system/xen/xsa/xsa304-4.12-3.patch
@@ -0,0 +1,108 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/vtx: Allow runtime modification of the exec-sp setting
+
+See patch for details.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index e283017015..84221fe60a 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -936,6 +936,21 @@ introduced with the Nehalem architecture.
+     If HVM guest kernels are trusted not to mount a DoS against the system,
+     this option can enabled to regain performance.
+ 
++    This boolean may be modified at runtime using `xl set-parameters
++    ept=[no-]exec-sp` to switch between fast and secure.
++
++    *   When switching from secure to fast, preexisting HVM domains will run
++        at their current performance until they are rebooted; new domains will
++        run without any overhead.
++
++    *   When switching from fast to secure, all HVM domains will immediately
++        suffer a performance penalty.
++
++    **Warning: No guarantee is made that this runtime option will be retained
++      indefinitely, or that it will retain this exact behaviour.  It is
++      intended as an emergency option for people who first chose fast, then
++      change their minds to secure, and wish not to reboot.**
++
+ ### extra_guest_irqs
+ > `= [<domU number>][,<dom0 number>]`
+ 
+diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
+index ec5ab860ad..c4d8a5ba78 100644
+--- a/xen/arch/x86/hvm/vmx/vmcs.c
++++ b/xen/arch/x86/hvm/vmx/vmcs.c
+@@ -95,6 +95,41 @@ static int __init parse_ept_param(const char *s)
+ }
+ custom_param("ept", parse_ept_param);
+ 
++static int parse_ept_param_runtime(const char *s)
++{
++    int val;
++
++    if ( !cpu_has_vmx_ept || !hvm_funcs.hap_supported ||
++         !(hvm_funcs.hap_capabilities &
++           (HVM_HAP_SUPERPAGE_2MB | HVM_HAP_SUPERPAGE_1GB)) )
++    {
++        printk("VMX: EPT not available, or not in use - ignoring\n");
++        return 0;
++    }
++
++    if ( (val = parse_boolean("exec-sp", s, NULL)) < 0 )
++        return -EINVAL;
++
++    if ( val != opt_ept_exec_sp )
++    {
++        struct domain *d;
++
++        opt_ept_exec_sp = val;
++
++        rcu_read_lock(&domlist_read_lock);
++        for_each_domain ( d )
++            if ( paging_mode_hap(d) )
++                p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
++        rcu_read_unlock(&domlist_read_lock);
++    }
++
++    printk("VMX: EPT executable superpages %sabled\n",
++           val ? "en" : "dis");
++
++    return 0;
++}
++custom_runtime_only_param("ept", parse_ept_param_runtime);
++
+ /* Dynamic (run-time adjusted) execution control flags. */
+ u32 vmx_pin_based_exec_control __read_mostly;
+ u32 vmx_cpu_based_exec_control __read_mostly;
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index f518f86493..16608098b1 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -289,15 +289,20 @@ static void change_entry_type_global(struct p2m_domain *p2m,
+                                      p2m_type_t ot, p2m_type_t nt)
+ {
+     p2m->change_entry_type_global(p2m, ot, nt);
+-    p2m->global_logdirty = (nt == p2m_ram_logdirty);
++    /* Don't allow 'recalculate' operations to change the logdirty state. */
++    if ( ot != nt )
++        p2m->global_logdirty = (nt == p2m_ram_logdirty);
+ }
+ 
++/*
++ * May be called with ot = nt = p2m_ram_rw for its side effect of
++ * recalculating all PTEs in the p2m.
++ */
+ void p2m_change_entry_type_global(struct domain *d,
+                                   p2m_type_t ot, p2m_type_t nt)
+ {
+     struct p2m_domain *hostp2m = p2m_get_hostp2m(d);
+ 
+-    ASSERT(ot != nt);
+     ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
+ 
+     p2m_lock(hostp2m);
diff --git a/system/xen/xsa/xsa305-4.12-1.patch b/system/xen/xsa/xsa305-4.12-1.patch
new file mode 100644
index 0000000000000..e1a91a52640b6
--- /dev/null
+++ b/system/xen/xsa/xsa305-4.12-1.patch
@@ -0,0 +1,288 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/tsx: Introduce tsx= to use MSR_TSX_CTRL when available
+
+To protect against the TSX Async Abort speculative vulnerability, Intel have
+released new microcode for affected parts which introduce the MSR_TSX_CTRL
+control, which allows TSX to be turned off.  This will be architectural on
+future parts.
+
+Introduce tsx= to provide a global on/off for TSX, including its enumeration
+via CPUID.  Provide stub virtualisation of this MSR, as it is not exposed to
+guests at the moment.
+
+VMs may have booted before microcode is loaded, or before hosts have rebooted,
+and they still want to migrate freely.  A VM which booted seeing TSX can
+migrate safely to hosts with TSX disabled - TSX will start unconditionally
+aborting, but still behave in a manner compatible with the ABI.
+
+The guest-visible behaviour is equivalent to late loading the microcode and
+setting the RTM_DISABLE bit in the course of live patching.
+
+This is part of XSA-305 / CVE-2019-11135
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index e283017015..b7e1bf8e8b 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2033,6 +2033,20 @@ Xen version.
+ ### tsc (x86)
+ > `= unstable | skewed | stable:socket`
+ 
++### tsx
++    = <bool>
++
++    Applicability: x86
++    Default: true
++
++Controls for the use of Transactional Synchronization eXtensions.
++
++On Intel parts released in Q3 2019 (with updated microcode), and future parts,
++a control has been introduced which allows TSX to be turned off.
++
++On systems with the ability to turn TSX off, this boolean offers system wide
++control of whether TSX is enabled or disabled.
++
+ ### ucode (x86)
+ > `= [<integer> | scan]`
+ 
+diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
+index 8a8d8f060f..9b9a4435fb 100644
+--- a/xen/arch/x86/Makefile
++++ b/xen/arch/x86/Makefile
+@@ -66,6 +66,7 @@ obj-y += sysctl.o
+ obj-y += time.o
+ obj-y += trace.o
+ obj-y += traps.o
++obj-y += tsx.o
+ obj-y += usercopy.o
+ obj-y += x86_emulate.o
+ obj-$(CONFIG_TBOOT) += tboot.o
+diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
+index 57e80694f2..1727497459 100644
+--- a/xen/arch/x86/cpuid.c
++++ b/xen/arch/x86/cpuid.c
+@@ -524,6 +524,20 @@ void recalculate_cpuid_policy(struct domain *d)
+     if ( cpu_has_itsc && (d->disable_migrate || d->arch.vtsc) )
+         __set_bit(X86_FEATURE_ITSC, max_fs);
+ 
++    /*
++     * On hardware with MSR_TSX_CTRL, the admin may have elected to disable
++     * TSX and hide the feature bits.  Migrating-in VMs may have been booted
++     * pre-mitigation when the TSX features were visible.
++     *
++     * This situation is compatible (albeit with a perf hit to any TSX code in
++     * the guest), so allow the feature bits to remain set.
++     */
++    if ( cpu_has_tsx_ctrl )
++    {
++        __set_bit(X86_FEATURE_HLE, max_fs);
++        __set_bit(X86_FEATURE_RTM, max_fs);
++    }
++
+     /* Clamp the toolstacks choices to reality. */
+     for ( i = 0; i < ARRAY_SIZE(fs); i++ )
+         fs[i] &= max_fs[i];
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 56de0fe9e1..c2722d7c73 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -132,6 +132,7 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+     case MSR_FLUSH_CMD:
+         /* Write-only */
+     case MSR_TSX_FORCE_ABORT:
++    case MSR_TSX_CTRL:
+         /* Not offered to guests. */
+         goto gp_fault;
+ 
+@@ -260,6 +261,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+     case MSR_ARCH_CAPABILITIES:
+         /* Read-only */
+     case MSR_TSX_FORCE_ABORT:
++    case MSR_TSX_CTRL:
+         /* Not offered to guests. */
+         goto gp_fault;
+ 
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index cf790f36ef..c1c7c44000 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -1594,6 +1594,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ 
+     early_microcode_init();
+ 
++    tsx_init(); /* Needs microcode.  May change HLE/RTM feature bits. */
++
+     identify_cpu(&boot_cpu_data);
+ 
+     set_in_cr4(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT);
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 737a44f055..e21cf0a310 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -376,6 +376,8 @@ void start_secondary(void *unused)
+     if ( boot_cpu_has(X86_FEATURE_IBRSB) )
+         wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
+ 
++    tsx_init(); /* Needs microcode.  May change HLE/RTM feature bits. */
++
+     if ( xen_guest )
+         hypervisor_ap_setup();
+ 
+diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c
+new file mode 100644
+index 0000000000..a8ec2ccc69
+--- /dev/null
++++ b/xen/arch/x86/tsx.c
+@@ -0,0 +1,74 @@
++#include <xen/init.h>
++#include <asm/msr.h>
++
++/*
++ * Valid values:
++ *   1 => Explicit tsx=1
++ *   0 => Explicit tsx=0
++ *  -1 => Default, implicit tsx=1
++ *
++ * This is arranged such that the bottom bit encodes whether TSX is actually
++ * disabled, while identifying various explicit (>=0) and implicit (<0)
++ * conditions.
++ */
++int8_t __read_mostly opt_tsx = -1;
++int8_t __read_mostly cpu_has_tsx_ctrl = -1;
++
++static int __init parse_tsx(const char *s)
++{
++    int rc = 0, val = parse_bool(s, NULL);
++
++    if ( val >= 0 )
++        opt_tsx = val;
++    else
++        rc = -EINVAL;
++
++    return rc;
++}
++custom_param("tsx", parse_tsx);
++
++void tsx_init(void)
++{
++    /*
++     * This function is first called between microcode being loaded, and CPUID
++     * being scanned generally.  Calculate from raw data whether MSR_TSX_CTRL
++     * is available.
++     */
++    if ( unlikely(cpu_has_tsx_ctrl < 0) )
++    {
++        uint64_t caps = 0;
++
++        if ( boot_cpu_data.cpuid_level >= 7 &&
++             (cpuid_count_edx(7, 0) & cpufeat_mask(X86_FEATURE_ARCH_CAPS)) )
++            rdmsrl(MSR_ARCH_CAPABILITIES, caps);
++
++        cpu_has_tsx_ctrl = !!(caps & ARCH_CAPS_TSX_CTRL);
++    }
++
++    if ( cpu_has_tsx_ctrl )
++    {
++        uint64_t val;
++
++        rdmsrl(MSR_TSX_CTRL, val);
++
++        val &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
++        /* Check bottom bit only.  Higher bits are various sentinels. */
++        if ( !(opt_tsx & 1) )
++            val |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;
++
++        wrmsrl(MSR_TSX_CTRL, val);
++    }
++    else if ( opt_tsx >= 0 )
++        printk_once(XENLOG_WARNING
++                    "MSR_TSX_CTRL not available - Ignoring tsx= setting\n");
++}
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * tab-width: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 32746aa8ae..d5f3899f73 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -53,6 +53,7 @@
+ #define ARCH_CAPS_SSB_NO		(_AC(1, ULL) << 4)
+ #define ARCH_CAPS_MDS_NO		(_AC(1, ULL) << 5)
+ #define ARCH_CAPS_IF_PSCHANGE_MC_NO	(_AC(1, ULL) << 6)
++#define ARCH_CAPS_TSX_CTRL		(_AC(1, ULL) << 7)
+ 
+ #define MSR_FLUSH_CMD			0x0000010b
+ #define FLUSH_CMD_L1D			(_AC(1, ULL) << 0)
+@@ -60,6 +61,10 @@
+ #define MSR_TSX_FORCE_ABORT             0x0000010f
+ #define TSX_FORCE_ABORT_RTM             (_AC(1, ULL) <<  0)
+ 
++#define MSR_TSX_CTRL                    0x00000122
++#define TSX_CTRL_RTM_DISABLE            (_AC(1, ULL) <<  0)
++#define TSX_CTRL_CPUID_CLEAR            (_AC(1, ULL) <<  1)
++
+ /* Intel MSRs. Some also available on other CPUs */
+ #define MSR_IA32_PERFCTR0		0x000000c1
+ #define MSR_IA32_A_PERFCTR0		0x000004c1
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index d33ac34d29..1b52712180 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -263,6 +263,16 @@ static always_inline unsigned int cpuid_count_ebx(
+     return ebx;
+ }
+ 
++static always_inline unsigned int cpuid_count_edx(
++    unsigned int leaf, unsigned int subleaf)
++{
++    unsigned int edx, tmp;
++
++    cpuid_count(leaf, subleaf, &tmp, &tmp, &tmp, &edx);
++
++    return edx;
++}
++
+ static inline unsigned long read_cr0(void)
+ {
+     unsigned long cr0;
+@@ -609,6 +619,9 @@ static inline uint8_t get_cpu_family(uint32_t raw, uint8_t *model,
+     return fam;
+ }
+ 
++extern int8_t opt_tsx, cpu_has_tsx_ctrl;
++void tsx_init(void);
++
+ #endif /* !__ASSEMBLY__ */
+ 
+ #endif /* __ASM_X86_PROCESSOR_H */
+diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
+index 89939f43c8..6529f12dae 100644
+--- a/xen/include/xen/lib.h
++++ b/xen/include/xen/lib.h
+@@ -114,6 +114,16 @@ extern int printk_ratelimit(void);
+ #define gprintk(lvl, fmt, args...) \
+     printk(XENLOG_GUEST lvl "%pv " fmt, current, ## args)
+ 
++#define printk_once(fmt, args...)               \
++({                                              \
++    static bool __read_mostly once_;            \
++    if ( unlikely(!once_) )                     \
++    {                                           \
++        once_ = true;                           \
++        printk(fmt, ## args);                   \
++    }                                           \
++})
++
+ #ifdef NDEBUG
+ 
+ static inline void
diff --git a/system/xen/xsa/xsa305-4.12-2.patch b/system/xen/xsa/xsa305-4.12-2.patch
new file mode 100644
index 0000000000000..07fba86287f8a
--- /dev/null
+++ b/system/xen/xsa/xsa305-4.12-2.patch
@@ -0,0 +1,192 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Mitigate the TSX Asynchronous Abort sidechannel
+
+See patch documentation and comments.
+
+This is part of XSA-305 / CVE-2019-11135
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index b7e1bf8e8b..74e1e35b88 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -1920,7 +1920,7 @@ extreme care.**
+ An overall boolean value, `spec-ctrl=no`, can be specified to turn off all
+ mitigations, including pieces of infrastructure used to virtualise certain
+ mitigation features for guests.  This also includes settings which `xpti`,
+-`smt`, `pv-l1tf` control, unless the respective option(s) have been
++`smt`, `pv-l1tf`, `tsx` control, unless the respective option(s) have been
+ specified earlier on the command line.
+ 
+ Alternatively, a slightly more restricted `spec-ctrl=no-xen` can be used to
+@@ -2037,7 +2037,7 @@ Xen version.
+     = <bool>
+ 
+     Applicability: x86
+-    Default: true
++    Default: false on parts vulnerable to TAA, true otherwise
+ 
+ Controls for the use of Transactional Synchronization eXtensions.
+ 
+@@ -2047,6 +2047,19 @@ a control has been introduced which allows TSX to be turned off.
+ On systems with the ability to turn TSX off, this boolean offers system wide
+ control of whether TSX is enabled or disabled.
+ 
++On parts vulnerable to CVE-2019-11135 / TSX Asynchronous Abort, the following
++logic applies:
++
++ * An explicit `tsx=` choice is honoured, even if it is `true` and would
++   result in a vulnerable system.
++
++ * When no explicit `tsx=` choice is given, parts vulnerable to TAA will be
++   mitigated by disabling TSX, as this is the lowest overhead option.
++
++ * If the use of TSX is important, the more expensive TAA mitigations can be
++   opted in to with `smt=0 spec-ctrl=md-clear`, at which point TSX will remain
++   active by default.
++
+ ### ucode (x86)
+ > `= [<integer> | scan]`
+ 
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index b37d40e643..800139d79c 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -96,6 +96,9 @@ static int __init parse_spec_ctrl(const char *s)
+             if ( opt_pv_l1tf_domu < 0 )
+                 opt_pv_l1tf_domu = 0;
+ 
++            if ( opt_tsx == -1 )
++                opt_tsx = -3;
++
+         disable_common:
+             opt_rsb_pv = false;
+             opt_rsb_hvm = false;
+@@ -306,7 +309,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+     printk("Speculative mitigation facilities:\n");
+ 
+     /* Hardware features which pertain to speculative mitigations. */
+-    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n",
++    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
+            (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
+            (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
+@@ -318,7 +321,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+            (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "",
+            (caps & ARCH_CAPS_SKIP_L1DFL)            ? " SKIP_L1DFL": "",
+            (caps & ARCH_CAPS_SSB_NO)                ? " SSB_NO"    : "",
+-           (caps & ARCH_CAPS_MDS_NO)                ? " MDS_NO"    : "");
++           (caps & ARCH_CAPS_MDS_NO)                ? " MDS_NO"    : "",
++           (caps & ARCH_CAPS_TSX_CTRL)              ? " TSX_CTRL"  : "",
++           (caps & ARCH_CAPS_TAA_NO)                ? " TAA_NO"    : "");
+ 
+     /* Compiled-in support which pertains to mitigations. */
+     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
+@@ -332,7 +337,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+                "\n");
+ 
+     /* Settings for Xen's protection, irrespective of guests. */
+-    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s%s\n",
++    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s\n",
+            thunk == THUNK_NONE      ? "N/A" :
+            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+            thunk == THUNK_LFENCE    ? "LFENCE" :
+@@ -341,6 +346,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+            (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ? "IBRS+" :  "IBRS-",
+            !boot_cpu_has(X86_FEATURE_SSBD)           ? "" :
+            (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
++           !(caps & ARCH_CAPS_TSX_CTRL)              ? "" :
++           (opt_tsx & 1)                             ? " TSX+" : " TSX-",
+            opt_ibpb                                  ? " IBPB"  : "",
+            opt_l1d_flush                             ? " L1D_FLUSH" : "",
+            opt_md_clear_pv || opt_md_clear_hvm       ? " VERW"  : "");
+@@ -862,6 +869,7 @@ void __init init_speculation_mitigations(void)
+ {
+     enum ind_thunk thunk = THUNK_DEFAULT;
+     bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled;
++    bool cpu_has_bug_taa;
+     uint64_t caps = 0;
+ 
+     if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
+@@ -1086,6 +1094,53 @@ void __init init_speculation_mitigations(void)
+             "enabled.  Mitigations will not be fully effective.  Please\n"
+             "choose an explicit smt=<bool> setting.  See XSA-297.\n");
+ 
++    /*
++     * Vulnerability to TAA is a little complicated to quantify.
++     *
++     * In the pipeline, it is just another way to get speculative access to
++     * stale load port, store buffer or fill buffer data, and therefore can be
++     * considered a superset of MDS (on TSX-capable parts).  On parts which
++     * predate MDS_NO, the existing VERW flushing will mitigate this
++     * sidechannel as well.
++     *
++     * On parts which contain MDS_NO, the lack of VERW flushing means that an
++     * attacker can still use TSX to target microarchitectural buffers to leak
++     * secrets.  Therefore, we consider TAA to be the set of TSX-capable parts
++     * which have MDS_NO but lack TAA_NO.
++     *
++     * Note: cpu_has_rtm (== hle) could already be hidden by `tsx=0` on the
++     *       cmdline.  MSR_TSX_CTRL will only appear on TSX-capable parts, so
++     *       we check both to spot TSX in a microcode/cmdline independent way.
++     */
++    cpu_has_bug_taa =
++        (cpu_has_rtm || (caps & ARCH_CAPS_TSX_CTRL)) &&
++        (caps & (ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO;
++
++    /*
++     * On TAA-affected hardware, disabling TSX is the preferred mitigation, vs
++     * the MDS mitigation of disabling HT and using VERW flushing.
++     *
++     * On CPUs which advertise MDS_NO, VERW has no flushing side effect until
++     * the TSX_CTRL microcode is loaded, despite the MD_CLEAR CPUID bit being
++     * advertised, and there isn't a MD_CLEAR_2 flag to use...
++     *
++     * If we're on affected hardware, able to do something about it (which
++     * implies that VERW now works), no explicit TSX choice and traditional
++     * MDS mitigations (no-SMT, VERW) not obviously in use (someone might
++     * plausibly value TSX higher than Hyperthreading...), disable TSX to
++     * mitigate TAA.
++     */
++    if ( opt_tsx == -1 && cpu_has_bug_taa && (caps & ARCH_CAPS_TSX_CTRL) &&
++         ((hw_smt_enabled && opt_smt) ||
++          !boot_cpu_has(X86_FEATURE_SC_VERW_IDLE)) )
++    {
++        setup_clear_cpu_cap(X86_FEATURE_HLE);
++        setup_clear_cpu_cap(X86_FEATURE_RTM);
++
++        opt_tsx = 0;
++        tsx_init();
++    }
++
+     print_details(thunk, caps);
+ 
+     /*
+diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c
+index a8ec2ccc69..2d202a0d4e 100644
+--- a/xen/arch/x86/tsx.c
++++ b/xen/arch/x86/tsx.c
+@@ -5,7 +5,8 @@
+  * Valid values:
+  *   1 => Explicit tsx=1
+  *   0 => Explicit tsx=0
+- *  -1 => Default, implicit tsx=1
++ *  -1 => Default, implicit tsx=1, may change to 0 to mitigate TAA
++ *  -3 => Implicit tsx=1 (feed-through from spec-ctrl=0)
+  *
+  * This is arranged such that the bottom bit encodes whether TSX is actually
+  * disabled, while identifying various explicit (>=0) and implicit (<0)
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index d5f3899f73..3971b992d3 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -54,6 +54,7 @@
+ #define ARCH_CAPS_MDS_NO		(_AC(1, ULL) << 5)
+ #define ARCH_CAPS_IF_PSCHANGE_MC_NO	(_AC(1, ULL) << 6)
+ #define ARCH_CAPS_TSX_CTRL		(_AC(1, ULL) << 7)
++#define ARCH_CAPS_TAA_NO		(_AC(1, ULL) << 8)
+ 
+ #define MSR_FLUSH_CMD			0x0000010b
+ #define FLUSH_CMD_L1D			(_AC(1, ULL) << 0)
author	Mario Preksavec <mario@slackware.hr>	2019-11-19 13:17:56 +0100
committer	Willy Sudiarto Raharjo <willysr@slackbuilds.org>	2019-11-23 16:02:01 +0700
commit	903c02712d4cf39ae8218eb47149258dfa8c7d8a (patch)
tree	61c869ed4904270e41898b9bcf74090822e28418 /system/xen
parent	604be6a3da8dc95e2d89a426877c7f4021eb91df (diff)