Diffstat (limited to 'system/xen/xsa/xsa373-4.15-3.patch')
-rw-r--r--  system/xen/xsa/xsa373-4.15-3.patch | 163
1 file changed, 163 insertions(+), 0 deletions(-)
diff --git a/system/xen/xsa/xsa373-4.15-3.patch b/system/xen/xsa/xsa373-4.15-3.patch
new file mode 100644
index 0000000000000..c7ddf5d6441ce
--- /dev/null
+++ b/system/xen/xsa/xsa373-4.15-3.patch
@@ -0,0 +1,163 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: VT-d: eliminate flush related timeouts
+
+Leaving an in-progress operation pending when it appears to take too
+long is problematic: If e.g. a QI command completed later, the write to
+the "poll slot" may instead be understood to signal a subsequently
+started command's completion. Also our accounting of the timeout period
+was actually wrong: We included the time it took for the command to
+actually make it to the front of the queue, which could be heavily
+affected by guests other than the one for which the flush is being
+performed.
+
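The hazard described above can be shown in isolation. Below is a minimal standalone C sketch, not part of the patch; the constant values and the hw_completes_cmd1() helper are hypothetical stand-ins for the hardware's status write. A wait abandoned on timeout leaves a poll slot that a late hardware write can still flip, so the next command reusing that slot appears to complete without ever executing.

    #include <stdint.h>
    #include <stdio.h>

    #define QINVAL_STAT_INIT 1   /* hypothetical values, for illustration only */
    #define QINVAL_STAT_DONE 2

    static volatile uint32_t poll_slot = QINVAL_STAT_INIT;

    /* Stands in for the late hardware completion write for command #1. */
    static void hw_completes_cmd1(void) { poll_slot = QINVAL_STAT_DONE; }

    int main(void)
    {
        /* Command #1 issued; software times out and abandons the wait. */
        poll_slot = QINVAL_STAT_INIT;

        /* Command #2 reuses the same poll slot... */
        poll_slot = QINVAL_STAT_INIT;

        /* ...then the stale completion for #1 finally lands. */
        hw_completes_cmd1();

        /* Command #2's wait loop now sees DONE although #2 never ran. */
        if (poll_slot == QINVAL_STAT_DONE)
            printf("command #2 misreported as complete\n");
        return 0;
    }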
+Do away with all timeout detection on all flush related code paths.
+Log excessively long processing times (with a progressive threshold) to
+have some indication of problems in this area.
+
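The "progressive threshold" works as follows: the warning limit starts at one base timeout and, on every overrun, is widened with threshold |= threshold << 1, giving 1, 3, 7, 15, ... (2^n - 1), so each further warning requires roughly double the previous wait. A standalone sketch of just that update, outside the patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned int threshold = 1;

        /* Same update the patch applies each time a flush overruns. */
        for (int i = 0; i < 6; i++) {
            printf("warn after %u x base timeout\n", threshold);
            threshold |= threshold << 1;   /* 1, 3, 7, 15, 31, 63 */
        }
        return 0;
    }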
+Additionally log (once) if qinval_next_index() didn't immediately find
+an available slot. Together with the earlier change sizing the queue(s)
+dynamically, we should now have a guarantee that with our fully
+synchronous model any demand for slots can actually be satisfied.
+
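For reference, qinval_next_index() (see the qinval.c hunk below) treats the invalidation queue as a power-of-two ring and considers it full when advancing the tail would collide with the head. A standalone sketch of that test, with a hypothetical queue size:

    #include <stdio.h>

    #define QI_ENTRY_NR 256u   /* hypothetical size; must be a power of two */

    /* Mirrors the (tail+1 == head) full-queue test in qinval_next_index(). */
    static int queue_full(unsigned int tail, unsigned int head)
    {
        return ((tail + 1) & (QI_ENTRY_NR - 1)) == head;
    }

    int main(void)
    {
        printf("%d\n", queue_full(10, 12));             /* 0: slot available */
        printf("%d\n", queue_full(QI_ENTRY_NR - 1, 0)); /* 1: ring is full   */
        return 0;
    }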
+This is part of XSA-373 / CVE-2021-28692.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Paul Durrant <paul@xen.org>
+
+--- a/xen/drivers/passthrough/vtd/dmar.h
++++ b/xen/drivers/passthrough/vtd/dmar.h
+@@ -127,6 +127,34 @@ do {
+ } \
+ } while (0)
+
++#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts) \
++do { \
++ static unsigned int __read_mostly threshold = 1; \
++ s_time_t start = NOW(); \
++ s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \
++ \
++ for ( ; ; ) \
++ { \
++ sts = op(iommu->reg, offset); \
++ if ( cond ) \
++ break; \
++ if ( timeout && NOW() > timeout ) \
++ { \
++ threshold |= threshold << 1; \
++ printk(XENLOG_WARNING VTDPREFIX \
++ " IOMMU#%u: %s flush taking too long\n", \
++ iommu->index, what); \
++ timeout = 0; \
++ } \
++ cpu_relax(); \
++ } \
++ \
++ if ( !timeout ) \
++ printk(XENLOG_WARNING VTDPREFIX \
++ " IOMMU#%u: %s flush took %lums\n", \
++ iommu->index, what, (NOW() - start) / 1000000); \
++} while ( false )
++
+ int vtd_hw_check(void);
+ void disable_pmr(struct vtd_iommu *iommu);
+ int is_igd_drhd(struct acpi_drhd_unit *drhd);
+--- a/xen/drivers/passthrough/vtd/iommu.c
++++ b/xen/drivers/passthrough/vtd/iommu.c
+@@ -373,8 +373,8 @@ static void iommu_flush_write_buffer(str
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
+
+ /* Make sure hardware complete it */
+- IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
+- !(val & DMA_GSTS_WBFS), val);
++ IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl,
++ !(val & DMA_GSTS_WBFS), val);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ }
+@@ -423,8 +423,8 @@ int vtd_flush_context_reg(struct vtd_iom
+ dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
+
+ /* Make sure hardware complete it */
+- IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
+- !(val & DMA_CCMD_ICC), val);
++ IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq,
++ !(val & DMA_CCMD_ICC), val);
+
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+ /* flush context entry will implicitly flush write buffer */
+@@ -501,8 +501,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu
+ dmar_writeq(iommu->reg, tlb_offset + 8, val);
+
+ /* Make sure hardware complete it */
+- IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
+- !(val & DMA_TLB_IVT), val);
++ IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq,
++ !(val & DMA_TLB_IVT), val);
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ /* check IOTLB invalidation granularity */
+--- a/xen/drivers/passthrough/vtd/qinval.c
++++ b/xen/drivers/passthrough/vtd/qinval.c
+@@ -29,8 +29,6 @@
+ #include "extern.h"
+ #include "../ats.h"
+
+-#define VTD_QI_TIMEOUT 1
+-
+ static unsigned int __read_mostly qi_pg_order;
+ static unsigned int __read_mostly qi_entry_nr;
+
+@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st
+ /* (tail+1 == head) indicates a full queue, wait for HW */
+ while ( ((tail + 1) & (qi_entry_nr - 1)) ==
+ ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
++ {
++ printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n",
++ iommu->index);
+ cpu_relax();
++ }
+
+ return tail;
+ }
+@@ -180,23 +182,32 @@ static int __must_check queue_invalidate
+ /* Now we don't support interrupt method */
+ if ( sw )
+ {
+- s_time_t timeout;
+-
+- /* In case all wait descriptor writes to same addr with same data */
+- timeout = NOW() + MILLISECS(flush_dev_iotlb ?
+- iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);
++ static unsigned int __read_mostly threshold = 1;
++ s_time_t start = NOW();
++ s_time_t timeout = start + (flush_dev_iotlb
++ ? iommu_dev_iotlb_timeout
++ : 100) * MILLISECS(threshold);
+
+ while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE )
+ {
+- if ( NOW() > timeout )
++ if ( timeout && NOW() > timeout )
+ {
+- print_qi_regs(iommu);
++ threshold |= threshold << 1;
+ printk(XENLOG_WARNING VTDPREFIX
+- " Queue invalidate wait descriptor timed out\n");
+- return -ETIMEDOUT;
++ " IOMMU#%u: QI%s wait descriptor taking too long\n",
++ iommu->index, flush_dev_iotlb ? " dev" : "");
++ print_qi_regs(iommu);
++ timeout = 0;
+ }
+ cpu_relax();
+ }
++
++ if ( !timeout )
++ printk(XENLOG_WARNING VTDPREFIX
++ " IOMMU#%u: QI%s wait descriptor took %lums\n",
++ iommu->index, flush_dev_iotlb ? " dev" : "",
++ (NOW() - start) / 1000000);
++
+ return 0;
+ }
+