aboutsummaryrefslogtreecommitdiff
path: root/system/xen/xsa/xsa373-4.15-3.patch
blob: c7ddf5d6441ce4244eaf49a2d11599fc3f1d0230 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
From: Jan Beulich <jbeulich@suse.com>
Subject: VT-d: eliminate flush related timeouts

Leaving an in-progress operation pending when it appears to take too
long is problematic: If e.g. a QI command completed later, the write to
the "poll slot" may instead be understood to signal a subsequently
started command's completion. Also our accounting of the timeout period
was actually wrong: We included the time it took for the command to
actually make it to the front of the queue, which could be heavily
affected by guests other than the one for which the flush is being
performed.

Do away with all timeout detection on all flush related code paths.
Log excessively long processing times (with a progressive threshold) to
have some indication of problems in this area.

Additionally log (once) if qinval_next_index() didn't immediately find
an available slot. Together with the earlier change sizing the queue(s)
dynamically, we should now have a guarantee that with our fully
synchronous model any demand for slots can actually be satisfied.

This is part of XSA-373 / CVE-2021-28692.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>

--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -127,6 +127,34 @@ do {
     }                                                           \
 } while (0)
 
+#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts)       \
+do {                                                               \
+    static unsigned int __read_mostly threshold = 1;               \
+    s_time_t start = NOW();                                        \
+    s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \
+                                                                   \
+    for ( ; ; )                                                    \
+    {                                                              \
+        sts = op(iommu->reg, offset);                              \
+        if ( cond )                                                \
+            break;                                                 \
+        if ( timeout && NOW() > timeout )                          \
+        {                                                          \
+            threshold |= threshold << 1;                           \
+            printk(XENLOG_WARNING VTDPREFIX                        \
+                   " IOMMU#%u: %s flush taking too long\n",        \
+                   iommu->index, what);                            \
+            timeout = 0;                                           \
+        }                                                          \
+        cpu_relax();                                               \
+    }                                                              \
+                                                                   \
+    if ( !timeout )                                                \
+        printk(XENLOG_WARNING VTDPREFIX                            \
+               " IOMMU#%u: %s flush took %lums\n",                 \
+               iommu->index, what, (NOW() - start) / 10000000);    \
+} while ( false )
+
 int vtd_hw_check(void);
 void disable_pmr(struct vtd_iommu *iommu);
 int is_igd_drhd(struct acpi_drhd_unit *drhd);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -373,8 +373,8 @@ static void iommu_flush_write_buffer(str
     dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
 
     /* Make sure hardware complete it */
-    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
-                  !(val & DMA_GSTS_WBFS), val);
+    IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl,
+                     !(val & DMA_GSTS_WBFS), val);
 
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
@@ -423,8 +423,8 @@ int vtd_flush_context_reg(struct vtd_iom
     dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
 
     /* Make sure hardware complete it */
-    IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
-                  !(val & DMA_CCMD_ICC), val);
+    IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq,
+                     !(val & DMA_CCMD_ICC), val);
 
     spin_unlock_irqrestore(&iommu->register_lock, flags);
     /* flush context entry will implicitly flush write buffer */
@@ -501,8 +501,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu
     dmar_writeq(iommu->reg, tlb_offset + 8, val);
 
     /* Make sure hardware complete it */
-    IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
-                  !(val & DMA_TLB_IVT), val);
+    IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq,
+                     !(val & DMA_TLB_IVT), val);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 
     /* check IOTLB invalidation granularity */
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -29,8 +29,6 @@
 #include "extern.h"
 #include "../ats.h"
 
-#define VTD_QI_TIMEOUT	1
-
 static unsigned int __read_mostly qi_pg_order;
 static unsigned int __read_mostly qi_entry_nr;
 
@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st
     /* (tail+1 == head) indicates a full queue, wait for HW */
     while ( ((tail + 1) & (qi_entry_nr - 1)) ==
             ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+    {
+        printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n",
+                    iommu->index);
         cpu_relax();
+    }
 
     return tail;
 }
@@ -180,23 +182,32 @@ static int __must_check queue_invalidate
     /* Now we don't support interrupt method */
     if ( sw )
     {
-        s_time_t timeout;
-
-        /* In case all wait descriptor writes to same addr with same data */
-        timeout = NOW() + MILLISECS(flush_dev_iotlb ?
-                                    iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);
+        static unsigned int __read_mostly threshold = 1;
+        s_time_t start = NOW();
+        s_time_t timeout = start + (flush_dev_iotlb
+                                    ? iommu_dev_iotlb_timeout
+                                    : 100) * MILLISECS(threshold);
 
         while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE )
         {
-            if ( NOW() > timeout )
+            if ( timeout && NOW() > timeout )
             {
-                print_qi_regs(iommu);
+                threshold |= threshold << 1;
                 printk(XENLOG_WARNING VTDPREFIX
-                       " Queue invalidate wait descriptor timed out\n");
-                return -ETIMEDOUT;
+                       " IOMMU#%u: QI%s wait descriptor taking too long\n",
+                       iommu->index, flush_dev_iotlb ? " dev" : "");
+                print_qi_regs(iommu);
+                timeout = 0;
             }
             cpu_relax();
         }
+
+        if ( !timeout )
+            printk(XENLOG_WARNING VTDPREFIX
+                   " IOMMU#%u: QI%s wait descriptor took %lums\n",
+                   iommu->index, flush_dev_iotlb ? " dev" : "",
+                   (NOW() - start) / 10000000);
+
         return 0;
     }