diff options
Diffstat (limited to 'system/xen/xsa')
25 files changed, 0 insertions, 2324 deletions
diff --git a/system/xen/xsa/xsa226.patch b/system/xen/xsa/xsa226.patch deleted file mode 100644 index 48fae1217221e..0000000000000 --- a/system/xen/xsa/xsa226.patch +++ /dev/null @@ -1,133 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: grant_table: Default to v1, and disallow transitive grants - -The reference counting and locking discipline for transitive grants is broken. -Their use is therefore declared out of security support. - -This is XSA-226. - -Transitive grants are expected to be unconditionally available with grant -table v2. Hiding transitive grants alone is an ABI breakage for the guest. -Modern versions of Linux and the Windows PV drivers use grant table v1, but -older versions did use v2. - -In principle, disabling gnttab v2 entirely is the safer way to cause guests to -avoid using transitive grants. However, some older guests which defaulted to -using gnttab v2 don't tolerate falling back from v2 to v1 over migrate. - -This patch introduces a new command line option to control grant table -behaviour. One suboption allows a choice of the maximum grant table version -Xen will allow the guest to use, and defaults to v2. A different suboption -independently controls whether transitive grants can be used. - -The default case is: - - gnttab=max_ver:2 - -To disable gnttab v2 entirely, use: - - gnttab=max_ver:1 - -To allow gnttab v2 and transitive grants, use: - - gnttab=max_ver:2,transitive - -Reported-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 4002eab..af079b4 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -868,6 +868,22 @@ Controls EPT related features. - - Specify which console gdbstub should use. See **console**. - -+### gnttab -+> `= List of [ max_ver:<integer>, transitive ]` -+ -+> Default: `gnttab=max_ver:2,no-transitive` -+ -+Control various aspects of the grant table behaviour available to guests. -+ -+* `max_ver` Select the maximum grant table version to offer to guests. Valid -+version are 1 and 2. -+* `transitive` Permit or disallow the use of transitive grants. Note that the -+use of grant table v2 without transitive grants is an ABI breakage from the -+guests point of view. -+ -+*Warning:* -+Due to XSA-226, the use of transitive grants is outside of security support. -+ - ### gnttab\_max\_frames - > `= <integer>` - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..87131f8 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -50,6 +50,42 @@ integer_param("gnttab_max_nr_frames", max_nr_grant_frames); - unsigned int __read_mostly max_grant_frames; - integer_param("gnttab_max_frames", max_grant_frames); - -+static unsigned int __read_mostly opt_gnttab_max_version = 2; -+static bool __read_mostly opt_transitive_grants; -+ -+static void __init parse_gnttab(char *s) -+{ -+ char *ss; -+ -+ do { -+ ss = strchr(s, ','); -+ if ( ss ) -+ *ss = '\0'; -+ -+ if ( !strncmp(s, "max_ver:", 8) ) -+ { -+ long ver = simple_strtol(s + 8, NULL, 10); -+ -+ if ( ver >= 1 && ver <= 2 ) -+ opt_gnttab_max_version = ver; -+ } -+ else -+ { -+ bool val = !!strncmp(s, "no-", 3); -+ -+ if ( !val ) -+ s += 3; -+ -+ if ( !strcmp(s, "transitive") ) -+ opt_transitive_grants = val; -+ } -+ -+ s = ss + 1; -+ } while ( ss ); -+} -+ -+custom_param("gnttab", parse_gnttab); -+ - /* The maximum number of grant mappings is defined as a multiplier of the - * maximum number of grant table entries. This defines the multiplier used. - * Pretty arbitrary. [POLICY] -@@ -2191,6 +2227,10 @@ __acquire_grant_for_copy( - } - else if ( (shah->flags & GTF_type_mask) == GTF_transitive ) - { -+ if ( !opt_transitive_grants ) -+ PIN_FAIL(unlock_out_clear, GNTST_general_error, -+ "transitive grant disallowed by policy\n"); -+ - if ( !allow_transitive ) - PIN_FAIL(unlock_out_clear, GNTST_general_error, - "transitive grant when transitivity not allowed\n"); -@@ -3159,7 +3199,10 @@ do_grant_table_op( - } - case GNTTABOP_set_version: - { -- rc = gnttab_set_version(guest_handle_cast(uop, gnttab_set_version_t)); -+ if ( opt_gnttab_max_version == 1 ) -+ rc = -ENOSYS; /* Behave as before set_version was introduced. */ -+ else -+ rc = gnttab_set_version(guest_handle_cast(uop, gnttab_set_version_t)); - break; - } - case GNTTABOP_get_status_frames: diff --git a/system/xen/xsa/xsa227.patch b/system/xen/xsa/xsa227.patch deleted file mode 100644 index 86aa41e2d4868..0000000000000 --- a/system/xen/xsa/xsa227.patch +++ /dev/null @@ -1,52 +0,0 @@ -From fa7268b94f8a0a7792ee12d5b8e23a60e52a3a84 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 20 Jun 2017 19:18:54 +0100 -Subject: [PATCH] x86/grant: Disallow misaligned PTEs - -Pagetable entries must be aligned to function correctly. Disallow attempts -from the guest to have a grant PTE created at a misaligned address, which -would result in corruption of the L1 table with largely-guest-controlled -values. - -This is XSA-227 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/mm.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 97b3b4b..00f517a 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -3763,6 +3763,9 @@ static int create_grant_pte_mapping( - l1_pgentry_t ol1e; - struct domain *d = v->domain; - -+ if ( !IS_ALIGNED(pte_addr, sizeof(nl1e)) ) -+ return GNTST_general_error; -+ - adjust_guest_l1e(nl1e, d); - - gmfn = pte_addr >> PAGE_SHIFT; -@@ -3819,6 +3822,16 @@ static int destroy_grant_pte_mapping( - struct page_info *page; - l1_pgentry_t ol1e; - -+ /* -+ * addr comes from Xen's active_entry tracking so isn't guest controlled, -+ * but it had still better be PTE-aligned. -+ */ -+ if ( !IS_ALIGNED(addr, sizeof(ol1e)) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return GNTST_general_error; -+ } -+ - gmfn = addr >> PAGE_SHIFT; - page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC); - --- -2.1.4 - diff --git a/system/xen/xsa/xsa228.patch b/system/xen/xsa/xsa228.patch deleted file mode 100644 index 65add3a588ff7..0000000000000 --- a/system/xen/xsa/xsa228.patch +++ /dev/null @@ -1,198 +0,0 @@ -From 9a52c78eb4ff7836bf7ac9ecd918b289cead1f3f Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Mon, 31 Jul 2017 15:17:56 +0100 -Subject: [PATCH] gnttab: split maptrack lock to make it fulfill its purpose - again - -The way the lock is currently being used in get_maptrack_handle(), it -protects only the maptrack limit: The function acts on current's list -only, so races on list accesses are impossible even without the lock. - -Otoh list access races are possible between __get_maptrack_handle() and -put_maptrack_handle(), due to the invocation of the former for other -than current from steal_maptrack_handle(). Introduce a per-vCPU lock -for list accesses to become race free again. This lock will be -uncontended except when it becomes necessary to take the steal path, -i.e. in the common case there should be no meaningful performance -impact. - -When in get_maptrack_handle adds a stolen entry to a fresh, empty, -freelist, we think that there is probably no concurrency. However, -this is not a fast path and adding the locking there makes the code -clearly correct. - -Also, while we are here: the stolen maptrack_entry's tail pointer was -not properly set. Set it. - -This is XSA-228. - -Reported-by: Ian Jackson <ian.jackson@eu.citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com> ---- - docs/misc/grant-tables.txt | 7 ++++++- - xen/common/grant_table.c | 30 ++++++++++++++++++++++++------ - xen/include/xen/grant_table.h | 2 +- - xen/include/xen/sched.h | 1 + - 4 files changed, 32 insertions(+), 8 deletions(-) - -diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt -index 417ce2d..64da5cf 100644 ---- a/docs/misc/grant-tables.txt -+++ b/docs/misc/grant-tables.txt -@@ -87,7 +87,8 @@ is complete. - inconsistent grant table state such as current - version, partially initialized active table pages, - etc. -- grant_table->maptrack_lock : spinlock used to protect the maptrack free list -+ grant_table->maptrack_lock : spinlock used to protect the maptrack limit -+ v->maptrack_freelist_lock : spinlock used to protect the maptrack free list - active_grant_entry->lock : spinlock used to serialize modifications to - active entries - -@@ -102,6 +103,10 @@ is complete. - The maptrack free list is protected by its own spinlock. The maptrack - lock may be locked while holding the grant table lock. - -+ The maptrack_freelist_lock is an innermost lock. It may be locked -+ while holding other locks, but no other locks may be acquired within -+ it. -+ - Active entries are obtained by calling active_entry_acquire(gt, ref). - This function returns a pointer to the active entry after locking its - spinlock. The caller must hold the grant table read lock before -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..ee33bd8 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -304,11 +304,16 @@ __get_maptrack_handle( - { - unsigned int head, next, prev_head; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - do { - /* No maptrack pages allocated for this VCPU yet? */ - head = read_atomic(&v->maptrack_head); - if ( unlikely(head == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - /* - * Always keep one entry in the free list to make it easier to -@@ -316,12 +321,17 @@ __get_maptrack_handle( - */ - next = read_atomic(&maptrack_entry(t, head).ref); - if ( unlikely(next == MAPTRACK_TAIL) ) -+ { -+ spin_unlock(&v->maptrack_freelist_lock); - return -1; -+ } - - prev_head = head; - head = cmpxchg(&v->maptrack_head, prev_head, next); - } while ( head != prev_head ); - -+ spin_unlock(&v->maptrack_freelist_lock); -+ - return head; - } - -@@ -380,6 +390,8 @@ put_maptrack_handle( - /* 2. Add entry to the tail of the list on the original VCPU. */ - v = currd->vcpu[maptrack_entry(t, handle).vcpu]; - -+ spin_lock(&v->maptrack_freelist_lock); -+ - cur_tail = read_atomic(&v->maptrack_tail); - do { - prev_tail = cur_tail; -@@ -388,6 +400,8 @@ put_maptrack_handle( - - /* 3. Update the old tail entry to point to the new entry. */ - write_atomic(&maptrack_entry(t, prev_tail).ref, handle); -+ -+ spin_unlock(&v->maptrack_freelist_lock); - } - - static inline int -@@ -411,10 +425,6 @@ get_maptrack_handle( - */ - if ( nr_maptrack_frames(lgt) >= max_maptrack_frames ) - { -- /* -- * Can drop the lock since no other VCPU can be adding a new -- * frame once they've run out. -- */ - spin_unlock(&lgt->maptrack_lock); - - /* -@@ -426,8 +436,12 @@ get_maptrack_handle( - handle = steal_maptrack_handle(lgt, curr); - if ( handle == -1 ) - return -1; -+ spin_lock(&curr->maptrack_freelist_lock); -+ maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL; - curr->maptrack_tail = handle; -- write_atomic(&curr->maptrack_head, handle); -+ if ( curr->maptrack_head == MAPTRACK_TAIL ) -+ write_atomic(&curr->maptrack_head, handle); -+ spin_unlock(&curr->maptrack_freelist_lock); - } - return steal_maptrack_handle(lgt, curr); - } -@@ -460,12 +474,15 @@ get_maptrack_handle( - smp_wmb(); - lgt->maptrack_limit += MAPTRACK_PER_PAGE; - -+ spin_unlock(&lgt->maptrack_lock); -+ spin_lock(&curr->maptrack_freelist_lock); -+ - do { - new_mt[i - 1].ref = read_atomic(&curr->maptrack_head); - head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1); - } while ( head != new_mt[i - 1].ref ); - -- spin_unlock(&lgt->maptrack_lock); -+ spin_unlock(&curr->maptrack_freelist_lock); - - return handle; - } -@@ -3475,6 +3492,7 @@ grant_table_destroy( - - void grant_table_init_vcpu(struct vcpu *v) - { -+ spin_lock_init(&v->maptrack_freelist_lock); - v->maptrack_head = MAPTRACK_TAIL; - v->maptrack_tail = MAPTRACK_TAIL; - } -diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h -index 4e77899..100f2b3 100644 ---- a/xen/include/xen/grant_table.h -+++ b/xen/include/xen/grant_table.h -@@ -78,7 +78,7 @@ struct grant_table { - /* Mapping tracking table per vcpu. */ - struct grant_mapping **maptrack; - unsigned int maptrack_limit; -- /* Lock protecting the maptrack page list, head, and limit */ -+ /* Lock protecting the maptrack limit */ - spinlock_t maptrack_lock; - /* The defined versions are 1 and 2. Set to 0 if we don't know - what version to use yet. */ -diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h -index 6673b27..8690f29 100644 ---- a/xen/include/xen/sched.h -+++ b/xen/include/xen/sched.h -@@ -230,6 +230,7 @@ struct vcpu - int controller_pause_count; - - /* Grant table map tracking. */ -+ spinlock_t maptrack_freelist_lock; - unsigned int maptrack_head; - unsigned int maptrack_tail; - --- -2.1.4 - diff --git a/system/xen/xsa/xsa230.patch b/system/xen/xsa/xsa230.patch deleted file mode 100644 index c3b50c8aaa98d..0000000000000 --- a/system/xen/xsa/xsa230.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: correct pin status fixup for copy - -Regardless of copy operations only setting GNTPIN_hst*, GNTPIN_dev* -also need to be taken into account when deciding whether to clear -_GTF_{read,writ}ing. At least for consistency with code elsewhere the -read part better doesn't use any mask at all. - -This is XSA-230. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ae34547..9c9d33c 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2107,10 +2107,10 @@ __release_grant_for_copy( - static void __fixup_status_for_copy_pin(const struct active_grant_entry *act, - uint16_t *status) - { -- if ( !(act->pin & GNTPIN_hstw_mask) ) -+ if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - -- if ( !(act->pin & GNTPIN_hstr_mask) ) -+ if ( !act->pin ) - gnttab_clear_flag(_GTF_reading, status); - } - -@@ -2318,7 +2318,7 @@ __acquire_grant_for_copy( - - unlock_out_clear: - if ( !(readonly) && -- !(act->pin & GNTPIN_hstw_mask) ) -+ !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) ) - gnttab_clear_flag(_GTF_writing, status); - - if ( !act->pin ) diff --git a/system/xen/xsa/xsa231-4.9.patch b/system/xen/xsa/xsa231-4.9.patch deleted file mode 100644 index 251165e6bddb0..0000000000000 --- a/system/xen/xsa/xsa231-4.9.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: George Dunlap <george.dunlap@citrix.com> -Subject: xen/mm: make sure node is less than MAX_NUMNODES - -The output of MEMF_get_node(memflags) can be as large as nodeid_t can -hold (currently 255). This is then used as an index to arrays of size -MAX_NUMNODE, which is 64 on x86 and 1 on ARM, can be passed in by an -untrusted guest (via memory_exchange and increase_reservation) and is -not currently bounds-checked. - -Check the value in page_alloc.c before using it, and also check the -value in the hypercall call sites and return -EINVAL if appropriate. -Don't permit domains other than the hardware or control domain to -allocate node-constrained memory. - -This is XSA-231. - -Reported-by: Matthew Daley <mattd@bugfuzz.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/common/memory.c -+++ b/xen/common/memory.c -@@ -411,6 +411,31 @@ static void decrease_reservation(struct - a->nr_done = i; - } - -+static bool propagate_node(unsigned int xmf, unsigned int *memflags) -+{ -+ const struct domain *currd = current->domain; -+ -+ BUILD_BUG_ON(XENMEMF_get_node(0) != NUMA_NO_NODE); -+ BUILD_BUG_ON(MEMF_get_node(0) != NUMA_NO_NODE); -+ -+ if ( XENMEMF_get_node(xmf) == NUMA_NO_NODE ) -+ return true; -+ -+ if ( is_hardware_domain(currd) || is_control_domain(currd) ) -+ { -+ if ( XENMEMF_get_node(xmf) >= MAX_NUMNODES ) -+ return false; -+ -+ *memflags |= MEMF_node(XENMEMF_get_node(xmf)); -+ if ( xmf & XENMEMF_exact_node_request ) -+ *memflags |= MEMF_exact_node; -+ } -+ else if ( xmf & XENMEMF_exact_node_request ) -+ return false; -+ -+ return true; -+} -+ - static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg) - { - struct xen_memory_exchange exch; -@@ -483,6 +508,12 @@ static long memory_exchange(XEN_GUEST_HA - } - } - -+ if ( unlikely(!propagate_node(exch.out.mem_flags, &memflags)) ) -+ { -+ rc = -EINVAL; -+ goto fail_early; -+ } -+ - d = rcu_lock_domain_by_any_id(exch.in.domid); - if ( d == NULL ) - { -@@ -501,7 +532,6 @@ static long memory_exchange(XEN_GUEST_HA - d, - XENMEMF_get_address_bits(exch.out.mem_flags) ? : - (BITS_PER_LONG+PAGE_SHIFT))); -- memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags)); - - for ( i = (exch.nr_exchanged >> in_chunk_order); - i < (exch.in.nr_extents >> in_chunk_order); -@@ -864,12 +894,8 @@ static int construct_memop_from_reservat - } - read_unlock(&d->vnuma_rwlock); - } -- else -- { -- a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags)); -- if ( r->mem_flags & XENMEMF_exact_node_request ) -- a->memflags |= MEMF_exact_node; -- } -+ else if ( unlikely(!propagate_node(r->mem_flags, &a->memflags)) ) -+ return -EINVAL; - - return 0; - } ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -706,9 +706,13 @@ static struct page_info *alloc_heap_page - if ( node >= MAX_NUMNODES ) - node = cpu_to_node(smp_processor_id()); - } -+ else if ( unlikely(node >= MAX_NUMNODES) ) -+ { -+ ASSERT_UNREACHABLE(); -+ return NULL; -+ } - first_node = node; - -- ASSERT(node < MAX_NUMNODES); - ASSERT(zone_lo <= zone_hi); - ASSERT(zone_hi < NR_ZONES); - diff --git a/system/xen/xsa/xsa232.patch b/system/xen/xsa/xsa232.patch deleted file mode 100644 index 9e5f35c7d6dcd..0000000000000 --- a/system/xen/xsa/xsa232.patch +++ /dev/null @@ -1,23 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: grant_table: fix GNTTABOP_cache_flush handling - -Don't fall over a NULL grant_table pointer when the owner of the domain -is a system domain (DOMID_{XEN,IO} etc). - -This is XSA-232. - -Reported-by: Matthew Daley <mattd@bugfuzz.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3053,7 +3053,7 @@ static int cache_flush(gnttab_cache_flus - - page = mfn_to_page(mfn); - owner = page_get_owner_and_reference(page); -- if ( !owner ) -+ if ( !owner || !owner->grant_table ) - { - rcu_unlock_domain(d); - return -EPERM; diff --git a/system/xen/xsa/xsa233.patch b/system/xen/xsa/xsa233.patch deleted file mode 100644 index 6013c52b410be..0000000000000 --- a/system/xen/xsa/xsa233.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: Juergen Gross <jgross@suse.com> -Subject: tools/xenstore: dont unlink connection object twice - -A connection object of a domain with associated stubdom has two -parents: the domain and the stubdom. When cleaning up the list of -active domains in domain_cleanup() make sure not to unlink the -connection twice from the same domain. This could happen when the -domain and its stubdom are being destroyed at the same time leading -to the domain loop being entered twice. - -Additionally don't use talloc_free() in this case as it will remove -a random parent link, leading eventually to a memory leak. Use -talloc_unlink() instead specifying the context from which the -connection object should be removed. - -This is XSA-233. - -Reported-by: Eric Chanudet <chanudete@ainfosec.com> -Signed-off-by: Juergen Gross <jgross@suse.com> -Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com> - ---- a/tools/xenstore/xenstored_domain.c -+++ b/tools/xenstore/xenstored_domain.c -@@ -221,10 +221,11 @@ static int destroy_domain(void *_domain) - static void domain_cleanup(void) - { - xc_dominfo_t dominfo; -- struct domain *domain, *tmp; -+ struct domain *domain; - int notify = 0; - -- list_for_each_entry_safe(domain, tmp, &domains, list) { -+ again: -+ list_for_each_entry(domain, &domains, list) { - if (xc_domain_getinfo(*xc_handle, domain->domid, 1, - &dominfo) == 1 && - dominfo.domid == domain->domid) { -@@ -236,8 +237,12 @@ static void domain_cleanup(void) - if (!dominfo.dying) - continue; - } -- talloc_free(domain->conn); -- notify = 0; /* destroy_domain() fires the watch */ -+ if (domain->conn) { -+ talloc_unlink(talloc_autofree_context(), domain->conn); -+ domain->conn = NULL; -+ notify = 0; /* destroy_domain() fires the watch */ -+ goto again; -+ } - } - - if (notify) diff --git a/system/xen/xsa/xsa234-4.9.patch b/system/xen/xsa/xsa234-4.9.patch deleted file mode 100644 index 8dbf40172053a..0000000000000 --- a/system/xen/xsa/xsa234-4.9.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: also validate PTE permissions upon destroy/replace - -In order for PTE handling to match up with the reference counting done -by common code, presence and writability of grant mapping PTEs must -also be taken into account; validating just the frame number is not -enough. This is in particular relevant if a guest fiddles with grant -PTEs via non-grant hypercalls. - -Note that the flags being passed to replace_grant_host_mapping() -already happen to be those of the existing mapping, so no new function -parameter is needed. - -This is XSA-234. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -4058,7 +4058,8 @@ static int create_grant_pte_mapping( - } - - static int destroy_grant_pte_mapping( -- uint64_t addr, unsigned long frame, struct domain *d) -+ uint64_t addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct domain *d) - { - int rc = GNTST_okay; - void *va; -@@ -4104,17 +4105,29 @@ static int destroy_grant_pte_mapping( - - ol1e = *(l1_pgentry_t *)va; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -+ /* -+ * Check that the PTE supplied actually maps frame (with appropriate -+ * permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, -- "PTE entry %"PRIpte" for address %"PRIx64" doesn't match frame %lx\n", -- l1e_get_intpte(ol1e), addr, frame); -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" at %"PRIx64" doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto failed; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x at %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY - (l1, -@@ -4123,7 +4136,8 @@ static int destroy_grant_pte_mapping( - 0)) ) - { - page_unlock(page); -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", va); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto failed; - } -@@ -4191,7 +4205,8 @@ static int create_grant_va_mapping( - } - - static int replace_grant_va_mapping( -- unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ l1_pgentry_t nl1e, struct vcpu *v) - { - l1_pgentry_t *pl1e, ol1e; - unsigned long gl1mfn; -@@ -4227,20 +4242,33 @@ static int replace_grant_va_mapping( - - ol1e = *pl1e; - -- /* Check that the virtual address supplied is actually mapped to frame. */ -- if ( unlikely(l1e_get_pfn(ol1e) != frame) ) -- { -- gdprintk(XENLOG_WARNING, -- "PTE entry %lx for address %lx doesn't match frame %lx\n", -- l1e_get_pfn(ol1e), addr, frame); -+ /* -+ * Check that the virtual address supplied is actually mapped to frame -+ * (with appropriate permissions). -+ */ -+ if ( unlikely(l1e_get_pfn(ol1e) != frame) || -+ unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ (_PAGE_PRESENT | _PAGE_RW)) ) -+ { -+ gdprintk(XENLOG_ERR, -+ "PTE %"PRIpte" for %lx doesn't match grant (%"PRIpte")\n", -+ l1e_get_intpte(ol1e), addr, -+ l1e_get_intpte(l1e_from_pfn(frame, grant_pte_flags))); - rc = GNTST_general_error; - goto unlock_and_out; - } - -+ if ( unlikely((l1e_get_flags(ol1e) ^ grant_pte_flags) & -+ ~(_PAGE_AVAIL | PAGE_CACHE_ATTRS)) ) -+ gdprintk(XENLOG_WARNING, -+ "PTE flags %x for %"PRIx64" don't match grant (%x)\n", -+ l1e_get_flags(ol1e), addr, grant_pte_flags); -+ - /* Delete pagetable entry. */ - if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) ) - { -- gdprintk(XENLOG_WARNING, "Cannot delete PTE entry at %p\n", pl1e); -+ gdprintk(XENLOG_WARNING, "Cannot delete PTE entry for %"PRIx64"\n", -+ addr); - rc = GNTST_general_error; - goto unlock_and_out; - } -@@ -4254,9 +4282,11 @@ static int replace_grant_va_mapping( - } - - static int destroy_grant_va_mapping( -- unsigned long addr, unsigned long frame, struct vcpu *v) -+ unsigned long addr, unsigned long frame, unsigned int grant_pte_flags, -+ struct vcpu *v) - { -- return replace_grant_va_mapping(addr, frame, l1e_empty(), v); -+ return replace_grant_va_mapping(addr, frame, grant_pte_flags, -+ l1e_empty(), v); - } - - static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame, -@@ -4351,20 +4381,39 @@ int replace_grant_host_mapping( - unsigned long gl1mfn; - struct page_info *l1pg; - int rc; -+ unsigned int grant_pte_flags; - - if ( paging_mode_external(current->domain) ) - return replace_grant_p2m_mapping(addr, frame, new_addr, flags); - -+ grant_pte_flags = -+ _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB | _PAGE_NX; -+ -+ if ( flags & GNTMAP_application_map ) -+ grant_pte_flags |= _PAGE_USER; -+ if ( !(flags & GNTMAP_readonly) ) -+ grant_pte_flags |= _PAGE_RW; -+ /* -+ * On top of the explicit settings done by create_grant_host_mapping() -+ * also open-code relevant parts of adjust_guest_l1e(). Don't mirror -+ * available and cachability flags, though. -+ */ -+ if ( !is_pv_32bit_domain(curr->domain) ) -+ grant_pte_flags |= (grant_pte_flags & _PAGE_USER) -+ ? _PAGE_GLOBAL -+ : _PAGE_GUEST_KERNEL | _PAGE_USER; -+ - if ( flags & GNTMAP_contains_pte ) - { - if ( !new_addr ) -- return destroy_grant_pte_mapping(addr, frame, curr->domain); -+ return destroy_grant_pte_mapping(addr, frame, grant_pte_flags, -+ curr->domain); - - return GNTST_general_error; - } - - if ( !new_addr ) -- return destroy_grant_va_mapping(addr, frame, curr); -+ return destroy_grant_va_mapping(addr, frame, grant_pte_flags, curr); - - pl1e = guest_map_l1e(new_addr, &gl1mfn); - if ( !pl1e ) -@@ -4412,7 +4461,7 @@ int replace_grant_host_mapping( - put_page(l1pg); - guest_unmap_l1e(pl1e); - -- rc = replace_grant_va_mapping(addr, frame, ol1e, curr); -+ rc = replace_grant_va_mapping(addr, frame, grant_pte_flags, ol1e, curr); - if ( rc && !paging_mode_refcounts(curr->domain) ) - put_page_from_l1e(ol1e, curr->domain); - diff --git a/system/xen/xsa/xsa235-4.9.patch b/system/xen/xsa/xsa235-4.9.patch deleted file mode 100644 index 25dd650755f99..0000000000000 --- a/system/xen/xsa/xsa235-4.9.patch +++ /dev/null @@ -1,49 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: arm/mm: release grant lock on xenmem_add_to_physmap_one() error paths - -Commit 55021ff9ab ("xen/arm: add_to_physmap_one: Avoid to map mfn 0 if -an error occurs") introduced error paths not releasing the grant table -lock. Replace them by a suitable check after the lock was dropped. - -This is XSA-235. - -Reported-by: Wei Liu <wei.liu2@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Julien Grall <julien.grall@arm.com> - ---- a/xen/arch/arm/mm.c -+++ b/xen/arch/arm/mm.c -@@ -1164,7 +1164,7 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_status_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->status[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - else - { -@@ -1175,14 +1175,21 @@ int xenmem_add_to_physmap_one( - if ( idx < nr_grant_frames(d->grant_table) ) - mfn = virt_to_mfn(d->grant_table->shared_raw[idx]); - else -- return -EINVAL; -+ mfn = mfn_x(INVALID_MFN); - } - -- d->arch.grant_table_gfn[idx] = gfn; -+ if ( mfn != mfn_x(INVALID_MFN) ) -+ { -+ d->arch.grant_table_gfn[idx] = gfn; - -- t = p2m_ram_rw; -+ t = p2m_ram_rw; -+ } - - grant_write_unlock(d->grant_table); -+ -+ if ( mfn == mfn_x(INVALID_MFN) ) -+ return -EINVAL; -+ - break; - case XENMAPSPACE_shared_info: - if ( idx != 0 ) diff --git a/system/xen/xsa/xsa236-4.9.patch b/system/xen/xsa/xsa236-4.9.patch deleted file mode 100644 index 203025dbae68c..0000000000000 --- a/system/xen/xsa/xsa236-4.9.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: gnttab: fix pin count / page reference race - -Dropping page references before decrementing pin counts is a bad idea -if assumptions are being made that a non-zero pin count implies a valid -page. Fix the order of operations in gnttab_copy_release_buf(), but at -the same time also remove the assertion that was found to trigger: -map_grant_ref() also has the potential of causing a race here, and -changing the order of operations there would likely be quite a bit more -involved. - -This is XSA-236. - -Reported-by: Pawel Wieczorkiewicz <wipawel@amazon.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -2330,9 +2330,20 @@ __acquire_grant_for_copy( - td = page_get_owner_and_reference(*page); - /* - * act->pin being non-zero should guarantee the page to have a -- * non-zero refcount and hence a valid owner. -+ * non-zero refcount and hence a valid owner (matching the one on -+ * record), with one exception: If the owning domain is dying we -+ * had better not make implications from pin count (map_grant_ref() -+ * updates pin counts before obtaining page references, for -+ * example). - */ -- ASSERT(td); -+ if ( td != rd || rd->is_dying ) -+ { -+ if ( td ) -+ put_page(*page); -+ *page = NULL; -+ rc = GNTST_bad_domain; -+ goto unlock_out_clear; -+ } - } - - act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; -@@ -2451,6 +2462,11 @@ static void gnttab_copy_release_buf(stru - unmap_domain_page(buf->virt); - buf->virt = NULL; - } -+ if ( buf->have_grant ) -+ { -+ __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -+ buf->have_grant = 0; -+ } - if ( buf->have_type ) - { - put_page_type(buf->page); -@@ -2461,11 +2477,6 @@ static void gnttab_copy_release_buf(stru - put_page(buf->page); - buf->page = NULL; - } -- if ( buf->have_grant ) -- { -- __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); -- buf->have_grant = 0; -- } - } - - static int gnttab_copy_claim_buf(const struct gnttab_copy *op, diff --git a/system/xen/xsa/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch b/system/xen/xsa/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch deleted file mode 100644 index 7c9dff9672414..0000000000000 --- a/system/xen/xsa/xsa237-4.9-0001-x86-dont-allow-MSI-pIRQ-mapping-on-unowned-device.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't allow MSI pIRQ mapping on unowned device - -MSI setup should be permitted only for existing devices owned by the -respective guest (the operation may still be carried out by the domain -controlling that guest). - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1963,7 +1963,10 @@ int map_domain_pirq( - if ( !cpu_has_apic ) - goto done; - -- pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn); -+ pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn); -+ if ( !pdev ) -+ goto done; -+ - ret = pci_enable_msi(msi, &msi_desc); - if ( ret ) - { diff --git a/system/xen/xsa/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch b/system/xen/xsa/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch deleted file mode 100644 index 0add704587129..0000000000000 --- a/system/xen/xsa/xsa237-4.9-0002-x86-enforce-proper-privilege-when-mapping-pIRQ-s.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: enforce proper privilege when (un)mapping pIRQ-s - -(Un)mapping of IRQs, just like other RESOURCE__ADD* / RESOURCE__REMOVE* -actions (in FLASK terms) should be XSM_DM_PRIV rather than XSM_TARGET. -This in turn requires bypassing the XSM check in physdev_unmap_pirq() -for the HVM emuirq case just like is being done in physdev_map_pirq(). -The primary goal security wise, however, is to no longer allow HVM -guests, by specifying their own domain ID instead of DOMID_SELF, to -enter code paths intended for PV guest and the control domains of HVM -guests only. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -111,7 +111,7 @@ int physdev_map_pirq(domid_t domid, int - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_map_domain_pirq(XSM_TARGET, d); -+ ret = xsm_map_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - -@@ -256,13 +256,14 @@ int physdev_map_pirq(domid_t domid, int - int physdev_unmap_pirq(domid_t domid, int pirq) - { - struct domain *d; -- int ret; -+ int ret = 0; - - d = rcu_lock_domain_by_any_id(domid); - if ( d == NULL ) - return -ESRCH; - -- ret = xsm_unmap_domain_pirq(XSM_TARGET, d); -+ if ( domid != DOMID_SELF || !is_hvm_domain(d) || !has_pirq(d) ) -+ ret = xsm_unmap_domain_pirq(XSM_DM_PRIV, d); - if ( ret ) - goto free_domain; - ---- a/xen/include/xsm/dummy.h -+++ b/xen/include/xsm/dummy.h -@@ -453,7 +453,7 @@ static XSM_INLINE char *xsm_show_irq_sid - - static XSM_INLINE int xsm_map_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - -@@ -465,7 +465,7 @@ static XSM_INLINE int xsm_map_domain_irq - - static XSM_INLINE int xsm_unmap_domain_pirq(XSM_DEFAULT_ARG struct domain *d) - { -- XSM_ASSERT_ACTION(XSM_TARGET); -+ XSM_ASSERT_ACTION(XSM_DM_PRIV); - return xsm_default_action(action, current->domain, d); - } - diff --git a/system/xen/xsa/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch b/system/xen/xsa/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch deleted file mode 100644 index 5c69c4826504e..0000000000000 --- a/system/xen/xsa/xsa237-4.9-0003-x86-MSI-disallow-redundant-enabling.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/MSI: disallow redundant enabling - -At the moment, Xen attempts to allow redundant enabling of MSI by -having pci_enable_msi() return 0, and point to the existing MSI -descriptor, when the msi already exists. - -Unfortunately, if subsequent errors are encountered, the cleanup -paths assume pci_enable_msi() had done full initialization, and -hence undo everything that was assumed to be done by that -function without also undoing other setup that would normally -occur only after that function was called (in map_domain_pirq() -itself). - -Rather than try to make the redundant enabling case work properly, just -forbid it entirely by having pci_enable_msi() return -EEXIST when MSI -is already set up. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/msi.c -+++ b/xen/arch/x86/msi.c -@@ -1050,11 +1050,10 @@ static int __pci_enable_msi(struct msi_i - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX); -@@ -1118,11 +1117,10 @@ static int __pci_enable_msix(struct msi_ - old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX); - if ( old_desc ) - { -- printk(XENLOG_WARNING "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", -+ printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n", - msi->irq, msi->seg, msi->bus, - PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); -- *desc = old_desc; -- return 0; -+ return -EEXIST; - } - - old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI); diff --git a/system/xen/xsa/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch b/system/xen/xsa/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch deleted file mode 100644 index a16ec1bba1cd7..0000000000000 --- a/system/xen/xsa/xsa237-4.9-0004-x86-IRQ-conditionally-preserve-irq-pirq-mapping-on-error.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/IRQ: conditionally preserve irq <-> pirq mapping on map error paths - -Mappings that had been set up before should not be torn down when -handling unrelated errors. - -This is part of XSA-237. - -Reported-by: HW42 <hw42@ipsumj.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -1251,7 +1251,8 @@ static int prepare_domain_irq_pirq(struc - return -ENOMEM; - } - *pinfo = info; -- return 0; -+ -+ return !!err; - } - - static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq) -@@ -1294,7 +1295,10 @@ int init_domain_irq_mapping(struct domai - continue; - err = prepare_domain_irq_pirq(d, i, i, &info); - if ( err ) -+ { -+ ASSERT(err < 0); - break; -+ } - set_domain_irq_pirq(d, i, info); - } - -@@ -1902,6 +1906,7 @@ int map_domain_pirq( - struct pirq *info; - struct irq_desc *desc; - unsigned long flags; -+ DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {}; - - ASSERT(spin_is_locked(&d->event_lock)); - -@@ -1945,8 +1950,10 @@ int map_domain_pirq( - } - - ret = prepare_domain_irq_pirq(d, irq, pirq, &info); -- if ( ret ) -+ if ( ret < 0 ) - goto revoke; -+ if ( !ret ) -+ __set_bit(0, prepared); - - desc = irq_to_desc(irq); - -@@ -2018,8 +2025,10 @@ int map_domain_pirq( - irq = create_irq(NUMA_NO_NODE); - ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info) - : irq; -- if ( ret ) -+ if ( ret < 0 ) - break; -+ if ( !ret ) -+ __set_bit(nr, prepared); - msi_desc[nr].irq = irq; - - if ( irq_permit_access(d, irq) != 0 ) -@@ -2052,15 +2061,15 @@ int map_domain_pirq( - desc->msi_desc = NULL; - spin_unlock_irqrestore(&desc->lock, flags); - } -- while ( nr-- ) -+ while ( nr ) - { - if ( irq >= 0 && irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR - "dom%d: could not revoke access to IRQ%d (pirq %d)\n", - d->domain_id, irq, pirq); -- if ( info ) -+ if ( info && test_bit(nr, prepared) ) - cleanup_domain_irq_pirq(d, irq, info); -- info = pirq_info(d, pirq + nr); -+ info = pirq_info(d, pirq + --nr); - irq = info->arch.irq; - } - msi_desc->irq = -1; -@@ -2076,12 +2085,14 @@ int map_domain_pirq( - spin_lock_irqsave(&desc->lock, flags); - set_domain_irq_pirq(d, irq, info); - spin_unlock_irqrestore(&desc->lock, flags); -+ ret = 0; - } - - done: - if ( ret ) - { -- cleanup_domain_irq_pirq(d, irq, info); -+ if ( test_bit(0, prepared) ) -+ cleanup_domain_irq_pirq(d, irq, info); - revoke: - if ( irq_deny_access(d, irq) ) - printk(XENLOG_G_ERR ---- a/xen/arch/x86/physdev.c -+++ b/xen/arch/x86/physdev.c -@@ -186,7 +186,7 @@ int physdev_map_pirq(domid_t domid, int - } - else if ( type == MAP_PIRQ_TYPE_MULTI_MSI ) - { -- if ( msi->entry_nr <= 0 || msi->entry_nr > 32 ) -+ if ( msi->entry_nr <= 0 || msi->entry_nr > MAX_MSI_IRQS ) - ret = -EDOM; - else if ( msi->entry_nr != 1 && !iommu_intremap ) - ret = -EOPNOTSUPP; ---- a/xen/include/asm-x86/msi.h -+++ b/xen/include/asm-x86/msi.h -@@ -56,6 +56,8 @@ - /* MAX fixed pages reserved for mapping MSIX tables. */ - #define FIX_MSIX_MAX_PAGES 512 - -+#define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */ -+ - struct msi_info { - u16 seg; - u8 bus; diff --git a/system/xen/xsa/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch b/system/xen/xsa/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch deleted file mode 100644 index 155ba15d0871e..0000000000000 --- a/system/xen/xsa/xsa237-4.9-0005-x86-FLASK-fix-unmap-domain-IRQ-XSM-hook.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/FLASK: fix unmap-domain-IRQ XSM hook - -The caller and the FLASK implementation of xsm_unmap_domain_irq() -disagreed about what the "data" argument points to in the MSI case: -Change both sides to pass/take a PCI device. - -This is part of XSA-237. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> - ---- a/xen/arch/x86/irq.c -+++ b/xen/arch/x86/irq.c -@@ -2143,7 +2143,8 @@ int unmap_domain_pirq(struct domain *d, - nr = msi_desc->msi.nvec; - } - -- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc); -+ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, -+ msi_desc ? msi_desc->dev : NULL); - if ( ret ) - goto done; - ---- a/xen/xsm/flask/hooks.c -+++ b/xen/xsm/flask/hooks.c -@@ -918,8 +918,8 @@ static int flask_unmap_domain_msi (struc - u32 *sid, struct avc_audit_data *ad) - { - #ifdef CONFIG_HAS_PCI -- struct msi_info *msi = data; -- u32 machine_bdf = (msi->seg << 16) | (msi->bus << 8) | msi->devfn; -+ const struct pci_dev *pdev = data; -+ u32 machine_bdf = (pdev->seg << 16) | (pdev->bus << 8) | pdev->devfn; - - AVC_AUDIT_DATA_INIT(ad, DEV); - ad->device = machine_bdf; diff --git a/system/xen/xsa/xsa238.patch b/system/xen/xsa/xsa238.patch deleted file mode 100644 index 0d7d48fef80f6..0000000000000 --- a/system/xen/xsa/xsa238.patch +++ /dev/null @@ -1,45 +0,0 @@ -From cdc2887076b19b39fab9faec495082586f3113df Mon Sep 17 00:00:00 2001 -From: XenProject Security Team <security@xenproject.org> -Date: Tue, 5 Sep 2017 13:41:37 +0200 -Subject: x86/ioreq server: correctly handle bogus - XEN_DMOP_{,un}map_io_range_to_ioreq_server arguments - -Misbehaving device model can pass incorrect XEN_DMOP_map/ -unmap_io_range_to_ioreq_server arguments, namely end < start when -specifying address range. When this happens we hit ASSERT(s <= e) in -rangeset_contains_range()/rangeset_overlaps_range() with debug builds. -Production builds will not trap right away but may misbehave later -while handling such bogus ranges. - -This is XSA-238. - -Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/hvm/ioreq.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c -index b2a8b0e986..8c8bf1f0ec 100644 ---- a/xen/arch/x86/hvm/ioreq.c -+++ b/xen/arch/x86/hvm/ioreq.c -@@ -820,6 +820,9 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; -@@ -872,6 +875,9 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id, - struct hvm_ioreq_server *s; - int rc; - -+ if ( start > end ) -+ return -EINVAL; -+ - spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock); - - rc = -ENOENT; diff --git a/system/xen/xsa/xsa239.patch b/system/xen/xsa/xsa239.patch deleted file mode 100644 index 5daecb5e4732f..0000000000000 --- a/system/xen/xsa/xsa239.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86/HVM: prefill partially used variable on emulation paths - -Certain handlers ignore the access size (vioapic_write() being the -example this was found with), perhaps leading to subsequent reads -seeing data that wasn't actually written by the guest. For -consistency and extra safety also do this on the read path of -hvm_process_io_intercept(), even if this doesn't directly affect what -guests get to see, as we've supposedly already dealt with read handlers -leaving data completely unitialized. - -This is XSA-239. - -Reported-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -129,7 +129,7 @@ static int hvmemul_do_io( - .count = *reps, - .dir = dir, - .df = df, -- .data = data, -+ .data = data_is_addr ? data : 0, - .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */ - .state = STATE_IOREQ_READY, - }; ---- a/xen/arch/x86/hvm/intercept.c -+++ b/xen/arch/x86/hvm/intercept.c -@@ -127,6 +127,7 @@ int hvm_process_io_intercept(const struc - addr = (p->type == IOREQ_TYPE_COPY) ? - p->addr + step * i : - p->addr; -+ data = 0; - rc = ops->read(handler, addr, p->size, &data); - if ( rc != X86EMUL_OKAY ) - break; -@@ -161,6 +162,7 @@ int hvm_process_io_intercept(const struc - { - if ( p->data_is_ptr ) - { -+ data = 0; - switch ( hvm_copy_from_guest_phys(&data, p->data + step * i, - p->size) ) - { diff --git a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch b/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch deleted file mode 100644 index 515ad22b66bb3..0000000000000 --- a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch +++ /dev/null @@ -1,494 +0,0 @@ -From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Thu, 28 Sep 2017 15:17:25 +0100 -Subject: [PATCH 1/2] x86: limit linear page table use to a single level - -That's the only way that they're meant to be used. Without such a -restriction arbitrarily long chains of same-level page tables can be -built, tearing down of which may then cause arbitrarily deep recursion, -causing a stack overflow. To facilitate this restriction, a counter is -being introduced to track both the number of same-level entries in a -page table as well as the number of uses of a page table in another -same-level one (counting into positive and negative direction -respectively, utilizing the fact that both counts can't be non-zero at -the same time). - -Note that the added accounting introduces a restriction on the number -of times a page can be used in other same-level page tables - more than -32k of such uses are no longer possible. - -Note also that some put_page_and_type[_preemptible]() calls are -replaced with open-coded equivalents. This seemed preferrable to -adding "parent_table" to the matrix of functions. - -Note further that cross-domain same-level page table references are no -longer permitted (they probably never should have been). - -This is XSA-240. - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> ---- - xen/arch/x86/domain.c | 1 + - xen/arch/x86/mm.c | 171 ++++++++++++++++++++++++++++++++++++++----- - xen/include/asm-x86/domain.h | 2 + - xen/include/asm-x86/mm.h | 25 +++++-- - 4 files changed, 175 insertions(+), 24 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index d7e699228c..d7ed72c246 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1226,6 +1226,7 @@ int arch_set_info_guest( - rc = -ERESTART; - /* Fallthrough */ - case -ERESTART: -+ v->arch.old_guest_ptpg = NULL; - v->arch.old_guest_table = - pagetable_get_page(v->arch.guest_table); - v->arch.guest_table = pagetable_null(); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 86f5eda52d..1e469bd354 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -747,6 +747,61 @@ static void put_data_page( - put_page(page); - } - -+static bool inc_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* -+ * The check below checks for the "linear use" count being non-zero -+ * as well as overflow. Signed integer overflow is undefined behavior -+ * according to the C spec. However, as long as linear_pt_count is -+ * smaller in size than 'int', the arithmetic operation of the -+ * increment below won't overflow; rather the result will be truncated -+ * when stored. Ensure that this is always true. -+ */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc++; -+ if ( nc <= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_entries(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, -1); -+ ASSERT(oc > 0); -+} -+ -+static bool inc_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc; -+ -+ do { -+ /* See the respective comment in inc_linear_entries(). */ -+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int)); -+ oc = nc--; -+ if ( nc >= 0 ) -+ return false; -+ nc = cmpxchg(&pg->linear_pt_count, oc, nc); -+ } while ( oc != nc ); -+ -+ return true; -+} -+ -+static void dec_linear_uses(struct page_info *pg) -+{ -+ typeof(pg->linear_pt_count) oc; -+ -+ oc = arch_fetch_and_add(&pg->linear_pt_count, 1); -+ ASSERT(oc < 0); -+} -+ - /* - * We allow root tables to map each other (a.k.a. linear page tables). It - * needs some special care with reference counts and access permissions: -@@ -777,15 +832,35 @@ get_##level##_linear_pagetable( \ - \ - if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \ - { \ -+ struct page_info *ptpg = mfn_to_page(pde_pfn); \ -+ \ -+ /* Make sure the page table belongs to the correct domain. */ \ -+ if ( unlikely(page_get_owner(ptpg) != d) ) \ -+ return 0; \ -+ \ - /* Make sure the mapped frame belongs to the correct domain. */ \ - if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \ - return 0; \ - \ - /* \ -- * Ensure that the mapped frame is an already-validated page table. \ -+ * Ensure that the mapped frame is an already-validated page table \ -+ * and is not itself having linear entries, as well as that the \ -+ * containing page table is not iself in use as a linear page table \ -+ * elsewhere. \ - * If so, atomically increment the count (checking for overflow). \ - */ \ - page = mfn_to_page(pfn); \ -+ if ( !inc_linear_entries(ptpg) ) \ -+ { \ -+ put_page(page); \ -+ return 0; \ -+ } \ -+ if ( !inc_linear_uses(page) ) \ -+ { \ -+ dec_linear_entries(ptpg); \ -+ put_page(page); \ -+ return 0; \ -+ } \ - y = page->u.inuse.type_info; \ - do { \ - x = y; \ -@@ -793,6 +868,8 @@ get_##level##_linear_pagetable( \ - unlikely((x & (PGT_type_mask|PGT_validated)) != \ - (PGT_##level##_page_table|PGT_validated)) ) \ - { \ -+ dec_linear_uses(page); \ -+ dec_linear_entries(ptpg); \ - put_page(page); \ - return 0; \ - } \ -@@ -1226,6 +1303,9 @@ get_page_from_l4e( - l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \ - } while ( 0 ) - -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg); -+ - void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) - { - unsigned long pfn = l1e_get_pfn(l1e); -@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) - if ( l2e_get_flags(l2e) & _PAGE_PSE ) - put_superpage(l2e_get_pfn(l2e)); - else -- put_page_and_type(l2e_get_page(l2e)); -+ { -+ struct page_info *pg = l2e_get_page(l2e); -+ int rc = _put_page_type(pg, false, mfn_to_page(pfn)); -+ -+ ASSERT(!rc); -+ put_page(pg); -+ } - - return 0; - } - --static int __put_page_type(struct page_info *, int preemptible); -- - static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, bool_t defer) - { - struct page_info *pg; -+ int rc; - - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) - return 1; -@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); -+ -+ return rc; - } - - static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, bool_t defer) - { -+ int rc = 1; -+ - if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && - (l4e_get_pfn(l4e) != pfn) ) - { -@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - if ( unlikely(partial > 0) ) - { - ASSERT(!defer); -- return __put_page_type(pg, 1); -+ return _put_page_type(pg, true, mfn_to_page(pfn)); - } - - if ( defer ) - { -+ current->arch.old_guest_ptpg = mfn_to_page(pfn); - current->arch.old_guest_table = pg; - return 0; - } - -- return put_page_and_type_preemptible(pg); -+ rc = _put_page_type(pg, true, mfn_to_page(pfn)); -+ if ( likely(!rc) ) -+ put_page(pg); - } -- return 1; -+ -+ return rc; - } - - static int alloc_l1_table(struct page_info *page) -@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page) - { - page->nr_validated_ptes = i; - page->partial_pte = 0; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - while ( i-- > 0 ) -@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page) - { - if ( current->arch.old_guest_table ) - page->nr_validated_ptes++; -+ current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; - } - } -@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag - } - - --static int __put_final_page_type( -- struct page_info *page, unsigned long type, int preemptible) -+static int _put_final_page_type(struct page_info *page, unsigned long type, -+ bool preemptible, struct page_info *ptpg) - { - int rc = free_page_type(page, type, preemptible); - - /* No need for atomic update of type_info here: noone else updates it. */ - if ( rc == 0 ) - { -+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) ) -+ { -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying); - /* - * Record TLB information for flush later. We do not stamp page tables - * when running in shadow mode: -@@ -2446,8 +2550,8 @@ static int __put_final_page_type( - } - - --static int __put_page_type(struct page_info *page, -- int preemptible) -+static int _put_page_type(struct page_info *page, bool preemptible, -+ struct page_info *ptpg) - { - unsigned long nx, x, y = page->u.inuse.type_info; - int rc = 0; -@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page, - x, nx)) != x) ) - continue; - /* We cleared the 'valid bit' so we do the clean up. */ -- rc = __put_final_page_type(page, x, preemptible); -+ rc = _put_final_page_type(page, x, preemptible, ptpg); -+ ptpg = NULL; - if ( x & PGT_partial ) - put_page(page); - break; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ /* -+ * page_set_tlbflush_timestamp() accesses the same union -+ * linear_pt_count lives in. Unvalidated page table pages, -+ * however, should occur during domain destruction only -+ * anyway. Updating of linear_pt_count luckily is not -+ * necessary anymore for a dying domain. -+ */ -+ ASSERT(page_get_owner(page)->is_dying); -+ ASSERT(page->linear_pt_count < 0); -+ ASSERT(ptpg->linear_pt_count > 0); -+ ptpg = NULL; -+ } -+ - /* - * Record TLB information for flush later. We do not stamp page - * tables when running in shadow mode: -@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page, - return -EINTR; - } - -+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) -+ { -+ ASSERT(!rc); -+ dec_linear_uses(page); -+ dec_linear_entries(ptpg); -+ } -+ - return rc; - } - -@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - page->nr_validated_ptes = 0; - page->partial_pte = 0; - } -+ page->linear_pt_count = 0; - rc = alloc_page_type(page, type, preemptible); - } - -@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, - - void put_page_type(struct page_info *page) - { -- int rc = __put_page_type(page, 0); -+ int rc = _put_page_type(page, false, NULL); - ASSERT(rc == 0); - (void)rc; - } -@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type) - - int put_page_type_preemptible(struct page_info *page) - { -- return __put_page_type(page, 1); -+ return _put_page_type(page, true, NULL); - } - - int get_page_type_preemptible(struct page_info *page, unsigned long type) -@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v) - if ( !v->arch.old_guest_table ) - return 0; - -- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) ) -+ switch ( rc = _put_page_type(v->arch.old_guest_table, true, -+ v->arch.old_guest_ptpg) ) - { - case -EINTR: - case -ERESTART: - return -ERESTART; -+ case 0: -+ put_page(v->arch.old_guest_table); - } - - v->arch.old_guest_table = NULL; -@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn) - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -@@ -3310,7 +3442,10 @@ long do_mmuext_op( - if ( type == PGT_l1_page_table ) - put_page_and_type(page); - else -+ { -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ } - } - } - -@@ -3346,6 +3481,7 @@ long do_mmuext_op( - { - case -EINTR: - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - rc = 0; - break; -@@ -3425,6 +3561,7 @@ long do_mmuext_op( - rc = -ERESTART; - /* fallthrough */ - case -ERESTART: -+ curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; - break; - default: -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 924caac834..5a512918cc 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -527,6 +527,8 @@ struct arch_vcpu - pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */ - pagetable_t guest_table; /* (MFN) guest notion of cr3 */ - struct page_info *old_guest_table; /* partially destructed pagetable */ -+ struct page_info *old_guest_ptpg; /* containing page table of the */ -+ /* former, if any */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 119d7dec6b..445da50d47 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -124,11 +124,11 @@ struct page_info - u32 tlbflush_timestamp; - - /* -- * When PGT_partial is true then this field is valid and indicates -- * that PTEs in the range [0, @nr_validated_ptes) have been validated. -- * An extra page reference must be acquired (or not dropped) whenever -- * PGT_partial gets set, and it must be dropped when the flag gets -- * cleared. This is so that a get() leaving a page in partially -+ * When PGT_partial is true then the first two fields are valid and -+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been -+ * validated. An extra page reference must be acquired (or not dropped) -+ * whenever PGT_partial gets set, and it must be dropped when the flag -+ * gets cleared. This is so that a get() leaving a page in partially - * validated state (where the caller would drop the reference acquired - * due to the getting of the type [apparently] failing [-ERESTART]) - * would not accidentally result in a page left with zero general -@@ -152,10 +152,18 @@ struct page_info - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. -+ * -+ * The 3rd field, @linear_pt_count, indicates -+ * - by a positive value, how many same-level page table entries a page -+ * table has, -+ * - by a negative value, in how many same-level page tables a page is -+ * in use. - */ - struct { -- u16 nr_validated_ptes; -- s8 partial_pte; -+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1; -+ u16 :16 - PAGETABLE_ORDER - 1 - 2; -+ s16 partial_pte:2; -+ s16 linear_pt_count; - }; - - /* -@@ -206,6 +214,9 @@ struct page_info - #define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL<<PGT_count_width)-1) - -+/* Are the 'type mask' bits identical? */ -+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask)) -+ - /* Cleared when the owning guest 'frees' this page. */ - #define _PGC_allocated PG_shift(1) - #define PGC_allocated PG_mask(1, 1) --- -2.14.1 - diff --git a/system/xen/xsa/xsa240-4.9-0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch b/system/xen/xsa/xsa240-4.9-0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch deleted file mode 100644 index 5e057c5652c11..0000000000000 --- a/system/xen/xsa/xsa240-4.9-0002-x86-mm-Disable-PV-linear-pagetables-by-default.patch +++ /dev/null @@ -1,83 +0,0 @@ -From e614979ce054044d9e19023f1ef10dae6e38baf4 Mon Sep 17 00:00:00 2001 -From: George Dunlap <george.dunlap@citrix.com> -Date: Fri, 22 Sep 2017 11:46:55 +0100 -Subject: [PATCH 2/2] x86/mm: Disable PV linear pagetables by default - -Allowing pagetables to point to other pagetables of the same level -(often called 'linear pagetables') has been included in Xen since its -inception. But it is not used by the most common PV guests (Linux, -NetBSD, minios), and has been the source of a number of subtle -reference-counting bugs. - -Add a command-line option to control whether PV linear pagetables are -allowed (disabled by default). - -Reported-by: Jann Horn <jannh@google.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> ---- -Changes since v2: -- s/_/-/; in command-line option -- Added __read_mostly ---- - docs/misc/xen-command-line.markdown | 15 +++++++++++++++ - xen/arch/x86/mm.c | 10 ++++++++++ - 2 files changed, 25 insertions(+) - -diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown -index 44d99852aa..45ef873abb 100644 ---- a/docs/misc/xen-command-line.markdown -+++ b/docs/misc/xen-command-line.markdown -@@ -1374,6 +1374,21 @@ The following resources are available: - CDP, one COS will corespond two CBMs other than one with CAT, due to the - sum of CBMs is fixed, that means actual `cos_max` in use will automatically - reduce to half when CDP is enabled. -+ -+### pv-linear-pt -+> `= <boolean>` -+ -+> Default: `false` -+ -+Allow PV guests to have pagetable entries pointing to other pagetables -+of the same level (i.e., allowing L2 PTEs to point to other L2 pages). -+This technique is often called "linear pagetables", and is sometimes -+used to allow operating systems a simple way to consistently map the -+current process's pagetables into its own virtual address space. -+ -+None of the most common PV operating systems (Linux, NetBSD, MiniOS) -+use this technique, but there may be custom operating systems which -+do. - - ### reboot - > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 1e469bd354..32952a46b9 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -814,6 +814,9 @@ static void dec_linear_uses(struct page_info *pg) - * frame if it is mapped by a different root table. This is sufficient and - * also necessary to allow validation of a root table mapping itself. - */ -+static bool __read_mostly pv_linear_pt_enable = false; -+boolean_param("pv-linear-pt", pv_linear_pt_enable); -+ - #define define_get_linear_pagetable(level) \ - static int \ - get_##level##_linear_pagetable( \ -@@ -823,6 +826,13 @@ get_##level##_linear_pagetable( \ - struct page_info *page; \ - unsigned long pfn; \ - \ -+ if ( !pv_linear_pt_enable ) \ -+ { \ -+ gdprintk(XENLOG_WARNING, \ -+ "Attempt to create linear p.t. (feature disabled)\n"); \ -+ return 0; \ -+ } \ -+ \ - if ( (level##e_get_flags(pde) & _PAGE_RW) ) \ - { \ - gdprintk(XENLOG_WARNING, \ --- -2.14.1 - diff --git a/system/xen/xsa/xsa241-4.9.patch b/system/xen/xsa/xsa241-4.9.patch deleted file mode 100644 index 514e4c7a4b0b6..0000000000000 --- a/system/xen/xsa/xsa241-4.9.patch +++ /dev/null @@ -1,120 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't store possibly stale TLB flush time stamp - -While the timing window is extremely narrow, it is theoretically -possible for an update to the TLB flush clock and a subsequent flush -IPI to happen between the read and write parts of the update of the -per-page stamp. Exclude this possibility by disabling interrupts -across the update, preventing the IPI to be serviced in the middle. - -This is XSA-241. - -Reported-by: Jann Horn <jannh@google.com> -Suggested-by: George Dunlap <george.dunlap@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@citrix.com> - ---- a/xen/arch/arm/smp.c -+++ b/xen/arch/arm/smp.c -@@ -1,3 +1,4 @@ -+#include <xen/mm.h> - #include <asm/system.h> - #include <asm/smp.h> - #include <asm/cpregs.h> ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info--; - } -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. Updating of linear_pt_count luckily is not -@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -1464,7 +1464,7 @@ void shadow_free(struct domain *d, mfn_t - * TLBs when we reuse the page. Because the destructors leave the - * contents of the pages in place, we can delay TLB flushes until - * just before the allocator hands the page out again. */ -- sp->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(sp); - perfc_decr(shadow_alloc_count); - page_list_add_tail(sp, &d->arch.paging.shadow.freelist); - sp = next; ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -960,7 +960,7 @@ static void free_heap_pages( - /* If a page has no owner it will need no safety TLB flush. */ - pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); - if ( pg[i].u.free.need_tlbflush ) -- pg[i].tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(&pg[i]); - - /* This page is not a guest frame any more. */ - page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */ ---- a/xen/include/asm-arm/flushtlb.h -+++ b/xen/include/asm-arm/flushtlb.h -@@ -12,6 +12,11 @@ static inline void tlbflush_filter(cpuma - - #define tlbflush_current_time() (0) - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ page->tlbflush_timestamp = tlbflush_current_time(); -+} -+ - #if defined(CONFIG_ARM_32) - # include <asm/arm32/flushtlb.h> - #elif defined(CONFIG_ARM_64) ---- a/xen/include/asm-x86/flushtlb.h -+++ b/xen/include/asm-x86/flushtlb.h -@@ -23,6 +23,20 @@ DECLARE_PER_CPU(u32, tlbflush_time); - - #define tlbflush_current_time() tlbflush_clock - -+static inline void page_set_tlbflush_timestamp(struct page_info *page) -+{ -+ /* -+ * Prevent storing a stale time stamp, which could happen if an update -+ * to tlbflush_clock plus a subsequent flush IPI happen between the -+ * reading of tlbflush_clock and the writing of the struct page_info -+ * field. -+ */ -+ ASSERT(local_irq_is_enabled()); -+ local_irq_disable(); -+ page->tlbflush_timestamp = tlbflush_current_time(); -+ local_irq_enable(); -+} -+ - /* - * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing. - * @lastuse_stamp is a timestamp taken when the PFN we are testing was last diff --git a/system/xen/xsa/xsa242-4.9.patch b/system/xen/xsa/xsa242-4.9.patch deleted file mode 100644 index 8adfa61fd71ec..0000000000000 --- a/system/xen/xsa/xsa242-4.9.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Jan Beulich <jbeulich@suse.com> -Subject: x86: don't allow page_unlock() to drop the last type reference - -Only _put_page_type() does the necessary cleanup, and hence not all -domain pages can be released during guest cleanup (leaving around -zombie domains) if we get this wrong. - -This is XSA-242. - -Signed-off-by: Jan Beulich <jbeulich@suse.com> - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page) - - do { - x = y; -+ ASSERT((x & PGT_count_mask) && (x & PGT_locked)); -+ - nx = x - (1 | PGT_locked); -+ /* We must not drop the last reference here. */ -+ ASSERT(nx & PGT_count_mask); - } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); - } - -@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in - (page->count_info & PGC_page_table)) ) - page_set_tlbflush_timestamp(page); - } -+ else if ( unlikely((nx & (PGT_locked | PGT_count_mask)) == -+ (PGT_locked | 1)) ) -+ { -+ /* -+ * We must not drop the second to last reference when the page is -+ * locked, as page_unlock() doesn't do any cleanup of the type. -+ */ -+ cpu_relax(); -+ y = page->u.inuse.type_info; -+ continue; -+ } - - if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) - break; diff --git a/system/xen/xsa/xsa243.patch b/system/xen/xsa/xsa243.patch deleted file mode 100644 index aaff277514dc9..0000000000000 --- a/system/xen/xsa/xsa243.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: x86/shadow: Don't create self-linear shadow mappings for 4-level translated guests - -When initially creating a monitor table for 4-level translated guests, don't -install a shadow-linear mapping. This mapping is actually self-linear, and -trips up the writeable heuristic logic into following Xen's mappings, not the -guests' shadows it was expecting to follow. - -A consequence of this is that sh_guess_wrmap() needs to cope with there being -no shadow-linear mapping present, which in practice occurs once each time a -vcpu switches to 4-level paging from a different paging mode. - -An appropriate shadow-linear slot will be inserted into the monitor table -either while constructing lower level monitor tables, or by sh_update_cr3(). - -While fixing this, clarify the safety of the other mappings. Despite -appearing unsafe, it is correct to create a guest-linear mapping for -translated domains; this is self-linear and doesn't point into the translated -domain. Drop a dead clause for translate != external guests. - -This is XSA-243. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Tim Deegan <tim@xen.org> - -diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c -index 8d4f244..a18d286 100644 ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -1485,26 +1485,38 @@ void sh_install_xen_entries_in_l4(struct domain *d, mfn_t gl4mfn, mfn_t sl4mfn) - sl4e[shadow_l4_table_offset(RO_MPT_VIRT_START)] = shadow_l4e_empty(); - } - -- /* Shadow linear mapping for 4-level shadows. N.B. for 3-level -- * shadows on 64-bit xen, this linear mapping is later replaced by the -- * monitor pagetable structure, which is built in make_monitor_table -- * and maintained by sh_update_linear_entries. */ -- sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); -- -- /* Self linear mapping. */ -- if ( shadow_mode_translate(d) && !shadow_mode_external(d) ) -+ /* -+ * Linear mapping slots: -+ * -+ * Calling this function with gl4mfn == sl4mfn is used to construct a -+ * monitor table for translated domains. In this case, gl4mfn forms the -+ * self-linear mapping (i.e. not pointing into the translated domain), and -+ * the shadow-linear slot is skipped. The shadow-linear slot is either -+ * filled when constructing lower level monitor tables, or via -+ * sh_update_cr3() for 4-level guests. -+ * -+ * Calling this function with gl4mfn != sl4mfn is used for non-translated -+ * guests, where the shadow-linear slot is actually self-linear, and the -+ * guest-linear slot points into the guests view of its pagetables. -+ */ -+ if ( shadow_mode_translate(d) ) - { -- // linear tables may not be used with translated PV guests -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ ASSERT(mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = - shadow_l4e_empty(); - } - else - { -- sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -- shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ ASSERT(!mfn_eq(gl4mfn, sl4mfn)); -+ -+ sl4e[shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW); - } - -+ sl4e[shadow_l4_table_offset(LINEAR_PT_VIRT_START)] = -+ shadow_l4e_from_mfn(gl4mfn, __PAGE_HYPERVISOR_RW); -+ - unmap_domain_page(sl4e); - } - #endif -@@ -4405,6 +4417,11 @@ static int sh_guess_wrmap(struct vcpu *v, unsigned long vaddr, mfn_t gmfn) - - /* Carefully look in the shadow linear map for the l1e we expect */ - #if SHADOW_PAGING_LEVELS >= 4 -+ /* Is a shadow linear map is installed in the first place? */ -+ sl4p = v->arch.paging.shadow.guest_vtable; -+ sl4p += shadow_l4_table_offset(SH_LINEAR_PT_VIRT_START); -+ if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) -+ return 0; - sl4p = sh_linear_l4_table(v) + shadow_l4_linear_offset(vaddr); - if ( !(shadow_l4e_get_flags(*sl4p) & _PAGE_PRESENT) ) - return 0; diff --git a/system/xen/xsa/xsa244.patch b/system/xen/xsa/xsa244.patch deleted file mode 100644 index c35a80be32f10..0000000000000 --- a/system/xen/xsa/xsa244.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Andrew Cooper <andrew.cooper3@citrix.com> -Subject: [PATCH] x86/cpu: Fix IST handling during PCPU bringup - -Clear IST references in newly allocated IDTs. Nothing good will come of -having them set before the TSS is suitably constructed (although the chances -of the CPU surviving such an IST interrupt/exception is extremely slim). - -Uniformly set the IST references after the TSS is in place. This fixes an -issue on AMD hardware, where onlining a PCPU while PCPU0 is in HVM context -will cause IST_NONE to be copied into the new IDT, making that PCPU vulnerable -to privilege escalation from PV guests until it subsequently schedules an HVM -guest. - -This is XSA-244 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ---- - xen/arch/x86/cpu/common.c | 5 +++++ - xen/arch/x86/smpboot.c | 3 +++ - 2 files changed, 8 insertions(+) - -diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c -index 78f5667..6cf3628 100644 ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -640,6 +640,7 @@ void __init early_cpu_init(void) - * - Sets up TSS with stack pointers, including ISTs - * - Inserts TSS selector into regular and compat GDTs - * - Loads GDT, IDT, TR then null LDT -+ * - Sets up IST references in the IDT - */ - void load_system_tables(void) - { -@@ -702,6 +703,10 @@ void load_system_tables(void) - asm volatile ("ltr %w0" : : "rm" (TSS_ENTRY << 3) ); - asm volatile ("lldt %w0" : : "rm" (0) ); - -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_DF); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NMI); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE); -+ - /* - * Bottom-of-stack must be 16-byte aligned! - * -diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c -index 3ca716c..1609b62 100644 ---- a/xen/arch/x86/smpboot.c -+++ b/xen/arch/x86/smpboot.c -@@ -724,6 +724,9 @@ static int cpu_smpboot_alloc(unsigned int cpu) - if ( idt_tables[cpu] == NULL ) - goto oom; - memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t)); -+ set_ist(&idt_tables[cpu][TRAP_double_fault], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE); -+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE); - - for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1); - i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i ) diff --git a/system/xen/xsa/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch b/system/xen/xsa/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch deleted file mode 100644 index 2047686903f02..0000000000000 --- a/system/xen/xsa/xsa245-0001-xen-page_alloc-Cover-memory-unreserved-after-boot-in.patch +++ /dev/null @@ -1,48 +0,0 @@ -From a48d47febc1340f27d6c716545692641a09b414c Mon Sep 17 00:00:00 2001 -From: Julien Grall <julien.grall@arm.com> -Date: Thu, 21 Sep 2017 14:13:08 +0100 -Subject: [PATCH 1/2] xen/page_alloc: Cover memory unreserved after boot in - first_valid_mfn - -On Arm, some regions (e.g Initramfs, Dom0 Kernel...) are marked as -reserved until the hardware domain is built and they are copied into its -memory. Therefore, they will not be added in the boot allocator via -init_boot_pages. - -Instead, init_xenheap_pages will be called once the region are not used -anymore. - -Update first_valid_mfn in both init_heap_pages and init_boot_pages -(already exist) to cover all the cases. - -Signed-off-by: Julien Grall <julien.grall@arm.com> -[Adjust comment, added locking around first_valid_mfn update] -Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> ---- - xen/common/page_alloc.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index 0b9f6cc6df..fbe5a8af39 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -1700,6 +1700,16 @@ static void init_heap_pages( - { - unsigned long i; - -+ /* -+ * Some pages may not go through the boot allocator (e.g reserved -+ * memory at boot but released just after --- kernel, initramfs, -+ * etc.). -+ * Update first_valid_mfn to ensure those regions are covered. -+ */ -+ spin_lock(&heap_lock); -+ first_valid_mfn = min_t(unsigned long, page_to_mfn(pg), first_valid_mfn); -+ spin_unlock(&heap_lock); -+ - for ( i = 0; i < nr_pages; i++ ) - { - unsigned int nid = phys_to_nid(page_to_maddr(pg+i)); --- -2.11.0 - diff --git a/system/xen/xsa/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch b/system/xen/xsa/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch deleted file mode 100644 index cd4d2709be643..0000000000000 --- a/system/xen/xsa/xsa245-0002-xen-arm-Correctly-report-the-memory-region-in-the-du.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cbfcf039d0e0b6f4c4cb3de612f7bf788a0c47cd Mon Sep 17 00:00:00 2001 -From: Julien Grall <julien.grall@arm.com> -Date: Mon, 18 Sep 2017 14:24:08 +0100 -Subject: [PATCH 2/2] xen/arm: Correctly report the memory region in the dummy - NUMA helpers - -NUMA is currently not supported on Arm. Because common code is -NUMA-aware, dummy helpers are instead provided to expose a single node. - -Those helpers are for instance used to know the region to scrub. - -However the memory region is not reported correctly. Indeed, the -frametable may not be at the beginning of the memory and there might be -multiple memory banks. This will lead to not scrub some part of the -memory. - -The memory information can be found using: - * first_valid_mfn as the start of the memory - * max_page - first_valid_mfn as the spanned pages - -Note that first_valid_mfn is now been exported. The prototype has been -added in asm-arm/numa.h and not in a common header because I would -expect the variable to become static once NUMA is fully supported on -Arm. - -Signed-off-by: Julien Grall <julien.grall@arm.com> ---- - xen/common/page_alloc.c | 6 +++++- - xen/include/asm-arm/numa.h | 10 ++++++++-- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c -index fbe5a8af39..472c6fe329 100644 ---- a/xen/common/page_alloc.c -+++ b/xen/common/page_alloc.c -@@ -192,7 +192,11 @@ PAGE_LIST_HEAD(page_broken_list); - * BOOT-TIME ALLOCATOR - */ - --static unsigned long __initdata first_valid_mfn = ~0UL; -+/* -+ * first_valid_mfn is exported because it is use in ARM specific NUMA -+ * helpers. See comment in asm-arm/numa.h. -+ */ -+unsigned long first_valid_mfn = ~0UL; - - static struct bootmem_region { - unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */ -diff --git a/xen/include/asm-arm/numa.h b/xen/include/asm-arm/numa.h -index a2c1a3476d..3e7384da9e 100644 ---- a/xen/include/asm-arm/numa.h -+++ b/xen/include/asm-arm/numa.h -@@ -12,9 +12,15 @@ static inline __attribute__((pure)) nodeid_t phys_to_nid(paddr_t addr) - return 0; - } - -+/* -+ * TODO: make first_valid_mfn static when NUMA is supported on Arm, this -+ * is required because the dummy helpers is using it. -+ */ -+extern unsigned long first_valid_mfn; -+ - /* XXX: implement NUMA support */ --#define node_spanned_pages(nid) (total_pages) --#define node_start_pfn(nid) (pdx_to_pfn(frametable_base_pdx)) -+#define node_spanned_pages(nid) (max_page - first_valid_mfn) -+#define node_start_pfn(nid) (first_valid_mfn) - #define __node_distance(a, b) (20) - - static inline unsigned int arch_get_dma_bitsize(void) --- -2.11.0 - |