Diffstat (limited to 'system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch')
-rw-r--r-- | system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch | 494 |
1 file changed, 0 insertions, 494 deletions
diff --git a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch b/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
deleted file mode 100644
index 515ad22b66bb3..0000000000000
--- a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
+++ /dev/null
@@ -1,494 +0,0 @@
-From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Thu, 28 Sep 2017 15:17:25 +0100
-Subject: [PATCH 1/2] x86: limit linear page table use to a single level
-
-That's the only way that they're meant to be used. Without such a
-restriction arbitrarily long chains of same-level page tables can be
-built, tearing down of which may then cause arbitrarily deep recursion,
-causing a stack overflow. To facilitate this restriction, a counter is
-being introduced to track both the number of same-level entries in a
-page table as well as the number of uses of a page table in another
-same-level one (counting into positive and negative direction
-respectively, utilizing the fact that both counts can't be non-zero at
-the same time).
-
-Note that the added accounting introduces a restriction on the number
-of times a page can be used in other same-level page tables - more than
-32k of such uses are no longer possible.
-
-Note also that some put_page_and_type[_preemptible]() calls are
-replaced with open-coded equivalents. This seemed preferrable to
-adding "parent_table" to the matrix of functions.
-
-Note further that cross-domain same-level page table references are no
-longer permitted (they probably never should have been).
-
-This is XSA-240.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
----
- xen/arch/x86/domain.c        |   1 +
- xen/arch/x86/mm.c            | 171 ++++++++++++++++++++++++++++++++++++++-----
- xen/include/asm-x86/domain.h |   2 +
- xen/include/asm-x86/mm.h     |  25 +++++--
- 4 files changed, 175 insertions(+), 24 deletions(-)
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index d7e699228c..d7ed72c246 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -1226,6 +1226,7 @@ int arch_set_info_guest(
-             rc = -ERESTART;
-             /* Fallthrough */
-         case -ERESTART:
-+            v->arch.old_guest_ptpg = NULL;
-             v->arch.old_guest_table =
-                 pagetable_get_page(v->arch.guest_table);
-             v->arch.guest_table = pagetable_null();
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 86f5eda52d..1e469bd354 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -747,6 +747,61 @@ static void put_data_page(
-         put_page(page);
- }
- 
-+static bool inc_linear_entries(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+    do {
-+        /*
-+         * The check below checks for the "linear use" count being non-zero
-+         * as well as overflow. Signed integer overflow is undefined behavior
-+         * according to the C spec. However, as long as linear_pt_count is
-+         * smaller in size than 'int', the arithmetic operation of the
-+         * increment below won't overflow; rather the result will be truncated
-+         * when stored. Ensure that this is always true.
-+         */
-+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+        oc = nc++;
-+        if ( nc <= 0 )
-+            return false;
-+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+    } while ( oc != nc );
-+
-+    return true;
-+}
-+
-+static void dec_linear_entries(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) oc;
-+
-+    oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
-+    ASSERT(oc > 0);
-+}
-+
-+static bool inc_linear_uses(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+    do {
-+        /* See the respective comment in inc_linear_entries(). */
-+        BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+        oc = nc--;
-+        if ( nc >= 0 )
-+            return false;
-+        nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+    } while ( oc != nc );
-+
-+    return true;
-+}
-+
-+static void dec_linear_uses(struct page_info *pg)
-+{
-+    typeof(pg->linear_pt_count) oc;
-+
-+    oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
-+    ASSERT(oc < 0);
-+}
-+
- /*
-  * We allow root tables to map each other (a.k.a. linear page tables). It
-  * needs some special care with reference counts and access permissions:
-@@ -777,15 +832,35 @@ get_##level##_linear_pagetable(                         \
-                                                                             \
-     if ( (pfn = level##e_get_pfn(pde)) != pde_pfn )                         \
-     {                                                                       \
-+        struct page_info *ptpg = mfn_to_page(pde_pfn);                      \
-+                                                                            \
-+        /* Make sure the page table belongs to the correct domain. */       \
-+        if ( unlikely(page_get_owner(ptpg) != d) )                          \
-+            return 0;                                                       \
-+                                                                            \
-         /* Make sure the mapped frame belongs to the correct domain. */     \
-         if ( unlikely(!get_page_from_pagenr(pfn, d)) )                      \
-             return 0;                                                       \
-                                                                             \
-         /*                                                                  \
--         * Ensure that the mapped frame is an already-validated page table. \
-+         * Ensure that the mapped frame is an already-validated page table  \
-+         * and is not itself having linear entries, as well as that the     \
-+         * containing page table is not iself in use as a linear page table \
-+         * elsewhere.                                                       \
-          * If so, atomically increment the count (checking for overflow).   \
-          */                                                                 \
-         page = mfn_to_page(pfn);                                            \
-+        if ( !inc_linear_entries(ptpg) )                                    \
-+        {                                                                   \
-+            put_page(page);                                                 \
-+            return 0;                                                       \
-+        }                                                                   \
-+        if ( !inc_linear_uses(page) )                                       \
-+        {                                                                   \
-+            dec_linear_entries(ptpg);                                       \
-+            put_page(page);                                                 \
-+            return 0;                                                       \
-+        }                                                                   \
-         y = page->u.inuse.type_info;                                        \
-         do {                                                                \
-             x = y;                                                          \
-@@ -793,6 +868,8 @@ get_##level##_linear_pagetable(                           \
-                  unlikely((x & (PGT_type_mask|PGT_validated)) !=            \
-                           (PGT_##level##_page_table|PGT_validated)) )       \
-             {                                                               \
-+                dec_linear_uses(page);                                      \
-+                dec_linear_entries(ptpg);                                   \
-                 put_page(page);                                             \
-                 return 0;                                                   \
-             }                                                               \
-@@ -1226,6 +1303,9 @@ get_page_from_l4e(
-             l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED);  \
-     } while ( 0 )
- 
-+static int _put_page_type(struct page_info *page, bool preemptible,
-+                          struct page_info *ptpg);
-+
- void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
- {
-     unsigned long pfn = l1e_get_pfn(l1e);
-@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
-     if ( l2e_get_flags(l2e) & _PAGE_PSE )
-         put_superpage(l2e_get_pfn(l2e));
-     else
--        put_page_and_type(l2e_get_page(l2e));
-+    {
-+        struct page_info *pg = l2e_get_page(l2e);
-+        int rc = _put_page_type(pg, false, mfn_to_page(pfn));
-+
-+        ASSERT(!rc);
-+        put_page(pg);
-+    }
- 
-     return 0;
- }
- 
--static int __put_page_type(struct page_info *, int preemptible);
--
- static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-                              int partial, bool_t defer)
- {
-     struct page_info *pg;
-+    int rc;
- 
-     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
-         return 1;
-@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
-     if ( unlikely(partial > 0) )
-     {
-         ASSERT(!defer);
--        return __put_page_type(pg, 1);
-+        return _put_page_type(pg, true, mfn_to_page(pfn));
-     }
- 
-     if ( defer )
-     {
-+        current->arch.old_guest_ptpg = mfn_to_page(pfn);
-         current->arch.old_guest_table = pg;
-         return 0;
-     }
- 
--    return put_page_and_type_preemptible(pg);
-+    rc = _put_page_type(pg, true, mfn_to_page(pfn));
-+    if ( likely(!rc) )
-+        put_page(pg);
-+
-+    return rc;
- }
- 
- static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
-                              int partial, bool_t defer)
- {
-+    int rc = 1;
-+
-     if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
-          (l4e_get_pfn(l4e) != pfn) )
-     {
-@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
-         if ( unlikely(partial > 0) )
-         {
-             ASSERT(!defer);
--            return __put_page_type(pg, 1);
-+            return _put_page_type(pg, true, mfn_to_page(pfn));
-         }
- 
-         if ( defer )
-         {
-+            current->arch.old_guest_ptpg = mfn_to_page(pfn);
-             current->arch.old_guest_table = pg;
-             return 0;
-         }
- 
--        return put_page_and_type_preemptible(pg);
-+        rc = _put_page_type(pg, true, mfn_to_page(pfn));
-+        if ( likely(!rc) )
-+            put_page(pg);
-     }
--    return 1;
-+
-+    return rc;
- }
- 
- static int alloc_l1_table(struct page_info *page)
-@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page)
-         {
-             page->nr_validated_ptes = i;
-             page->partial_pte = 0;
-+            current->arch.old_guest_ptpg = NULL;
-             current->arch.old_guest_table = page;
-         }
-         while ( i-- > 0 )
-@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page)
-         {
-             if ( current->arch.old_guest_table )
-                 page->nr_validated_ptes++;
-+            current->arch.old_guest_ptpg = NULL;
-             current->arch.old_guest_table = page;
-         }
-     }
-@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag
- }
- 
- 
--static int __put_final_page_type(
--    struct page_info *page, unsigned long type, int preemptible)
-+static int _put_final_page_type(struct page_info *page, unsigned long type,
-+                                bool preemptible, struct page_info *ptpg)
- {
-     int rc = free_page_type(page, type, preemptible);
- 
-     /* No need for atomic update of type_info here: noone else updates it. */
-     if ( rc == 0 )
-     {
-+        if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) )
-+        {
-+            dec_linear_uses(page);
-+            dec_linear_entries(ptpg);
-+        }
-+        ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
-         /*
-          * Record TLB information for flush later. We do not stamp page tables
-          * when running in shadow mode:
-@@ -2446,8 +2550,8 @@ static int __put_final_page_type(
- }
- 
- 
--static int __put_page_type(struct page_info *page,
--                           int preemptible)
-+static int _put_page_type(struct page_info *page, bool preemptible,
-+                          struct page_info *ptpg)
- {
-     unsigned long nx, x, y = page->u.inuse.type_info;
-     int rc = 0;
-@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page,
-                                            x, nx)) != x) )
-                 continue;
-             /* We cleared the 'valid bit' so we do the clean up. */
--            rc = __put_final_page_type(page, x, preemptible);
-+            rc = _put_final_page_type(page, x, preemptible, ptpg);
-+            ptpg = NULL;
-             if ( x & PGT_partial )
-                 put_page(page);
-             break;
-         }
- 
-+        if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
-+        {
-+            /*
-+             * page_set_tlbflush_timestamp() accesses the same union
-+             * linear_pt_count lives in. Unvalidated page table pages,
-+             * however, should occur during domain destruction only
-+             * anyway. Updating of linear_pt_count luckily is not
-+             * necessary anymore for a dying domain.
-+             */
-+            ASSERT(page_get_owner(page)->is_dying);
-+            ASSERT(page->linear_pt_count < 0);
-+            ASSERT(ptpg->linear_pt_count > 0);
-+            ptpg = NULL;
-+        }
-+
-         /*
-          * Record TLB information for flush later. We do not stamp page
-          * tables when running in shadow mode:
-@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page,
-             return -EINTR;
-     }
- 
-+    if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
-+    {
-+        ASSERT(!rc);
-+        dec_linear_uses(page);
-+        dec_linear_entries(ptpg);
-+    }
-+
-     return rc;
- }
- 
-@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
-             page->nr_validated_ptes = 0;
-             page->partial_pte = 0;
-         }
-+        page->linear_pt_count = 0;
-         rc = alloc_page_type(page, type, preemptible);
-     }
- 
-@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
- 
- void put_page_type(struct page_info *page)
- {
--    int rc = __put_page_type(page, 0);
-+    int rc = _put_page_type(page, false, NULL);
-     ASSERT(rc == 0);
-     (void)rc;
- }
-@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type)
- 
- int put_page_type_preemptible(struct page_info *page)
- {
--    return __put_page_type(page, 1);
-+    return _put_page_type(page, true, NULL);
- }
- 
- int get_page_type_preemptible(struct page_info *page, unsigned long type)
-@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v)
-     if ( !v->arch.old_guest_table )
-         return 0;
- 
--    switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
-+    switch ( rc = _put_page_type(v->arch.old_guest_table, true,
-+                                 v->arch.old_guest_ptpg) )
-     {
-     case -EINTR:
-     case -ERESTART:
-         return -ERESTART;
-+    case 0:
-+        put_page(v->arch.old_guest_table);
-     }
- 
-     v->arch.old_guest_table = NULL;
-@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn)
-                 rc = -ERESTART;
-                 /* fallthrough */
-             case -ERESTART:
-+                curr->arch.old_guest_ptpg = NULL;
-                 curr->arch.old_guest_table = page;
-                 break;
-             default:
-@@ -3310,7 +3442,10 @@ long do_mmuext_op(
-                     if ( type == PGT_l1_page_table )
-                         put_page_and_type(page);
-                     else
-+                    {
-+                        curr->arch.old_guest_ptpg = NULL;
-                         curr->arch.old_guest_table = page;
-+                    }
-                 }
-             }
- 
-@@ -3346,6 +3481,7 @@ long do_mmuext_op(
-             {
-             case -EINTR:
-             case -ERESTART:
-+                curr->arch.old_guest_ptpg = NULL;
-                 curr->arch.old_guest_table = page;
-                 rc = 0;
-                 break;
-@@ -3425,6 +3561,7 @@ long do_mmuext_op(
-                 rc = -ERESTART;
-                 /* fallthrough */
-             case -ERESTART:
-+                curr->arch.old_guest_ptpg = NULL;
-                 curr->arch.old_guest_table = page;
-                 break;
-             default:
-diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
-index 924caac834..5a512918cc 100644
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -527,6 +527,8 @@ struct arch_vcpu
-     pagetable_t guest_table_user;       /* (MFN) x86/64 user-space pagetable */
-     pagetable_t guest_table;            /* (MFN) guest notion of cr3 */
-     struct page_info *old_guest_table;  /* partially destructed pagetable */
-+    struct page_info *old_guest_ptpg;   /* containing page table of the */
-+                                        /* former, if any */
-     /* guest_table holds a ref to the page, and also a type-count unless
-      * shadow refcounts are in use */
-     pagetable_t shadow_table[4];        /* (MFN) shadow(s) of guest */
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index 119d7dec6b..445da50d47 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -124,11 +124,11 @@ struct page_info
-         u32 tlbflush_timestamp;
- 
-         /*
--         * When PGT_partial is true then this field is valid and indicates
--         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
--         * An extra page reference must be acquired (or not dropped) whenever
--         * PGT_partial gets set, and it must be dropped when the flag gets
--         * cleared. This is so that a get() leaving a page in partially
-+         * When PGT_partial is true then the first two fields are valid and
-+         * indicate that PTEs in the range [0, @nr_validated_ptes) have been
-+         * validated. An extra page reference must be acquired (or not dropped)
-+         * whenever PGT_partial gets set, and it must be dropped when the flag
-+         * gets cleared. This is so that a get() leaving a page in partially
-          * validated state (where the caller would drop the reference acquired
-          * due to the getting of the type [apparently] failing [-ERESTART])
-          * would not accidentally result in a page left with zero general
-@@ -152,10 +152,18 @@ struct page_info
-          * put_page_from_lNe() (due to the apparent failure), and hence it
-          * must be dropped when the put operation is resumed (and completes),
-          * but it must not be acquired if picking up the page for validation.
-+         *
-+         * The 3rd field, @linear_pt_count, indicates
-+         * - by a positive value, how many same-level page table entries a page
-+         *   table has,
-+         * - by a negative value, in how many same-level page tables a page is
-+         *   in use.
-          */
-         struct {
--            u16 nr_validated_ptes;
--            s8 partial_pte;
-+            u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
-+            u16 :16 - PAGETABLE_ORDER - 1 - 2;
-+            s16 partial_pte:2;
-+            s16 linear_pt_count;
-         };
- 
-         /*
-@@ -206,6 +214,9 @@ struct page_info
- #define PGT_count_width   PG_shift(9)
- #define PGT_count_mask    ((1UL<<PGT_count_width)-1)
- 
-+/* Are the 'type mask' bits identical? */
-+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask))
-+
- /* Cleared when the owning guest 'frees' this page. */
- #define _PGC_allocated    PG_shift(1)
- #define PGC_allocated     PG_mask(1, 1)
--- 
-2.14.1
-
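The removed patch hinges on a single signed per-page counter, linear_pt_count: positive values record how many same-level entries a page table holds, negative values record in how many same-level page tables the page is itself in use, and each increment refuses to cross zero, so the two roles stay mutually exclusive. The standalone sketch below illustrates that scheme only; it is not from the patch or the Xen tree. struct page_info_sketch and the *_sketch helpers are invented names, and GCC atomic builtins stand in for Xen's read_atomic()/cmpxchg().

/*
 * Illustrative sketch of the linear_pt_count scheme (assumptions noted above).
 * Positive = number of same-level entries held by a page table,
 * negative = number of uses of the page as a linear page table.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct page_info_sketch {
    int16_t linear_pt_count;    /* s16 in the real struct page_info */
};

/* Count one more same-level entry; fails if the page is in use as a linear pt. */
static bool inc_linear_entries_sketch(struct page_info_sketch *pg)
{
    int16_t oc = __atomic_load_n(&pg->linear_pt_count, __ATOMIC_RELAXED);

    for ( ; ; )
    {
        int16_t nc = oc + 1;    /* done in 'int', truncated on store back to 16 bits */

        if ( nc <= 0 )          /* negative (in use as linear pt) or wrapped on overflow */
            return false;
        if ( __atomic_compare_exchange_n(&pg->linear_pt_count, &oc, nc,
                                         false, __ATOMIC_SEQ_CST,
                                         __ATOMIC_SEQ_CST) )
            return true;
        /* oc was refreshed by the failed compare-exchange; retry. */
    }
}

/* Count one more use as a linear page table; fails if the page has entries itself. */
static bool inc_linear_uses_sketch(struct page_info_sketch *pg)
{
    int16_t oc = __atomic_load_n(&pg->linear_pt_count, __ATOMIC_RELAXED);

    for ( ; ; )
    {
        int16_t nc = oc - 1;

        if ( nc >= 0 )          /* positive (has entries) or wrapped on underflow */
            return false;
        if ( __atomic_compare_exchange_n(&pg->linear_pt_count, &oc, nc,
                                         false, __ATOMIC_SEQ_CST,
                                         __ATOMIC_SEQ_CST) )
            return true;
    }
}

int main(void)
{
    struct page_info_sketch pt = { 0 }, target = { 0 };

    printf("entry added to pt:        %d\n", inc_linear_entries_sketch(&pt));      /* 1 */
    printf("target used as linear pt: %d\n", inc_linear_uses_sketch(&target));     /* 1 */
    printf("pt also used as linear:   %d\n", inc_linear_uses_sketch(&pt));         /* 0: already positive */
    printf("entry added to target:    %d\n", inc_linear_entries_sketch(&target));  /* 0: already negative */
    return 0;
}

Because the counter is only 16 bits wide, the same scheme also yields the limit called out in the commit message: a page cannot be referenced from more than roughly 32k same-level page tables.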