diff options
Diffstat (limited to '0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch')
-rw-r--r-- | 0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch | 197 |
1 files changed, 197 insertions, 0 deletions
diff --git a/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch new file mode 100644 index 0000000..5520627 --- /dev/null +++ b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch @@ -0,0 +1,197 @@ +From a603386b422f5cb4c5e2639a7e20a1d99dba2175 Mon Sep 17 00:00:00 2001 +From: Julien Grall <jgrall@amazon.com> +Date: Tue, 11 Oct 2022 14:54:44 +0200 +Subject: [PATCH 11/26] xen/x86: p2m: Add preemption in p2m_teardown() + +The list p2m->pages contain all the pages used by the P2M. On large +instance this can be quite large and the time spent to call +d->arch.paging.free_page() will take more than 1ms for a 80GB guest +on a Xen running in nested environment on a c5.metal. + +By extrapolation, it would take > 100ms for a 8TB guest (what we +current security support). So add some preemption in p2m_teardown() +and propagate to the callers. Note there are 3 places where +the preemption is not enabled: + - hap_final_teardown()/shadow_final_teardown(): We are + preventing update the P2M once the domain is dying (so + no more pages could be allocated) and most of the P2M pages + will be freed in preemptive manneer when relinquishing the + resources. So this is fine to disable preemption. + - shadow_enable(): This is fine because it will undo the allocation + that may have been made by p2m_alloc_table() (so only the root + page table). + +The preemption is arbitrarily checked every 1024 iterations. + +We now need to include <xen/event.h> in p2m-basic in order to +import the definition for local_events_need_delivery() used by +general_preempt_check(). Ideally, the inclusion should happen in +xen/sched.h but it opened a can of worms. + +Note that with the current approach, Xen doesn't keep track on whether +the alt/nested P2Ms have been cleared. So there are some redundant work. +However, this is not expected to incurr too much overhead (the P2M lock +shouldn't be contended during teardown). So this is optimization is +left outside of the security event. + +This is part of CVE-2022-33746 / XSA-410. + +Signed-off-by: Julien Grall <jgrall@amazon.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +master commit: 8a2111250b424edc49c65c4d41b276766d30635c +master date: 2022-10-11 14:24:48 +0200 +--- + xen/arch/x86/mm/hap/hap.c | 22 ++++++++++++++++------ + xen/arch/x86/mm/p2m.c | 18 +++++++++++++++--- + xen/arch/x86/mm/shadow/common.c | 12 +++++++++--- + xen/include/asm-x86/p2m.h | 2 +- + 4 files changed, 41 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c +index a44fcfd95e1e..1f9a157a0c34 100644 +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d) + + if ( hvm_altp2m_supported() ) + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i], true); ++ p2m_teardown(d->arch.altp2m_p2m[i], true, NULL); + + /* Destroy nestedp2m's first */ + for (i = 0; i < MAX_NESTEDP2M; i++) { +- p2m_teardown(d->arch.nested_p2m[i], true); ++ p2m_teardown(d->arch.nested_p2m[i], true, NULL); + } + + if ( d->arch.paging.hap.total_pages != 0 ) + hap_teardown(d, NULL); + +- p2m_teardown(p2m_get_hostp2m(d), true); ++ p2m_teardown(p2m_get_hostp2m(d), true, NULL); + /* Free any memory that the p2m teardown released */ + paging_lock(d); + hap_set_allocation(d, 0, NULL); +@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted) + FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp); + + for ( i = 0; i < MAX_ALTP2M; i++ ) +- p2m_teardown(d->arch.altp2m_p2m[i], false); ++ { ++ p2m_teardown(d->arch.altp2m_p2m[i], false, preempted); ++ if ( preempted && *preempted ) ++ return; ++ } + } + + /* Destroy nestedp2m's after altp2m. */ + for ( i = 0; i < MAX_NESTEDP2M; i++ ) +- p2m_teardown(d->arch.nested_p2m[i], false); ++ { ++ p2m_teardown(d->arch.nested_p2m[i], false, preempted); ++ if ( preempted && *preempted ) ++ return; ++ } + +- p2m_teardown(p2m_get_hostp2m(d), false); ++ p2m_teardown(p2m_get_hostp2m(d), false, preempted); ++ if ( preempted && *preempted ) ++ return; + + paging_lock(d); /* Keep various asserts happy */ + +diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c +index aba4f17cbe12..8781df9dda8d 100644 +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -749,12 +749,13 @@ int p2m_alloc_table(struct p2m_domain *p2m) + * hvm fixme: when adding support for pvh non-hardware domains, this path must + * cleanup any foreign p2m types (release refcnts on them). + */ +-void p2m_teardown(struct p2m_domain *p2m, bool remove_root) ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted) + /* Return all the p2m pages to Xen. + * We know we don't have any extra mappings to these pages */ + { + struct page_info *pg, *root_pg = NULL; + struct domain *d; ++ unsigned int i = 0; + + if (p2m == NULL) + return; +@@ -773,8 +774,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root) + } + + while ( (pg = page_list_remove_head(&p2m->pages)) ) +- if ( pg != root_pg ) +- d->arch.paging.free_page(d, pg); ++ { ++ if ( pg == root_pg ) ++ continue; ++ ++ d->arch.paging.free_page(d, pg); ++ ++ /* Arbitrarily check preemption every 1024 iterations */ ++ if ( preempted && !(++i % 1024) && general_preempt_check() ) ++ { ++ *preempted = true; ++ break; ++ } ++ } + + if ( root_pg ) + page_list_add(root_pg, &p2m->pages); +diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c +index ac9a1ae07808..3b0d781991b5 100644 +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2770,8 +2770,12 @@ int shadow_enable(struct domain *d, u32 mode) + out_locked: + paging_unlock(d); + out_unlocked: ++ /* ++ * This is fine to ignore the preemption here because only the root ++ * will be allocated by p2m_alloc_table(). ++ */ + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) +- p2m_teardown(p2m, true); ++ p2m_teardown(p2m, true, NULL); + if ( rv != 0 && pg != NULL ) + { + pg->count_info &= ~PGC_count_mask; +@@ -2824,7 +2828,9 @@ void shadow_teardown(struct domain *d, bool *preempted) + for_each_vcpu ( d, v ) + shadow_vcpu_teardown(v); + +- p2m_teardown(p2m_get_hostp2m(d), false); ++ p2m_teardown(p2m_get_hostp2m(d), false, preempted); ++ if ( preempted && *preempted ) ++ return; + + paging_lock(d); + +@@ -2945,7 +2951,7 @@ void shadow_final_teardown(struct domain *d) + shadow_teardown(d, NULL); + + /* It is now safe to pull down the p2m map. */ +- p2m_teardown(p2m_get_hostp2m(d), true); ++ p2m_teardown(p2m_get_hostp2m(d), true, NULL); + /* Free any shadow memory that the p2m teardown released */ + paging_lock(d); + shadow_set_allocation(d, 0, NULL); +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index c3c16748e7d5..2db9ab0122f2 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -574,7 +574,7 @@ int p2m_init(struct domain *d); + int p2m_alloc_table(struct p2m_domain *p2m); + + /* Return all the p2m resources to Xen. */ +-void p2m_teardown(struct p2m_domain *p2m, bool remove_root); ++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted); + void p2m_final_teardown(struct domain *d); + + /* Add a page to a domain's p2m table */ +-- +2.37.3 + |