1 files changed, 197 insertions, 0 deletions
diff --git a/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
new file mode 100644
index 0000000..5520627
--- /dev/null
+++ b/0011-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
@@ -0,0 +1,197 @@
+From a603386b422f5cb4c5e2639a7e20a1d99dba2175 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 14:54:44 +0200
+Subject: [PATCH 11/26] xen/x86: p2m: Add preemption in p2m_teardown()
+
+The list p2m->pages contain all the pages used by the P2M. On large
+instance this can be quite large and the time spent to call
+d->arch.paging.free_page() will take more than 1ms for a 80GB guest
+on a Xen running in nested environment on a c5.metal.
+
+By extrapolation, it would take > 100ms for a 8TB guest (what we
+current security support). So add some preemption in p2m_teardown()
+and propagate to the callers. Note there are 3 places where
+the preemption is not enabled:
+    - hap_final_teardown()/shadow_final_teardown(): We are
+      preventing update the P2M once the domain is dying (so
+      no more pages could be allocated) and most of the P2M pages
+      will be freed in preemptive manneer when relinquishing the
+      resources. So this is fine to disable preemption.
+    - shadow_enable(): This is fine because it will undo the allocation
+      that may have been made by p2m_alloc_table() (so only the root
+      page table).
+
+The preemption is arbitrarily checked every 1024 iterations.
+
+We now need to include <xen/event.h> in p2m-basic in order to
+import the definition for local_events_need_delivery() used by
+general_preempt_check(). Ideally, the inclusion should happen in
+xen/sched.h but it opened a can of worms.
+
+Note that with the current approach, Xen doesn't keep track on whether
+the alt/nested P2Ms have been cleared. So there are some redundant work.
+However, this is not expected to incurr too much overhead (the P2M lock
+shouldn't be contended during teardown). So this is optimization is
+left outside of the security event.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+master commit: 8a2111250b424edc49c65c4d41b276766d30635c
+master date: 2022-10-11 14:24:48 +0200
+---
+ xen/arch/x86/mm/hap/hap.c       | 22 ++++++++++++++++------
+ xen/arch/x86/mm/p2m.c           | 18 +++++++++++++++---
+ xen/arch/x86/mm/shadow/common.c | 12 +++++++++---
+ xen/include/asm-x86/p2m.h       |  2 +-
+ 4 files changed, 41 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index a44fcfd95e1e..1f9a157a0c34 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d)
+ 
+     if ( hvm_altp2m_supported() )
+         for ( i = 0; i < MAX_ALTP2M; i++ )
+-            p2m_teardown(d->arch.altp2m_p2m[i], true);
++            p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
+ 
+     /* Destroy nestedp2m's first */
+     for (i = 0; i < MAX_NESTEDP2M; i++) {
+-        p2m_teardown(d->arch.nested_p2m[i], true);
++        p2m_teardown(d->arch.nested_p2m[i], true, NULL);
+     }
+ 
+     if ( d->arch.paging.hap.total_pages != 0 )
+         hap_teardown(d, NULL);
+ 
+-    p2m_teardown(p2m_get_hostp2m(d), true);
++    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+     /* Free any memory that the p2m teardown released */
+     paging_lock(d);
+     hap_set_allocation(d, 0, NULL);
+@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted)
+         FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
+ 
+         for ( i = 0; i < MAX_ALTP2M; i++ )
+-            p2m_teardown(d->arch.altp2m_p2m[i], false);
++        {
++            p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
++            if ( preempted && *preempted )
++                return;
++        }
+     }
+ 
+     /* Destroy nestedp2m's after altp2m. */
+     for ( i = 0; i < MAX_NESTEDP2M; i++ )
+-        p2m_teardown(d->arch.nested_p2m[i], false);
++    {
++        p2m_teardown(d->arch.nested_p2m[i], false, preempted);
++        if ( preempted && *preempted )
++            return;
++    }
+ 
+-    p2m_teardown(p2m_get_hostp2m(d), false);
++    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
++    if ( preempted && *preempted )
++        return;
+ 
+     paging_lock(d); /* Keep various asserts happy */
+ 
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index aba4f17cbe12..8781df9dda8d 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -749,12 +749,13 @@ int p2m_alloc_table(struct p2m_domain *p2m)
+  * hvm fixme: when adding support for pvh non-hardware domains, this path must
+  * cleanup any foreign p2m types (release refcnts on them).
+  */
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted)
+ /* Return all the p2m pages to Xen.
+  * We know we don't have any extra mappings to these pages */
+ {
+     struct page_info *pg, *root_pg = NULL;
+     struct domain *d;
++    unsigned int i = 0;
+ 
+     if (p2m == NULL)
+         return;
+@@ -773,8 +774,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+     }
+ 
+     while ( (pg = page_list_remove_head(&p2m->pages)) )
+-        if ( pg != root_pg )
+-            d->arch.paging.free_page(d, pg);
++    {
++        if ( pg == root_pg )
++            continue;
++
++        d->arch.paging.free_page(d, pg);
++
++        /* Arbitrarily check preemption every 1024 iterations */
++        if ( preempted && !(++i % 1024) && general_preempt_check() )
++        {
++            *preempted = true;
++            break;
++        }
++    }
+ 
+     if ( root_pg )
+         page_list_add(root_pg, &p2m->pages);
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index ac9a1ae07808..3b0d781991b5 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2770,8 +2770,12 @@ int shadow_enable(struct domain *d, u32 mode)
+  out_locked:
+     paging_unlock(d);
+  out_unlocked:
++    /*
++     * This is fine to ignore the preemption here because only the root
++     * will be allocated by p2m_alloc_table().
++     */
+     if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
+-        p2m_teardown(p2m, true);
++        p2m_teardown(p2m, true, NULL);
+     if ( rv != 0 && pg != NULL )
+     {
+         pg->count_info &= ~PGC_count_mask;
+@@ -2824,7 +2828,9 @@ void shadow_teardown(struct domain *d, bool *preempted)
+     for_each_vcpu ( d, v )
+         shadow_vcpu_teardown(v);
+ 
+-    p2m_teardown(p2m_get_hostp2m(d), false);
++    p2m_teardown(p2m_get_hostp2m(d), false, preempted);
++    if ( preempted && *preempted )
++        return;
+ 
+     paging_lock(d);
+ 
+@@ -2945,7 +2951,7 @@ void shadow_final_teardown(struct domain *d)
+         shadow_teardown(d, NULL);
+ 
+     /* It is now safe to pull down the p2m map. */
+-    p2m_teardown(p2m_get_hostp2m(d), true);
++    p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+     /* Free any shadow memory that the p2m teardown released */
+     paging_lock(d);
+     shadow_set_allocation(d, 0, NULL);
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index c3c16748e7d5..2db9ab0122f2 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -574,7 +574,7 @@ int p2m_init(struct domain *d);
+ int p2m_alloc_table(struct p2m_domain *p2m);
+ 
+ /* Return all the p2m resources to Xen. */
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
+ void p2m_final_teardown(struct domain *d);
+ 
+ /* Add a page to a domain's p2m table */
+-- 
+2.37.3
+