Commit | Line | Data |
---|---|---|
23d51c33 JR |
1 | Description: x86: make page table handling error paths preemptible |
2 | ... as they may take significant amounts of time. | |
3 | . | |
4 | This requires cloning the tweaked continuation logic from | |
5 | do_mmuext_op() to do_mmu_update(). | |
6 | . | |
7 | Note that in mod_l[34]_entry() a negative "preemptible" value gets | |
8 | passed to put_page_from_l[34]e() now, telling the callee to store the | |
9 | respective page in current->arch.old_guest_table (for a hypercall | |
10 | continuation to pick up), rather than carrying out the put right away. | |
11 | This is going to be made a little more explicit by a subsequent cleanup | |
12 | patch. | |
13 | From: Jan Beulich <jbeulich@suse.com> | |
14 | Origin: upstream | |
15 | Id: CVE-2013-1918 XSA-45 | |
16 | --- | |
17 | --- a/xen/arch/x86/mm.c | |
18 | +++ b/xen/arch/x86/mm.c | |
19 | @@ -1241,7 +1241,16 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, | |
20 | #endif | |
21 | ||
22 | if ( unlikely(partial > 0) ) | |
23 | + { | |
24 | + ASSERT(preemptible >= 0); | |
25 | return __put_page_type(l3e_get_page(l3e), preemptible); | |
26 | + } | |
27 | + | |
28 | + if ( preemptible < 0 ) | |
29 | + { | |
30 | + current->arch.old_guest_table = l3e_get_page(l3e); | |
31 | + return 0; | |
32 | + } | |
33 | ||
34 | return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); | |
35 | } | |
36 | @@ -1254,7 +1263,17 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, | |
37 | (l4e_get_pfn(l4e) != pfn) ) | |
38 | { | |
39 | if ( unlikely(partial > 0) ) | |
40 | + { | |
41 | + ASSERT(preemptible >= 0); | |
42 | return __put_page_type(l4e_get_page(l4e), preemptible); | |
43 | + } | |
44 | + | |
45 | + if ( preemptible < 0 ) | |
46 | + { | |
47 | + current->arch.old_guest_table = l4e_get_page(l4e); | |
48 | + return 0; | |
49 | + } | |
50 | + | |
51 | return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); | |
52 | } | |
53 | return 1; | |
54 | @@ -1549,12 +1568,17 @@ static int alloc_l3_table(struct page_info *page, int preemptible) | |
55 | if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) | |
56 | { | |
57 | MEM_LOG("Failure in alloc_l3_table: entry %d", i); | |
58 | + if ( i ) | |
59 | + { | |
60 | + page->nr_validated_ptes = i; | |
61 | + page->partial_pte = 0; | |
62 | + current->arch.old_guest_table = page; | |
63 | + } | |
64 | while ( i-- > 0 ) | |
65 | { | |
66 | if ( !is_guest_l3_slot(i) ) | |
67 | continue; | |
68 | unadjust_guest_l3e(pl3e[i], d); | |
69 | - put_page_from_l3e(pl3e[i], pfn, 0, 0); | |
70 | } | |
71 | } | |
72 | ||
73 | @@ -1584,22 +1608,24 @@ static int alloc_l4_table(struct page_info *page, int preemptible) | |
74 | page->nr_validated_ptes = i; | |
75 | page->partial_pte = partial ?: 1; | |
76 | } | |
77 | - else if ( rc == -EINTR ) | |
78 | + else if ( rc < 0 ) | |
79 | { | |
80 | + if ( rc != -EINTR ) | |
81 | + MEM_LOG("Failure in alloc_l4_table: entry %d", i); | |
82 | if ( i ) | |
83 | { | |
84 | page->nr_validated_ptes = i; | |
85 | page->partial_pte = 0; | |
86 | - rc = -EAGAIN; | |
87 | + if ( rc == -EINTR ) | |
88 | + rc = -EAGAIN; | |
89 | + else | |
90 | + { | |
91 | + if ( current->arch.old_guest_table ) | |
92 | + page->nr_validated_ptes++; | |
93 | + current->arch.old_guest_table = page; | |
94 | + } | |
95 | } | |
96 | } | |
97 | - else if ( rc < 0 ) | |
98 | - { | |
99 | - MEM_LOG("Failure in alloc_l4_table: entry %d", i); | |
100 | - while ( i-- > 0 ) | |
101 | - if ( is_guest_l4_slot(d, i) ) | |
102 | - put_page_from_l4e(pl4e[i], pfn, 0, 0); | |
103 | - } | |
104 | if ( rc < 0 ) | |
105 | return rc; | |
106 | ||
107 | @@ -2047,7 +2073,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, | |
108 | pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); | |
109 | } | |
110 | ||
111 | - put_page_from_l3e(ol3e, pfn, 0, 0); | |
112 | + put_page_from_l3e(ol3e, pfn, 0, -preemptible); | |
113 | return rc; | |
114 | } | |
115 | ||
116 | @@ -2110,7 +2136,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, | |
117 | return -EFAULT; | |
118 | } | |
119 | ||
120 | - put_page_from_l4e(ol4e, pfn, 0, 0); | |
121 | + put_page_from_l4e(ol4e, pfn, 0, -preemptible); | |
122 | return rc; | |
123 | } | |
124 | ||
125 | @@ -2268,7 +2294,15 @@ static int alloc_page_type(struct page_info *page, unsigned long type, | |
126 | PRtype_info ": caf=%08lx taf=%" PRtype_info, | |
127 | page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), | |
128 | type, page->count_info, page->u.inuse.type_info); | |
129 | - page->u.inuse.type_info = 0; | |
130 | + if ( page != current->arch.old_guest_table ) | |
131 | + page->u.inuse.type_info = 0; | |
132 | + else | |
133 | + { | |
134 | + ASSERT((page->u.inuse.type_info & | |
135 | + (PGT_count_mask | PGT_validated)) == 1); | |
136 | + get_page_light(page); | |
137 | + page->u.inuse.type_info |= PGT_partial; | |
138 | + } | |
139 | } | |
140 | else | |
141 | { | |
142 | @@ -3218,21 +3252,17 @@ long do_mmuext_op( | |
143 | } | |
144 | ||
145 | if ( (rc = xsm_memory_pin_page(d, pg_owner, page)) != 0 ) | |
146 | - { | |
147 | - put_page_and_type(page); | |
148 | okay = 0; | |
149 | - break; | |
150 | - } | |
151 | - | |
152 | - if ( unlikely(test_and_set_bit(_PGT_pinned, | |
153 | - &page->u.inuse.type_info)) ) | |
154 | + else if ( unlikely(test_and_set_bit(_PGT_pinned, | |
155 | + &page->u.inuse.type_info)) ) | |
156 | { | |
157 | MEM_LOG("Mfn %lx already pinned", page_to_mfn(page)); | |
158 | - put_page_and_type(page); | |
159 | okay = 0; | |
160 | - break; | |
161 | } | |
162 | ||
163 | + if ( unlikely(!okay) ) | |
164 | + goto pin_drop; | |
165 | + | |
166 | /* A page is dirtied when its pin status is set. */ | |
167 | paging_mark_dirty(pg_owner, page_to_mfn(page)); | |
168 | ||
169 | @@ -3246,7 +3276,13 @@ long do_mmuext_op( | |
170 | &page->u.inuse.type_info)); | |
171 | spin_unlock(&pg_owner->page_alloc_lock); | |
172 | if ( drop_ref ) | |
173 | - put_page_and_type(page); | |
174 | + { | |
175 | + pin_drop: | |
176 | + if ( type == PGT_l1_page_table ) | |
177 | + put_page_and_type(page); | |
178 | + else | |
179 | + curr->arch.old_guest_table = page; | |
180 | + } | |
181 | } | |
182 | ||
183 | break; | |
184 | @@ -3652,11 +3688,28 @@ long do_mmu_update( | |
185 | void *va; | |
186 | unsigned long gpfn, gmfn, mfn; | |
187 | struct page_info *page; | |
188 | - int rc = 0, i = 0; | |
189 | - unsigned int cmd, done = 0, pt_dom; | |
190 | - struct vcpu *v = current; | |
191 | + unsigned int cmd, i = 0, done = 0, pt_dom; | |
192 | + struct vcpu *curr = current, *v = curr; | |
193 | struct domain *d = v->domain, *pt_owner = d, *pg_owner; | |
194 | struct domain_mmap_cache mapcache; | |
195 | + int rc = put_old_guest_table(curr); | |
196 | + | |
197 | + if ( unlikely(rc) ) | |
198 | + { | |
199 | + if ( likely(rc == -EAGAIN) ) | |
200 | + rc = hypercall_create_continuation( | |
201 | + __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, | |
202 | + foreigndom); | |
203 | + return rc; | |
204 | + } | |
205 | + | |
206 | + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && | |
207 | + likely(guest_handle_is_null(ureqs)) ) | |
208 | + { | |
209 | + /* See the curr->arch.old_guest_table related | |
210 | + * hypercall_create_continuation() below. */ | |
211 | + return (int)foreigndom; | |
212 | + } | |
213 | ||
214 | if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) | |
215 | { | |
216 | @@ -3705,7 +3758,7 @@ long do_mmu_update( | |
217 | ||
218 | for ( i = 0; i < count; i++ ) | |
219 | { | |
220 | - if ( hypercall_preempt_check() ) | |
221 | + if ( curr->arch.old_guest_table || hypercall_preempt_check() ) | |
222 | { | |
223 | rc = -EAGAIN; | |
224 | break; | |
225 | @@ -3886,9 +3939,27 @@ long do_mmu_update( | |
226 | } | |
227 | ||
228 | if ( rc == -EAGAIN ) | |
229 | + { | |
230 | + ASSERT(i < count); | |
231 | rc = hypercall_create_continuation( | |
232 | __HYPERVISOR_mmu_update, "hihi", | |
233 | ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); | |
234 | + } | |
235 | + else if ( curr->arch.old_guest_table ) | |
236 | + { | |
237 | + XEN_GUEST_HANDLE(void) null; | |
238 | + | |
239 | + ASSERT(rc || i == count); | |
240 | + set_xen_guest_handle(null, NULL); | |
241 | + /* | |
242 | + * In order to have a way to communicate the final return value to | |
243 | + * our continuation, we pass this in place of "foreigndom", building | |
244 | + * on the fact that this argument isn't needed anymore. | |
245 | + */ | |
246 | + rc = hypercall_create_continuation( | |
247 | + __HYPERVISOR_mmu_update, "hihi", null, | |
248 | + MMU_UPDATE_PREEMPTED, null, rc); | |
249 | + } | |
250 | ||
251 | put_pg_owner(pg_owner); | |
252 |