From e8792e4aaaf1f55759aafc5fd1cdb9d147e65b68 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Sun, 4 May 2014 11:48:29 +0200 Subject: [PATCH] - added fix for Bad page map BUGs in Xen PVM - rel 2 --- kernel-small_fixes.patch | 107 +++++++++++++++++++++++++++++++++++++++ kernel.spec | 2 +- 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch index 2b0f82db..740b2867 100644 --- a/kernel-small_fixes.patch +++ b/kernel-small_fixes.patch @@ -70,3 +70,110 @@ index 3b1ea34..eaa808e 100644 /* Ask for all the pages supported by this device */ result = scsi_vpd_inquiry(sdev, buf, 0, buf_len); if (result) + +David Vrabel identified a regression when using automatic NUMA balancing +under Xen whereby page table entries were getting corrupted due to the +use of native PTE operations. Quoting him + + Xen PV guest page tables require that their entries use machine + addresses if the present bit (_PAGE_PRESENT) is set, and (for + successful migration) non-present PTEs must use pseudo-physical + addresses. This is because on migration MFNs in present PTEs are + translated to PFNs (canonicalised) so they may be translated back + to the new MFN in the destination domain (uncanonicalised). + + pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma() + set and clear the _PAGE_PRESENT bit using pte_set_flags(), + pte_clear_flags(), etc. + + In a Xen PV guest, these functions must translate MFNs to PFNs + when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting + _PAGE_PRESENT. + +His suggested fix converted p[te|md]_[set|clear]_flags to using +paravirt-friendly ops but this is overkill. He suggested an alternative of +using p[te|md]_modify in the NUMA page table operations but this does +more work than necessary and would require looking up a VMA for protections. 
+ +This patch modifies the NUMA page table operations to use paravirt friendly +operations to set/clear the flags of interest. Unfortunately this will take +a performance hit when updating the PTEs on CONFIG_PARAVIRT but I do not +see a way around it that does not break Xen. + +Cc: stable@vger.kernel.org +Signed-off-by: Mel Gorman +Acked-by: David Vrabel +Tested-by: David Vrabel +--- + include/asm-generic/pgtable.h | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h +index 34c7bdc..38a7437 100644 +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -680,24 +680,35 @@ static inline int pmd_numa(pmd_t pmd) + #ifndef pte_mknonnuma + static inline pte_t pte_mknonnuma(pte_t pte) + { +- pte = pte_clear_flags(pte, _PAGE_NUMA); +- return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); ++ pteval_t val = pte_val(pte); ++ ++ val &= ~_PAGE_NUMA; ++ val |= (_PAGE_PRESENT|_PAGE_ACCESSED); ++ return __pte(val); + } + #endif + + #ifndef pmd_mknonnuma + static inline pmd_t pmd_mknonnuma(pmd_t pmd) + { +- pmd = pmd_clear_flags(pmd, _PAGE_NUMA); +- return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); ++ pmdval_t val = pmd_val(pmd); ++ ++ val &= ~_PAGE_NUMA; ++ val |= (_PAGE_PRESENT|_PAGE_ACCESSED); ++ ++ return __pmd(val); + } + #endif + + #ifndef pte_mknuma + static inline pte_t pte_mknuma(pte_t pte) + { +- pte = pte_set_flags(pte, _PAGE_NUMA); +- return pte_clear_flags(pte, _PAGE_PRESENT); ++ pteval_t val = pte_val(pte); ++ ++ val &= ~_PAGE_PRESENT; ++ val |= _PAGE_NUMA; ++ ++ return __pte(val); + } + #endif + +@@ -716,8 +727,12 @@ static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, + #ifndef pmd_mknuma + static inline pmd_t pmd_mknuma(pmd_t pmd) + { +- pmd = pmd_set_flags(pmd, _PAGE_NUMA); +- return pmd_clear_flags(pmd, _PAGE_PRESENT); ++ pmdval_t val = pmd_val(pmd); ++ ++ val &= ~_PAGE_PRESENT; ++ val |= 
_PAGE_NUMA; ++ ++ return __pmd(val); + } + #endif + +-- +1.8.4.5 + +-- +To unsubscribe from this list: send the line "unsubscribe linux-kernel" in +the body of a message to majordomo@vger.kernel.org +More majordomo info at http://vger.kernel.org/majordomo-info.html +Please read the FAQ at http://www.tux.org/lkml/ diff --git a/kernel.spec b/kernel.spec index 8280d0bd..e995f5ac 100644 --- a/kernel.spec +++ b/kernel.spec @@ -68,7 +68,7 @@ %define have_pcmcia 0 %endif -%define rel 1 +%define rel 2 %define basever 3.14 %define postver .2 -- 2.43.0