X-Git-Url: http://git.pld-linux.org/?a=blobdiff_plain;f=kernel-small_fixes.patch;h=dc2c31ff9b1c67f8176b7c034942e6a78f7a9b5b;hb=96f9c67f08faa78b1f5f4ad8404e371d811a1108;hp=70e90479752f388e49d143d5db82452551063ac6;hpb=28b2046740f32f01e3b8c623715bb9418de6cc9f;p=packages%2Fkernel.git

diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch
index 70e90479..dc2c31ff 100644
--- a/kernel-small_fixes.patch
+++ b/kernel-small_fixes.patch
@@ -1,1196 +1,109 @@
---- linux-2.6.33/scripts/mod/modpost.c~	2010-02-24 19:52:17.000000000 +0100
-+++ linux-2.6.33/scripts/mod/modpost.c	2010-03-07 14:26:47.242168558 +0100
-@@ -15,7 +15,8 @@
- #include <stdio.h>
- #include <ctype.h>
- #include "modpost.h"
--#include "../../include/generated/autoconf.h"
-+// PLD architectures don't use CONFIG_SYMBOL_PREFIX
-+//#include "../../include/generated/autoconf.h"
- #include "../../include/linux/license.h"
- 
- /* Some toolchains use a `_' prefix for all user symbols. */
-
---- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~	2011-07-22 04:17:23.000000000 +0200
-+++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh	2011-08-25 21:26:04.799150642 +0200
-@@ -9,6 +9,12 @@
- 			$cc -print-file-name=lib${lib}.${ext} | grep -q /
- 			if [ $? -eq 0 ]; then
- 				echo "-l${lib}"
-+				for libt in tinfow tinfo ; do
-+					$cc -print-file-name=lib${libt}.${ext} | grep -q /
-+					if [ $? -eq 0 ]; then
-+						echo "-l${libt}"
-+					fi
-+				done
- 				exit
- 			fi
- 		done
-
-From 7a29ac474a47eb8cf212b45917683ae89d6fa13b Mon Sep 17 00:00:00 2001
-From: Chris Mason <clm@fb.com>
-Date: Tue, 10 Nov 2015 10:10:34 +1100
-Subject: xfs: give all workqueues rescuer threads
-
-We're consistently hitting deadlocks here with XFS on recent kernels.
-After some digging through the crash files, it looks like everyone in
-the system is waiting for XFS to reclaim memory.
-
-Something like this:
-
-PID: 2733434  TASK: ffff8808cd242800  CPU: 19  COMMAND: "java"
- #0 [ffff880019c53588] __schedule at ffffffff818c4df2
- #1 [ffff880019c535d8] schedule at ffffffff818c5517
- #2 [ffff880019c535f8] _xfs_log_force_lsn at ffffffff81316348
- #3 [ffff880019c53688] xfs_log_force_lsn at ffffffff813164fb
- #4 [ffff880019c536b8] xfs_iunpin_wait at ffffffff8130835e
- #5 [ffff880019c53728] xfs_reclaim_inode at ffffffff812fd453
- #6 [ffff880019c53778] xfs_reclaim_inodes_ag at ffffffff812fd8c7
- #7 [ffff880019c53928] xfs_reclaim_inodes_nr at ffffffff812fe433
- #8 [ffff880019c53958] xfs_fs_free_cached_objects at ffffffff8130d3b9
- #9 [ffff880019c53968] super_cache_scan at ffffffff811a6f73
-#10 [ffff880019c539c8] shrink_slab at ffffffff811460e6
-#11 [ffff880019c53aa8] shrink_zone at ffffffff8114a53f
-#12 [ffff880019c53b48] do_try_to_free_pages at ffffffff8114a8ba
-#13 [ffff880019c53be8] try_to_free_pages at ffffffff8114ad5a
-#14 [ffff880019c53c78] __alloc_pages_nodemask at ffffffff8113e1b8
-#15 [ffff880019c53d88] alloc_kmem_pages_node at ffffffff8113e671
-#16 [ffff880019c53dd8] copy_process at ffffffff8104f781
-#17 [ffff880019c53ec8] do_fork at ffffffff8105129c
-#18 [ffff880019c53f38] sys_clone at ffffffff810515b6
-#19 [ffff880019c53f48] stub_clone at ffffffff818c8e4d
-
-xfs_log_force_lsn is waiting for logs to get cleaned, which is waiting
-for IO, which is waiting for workers to complete the IO which is waiting
-for worker threads that don't exist yet:
-
-PID: 2752451  TASK: ffff880bd6bdda00  CPU: 37  COMMAND: "kworker/37:1"
- #0 [ffff8808d20abbb0] __schedule at ffffffff818c4df2
- #1 [ffff8808d20abc00] schedule at ffffffff818c5517
- #2 [ffff8808d20abc20] schedule_timeout at ffffffff818c7c6c
- #3 [ffff8808d20abcc0] wait_for_completion_killable at ffffffff818c6495
- #4 [ffff8808d20abd30] kthread_create_on_node at ffffffff8106ec82
- #5 [ffff8808d20abdf0] create_worker at ffffffff8106752f
- #6 [ffff8808d20abe40] worker_thread at ffffffff810699be
- #7 [ffff8808d20abec0] kthread at ffffffff8106ef59
- #8 [ffff8808d20abf50] ret_from_fork at ffffffff818c8ac8
-
-I think we should be using WQ_MEM_RECLAIM to make sure this thread
-pool makes progress when we're not able to allocate new workers.
-
-[dchinner: make all workqueues WQ_MEM_RECLAIM]
-
-Signed-off-by: Chris Mason <clm@fb.com>
-Reviewed-by: Dave Chinner <dchinner@redhat.com>
-Signed-off-by: Dave Chinner <david@fromorbit.com>
----
- fs/xfs/xfs_super.c | 7 ++++---
- 1 file changed, 4 insertions(+), 3 deletions(-)
-
-diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
-index 29531ec..65fbfb7 100644
---- a/fs/xfs/xfs_super.c
-+++ b/fs/xfs/xfs_super.c
-@@ -838,17 +838,18 @@ xfs_init_mount_workqueues(
- 		goto out_destroy_unwritten;
- 
- 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
--			WQ_FREEZABLE, 0, mp->m_fsname);
-+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
- 	if (!mp->m_reclaim_workqueue)
- 		goto out_destroy_cil;
- 
- 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
--			WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname);
-+			WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
-+			mp->m_fsname);
- 	if (!mp->m_log_workqueue)
- 		goto out_destroy_reclaim;
- 
- 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
--			WQ_FREEZABLE, 0, mp->m_fsname);
-+			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
- 	if (!mp->m_eofblocks_workqueue)
- 		goto out_destroy_log;
- 
--- 
-cgit v0.11.2
-
-commit c2d42c16ad83006a706d83e51a7268db04af733a
-Author: Andrew Morton <akpm@linux-foundation.org>
-Date:   Thu Nov 5 18:48:43 2015 -0800
-
-    mm/vmstat.c: uninline node_page_state()
-    
-    With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc
-    (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of
-    stack.  Uninlining node_page_state() reduces this to 440 bytes.
-    
-    The stack consumption issue is fixed by newer gcc (4.8.4) however with
-    that compiler this patch reduces the node.o text size from 7314 bytes to
-    4578.
-    
-    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-
-diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
-index 82e7db7..49dfe40 100644
---- a/include/linux/vmstat.h
-+++ b/include/linux/vmstat.h
-@@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
- }
- 
- #ifdef CONFIG_NUMA
--/*
-- * Determine the per node value of a stat item. This function
-- * is called frequently in a NUMA machine, so try to be as
-- * frugal as possible.
-- */
--static inline unsigned long node_page_state(int node,
--				 enum zone_stat_item item)
+; https://lkml.org/lkml/2019/7/10/244
+diff -ur linux-5.3/drivers/scsi/aacraid.org/aacraid.h linux-5.3/drivers/scsi/aacraid/aacraid.h
+--- linux-5.3/drivers/scsi/aacraid.org/aacraid.h	2019-11-01 22:42:37.011469816 +0100
++++ linux-5.3/drivers/scsi/aacraid/aacraid.h	2019-11-04 09:29:51.321486211 +0100
+@@ -2740,17 +2740,6 @@
+ int aac_rx_deliver_producer(struct fib * fib);
+ void aac_reinit_aif(struct aac_dev *aac, unsigned int index);
+ 
+-static inline int aac_is_src(struct aac_dev *dev)
 -{
--	struct zone *zones = NODE_DATA(node)->node_zones;
+-	u16 device = dev->pdev->device;
 -
--	return
--#ifdef CONFIG_ZONE_DMA
--		zone_page_state(&zones[ZONE_DMA], item) +
--#endif
--#ifdef CONFIG_ZONE_DMA32
--		zone_page_state(&zones[ZONE_DMA32], item) +
--#endif
--#ifdef CONFIG_HIGHMEM
--		zone_page_state(&zones[ZONE_HIGHMEM], item) +
--#endif
--		zone_page_state(&zones[ZONE_NORMAL], item) +
--		zone_page_state(&zones[ZONE_MOVABLE], item);
--}
- 
-+extern unsigned long node_page_state(int node, enum zone_stat_item item);
- extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
- 
- #else
-diff --git a/mm/vmstat.c b/mm/vmstat.c
-index fbf1448..ffcb4f5 100644
---- a/mm/vmstat.c
-+++ b/mm/vmstat.c
-@@ -591,6 +591,28 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
- 	else
- 		__inc_zone_state(z, NUMA_OTHER);
- }
-+
-+/*
-+ * Determine the per node value of a stat item.
-+ */
-+unsigned long node_page_state(int node, enum zone_stat_item item)
-+{
-+	struct zone *zones = NODE_DATA(node)->node_zones;
-+
-+	return
-+#ifdef CONFIG_ZONE_DMA
-+		zone_page_state(&zones[ZONE_DMA], item) +
-+#endif
-+#ifdef CONFIG_ZONE_DMA32
-+		zone_page_state(&zones[ZONE_DMA32], item) +
-+#endif
-+#ifdef CONFIG_HIGHMEM
-+		zone_page_state(&zones[ZONE_HIGHMEM], item) +
-+#endif
-+		zone_page_state(&zones[ZONE_NORMAL], item) +
-+		zone_page_state(&zones[ZONE_MOVABLE], item);
-+}
-+
- #endif
- 
- #ifdef CONFIG_COMPACTION
-commit 016c13daa5c9e4827eca703e2f0621c131f2cca3
-Author: Mel Gorman <mgorman@techsingularity.net>
-Date:   Fri Nov 6 16:28:18 2015 -0800
-
-    mm, page_alloc: use masks and shifts when converting GFP flags to migrate types
-    
-    This patch redefines which GFP bits are used for specifying mobility and
-    the order of the migrate types.  Once redefined it's possible to convert
-    GFP flags to a migrate type with a simple mask and shift.  The only
-    downside is that readers of OOM kill messages and allocation failures may
-    have been used to the existing values but scripts/gfp-translate will help.
-    
-    Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
-    Acked-by: Vlastimil Babka <vbabka@suse.cz>
-    Cc: Christoph Lameter <cl@linux.com>
-    Cc: David Rientjes <rientjes@google.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-    Cc: Michal Hocko <mhocko@suse.com>
-    Cc: Vitaly Wool <vitalywool@gmail.com>
-    Cc: Rik van Riel <riel@redhat.com>
-    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-
-diff --git a/include/linux/gfp.h b/include/linux/gfp.h
-index f92cbd2..440fca3 100644
---- a/include/linux/gfp.h
-+++ b/include/linux/gfp.h
-@@ -14,7 +14,7 @@ struct vm_area_struct;
- #define ___GFP_HIGHMEM		0x02u
- #define ___GFP_DMA32		0x04u
- #define ___GFP_MOVABLE		0x08u
--#define ___GFP_WAIT		0x10u
-+#define ___GFP_RECLAIMABLE	0x10u
- #define ___GFP_HIGH		0x20u
- #define ___GFP_IO		0x40u
- #define ___GFP_FS		0x80u
-@@ -29,7 +29,7 @@ struct vm_area_struct;
- #define ___GFP_NOMEMALLOC	0x10000u
- #define ___GFP_HARDWALL		0x20000u
- #define ___GFP_THISNODE		0x40000u
--#define ___GFP_RECLAIMABLE	0x80000u
-+#define ___GFP_WAIT		0x80000u
- #define ___GFP_NOACCOUNT	0x100000u
- #define ___GFP_NOTRACK		0x200000u
- #define ___GFP_NO_KSWAPD	0x400000u
-@@ -126,6 +126,7 @@ struct vm_area_struct;
- 
- /* This mask makes up all the page movable related flags */
- #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
-+#define GFP_MOVABLE_SHIFT 3
- 
- /* Control page allocator reclaim behavior */
- #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-@@ -152,14 +153,15 @@ struct vm_area_struct;
- /* Convert GFP flags to their corresponding migrate type */
- static inline int gfpflags_to_migratetype(const gfp_t gfp_flags)
- {
--	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
-+	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
-+	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
-+	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
- 
- 	if (unlikely(page_group_by_mobility_disabled))
- 		return MIGRATE_UNMOVABLE;
- 
- 	/* Group based on mobility */
--	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
--		((gfp_flags & __GFP_RECLAIMABLE) != 0);
-+	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
- }
- 
- #ifdef CONFIG_HIGHMEM
-diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
-index e326843..38bed71 100644
---- a/include/linux/mmzone.h
-+++ b/include/linux/mmzone.h
-@@ -37,8 +37,8 @@
- 
- enum {
- 	MIGRATE_UNMOVABLE,
--	MIGRATE_RECLAIMABLE,
- 	MIGRATE_MOVABLE,
-+	MIGRATE_RECLAIMABLE,
- 	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
- 	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
- #ifdef CONFIG_CMA
-commit 974a786e63c96a2401a78ddba926f34c128474f1
-Author: Mel Gorman <mgorman@techsingularity.net>
-Date:   Fri Nov 6 16:28:34 2015 -0800
-
-    mm, page_alloc: remove MIGRATE_RESERVE
-    
-    MIGRATE_RESERVE preserves an old property of the buddy allocator that
-    existed prior to fragmentation avoidance -- min_free_kbytes worth of pages
-    tended to remain contiguous until the only alternative was to fail the
-    allocation.  At the time it was discovered that high-order atomic
-    allocations relied on this property so MIGRATE_RESERVE was introduced.  A
-    later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch
-    deletes MIGRATE_RESERVE and supporting code so it'll be easier to review.
-    Note that this patch in isolation may look like a false regression if
-    someone was bisecting high-order atomic allocation failures.
-    
-    Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
-    Acked-by: Vlastimil Babka <vbabka@suse.cz>
-    Cc: Christoph Lameter <cl@linux.com>
-    Cc: David Rientjes <rientjes@google.com>
-    Cc: Johannes Weiner <hannes@cmpxchg.org>
-    Cc: Michal Hocko <mhocko@suse.com>
-    Cc: Vitaly Wool <vitalywool@gmail.com>
-    Cc: Rik van Riel <riel@redhat.com>
-    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-
-diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
-index 1e88aae..b86cfa3 100644
---- a/include/linux/mmzone.h
-+++ b/include/linux/mmzone.h
-@@ -39,8 +39,6 @@ enum {
- 	MIGRATE_UNMOVABLE,
- 	MIGRATE_MOVABLE,
- 	MIGRATE_RECLAIMABLE,
--	MIGRATE_PCPTYPES,	/* the number of types on the pcp lists */
--	MIGRATE_RESERVE = MIGRATE_PCPTYPES,
- #ifdef CONFIG_CMA
- 	/*
- 	 * MIGRATE_CMA migration type is designed to mimic the way
-@@ -63,6 +61,8 @@ enum {
- 	MIGRATE_TYPES
- };
- 
-+#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1)
-+
- #ifdef CONFIG_CMA
- #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
- #else
-@@ -429,12 +429,6 @@ struct zone {
- 
- 	const char		*name;
- 
--	/*
--	 * Number of MIGRATE_RESERVE page block. To maintain for just
--	 * optimization. Protected by zone->lock.
--	 */
--	int			nr_migrate_reserve_block;
--
- #ifdef CONFIG_MEMORY_ISOLATION
- 	/*
- 	 * Number of isolated pageblock. It is used to solve incorrect
-diff --git a/mm/huge_memory.c b/mm/huge_memory.c
-index 9812d46..dabd247 100644
---- a/mm/huge_memory.c
-+++ b/mm/huge_memory.c
-@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void)
- 	for_each_populated_zone(zone)
- 		nr_zones++;
- 
--	/* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */
-+	/* Ensure 2 pageblocks are free to assist fragmentation avoidance */
- 	recommended_min = pageblock_nr_pages * nr_zones * 2;
- 
- 	/*
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 8dc6e3c..5888126 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
- 			if (unlikely(has_isolate_pageblock(zone)))
- 				mt = get_pageblock_migratetype(page);
- 
--			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- 			__free_one_page(page, page_to_pfn(page), zone, 0, mt);
- 			trace_mm_page_pcpu_drain(page, 0, mt);
- 		} while (--to_free && --batch_free && !list_empty(list));
-@@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
-  * the free lists for the desirable migrate type are depleted
-  */
- static int fallbacks[MIGRATE_TYPES][4] = {
--	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,     MIGRATE_RESERVE },
--	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,     MIGRATE_RESERVE },
--	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
-+	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
-+	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
-+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
- #ifdef CONFIG_CMA
--	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
-+	[MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
- #endif
--	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
- #ifdef CONFIG_MEMORY_ISOLATION
--	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
-+	[MIGRATE_ISOLATE]     = { MIGRATE_TYPES }, /* Never used */
- #endif
- };
- 
-@@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
- 	*can_steal = false;
- 	for (i = 0;; i++) {
- 		fallback_mt = fallbacks[migratetype][i];
--		if (fallback_mt == MIGRATE_RESERVE)
-+		if (fallback_mt == MIGRATE_TYPES)
- 			break;
- 
- 		if (list_empty(&area->free_list[fallback_mt]))
-@@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
- {
- 	struct page *page;
- 
--retry_reserve:
- 	page = __rmqueue_smallest(zone, order, migratetype);
--
--	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
-+	if (unlikely(!page)) {
- 		if (migratetype == MIGRATE_MOVABLE)
- 			page = __rmqueue_cma_fallback(zone, order);
- 
- 		if (!page)
- 			page = __rmqueue_fallback(zone, order, migratetype);
--
--		/*
--		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
--		 * is used because __rmqueue_smallest is an inline function
--		 * and we want just one call site
--		 */
--		if (!page) {
--			migratetype = MIGRATE_RESERVE;
--			goto retry_reserve;
--		}
- 	}
- 
- 	trace_mm_page_alloc_zone_locked(page, order, migratetype);
-@@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type)
- 		[MIGRATE_UNMOVABLE]	= 'U',
- 		[MIGRATE_RECLAIMABLE]	= 'E',
- 		[MIGRATE_MOVABLE]	= 'M',
--		[MIGRATE_RESERVE]	= 'R',
- #ifdef CONFIG_CMA
- 		[MIGRATE_CMA]		= 'C',
- #endif
-@@ -4303,120 +4288,6 @@ static inline unsigned long wait_table_bits(unsigned long size)
- }
- 
- /*
-- * Check if a pageblock contains reserved pages
-- */
--static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
--{
--	unsigned long pfn;
--
--	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
--		if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
--			return 1;
--	}
+-	if (device == PMC_DEVICE_S6 ||
+-		device == PMC_DEVICE_S7 ||
+-		device == PMC_DEVICE_S8)
+-		return 1;
 -	return 0;
 -}
 -
--/*
-- * Mark a number of pageblocks as MIGRATE_RESERVE. The number
-- * of blocks reserved is based on min_wmark_pages(zone). The memory within
-- * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
-- * higher will lead to a bigger reserve which will get freed as contiguous
-- * blocks as reclaim kicks in
-- */
--static void setup_zone_migrate_reserve(struct zone *zone)
--{
--	unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
--	struct page *page;
--	unsigned long block_migratetype;
--	int reserve;
--	int old_reserve;
--
--	/*
--	 * Get the start pfn, end pfn and the number of blocks to reserve
--	 * We have to be careful to be aligned to pageblock_nr_pages to
--	 * make sure that we always check pfn_valid for the first page in
--	 * the block.
--	 */
--	start_pfn = zone->zone_start_pfn;
--	end_pfn = zone_end_pfn(zone);
--	start_pfn = roundup(start_pfn, pageblock_nr_pages);
--	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
--							pageblock_order;
--
--	/*
--	 * Reserve blocks are generally in place to help high-order atomic
--	 * allocations that are short-lived. A min_free_kbytes value that
--	 * would result in more than 2 reserve blocks for atomic allocations
--	 * is assumed to be in place to help anti-fragmentation for the
--	 * future allocation of hugepages at runtime.
--	 */
--	reserve = min(2, reserve);
--	old_reserve = zone->nr_migrate_reserve_block;
--
--	/* When memory hot-add, we almost always need to do nothing */
--	if (reserve == old_reserve)
--		return;
--	zone->nr_migrate_reserve_block = reserve;
--
--	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
--		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
--			return;
--
--		if (!pfn_valid(pfn))
--			continue;
--		page = pfn_to_page(pfn);
--
--		/* Watch out for overlapping nodes */
--		if (page_to_nid(page) != zone_to_nid(zone))
--			continue;
--
--		block_migratetype = get_pageblock_migratetype(page);
--
--		/* Only test what is necessary when the reserves are not met */
--		if (reserve > 0) {
--			/*
--			 * Blocks with reserved pages will never free, skip
--			 * them.
--			 */
--			block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
--			if (pageblock_is_reserved(pfn, block_end_pfn))
--				continue;
--
--			/* If this block is reserved, account for it */
--			if (block_migratetype == MIGRATE_RESERVE) {
--				reserve--;
--				continue;
--			}
--
--			/* Suitable for reserving if this block is movable */
--			if (block_migratetype == MIGRATE_MOVABLE) {
--				set_pageblock_migratetype(page,
--							MIGRATE_RESERVE);
--				move_freepages_block(zone, page,
--							MIGRATE_RESERVE);
--				reserve--;
--				continue;
--			}
--		} else if (!old_reserve) {
--			/*
--			 * At boot time we don't need to scan the whole zone
--			 * for turning off MIGRATE_RESERVE.
--			 */
--			break;
--		}
--
--		/*
--		 * If the reserve is met and this is a previous reserved block,
--		 * take it back
--		 */
--		if (block_migratetype == MIGRATE_RESERVE) {
--			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
--			move_freepages_block(zone, page, MIGRATE_MOVABLE);
--		}
--	}
--}
--
--/*
-  * Initially all pages are reserved - free ones are freed
-  * up by free_all_bootmem() once the early boot process is
-  * done. Non-atomic initialization, single-pass.
-@@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- 		 * movable at startup. This will force kernel allocations
- 		 * to reserve their blocks rather than leaking throughout
- 		 * the address space during boot when many long-lived
--		 * kernel allocations are made. Later some blocks near
--		 * the start are marked MIGRATE_RESERVE by
--		 * setup_zone_migrate_reserve()
-+		 * kernel allocations are made.
- 		 *
- 		 * bitmap is created for zone's valid pfn range. but memmap
- 		 * can be created for invalid pages (for alignment)
-@@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void)
- 			high_wmark_pages(zone) - low_wmark_pages(zone) -
- 			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
- 
--		setup_zone_migrate_reserve(zone);
- 		spin_unlock_irqrestore(&zone->lock, flags);
- 	}
- 
-diff --git a/mm/vmstat.c b/mm/vmstat.c
-index ffcb4f5..5b289dc 100644
---- a/mm/vmstat.c
-+++ b/mm/vmstat.c
-@@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
- 	"Unmovable",
- 	"Reclaimable",
- 	"Movable",
--	"Reserve",
- #ifdef CONFIG_CMA
- 	"CMA",
- #endif
-diff --git a/mm/backing-dev.c b/mm/backing-dev.c
-index 8ed2ffd963c5..7340353f8aea 100644
---- a/mm/backing-dev.c
-+++ b/mm/backing-dev.c
-@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
-  * jiffies for either a BDI to exit congestion of the given @sync queue
-  * or a write to complete.
-  *
-- * In the absence of zone congestion, cond_resched() is called to yield
-- * the processor if necessary but otherwise does not sleep.
-+ * In the absence of zone congestion, a short sleep or a cond_resched is
-+ * performed to yield the processor and to allow other subsystems to make
-+ * a forward progress.
-  *
-  * The return value is 0 if the sleep is for the full timeout. Otherwise,
-  * it is the number of jiffies that were still remaining when the function
-@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
- 	 */
- 	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
- 	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
--		cond_resched();
-+
-+		/*
-+		 * Memory allocation/reclaim might be called from a WQ
-+		 * context and the current implementation of the WQ
-+		 * concurrency control doesn't recognize that a particular
-+		 * WQ is congested if the worker thread is looping without
-+		 * ever sleeping. Therefore we have to do a short sleep
-+		 * here rather than calling cond_resched().
-+		 */
-+		if (current->flags & PF_WQ_WORKER)
-+			schedule_timeout(1);
-+		else
-+			cond_resched();
- 
- 		/* In case we scheduled, work out time remaining */
- 		ret = timeout - (jiffies - start);
-diff --git a/mm/vmstat.c b/mm/vmstat.c
-index 45dcbcb5c594..0975da8e3432 100644
---- a/mm/vmstat.c
-+++ b/mm/vmstat.c
-@@ -1381,6 +1381,7 @@ static const struct file_operations proc_vmstat_file_operations = {
- #endif /* CONFIG_PROC_FS */
- 
- #ifdef CONFIG_SMP
-+static struct workqueue_struct *vmstat_wq;
- static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
- int sysctl_stat_interval __read_mostly = HZ;
- static cpumask_var_t cpu_stat_off;
-@@ -1393,7 +1394,7 @@ static void vmstat_update(struct work_struct *w)
- 		 * to occur in the future. Keep on running the
- 		 * update worker thread.
- 		 */
--		schedule_delayed_work_on(smp_processor_id(),
-+		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
- 			this_cpu_ptr(&vmstat_work),
- 			round_jiffies_relative(sysctl_stat_interval));
- 	} else {
-@@ -1462,7 +1463,7 @@ static void vmstat_shepherd(struct work_struct *w)
- 		if (need_update(cpu) &&
- 			cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
- 
--			schedule_delayed_work_on(cpu,
-+			queue_delayed_work_on(cpu, vmstat_wq,
- 				&per_cpu(vmstat_work, cpu), 0);
- 
- 	put_online_cpus();
-@@ -1551,6 +1552,7 @@ static int __init setup_vmstat(void)
- 
- 	start_shepherd_timer();
- 	cpu_notifier_register_done();
-+	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
- #endif
- #ifdef CONFIG_PROC_FS
- 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
--- 
-2.6.2
-
-From 09ccfd238e5a0e670d8178cf50180ea81ae09ae1 Mon Sep 17 00:00:00 2001
-From: WANG Cong <xiyou.wangcong@gmail.com>
-Date: Mon, 14 Dec 2015 13:48:36 -0800
-Subject: pptp: verify sockaddr_len in pptp_bind() and pptp_connect()
-
-Reported-by: Dmitry Vyukov <dvyukov@gmail.com>
-Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
----
- drivers/net/ppp/pptp.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
-index fc69e41..597c53e 100644
---- a/drivers/net/ppp/pptp.c
-+++ b/drivers/net/ppp/pptp.c
-@@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr,
- 	struct pptp_opt *opt = &po->proto.pptp;
- 	int error = 0;
- 
-+	if (sockaddr_len < sizeof(struct sockaddr_pppox))
-+		return -EINVAL;
-+
- 	lock_sock(sk);
- 
- 	opt->src_addr = sp->sa_addr.pptp;
-@@ -440,6 +443,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr,
- 	struct flowi4 fl4;
- 	int error = 0;
- 
-+	if (sockaddr_len < sizeof(struct sockaddr_pppox))
-+		return -EINVAL;
-+
- 	if (sp->sa_protocol != PX_PROTO_PPTP)
- 		return -EINVAL;
- 
--- 
-cgit v0.11.2
-
-commit cc57858831e3e9678291de730c4b4d2e52a19f59
-Author: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
-Date:   Fri Dec 18 15:19:16 2015 +1100
-
-    md/raid10: fix data corruption and crash during resync
-    
-    The commit c31df25f20e3 ("md/raid10: make sync_request_write() call
-    bio_copy_data()") replaced manual data copying with bio_copy_data() but
-    it doesn't work as intended. The source bio (fbio) is already processed,
-    so its bvec_iter has bi_size == 0 and bi_idx == bi_vcnt.  Because of
-    this, bio_copy_data() either does not copy anything, or worse, copies
-    data from the ->bi_next bio if it is set.  This causes wrong data to be
-    written to drives during resync and sometimes lockups/crashes in
-    bio_copy_data():
-    
-    [  517.338478] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [md126_raid10:3319]
-    [  517.347324] Modules linked in: raid10 xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables x86_pkg_temp_thermal coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul cryptd shpchp pcspkr ipmi_si ipmi_msghandler tpm_crb acpi_power_meter acpi_cpufreq ext4 mbcache jbd2 sr_mod cdrom sd_mod e1000e ax88179_178a usbnet mii ahci ata_generic crc32c_intel libahci ptp pata_acpi libata pps_core wmi sunrpc dm_mirror dm_region_hash dm_log dm_mod
-    [  517.440555] CPU: 0 PID: 3319 Comm: md126_raid10 Not tainted 4.3.0-rc6+ #1
-    [  517.448384] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS PLYDCRB1.86B.0055.D14.1509221924 09/22/2015
-    [  517.459768] task: ffff880153773980 ti: ffff880150df8000 task.ti: ffff880150df8000
-    [  517.468529] RIP: 0010:[<ffffffff812e1888>]  [<ffffffff812e1888>] bio_copy_data+0xc8/0x3c0
-    [  517.478164] RSP: 0018:ffff880150dfbc98  EFLAGS: 00000246
-    [  517.484341] RAX: ffff880169356688 RBX: 0000000000001000 RCX: 0000000000000000
-    [  517.492558] RDX: 0000000000000000 RSI: ffffea0001ac2980 RDI: ffffea0000d835c0
-    [  517.500773] RBP: ffff880150dfbd08 R08: 0000000000000001 R09: ffff880153773980
-    [  517.508987] R10: ffff880169356600 R11: 0000000000001000 R12: 0000000000010000
-    [  517.517199] R13: 000000000000e000 R14: 0000000000000000 R15: 0000000000001000
-    [  517.525412] FS:  0000000000000000(0000) GS:ffff880174a00000(0000) knlGS:0000000000000000
-    [  517.534844] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
-    [  517.541507] CR2: 00007f8a044d5fed CR3: 0000000169504000 CR4: 00000000001406f0
-    [  517.549722] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
-    [  517.557929] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
-    [  517.566144] Stack:
-    [  517.568626]  ffff880174a16bc0 ffff880153773980 ffff880169356600 0000000000000000
-    [  517.577659]  0000000000000001 0000000000000001 ffff880153773980 ffff88016a61a800
-    [  517.586715]  ffff880150dfbcf8 0000000000000001 ffff88016dd209e0 0000000000001000
-    [  517.595773] Call Trace:
-    [  517.598747]  [<ffffffffa043ef95>] raid10d+0xfc5/0x1690 [raid10]
-    [  517.605610]  [<ffffffff816697ae>] ? __schedule+0x29e/0x8e2
-    [  517.611987]  [<ffffffff814ff206>] md_thread+0x106/0x140
-    [  517.618072]  [<ffffffff810c1d80>] ? wait_woken+0x80/0x80
-    [  517.624252]  [<ffffffff814ff100>] ? super_1_load+0x520/0x520
-    [  517.630817]  [<ffffffff8109ef89>] kthread+0xc9/0xe0
-    [  517.636506]  [<ffffffff8109eec0>] ? flush_kthread_worker+0x70/0x70
-    [  517.643653]  [<ffffffff8166d99f>] ret_from_fork+0x3f/0x70
-    [  517.649929]  [<ffffffff8109eec0>] ? flush_kthread_worker+0x70/0x70
-    
-    Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
-    Reviewed-by: Shaohua Li <shli@kernel.org>
-    Cc: stable@vger.kernel.org (v4.2+)
-    Fixes: c31df25f20e3 ("md/raid10: make sync_request_write() call bio_copy_data()")
-    Signed-off-by: NeilBrown <neilb@suse.com>
-
-diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
-index 41d70bc..84e597e 100644
---- a/drivers/md/raid10.c
-+++ b/drivers/md/raid10.c
-@@ -1946,6 +1946,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
- 
- 	first = i;
- 	fbio = r10_bio->devs[i].bio;
-+	fbio->bi_iter.bi_size = r10_bio->sectors << 9;
-+	fbio->bi_iter.bi_idx = 0;
- 
- 	vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
- 	/* now find blocks with errors */
-@@ -1989,7 +1991,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
- 		bio_reset(tbio);
- 
- 		tbio->bi_vcnt = vcnt;
--		tbio->bi_iter.bi_size = r10_bio->sectors << 9;
-+		tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
- 		tbio->bi_rw = WRITE;
- 		tbio->bi_private = r10_bio;
- 		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
-From: Michal Hocko <mhocko@suse.com>
-
-kernel test robot has reported the following crash:
-[    3.870718] BUG: unable to handle kernel NULL pointer dereferenceNULL pointer dereference at 00000100
- at 00000100
-[    3.872615] IP: [<c1074df6>] __queue_work+0x26/0x390 [<c1074df6>] __queue_work+0x26/0x390
-[    3.873758] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 *pde = f000ff53f000ff53
-[    3.875096] Oops: 0000 [#1] PREEMPT PREEMPT SMP SMP
-[    3.876130] CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.4.0-rc4-00139-g373ccbe #1
-[    3.878135] Workqueue: events vmstat_shepherd
-[    3.879207] task: cb684600 ti: cb7ba000 task.ti: cb7ba000
-[    3.880445] EIP: 0060:[<c1074df6>] EFLAGS: 00010046 CPU: 0
-[    3.881704] EIP is at __queue_work+0x26/0x390
-[    3.882823] EAX: 00000046 EBX: cbb37800 ECX: cbb37800 EDX: 00000000
-[    3.884457] ESI: 00000000 EDI: 00000000 EBP: cb7bbe68 ESP: cb7bbe38
-[    3.886005]  DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
-[    3.887229] CR0: 8005003b CR2: 00000100 CR3: 01fd5000 CR4: 000006b0
-[    3.888663] Stack:
-[    3.895204] Call Trace:
-[    3.895854]  [<c1a381dd>] ? mutex_unlock+0xd/0x10
-[    3.897120]  [<c1075221>] __queue_delayed_work+0xa1/0x160
-[    3.898530]  [<c10764c6>] queue_delayed_work_on+0x36/0x60
-[    3.899790]  [<c11494bd>] vmstat_shepherd+0xad/0xf0
-[    3.900899]  [<c1075a7a>] process_one_work+0x1aa/0x4c0
-[    3.902093]  [<c10759e2>] ? process_one_work+0x112/0x4c0
-[    3.903520]  [<c10ac31e>] ? do_raw_spin_lock+0xe/0x150
-[    3.904853]  [<c1075dd1>] worker_thread+0x41/0x440
-[    3.906023]  [<c1075d90>] ? process_one_work+0x4c0/0x4c0
-[    3.907242]  [<c107b7c0>] kthread+0xb0/0xd0
-[    3.908188]  [<c1a3c651>] ret_from_kernel_thread+0x21/0x40
-[    3.909601]  [<c107b710>] ? __kthread_parkme+0x80/0x80
-
-The reason is that start_shepherd_timer schedules the shepherd work item
-which uses vmstat_wq (vmstat_shepherd) before setup_vmstat allocates
-that workqueue so if the further initialization takes more than HZ
-we might end up scheduling on a NULL vmstat_wq. This is really unlikely
-but not impossible.
-
-Fixes: 373ccbe59270 ("mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't make any progress")
-Reported-by: kernel test robot <ying.huang@linux.intel.com>
-Signed-off-by: Michal Hocko <mhocko@suse.com>
----
-Hi Linus,
-I am not marking this for stable because I hope we can sneak it into 4.4.
-The patch is trivial and obvious. I am sorry about the breakage. If you prefer 
-to postpone it to 4.5-rc1 because this is not really that critical and shouldn't
-happen most of the time then I will repost with stable tag added.
-
-Thanks!
-
- mm/vmstat.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/mm/vmstat.c b/mm/vmstat.c
-index 4ebc17d948cb..c54fd2924f25 100644
---- a/mm/vmstat.c
-+++ b/mm/vmstat.c
-@@ -1483,6 +1483,7 @@ static void __init start_shepherd_timer(void)
- 		BUG();
- 	cpumask_copy(cpu_stat_off, cpu_online_mask);
- 
-+	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
- 	schedule_delayed_work(&shepherd,
- 		round_jiffies_relative(sysctl_stat_interval));
- }
-@@ -1550,7 +1551,6 @@ static int __init setup_vmstat(void)
- 
- 	start_shepherd_timer();
- 	cpu_notifier_register_done();
--	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
- #endif
- #ifdef CONFIG_PROC_FS
- 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
--- 
-2.6.4
-
-From: Dave Chinner <dchinner@redhat.com>
-
-When we do inode readahead in log recovery, we can do the
-readahead before we've replayed the icreate transaction that stamps
-the buffer with inode cores. The inode readahead verifier catches
-this and marks the buffer as !done to indicate that it doesn't yet
-contain valid inodes.
-
-In adding buffer error notification (i.e. setting b_error = -EIO at
-the same time as we clear the done flag) to such a readahead
-verifier failure, we can then get subsequent inode recovery failing
-with this error:
-
-XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32
-
-This occurs when readahead completion races with icreate item replay
-such as:
-
-	inode readahead
-		find buffer
-		lock buffer
-		submit RA io
-	....
-	icreate recovery
-	    xfs_trans_get_buffer
-		find buffer
-		lock buffer
-		<blocks on RA completion>
-	.....
-	<ra completion>
-		fails verifier
-		clear XBF_DONE
-		set bp->b_error = -EIO
-		release and unlock buffer
-	<icreate gains lock>
-	icreate initialises buffer
-	marks buffer as done
-	adds buffer to delayed write queue
-	releases buffer
-
-At this point, we have an initialised inode buffer that is up to
-date but has an -EIO state registered against it. When we finally
-get to recovering an inode in that buffer:
-
-	inode item recovery
-	    xfs_trans_read_buffer
-		find buffer
-		lock buffer
-		sees XBF_DONE is set, returns buffer
-	    sees bp->b_error is set
-		fail log recovery!
-
-Essentially, we need xfs_trans_get_buf_map() to clear the error status of
-the buffer when doing a lookup. This function returns uninitialised
-buffers, so the buffer returned can not be in an error state and
-none of the code that uses this function expects b_error to be set
-on return. Indeed, there is an ASSERT(!bp->b_error); in the
-transaction case in xfs_trans_get_buf_map() that would have caught
-this if log recovery used transactions....
-
-This patch firstly changes the inode readahead failure to set -EIO
-on the buffer, and secondly changes xfs_buf_get_map() to never
-return a buffer with an error state set so this first change doesn't
-cause unexpected log recovery failures.
-
-Signed-off-by: Dave Chinner <dchinner@redhat.com>
----
- fs/xfs/libxfs/xfs_inode_buf.c | 12 +++++++-----
- fs/xfs/xfs_buf.c              |  7 +++++++
- 2 files changed, 14 insertions(+), 5 deletions(-)
-
-diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
-index 1b8d98a..ff17c48 100644
---- a/fs/xfs/libxfs/xfs_inode_buf.c
-+++ b/fs/xfs/libxfs/xfs_inode_buf.c
-@@ -62,11 +62,12 @@ xfs_inobp_check(
-  * has not had the inode cores stamped into it. Hence for readahead, the buffer
-  * may be potentially invalid.
-  *
-- * If the readahead buffer is invalid, we don't want to mark it with an error,
-- * but we do want to clear the DONE status of the buffer so that a followup read
-- * will re-read it from disk. This will ensure that we don't get an unnecessary
-- * warnings during log recovery and we don't get unnecssary panics on debug
-- * kernels.
-+ * If the readahead buffer is invalid, we need to mark it with an error and
-+ * clear the DONE status of the buffer so that a followup read will re-read it
-+ * from disk. We don't report the error otherwise to avoid warnings during log
-+ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
-+ * because all we want to do is say readahead failed; there is no-one to report
-+ * the error to, so this will distinguish it from a non-ra verifier failure.
-  */
- static void
- xfs_inode_buf_verify(
-@@ -93,6 +94,7 @@ xfs_inode_buf_verify(
- 						XFS_RANDOM_ITOBP_INOTOBP))) {
- 			if (readahead) {
- 				bp->b_flags &= ~XBF_DONE;
-+				xfs_buf_ioerror(bp, -EIO);
- 				return;
- 			}
- 
-diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
-index 45a8ea7..ae86b16 100644
---- a/fs/xfs/xfs_buf.c
-+++ b/fs/xfs/xfs_buf.c
-@@ -604,6 +604,13 @@ found:
- 		}
- 	}
- 
-+	/*
-+	 * Clear b_error if this is a lookup from a caller that doesn't expect
-+	 * valid data to be found in the buffer.
-+	 */
-+	if (!(flags & XBF_READ))
-+		xfs_buf_ioerror(bp, 0);
-+
- 	XFS_STATS_INC(xb_get);
- 	trace_xfs_buf_get(bp, flags, _RET_IP_);
- 	return bp;
--- 
-2.5.0
-
-_______________________________________________
-xfs mailing list
-xfs@oss.sgi.com
-http://oss.sgi.com/mailman/listinfo/xfs
-From: Dave Chinner <dchinner@redhat.com>
-
-When we do dquot readahead in log recovery, we do not use a verifier
-as the underlying buffer may not have dquots in it. e.g. the
-allocation operation hasn't yet been replayed. Hence we do not want
-to fail recovery because we detect an operation to be replayed has
-not been run yet. This problem was addressed for inodes in commit
-d891400 ("xfs: inode buffers may not be valid during recovery
-readahead") but the problem was not recognised to exist for dquots
-and their buffers as the dquot readahead did not have a verifier.
-
-The result of not using a verifier is that when the buffer is then
-next read to replay a dquot modification, the dquot buffer verifier
-will only be attached to the buffer if *readahead is not complete*.
-Hence we can read the buffer, replay the dquot changes and then add
-it to the delwri submission list without it having a verifier
-attached to it. This then generates warnings in xfs_buf_ioapply(),
-which catches and warns about this case.
-
-Fix this and make it handle the same readahead verifier error cases
-as for inode buffers by adding a new readahead verifier that has a
-write operation as well as a read operation that marks the buffer as
-not done if any corruption is detected.  Also make sure we don't run
-readahead if the dquot buffer has been marked as cancelled by
-recovery.
-
-This will result in readahead either succeeding and the buffer
-having a valid write verifier, or readahead failing and the buffer
-state requiring the subsequent read to resubmit the IO with the new
-verifier.  In either case, this will result in the buffer always
-ending up with a valid write verifier on it.
-
-Note: we also need to fix the inode buffer readahead error handling
-to mark the buffer with EIO. Brian noticed during review that the code
-I copied from there was wrong, so fix it at the same time. Add comments
-linking the two functions that handle readahead verifier errors
-together so we don't forget this behavioural link in future.
-
-cc: <stable@vger.kernel.org> # 3.12 - current
-Signed-off-by: Dave Chinner <dchinner@redhat.com>
-Reviewed-by: Brian Foster <bfoster@redhat.com>
-Signed-off-by: Dave Chinner <david@fromorbit.com>
----
- fs/xfs/libxfs/xfs_dquot_buf.c  | 36 ++++++++++++++++++++++++++++++------
- fs/xfs/libxfs/xfs_inode_buf.c  |  2 ++
- fs/xfs/libxfs/xfs_quota_defs.h |  2 +-
- fs/xfs/libxfs/xfs_shared.h     |  1 +
- fs/xfs/xfs_log_recover.c       |  9 +++++++--
- 5 files changed, 41 insertions(+), 9 deletions(-)
-
-diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
-index 11cefb2..3cc3cf7 100644
---- a/fs/xfs/libxfs/xfs_dquot_buf.c
-+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
-@@ -54,7 +54,7 @@ xfs_dqcheck(
- 	xfs_dqid_t	 id,
- 	uint		 type,	  /* used only when IO_dorepair is true */
- 	uint		 flags,
--	char		 *str)
-+	const char	 *str)
+ static inline int aac_supports_2T(struct aac_dev *dev)
  {
- 	xfs_dqblk_t	 *d = (xfs_dqblk_t *)ddq;
- 	int		errs = 0;
-@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
- STATIC bool
- xfs_dquot_buf_verify(
- 	struct xfs_mount	*mp,
--	struct xfs_buf		*bp)
-+	struct xfs_buf		*bp,
-+	int			warn)
+ 	return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64);
+diff -ur linux-5.3/drivers/scsi/aacraid.org/comminit.c linux-5.3/drivers/scsi/aacraid/comminit.c
+--- linux-5.3/drivers/scsi/aacraid.org/comminit.c	2019-11-01 22:42:37.014803249 +0100
++++ linux-5.3/drivers/scsi/aacraid/comminit.c	2019-11-04 09:29:51.321486211 +0100
+@@ -41,8 +41,11 @@
  {
- 	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
- 	xfs_dqid_t		id = 0;
-@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
- 		if (i == 0)
- 			id = be32_to_cpu(ddq->d_id);
- 
--		error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
--				       "xfs_dquot_buf_verify");
-+		error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
- 		if (error)
- 			return false;
- 	}
-@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(
+ 	u32 status = 0;
  
- 	if (!xfs_dquot_buf_verify_crc(mp, bp))
- 		xfs_buf_ioerror(bp, -EFSBADCRC);
--	else if (!xfs_dquot_buf_verify(mp, bp))
-+	else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
- 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
- 
- 	if (bp->b_error)
-@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(
- }
- 
- /*
-+ * readahead errors are silent and simply leave the buffer as !done so a real
-+ * read will then be run with the xfs_dquot_buf_ops verifier. See
-+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
-+ * reporting the failure.
-+ */
-+static void
-+xfs_dquot_buf_readahead_verify(
-+	struct xfs_buf	*bp)
-+{
-+	struct xfs_mount	*mp = bp->b_target->bt_mount;
-+
-+	if (!xfs_dquot_buf_verify_crc(mp, bp) ||
-+	    !xfs_dquot_buf_verify(mp, bp, 0)) {
-+		xfs_buf_ioerror(bp, -EIO);
-+		bp->b_flags &= ~XBF_DONE;
+-	if (aac_is_src(dev))
++	if (dev->pdev->device == PMC_DEVICE_S6 ||
++		dev->pdev->device == PMC_DEVICE_S7 ||
++		dev->pdev->device == PMC_DEVICE_S8) {
+ 		status = src_readl(dev, MUnit.OMR);
 +	}
-+}
-+
-+/*
-  * we don't calculate the CRC here as that is done when the dquot is flushed to
-  * the buffer after the update is done. This ensures that the dquot in the
-  * buffer always has an up-to-date CRC value.
-@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
- {
- 	struct xfs_mount	*mp = bp->b_target->bt_mount;
- 
--	if (!xfs_dquot_buf_verify(mp, bp)) {
-+	if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
- 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
- 		xfs_verifier_error(bp);
- 		return;
-@@ -287,3 +306,8 @@ const struct xfs_buf_ops xfs_dquot_buf_ops = {
- 	.verify_write = xfs_dquot_buf_write_verify,
- };
- 
-+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
-+
-+	.verify_read = xfs_dquot_buf_readahead_verify,
-+	.verify_write = xfs_dquot_buf_write_verify,
-+};
-diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
-index ff17c48..1aabfda 100644
---- a/fs/xfs/libxfs/xfs_inode_buf.c
-+++ b/fs/xfs/libxfs/xfs_inode_buf.c
-@@ -68,6 +68,8 @@ xfs_inobp_check(
-  * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
-  * because all we want to do is say readahead failed; there is no-one to report
-  * the error to, so this will distinguish it from a non-ra verifier failure.
-+ * Changes to this readahead error behavour also need to be reflected in
-+ * xfs_dquot_buf_readahead_verify().
-  */
- static void
- xfs_inode_buf_verify(
-diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
-index 1b0a083..f51078f 100644
---- a/fs/xfs/libxfs/xfs_quota_defs.h
-+++ b/fs/xfs/libxfs/xfs_quota_defs.h
-@@ -153,7 +153,7 @@ typedef __uint16_t	xfs_qwarncnt_t;
- #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
- 
- extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
--		       xfs_dqid_t id, uint type, uint flags, char *str);
-+		       xfs_dqid_t id, uint type, uint flags, const char *str);
- extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
- 
- #endif	/* __XFS_QUOTA_H__ */
-diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
-index 5be5297..15c3ceb 100644
---- a/fs/xfs/libxfs/xfs_shared.h
-+++ b/fs/xfs/libxfs/xfs_shared.h
-@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
- extern const struct xfs_buf_ops xfs_inode_buf_ops;
- extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
- extern const struct xfs_buf_ops xfs_dquot_buf_ops;
-+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
- extern const struct xfs_buf_ops xfs_sb_buf_ops;
- extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
- extern const struct xfs_buf_ops xfs_symlink_buf_ops;
-diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
-index c5ecaac..5991cdc 100644
---- a/fs/xfs/xfs_log_recover.c
-+++ b/fs/xfs/xfs_log_recover.c
-@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2(
- 	struct xfs_disk_dquot	*recddq;
- 	struct xfs_dq_logformat	*dq_f;
- 	uint			type;
-+	int			len;
- 
- 
- 	if (mp->m_qflags == 0)
-@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2(
- 	ASSERT(dq_f);
- 	ASSERT(dq_f->qlf_len == 1);
- 
--	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
--			  XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
-+	len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
-+	if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
-+		return;
-+
-+	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
-+			  &xfs_dquot_buf_ra_ops);
+ 	return (status & AAC_INT_MODE_MSIX);
  }
  
- STATIC void
--- 
-2.5.0
+@@ -349,7 +352,8 @@
+ 	/* FIB should be freed only after getting the response from the F/W */
+ 	if (status != -ERESTARTSYS)
+ 		aac_fib_free(fibctx);
+-	if (aac_is_src(dev) &&
++	if ((dev->pdev->device == PMC_DEVICE_S7 ||
++	     dev->pdev->device == PMC_DEVICE_S8) &&
+ 	     dev->msi_enabled)
+ 		aac_set_intx_mode(dev);
+ 	return status;
+@@ -610,7 +614,8 @@
+ 		dev->max_fib_size = status[1] & 0xFFE0;
+ 		host->sg_tablesize = status[2] >> 16;
+ 		dev->sg_tablesize = status[2] & 0xFFFF;
+-		if (aac_is_src(dev)) {
++		if (dev->pdev->device == PMC_DEVICE_S7 ||
++		    dev->pdev->device == PMC_DEVICE_S8) {
+ 			if (host->can_queue > (status[3] >> 16) -
+ 					AAC_NUM_MGT_FIB)
+ 				host->can_queue = (status[3] >> 16) -
+@@ -629,7 +634,9 @@
+ 			pr_warn("numacb=%d ignored\n", numacb);
+ 	}
+ 
+-	if (aac_is_src(dev))
++	if (dev->pdev->device == PMC_DEVICE_S6 ||
++	    dev->pdev->device == PMC_DEVICE_S7 ||
++	    dev->pdev->device == PMC_DEVICE_S8)
+ 		aac_define_int_mode(dev);
+ 	/*
+ 	 *	Ok now init the communication subsystem
+diff -ur linux-5.3/drivers/scsi/aacraid.org/commsup.c linux-5.3/drivers/scsi/aacraid/commsup.c
+--- linux-5.3/drivers/scsi/aacraid.org/commsup.c	2019-11-01 22:42:37.014803249 +0100
++++ linux-5.3/drivers/scsi/aacraid/commsup.c	2019-11-04 09:29:51.321486211 +0100
+@@ -2593,7 +2593,9 @@
+ {
+ 	int i;
+ 
+-	if (aac_is_src(dev)) {
++	if (dev->pdev->device == PMC_DEVICE_S6 ||
++	    dev->pdev->device == PMC_DEVICE_S7 ||
++	    dev->pdev->device == PMC_DEVICE_S8) {
+ 		if (dev->max_msix > 1) {
+ 			for (i = 0; i < dev->max_msix; i++)
+ 				free_irq(pci_irq_vector(dev->pdev, i),
+diff -ur linux-5.3/drivers/scsi/aacraid.org/linit.c linux-5.3/drivers/scsi/aacraid/linit.c
+--- linux-5.3/drivers/scsi/aacraid.org/linit.c	2019-11-01 22:42:37.011469816 +0100
++++ linux-5.3/drivers/scsi/aacraid/linit.c	2019-11-04 09:29:51.321486211 +0100
+@@ -1567,8 +1567,9 @@
+ 	aac_send_shutdown(aac);
+ 
+ 	aac_adapter_disable_int(aac);
+-
+-	if (aac_is_src(aac)) {
++	if (aac->pdev->device == PMC_DEVICE_S6 ||
++	    aac->pdev->device == PMC_DEVICE_S7 ||
++	    aac->pdev->device == PMC_DEVICE_S8) {
+ 		if (aac->max_msix > 1) {
+ 			for (i = 0; i < aac->max_msix; i++) {
+ 				free_irq(pci_irq_vector(aac->pdev, i),
+@@ -1858,7 +1859,8 @@
+ 	aac_adapter_enable_int(dev);
+ 
+ 
+-	if (aac_is_src(dev))
++	if (dev->pdev->device == PMC_DEVICE_S7 ||
++	    dev->pdev->device == PMC_DEVICE_S8)
+ 		aac_define_int_mode(dev);
+ 
+ 	if (dev->msi_enabled)
 
-_______________________________________________
-xfs mailing list
-xfs@oss.sgi.com
-http://oss.sgi.com/mailman/listinfo/xfs