X-Git-Url: http://git.pld-linux.org/?a=blobdiff_plain;f=kernel-small_fixes.patch;h=12130347edfedd51684642703b98acab57e2463f;hb=4e1a4c84e6398a77c78048ea1fa3fe653dce7712;hp=a0928ee62871ca10c67827acbc6519ca0355b3af;hpb=b95c51470294fc508076f9bb98e28c31db8eabaa;p=packages%2Fkernel.git diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch index a0928ee6..12130347 100644 --- a/kernel-small_fixes.patch +++ b/kernel-small_fixes.patch @@ -27,646 +27,45 @@ fi done -From 7a29ac474a47eb8cf212b45917683ae89d6fa13b Mon Sep 17 00:00:00 2001 -From: Chris Mason -Date: Tue, 10 Nov 2015 10:10:34 +1100 -Subject: xfs: give all workqueues rescuer threads +From: Shaohua Li -We're consistently hitting deadlocks here with XFS on recent kernels. -After some digging through the crash files, it looks like everyone in -the system is waiting for XFS to reclaim memory. +Basically this is a copy of commit 001e4a8775f6(ext4: implement cgroup +writeback support). Tested with a fio test, verified writeback is +throttled against cgroup io.max write bandwidth, also verified moving +the fio test to another cgroup and the writeback is throttled against +new cgroup setting. -Something like this: - -PID: 2733434 TASK: ffff8808cd242800 CPU: 19 COMMAND: "java" - #0 [ffff880019c53588] __schedule at ffffffff818c4df2 - #1 [ffff880019c535d8] schedule at ffffffff818c5517 - #2 [ffff880019c535f8] _xfs_log_force_lsn at ffffffff81316348 - #3 [ffff880019c53688] xfs_log_force_lsn at ffffffff813164fb - #4 [ffff880019c536b8] xfs_iunpin_wait at ffffffff8130835e - #5 [ffff880019c53728] xfs_reclaim_inode at ffffffff812fd453 - #6 [ffff880019c53778] xfs_reclaim_inodes_ag at ffffffff812fd8c7 - #7 [ffff880019c53928] xfs_reclaim_inodes_nr at ffffffff812fe433 - #8 [ffff880019c53958] xfs_fs_free_cached_objects at ffffffff8130d3b9 - #9 [ffff880019c53968] super_cache_scan at ffffffff811a6f73 -#10 [ffff880019c539c8] shrink_slab at ffffffff811460e6 -#11 [ffff880019c53aa8] shrink_zone at ffffffff8114a53f -#12 [ffff880019c53b48] do_try_to_free_pages at ffffffff8114a8ba -#13 [ffff880019c53be8] try_to_free_pages at ffffffff8114ad5a -#14 [ffff880019c53c78] __alloc_pages_nodemask at ffffffff8113e1b8 -#15 [ffff880019c53d88] alloc_kmem_pages_node at ffffffff8113e671 -#16 [ffff880019c53dd8] copy_process at ffffffff8104f781 -#17 [ffff880019c53ec8] do_fork at ffffffff8105129c -#18 [ffff880019c53f38] sys_clone at ffffffff810515b6 -#19 [ffff880019c53f48] stub_clone at ffffffff818c8e4d - -xfs_log_force_lsn is waiting for logs to get cleaned, which is waiting -for IO, which is waiting for workers to complete the IO which is waiting -for worker threads that don't exist yet: - -PID: 2752451 TASK: ffff880bd6bdda00 CPU: 37 COMMAND: "kworker/37:1" - #0 [ffff8808d20abbb0] __schedule at ffffffff818c4df2 - #1 [ffff8808d20abc00] schedule at ffffffff818c5517 - #2 [ffff8808d20abc20] schedule_timeout at ffffffff818c7c6c - #3 [ffff8808d20abcc0] wait_for_completion_killable at ffffffff818c6495 - #4 [ffff8808d20abd30] kthread_create_on_node at ffffffff8106ec82 - #5 [ffff8808d20abdf0] create_worker at ffffffff8106752f - #6 [ffff8808d20abe40] worker_thread at ffffffff810699be - #7 [ffff8808d20abec0] kthread at ffffffff8106ef59 - #8 [ffff8808d20abf50] ret_from_fork at ffffffff818c8ac8 - -I think we should be using WQ_MEM_RECLAIM to make sure this thread -pool makes progress when we're not able to allocate new workers. 
- -[dchinner: make all workqueues WQ_MEM_RECLAIM] - -Signed-off-by: Chris Mason -Reviewed-by: Dave Chinner -Signed-off-by: Dave Chinner +Cc: Tejun Heo +Signed-off-by: Shaohua Li --- - fs/xfs/xfs_super.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - + fs/xfs/xfs_aops.c | 2 ++ + fs/xfs/xfs_super.c | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c +index f18e593..6535054 100644 +--- a/fs/xfs/xfs_aops.c ++++ b/fs/xfs/xfs_aops.c +@@ -630,8 +630,10 @@ xfs_add_to_ioend( + if (wpc->ioend) + list_add(&wpc->ioend->io_list, iolist); + wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh); ++ wbc_init_bio(wbc, wpc->ioend->io_bio); + } + ++ wbc_account_io(wbc, bh->b_page, bh->b_size); + /* + * If the buffer doesn't fit into the bio we need to allocate a new + * one. This shouldn't happen more than once for a given buffer. diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c -index 29531ec..65fbfb7 100644 +index 584cf2d..aea3bc2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c -@@ -838,17 +838,18 @@ xfs_init_mount_workqueues( - goto out_destroy_unwritten; - - mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", -- WQ_FREEZABLE, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_reclaim_workqueue) - goto out_destroy_cil; - - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", -- WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, -+ mp->m_fsname); - if (!mp->m_log_workqueue) - goto out_destroy_reclaim; - - mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", -- WQ_FREEZABLE, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_eofblocks_workqueue) - goto out_destroy_log; - --- -cgit v0.11.2 - -commit c2d42c16ad83006a706d83e51a7268db04af733a -Author: Andrew Morton -Date: Thu Nov 5 18:48:43 2015 -0800 - - mm/vmstat.c: uninline node_page_state() - - With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc - (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of - stack. Uninlining node_page_state() reduces this to 440 bytes. - - The stack consumption issue is fixed by newer gcc (4.8.4) however with - that compiler this patch reduces the node.o text size from 7314 bytes to - 4578. - - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h -index 82e7db7..49dfe40 100644 ---- a/include/linux/vmstat.h -+++ b/include/linux/vmstat.h -@@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, - } - - #ifdef CONFIG_NUMA --/* -- * Determine the per node value of a stat item. This function -- * is called frequently in a NUMA machine, so try to be as -- * frugal as possible. 
-- */ --static inline unsigned long node_page_state(int node, -- enum zone_stat_item item) --{ -- struct zone *zones = NODE_DATA(node)->node_zones; -- -- return --#ifdef CONFIG_ZONE_DMA -- zone_page_state(&zones[ZONE_DMA], item) + --#endif --#ifdef CONFIG_ZONE_DMA32 -- zone_page_state(&zones[ZONE_DMA32], item) + --#endif --#ifdef CONFIG_HIGHMEM -- zone_page_state(&zones[ZONE_HIGHMEM], item) + --#endif -- zone_page_state(&zones[ZONE_NORMAL], item) + -- zone_page_state(&zones[ZONE_MOVABLE], item); --} - -+extern unsigned long node_page_state(int node, enum zone_stat_item item); - extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); - - #else -diff --git a/mm/vmstat.c b/mm/vmstat.c -index fbf1448..ffcb4f5 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -591,6 +591,28 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) - else - __inc_zone_state(z, NUMA_OTHER); - } -+ -+/* -+ * Determine the per node value of a stat item. -+ */ -+unsigned long node_page_state(int node, enum zone_stat_item item) -+{ -+ struct zone *zones = NODE_DATA(node)->node_zones; -+ -+ return -+#ifdef CONFIG_ZONE_DMA -+ zone_page_state(&zones[ZONE_DMA], item) + -+#endif -+#ifdef CONFIG_ZONE_DMA32 -+ zone_page_state(&zones[ZONE_DMA32], item) + -+#endif -+#ifdef CONFIG_HIGHMEM -+ zone_page_state(&zones[ZONE_HIGHMEM], item) + -+#endif -+ zone_page_state(&zones[ZONE_NORMAL], item) + -+ zone_page_state(&zones[ZONE_MOVABLE], item); -+} -+ - #endif - - #ifdef CONFIG_COMPACTION -commit 016c13daa5c9e4827eca703e2f0621c131f2cca3 -Author: Mel Gorman -Date: Fri Nov 6 16:28:18 2015 -0800 - - mm, page_alloc: use masks and shifts when converting GFP flags to migrate types - - This patch redefines which GFP bits are used for specifying mobility and - the order of the migrate types. Once redefined it's possible to convert - GFP flags to a migrate type with a simple mask and shift. The only - downside is that readers of OOM kill messages and allocation failures may - have been used to the existing values but scripts/gfp-translate will help. 
- - Signed-off-by: Mel Gorman - Acked-by: Vlastimil Babka - Cc: Christoph Lameter - Cc: David Rientjes - Cc: Johannes Weiner - Cc: Michal Hocko - Cc: Vitaly Wool - Cc: Rik van Riel - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/gfp.h b/include/linux/gfp.h -index f92cbd2..440fca3 100644 ---- a/include/linux/gfp.h -+++ b/include/linux/gfp.h -@@ -14,7 +14,7 @@ struct vm_area_struct; - #define ___GFP_HIGHMEM 0x02u - #define ___GFP_DMA32 0x04u - #define ___GFP_MOVABLE 0x08u --#define ___GFP_WAIT 0x10u -+#define ___GFP_RECLAIMABLE 0x10u - #define ___GFP_HIGH 0x20u - #define ___GFP_IO 0x40u - #define ___GFP_FS 0x80u -@@ -29,7 +29,7 @@ struct vm_area_struct; - #define ___GFP_NOMEMALLOC 0x10000u - #define ___GFP_HARDWALL 0x20000u - #define ___GFP_THISNODE 0x40000u --#define ___GFP_RECLAIMABLE 0x80000u -+#define ___GFP_WAIT 0x80000u - #define ___GFP_NOACCOUNT 0x100000u - #define ___GFP_NOTRACK 0x200000u - #define ___GFP_NO_KSWAPD 0x400000u -@@ -126,6 +126,7 @@ struct vm_area_struct; - - /* This mask makes up all the page movable related flags */ - #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) -+#define GFP_MOVABLE_SHIFT 3 - - /* Control page allocator reclaim behavior */ - #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ -@@ -152,14 +153,15 @@ struct vm_area_struct; - /* Convert GFP flags to their corresponding migrate type */ - static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) - { -- WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); -+ VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); -+ BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); -+ BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); - - if (unlikely(page_group_by_mobility_disabled)) - return MIGRATE_UNMOVABLE; - - /* Group based on mobility */ -- return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | -- ((gfp_flags & __GFP_RECLAIMABLE) != 0); -+ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; - } - - #ifdef CONFIG_HIGHMEM -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index e326843..38bed71 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -37,8 +37,8 @@ - - enum { - MIGRATE_UNMOVABLE, -- MIGRATE_RECLAIMABLE, - MIGRATE_MOVABLE, -+ MIGRATE_RECLAIMABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, - #ifdef CONFIG_CMA -commit 974a786e63c96a2401a78ddba926f34c128474f1 -Author: Mel Gorman -Date: Fri Nov 6 16:28:34 2015 -0800 - - mm, page_alloc: remove MIGRATE_RESERVE - - MIGRATE_RESERVE preserves an old property of the buddy allocator that - existed prior to fragmentation avoidance -- min_free_kbytes worth of pages - tended to remain contiguous until the only alternative was to fail the - allocation. At the time it was discovered that high-order atomic - allocations relied on this property so MIGRATE_RESERVE was introduced. A - later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch - deletes MIGRATE_RESERVE and supporting code so it'll be easier to review. - Note that this patch in isolation may look like a false regression if - someone was bisecting high-order atomic allocation failures. 
- - Signed-off-by: Mel Gorman - Acked-by: Vlastimil Babka - Cc: Christoph Lameter - Cc: David Rientjes - Cc: Johannes Weiner - Cc: Michal Hocko - Cc: Vitaly Wool - Cc: Rik van Riel - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 1e88aae..b86cfa3 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -39,8 +39,6 @@ enum { - MIGRATE_UNMOVABLE, - MIGRATE_MOVABLE, - MIGRATE_RECLAIMABLE, -- MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ -- MIGRATE_RESERVE = MIGRATE_PCPTYPES, - #ifdef CONFIG_CMA - /* - * MIGRATE_CMA migration type is designed to mimic the way -@@ -63,6 +61,8 @@ enum { - MIGRATE_TYPES - }; - -+#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) -+ - #ifdef CONFIG_CMA - # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) - #else -@@ -429,12 +429,6 @@ struct zone { - - const char *name; - -- /* -- * Number of MIGRATE_RESERVE page block. To maintain for just -- * optimization. Protected by zone->lock. -- */ -- int nr_migrate_reserve_block; -- - #ifdef CONFIG_MEMORY_ISOLATION - /* - * Number of isolated pageblock. It is used to solve incorrect -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 9812d46..dabd247 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void) - for_each_populated_zone(zone) - nr_zones++; - -- /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ -+ /* Ensure 2 pageblocks are free to assist fragmentation avoidance */ - recommended_min = pageblock_nr_pages * nr_zones * 2; - - /* -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 8dc6e3c..5888126 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, - if (unlikely(has_isolate_pageblock(zone))) - mt = get_pageblock_migratetype(page); - -- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, page_to_pfn(page), zone, 0, mt); - trace_mm_page_pcpu_drain(page, 0, mt); - } while (--to_free && --batch_free && !list_empty(list)); -@@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, - * the free lists for the desirable migrate type are depleted - */ - static int fallbacks[MIGRATE_TYPES][4] = { -- [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, -- [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, -- [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, -+ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, -+ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, -+ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, - #ifdef CONFIG_CMA -- [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ -+ [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ - #endif -- [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ - #ifdef CONFIG_MEMORY_ISOLATION -- [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ -+ [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */ - #endif - }; - -@@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, - *can_steal = false; - for (i = 0;; i++) { - fallback_mt = fallbacks[migratetype][i]; -- if (fallback_mt == MIGRATE_RESERVE) -+ if (fallback_mt == MIGRATE_TYPES) - break; - - if 
(list_empty(&area->free_list[fallback_mt])) -@@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, - { - struct page *page; - --retry_reserve: - page = __rmqueue_smallest(zone, order, migratetype); -- -- if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { -+ if (unlikely(!page)) { - if (migratetype == MIGRATE_MOVABLE) - page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype); -- -- /* -- * Use MIGRATE_RESERVE rather than fail an allocation. goto -- * is used because __rmqueue_smallest is an inline function -- * and we want just one call site -- */ -- if (!page) { -- migratetype = MIGRATE_RESERVE; -- goto retry_reserve; -- } - } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); -@@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type) - [MIGRATE_UNMOVABLE] = 'U', - [MIGRATE_RECLAIMABLE] = 'E', - [MIGRATE_MOVABLE] = 'M', -- [MIGRATE_RESERVE] = 'R', - #ifdef CONFIG_CMA - [MIGRATE_CMA] = 'C', - #endif -@@ -4303,120 +4288,6 @@ static inline unsigned long wait_table_bits(unsigned long size) - } - - /* -- * Check if a pageblock contains reserved pages -- */ --static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn) --{ -- unsigned long pfn; -- -- for (pfn = start_pfn; pfn < end_pfn; pfn++) { -- if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn))) -- return 1; -- } -- return 0; --} -- --/* -- * Mark a number of pageblocks as MIGRATE_RESERVE. The number -- * of blocks reserved is based on min_wmark_pages(zone). The memory within -- * the reserve will tend to store contiguous free pages. Setting min_free_kbytes -- * higher will lead to a bigger reserve which will get freed as contiguous -- * blocks as reclaim kicks in -- */ --static void setup_zone_migrate_reserve(struct zone *zone) --{ -- unsigned long start_pfn, pfn, end_pfn, block_end_pfn; -- struct page *page; -- unsigned long block_migratetype; -- int reserve; -- int old_reserve; -- -- /* -- * Get the start pfn, end pfn and the number of blocks to reserve -- * We have to be careful to be aligned to pageblock_nr_pages to -- * make sure that we always check pfn_valid for the first page in -- * the block. -- */ -- start_pfn = zone->zone_start_pfn; -- end_pfn = zone_end_pfn(zone); -- start_pfn = roundup(start_pfn, pageblock_nr_pages); -- reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> -- pageblock_order; -- -- /* -- * Reserve blocks are generally in place to help high-order atomic -- * allocations that are short-lived. A min_free_kbytes value that -- * would result in more than 2 reserve blocks for atomic allocations -- * is assumed to be in place to help anti-fragmentation for the -- * future allocation of hugepages at runtime. 
-- */ -- reserve = min(2, reserve); -- old_reserve = zone->nr_migrate_reserve_block; -- -- /* When memory hot-add, we almost always need to do nothing */ -- if (reserve == old_reserve) -- return; -- zone->nr_migrate_reserve_block = reserve; -- -- for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { -- if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone))) -- return; -- -- if (!pfn_valid(pfn)) -- continue; -- page = pfn_to_page(pfn); -- -- /* Watch out for overlapping nodes */ -- if (page_to_nid(page) != zone_to_nid(zone)) -- continue; -- -- block_migratetype = get_pageblock_migratetype(page); -- -- /* Only test what is necessary when the reserves are not met */ -- if (reserve > 0) { -- /* -- * Blocks with reserved pages will never free, skip -- * them. -- */ -- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); -- if (pageblock_is_reserved(pfn, block_end_pfn)) -- continue; -- -- /* If this block is reserved, account for it */ -- if (block_migratetype == MIGRATE_RESERVE) { -- reserve--; -- continue; -- } -- -- /* Suitable for reserving if this block is movable */ -- if (block_migratetype == MIGRATE_MOVABLE) { -- set_pageblock_migratetype(page, -- MIGRATE_RESERVE); -- move_freepages_block(zone, page, -- MIGRATE_RESERVE); -- reserve--; -- continue; -- } -- } else if (!old_reserve) { -- /* -- * At boot time we don't need to scan the whole zone -- * for turning off MIGRATE_RESERVE. -- */ -- break; -- } -- -- /* -- * If the reserve is met and this is a previous reserved block, -- * take it back -- */ -- if (block_migratetype == MIGRATE_RESERVE) { -- set_pageblock_migratetype(page, MIGRATE_MOVABLE); -- move_freepages_block(zone, page, MIGRATE_MOVABLE); -- } -- } --} -- --/* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. Non-atomic initialization, single-pass. -@@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - * movable at startup. This will force kernel allocations - * to reserve their blocks rather than leaking throughout - * the address space during boot when many long-lived -- * kernel allocations are made. Later some blocks near -- * the start are marked MIGRATE_RESERVE by -- * setup_zone_migrate_reserve() -+ * kernel allocations are made. - * - * bitmap is created for zone's valid pfn range. but memmap - * can be created for invalid pages (for alignment) -@@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void) - high_wmark_pages(zone) - low_wmark_pages(zone) - - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - -- setup_zone_migrate_reserve(zone); - spin_unlock_irqrestore(&zone->lock, flags); - } - -diff --git a/mm/vmstat.c b/mm/vmstat.c -index ffcb4f5..5b289dc 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = { - "Unmovable", - "Reclaimable", - "Movable", -- "Reserve", - #ifdef CONFIG_CMA - "CMA", - #endif -diff --git a/mm/backing-dev.c b/mm/backing-dev.c -index 8ed2ffd963c5..7340353f8aea 100644 ---- a/mm/backing-dev.c -+++ b/mm/backing-dev.c -@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait); - * jiffies for either a BDI to exit congestion of the given @sync queue - * or a write to complete. - * -- * In the absence of zone congestion, cond_resched() is called to yield -- * the processor if necessary but otherwise does not sleep. 
-+ * In the absence of zone congestion, a short sleep or a cond_resched is -+ * performed to yield the processor and to allow other subsystems to make -+ * a forward progress. - * - * The return value is 0 if the sleep is for the full timeout. Otherwise, - * it is the number of jiffies that were still remaining when the function -@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) - */ - if (atomic_read(&nr_wb_congested[sync]) == 0 || - !test_bit(ZONE_CONGESTED, &zone->flags)) { -- cond_resched(); -+ -+ /* -+ * Memory allocation/reclaim might be called from a WQ -+ * context and the current implementation of the WQ -+ * concurrency control doesn't recognize that a particular -+ * WQ is congested if the worker thread is looping without -+ * ever sleeping. Therefore we have to do a short sleep -+ * here rather than calling cond_resched(). -+ */ -+ if (current->flags & PF_WQ_WORKER) -+ schedule_timeout(1); -+ else -+ cond_resched(); - - /* In case we scheduled, work out time remaining */ - ret = timeout - (jiffies - start); -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 45dcbcb5c594..0975da8e3432 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1381,6 +1381,7 @@ static const struct file_operations proc_vmstat_file_operations = { - #endif /* CONFIG_PROC_FS */ - - #ifdef CONFIG_SMP -+static struct workqueue_struct *vmstat_wq; - static DEFINE_PER_CPU(struct delayed_work, vmstat_work); - int sysctl_stat_interval __read_mostly = HZ; - static cpumask_var_t cpu_stat_off; -@@ -1393,7 +1394,7 @@ static void vmstat_update(struct work_struct *w) - * to occur in the future. Keep on running the - * update worker thread. - */ -- schedule_delayed_work_on(smp_processor_id(), -+ queue_delayed_work_on(smp_processor_id(), vmstat_wq, - this_cpu_ptr(&vmstat_work), - round_jiffies_relative(sysctl_stat_interval)); - } else { -@@ -1462,7 +1463,7 @@ static void vmstat_shepherd(struct work_struct *w) - if (need_update(cpu) && - cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) - -- schedule_delayed_work_on(cpu, -+ queue_delayed_work_on(cpu, vmstat_wq, - &per_cpu(vmstat_work, cpu), 0); - - put_online_cpus(); -@@ -1551,6 +1552,7 @@ static int __init setup_vmstat(void) - - start_shepherd_timer(); - cpu_notifier_register_done(); -+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); - #endif - #ifdef CONFIG_PROC_FS - proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); --- -2.6.2 - +@@ -1634,6 +1634,7 @@ xfs_fs_fill_super( + sb->s_max_links = XFS_MAXLINK; + sb->s_time_gran = 1; + set_posix_acl_flag(sb); ++ sb->s_iflags |= SB_I_CGROUPWB; + + /* version 5 superblocks support inode version counters. */ + if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
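
The commit message near the top of this diff says the new XFS cgroup-writeback hunks were verified with a fio job throttled by a cgroup io.max write-bandwidth limit. As a rough, hypothetical stand-in for that fio job (fio itself is not shown here), the buffered-write loop below can be run from inside a cgroup v2 group whose io.max carries a wbps limit for the device backing the XFS mount; the target path, the sizes, and the cgroup setup are assumptions for illustration, not part of the patch.

/*
 * Hypothetical stand-in for the fio test mentioned in the commit message
 * above: a plain buffered-write loop whose dirty pages are flushed by
 * background writeback, which is what the cgroup io.max write limit
 * throttles once SB_I_CGROUPWB is set for XFS.
 *
 * Assumed setup (not part of the patch): cgroup v2 mounted at
 * /sys/fs/cgroup, a group with io.max "MAJ:MIN wbps=<limit>" configured,
 * and this process placed in that group before it runs.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/mnt/xfs/writeback-test";
	const size_t chunk = 1 << 20;          /* 1 MiB buffered writes */
	const size_t total = 1024 * chunk;     /* 1 GiB in total        */
	char *buf;
	int fd;
	size_t done = 0;
	struct timespec t0, t1;

	buf = malloc(chunk);
	if (!buf) {
		perror("malloc");
		return 1;
	}
	memset(buf, 0xab, chunk);

	fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	clock_gettime(CLOCK_MONOTONIC, &t0);
	while (done < total) {
		if (write(fd, buf, chunk) != (ssize_t)chunk) {
			perror("write");
			return 1;
		}
		done += chunk;
	}
	/* fsync() forces the dirty pages through (throttled) writeback. */
	if (fsync(fd)) {
		perror("fsync");
		return 1;
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("wrote %zu MiB in %.1f s\n", total >> 20,
	       (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9);

	close(fd);
	free(buf);
	return 0;
}

With such a limit in place and SB_I_CGROUPWB set as in the xfs_super.c hunk above, the fsync-bounded throughput reported by a run like this would be expected to track the cgroup's wbps limit rather than the raw device bandwidth, which is the behaviour the commit message describes for its fio test.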