From e1bba219cb52f8e69e8d832ef6a255cefefed95c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Arkadiusz=20Mi=C5=9Bkiewicz?= Date: Wed, 18 Nov 2015 11:04:52 +0100 Subject: [PATCH] - backport mm improvements/fixes from git and one from mailing list (http://www.spinics.net/lists/linux-mm/msg96871.html) --- kernel-small_fixes.patch | 555 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 555 insertions(+) diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch index 68d235cd..20d43776 100644 --- a/kernel-small_fixes.patch +++ b/kernel-small_fixes.patch @@ -116,3 +116,558 @@ index 29531ec..65fbfb7 100644 -- cgit v0.11.2 +commit c2d42c16ad83006a706d83e51a7268db04af733a +Author: Andrew Morton +Date: Thu Nov 5 18:48:43 2015 -0800 + + mm/vmstat.c: uninline node_page_state() + + With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc + (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of + stack. Uninlining node_page_state() reduces this to 440 bytes. + + The stack consumption issue is fixed by newer gcc (4.8.4) however with + that compiler this patch reduces the node.o text size from 7314 bytes to + 4578. + + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + +diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h +index 82e7db7..49dfe40 100644 +--- a/include/linux/vmstat.h ++++ b/include/linux/vmstat.h +@@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, + } + + #ifdef CONFIG_NUMA +-/* +- * Determine the per node value of a stat item. This function +- * is called frequently in a NUMA machine, so try to be as +- * frugal as possible. +- */ +-static inline unsigned long node_page_state(int node, +- enum zone_stat_item item) +-{ +- struct zone *zones = NODE_DATA(node)->node_zones; +- +- return +-#ifdef CONFIG_ZONE_DMA +- zone_page_state(&zones[ZONE_DMA], item) + +-#endif +-#ifdef CONFIG_ZONE_DMA32 +- zone_page_state(&zones[ZONE_DMA32], item) + +-#endif +-#ifdef CONFIG_HIGHMEM +- zone_page_state(&zones[ZONE_HIGHMEM], item) + +-#endif +- zone_page_state(&zones[ZONE_NORMAL], item) + +- zone_page_state(&zones[ZONE_MOVABLE], item); +-} + ++extern unsigned long node_page_state(int node, enum zone_stat_item item); + extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); + + #else +diff --git a/mm/vmstat.c b/mm/vmstat.c +index fbf1448..ffcb4f5 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -591,6 +591,28 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) + else + __inc_zone_state(z, NUMA_OTHER); + } ++ ++/* ++ * Determine the per node value of a stat item. ++ */ ++unsigned long node_page_state(int node, enum zone_stat_item item) ++{ ++ struct zone *zones = NODE_DATA(node)->node_zones; ++ ++ return ++#ifdef CONFIG_ZONE_DMA ++ zone_page_state(&zones[ZONE_DMA], item) + ++#endif ++#ifdef CONFIG_ZONE_DMA32 ++ zone_page_state(&zones[ZONE_DMA32], item) + ++#endif ++#ifdef CONFIG_HIGHMEM ++ zone_page_state(&zones[ZONE_HIGHMEM], item) + ++#endif ++ zone_page_state(&zones[ZONE_NORMAL], item) + ++ zone_page_state(&zones[ZONE_MOVABLE], item); ++} ++ + #endif + + #ifdef CONFIG_COMPACTION +commit 016c13daa5c9e4827eca703e2f0621c131f2cca3 +Author: Mel Gorman +Date: Fri Nov 6 16:28:18 2015 -0800 + + mm, page_alloc: use masks and shifts when converting GFP flags to migrate types + + This patch redefines which GFP bits are used for specifying mobility and + the order of the migrate types. 
Once redefined it's possible to convert + GFP flags to a migrate type with a simple mask and shift. The only + downside is that readers of OOM kill messages and allocation failures may + have been used to the existing values but scripts/gfp-translate will help. + + Signed-off-by: Mel Gorman + Acked-by: Vlastimil Babka + Cc: Christoph Lameter + Cc: David Rientjes + Cc: Johannes Weiner + Cc: Michal Hocko + Cc: Vitaly Wool + Cc: Rik van Riel + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + +diff --git a/include/linux/gfp.h b/include/linux/gfp.h +index f92cbd2..440fca3 100644 +--- a/include/linux/gfp.h ++++ b/include/linux/gfp.h +@@ -14,7 +14,7 @@ struct vm_area_struct; + #define ___GFP_HIGHMEM 0x02u + #define ___GFP_DMA32 0x04u + #define ___GFP_MOVABLE 0x08u +-#define ___GFP_WAIT 0x10u ++#define ___GFP_RECLAIMABLE 0x10u + #define ___GFP_HIGH 0x20u + #define ___GFP_IO 0x40u + #define ___GFP_FS 0x80u +@@ -29,7 +29,7 @@ struct vm_area_struct; + #define ___GFP_NOMEMALLOC 0x10000u + #define ___GFP_HARDWALL 0x20000u + #define ___GFP_THISNODE 0x40000u +-#define ___GFP_RECLAIMABLE 0x80000u ++#define ___GFP_WAIT 0x80000u + #define ___GFP_NOACCOUNT 0x100000u + #define ___GFP_NOTRACK 0x200000u + #define ___GFP_NO_KSWAPD 0x400000u +@@ -126,6 +126,7 @@ struct vm_area_struct; + + /* This mask makes up all the page movable related flags */ + #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) ++#define GFP_MOVABLE_SHIFT 3 + + /* Control page allocator reclaim behavior */ + #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ +@@ -152,14 +153,15 @@ struct vm_area_struct; + /* Convert GFP flags to their corresponding migrate type */ + static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) + { +- WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); ++ VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); ++ BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); ++ BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); + + if (unlikely(page_group_by_mobility_disabled)) + return MIGRATE_UNMOVABLE; + + /* Group based on mobility */ +- return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | +- ((gfp_flags & __GFP_RECLAIMABLE) != 0); ++ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; + } + + #ifdef CONFIG_HIGHMEM +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index e326843..38bed71 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -37,8 +37,8 @@ + + enum { + MIGRATE_UNMOVABLE, +- MIGRATE_RECLAIMABLE, + MIGRATE_MOVABLE, ++ MIGRATE_RECLAIMABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_RESERVE = MIGRATE_PCPTYPES, + #ifdef CONFIG_CMA +commit 974a786e63c96a2401a78ddba926f34c128474f1 +Author: Mel Gorman +Date: Fri Nov 6 16:28:34 2015 -0800 + + mm, page_alloc: remove MIGRATE_RESERVE + + MIGRATE_RESERVE preserves an old property of the buddy allocator that + existed prior to fragmentation avoidance -- min_free_kbytes worth of pages + tended to remain contiguous until the only alternative was to fail the + allocation. At the time it was discovered that high-order atomic + allocations relied on this property so MIGRATE_RESERVE was introduced. A + later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch + deletes MIGRATE_RESERVE and supporting code so it'll be easier to review. + Note that this patch in isolation may look like a false regression if + someone was bisecting high-order atomic allocation failures. 
+ + Signed-off-by: Mel Gorman + Acked-by: Vlastimil Babka + Cc: Christoph Lameter + Cc: David Rientjes + Cc: Johannes Weiner + Cc: Michal Hocko + Cc: Vitaly Wool + Cc: Rik van Riel + Signed-off-by: Andrew Morton + Signed-off-by: Linus Torvalds + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 1e88aae..b86cfa3 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -39,8 +39,6 @@ enum { + MIGRATE_UNMOVABLE, + MIGRATE_MOVABLE, + MIGRATE_RECLAIMABLE, +- MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ +- MIGRATE_RESERVE = MIGRATE_PCPTYPES, + #ifdef CONFIG_CMA + /* + * MIGRATE_CMA migration type is designed to mimic the way +@@ -63,6 +61,8 @@ enum { + MIGRATE_TYPES + }; + ++#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) ++ + #ifdef CONFIG_CMA + # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) + #else +@@ -429,12 +429,6 @@ struct zone { + + const char *name; + +- /* +- * Number of MIGRATE_RESERVE page block. To maintain for just +- * optimization. Protected by zone->lock. +- */ +- int nr_migrate_reserve_block; +- + #ifdef CONFIG_MEMORY_ISOLATION + /* + * Number of isolated pageblock. It is used to solve incorrect +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 9812d46..dabd247 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void) + for_each_populated_zone(zone) + nr_zones++; + +- /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ ++ /* Ensure 2 pageblocks are free to assist fragmentation avoidance */ + recommended_min = pageblock_nr_pages * nr_zones * 2; + + /* +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 8dc6e3c..5888126 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, + if (unlikely(has_isolate_pageblock(zone))) + mt = get_pageblock_migratetype(page); + +- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ + __free_one_page(page, page_to_pfn(page), zone, 0, mt); + trace_mm_page_pcpu_drain(page, 0, mt); + } while (--to_free && --batch_free && !list_empty(list)); +@@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, + * the free lists for the desirable migrate type are depleted + */ + static int fallbacks[MIGRATE_TYPES][4] = { +- [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +- [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +- [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, ++ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, ++ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, ++ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, + #ifdef CONFIG_CMA +- [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ ++ [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ + #endif +- [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ + #ifdef CONFIG_MEMORY_ISOLATION +- [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ ++ [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */ + #endif + }; + +@@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, + *can_steal = false; + for (i = 0;; i++) { + fallback_mt = fallbacks[migratetype][i]; +- if (fallback_mt == MIGRATE_RESERVE) ++ if (fallback_mt == MIGRATE_TYPES) + break; + + if 
(list_empty(&area->free_list[fallback_mt])) +@@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, + { + struct page *page; + +-retry_reserve: + page = __rmqueue_smallest(zone, order, migratetype); +- +- if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { ++ if (unlikely(!page)) { + if (migratetype == MIGRATE_MOVABLE) + page = __rmqueue_cma_fallback(zone, order); + + if (!page) + page = __rmqueue_fallback(zone, order, migratetype); +- +- /* +- * Use MIGRATE_RESERVE rather than fail an allocation. goto +- * is used because __rmqueue_smallest is an inline function +- * and we want just one call site +- */ +- if (!page) { +- migratetype = MIGRATE_RESERVE; +- goto retry_reserve; +- } + } + + trace_mm_page_alloc_zone_locked(page, order, migratetype); +@@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type) + [MIGRATE_UNMOVABLE] = 'U', + [MIGRATE_RECLAIMABLE] = 'E', + [MIGRATE_MOVABLE] = 'M', +- [MIGRATE_RESERVE] = 'R', + #ifdef CONFIG_CMA + [MIGRATE_CMA] = 'C', + #endif +@@ -4303,120 +4288,6 @@ static inline unsigned long wait_table_bits(unsigned long size) + } + + /* +- * Check if a pageblock contains reserved pages +- */ +-static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn) +-{ +- unsigned long pfn; +- +- for (pfn = start_pfn; pfn < end_pfn; pfn++) { +- if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn))) +- return 1; +- } +- return 0; +-} +- +-/* +- * Mark a number of pageblocks as MIGRATE_RESERVE. The number +- * of blocks reserved is based on min_wmark_pages(zone). The memory within +- * the reserve will tend to store contiguous free pages. Setting min_free_kbytes +- * higher will lead to a bigger reserve which will get freed as contiguous +- * blocks as reclaim kicks in +- */ +-static void setup_zone_migrate_reserve(struct zone *zone) +-{ +- unsigned long start_pfn, pfn, end_pfn, block_end_pfn; +- struct page *page; +- unsigned long block_migratetype; +- int reserve; +- int old_reserve; +- +- /* +- * Get the start pfn, end pfn and the number of blocks to reserve +- * We have to be careful to be aligned to pageblock_nr_pages to +- * make sure that we always check pfn_valid for the first page in +- * the block. +- */ +- start_pfn = zone->zone_start_pfn; +- end_pfn = zone_end_pfn(zone); +- start_pfn = roundup(start_pfn, pageblock_nr_pages); +- reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> +- pageblock_order; +- +- /* +- * Reserve blocks are generally in place to help high-order atomic +- * allocations that are short-lived. A min_free_kbytes value that +- * would result in more than 2 reserve blocks for atomic allocations +- * is assumed to be in place to help anti-fragmentation for the +- * future allocation of hugepages at runtime. 
+- */ +- reserve = min(2, reserve); +- old_reserve = zone->nr_migrate_reserve_block; +- +- /* When memory hot-add, we almost always need to do nothing */ +- if (reserve == old_reserve) +- return; +- zone->nr_migrate_reserve_block = reserve; +- +- for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { +- if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone))) +- return; +- +- if (!pfn_valid(pfn)) +- continue; +- page = pfn_to_page(pfn); +- +- /* Watch out for overlapping nodes */ +- if (page_to_nid(page) != zone_to_nid(zone)) +- continue; +- +- block_migratetype = get_pageblock_migratetype(page); +- +- /* Only test what is necessary when the reserves are not met */ +- if (reserve > 0) { +- /* +- * Blocks with reserved pages will never free, skip +- * them. +- */ +- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); +- if (pageblock_is_reserved(pfn, block_end_pfn)) +- continue; +- +- /* If this block is reserved, account for it */ +- if (block_migratetype == MIGRATE_RESERVE) { +- reserve--; +- continue; +- } +- +- /* Suitable for reserving if this block is movable */ +- if (block_migratetype == MIGRATE_MOVABLE) { +- set_pageblock_migratetype(page, +- MIGRATE_RESERVE); +- move_freepages_block(zone, page, +- MIGRATE_RESERVE); +- reserve--; +- continue; +- } +- } else if (!old_reserve) { +- /* +- * At boot time we don't need to scan the whole zone +- * for turning off MIGRATE_RESERVE. +- */ +- break; +- } +- +- /* +- * If the reserve is met and this is a previous reserved block, +- * take it back +- */ +- if (block_migratetype == MIGRATE_RESERVE) { +- set_pageblock_migratetype(page, MIGRATE_MOVABLE); +- move_freepages_block(zone, page, MIGRATE_MOVABLE); +- } +- } +-} +- +-/* + * Initially all pages are reserved - free ones are freed + * up by free_all_bootmem() once the early boot process is + * done. Non-atomic initialization, single-pass. +@@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, + * movable at startup. This will force kernel allocations + * to reserve their blocks rather than leaking throughout + * the address space during boot when many long-lived +- * kernel allocations are made. Later some blocks near +- * the start are marked MIGRATE_RESERVE by +- * setup_zone_migrate_reserve() ++ * kernel allocations are made. + * + * bitmap is created for zone's valid pfn range. but memmap + * can be created for invalid pages (for alignment) +@@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void) + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); + +- setup_zone_migrate_reserve(zone); + spin_unlock_irqrestore(&zone->lock, flags); + } + +diff --git a/mm/vmstat.c b/mm/vmstat.c +index ffcb4f5..5b289dc 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = { + "Unmovable", + "Reclaimable", + "Movable", +- "Reserve", + #ifdef CONFIG_CMA + "CMA", + #endif +diff --git a/mm/backing-dev.c b/mm/backing-dev.c +index 8ed2ffd963c5..7340353f8aea 100644 +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait); + * jiffies for either a BDI to exit congestion of the given @sync queue + * or a write to complete. + * +- * In the absence of zone congestion, cond_resched() is called to yield +- * the processor if necessary but otherwise does not sleep. 
++ * In the absence of zone congestion, a short sleep or a cond_resched is ++ * performed to yield the processor and to allow other subsystems to make ++ * a forward progress. + * + * The return value is 0 if the sleep is for the full timeout. Otherwise, + * it is the number of jiffies that were still remaining when the function +@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) + */ + if (atomic_read(&nr_wb_congested[sync]) == 0 || + !test_bit(ZONE_CONGESTED, &zone->flags)) { +- cond_resched(); ++ ++ /* ++ * Memory allocation/reclaim might be called from a WQ ++ * context and the current implementation of the WQ ++ * concurrency control doesn't recognize that a particular ++ * WQ is congested if the worker thread is looping without ++ * ever sleeping. Therefore we have to do a short sleep ++ * here rather than calling cond_resched(). ++ */ ++ if (current->flags & PF_WQ_WORKER) ++ schedule_timeout(1); ++ else ++ cond_resched(); + + /* In case we scheduled, work out time remaining */ + ret = timeout - (jiffies - start); +diff --git a/mm/vmstat.c b/mm/vmstat.c +index 45dcbcb5c594..0975da8e3432 100644 +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -1381,6 +1381,7 @@ static const struct file_operations proc_vmstat_file_operations = { + #endif /* CONFIG_PROC_FS */ + + #ifdef CONFIG_SMP ++static struct workqueue_struct *vmstat_wq; + static DEFINE_PER_CPU(struct delayed_work, vmstat_work); + int sysctl_stat_interval __read_mostly = HZ; + static cpumask_var_t cpu_stat_off; +@@ -1393,7 +1394,7 @@ static void vmstat_update(struct work_struct *w) + * to occur in the future. Keep on running the + * update worker thread. + */ +- schedule_delayed_work_on(smp_processor_id(), ++ queue_delayed_work_on(smp_processor_id(), vmstat_wq, + this_cpu_ptr(&vmstat_work), + round_jiffies_relative(sysctl_stat_interval)); + } else { +@@ -1462,7 +1463,7 @@ static void vmstat_shepherd(struct work_struct *w) + if (need_update(cpu) && + cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) + +- schedule_delayed_work_on(cpu, ++ queue_delayed_work_on(cpu, vmstat_wq, + &per_cpu(vmstat_work, cpu), 0); + + put_online_cpus(); +@@ -1551,6 +1552,7 @@ static int __init setup_vmstat(void) + + start_shepherd_timer(); + cpu_notifier_register_done(); ++ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + #endif + #ifdef CONFIG_PROC_FS + proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); +-- +2.6.2 + + -- 2.44.0
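
For readers skimming the gfpflags_to_migratetype() change in commit 016c13daa5c9 above, the following is a minimal userspace sketch of the mask-and-shift conversion it introduces. The flag values, mask, shift and migrate-type order are taken from the patched gfp.h/mmzone.h hunks; everything else here (the standalone program, the assert standing in for VM_WARN_ON) is illustrative only and is not part of the kernel change.

/*
 * Illustrative, userspace-only sketch of the mask-and-shift conversion
 * from commit 016c13daa5c9.  The macro and enum names mirror the kernel
 * patch for readability; the program itself is hypothetical and only
 * demonstrates the arithmetic.
 */
#include <assert.h>
#include <stdio.h>

#define ___GFP_MOVABLE      0x08u   /* bit 3, as in the patched gfp.h */
#define ___GFP_RECLAIMABLE  0x10u   /* bit 4, as in the patched gfp.h */
#define GFP_MOVABLE_MASK    (___GFP_RECLAIMABLE | ___GFP_MOVABLE)
#define GFP_MOVABLE_SHIFT   3

enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE };

static int gfpflags_to_migratetype(unsigned int gfp_flags)
{
	/* Both bits set at once is a caller bug, as the VM_WARN_ON in the patch notes. */
	assert((gfp_flags & GFP_MOVABLE_MASK) != GFP_MOVABLE_MASK);
	return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
}

int main(void)
{
	printf("%d %d %d\n",
	       gfpflags_to_migratetype(0),                   /* 0: MIGRATE_UNMOVABLE   */
	       gfpflags_to_migratetype(___GFP_MOVABLE),      /* 1: MIGRATE_MOVABLE     */
	       gfpflags_to_migratetype(___GFP_RECLAIMABLE)); /* 2: MIGRATE_RECLAIMABLE */
	return 0;
}

Reordering MIGRATE_MOVABLE ahead of MIGRATE_RECLAIMABLE in mmzone.h is what makes the shift line up: bit 3 (___GFP_MOVABLE) maps to 1 and bit 4 (___GFP_RECLAIMABLE) maps to 2, so the conversion needs no branching, which is exactly what the BUILD_BUG_ON checks in the patch enforce.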