X-Git-Url: http://git.pld-linux.org/?a=blobdiff_plain;f=kernel-small_fixes.patch;h=dc2c31ff9b1c67f8176b7c034942e6a78f7a9b5b;hb=96f9c67f08faa78b1f5f4ad8404e371d811a1108;hp=70e90479752f388e49d143d5db82452551063ac6;hpb=28b2046740f32f01e3b8c623715bb9418de6cc9f;p=packages%2Fkernel.git diff --git a/kernel-small_fixes.patch b/kernel-small_fixes.patch index 70e90479..dc2c31ff 100644 --- a/kernel-small_fixes.patch +++ b/kernel-small_fixes.patch @@ -1,1196 +1,109 @@ ---- linux-2.6.33/scripts/mod/modpost.c~ 2010-02-24 19:52:17.000000000 +0100 -+++ linux-2.6.33/scripts/mod/modpost.c 2010-03-07 14:26:47.242168558 +0100 -@@ -15,7 +15,8 @@ - #include - #include - #include "modpost.h" --#include "../../include/generated/autoconf.h" -+// PLD architectures don't use CONFIG_SYMBOL_PREFIX -+//#include "../../include/generated/autoconf.h" - #include "../../include/linux/license.h" - - /* Some toolchains use a `_' prefix for all user symbols. */ - ---- linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh~ 2011-07-22 04:17:23.000000000 +0200 -+++ linux-3.0/scripts/kconfig/lxdialog/check-lxdialog.sh 2011-08-25 21:26:04.799150642 +0200 -@@ -9,6 +9,12 @@ - $cc -print-file-name=lib${lib}.${ext} | grep -q / - if [ $? -eq 0 ]; then - echo "-l${lib}" -+ for libt in tinfow tinfo ; do -+ $cc -print-file-name=lib${libt}.${ext} | grep -q / -+ if [ $? -eq 0 ]; then -+ echo "-l${libt}" -+ fi -+ done - exit - fi - done - -From 7a29ac474a47eb8cf212b45917683ae89d6fa13b Mon Sep 17 00:00:00 2001 -From: Chris Mason -Date: Tue, 10 Nov 2015 10:10:34 +1100 -Subject: xfs: give all workqueues rescuer threads - -We're consistently hitting deadlocks here with XFS on recent kernels. -After some digging through the crash files, it looks like everyone in -the system is waiting for XFS to reclaim memory. - -Something like this: - -PID: 2733434 TASK: ffff8808cd242800 CPU: 19 COMMAND: "java" - #0 [ffff880019c53588] __schedule at ffffffff818c4df2 - #1 [ffff880019c535d8] schedule at ffffffff818c5517 - #2 [ffff880019c535f8] _xfs_log_force_lsn at ffffffff81316348 - #3 [ffff880019c53688] xfs_log_force_lsn at ffffffff813164fb - #4 [ffff880019c536b8] xfs_iunpin_wait at ffffffff8130835e - #5 [ffff880019c53728] xfs_reclaim_inode at ffffffff812fd453 - #6 [ffff880019c53778] xfs_reclaim_inodes_ag at ffffffff812fd8c7 - #7 [ffff880019c53928] xfs_reclaim_inodes_nr at ffffffff812fe433 - #8 [ffff880019c53958] xfs_fs_free_cached_objects at ffffffff8130d3b9 - #9 [ffff880019c53968] super_cache_scan at ffffffff811a6f73 -#10 [ffff880019c539c8] shrink_slab at ffffffff811460e6 -#11 [ffff880019c53aa8] shrink_zone at ffffffff8114a53f -#12 [ffff880019c53b48] do_try_to_free_pages at ffffffff8114a8ba -#13 [ffff880019c53be8] try_to_free_pages at ffffffff8114ad5a -#14 [ffff880019c53c78] __alloc_pages_nodemask at ffffffff8113e1b8 -#15 [ffff880019c53d88] alloc_kmem_pages_node at ffffffff8113e671 -#16 [ffff880019c53dd8] copy_process at ffffffff8104f781 -#17 [ffff880019c53ec8] do_fork at ffffffff8105129c -#18 [ffff880019c53f38] sys_clone at ffffffff810515b6 -#19 [ffff880019c53f48] stub_clone at ffffffff818c8e4d - -xfs_log_force_lsn is waiting for logs to get cleaned, which is waiting -for IO, which is waiting for workers to complete the IO which is waiting -for worker threads that don't exist yet: - -PID: 2752451 TASK: ffff880bd6bdda00 CPU: 37 COMMAND: "kworker/37:1" - #0 [ffff8808d20abbb0] __schedule at ffffffff818c4df2 - #1 [ffff8808d20abc00] schedule at ffffffff818c5517 - #2 [ffff8808d20abc20] schedule_timeout at ffffffff818c7c6c - #3 [ffff8808d20abcc0] wait_for_completion_killable at ffffffff818c6495 - #4 [ffff8808d20abd30] kthread_create_on_node at ffffffff8106ec82 - #5 [ffff8808d20abdf0] create_worker at ffffffff8106752f - #6 [ffff8808d20abe40] worker_thread at ffffffff810699be - #7 [ffff8808d20abec0] kthread at ffffffff8106ef59 - #8 [ffff8808d20abf50] ret_from_fork at ffffffff818c8ac8 - -I think we should be using WQ_MEM_RECLAIM to make sure this thread -pool makes progress when we're not able to allocate new workers. - -[dchinner: make all workqueues WQ_MEM_RECLAIM] - -Signed-off-by: Chris Mason -Reviewed-by: Dave Chinner -Signed-off-by: Dave Chinner ---- - fs/xfs/xfs_super.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c -index 29531ec..65fbfb7 100644 ---- a/fs/xfs/xfs_super.c -+++ b/fs/xfs/xfs_super.c -@@ -838,17 +838,18 @@ xfs_init_mount_workqueues( - goto out_destroy_unwritten; - - mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", -- WQ_FREEZABLE, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_reclaim_workqueue) - goto out_destroy_cil; - - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", -- WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, -+ mp->m_fsname); - if (!mp->m_log_workqueue) - goto out_destroy_reclaim; - - mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", -- WQ_FREEZABLE, 0, mp->m_fsname); -+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); - if (!mp->m_eofblocks_workqueue) - goto out_destroy_log; - --- -cgit v0.11.2 - -commit c2d42c16ad83006a706d83e51a7268db04af733a -Author: Andrew Morton -Date: Thu Nov 5 18:48:43 2015 -0800 - - mm/vmstat.c: uninline node_page_state() - - With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc - (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of - stack. Uninlining node_page_state() reduces this to 440 bytes. - - The stack consumption issue is fixed by newer gcc (4.8.4) however with - that compiler this patch reduces the node.o text size from 7314 bytes to - 4578. - - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h -index 82e7db7..49dfe40 100644 ---- a/include/linux/vmstat.h -+++ b/include/linux/vmstat.h -@@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, - } - - #ifdef CONFIG_NUMA --/* -- * Determine the per node value of a stat item. This function -- * is called frequently in a NUMA machine, so try to be as -- * frugal as possible. -- */ --static inline unsigned long node_page_state(int node, -- enum zone_stat_item item) +; https://lkml.org/lkml/2019/7/10/244 +diff -ur linux-5.3/drivers/scsi/aacraid.org/aacraid.h linux-5.3/drivers/scsi/aacraid/aacraid.h +--- linux-5.3/drivers/scsi/aacraid.org/aacraid.h 2019-11-01 22:42:37.011469816 +0100 ++++ linux-5.3/drivers/scsi/aacraid/aacraid.h 2019-11-04 09:29:51.321486211 +0100 +@@ -2740,17 +2740,6 @@ + int aac_rx_deliver_producer(struct fib * fib); + void aac_reinit_aif(struct aac_dev *aac, unsigned int index); + +-static inline int aac_is_src(struct aac_dev *dev) -{ -- struct zone *zones = NODE_DATA(node)->node_zones; +- u16 device = dev->pdev->device; - -- return --#ifdef CONFIG_ZONE_DMA -- zone_page_state(&zones[ZONE_DMA], item) + --#endif --#ifdef CONFIG_ZONE_DMA32 -- zone_page_state(&zones[ZONE_DMA32], item) + --#endif --#ifdef CONFIG_HIGHMEM -- zone_page_state(&zones[ZONE_HIGHMEM], item) + --#endif -- zone_page_state(&zones[ZONE_NORMAL], item) + -- zone_page_state(&zones[ZONE_MOVABLE], item); --} - -+extern unsigned long node_page_state(int node, enum zone_stat_item item); - extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); - - #else -diff --git a/mm/vmstat.c b/mm/vmstat.c -index fbf1448..ffcb4f5 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -591,6 +591,28 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags) - else - __inc_zone_state(z, NUMA_OTHER); - } -+ -+/* -+ * Determine the per node value of a stat item. -+ */ -+unsigned long node_page_state(int node, enum zone_stat_item item) -+{ -+ struct zone *zones = NODE_DATA(node)->node_zones; -+ -+ return -+#ifdef CONFIG_ZONE_DMA -+ zone_page_state(&zones[ZONE_DMA], item) + -+#endif -+#ifdef CONFIG_ZONE_DMA32 -+ zone_page_state(&zones[ZONE_DMA32], item) + -+#endif -+#ifdef CONFIG_HIGHMEM -+ zone_page_state(&zones[ZONE_HIGHMEM], item) + -+#endif -+ zone_page_state(&zones[ZONE_NORMAL], item) + -+ zone_page_state(&zones[ZONE_MOVABLE], item); -+} -+ - #endif - - #ifdef CONFIG_COMPACTION -commit 016c13daa5c9e4827eca703e2f0621c131f2cca3 -Author: Mel Gorman -Date: Fri Nov 6 16:28:18 2015 -0800 - - mm, page_alloc: use masks and shifts when converting GFP flags to migrate types - - This patch redefines which GFP bits are used for specifying mobility and - the order of the migrate types. Once redefined it's possible to convert - GFP flags to a migrate type with a simple mask and shift. The only - downside is that readers of OOM kill messages and allocation failures may - have been used to the existing values but scripts/gfp-translate will help. - - Signed-off-by: Mel Gorman - Acked-by: Vlastimil Babka - Cc: Christoph Lameter - Cc: David Rientjes - Cc: Johannes Weiner - Cc: Michal Hocko - Cc: Vitaly Wool - Cc: Rik van Riel - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/gfp.h b/include/linux/gfp.h -index f92cbd2..440fca3 100644 ---- a/include/linux/gfp.h -+++ b/include/linux/gfp.h -@@ -14,7 +14,7 @@ struct vm_area_struct; - #define ___GFP_HIGHMEM 0x02u - #define ___GFP_DMA32 0x04u - #define ___GFP_MOVABLE 0x08u --#define ___GFP_WAIT 0x10u -+#define ___GFP_RECLAIMABLE 0x10u - #define ___GFP_HIGH 0x20u - #define ___GFP_IO 0x40u - #define ___GFP_FS 0x80u -@@ -29,7 +29,7 @@ struct vm_area_struct; - #define ___GFP_NOMEMALLOC 0x10000u - #define ___GFP_HARDWALL 0x20000u - #define ___GFP_THISNODE 0x40000u --#define ___GFP_RECLAIMABLE 0x80000u -+#define ___GFP_WAIT 0x80000u - #define ___GFP_NOACCOUNT 0x100000u - #define ___GFP_NOTRACK 0x200000u - #define ___GFP_NO_KSWAPD 0x400000u -@@ -126,6 +126,7 @@ struct vm_area_struct; - - /* This mask makes up all the page movable related flags */ - #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) -+#define GFP_MOVABLE_SHIFT 3 - - /* Control page allocator reclaim behavior */ - #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ -@@ -152,14 +153,15 @@ struct vm_area_struct; - /* Convert GFP flags to their corresponding migrate type */ - static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) - { -- WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); -+ VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); -+ BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); -+ BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); - - if (unlikely(page_group_by_mobility_disabled)) - return MIGRATE_UNMOVABLE; - - /* Group based on mobility */ -- return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | -- ((gfp_flags & __GFP_RECLAIMABLE) != 0); -+ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; - } - - #ifdef CONFIG_HIGHMEM -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index e326843..38bed71 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -37,8 +37,8 @@ - - enum { - MIGRATE_UNMOVABLE, -- MIGRATE_RECLAIMABLE, - MIGRATE_MOVABLE, -+ MIGRATE_RECLAIMABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, - #ifdef CONFIG_CMA -commit 974a786e63c96a2401a78ddba926f34c128474f1 -Author: Mel Gorman -Date: Fri Nov 6 16:28:34 2015 -0800 - - mm, page_alloc: remove MIGRATE_RESERVE - - MIGRATE_RESERVE preserves an old property of the buddy allocator that - existed prior to fragmentation avoidance -- min_free_kbytes worth of pages - tended to remain contiguous until the only alternative was to fail the - allocation. At the time it was discovered that high-order atomic - allocations relied on this property so MIGRATE_RESERVE was introduced. A - later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch - deletes MIGRATE_RESERVE and supporting code so it'll be easier to review. - Note that this patch in isolation may look like a false regression if - someone was bisecting high-order atomic allocation failures. - - Signed-off-by: Mel Gorman - Acked-by: Vlastimil Babka - Cc: Christoph Lameter - Cc: David Rientjes - Cc: Johannes Weiner - Cc: Michal Hocko - Cc: Vitaly Wool - Cc: Rik van Riel - Signed-off-by: Andrew Morton - Signed-off-by: Linus Torvalds - -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 1e88aae..b86cfa3 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -39,8 +39,6 @@ enum { - MIGRATE_UNMOVABLE, - MIGRATE_MOVABLE, - MIGRATE_RECLAIMABLE, -- MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ -- MIGRATE_RESERVE = MIGRATE_PCPTYPES, - #ifdef CONFIG_CMA - /* - * MIGRATE_CMA migration type is designed to mimic the way -@@ -63,6 +61,8 @@ enum { - MIGRATE_TYPES - }; - -+#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) -+ - #ifdef CONFIG_CMA - # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) - #else -@@ -429,12 +429,6 @@ struct zone { - - const char *name; - -- /* -- * Number of MIGRATE_RESERVE page block. To maintain for just -- * optimization. Protected by zone->lock. -- */ -- int nr_migrate_reserve_block; -- - #ifdef CONFIG_MEMORY_ISOLATION - /* - * Number of isolated pageblock. It is used to solve incorrect -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 9812d46..dabd247 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -116,7 +116,7 @@ static void set_recommended_min_free_kbytes(void) - for_each_populated_zone(zone) - nr_zones++; - -- /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */ -+ /* Ensure 2 pageblocks are free to assist fragmentation avoidance */ - recommended_min = pageblock_nr_pages * nr_zones * 2; - - /* -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 8dc6e3c..5888126 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -817,7 +817,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, - if (unlikely(has_isolate_pageblock(zone))) - mt = get_pageblock_migratetype(page); - -- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, page_to_pfn(page), zone, 0, mt); - trace_mm_page_pcpu_drain(page, 0, mt); - } while (--to_free && --batch_free && !list_empty(list)); -@@ -1417,15 +1416,14 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, - * the free lists for the desirable migrate type are depleted - */ - static int fallbacks[MIGRATE_TYPES][4] = { -- [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, -- [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, -- [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, -+ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, -+ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, -+ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, - #ifdef CONFIG_CMA -- [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ -+ [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ - #endif -- [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ - #ifdef CONFIG_MEMORY_ISOLATION -- [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ -+ [MIGRATE_ISOLATE] = { MIGRATE_TYPES }, /* Never used */ - #endif - }; - -@@ -1598,7 +1596,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, - *can_steal = false; - for (i = 0;; i++) { - fallback_mt = fallbacks[migratetype][i]; -- if (fallback_mt == MIGRATE_RESERVE) -+ if (fallback_mt == MIGRATE_TYPES) - break; - - if (list_empty(&area->free_list[fallback_mt])) -@@ -1676,25 +1674,13 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order, - { - struct page *page; - --retry_reserve: - page = __rmqueue_smallest(zone, order, migratetype); -- -- if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { -+ if (unlikely(!page)) { - if (migratetype == MIGRATE_MOVABLE) - page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype); -- -- /* -- * Use MIGRATE_RESERVE rather than fail an allocation. goto -- * is used because __rmqueue_smallest is an inline function -- * and we want just one call site -- */ -- if (!page) { -- migratetype = MIGRATE_RESERVE; -- goto retry_reserve; -- } - } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); -@@ -3492,7 +3478,6 @@ static void show_migration_types(unsigned char type) - [MIGRATE_UNMOVABLE] = 'U', - [MIGRATE_RECLAIMABLE] = 'E', - [MIGRATE_MOVABLE] = 'M', -- [MIGRATE_RESERVE] = 'R', - #ifdef CONFIG_CMA - [MIGRATE_CMA] = 'C', - #endif -@@ -4303,120 +4288,6 @@ static inline unsigned long wait_table_bits(unsigned long size) - } - - /* -- * Check if a pageblock contains reserved pages -- */ --static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn) --{ -- unsigned long pfn; -- -- for (pfn = start_pfn; pfn < end_pfn; pfn++) { -- if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn))) -- return 1; -- } +- if (device == PMC_DEVICE_S6 || +- device == PMC_DEVICE_S7 || +- device == PMC_DEVICE_S8) +- return 1; - return 0; -} - --/* -- * Mark a number of pageblocks as MIGRATE_RESERVE. The number -- * of blocks reserved is based on min_wmark_pages(zone). The memory within -- * the reserve will tend to store contiguous free pages. Setting min_free_kbytes -- * higher will lead to a bigger reserve which will get freed as contiguous -- * blocks as reclaim kicks in -- */ --static void setup_zone_migrate_reserve(struct zone *zone) --{ -- unsigned long start_pfn, pfn, end_pfn, block_end_pfn; -- struct page *page; -- unsigned long block_migratetype; -- int reserve; -- int old_reserve; -- -- /* -- * Get the start pfn, end pfn and the number of blocks to reserve -- * We have to be careful to be aligned to pageblock_nr_pages to -- * make sure that we always check pfn_valid for the first page in -- * the block. -- */ -- start_pfn = zone->zone_start_pfn; -- end_pfn = zone_end_pfn(zone); -- start_pfn = roundup(start_pfn, pageblock_nr_pages); -- reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> -- pageblock_order; -- -- /* -- * Reserve blocks are generally in place to help high-order atomic -- * allocations that are short-lived. A min_free_kbytes value that -- * would result in more than 2 reserve blocks for atomic allocations -- * is assumed to be in place to help anti-fragmentation for the -- * future allocation of hugepages at runtime. -- */ -- reserve = min(2, reserve); -- old_reserve = zone->nr_migrate_reserve_block; -- -- /* When memory hot-add, we almost always need to do nothing */ -- if (reserve == old_reserve) -- return; -- zone->nr_migrate_reserve_block = reserve; -- -- for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { -- if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone))) -- return; -- -- if (!pfn_valid(pfn)) -- continue; -- page = pfn_to_page(pfn); -- -- /* Watch out for overlapping nodes */ -- if (page_to_nid(page) != zone_to_nid(zone)) -- continue; -- -- block_migratetype = get_pageblock_migratetype(page); -- -- /* Only test what is necessary when the reserves are not met */ -- if (reserve > 0) { -- /* -- * Blocks with reserved pages will never free, skip -- * them. -- */ -- block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn); -- if (pageblock_is_reserved(pfn, block_end_pfn)) -- continue; -- -- /* If this block is reserved, account for it */ -- if (block_migratetype == MIGRATE_RESERVE) { -- reserve--; -- continue; -- } -- -- /* Suitable for reserving if this block is movable */ -- if (block_migratetype == MIGRATE_MOVABLE) { -- set_pageblock_migratetype(page, -- MIGRATE_RESERVE); -- move_freepages_block(zone, page, -- MIGRATE_RESERVE); -- reserve--; -- continue; -- } -- } else if (!old_reserve) { -- /* -- * At boot time we don't need to scan the whole zone -- * for turning off MIGRATE_RESERVE. -- */ -- break; -- } -- -- /* -- * If the reserve is met and this is a previous reserved block, -- * take it back -- */ -- if (block_migratetype == MIGRATE_RESERVE) { -- set_pageblock_migratetype(page, MIGRATE_MOVABLE); -- move_freepages_block(zone, page, MIGRATE_MOVABLE); -- } -- } --} -- --/* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. Non-atomic initialization, single-pass. -@@ -4455,9 +4326,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - * movable at startup. This will force kernel allocations - * to reserve their blocks rather than leaking throughout - * the address space during boot when many long-lived -- * kernel allocations are made. Later some blocks near -- * the start are marked MIGRATE_RESERVE by -- * setup_zone_migrate_reserve() -+ * kernel allocations are made. - * - * bitmap is created for zone's valid pfn range. but memmap - * can be created for invalid pages (for alignment) -@@ -6018,7 +5887,6 @@ static void __setup_per_zone_wmarks(void) - high_wmark_pages(zone) - low_wmark_pages(zone) - - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - -- setup_zone_migrate_reserve(zone); - spin_unlock_irqrestore(&zone->lock, flags); - } - -diff --git a/mm/vmstat.c b/mm/vmstat.c -index ffcb4f5..5b289dc 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -923,7 +923,6 @@ static char * const migratetype_names[MIGRATE_TYPES] = { - "Unmovable", - "Reclaimable", - "Movable", -- "Reserve", - #ifdef CONFIG_CMA - "CMA", - #endif -diff --git a/mm/backing-dev.c b/mm/backing-dev.c -index 8ed2ffd963c5..7340353f8aea 100644 ---- a/mm/backing-dev.c -+++ b/mm/backing-dev.c -@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait); - * jiffies for either a BDI to exit congestion of the given @sync queue - * or a write to complete. - * -- * In the absence of zone congestion, cond_resched() is called to yield -- * the processor if necessary but otherwise does not sleep. -+ * In the absence of zone congestion, a short sleep or a cond_resched is -+ * performed to yield the processor and to allow other subsystems to make -+ * a forward progress. - * - * The return value is 0 if the sleep is for the full timeout. Otherwise, - * it is the number of jiffies that were still remaining when the function -@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout) - */ - if (atomic_read(&nr_wb_congested[sync]) == 0 || - !test_bit(ZONE_CONGESTED, &zone->flags)) { -- cond_resched(); -+ -+ /* -+ * Memory allocation/reclaim might be called from a WQ -+ * context and the current implementation of the WQ -+ * concurrency control doesn't recognize that a particular -+ * WQ is congested if the worker thread is looping without -+ * ever sleeping. Therefore we have to do a short sleep -+ * here rather than calling cond_resched(). -+ */ -+ if (current->flags & PF_WQ_WORKER) -+ schedule_timeout(1); -+ else -+ cond_resched(); - - /* In case we scheduled, work out time remaining */ - ret = timeout - (jiffies - start); -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 45dcbcb5c594..0975da8e3432 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1381,6 +1381,7 @@ static const struct file_operations proc_vmstat_file_operations = { - #endif /* CONFIG_PROC_FS */ - - #ifdef CONFIG_SMP -+static struct workqueue_struct *vmstat_wq; - static DEFINE_PER_CPU(struct delayed_work, vmstat_work); - int sysctl_stat_interval __read_mostly = HZ; - static cpumask_var_t cpu_stat_off; -@@ -1393,7 +1394,7 @@ static void vmstat_update(struct work_struct *w) - * to occur in the future. Keep on running the - * update worker thread. - */ -- schedule_delayed_work_on(smp_processor_id(), -+ queue_delayed_work_on(smp_processor_id(), vmstat_wq, - this_cpu_ptr(&vmstat_work), - round_jiffies_relative(sysctl_stat_interval)); - } else { -@@ -1462,7 +1463,7 @@ static void vmstat_shepherd(struct work_struct *w) - if (need_update(cpu) && - cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) - -- schedule_delayed_work_on(cpu, -+ queue_delayed_work_on(cpu, vmstat_wq, - &per_cpu(vmstat_work, cpu), 0); - - put_online_cpus(); -@@ -1551,6 +1552,7 @@ static int __init setup_vmstat(void) - - start_shepherd_timer(); - cpu_notifier_register_done(); -+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); - #endif - #ifdef CONFIG_PROC_FS - proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); --- -2.6.2 - -From 09ccfd238e5a0e670d8178cf50180ea81ae09ae1 Mon Sep 17 00:00:00 2001 -From: WANG Cong -Date: Mon, 14 Dec 2015 13:48:36 -0800 -Subject: pptp: verify sockaddr_len in pptp_bind() and pptp_connect() - -Reported-by: Dmitry Vyukov -Signed-off-by: Cong Wang -Signed-off-by: David S. Miller ---- - drivers/net/ppp/pptp.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c -index fc69e41..597c53e 100644 ---- a/drivers/net/ppp/pptp.c -+++ b/drivers/net/ppp/pptp.c -@@ -419,6 +419,9 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, - struct pptp_opt *opt = &po->proto.pptp; - int error = 0; - -+ if (sockaddr_len < sizeof(struct sockaddr_pppox)) -+ return -EINVAL; -+ - lock_sock(sk); - - opt->src_addr = sp->sa_addr.pptp; -@@ -440,6 +443,9 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, - struct flowi4 fl4; - int error = 0; - -+ if (sockaddr_len < sizeof(struct sockaddr_pppox)) -+ return -EINVAL; -+ - if (sp->sa_protocol != PX_PROTO_PPTP) - return -EINVAL; - --- -cgit v0.11.2 - -commit cc57858831e3e9678291de730c4b4d2e52a19f59 -Author: Artur Paszkiewicz -Date: Fri Dec 18 15:19:16 2015 +1100 - - md/raid10: fix data corruption and crash during resync - - The commit c31df25f20e3 ("md/raid10: make sync_request_write() call - bio_copy_data()") replaced manual data copying with bio_copy_data() but - it doesn't work as intended. The source bio (fbio) is already processed, - so its bvec_iter has bi_size == 0 and bi_idx == bi_vcnt. Because of - this, bio_copy_data() either does not copy anything, or worse, copies - data from the ->bi_next bio if it is set. This causes wrong data to be - written to drives during resync and sometimes lockups/crashes in - bio_copy_data(): - - [ 517.338478] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [md126_raid10:3319] - [ 517.347324] Modules linked in: raid10 xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables x86_pkg_temp_thermal coretemp kvm_intel kvm crct10dif_pclmul crc32_pclmul cryptd shpchp pcspkr ipmi_si ipmi_msghandler tpm_crb acpi_power_meter acpi_cpufreq ext4 mbcache jbd2 sr_mod cdrom sd_mod e1000e ax88179_178a usbnet mii ahci ata_generic crc32c_intel libahci ptp pata_acpi libata pps_core wmi sunrpc dm_mirror dm_region_hash dm_log dm_mod - [ 517.440555] CPU: 0 PID: 3319 Comm: md126_raid10 Not tainted 4.3.0-rc6+ #1 - [ 517.448384] Hardware name: Intel Corporation PURLEY/PURLEY, BIOS PLYDCRB1.86B.0055.D14.1509221924 09/22/2015 - [ 517.459768] task: ffff880153773980 ti: ffff880150df8000 task.ti: ffff880150df8000 - [ 517.468529] RIP: 0010:[] [] bio_copy_data+0xc8/0x3c0 - [ 517.478164] RSP: 0018:ffff880150dfbc98 EFLAGS: 00000246 - [ 517.484341] RAX: ffff880169356688 RBX: 0000000000001000 RCX: 0000000000000000 - [ 517.492558] RDX: 0000000000000000 RSI: ffffea0001ac2980 RDI: ffffea0000d835c0 - [ 517.500773] RBP: ffff880150dfbd08 R08: 0000000000000001 R09: ffff880153773980 - [ 517.508987] R10: ffff880169356600 R11: 0000000000001000 R12: 0000000000010000 - [ 517.517199] R13: 000000000000e000 R14: 0000000000000000 R15: 0000000000001000 - [ 517.525412] FS: 0000000000000000(0000) GS:ffff880174a00000(0000) knlGS:0000000000000000 - [ 517.534844] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 - [ 517.541507] CR2: 00007f8a044d5fed CR3: 0000000169504000 CR4: 00000000001406f0 - [ 517.549722] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 - [ 517.557929] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 - [ 517.566144] Stack: - [ 517.568626] ffff880174a16bc0 ffff880153773980 ffff880169356600 0000000000000000 - [ 517.577659] 0000000000000001 0000000000000001 ffff880153773980 ffff88016a61a800 - [ 517.586715] ffff880150dfbcf8 0000000000000001 ffff88016dd209e0 0000000000001000 - [ 517.595773] Call Trace: - [ 517.598747] [] raid10d+0xfc5/0x1690 [raid10] - [ 517.605610] [] ? __schedule+0x29e/0x8e2 - [ 517.611987] [] md_thread+0x106/0x140 - [ 517.618072] [] ? wait_woken+0x80/0x80 - [ 517.624252] [] ? super_1_load+0x520/0x520 - [ 517.630817] [] kthread+0xc9/0xe0 - [ 517.636506] [] ? flush_kthread_worker+0x70/0x70 - [ 517.643653] [] ret_from_fork+0x3f/0x70 - [ 517.649929] [] ? flush_kthread_worker+0x70/0x70 - - Signed-off-by: Artur Paszkiewicz - Reviewed-by: Shaohua Li - Cc: stable@vger.kernel.org (v4.2+) - Fixes: c31df25f20e3 ("md/raid10: make sync_request_write() call bio_copy_data()") - Signed-off-by: NeilBrown - -diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c -index 41d70bc..84e597e 100644 ---- a/drivers/md/raid10.c -+++ b/drivers/md/raid10.c -@@ -1946,6 +1946,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - - first = i; - fbio = r10_bio->devs[i].bio; -+ fbio->bi_iter.bi_size = r10_bio->sectors << 9; -+ fbio->bi_iter.bi_idx = 0; - - vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9); - /* now find blocks with errors */ -@@ -1989,7 +1991,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) - bio_reset(tbio); - - tbio->bi_vcnt = vcnt; -- tbio->bi_iter.bi_size = r10_bio->sectors << 9; -+ tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; - tbio->bi_rw = WRITE; - tbio->bi_private = r10_bio; - tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; -From: Michal Hocko - -kernel test robot has reported the following crash: -[ 3.870718] BUG: unable to handle kernel NULL pointer dereferenceNULL pointer dereference at 00000100 - at 00000100 -[ 3.872615] IP: [] __queue_work+0x26/0x390 [] __queue_work+0x26/0x390 -[ 3.873758] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 *pde = f000ff53f000ff53 -[ 3.875096] Oops: 0000 [#1] PREEMPT PREEMPT SMP SMP -[ 3.876130] CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.4.0-rc4-00139-g373ccbe #1 -[ 3.878135] Workqueue: events vmstat_shepherd -[ 3.879207] task: cb684600 ti: cb7ba000 task.ti: cb7ba000 -[ 3.880445] EIP: 0060:[] EFLAGS: 00010046 CPU: 0 -[ 3.881704] EIP is at __queue_work+0x26/0x390 -[ 3.882823] EAX: 00000046 EBX: cbb37800 ECX: cbb37800 EDX: 00000000 -[ 3.884457] ESI: 00000000 EDI: 00000000 EBP: cb7bbe68 ESP: cb7bbe38 -[ 3.886005] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 -[ 3.887229] CR0: 8005003b CR2: 00000100 CR3: 01fd5000 CR4: 000006b0 -[ 3.888663] Stack: -[ 3.895204] Call Trace: -[ 3.895854] [] ? mutex_unlock+0xd/0x10 -[ 3.897120] [] __queue_delayed_work+0xa1/0x160 -[ 3.898530] [] queue_delayed_work_on+0x36/0x60 -[ 3.899790] [] vmstat_shepherd+0xad/0xf0 -[ 3.900899] [] process_one_work+0x1aa/0x4c0 -[ 3.902093] [] ? process_one_work+0x112/0x4c0 -[ 3.903520] [] ? do_raw_spin_lock+0xe/0x150 -[ 3.904853] [] worker_thread+0x41/0x440 -[ 3.906023] [] ? process_one_work+0x4c0/0x4c0 -[ 3.907242] [] kthread+0xb0/0xd0 -[ 3.908188] [] ret_from_kernel_thread+0x21/0x40 -[ 3.909601] [] ? __kthread_parkme+0x80/0x80 - -The reason is that start_shepherd_timer schedules the shepherd work item -which uses vmstat_wq (vmstat_shepherd) before setup_vmstat allocates -that workqueue so if the further initialization takes more than HZ -we might end up scheduling on a NULL vmstat_wq. This is really unlikely -but not impossible. - -Fixes: 373ccbe59270 ("mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't make any progress") -Reported-by: kernel test robot -Signed-off-by: Michal Hocko ---- -Hi Linus, -I am not marking this for stable because I hope we can sneak it into 4.4. -The patch is trivial and obvious. I am sorry about the breakage. If you prefer -to postpone it to 4.5-rc1 because this is not really that critical and shouldn't -happen most of the time then I will repost with stable tag added. - -Thanks! - - mm/vmstat.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/mm/vmstat.c b/mm/vmstat.c -index 4ebc17d948cb..c54fd2924f25 100644 ---- a/mm/vmstat.c -+++ b/mm/vmstat.c -@@ -1483,6 +1483,7 @@ static void __init start_shepherd_timer(void) - BUG(); - cpumask_copy(cpu_stat_off, cpu_online_mask); - -+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); - schedule_delayed_work(&shepherd, - round_jiffies_relative(sysctl_stat_interval)); - } -@@ -1550,7 +1551,6 @@ static int __init setup_vmstat(void) - - start_shepherd_timer(); - cpu_notifier_register_done(); -- vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); - #endif - #ifdef CONFIG_PROC_FS - proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); --- -2.6.4 - -From: Dave Chinner - -When we do inode readahead in log recovery, we do can do the -readahead before we've replayed the icreate transaction that stamps -the buffer with inode cores. The inode readahead verifier catches -this and marks the buffer as !done to indicate that it doesn't yet -contain valid inodes. - -In adding buffer error notification (i.e. setting b_error = -EIO at -the same time as as we clear the done flag) to such a readahead -verifier failure, we can then get subsequent inode recovery failing -with this error: - -XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32 - -This occurs when readahead completion races with icreate item replay -such as: - - inode readahead - find buffer - lock buffer - submit RA io - .... - icreate recovery - xfs_trans_get_buffer - find buffer - lock buffer - - ..... - - fails verifier - clear XBF_DONE - set bp->b_error = -EIO - release and unlock buffer - - icreate initialises buffer - marks buffer as done - adds buffer to delayed write queue - releases buffer - -At this point, we have an initialised inode buffer that is up to -date but has an -EIO state registered against it. When we finally -get to recovering an inode in that buffer: - - inode item recovery - xfs_trans_read_buffer - find buffer - lock buffer - sees XBF_DONE is set, returns buffer - sees bp->b_error is set - fail log recovery! - -Essentially, we need xfs_trans_get_buf_map() to clear the error status of -the buffer when doing a lookup. This function returns uninitialised -buffers, so the buffer returned can not be in an error state and -none of the code that uses this function expects b_error to be set -on return. Indeed, there is an ASSERT(!bp->b_error); in the -transaction case in xfs_trans_get_buf_map() that would have caught -this if log recovery used transactions.... - -This patch firstly changes the inode readahead failure to set -EIO -on the buffer, and secondly changes xfs_buf_get_map() to never -return a buffer with an error state set so this first change doesn't -cause unexpected log recovery failures. - -Signed-off-by: Dave Chinner ---- - fs/xfs/libxfs/xfs_inode_buf.c | 12 +++++++----- - fs/xfs/xfs_buf.c | 7 +++++++ - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c -index 1b8d98a..ff17c48 100644 ---- a/fs/xfs/libxfs/xfs_inode_buf.c -+++ b/fs/xfs/libxfs/xfs_inode_buf.c -@@ -62,11 +62,12 @@ xfs_inobp_check( - * has not had the inode cores stamped into it. Hence for readahead, the buffer - * may be potentially invalid. - * -- * If the readahead buffer is invalid, we don't want to mark it with an error, -- * but we do want to clear the DONE status of the buffer so that a followup read -- * will re-read it from disk. This will ensure that we don't get an unnecessary -- * warnings during log recovery and we don't get unnecssary panics on debug -- * kernels. -+ * If the readahead buffer is invalid, we need to mark it with an error and -+ * clear the DONE status of the buffer so that a followup read will re-read it -+ * from disk. We don't report the error otherwise to avoid warnings during log -+ * recovery and we don't get unnecssary panics on debug kernels. We use EIO here -+ * because all we want to do is say readahead failed; there is no-one to report -+ * the error to, so this will distinguish it from a non-ra verifier failure. - */ - static void - xfs_inode_buf_verify( -@@ -93,6 +94,7 @@ xfs_inode_buf_verify( - XFS_RANDOM_ITOBP_INOTOBP))) { - if (readahead) { - bp->b_flags &= ~XBF_DONE; -+ xfs_buf_ioerror(bp, -EIO); - return; - } - -diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c -index 45a8ea7..ae86b16 100644 ---- a/fs/xfs/xfs_buf.c -+++ b/fs/xfs/xfs_buf.c -@@ -604,6 +604,13 @@ found: - } - } - -+ /* -+ * Clear b_error if this is a lookup from a caller that doesn't expect -+ * valid data to be found in the buffer. -+ */ -+ if (!(flags & XBF_READ)) -+ xfs_buf_ioerror(bp, 0); -+ - XFS_STATS_INC(xb_get); - trace_xfs_buf_get(bp, flags, _RET_IP_); - return bp; --- -2.5.0 - -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs -From: Dave Chinner - -When we do dquot readahead in log recovery, we do not use a verifier -as the underlying buffer may not have dquots in it. e.g. the -allocation operation hasn't yet been replayed. Hence we do not want -to fail recovery because we detect an operation to be replayed has -not been run yet. This problem was addressed for inodes in commit -d891400 ("xfs: inode buffers may not be valid during recovery -readahead") but the problem was not recognised to exist for dquots -and their buffers as the dquot readahead did not have a verifier. - -The result of not using a verifier is that when the buffer is then -next read to replay a dquot modification, the dquot buffer verifier -will only be attached to the buffer if *readahead is not complete*. -Hence we can read the buffer, replay the dquot changes and then add -it to the delwri submission list without it having a verifier -attached to it. This then generates warnings in xfs_buf_ioapply(), -which catches and warns about this case. - -Fix this and make it handle the same readahead verifier error cases -as for inode buffers by adding a new readahead verifier that has a -write operation as well as a read operation that marks the buffer as -not done if any corruption is detected. Also make sure we don't run -readahead if the dquot buffer has been marked as cancelled by -recovery. - -This will result in readahead either succeeding and the buffer -having a valid write verifier, or readahead failing and the buffer -state requiring the subsequent read to resubmit the IO with the new -verifier. In either case, this will result in the buffer always -ending up with a valid write verifier on it. - -Note: we also need to fix the inode buffer readahead error handling -to mark the buffer with EIO. Brian noticed the code I copied from -there wrong during review, so fix it at the same time. Add comments -linking the two functions that handle readahead verifier errors -together so we don't forget this behavioural link in future. - -cc: # 3.12 - current -Signed-off-by: Dave Chinner -Reviewed-by: Brian Foster -Signed-off-by: Dave Chinner ---- - fs/xfs/libxfs/xfs_dquot_buf.c | 36 ++++++++++++++++++++++++++++++------ - fs/xfs/libxfs/xfs_inode_buf.c | 2 ++ - fs/xfs/libxfs/xfs_quota_defs.h | 2 +- - fs/xfs/libxfs/xfs_shared.h | 1 + - fs/xfs/xfs_log_recover.c | 9 +++++++-- - 5 files changed, 41 insertions(+), 9 deletions(-) - -diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c -index 11cefb2..3cc3cf7 100644 ---- a/fs/xfs/libxfs/xfs_dquot_buf.c -+++ b/fs/xfs/libxfs/xfs_dquot_buf.c -@@ -54,7 +54,7 @@ xfs_dqcheck( - xfs_dqid_t id, - uint type, /* used only when IO_dorepair is true */ - uint flags, -- char *str) -+ const char *str) + static inline int aac_supports_2T(struct aac_dev *dev) { - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; -@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc( - STATIC bool - xfs_dquot_buf_verify( - struct xfs_mount *mp, -- struct xfs_buf *bp) -+ struct xfs_buf *bp, -+ int warn) + return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64); +diff -ur linux-5.3/drivers/scsi/aacraid.org/comminit.c linux-5.3/drivers/scsi/aacraid/comminit.c +--- linux-5.3/drivers/scsi/aacraid.org/comminit.c 2019-11-01 22:42:37.014803249 +0100 ++++ linux-5.3/drivers/scsi/aacraid/comminit.c 2019-11-04 09:29:51.321486211 +0100 +@@ -41,8 +41,11 @@ { - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - xfs_dqid_t id = 0; -@@ -240,8 +241,7 @@ xfs_dquot_buf_verify( - if (i == 0) - id = be32_to_cpu(ddq->d_id); - -- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, -- "xfs_dquot_buf_verify"); -+ error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); - if (error) - return false; - } -@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify( + u32 status = 0; - if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); -- else if (!xfs_dquot_buf_verify(mp, bp)) -+ else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) -@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify( - } - - /* -+ * readahead errors are silent and simply leave the buffer as !done so a real -+ * read will then be run with the xfs_dquot_buf_ops verifier. See -+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than -+ * reporting the failure. -+ */ -+static void -+xfs_dquot_buf_readahead_verify( -+ struct xfs_buf *bp) -+{ -+ struct xfs_mount *mp = bp->b_target->bt_mount; -+ -+ if (!xfs_dquot_buf_verify_crc(mp, bp) || -+ !xfs_dquot_buf_verify(mp, bp, 0)) { -+ xfs_buf_ioerror(bp, -EIO); -+ bp->b_flags &= ~XBF_DONE; +- if (aac_is_src(dev)) ++ if (dev->pdev->device == PMC_DEVICE_S6 || ++ dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) { + status = src_readl(dev, MUnit.OMR); + } -+} -+ -+/* - * we don't calculate the CRC here as that is done when the dquot is flushed to - * the buffer after the update is done. This ensures that the dquot in the - * buffer always has an up-to-date CRC value. -@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if (!xfs_dquot_buf_verify(mp, bp)) { -+ if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); - return; -@@ -287,3 +306,8 @@ const struct xfs_buf_ops xfs_dquot_buf_ops = { - .verify_write = xfs_dquot_buf_write_verify, - }; - -+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { -+ -+ .verify_read = xfs_dquot_buf_readahead_verify, -+ .verify_write = xfs_dquot_buf_write_verify, -+}; -diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c -index ff17c48..1aabfda 100644 ---- a/fs/xfs/libxfs/xfs_inode_buf.c -+++ b/fs/xfs/libxfs/xfs_inode_buf.c -@@ -68,6 +68,8 @@ xfs_inobp_check( - * recovery and we don't get unnecssary panics on debug kernels. We use EIO here - * because all we want to do is say readahead failed; there is no-one to report - * the error to, so this will distinguish it from a non-ra verifier failure. -+ * Changes to this readahead error behavour also need to be reflected in -+ * xfs_dquot_buf_readahead_verify(). - */ - static void - xfs_inode_buf_verify( -diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h -index 1b0a083..f51078f 100644 ---- a/fs/xfs/libxfs/xfs_quota_defs.h -+++ b/fs/xfs/libxfs/xfs_quota_defs.h -@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t; - #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) - - extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, -- xfs_dqid_t id, uint type, uint flags, char *str); -+ xfs_dqid_t id, uint type, uint flags, const char *str); - extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); - - #endif /* __XFS_QUOTA_H__ */ -diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h -index 5be5297..15c3ceb 100644 ---- a/fs/xfs/libxfs/xfs_shared.h -+++ b/fs/xfs/libxfs/xfs_shared.h -@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops; - extern const struct xfs_buf_ops xfs_inode_buf_ops; - extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; - extern const struct xfs_buf_ops xfs_dquot_buf_ops; -+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; - extern const struct xfs_buf_ops xfs_sb_buf_ops; - extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; - extern const struct xfs_buf_ops xfs_symlink_buf_ops; -diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c -index c5ecaac..5991cdc 100644 ---- a/fs/xfs/xfs_log_recover.c -+++ b/fs/xfs/xfs_log_recover.c -@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2( - struct xfs_disk_dquot *recddq; - struct xfs_dq_logformat *dq_f; - uint type; -+ int len; - - - if (mp->m_qflags == 0) -@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2( - ASSERT(dq_f); - ASSERT(dq_f->qlf_len == 1); - -- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, -- XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL); -+ len = XFS_FSB_TO_BB(mp, dq_f->qlf_len); -+ if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0)) -+ return; -+ -+ xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len, -+ &xfs_dquot_buf_ra_ops); + return (status & AAC_INT_MODE_MSIX); } - STATIC void --- -2.5.0 +@@ -349,7 +352,8 @@ + /* FIB should be freed only after getting the response from the F/W */ + if (status != -ERESTARTSYS) + aac_fib_free(fibctx); +- if (aac_is_src(dev) && ++ if ((dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) && + dev->msi_enabled) + aac_set_intx_mode(dev); + return status; +@@ -610,7 +614,8 @@ + dev->max_fib_size = status[1] & 0xFFE0; + host->sg_tablesize = status[2] >> 16; + dev->sg_tablesize = status[2] & 0xFFFF; +- if (aac_is_src(dev)) { ++ if (dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) { + if (host->can_queue > (status[3] >> 16) - + AAC_NUM_MGT_FIB) + host->can_queue = (status[3] >> 16) - +@@ -629,7 +634,9 @@ + pr_warn("numacb=%d ignored\n", numacb); + } + +- if (aac_is_src(dev)) ++ if (dev->pdev->device == PMC_DEVICE_S6 || ++ dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) + aac_define_int_mode(dev); + /* + * Ok now init the communication subsystem +diff -ur linux-5.3/drivers/scsi/aacraid.org/commsup.c linux-5.3/drivers/scsi/aacraid/commsup.c +--- linux-5.3/drivers/scsi/aacraid.org/commsup.c 2019-11-01 22:42:37.014803249 +0100 ++++ linux-5.3/drivers/scsi/aacraid/commsup.c 2019-11-04 09:29:51.321486211 +0100 +@@ -2593,7 +2593,9 @@ + { + int i; + +- if (aac_is_src(dev)) { ++ if (dev->pdev->device == PMC_DEVICE_S6 || ++ dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) { + if (dev->max_msix > 1) { + for (i = 0; i < dev->max_msix; i++) + free_irq(pci_irq_vector(dev->pdev, i), +diff -ur linux-5.3/drivers/scsi/aacraid.org/linit.c linux-5.3/drivers/scsi/aacraid/linit.c +--- linux-5.3/drivers/scsi/aacraid.org/linit.c 2019-11-01 22:42:37.011469816 +0100 ++++ linux-5.3/drivers/scsi/aacraid/linit.c 2019-11-04 09:29:51.321486211 +0100 +@@ -1567,8 +1567,9 @@ + aac_send_shutdown(aac); + + aac_adapter_disable_int(aac); +- +- if (aac_is_src(aac)) { ++ if (aac->pdev->device == PMC_DEVICE_S6 || ++ aac->pdev->device == PMC_DEVICE_S7 || ++ aac->pdev->device == PMC_DEVICE_S8) { + if (aac->max_msix > 1) { + for (i = 0; i < aac->max_msix; i++) { + free_irq(pci_irq_vector(aac->pdev, i), +@@ -1858,7 +1859,8 @@ + aac_adapter_enable_int(dev); + + +- if (aac_is_src(dev)) ++ if (dev->pdev->device == PMC_DEVICE_S7 || ++ dev->pdev->device == PMC_DEVICE_S8) + aac_define_int_mode(dev); + + if (dev->msi_enabled) -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs