From: Ɓukasz Krotowski Date: Wed, 17 Jul 2013 14:40:20 +0000 (+0200) Subject: - added parallel dm-crypt patches (disabled by default) X-Git-Tag: auto/th/kernel-3.10.9-1~9 X-Git-Url: http://git.pld-linux.org/gitweb.cgi?a=commitdiff_plain;h=101a744858c305f43487d5dd1610dc9d588b6877;p=packages%2Fkernel.git - added parallel dm-crypt patches (disabled by default) --- diff --git a/dm-crypt-dont-allocate-partial-pages.patch b/dm-crypt-dont-allocate-partial-pages.patch new file mode 100644 index 00000000..8354ab83 --- /dev/null +++ b/dm-crypt-dont-allocate-partial-pages.patch @@ -0,0 +1,251 @@ +dm-crypt: don't allocate pages for a partial request. + +This patch changes crypt_alloc_buffer so that it always allocates pages for +a full request. + +This change enables further simplification and removing of one refcounts +in the next patches. + +Note: the next patch is needed to fix a theoretical deadlock + +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 133 +++++++++----------------------------------------- + 1 file changed, 25 insertions(+), 108 deletions(-) + +Index: linux-3.9.2-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 21:47:30.000000000 +0200 ++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:13.000000000 +0200 +@@ -59,7 +59,6 @@ struct dm_crypt_io { + atomic_t io_pending; + int error; + sector_t sector; +- struct dm_crypt_io *base_io; + }; + + struct dm_crypt_request { +@@ -162,7 +161,6 @@ struct crypt_config { + }; + + #define MIN_IOS 16 +-#define MIN_POOL_PAGES 32 + + static struct kmem_cache *_crypt_io_pool; + +@@ -777,14 +775,13 @@ static int crypt_convert(struct crypt_co + return 0; + } + ++static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); ++ + /* + * Generate a new unfragmented bio with the given size + * This should never violate the device limitations +- * May return a smaller bio when running out of pages, indicated by +- * *out_of_pages set to 1. + */ +-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, +- unsigned *out_of_pages) ++static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) + { + struct crypt_config *cc = io->cc; + struct bio *clone; +@@ -798,37 +795,22 @@ static struct bio *crypt_alloc_buffer(st + return NULL; + + clone_init(io, clone); +- *out_of_pages = 0; + + for (i = 0; i < nr_iovecs; i++) { + page = mempool_alloc(cc->page_pool, gfp_mask); +- if (!page) { +- *out_of_pages = 1; +- break; +- } +- +- /* +- * If additional pages cannot be allocated without waiting, +- * return a partially-allocated bio. The caller will then try +- * to allocate more bios while submitting this partial bio. +- */ +- gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; + + len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + + if (!bio_add_page(clone, page, len, 0)) { + mempool_free(page, cc->page_pool); +- break; ++ crypt_free_buffer_pages(cc, clone); ++ bio_put(clone); ++ return NULL; + } + + size -= len; + } + +- if (!clone->bi_size) { +- bio_put(clone); +- return NULL; +- } +- + return clone; + } + +@@ -855,7 +837,6 @@ static struct dm_crypt_io *crypt_io_allo + io->base_bio = bio; + io->sector = sector; + io->error = 0; +- io->base_io = NULL; + io->ctx.req = NULL; + atomic_set(&io->io_pending, 0); + +@@ -870,13 +851,11 @@ static void crypt_inc_pending(struct dm_ + /* + * One of the bios was finished. Check for completion of + * the whole request and correctly clean up the buffer. 
+- * If base_io is set, wait for the last fragment to complete. + */ + static void crypt_dec_pending(struct dm_crypt_io *io) + { + struct crypt_config *cc = io->cc; + struct bio *base_bio = io->base_bio; +- struct dm_crypt_io *base_io = io->base_io; + int error = io->error; + + if (!atomic_dec_and_test(&io->io_pending)) +@@ -886,13 +865,7 @@ static void crypt_dec_pending(struct dm_ + mempool_free(io->ctx.req, cc->req_pool); + mempool_free(io, cc->io_pool); + +- if (likely(!base_io)) +- bio_endio(base_bio, error); +- else { +- if (error && !base_io->error) +- base_io->error = error; +- crypt_dec_pending(base_io); +- } ++ bio_endio(base_bio, error); + } + + /* +@@ -1030,10 +1003,7 @@ static void kcryptd_crypt_write_convert( + { + struct crypt_config *cc = io->cc; + struct bio *clone; +- struct dm_crypt_io *new_io; + int crypt_finished; +- unsigned out_of_pages = 0; +- unsigned remaining = io->base_bio->bi_size; + sector_t sector = io->sector; + int r; + +@@ -1043,81 +1013,28 @@ static void kcryptd_crypt_write_convert( + crypt_inc_pending(io); + crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); + +- /* +- * The allocated buffers can be smaller than the whole bio, +- * so repeat the whole process until all the data can be handled. +- */ +- while (remaining) { +- clone = crypt_alloc_buffer(io, remaining, &out_of_pages); +- if (unlikely(!clone)) { +- io->error = -ENOMEM; +- break; +- } +- +- io->ctx.bio_out = clone; +- io->ctx.idx_out = 0; +- +- remaining -= clone->bi_size; +- sector += bio_sectors(clone); +- +- crypt_inc_pending(io); +- +- r = crypt_convert(cc, &io->ctx); +- if (r < 0) +- io->error = -EIO; +- +- crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); +- +- /* Encryption was already finished, submit io now */ +- if (crypt_finished) { +- kcryptd_crypt_write_io_submit(io, 0); +- +- /* +- * If there was an error, do not try next fragments. +- * For async, error is processed in async handler. +- */ +- if (unlikely(r < 0)) +- break; ++ clone = crypt_alloc_buffer(io, io->base_bio->bi_size); ++ if (unlikely(!clone)) { ++ io->error = -ENOMEM; ++ goto dec; ++ } + +- io->sector = sector; +- } ++ io->ctx.bio_out = clone; ++ io->ctx.idx_out = 0; + +- /* +- * Out of memory -> run queues +- * But don't wait if split was due to the io size restriction +- */ +- if (unlikely(out_of_pages)) +- congestion_wait(BLK_RW_ASYNC, HZ/100); ++ sector += bio_sectors(clone); + +- /* +- * With async crypto it is unsafe to share the crypto context +- * between fragments, so switch to a new dm_crypt_io structure. +- */ +- if (unlikely(!crypt_finished && remaining)) { +- new_io = crypt_io_alloc(io->cc, io->base_bio, +- sector); +- crypt_inc_pending(new_io); +- crypt_convert_init(cc, &new_io->ctx, NULL, +- io->base_bio, sector); +- new_io->ctx.idx_in = io->ctx.idx_in; +- new_io->ctx.offset_in = io->ctx.offset_in; +- +- /* +- * Fragments after the first use the base_io +- * pending count. 
+- */ +- if (!io->base_io) +- new_io->base_io = io; +- else { +- new_io->base_io = io->base_io; +- crypt_inc_pending(io->base_io); +- crypt_dec_pending(io); +- } ++ crypt_inc_pending(io); ++ r = crypt_convert(cc, &io->ctx); ++ if (r) ++ io->error = -EIO; ++ crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); + +- io = new_io; +- } +- } ++ /* Encryption was already finished, submit io now */ ++ if (crypt_finished) ++ kcryptd_crypt_write_io_submit(io, 0); + ++dec: + crypt_dec_pending(io); + } + +@@ -1556,7 +1473,7 @@ static int crypt_ctr(struct dm_target *t + goto bad; + } + +- cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); ++ cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); + if (!cc->page_pool) { + ti->error = "Cannot allocate page mempool"; + goto bad; diff --git a/dm-crypt-fix-allocation-deadlock.patch b/dm-crypt-fix-allocation-deadlock.patch new file mode 100644 index 00000000..8f439daa --- /dev/null +++ b/dm-crypt-fix-allocation-deadlock.patch @@ -0,0 +1,111 @@ +dm-crypt: avoid deadlock in mempools + +This patch fixes a theoretical deadlock introduced in the previous patch. + +The function crypt_alloc_buffer may be called concurrently. If we allocate +from the mempool concurrently, there is a possibility of deadlock. +For example, if we have mempool of 256 pages, two processes, each wanting 256, +pages allocate from the mempool concurrently, it may deadlock in a situation +where both processes have allocated 128 pages and the mempool is exhausted. + +In order to avoid this scenarios, we allocate the pages under a mutex. + +In order to not degrade performance with excessive locking, we try +non-blocking allocations without a mutex first and if it fails, we fallback +to a blocking allocation with a mutex. + +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 36 +++++++++++++++++++++++++++++++++--- + 1 file changed, 33 insertions(+), 3 deletions(-) + +Index: linux-3.9.2-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:13.000000000 +0200 ++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:53.000000000 +0200 +@@ -118,6 +118,7 @@ struct crypt_config { + mempool_t *req_pool; + mempool_t *page_pool; + struct bio_set *bs; ++ struct mutex bio_alloc_lock; + + struct workqueue_struct *io_queue; + struct workqueue_struct *crypt_queue; +@@ -780,24 +781,46 @@ static void crypt_free_buffer_pages(stru + /* + * Generate a new unfragmented bio with the given size + * This should never violate the device limitations ++ * ++ * This function may be called concurrently. If we allocate from the mempool ++ * concurrently, there is a possibility of deadlock. For example, if we have ++ * mempool of 256 pages, two processes, each wanting 256, pages allocate from ++ * the mempool concurrently, it may deadlock in a situation where both processes ++ * have allocated 128 pages and the mempool is exhausted. ++ * ++ * In order to avoid this scenarios, we allocate the pages under a mutex. ++ * ++ * In order to not degrade performance with excessive locking, we try ++ * non-blocking allocations without a mutex first and if it fails, we fallback ++ * to a blocking allocation with a mutex. 
+ */ + static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) + { + struct crypt_config *cc = io->cc; + struct bio *clone; + unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; +- gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; ++ gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; + unsigned i, len; + struct page *page; + ++retry: ++ if (unlikely(gfp_mask & __GFP_WAIT)) ++ mutex_lock(&cc->bio_alloc_lock); ++ + clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); + if (!clone) +- return NULL; ++ goto return_clone; + + clone_init(io, clone); + + for (i = 0; i < nr_iovecs; i++) { + page = mempool_alloc(cc->page_pool, gfp_mask); ++ if (!page) { ++ crypt_free_buffer_pages(cc, clone); ++ bio_put(clone); ++ gfp_mask |= __GFP_WAIT; ++ goto retry; ++ } + + len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + +@@ -805,12 +828,17 @@ static struct bio *crypt_alloc_buffer(st + mempool_free(page, cc->page_pool); + crypt_free_buffer_pages(cc, clone); + bio_put(clone); +- return NULL; ++ clone = NULL; ++ goto return_clone; + } + + size -= len; + } + ++return_clone: ++ if (unlikely(gfp_mask & __GFP_WAIT)) ++ mutex_unlock(&cc->bio_alloc_lock); ++ + return clone; + } + +@@ -1485,6 +1513,8 @@ static int crypt_ctr(struct dm_target *t + goto bad; + } + ++ mutex_init(&cc->bio_alloc_lock); ++ + ret = -EINVAL; + if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { + ti->error = "Invalid iv_offset sector"; diff --git a/dm-crypt-offload-writes-to-thread.patch b/dm-crypt-offload-writes-to-thread.patch new file mode 100644 index 00000000..da2e68f2 --- /dev/null +++ b/dm-crypt-offload-writes-to-thread.patch @@ -0,0 +1,232 @@ +dm-crypt: offload writes to thread + +Submitting write bios directly in the encryption thread caused serious +performance degradation. On multiprocessor machine encryption requests +finish in a different order than they were submitted in. Consequently, write +requests would be submitted in a different order and it could cause severe +performance degradation. + +This patch moves submitting write requests to a separate thread so that +the requests can be sorted before submitting. + +Sorting is implemented in the next patch. + +Note: it is required that a previous patch "dm-crypt: don't allocate pages +for a partial request." is applied before applying this patch. Without +that, this patch could introduce a crash. 
+ +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 120 ++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 97 insertions(+), 23 deletions(-) + +Index: linux-3.9.2-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:53.000000000 +0200 ++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:57.000000000 +0200 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -59,6 +60,8 @@ struct dm_crypt_io { + atomic_t io_pending; + int error; + sector_t sector; ++ ++ struct list_head list; + }; + + struct dm_crypt_request { +@@ -123,6 +126,10 @@ struct crypt_config { + struct workqueue_struct *io_queue; + struct workqueue_struct *crypt_queue; + ++ struct task_struct *write_thread; ++ wait_queue_head_t write_thread_wait; ++ struct list_head write_thread_list; ++ + char *cipher; + char *cipher_string; + +@@ -977,37 +984,89 @@ static int kcryptd_io_read(struct dm_cry + return 0; + } + ++static void kcryptd_io_read_work(struct work_struct *work) ++{ ++ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); ++ ++ crypt_inc_pending(io); ++ if (kcryptd_io_read(io, GFP_NOIO)) ++ io->error = -ENOMEM; ++ crypt_dec_pending(io); ++} ++ ++static void kcryptd_queue_read(struct dm_crypt_io *io) ++{ ++ struct crypt_config *cc = io->cc; ++ ++ INIT_WORK(&io->work, kcryptd_io_read_work); ++ queue_work(cc->io_queue, &io->work); ++} ++ + static void kcryptd_io_write(struct dm_crypt_io *io) + { + struct bio *clone = io->ctx.bio_out; ++ + generic_make_request(clone); + } + +-static void kcryptd_io(struct work_struct *work) ++static int dmcrypt_write(void *data) + { +- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); ++ struct crypt_config *cc = data; ++ while (1) { ++ struct list_head local_list; ++ struct blk_plug plug; + +- if (bio_data_dir(io->base_bio) == READ) { +- crypt_inc_pending(io); +- if (kcryptd_io_read(io, GFP_NOIO)) +- io->error = -ENOMEM; +- crypt_dec_pending(io); +- } else +- kcryptd_io_write(io); +-} ++ DECLARE_WAITQUEUE(wait, current); + +-static void kcryptd_queue_io(struct dm_crypt_io *io) +-{ +- struct crypt_config *cc = io->cc; ++ spin_lock_irq(&cc->write_thread_wait.lock); ++continue_locked: + +- INIT_WORK(&io->work, kcryptd_io); +- queue_work(cc->io_queue, &io->work); ++ if (!list_empty(&cc->write_thread_list)) ++ goto pop_from_list; ++ ++ __set_current_state(TASK_INTERRUPTIBLE); ++ __add_wait_queue(&cc->write_thread_wait, &wait); ++ ++ spin_unlock_irq(&cc->write_thread_wait.lock); ++ ++ if (unlikely(kthread_should_stop())) { ++ set_task_state(current, TASK_RUNNING); ++ remove_wait_queue(&cc->write_thread_wait, &wait); ++ break; ++ } ++ ++ schedule(); ++ ++ set_task_state(current, TASK_RUNNING); ++ spin_lock_irq(&cc->write_thread_wait.lock); ++ __remove_wait_queue(&cc->write_thread_wait, &wait); ++ goto continue_locked; ++ ++pop_from_list: ++ local_list = cc->write_thread_list; ++ local_list.next->prev = &local_list; ++ local_list.prev->next = &local_list; ++ INIT_LIST_HEAD(&cc->write_thread_list); ++ ++ spin_unlock_irq(&cc->write_thread_wait.lock); ++ ++ blk_start_plug(&plug); ++ do { ++ struct dm_crypt_io *io = container_of(local_list.next, ++ struct dm_crypt_io, list); ++ list_del(&io->list); ++ kcryptd_io_write(io); ++ } while (!list_empty(&local_list)); ++ blk_finish_plug(&plug); ++ } ++ return 0; + } + +-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io 
*io, int async) ++static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io) + { + struct bio *clone = io->ctx.bio_out; + struct crypt_config *cc = io->cc; ++ unsigned long flags; + + if (unlikely(io->error < 0)) { + crypt_free_buffer_pages(cc, clone); +@@ -1021,10 +1080,10 @@ static void kcryptd_crypt_write_io_submi + + clone->bi_sector = cc->start + io->sector; + +- if (async) +- kcryptd_queue_io(io); +- else +- generic_make_request(clone); ++ spin_lock_irqsave(&cc->write_thread_wait.lock, flags); ++ list_add_tail(&io->list, &cc->write_thread_list); ++ wake_up_locked(&cc->write_thread_wait); ++ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); + } + + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) +@@ -1060,7 +1119,7 @@ static void kcryptd_crypt_write_convert( + + /* Encryption was already finished, submit io now */ + if (crypt_finished) +- kcryptd_crypt_write_io_submit(io, 0); ++ kcryptd_crypt_write_io_submit(io); + + dec: + crypt_dec_pending(io); +@@ -1118,7 +1177,7 @@ static void kcryptd_async_done(struct cr + if (bio_data_dir(io->base_bio) == READ) + kcryptd_crypt_read_done(io); + else +- kcryptd_crypt_write_io_submit(io, 1); ++ kcryptd_crypt_write_io_submit(io); + } + + static void kcryptd_crypt(struct work_struct *work) +@@ -1262,6 +1321,9 @@ static void crypt_dtr(struct dm_target * + if (!cc) + return; + ++ if (cc->write_thread) ++ kthread_stop(cc->write_thread); ++ + if (cc->io_queue) + destroy_workqueue(cc->io_queue); + if (cc->crypt_queue) +@@ -1578,6 +1640,18 @@ static int crypt_ctr(struct dm_target *t + goto bad; + } + ++ init_waitqueue_head(&cc->write_thread_wait); ++ INIT_LIST_HEAD(&cc->write_thread_list); ++ ++ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); ++ if (IS_ERR(cc->write_thread)) { ++ ret = PTR_ERR(cc->write_thread); ++ cc->write_thread = NULL; ++ ti->error = "Couldn't spawn write thread"; ++ goto bad; ++ } ++ wake_up_process(cc->write_thread); ++ + ti->num_flush_bios = 1; + ti->discard_zeroes_data_unsupported = true; + +@@ -1611,7 +1685,7 @@ static int crypt_map(struct dm_target *t + + if (bio_data_dir(io->base_bio) == READ) { + if (kcryptd_io_read(io, GFP_NOWAIT)) +- kcryptd_queue_io(io); ++ kcryptd_queue_read(io); + } else + kcryptd_queue_crypt(io); + diff --git a/dm-crypt-remove-percpu.patch b/dm-crypt-remove-percpu.patch new file mode 100644 index 00000000..24672766 --- /dev/null +++ b/dm-crypt-remove-percpu.patch @@ -0,0 +1,185 @@ +dm-crypt: remove per-cpu structure + +Remove per-cpu structure and make it per-convert_context instead. +This allows moving requests between different cpus. + +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 61 +++++++++----------------------------------------- + 1 file changed, 12 insertions(+), 49 deletions(-) + +Index: linux-3.8.6-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200 ++++ linux-3.8.6-fast/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200 +@@ -18,7 +18,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -44,6 +43,7 @@ struct convert_context { + unsigned int idx_out; + sector_t cc_sector; + atomic_t cc_pending; ++ struct ablkcipher_request *req; + }; + + /* +@@ -105,15 +105,7 @@ struct iv_lmk_private { + enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; + + /* +- * Duplicated per-CPU state for cipher. 
+- */ +-struct crypt_cpu { +- struct ablkcipher_request *req; +-}; +- +-/* +- * The fields in here must be read only after initialization, +- * changing state should be in crypt_cpu. ++ * The fields in here must be read only after initialization. + */ + struct crypt_config { + struct dm_dev *dev; +@@ -143,12 +135,6 @@ struct crypt_config { + sector_t iv_offset; + unsigned int iv_size; + +- /* +- * Duplicated per cpu state. Access through +- * per_cpu_ptr() only. +- */ +- struct crypt_cpu __percpu *cpu; +- + /* ESSIV: struct crypto_cipher *essiv_tfm */ + void *iv_private; + struct crypto_ablkcipher **tfms; +@@ -184,11 +170,6 @@ static void clone_init(struct dm_crypt_i + static void kcryptd_queue_crypt(struct dm_crypt_io *io); + static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); + +-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) +-{ +- return this_cpu_ptr(cc->cpu); +-} +- + /* + * Use this to access cipher attributes that are the same for each CPU. + */ +@@ -738,16 +719,15 @@ static void kcryptd_async_done(struct cr + static void crypt_alloc_req(struct crypt_config *cc, + struct convert_context *ctx) + { +- struct crypt_cpu *this_cc = this_crypt_config(cc); + unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); + +- if (!this_cc->req) +- this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); ++ if (!ctx->req) ++ ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO); + +- ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); +- ablkcipher_request_set_callback(this_cc->req, ++ ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]); ++ ablkcipher_request_set_callback(ctx->req, + CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, +- kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); ++ kcryptd_async_done, dmreq_of_req(cc, ctx->req)); + } + + /* +@@ -756,7 +736,6 @@ static void crypt_alloc_req(struct crypt + static int crypt_convert(struct crypt_config *cc, + struct convert_context *ctx) + { +- struct crypt_cpu *this_cc = this_crypt_config(cc); + int r; + + atomic_set(&ctx->cc_pending, 1); +@@ -768,7 +747,7 @@ static int crypt_convert(struct crypt_co + + atomic_inc(&ctx->cc_pending); + +- r = crypt_convert_block(cc, ctx, this_cc->req); ++ r = crypt_convert_block(cc, ctx, ctx->req); + + switch (r) { + /* async */ +@@ -777,7 +756,7 @@ static int crypt_convert(struct crypt_co + INIT_COMPLETION(ctx->restart); + /* fall through*/ + case -EINPROGRESS: +- this_cc->req = NULL; ++ ctx->req = NULL; + ctx->cc_sector++; + continue; + +@@ -877,6 +856,7 @@ static struct dm_crypt_io *crypt_io_allo + io->sector = sector; + io->error = 0; + io->base_io = NULL; ++ io->ctx.req = NULL; + atomic_set(&io->io_pending, 0); + + return io; +@@ -902,6 +882,8 @@ static void crypt_dec_pending(struct dm_ + if (!atomic_dec_and_test(&io->io_pending)) + return; + ++ if (io->ctx.req) ++ mempool_free(io->ctx.req, cc->req_pool); + mempool_free(io, cc->io_pool); + + if (likely(!base_io)) +@@ -1329,8 +1311,6 @@ static int crypt_wipe_key(struct crypt_c + static void crypt_dtr(struct dm_target *ti) + { + struct crypt_config *cc = ti->private; +- struct crypt_cpu *cpu_cc; +- int cpu; + + ti->private = NULL; + +@@ -1342,13 +1322,6 @@ static void crypt_dtr(struct dm_target * + if (cc->crypt_queue) + destroy_workqueue(cc->crypt_queue); + +- if (cc->cpu) +- for_each_possible_cpu(cpu) { +- cpu_cc = per_cpu_ptr(cc->cpu, cpu); +- if (cpu_cc->req) +- mempool_free(cpu_cc->req, cc->req_pool); +- } +- + crypt_free_tfms(cc); + + if (cc->bs) +@@ -1367,9 +1340,6 @@ static void 
crypt_dtr(struct dm_target * + if (cc->dev) + dm_put_device(ti, cc->dev); + +- if (cc->cpu) +- free_percpu(cc->cpu); +- + kzfree(cc->cipher); + kzfree(cc->cipher_string); + +@@ -1424,13 +1394,6 @@ static int crypt_ctr_cipher(struct dm_ta + if (tmp) + DMWARN("Ignoring unexpected additional cipher options"); + +- cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), +- __alignof__(struct crypt_cpu)); +- if (!cc->cpu) { +- ti->error = "Cannot allocate per cpu state"; +- goto bad_mem; +- } +- + /* + * For compatibility with the original dm-crypt mapping format, if + * only the cipher name is supplied, use cbc-plain. diff --git a/dm-crypt-sort-requests.patch b/dm-crypt-sort-requests.patch new file mode 100644 index 00000000..90bfbae9 --- /dev/null +++ b/dm-crypt-sort-requests.patch @@ -0,0 +1,137 @@ +dm-crypt: sort writes + +Write requests are sorted in a red-black tree structure and are submitted +in the sorted order. + +In theory the sorting should be performed by the underlying disk scheduler, +however, in practice the disk scheduler accepts and sorts only 128 requests. +In order to sort more requests, we need to implement our own sorting. + +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 50 +++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 35 insertions(+), 15 deletions(-) + +Index: linux-3.9.2-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:57.000000000 +0200 ++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:50:01.000000000 +0200 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -61,7 +62,7 @@ struct dm_crypt_io { + int error; + sector_t sector; + +- struct list_head list; ++ struct rb_node rb_node; + }; + + struct dm_crypt_request { +@@ -128,7 +129,7 @@ struct crypt_config { + + struct task_struct *write_thread; + wait_queue_head_t write_thread_wait; +- struct list_head write_thread_list; ++ struct rb_root write_tree; + + char *cipher; + char *cipher_string; +@@ -1013,7 +1014,7 @@ static int dmcrypt_write(void *data) + { + struct crypt_config *cc = data; + while (1) { +- struct list_head local_list; ++ struct rb_root write_tree; + struct blk_plug plug; + + DECLARE_WAITQUEUE(wait, current); +@@ -1021,7 +1022,7 @@ static int dmcrypt_write(void *data) + spin_lock_irq(&cc->write_thread_wait.lock); + continue_locked: + +- if (!list_empty(&cc->write_thread_list)) ++ if (!RB_EMPTY_ROOT(&cc->write_tree)) + goto pop_from_list; + + __set_current_state(TASK_INTERRUPTIBLE); +@@ -1043,20 +1044,23 @@ continue_locked: + goto continue_locked; + + pop_from_list: +- local_list = cc->write_thread_list; +- local_list.next->prev = &local_list; +- local_list.prev->next = &local_list; +- INIT_LIST_HEAD(&cc->write_thread_list); +- ++ write_tree = cc->write_tree; ++ cc->write_tree = RB_ROOT; + spin_unlock_irq(&cc->write_thread_wait.lock); + ++ BUG_ON(rb_parent(write_tree.rb_node)); ++ ++ /* ++ * Note: we cannot walk the tree here with rb_next because ++ * the structures may be freed when kcryptd_io_write is called. 
++ */ + blk_start_plug(&plug); + do { +- struct dm_crypt_io *io = container_of(local_list.next, +- struct dm_crypt_io, list); +- list_del(&io->list); ++ struct dm_crypt_io *io = rb_entry(rb_first(&write_tree), ++ struct dm_crypt_io, rb_node); ++ rb_erase(&io->rb_node, &write_tree); + kcryptd_io_write(io); +- } while (!list_empty(&local_list)); ++ } while (!RB_EMPTY_ROOT(&write_tree)); + blk_finish_plug(&plug); + } + return 0; +@@ -1067,6 +1071,8 @@ static void kcryptd_crypt_write_io_submi + struct bio *clone = io->ctx.bio_out; + struct crypt_config *cc = io->cc; + unsigned long flags; ++ sector_t sector; ++ struct rb_node **p, *parent; + + if (unlikely(io->error < 0)) { + crypt_free_buffer_pages(cc, clone); +@@ -1081,7 +1087,21 @@ static void kcryptd_crypt_write_io_submi + clone->bi_sector = cc->start + io->sector; + + spin_lock_irqsave(&cc->write_thread_wait.lock, flags); +- list_add_tail(&io->list, &cc->write_thread_list); ++ p = &cc->write_tree.rb_node; ++ parent = NULL; ++ sector = io->sector; ++ while (*p) { ++ parent = *p; ++#define io_node rb_entry(parent, struct dm_crypt_io, rb_node) ++ if (sector < io_node->sector) ++ p = &io_node->rb_node.rb_left; ++ else ++ p = &io_node->rb_node.rb_right; ++#undef io_node ++ } ++ rb_link_node(&io->rb_node, parent, p); ++ rb_insert_color(&io->rb_node, &cc->write_tree); ++ + wake_up_locked(&cc->write_thread_wait); + spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); + } +@@ -1641,7 +1661,7 @@ static int crypt_ctr(struct dm_target *t + } + + init_waitqueue_head(&cc->write_thread_wait); +- INIT_LIST_HEAD(&cc->write_thread_list); ++ cc->write_tree = RB_ROOT; + + cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); + if (IS_ERR(cc->write_thread)) { diff --git a/dm-crypt-unbound-workqueue.patch b/dm-crypt-unbound-workqueue.patch new file mode 100644 index 00000000..04335699 --- /dev/null +++ b/dm-crypt-unbound-workqueue.patch @@ -0,0 +1,27 @@ +dm-crypt: use unbound workqueue for request processing + +Use unbound workqueue so that work is automatically ballanced between +available CPUs. 
+ +Signed-off-by: Mikulas Patocka + +--- + drivers/md/dm-crypt.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +Index: linux-3.8.6-fast/drivers/md/dm-crypt.c +=================================================================== +--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200 ++++ linux-3.8.6-fast/drivers/md/dm-crypt.c 2013-04-11 17:29:13.000000000 +0200 +@@ -1623,8 +1623,9 @@ static int crypt_ctr(struct dm_target *t + cc->crypt_queue = alloc_workqueue("kcryptd", + WQ_NON_REENTRANT| + WQ_CPU_INTENSIVE| +- WQ_MEM_RECLAIM, +- 1); ++ WQ_MEM_RECLAIM| ++ WQ_UNBOUND, ++ num_online_cpus()); + if (!cc->crypt_queue) { + ti->error = "Couldn't create kcryptd queue"; + goto bad; diff --git a/kernel.spec b/kernel.spec index b9982686..ad5c3131 100644 --- a/kernel.spec +++ b/kernel.spec @@ -29,6 +29,7 @@ %bcond_without imq # imq support %bcond_without esfq # esfq support %bcond_without ipv6 # ipv6 support +%bcond_with padmcrypt # parallel dm-crypt %bcond_without vserver # support for VServer (enabled by default) @@ -225,6 +226,14 @@ Patch250: kernel-fix_256colors_menuconfig.patch # https://patchwork.kernel.org/patch/236261/ Patch400: kernel-virtio-gl-accel.patch +# http://people.redhat.com/mpatocka/patches/kernel/dm-crypt-paralelizace/current/series.html +Patch500: dm-crypt-remove-percpu.patch +Patch501: dm-crypt-unbound-workqueue.patch +Patch502: dm-crypt-dont-allocate-partial-pages.patch +Patch503: dm-crypt-fix-allocation-deadlock.patch +Patch504: dm-crypt-offload-writes-to-thread.patch +Patch505: dm-crypt-sort-requests.patch + Patch2000: kernel-small_fixes.patch Patch2001: kernel-pwc-uncompress.patch Patch2003: kernel-regressions.patch @@ -703,6 +712,16 @@ cd linux-%{basever} # virtio-gl %patch400 -p1 +# parallel dm-crypt +%if %{with padmcrypt} +%patch500 -p1 +%patch501 -p1 +%patch502 -p1 +%patch503 -p1 +%patch504 -p1 +%patch505 -p1 +%endif + %endif # vanilla # Small fixes:
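Note: all six patches are guarded by the new padmcrypt bcond, and %bcond_with leaves the feature disabled by default, matching the commit subject. A minimal sketch of enabling it for a local rebuild, assuming only the standard rpmbuild handling of %bcond_with (the PLD builder script's own invocation is not shown here and may differ):

    # apply the parallel dm-crypt patch set during the build (off by default)
    rpmbuild -ba --with padmcrypt kernel.spec

With the bcond left at its default, the %patch500 through %patch505 lines are skipped and the resulting kernel is unchanged.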