+dm-crypt: offload writes to thread
+
+Submitting write bios directly in the encryption thread caused serious
+performance degradation. On a multiprocessor machine, encryption requests
+finish in a different order than they were submitted. Consequently, write
+requests were issued to the underlying device out of order, which could
+cause severe performance degradation.
+
+This patch moves the submission of write requests to a separate thread so
+that the requests can be sorted before they are submitted.
+
+Sorting is implemented in the next patch.
+
+Note: the earlier patch "dm-crypt: don't allocate pages for a partial
+request." must be applied before this patch. Without it, this patch
+could introduce a crash.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 120 ++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 97 insertions(+), 23 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:53.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:57.000000000 +0200
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/crypto.h>
+ #include <linux/workqueue.h>
++#include <linux/kthread.h>
+ #include <linux/backing-dev.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
+@@ -59,6 +60,8 @@ struct dm_crypt_io {
+ atomic_t io_pending;
+ int error;
+ sector_t sector;
++
++ struct list_head list;
+ };
+
+ struct dm_crypt_request {
+@@ -123,6 +126,10 @@ struct crypt_config {
+ struct workqueue_struct *io_queue;
+ struct workqueue_struct *crypt_queue;
+
++ struct task_struct *write_thread;
++ wait_queue_head_t write_thread_wait;
++ struct list_head write_thread_list;
++
+ char *cipher;
+ char *cipher_string;
+
+@@ -977,37 +984,89 @@ static int kcryptd_io_read(struct dm_cry
+ return 0;
+ }
+
++static void kcryptd_io_read_work(struct work_struct *work)
++{
++ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++
++ crypt_inc_pending(io);
++ if (kcryptd_io_read(io, GFP_NOIO))
++ io->error = -ENOMEM;
++ crypt_dec_pending(io);
++}
++
++static void kcryptd_queue_read(struct dm_crypt_io *io)
++{
++ struct crypt_config *cc = io->cc;
++
++ INIT_WORK(&io->work, kcryptd_io_read_work);
++ queue_work(cc->io_queue, &io->work);
++}
++
+ static void kcryptd_io_write(struct dm_crypt_io *io)
+ {
+ struct bio *clone = io->ctx.bio_out;
++
+ generic_make_request(clone);
+ }
+
+-static void kcryptd_io(struct work_struct *work)
++static int dmcrypt_write(void *data)
+ {
+- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++ struct crypt_config *cc = data;
++ while (1) {
++ struct list_head local_list;
++ struct blk_plug plug;
+
+- if (bio_data_dir(io->base_bio) == READ) {
+- crypt_inc_pending(io);
+- if (kcryptd_io_read(io, GFP_NOIO))
+- io->error = -ENOMEM;
+- crypt_dec_pending(io);
+- } else
+- kcryptd_io_write(io);
+-}
++ DECLARE_WAITQUEUE(wait, current);
+
+-static void kcryptd_queue_io(struct dm_crypt_io *io)
+-{
+- struct crypt_config *cc = io->cc;
++ spin_lock_irq(&cc->write_thread_wait.lock);
++continue_locked:
+
+- INIT_WORK(&io->work, kcryptd_io);
+- queue_work(cc->io_queue, &io->work);
++ if (!list_empty(&cc->write_thread_list))
++ goto pop_from_list;
++
++ __set_current_state(TASK_INTERRUPTIBLE);
++ __add_wait_queue(&cc->write_thread_wait, &wait);
++
++ spin_unlock_irq(&cc->write_thread_wait.lock);
++
++ if (unlikely(kthread_should_stop())) {
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(&cc->write_thread_wait, &wait);
++ break;
++ }
++
++ schedule();
++
++ set_task_state(current, TASK_RUNNING);
++ spin_lock_irq(&cc->write_thread_wait.lock);
++ __remove_wait_queue(&cc->write_thread_wait, &wait);
++ goto continue_locked;
++
++pop_from_list:
++ local_list = cc->write_thread_list;
++ local_list.next->prev = &local_list;
++ local_list.prev->next = &local_list;
++ INIT_LIST_HEAD(&cc->write_thread_list);
++
++ spin_unlock_irq(&cc->write_thread_wait.lock);
++
++ blk_start_plug(&plug);
++ do {
++ struct dm_crypt_io *io = container_of(local_list.next,
++ struct dm_crypt_io, list);
++ list_del(&io->list);
++ kcryptd_io_write(io);
++ } while (!list_empty(&local_list));
++ blk_finish_plug(&plug);
++ }
++ return 0;
+ }
+
+-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
++static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io)
+ {
+ struct bio *clone = io->ctx.bio_out;
+ struct crypt_config *cc = io->cc;
++ unsigned long flags;
+
+ if (unlikely(io->error < 0)) {
+ crypt_free_buffer_pages(cc, clone);
+@@ -1021,10 +1080,10 @@ static void kcryptd_crypt_write_io_submi
+
+ clone->bi_sector = cc->start + io->sector;
+
+- if (async)
+- kcryptd_queue_io(io);
+- else
+- generic_make_request(clone);
++ spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
++ list_add_tail(&io->list, &cc->write_thread_list);
++ wake_up_locked(&cc->write_thread_wait);
++ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+ }
+
+ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
+@@ -1060,7 +1119,7 @@ static void kcryptd_crypt_write_convert(
+
+ /* Encryption was already finished, submit io now */
+ if (crypt_finished)
+- kcryptd_crypt_write_io_submit(io, 0);
++ kcryptd_crypt_write_io_submit(io);
+
+ dec:
+ crypt_dec_pending(io);
+@@ -1118,7 +1177,7 @@ static void kcryptd_async_done(struct cr
+ if (bio_data_dir(io->base_bio) == READ)
+ kcryptd_crypt_read_done(io);
+ else
+- kcryptd_crypt_write_io_submit(io, 1);
++ kcryptd_crypt_write_io_submit(io);
+ }
+
+ static void kcryptd_crypt(struct work_struct *work)
+@@ -1262,6 +1321,9 @@ static void crypt_dtr(struct dm_target *
+ if (!cc)
+ return;
+
++ if (cc->write_thread)
++ kthread_stop(cc->write_thread);
++
+ if (cc->io_queue)
+ destroy_workqueue(cc->io_queue);
+ if (cc->crypt_queue)
+@@ -1578,6 +1640,18 @@ static int crypt_ctr(struct dm_target *t
+ goto bad;
+ }
+
++ init_waitqueue_head(&cc->write_thread_wait);
++ INIT_LIST_HEAD(&cc->write_thread_list);
++
++ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
++ if (IS_ERR(cc->write_thread)) {
++ ret = PTR_ERR(cc->write_thread);
++ cc->write_thread = NULL;
++ ti->error = "Couldn't spawn write thread";
++ goto bad;
++ }
++ wake_up_process(cc->write_thread);
++
+ ti->num_flush_bios = 1;
+ ti->discard_zeroes_data_unsupported = true;
+
+@@ -1611,7 +1685,7 @@ static int crypt_map(struct dm_target *t
+
+ if (bio_data_dir(io->base_bio) == READ) {
+ if (kcryptd_io_read(io, GFP_NOWAIT))
+- kcryptd_queue_io(io);
++ kcryptd_queue_read(io);
+ } else
+ kcryptd_queue_crypt(io);
+