[pve-devel] [PATCH zfsonlinux 2/3] Cherry-pick fix for zpl_mount deadlock

Stoiko Ivanov s.ivanov at proxmox.com
Mon Jul 16 11:29:24 CEST 2018


Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
 zfs-patches/0004-Fix-zpl_mount-deadlock.patch | 92 +++++++++++++++++++++++++++
 zfs-patches/series                            |  1 +
 2 files changed, 93 insertions(+)
 create mode 100644 zfs-patches/0004-Fix-zpl_mount-deadlock.patch

diff --git a/zfs-patches/0004-Fix-zpl_mount-deadlock.patch b/zfs-patches/0004-Fix-zpl_mount-deadlock.patch
new file mode 100644
index 0000000..8947309
--- /dev/null
+++ b/zfs-patches/0004-Fix-zpl_mount-deadlock.patch
@@ -0,0 +1,92 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Wed, 11 Jul 2018 15:49:10 -0700
+Subject: [PATCH] Fix zpl_mount() deadlock
+
+Commit 93b43af10 inadvertently introduced the following scenario which
+can result in a deadlock.  This issue was most easily reproduced by
+LXD containers using a ZFS storage backend but should be reproducible
+under any workload which is frequently mounting and unmounting.
+
+-- THREAD A --
+spa_sync()
+  spa_sync_upgrades()
+    rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); <- Waiting on B
+
+-- THREAD B --
+mount_fs()
+  zpl_mount()
+    zpl_mount_impl()
+      dmu_objset_hold()
+        dmu_objset_hold_flags()
+          dsl_pool_hold()
+            dsl_pool_config_enter()
+              rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
+    sget()
+      sget_userns()
+        grab_super()
+          down_write(&s->s_umount); <- Waiting on C
+
+-- THREAD C --
+cleanup_mnt()
+  deactivate_super()
+    down_write(&s->s_umount);
+    deactivate_locked_super()
+      zpl_kill_sb()
+        kill_anon_super()
+          generic_shutdown_super()
+            sync_filesystem()
+              zpl_sync_fs()
+                zfs_sync()
+                  zil_commit()
+                    txg_wait_synced() <- Waiting on A
+
+Reviewed-by: Alek Pinchuk <apinchuk at datto.com>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7598
+Closes #7659
+Closes #7691
+Closes #7693
+
+(Cherry-picked from ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1)
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ include/sys/zfs_vfsops.h |  1 +
+ module/zfs/zpl_super.c   | 11 ++++++++++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
+index 2326da42..927153b2 100644
+--- a/include/sys/zfs_vfsops.h
++++ b/include/sys/zfs_vfsops.h
+@@ -32,6 +32,7 @@
+ #include <sys/zil.h>
+ #include <sys/sa.h>
+ #include <sys/rrwlock.h>
++#include <sys/dsl_dataset.h>
+ #include <sys/zfs_ioctl.h>
+ 
+ #ifdef	__cplusplus
+diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c
+index fc10271b..5c426b0a 100644
+--- a/module/zfs/zpl_super.c
++++ b/module/zfs/zpl_super.c
+@@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
+ 	if (err)
+ 		return (ERR_PTR(-err));
+ 
++	/*
++	 * The dsl pool lock must be released prior to calling sget().
++	 * It is possible sget() may block on the lock in grab_super()
++	 * while deactivate_super() holds that same lock and waits for
+	 * a txg sync.  If the dsl_pool lock is held over sget()
++	 * this can prevent the pool sync and cause a deadlock.
++	 */
++	dsl_pool_rele(dmu_objset_pool(os), FTAG);
+ 	s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os);
+-	dmu_objset_rele(os, FTAG);
++	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
++
+ 	if (IS_ERR(s))
+ 		return (ERR_CAST(s));
+ 
diff --git a/zfs-patches/series b/zfs-patches/series
index 3d515ce..815e5cd 100644
--- a/zfs-patches/series
+++ b/zfs-patches/series
@@ -1,3 +1,4 @@
 0001-remove-DKMS-modules-and-dracut-build.patch
 0002-import-with-d-dev-disk-by-id-in-scan-service.patch
 0003-always-load-ZFS-module-on-boot.patch
+0004-Fix-zpl_mount-deadlock.patch
-- 
2.11.0





More information about the pve-devel mailing list