From ffcf8fde8a658d34c191ed225ee9e5af4b5d2917 Mon Sep 17 00:00:00 2001
From: Robert Nelson <robertcnelson@gmail.com>
Date: Tue, 25 Jan 2022 09:46:51 -0600
Subject: [PATCH] kernel v5.16.2 rebase with: device-tree/etc AUFS:
 https://github.com/sfjro/aufs5-standalone/commit/41291d0ae20e0a803a7d9c6cccd1bb3525f6439b
 BBDTBS:
 https://github.com/beagleboard/BeagleBoard-DeviceTrees/commit/6c03c4a088a5c84808617a169b598185dc975e0b
 TI_AMX3_CM3:
 http://git.ti.com/gitweb/?p=processor-firmware/ti-amx3-cm3-pm-firmware.git;a=commit;h=fb484c5e54f2e31cf0a338d2927a06a2870bcc2c
 WPANUSB:
 https://github.com/statropy/wpanusb/commit/251f0167545bf2dcaa3cad991a59dbf5ab05490a
 BCFSERIAL:
 https://github.com/statropy/bcfserial/commit/aded88429a8a00143596b41f4c1f50d9ae3d4069
 WIRELESS_REGDB:
 https://git.kernel.org/pub/scm/linux/kernel/git/sforshee/wireless-regdb.git/commit/?id=2ce78ed90f71955f7b223c17b5cda6c8a7708efe

Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
---
 patch.sh                                      |    22 +-
 patches/aufs/0001-merge-aufs-kbuild.patch     |     6 +-
 patches/aufs/0002-merge-aufs-base.patch       |    46 +-
 patches/aufs/0003-merge-aufs-mmap.patch       |    58 +-
 patches/aufs/0004-merge-aufs-standalone.patch |    55 +-
 patches/aufs/0005-merge-aufs.patch            |    41 +-
 ...-https-github.com-statropy-bcfserial.patch |     4 +-
 patches/defconfig                             |    19 +-
 patches/git/AUFS                              |     2 +-
 patches/ref_omap2plus_defconfig               |     3 +-
 ...01-merge-CONFIG_PREEMPT_RT-Patch-Set.patch | 12331 ++++++++--------
 ...-BeagleBoard.org-Device-Tree-Changes.patch |     4 +-
 ...eless-regdb-regulatory-database-file.patch |     4 +-
 ...sb-https-github.com-statropy-wpanusb.patch |     4 +-
 version.sh                                    |     6 +-
 15 files changed, 6242 insertions(+), 6363 deletions(-)
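
For reference, the regenerated 5-patch aufs series (the aufs-rt patch is dropped in this rebase) can be applied on top of a plain v5.16.2 tree in the same order patch.sh feeds it to its ${git} helper, which I assume wraps "git am". A minimal sketch, assuming a stock git-am workflow and the patch paths shown in the diffstat above (the branch name is only illustrative):

  # hypothetical branch name; any name works
  git checkout -b tmp-aufs v5.16.2
  # apply the exported aufs series in order, as patch.sh does
  git am patches/aufs/0001-merge-aufs-kbuild.patch \
         patches/aufs/0002-merge-aufs-base.patch \
         patches/aufs/0003-merge-aufs-mmap.patch \
         patches/aufs/0004-merge-aufs-standalone.patch \
         patches/aufs/0005-merge-aufs.patch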

diff --git a/patch.sh b/patch.sh
index acd44c1ca..b45705de7 100644
--- a/patch.sh
+++ b/patch.sh
@@ -166,18 +166,18 @@ aufs () {
 		${git_bin} add .
 		${git_bin} commit -a -m 'merge: aufs' -m "https://github.com/sfjro/${aufs_prefix}standalone/commit/${aufs_hash}" -s
 
-		wget https://raw.githubusercontent.com/sfjro/${aufs_prefix}standalone/aufs${KERNEL_REL}/rt.patch
-		patch -p1 < rt.patch || aufs_fail
-		rm -rf rt.patch
-		${git_bin} add .
-		${git_bin} commit -a -m 'merge: aufs-rt' -s
+		#wget https://raw.githubusercontent.com/sfjro/${aufs_prefix}standalone/aufs${KERNEL_REL}/rt.patch
+		#patch -p1 < rt.patch || aufs_fail
+		#rm -rf rt.patch
+		#${git_bin} add .
+		#${git_bin} commit -a -m 'merge: aufs-rt' -s
 
-		${git_bin} format-patch -6 -o ../patches/aufs/
+		${git_bin} format-patch -5 -o ../patches/aufs/
 		echo "AUFS: https://github.com/sfjro/${aufs_prefix}standalone/commit/${aufs_hash}" > ../patches/git/AUFS
 
 		rm -rf ../${aufs_prefix}standalone/ || true
 
-		${git_bin} reset --hard HEAD~6
+		${git_bin} reset --hard HEAD~5
 
 		start_cleanup
 
@@ -186,10 +186,10 @@ aufs () {
 		${git} "${DIR}/patches/aufs/0003-merge-aufs-mmap.patch"
 		${git} "${DIR}/patches/aufs/0004-merge-aufs-standalone.patch"
 		${git} "${DIR}/patches/aufs/0005-merge-aufs.patch"
-		${git} "${DIR}/patches/aufs/0006-merge-aufs-rt.patch"
+		#${git} "${DIR}/patches/aufs/0006-merge-aufs-rt.patch"
 
 		wdir="aufs"
-		number=6
+		number=5
 		cleanup
 	fi
 
@@ -480,7 +480,7 @@ local_patch () {
 }
 
 #external_git
-#aufs
+aufs
 wpanusb
 bcfserial
 #rt
@@ -597,7 +597,7 @@ fixes
 packaging () {
 	#do_backport="enable"
 	if [ "x${do_backport}" = "xenable" ] ; then
-		backport_tag="v5.15.13"
+		backport_tag="v5.15.15"
 
 		subsystem="bindeb-pkg"
 		#regenerate="enable"
diff --git a/patches/aufs/0001-merge-aufs-kbuild.patch b/patches/aufs/0001-merge-aufs-kbuild.patch
index 852a1f7bb..ae7312dd5 100644
--- a/patches/aufs/0001-merge-aufs-kbuild.patch
+++ b/patches/aufs/0001-merge-aufs-kbuild.patch
@@ -1,7 +1,7 @@
-From 0912d4b0e400dd3059333e399a2eba7ad1d8421b Mon Sep 17 00:00:00 2001
+From e4b4265c962801701b45435be2602cd8371dc881 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:14:07 -0600
-Subject: [PATCH 1/6] merge: aufs-kbuild
+Date: Tue, 25 Jan 2022 09:29:34 -0600
+Subject: [PATCH 1/5] merge: aufs-kbuild
 
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
diff --git a/patches/aufs/0002-merge-aufs-base.patch b/patches/aufs/0002-merge-aufs-base.patch
index 1c152c415..a760e260f 100644
--- a/patches/aufs/0002-merge-aufs-base.patch
+++ b/patches/aufs/0002-merge-aufs-base.patch
@@ -1,7 +1,7 @@
-From b736001ccc09929d1f2221394ad6543be0890362 Mon Sep 17 00:00:00 2001
+From c20b7877a692cb446f61bf3c0fdbbc4f92e82fc2 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:14:07 -0600
-Subject: [PATCH 2/6] merge: aufs-base
+Date: Tue, 25 Jan 2022 09:29:35 -0600
+Subject: [PATCH 2/5] merge: aufs-base
 
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
@@ -11,19 +11,18 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
  fs/fcntl.c                    |  4 +++-
  fs/namespace.c                |  6 ++++++
  fs/splice.c                   | 10 +++++-----
- fs/sync.c                     |  2 +-
  include/linux/fs.h            |  4 ++++
  include/linux/lockdep.h       |  2 ++
  include/linux/mnt_namespace.h |  3 +++
  include/linux/splice.h        |  6 ++++++
  kernel/locking/lockdep.c      |  3 ++-
- 12 files changed, 64 insertions(+), 9 deletions(-)
+ 11 files changed, 63 insertions(+), 8 deletions(-)
 
 diff --git a/MAINTAINERS b/MAINTAINERS
-index 3b79fd441dde..3fb0a57f61b0 100644
+index dd36acc87ce6..0cfff91bfa5e 100644
 --- a/MAINTAINERS
 +++ b/MAINTAINERS
-@@ -3122,6 +3122,19 @@ F:	include/uapi/linux/audit.h
+@@ -3179,6 +3179,19 @@ F:	include/uapi/linux/audit.h
  F:	kernel/audit*
  F:	lib/*audit.c
  
@@ -44,10 +43,10 @@ index 3b79fd441dde..3fb0a57f61b0 100644
  M:	Miguel Ojeda <ojeda@kernel.org>
  S:	Maintained
 diff --git a/drivers/block/loop.c b/drivers/block/loop.c
-index c00ae30fde89..667e1307806f 100644
+index c3a36cfaa855..4bcea5a2fc25 100644
 --- a/drivers/block/loop.c
 +++ b/drivers/block/loop.c
-@@ -798,6 +798,24 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
+@@ -635,6 +635,24 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
  	return error;
  }
  
@@ -108,7 +107,7 @@ index 9c6c6a3e2de5..02382fa9bd34 100644
  		return error;
  
 diff --git a/fs/namespace.c b/fs/namespace.c
-index 659a8f39c61a..128367073782 100644
+index b696543adab8..c45740054bc7 100644
 --- a/fs/namespace.c
 +++ b/fs/namespace.c
 @@ -808,6 +808,12 @@ static inline int check_mnt(struct mount *mnt)
@@ -152,24 +151,11 @@ index 5dbce4dcc1a7..3e6ba363b777 100644
  {
  	unsigned int p_space;
  	int ret;
-diff --git a/fs/sync.c b/fs/sync.c
-index 1373a610dc78..b7b5a0a0df6f 100644
---- a/fs/sync.c
-+++ b/fs/sync.c
-@@ -28,7 +28,7 @@
-  * wait == 1 case since in that case write_inode() functions do
-  * sync_dirty_buffer() and thus effectively write one block at a time.
-  */
--static int __sync_filesystem(struct super_block *sb, int wait)
-+int __sync_filesystem(struct super_block *sb, int wait)
- {
- 	if (wait)
- 		sync_inodes_sb(sb);
 diff --git a/include/linux/fs.h b/include/linux/fs.h
-index 56eba723477e..e60d8ad85400 100644
+index bbf812ce89a8..ac21259fbeda 100644
 --- a/include/linux/fs.h
 +++ b/include/linux/fs.h
-@@ -1381,6 +1381,7 @@ extern void fasync_free(struct fasync_struct *);
+@@ -1378,6 +1378,7 @@ extern void fasync_free(struct fasync_struct *);
  /* can be called from interrupts */
  extern void kill_fasync(struct fasync_struct **, int, int);
  
@@ -177,7 +163,7 @@ index 56eba723477e..e60d8ad85400 100644
  extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
  extern int f_setown(struct file *filp, unsigned long arg, int force);
  extern void f_delown(struct file *filp);
-@@ -2092,6 +2093,7 @@ struct file_operations {
+@@ -2091,6 +2092,7 @@ struct file_operations {
  	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
  	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
  	int (*check_flags)(int);
@@ -185,7 +171,7 @@ index 56eba723477e..e60d8ad85400 100644
  	int (*flock) (struct file *, int, struct file_lock *);
  	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
  	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-@@ -2615,6 +2617,7 @@ extern int current_umask(void);
+@@ -2613,6 +2615,7 @@ extern int current_umask(void);
  extern void ihold(struct inode * inode);
  extern void iput(struct inode *);
  extern int generic_update_time(struct inode *, struct timespec64 *, int);
@@ -193,7 +179,7 @@ index 56eba723477e..e60d8ad85400 100644
  
  /* /sys/fs */
  extern struct kobject *fs_kobj;
-@@ -2778,6 +2781,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb)
+@@ -2776,6 +2779,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb)
  }
  
  void emergency_thaw_all(void);
@@ -202,7 +188,7 @@ index 56eba723477e..e60d8ad85400 100644
  extern const struct file_operations def_blk_fops;
  extern const struct file_operations def_chr_fops;
 diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
-index 9fe165beb0f9..e47f7e15eeaf 100644
+index 467b94257105..dab20282c82e 100644
 --- a/include/linux/lockdep.h
 +++ b/include/linux/lockdep.h
 @@ -248,6 +248,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock,
@@ -250,7 +236,7 @@ index a55179fd60fc..8e21c53cf883 100644
 +			 unsigned int flags);
  #endif
 diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
-index d624231eab2b..eff2ca26e6c5 100644
+index 2270ec68f10a..fbb8e650c174 100644
 --- a/kernel/locking/lockdep.c
 +++ b/kernel/locking/lockdep.c
 @@ -189,7 +189,7 @@ static
diff --git a/patches/aufs/0003-merge-aufs-mmap.patch b/patches/aufs/0003-merge-aufs-mmap.patch
index 3a1127481..0de213cf9 100644
--- a/patches/aufs/0003-merge-aufs-mmap.patch
+++ b/patches/aufs/0003-merge-aufs-mmap.patch
@@ -1,7 +1,7 @@
-From 866625ed09e77b9fc63f4c2c0fb56333408519ed Mon Sep 17 00:00:00 2001
+From c24226294a36a11a1c6a6ffba175ef89fce5cddd Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:14:08 -0600
-Subject: [PATCH 3/6] merge: aufs-mmap
+Date: Tue, 25 Jan 2022 09:29:35 -0600
+Subject: [PATCH 3/5] merge: aufs-mmap
 
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
@@ -21,10 +21,10 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
  create mode 100644 mm/prfile.c
 
 diff --git a/fs/proc/base.c b/fs/proc/base.c
-index 1f394095eb88..93f2479ef319 100644
+index 13eda8de2998..24fd5e986cb7 100644
 --- a/fs/proc/base.c
 +++ b/fs/proc/base.c
-@@ -2189,7 +2189,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
+@@ -2191,7 +2191,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
  	rc = -ENOENT;
  	vma = find_exact_vma(mm, vm_start, vm_end);
  	if (vma && vma->vm_file) {
@@ -50,7 +50,7 @@ index 13452b32e2bd..38acccfef9d4 100644
  		ino = inode->i_ino;
  	}
 diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
-index cf25be3e0321..70af1c25ffa2 100644
+index ad667dbc96f5..d50423613c8e 100644
 --- a/fs/proc/task_mmu.c
 +++ b/fs/proc/task_mmu.c
 @@ -280,7 +280,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
@@ -65,7 +65,7 @@ index cf25be3e0321..70af1c25ffa2 100644
  		dev = inode->i_sb->s_dev;
  		ino = inode->i_ino;
  		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
-@@ -1865,7 +1868,7 @@ static int show_numa_map(struct seq_file *m, void *v)
+@@ -1869,7 +1872,7 @@ static int show_numa_map(struct seq_file *m, void *v)
  	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
  	struct vm_area_struct *vma = v;
  	struct numa_maps *md = &numa_priv->md;
@@ -91,10 +91,10 @@ index a6d21fc0033c..02c2de31196e 100644
  		ino = inode->i_ino;
  		pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
 diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 73a52aba448f..5dd42acf0707 100644
+index a7e4a9e7d807..30699240b45d 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
-@@ -1806,6 +1806,28 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
+@@ -1926,6 +1926,28 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
  	unmap_mapping_range(mapping, holebegin, holelen, 0);
  }
  
@@ -124,10 +124,10 @@ index 73a52aba448f..5dd42acf0707 100644
  		void *buf, int len, unsigned int gup_flags);
  extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
-index 7f8ee09c711f..3a9a798a4ae1 100644
+index c3a6e6209600..45f5754d51cd 100644
 --- a/include/linux/mm_types.h
 +++ b/include/linux/mm_types.h
-@@ -294,6 +294,7 @@ struct vm_region {
+@@ -370,6 +370,7 @@ struct vm_region {
  	unsigned long	vm_top;		/* region allocated to here */
  	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
  	struct file	*vm_file;	/* the backing file or NULL */
@@ -135,7 +135,7 @@ index 7f8ee09c711f..3a9a798a4ae1 100644
  
  	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
  	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
-@@ -373,6 +374,7 @@ struct vm_area_struct {
+@@ -449,6 +450,7 @@ struct vm_area_struct {
  	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
  					   units */
  	struct file * vm_file;		/* File we map to (can be NULL). */
@@ -144,10 +144,10 @@ index 7f8ee09c711f..3a9a798a4ae1 100644
  
  #ifdef CONFIG_SWAP
 diff --git a/kernel/fork.c b/kernel/fork.c
-index 10885c649ca4..f3a9cd12011b 100644
+index 3244cc56b697..ae63cce182fd 100644
 --- a/kernel/fork.c
 +++ b/kernel/fork.c
-@@ -573,7 +573,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+@@ -572,7 +572,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
  		if (file) {
  			struct address_space *mapping = file->f_mapping;
  
@@ -157,7 +157,7 @@ index 10885c649ca4..f3a9cd12011b 100644
  			if (tmp->vm_flags & VM_SHARED)
  				mapping_allow_writable(mapping);
 diff --git a/mm/Makefile b/mm/Makefile
-index fc60a40ce954..c715b0138237 100644
+index d6c0042e3aa0..1f11c655a632 100644
 --- a/mm/Makefile
 +++ b/mm/Makefile
 @@ -52,7 +52,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
@@ -170,10 +170,10 @@ index fc60a40ce954..c715b0138237 100644
  # Give 'page_alloc' its own module-parameter namespace
  page-alloc-y := page_alloc.o
 diff --git a/mm/filemap.c b/mm/filemap.c
-index 82a17c35eb96..348e22067abd 100644
+index 39c4c46c6133..c9491cd04c85 100644
 --- a/mm/filemap.c
 +++ b/mm/filemap.c
-@@ -3349,7 +3349,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
+@@ -3356,7 +3356,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
  	vm_fault_t ret = VM_FAULT_LOCKED;
  
  	sb_start_pagefault(mapping->host->i_sb);
@@ -183,7 +183,7 @@ index 82a17c35eb96..348e22067abd 100644
  	if (page->mapping != mapping) {
  		unlock_page(page);
 diff --git a/mm/mmap.c b/mm/mmap.c
-index 88dcc5c25225..6c276614ca96 100644
+index bfb0ea164a90..6bbc17d4733d 100644
 --- a/mm/mmap.c
 +++ b/mm/mmap.c
 @@ -183,7 +183,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
@@ -204,7 +204,7 @@ index 88dcc5c25225..6c276614ca96 100644
  		}
  		if (next->anon_vma)
  			anon_vma_merge(vma, next);
-@@ -1873,7 +1873,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
+@@ -1872,7 +1872,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
  	return addr;
  
  unmap_and_free_vma:
@@ -213,7 +213,7 @@ index 88dcc5c25225..6c276614ca96 100644
  	vma->vm_file = NULL;
  
  	/* Undo any partial mapping done by a device driver. */
-@@ -2731,7 +2731,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+@@ -2730,7 +2730,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  		goto out_free_mpol;
  
  	if (new->vm_file)
@@ -222,7 +222,7 @@ index 88dcc5c25225..6c276614ca96 100644
  
  	if (new->vm_ops && new->vm_ops->open)
  		new->vm_ops->open(new);
-@@ -2750,7 +2750,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+@@ -2749,7 +2749,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  	if (new->vm_ops && new->vm_ops->close)
  		new->vm_ops->close(new);
  	if (new->vm_file)
@@ -231,7 +231,7 @@ index 88dcc5c25225..6c276614ca96 100644
  	unlink_anon_vmas(new);
   out_free_mpol:
  	mpol_put(vma_policy(new));
-@@ -2945,7 +2945,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+@@ -2944,7 +2944,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
  	struct vm_area_struct *vma;
  	unsigned long populate = 0;
  	unsigned long ret = -EINVAL;
@@ -240,7 +240,7 @@ index 88dcc5c25225..6c276614ca96 100644
  
  	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
  		     current->comm, current->pid);
-@@ -3001,10 +3001,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+@@ -3000,10 +3000,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
  	if (vma->vm_flags & VM_LOCKED)
  		flags |= MAP_LOCKED;
  
@@ -269,7 +269,7 @@ index 88dcc5c25225..6c276614ca96 100644
  out:
  	mmap_write_unlock(mm);
  	if (populate)
-@@ -3285,7 +3302,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+@@ -3284,7 +3301,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
  		if (anon_vma_clone(new_vma, vma))
  			goto out_free_mempol;
  		if (new_vma->vm_file)
@@ -279,10 +279,10 @@ index 88dcc5c25225..6c276614ca96 100644
  			new_vma->vm_ops->open(new_vma);
  		vma_link(mm, new_vma, prev, rb_link, rb_parent);
 diff --git a/mm/nommu.c b/mm/nommu.c
-index 02d2427b8f9e..a7419302ab4e 100644
+index 55a9e48a7a02..8dc77ce96335 100644
 --- a/mm/nommu.c
 +++ b/mm/nommu.c
-@@ -523,7 +523,7 @@ static void __put_nommu_region(struct vm_region *region)
+@@ -522,7 +522,7 @@ static void __put_nommu_region(struct vm_region *region)
  		up_write(&nommu_region_sem);
  
  		if (region->vm_file)
@@ -291,7 +291,7 @@ index 02d2427b8f9e..a7419302ab4e 100644
  
  		/* IO memory and memory shared directly out of the pagecache
  		 * from ramfs/tmpfs mustn't be released here */
-@@ -655,7 +655,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+@@ -654,7 +654,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
  	if (vma->vm_ops && vma->vm_ops->close)
  		vma->vm_ops->close(vma);
  	if (vma->vm_file)
@@ -300,7 +300,7 @@ index 02d2427b8f9e..a7419302ab4e 100644
  	put_nommu_region(vma->vm_region);
  	vm_area_free(vma);
  }
-@@ -1175,7 +1175,7 @@ unsigned long do_mmap(struct file *file,
+@@ -1174,7 +1174,7 @@ unsigned long do_mmap(struct file *file,
  					goto error_just_free;
  				}
  			}
@@ -309,7 +309,7 @@ index 02d2427b8f9e..a7419302ab4e 100644
  			kmem_cache_free(vm_region_jar, region);
  			region = pregion;
  			result = start;
-@@ -1252,10 +1252,10 @@ unsigned long do_mmap(struct file *file,
+@@ -1251,10 +1251,10 @@ unsigned long do_mmap(struct file *file,
  	up_write(&nommu_region_sem);
  error:
  	if (region->vm_file)
diff --git a/patches/aufs/0004-merge-aufs-standalone.patch b/patches/aufs/0004-merge-aufs-standalone.patch
index c24430fcc..fefb19738 100644
--- a/patches/aufs/0004-merge-aufs-standalone.patch
+++ b/patches/aufs/0004-merge-aufs-standalone.patch
@@ -1,7 +1,7 @@
-From 5cac58abf4f5e33f7b0159eb3472b040273291df Mon Sep 17 00:00:00 2001
+From aba885d6b8de7c3944753990927cb4c2d9e3dfc1 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:14:08 -0600
-Subject: [PATCH 4/6] merge: aufs-standalone
+Date: Tue, 25 Jan 2022 09:29:35 -0600
+Subject: [PATCH 4/5] merge: aufs-standalone
 
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
@@ -14,12 +14,11 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
  fs/open.c                | 1 +
  fs/read_write.c          | 2 ++
  fs/splice.c              | 2 ++
- fs/sync.c                | 1 +
  fs/xattr.c               | 1 +
  kernel/locking/lockdep.c | 1 +
  kernel/task_work.c       | 1 +
  security/security.c      | 8 ++++++++
- 14 files changed, 27 insertions(+)
+ 13 files changed, 26 insertions(+)
 
 diff --git a/fs/dcache.c b/fs/dcache.c
 index bc5095b734f5..9508bd57a3bc 100644
@@ -42,7 +41,7 @@ index bc5095b734f5..9508bd57a3bc 100644
  /**
   * d_ancestor - search for an ancestor
 diff --git a/fs/exec.c b/fs/exec.c
-index ac7b51b51f38..52a8be4ebc1e 100644
+index 537d92c41105..0ab811e0fdaa 100644
 --- a/fs/exec.c
 +++ b/fs/exec.c
 @@ -111,6 +111,7 @@ bool path_noexec(const struct path *path)
@@ -86,7 +85,7 @@ index 45437f8e1003..786af52904fc 100644
  void __init files_init(void)
  {
 diff --git a/fs/namespace.c b/fs/namespace.c
-index 128367073782..db9936562011 100644
+index c45740054bc7..d3d750635610 100644
 --- a/fs/namespace.c
 +++ b/fs/namespace.c
 @@ -439,6 +439,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
@@ -114,7 +113,7 @@ index 128367073782..db9936562011 100644
  static void lock_mnt_tree(struct mount *mnt)
  {
 diff --git a/fs/notify/group.c b/fs/notify/group.c
-index fb89c351295d..460ad19c2570 100644
+index 6a297efc4788..ed394ccb10e0 100644
 --- a/fs/notify/group.c
 +++ b/fs/notify/group.c
 @@ -100,6 +100,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
@@ -126,7 +125,7 @@ index fb89c351295d..460ad19c2570 100644
  /*
   * Drop a reference to a group.  Free it if it's through.
 diff --git a/fs/open.c b/fs/open.c
-index e0df1536eb69..81b2d7c83add 100644
+index f732fb94600c..ca33d86e73fd 100644
 --- a/fs/open.c
 +++ b/fs/open.c
 @@ -65,6 +65,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
@@ -138,10 +137,10 @@ index e0df1536eb69..81b2d7c83add 100644
  long vfs_truncate(const struct path *path, loff_t length)
  {
 diff --git a/fs/read_write.c b/fs/read_write.c
-index af057c57bdc6..76017f8331fb 100644
+index 0074afa7ecb3..612cf04d9f6b 100644
 --- a/fs/read_write.c
 +++ b/fs/read_write.c
-@@ -492,6 +492,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
+@@ -488,6 +488,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  	inc_syscr(current);
  	return ret;
  }
@@ -149,7 +148,7 @@ index af057c57bdc6..76017f8331fb 100644
  
  static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  {
-@@ -602,6 +603,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
+@@ -598,6 +599,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
  	file_end_write(file);
  	return ret;
  }
@@ -177,18 +176,6 @@ index 3e6ba363b777..7c1be373eb7c 100644
  
  /**
   * splice_direct_to_actor - splices data directly between two non-pipes
-diff --git a/fs/sync.c b/fs/sync.c
-index b7b5a0a0df6f..fa5c7fba7f1b 100644
---- a/fs/sync.c
-+++ b/fs/sync.c
-@@ -39,6 +39,7 @@ int __sync_filesystem(struct super_block *sb, int wait)
- 		sb->s_op->sync_fs(sb, wait);
- 	return __sync_blockdev(sb->s_bdev, wait);
- }
-+EXPORT_SYMBOL_GPL(__sync_filesystem);
- 
- /*
-  * Write out and wait upon all dirty data associated with this
 diff --git a/fs/xattr.c b/fs/xattr.c
 index 5c8c5175b385..ff7e9ff774b7 100644
 --- a/fs/xattr.c
@@ -202,7 +189,7 @@ index 5c8c5175b385..ff7e9ff774b7 100644
  ssize_t
  __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
 diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
-index eff2ca26e6c5..0627584f7872 100644
+index fbb8e650c174..49c4d11b0893 100644
 --- a/kernel/locking/lockdep.c
 +++ b/kernel/locking/lockdep.c
 @@ -210,6 +210,7 @@ inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock)
@@ -223,10 +210,10 @@ index 1698fbe6f0e1..081b05acadf8 100644
  }
 +EXPORT_SYMBOL_GPL(task_work_run);
 diff --git a/security/security.c b/security/security.c
-index 67264cb08fb3..0d78065d71fe 100644
+index c88167a414b4..125724525d5c 100644
 --- a/security/security.c
 +++ b/security/security.c
-@@ -1147,6 +1147,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
+@@ -1148,6 +1148,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
  		return 0;
  	return call_int_hook(path_rmdir, 0, dir, dentry);
  }
@@ -234,7 +221,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_unlink(const struct path *dir, struct dentry *dentry)
  {
-@@ -1163,6 +1164,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
+@@ -1164,6 +1165,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
  		return 0;
  	return call_int_hook(path_symlink, 0, dir, dentry, old_name);
  }
@@ -242,7 +229,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
  		       struct dentry *new_dentry)
-@@ -1171,6 +1173,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
+@@ -1172,6 +1174,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
  		return 0;
  	return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
  }
@@ -250,7 +237,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
  			 const struct path *new_dir, struct dentry *new_dentry,
-@@ -1198,6 +1201,7 @@ int security_path_truncate(const struct path *path)
+@@ -1199,6 +1202,7 @@ int security_path_truncate(const struct path *path)
  		return 0;
  	return call_int_hook(path_truncate, 0, path);
  }
@@ -258,7 +245,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_chmod(const struct path *path, umode_t mode)
  {
-@@ -1205,6 +1209,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
+@@ -1206,6 +1210,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
  		return 0;
  	return call_int_hook(path_chmod, 0, path, mode);
  }
@@ -266,7 +253,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
  {
-@@ -1212,6 +1217,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
+@@ -1213,6 +1218,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
  		return 0;
  	return call_int_hook(path_chown, 0, path, uid, gid);
  }
@@ -274,7 +261,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_path_chroot(const struct path *path)
  {
-@@ -1312,6 +1318,7 @@ int security_inode_permission(struct inode *inode, int mask)
+@@ -1313,6 +1319,7 @@ int security_inode_permission(struct inode *inode, int mask)
  		return 0;
  	return call_int_hook(inode_permission, 0, inode, mask);
  }
@@ -282,7 +269,7 @@ index 67264cb08fb3..0d78065d71fe 100644
  
  int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
  {
-@@ -1509,6 +1516,7 @@ int security_file_permission(struct file *file, int mask)
+@@ -1510,6 +1517,7 @@ int security_file_permission(struct file *file, int mask)
  
  	return fsnotify_perm(file, mask);
  }
diff --git a/patches/aufs/0005-merge-aufs.patch b/patches/aufs/0005-merge-aufs.patch
index 2761d996d..0897846ba 100644
--- a/patches/aufs/0005-merge-aufs.patch
+++ b/patches/aufs/0005-merge-aufs.patch
@@ -1,9 +1,9 @@
-From c52a4ad22612ae69f204abee2e092cb0433ea4e2 Mon Sep 17 00:00:00 2001
+From ac9a8076d11453424fa21a10d845eee81cd00178 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:14:09 -0600
-Subject: [PATCH 5/6] merge: aufs
+Date: Tue, 25 Jan 2022 09:29:36 -0600
+Subject: [PATCH 5/5] merge: aufs
 
-https://github.com/sfjro/aufs5-standalone/commit/fcc56866b84d43fd03d9e1d91d52f40e8a9d5335
+https://github.com/sfjro/aufs5-standalone/commit/41291d0ae20e0a803a7d9c6cccd1bb3525f6439b
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
  Documentation/ABI/testing/debugfs-aufs        |   55 +
@@ -54,7 +54,7 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
  fs/aufs/finfo.c                               |  149 ++
  fs/aufs/fstype.h                              |  401 ++++
  fs/aufs/hbl.h                                 |   65 +
- fs/aufs/hfsnotify.c                           |  288 +++
+ fs/aufs/hfsnotify.c                           |  289 +++
  fs/aufs/hfsplus.c                             |   60 +
  fs/aufs/hnotify.c                             |  715 ++++++
  fs/aufs/i_op.c                                | 1513 +++++++++++++
@@ -98,7 +98,7 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
  fs/aufs/xattr.c                               |  368 ++++
  fs/aufs/xino.c                                | 1926 +++++++++++++++++
  include/uapi/linux/aufs_type.h                |  452 ++++
- 92 files changed, 37847 insertions(+)
+ 92 files changed, 37848 insertions(+)
  create mode 100644 Documentation/ABI/testing/debugfs-aufs
  create mode 100644 Documentation/ABI/testing/sysfs-aufs
  create mode 100644 Documentation/filesystems/aufs/README
@@ -292,7 +292,7 @@ index 000000000000..48500c0569e6
 +		will be empty. About XINO files, see the aufs manual.
 diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
 new file mode 100644
-index 000000000000..d08a56444e22
+index 000000000000..9c05340b9dda
 --- /dev/null
 +++ b/Documentation/filesystems/aufs/README
 @@ -0,0 +1,396 @@
@@ -667,7 +667,7 @@ index 000000000000..d08a56444e22
 +The Parted Magic Project made a donation (2013/9 and 11).
 +Pavel Barta made a donation (2013/10).
 +Nikolay Pertsev made a donation (2014/5).
-+James B made a donation (2014/7 and 2015/7).
++James B made a donation (2014/7, 2015/7, and 2021/12).
 +Stefano Di Biase made a donation (2014/8).
 +Daniel Epellei made a donation (2015/1).
 +OmegaPhil made a donation (2016/1, 2018/4).
@@ -16266,10 +16266,10 @@ index 000000000000..33b6f7da81eb
 +#endif /* __AUFS_HBL_H__ */
 diff --git a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c
 new file mode 100644
-index 000000000000..b029fa2085a8
+index 000000000000..597d045a48b4
 --- /dev/null
 +++ b/fs/aufs/hfsnotify.c
-@@ -0,0 +1,288 @@
+@@ -0,0 +1,289 @@
 +// SPDX-License-Identifier: GPL-2.0
 +/*
 + * Copyright (C) 2005-2021 Junjiro R. Okajima
@@ -16442,7 +16442,8 @@ index 000000000000..b029fa2085a8
 +	struct inode *h_dir, *h_inode;
 +	struct fsnotify_mark *inode_mark;
 +
-+	AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
++	AuDebugOn(!(data_type == FSNOTIFY_EVENT_INODE
++		    || data_type == FSNOTIFY_EVENT_DENTRY));
 +
 +	err = 0;
 +	/* if FS_UNMOUNT happens, there must be another bug */
@@ -28767,7 +28768,7 @@ index 000000000000..91e62d79e099
 +}
 diff --git a/fs/aufs/super.c b/fs/aufs/super.c
 new file mode 100644
-index 000000000000..666e1837ffbb
+index 000000000000..e67ffdb3797e
 --- /dev/null
 +++ b/fs/aufs/super.c
 @@ -0,0 +1,1050 @@
@@ -29226,7 +29227,7 @@ index 000000000000..666e1837ffbb
 +			continue;
 +
 +		h_sb = au_sbr_sb(sb, bindex);
-+		e = vfsub_sync_filesystem(h_sb, wait);
++		e = vfsub_sync_filesystem(h_sb);
 +		if (unlikely(e && !err))
 +			err = e;
 +		/* go on even if an error happens */
@@ -32060,7 +32061,7 @@ index 000000000000..a3f709ee7475
 +}
 diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
 new file mode 100644
-index 000000000000..f6e27fbcf584
+index 000000000000..955c8cea646b
 --- /dev/null
 +++ b/fs/aufs/vfsub.c
 @@ -0,0 +1,919 @@
@@ -32103,13 +32104,13 @@ index 000000000000..f6e27fbcf584
 +}
 +#endif
 +
-+int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
++int vfsub_sync_filesystem(struct super_block *h_sb)
 +{
 +	int err;
 +
 +	lockdep_off();
 +	down_read(&h_sb->s_umount);
-+	err = __sync_filesystem(h_sb, wait);
++	err = sync_filesystem(h_sb);
 +	up_read(&h_sb->s_umount);
 +	lockdep_on();
 +
@@ -32985,7 +32986,7 @@ index 000000000000..f6e27fbcf584
 +}
 diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h
 new file mode 100644
-index 000000000000..0c76f421b95f
+index 000000000000..dca38d84b626
 --- /dev/null
 +++ b/fs/aufs/vfsub.h
 @@ -0,0 +1,358 @@
@@ -33075,7 +33076,7 @@ index 000000000000..0c76f421b95f
 +AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
 +#endif
 +
-+int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
++int vfsub_sync_filesystem(struct super_block *h_sb);
 +
 +/* ---------------------------------------------------------------------- */
 +
@@ -38135,7 +38136,7 @@ index 000000000000..e6683663885b
 +}
 diff --git a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h
 new file mode 100644
-index 000000000000..5219a4d117b3
+index 000000000000..d4c1fabad7a7
 --- /dev/null
 +++ b/include/uapi/linux/aufs_type.h
 @@ -0,0 +1,452 @@
@@ -38181,7 +38182,7 @@ index 000000000000..5219a4d117b3
 +#include <limits.h>
 +#endif /* __KERNEL__ */
 +
-+#define AUFS_VERSION	"5.15.5-20211129"
++#define AUFS_VERSION	"5.16-20220117"
 +
 +/* todo? move this to linux-2.6.19/include/magic.h */
 +#define AUFS_SUPER_MAGIC	('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
diff --git a/patches/bcfserial/0001-merge-bcfserial-https-github.com-statropy-bcfserial.patch b/patches/bcfserial/0001-merge-bcfserial-https-github.com-statropy-bcfserial.patch
index 875a31c98..c99bfb8d2 100644
--- a/patches/bcfserial/0001-merge-bcfserial-https-github.com-statropy-bcfserial.patch
+++ b/patches/bcfserial/0001-merge-bcfserial-https-github.com-statropy-bcfserial.patch
@@ -1,6 +1,6 @@
-From e4502c2b1db738244393c92dcede021a504bdac1 Mon Sep 17 00:00:00 2001
+From 48ea340f91a8c31ae5601a1e31d0ed8c2f711084 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Mon, 10 Jan 2022 15:16:45 -0600
+Date: Tue, 25 Jan 2022 09:30:40 -0600
 Subject: [PATCH] merge: bcfserial: https://github.com/statropy/bcfserial
 
 https://github.com/statropy/bcfserial/commit/aded88429a8a00143596b41f4c1f50d9ae3d4069
diff --git a/patches/defconfig b/patches/defconfig
index 6eed1a195..716042520 100644
--- a/patches/defconfig
+++ b/patches/defconfig
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/arm 5.16.0 Kernel Configuration
+# Linux/arm 5.16.2 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="arm-linux-gnueabi-gcc (GCC) 11.1.0"
 CONFIG_CC_IS_GCC=y
@@ -7163,6 +7163,23 @@ CONFIG_ROMFS_ON_MTD=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
 # CONFIG_EROFS_FS is not set
+CONFIG_AUFS_FS=m
+CONFIG_AUFS_BRANCH_MAX_127=y
+# CONFIG_AUFS_BRANCH_MAX_511 is not set
+# CONFIG_AUFS_BRANCH_MAX_1023 is not set
+# CONFIG_AUFS_BRANCH_MAX_32767 is not set
+CONFIG_AUFS_SBILIST=y
+# CONFIG_AUFS_HNOTIFY is not set
+CONFIG_AUFS_EXPORT=y
+CONFIG_AUFS_XATTR=y
+# CONFIG_AUFS_FHSM is not set
+# CONFIG_AUFS_RDU is not set
+# CONFIG_AUFS_DIRREN is not set
+# CONFIG_AUFS_SHWH is not set
+# CONFIG_AUFS_BR_RAMFS is not set
+# CONFIG_AUFS_BR_FUSE is not set
+CONFIG_AUFS_BDEV_LOOP=y
+# CONFIG_AUFS_DEBUG is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V2=y
diff --git a/patches/git/AUFS b/patches/git/AUFS
index f4a5297b6..3f4560b27 100644
--- a/patches/git/AUFS
+++ b/patches/git/AUFS
@@ -1 +1 @@
-AUFS: https://github.com/sfjro/aufs5-standalone/commit/03a3ee3199ae847b9e5ac7596900c64fc17cfd5a
+AUFS: https://github.com/sfjro/aufs5-standalone/commit/41291d0ae20e0a803a7d9c6cccd1bb3525f6439b
diff --git a/patches/ref_omap2plus_defconfig b/patches/ref_omap2plus_defconfig
index 8a2149ab2..cf18ac017 100644
--- a/patches/ref_omap2plus_defconfig
+++ b/patches/ref_omap2plus_defconfig
@@ -1,6 +1,6 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux/arm 5.16.0 Kernel Configuration
+# Linux/arm 5.16.2 Kernel Configuration
 #
 CONFIG_CC_VERSION_TEXT="arm-linux-gnueabi-gcc (GCC) 11.1.0"
 CONFIG_CC_IS_GCC=y
@@ -6565,6 +6565,7 @@ CONFIG_CRAMFS_BLOCKDEV=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
 # CONFIG_EROFS_FS is not set
+# CONFIG_AUFS_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V2=y
diff --git a/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch b/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch
index fa54e4183..e30ec554b 100644
--- a/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch
+++ b/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch
@@ -1,285 +1,216 @@
-From ad6fffd861ddb8f59a92bef3121b4aacc3363656 Mon Sep 17 00:00:00 2001
+From 612e116498198779d63b7bdbb1d7f94c8aae9b1b Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Fri, 31 Dec 2021 10:15:06 -0600
+Date: Tue, 25 Jan 2022 09:30:58 -0600
 Subject: [PATCH] merge: CONFIG_PREEMPT_RT Patch Set
 
-patch-5.15.10-rt24.patch.xz
+patch-5.16.2-rt19.patch.xz
 
 Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
 ---
- Documentation/dev-tools/kcov.rst              |   5 +
- arch/alpha/include/asm/spinlock_types.h       |   2 +-
- arch/arm/Kconfig                              |   5 +-
- arch/arm/include/asm/spinlock_types.h         |   2 +-
- arch/arm/include/asm/thread_info.h            |   6 +-
- arch/arm/kernel/asm-offsets.c                 |   1 +
- arch/arm/kernel/entry-armv.S                  |  19 +-
- arch/arm/kernel/signal.c                      |   3 +-
- arch/arm/kernel/smp.c                         |   2 -
- arch/arm/mm/fault.c                           |   6 +
- arch/arm64/Kconfig                            |   3 +
- arch/arm64/include/asm/pgtable.h              |   2 +-
- arch/arm64/include/asm/preempt.h              |  25 +-
- arch/arm64/include/asm/signal.h               |   4 +
- arch/arm64/include/asm/spinlock_types.h       |   2 +-
- arch/arm64/include/asm/thread_info.h          |   8 +-
- arch/arm64/kernel/asm-offsets.c               |   1 +
- arch/arm64/kernel/fpsimd.c                    |  23 +-
- arch/arm64/kernel/signal.c                    |  10 +-
- arch/arm64/kvm/arm.c                          |   6 +-
- arch/csky/include/asm/spinlock_types.h        |   2 +-
- arch/hexagon/include/asm/spinlock_types.h     |   2 +-
- arch/ia64/include/asm/spinlock_types.h        |   2 +-
- arch/powerpc/Kconfig                          |   3 +
- .../include/asm/simple_spinlock_types.h       |   2 +-
- arch/powerpc/include/asm/smp.h                |   1 +
- arch/powerpc/include/asm/spinlock_types.h     |   2 +-
- arch/powerpc/include/asm/stackprotector.h     |   4 +
- arch/powerpc/include/asm/thread_info.h        |   7 +
- arch/powerpc/kernel/interrupt.c               |   8 +-
- arch/powerpc/kernel/irq.c                     |   4 +
- arch/powerpc/kernel/kgdb.c                    |  10 +-
- arch/powerpc/kernel/smp.c                     |   5 +
- arch/powerpc/kernel/traps.c                   |   7 +-
- arch/powerpc/kexec/crash.c                    |   3 -
- arch/powerpc/kvm/Kconfig                      |   1 +
- arch/powerpc/platforms/pseries/iommu.c        |  31 +-
- arch/riscv/include/asm/spinlock_types.h       |   2 +-
- arch/s390/include/asm/spinlock_types.h        |   2 +-
- arch/sh/include/asm/spinlock_types.h          |   2 +-
- arch/sh/kernel/irq.c                          |   2 +
- arch/sparc/kernel/irq_64.c                    |   2 +
- arch/x86/Kconfig                              |   2 +
- arch/x86/include/asm/irq_stack.h              |   3 +
- arch/x86/include/asm/preempt.h                |  33 +-
- arch/x86/include/asm/signal.h                 |  13 +
- arch/x86/include/asm/stackprotector.h         |   8 +-
- arch/x86/include/asm/thread_info.h            |   5 +
- arch/x86/kernel/cpu/mshyperv.c                |   3 +-
- arch/x86/kernel/dumpstack_32.c                |   2 +-
- arch/x86/kernel/dumpstack_64.c                |   3 +-
- arch/x86/kernel/i8259.c                       |   3 +-
- arch/x86/kernel/irq_32.c                      |   2 +
- arch/x86/kernel/kgdb.c                        |   9 +-
- arch/x86/kernel/unwind_frame.c                |  16 +-
- arch/x86/kernel/unwind_orc.c                  |   2 +-
- arch/x86/kvm/x86.c                            |   8 +
- arch/xtensa/include/asm/spinlock_types.h      |   2 +-
- block/blk-mq.c                                |   6 +-
- crypto/cryptd.c                               |  19 +-
- crypto/testmgr.c                              |   4 +-
- drivers/block/zram/zram_drv.c                 |  36 +
- drivers/block/zram/zram_drv.h                 |   1 +
- drivers/char/random.c                         |  16 +-
- drivers/char/tpm/tpm_tis.c                    |  29 +-
- drivers/firmware/efi/efi.c                    |   5 +-
- drivers/gpu/drm/i915/display/intel_crtc.c     |  15 +-
- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   |   5 +-
- drivers/gpu/drm/i915/gt/intel_context.h       |   3 +-
- drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
- drivers/gpu/drm/i915/gt/intel_engine_pm.c     |  38 +-
- .../drm/i915/gt/intel_execlists_submission.c  |  17 +-
- drivers/gpu/drm/i915/i915_irq.c               |   6 +-
- drivers/gpu/drm/i915/i915_request.c           |   2 -
- drivers/gpu/drm/i915/i915_request.h           |   3 +-
- drivers/gpu/drm/i915/i915_trace.h             |   6 +-
- drivers/gpu/drm/i915/i915_utils.h             |   2 +-
- drivers/hv/hyperv_vmbus.h                     |   1 +
- drivers/hv/vmbus_drv.c                        |   5 +-
- drivers/leds/trigger/Kconfig                  |   1 +
- drivers/md/raid5.c                            |   7 +-
- drivers/md/raid5.h                            |   1 +
- .../net/ethernet/netronome/nfp/abm/qdisc.c    |   2 +-
- drivers/scsi/fcoe/fcoe.c                      |  16 +-
- drivers/scsi/fcoe/fcoe_ctlr.c                 |   4 +-
- drivers/scsi/libfc/fc_exch.c                  |   4 +-
- drivers/tty/serial/8250/8250.h                |  47 +-
- drivers/tty/serial/8250/8250_core.c           |  17 +-
- drivers/tty/serial/8250/8250_fsl.c            |   9 +
- drivers/tty/serial/8250/8250_ingenic.c        |   7 +
- drivers/tty/serial/8250/8250_mtk.c            |  29 +-
- drivers/tty/serial/8250/8250_port.c           |  92 +-
- drivers/tty/serial/amba-pl011.c               |  17 +-
- drivers/tty/serial/omap-serial.c              |  12 +-
- drivers/virt/acrn/irqfd.c                     |   1 -
- fs/afs/dir_silly.c                            |   2 +-
- fs/cifs/readdir.c                             |   2 +-
- fs/dcache.c                                   |  37 +-
- fs/fscache/internal.h                         |   1 -
- fs/fscache/main.c                             |   6 -
- fs/fscache/object.c                           |  13 +-
- fs/fuse/readdir.c                             |   2 +-
- fs/namei.c                                    |   4 +-
- fs/namespace.c                                |  20 +-
- fs/nfs/dir.c                                  |   4 +-
- fs/nfs/unlink.c                               |   4 +-
- fs/proc/base.c                                |   3 +-
- fs/proc/proc_sysctl.c                         |   2 +-
- include/asm-generic/softirq_stack.h           |   2 +-
- include/linux/console.h                       |  19 +
- include/linux/dcache.h                        |   4 +-
- include/linux/entry-common.h                  |   8 +-
- include/linux/irq_work.h                      |   8 +
- include/linux/irqdesc.h                       |   1 +
- include/linux/irqflags.h                      |  23 +-
- include/linux/kernel.h                        |  13 +-
- include/linux/kgdb.h                          |   3 +
- include/linux/mm_types.h                      |   4 +
- include/linux/netdevice.h                     |   4 -
- include/linux/nfs_xdr.h                       |   2 +-
- include/linux/preempt.h                       |  68 +-
- include/linux/printk.h                        |  88 +-
- include/linux/random.h                        |   2 +-
- include/linux/ratelimit_types.h               |   2 +-
- include/linux/rcupdate.h                      |   7 +
- include/linux/rtmutex.h                       |   9 +
- include/linux/sched.h                         | 169 ++-
- include/linux/sched/mm.h                      |  20 +
- include/linux/serial_8250.h                   |   5 +
- include/linux/skbuff.h                        |   7 +
- include/linux/smp.h                           |   3 +
- include/linux/spinlock_types_up.h             |   2 +-
- include/linux/suspend.h                       |  10 +-
- include/linux/thread_info.h                   |  12 +-
- include/linux/trace_events.h                  |   5 +-
- include/linux/u64_stats_sync.h                |  52 +-
- include/net/act_api.h                         |  10 +-
- include/net/gen_stats.h                       |  59 +-
- include/net/netfilter/xt_rateest.h            |   2 +-
- include/net/pkt_cls.h                         |   4 +-
- include/net/sch_generic.h                     |  78 +-
- init/Kconfig                                  |   5 +-
- init/main.c                                   |   1 +
- kernel/Kconfig.preempt                        |   6 +
- kernel/cgroup/rstat.c                         |   5 +-
- kernel/debug/debug_core.c                     |  45 +-
- kernel/debug/kdb/kdb_io.c                     |  18 +-
- kernel/entry/common.c                         |  12 +-
- kernel/exit.c                                 |   7 +
- kernel/fork.c                                 |  18 +-
- kernel/irq/handle.c                           |  10 +-
- kernel/irq/manage.c                           |  12 +-
- kernel/irq/spurious.c                         |   8 +
- kernel/irq_work.c                             | 130 ++-
- kernel/kcov.c                                 |  36 +-
- kernel/kprobes.c                              |   8 +-
- kernel/ksysfs.c                               |  12 +
- kernel/kthread.c                              |  16 +-
- kernel/locking/lockdep.c                      |   2 +
- kernel/locking/rtmutex.c                      |  20 +-
- kernel/locking/rtmutex_api.c                  |  30 +-
- kernel/locking/spinlock_rt.c                  |  23 +-
- kernel/panic.c                                |  30 +-
- kernel/power/main.c                           |  10 +-
- kernel/printk/Makefile                        |   1 -
- kernel/printk/internal.h                      |  36 -
- kernel/printk/printk.c                        | 969 +++++++++---------
- kernel/printk/printk_safe.c                   |  52 -
- kernel/ptrace.c                               |  38 +-
- kernel/rcu/tasks.h                            |   9 +-
- kernel/rcu/tree.c                             |   7 +-
- kernel/sched/core.c                           | 180 +++-
- kernel/sched/deadline.c                       |   2 +-
- kernel/sched/fair.c                           |  21 +-
- kernel/sched/features.h                       |   8 +
- kernel/sched/psi.c                            |  14 +-
- kernel/sched/rt.c                             |   2 +-
- kernel/sched/sched.h                          |   9 +
- kernel/sched/swait.c                          |   1 +
- kernel/sched/topology.c                       |   2 +-
- kernel/signal.c                               |  36 +-
- kernel/smp.c                                  |  14 +-
- kernel/time/clockevents.c                     |   9 +-
- kernel/time/ntp.c                             |  14 +-
- kernel/time/timekeeping.c                     |  30 +-
- kernel/time/timekeeping_debug.c               |   2 +-
- kernel/trace/trace.c                          |  46 +-
- kernel/trace/trace_events.c                   |   1 +
- kernel/trace/trace_output.c                   |  14 +-
- kernel/workqueue.c                            |   4 -
- lib/bug.c                                     |   1 +
- lib/dump_stack.c                              |   4 +-
- lib/irq_poll.c                                |   2 +
- lib/locking-selftest.c                        | 170 ++-
- lib/nmi_backtrace.c                           |   4 +-
- lib/ratelimit.c                               |   4 +-
- lib/scatterlist.c                             |  11 +-
- mm/Kconfig                                    |   2 +-
- mm/memory.c                                   |   2 +-
- mm/page_alloc.c                               |   4 +-
- mm/vmalloc.c                                  |  10 +-
- mm/workingset.c                               |   5 +-
- mm/zsmalloc.c                                 |  84 +-
- net/Kconfig                                   |   2 +-
- net/core/dev.c                                |  33 +-
- net/core/gen_estimator.c                      |  52 +-
- net/core/gen_stats.c                          | 186 ++--
- net/ipv4/inet_hashtables.c                    |  19 +-
- net/ipv6/inet6_hashtables.c                   |   5 +-
- net/netfilter/xt_RATEEST.c                    |   7 +-
- net/sched/act_api.c                           |  21 +-
- net/sched/act_bpf.c                           |   2 +-
- net/sched/act_ife.c                           |   4 +-
- net/sched/act_mpls.c                          |   2 +-
- net/sched/act_police.c                        |   4 +-
- net/sched/act_sample.c                        |   2 +-
- net/sched/act_simple.c                        |   3 +-
- net/sched/act_skbedit.c                       |   2 +-
- net/sched/act_skbmod.c                        |   2 +-
- net/sched/sch_api.c                           |  18 +-
- net/sched/sch_atm.c                           |   6 +-
- net/sched/sch_cbq.c                           |  15 +-
- net/sched/sch_drr.c                           |  13 +-
- net/sched/sch_ets.c                           |  17 +-
- net/sched/sch_generic.c                       |  13 +-
- net/sched/sch_gred.c                          |  65 +-
- net/sched/sch_hfsc.c                          |  11 +-
- net/sched/sch_htb.c                           |  43 +-
- net/sched/sch_mq.c                            |  30 +-
- net/sched/sch_mqprio.c                        |  63 +-
- net/sched/sch_multiq.c                        |   3 +-
- net/sched/sch_prio.c                          |   4 +-
- net/sched/sch_qfq.c                           |  13 +-
- net/sched/sch_taprio.c                        |   2 +-
- net/sunrpc/svc_xprt.c                         |   4 +-
- samples/kfifo/bytestream-example.c            |  12 +-
- samples/kfifo/inttype-example.c               |  12 +-
- samples/kfifo/record-example.c                |  12 +-
- security/smack/smack_lsm.c                    |   9 +-
- sound/soc/mediatek/common/mtk-afe-fe-dai.c    |   1 -
- 240 files changed, 2915 insertions(+), 1729 deletions(-)
- delete mode 100644 kernel/printk/printk_safe.c
+ .../admin-guide/cgroup-v1/memory.rst          |    2 +
+ arch/alpha/include/asm/spinlock_types.h       |    2 +-
+ arch/arm/Kconfig                              |    5 +-
+ arch/arm/include/asm/spinlock_types.h         |    2 +-
+ arch/arm/include/asm/thread_info.h            |    6 +-
+ arch/arm/kernel/asm-offsets.c                 |    1 +
+ arch/arm/kernel/entry-armv.S                  |   19 +-
+ arch/arm/kernel/signal.c                      |    3 +-
+ arch/arm/mm/fault.c                           |    6 +
+ arch/arm64/Kconfig                            |    2 +
+ arch/arm64/include/asm/pgtable.h              |    2 +-
+ arch/arm64/include/asm/preempt.h              |   25 +-
+ arch/arm64/include/asm/signal.h               |    4 +
+ arch/arm64/include/asm/spinlock_types.h       |    2 +-
+ arch/arm64/include/asm/thread_info.h          |    8 +-
+ arch/arm64/kernel/asm-offsets.c               |    1 +
+ arch/arm64/kernel/fpsimd.c                    |   25 +-
+ arch/arm64/kernel/signal.c                    |   10 +-
+ arch/arm64/kvm/arm.c                          |    6 +-
+ arch/csky/include/asm/spinlock_types.h        |    2 +-
+ arch/hexagon/include/asm/spinlock_types.h     |    2 +-
+ arch/ia64/include/asm/spinlock_types.h        |    2 +-
+ arch/ia64/include/asm/thread_info.h           |    6 +-
+ arch/powerpc/Kconfig                          |    3 +
+ .../include/asm/simple_spinlock_types.h       |    2 +-
+ arch/powerpc/include/asm/spinlock_types.h     |    2 +-
+ arch/powerpc/include/asm/stackprotector.h     |    4 +
+ arch/powerpc/include/asm/thread_info.h        |    8 +
+ arch/powerpc/kernel/interrupt.c               |    8 +-
+ arch/powerpc/kernel/irq.c                     |    4 +
+ arch/powerpc/kernel/traps.c                   |    7 +-
+ arch/powerpc/kvm/Kconfig                      |    1 +
+ arch/powerpc/platforms/pseries/iommu.c        |   31 +-
+ arch/riscv/include/asm/spinlock_types.h       |    2 +-
+ arch/s390/include/asm/spinlock_types.h        |    2 +-
+ arch/sh/include/asm/spinlock_types.h          |    2 +-
+ arch/sh/kernel/irq.c                          |    2 +
+ arch/sparc/kernel/irq_64.c                    |    2 +
+ arch/x86/Kconfig                              |    2 +
+ arch/x86/include/asm/pgtable.h                |    1 +
+ arch/x86/include/asm/preempt.h                |   33 +-
+ arch/x86/include/asm/signal.h                 |   13 +
+ arch/x86/include/asm/thread_info.h            |    5 +
+ arch/x86/kernel/cpu/mshyperv.c                |    2 +-
+ arch/x86/kvm/x86.c                            |    6 +
+ arch/xtensa/include/asm/spinlock_types.h      |    2 +-
+ block/blk-mq.c                                |    6 +-
+ crypto/cryptd.c                               |   19 +-
+ drivers/block/zram/zram_drv.c                 |   36 +
+ drivers/block/zram/zram_drv.h                 |    1 +
+ drivers/char/random.c                         |   91 +-
+ drivers/char/tpm/tpm_tis.c                    |   29 +-
+ drivers/gpu/drm/i915/display/intel_crtc.c     |   15 +-
+ drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   |    5 +-
+ drivers/gpu/drm/i915/gt/intel_context.h       |    3 +-
+ drivers/gpu/drm/i915/gt/intel_context_types.h |    1 +
+ drivers/gpu/drm/i915/gt/intel_engine_pm.c     |   38 +-
+ .../drm/i915/gt/intel_execlists_submission.c  |   17 +-
+ drivers/gpu/drm/i915/i915_irq.c               |    6 +-
+ drivers/gpu/drm/i915/i915_request.c           |    2 -
+ drivers/gpu/drm/i915/i915_request.h           |    3 +-
+ drivers/gpu/drm/i915/i915_trace.h             |    6 +-
+ drivers/gpu/drm/i915/i915_utils.h             |    2 +-
+ drivers/hv/vmbus_drv.c                        |    2 +-
+ drivers/i2c/busses/i2c-cht-wc.c               |   11 +-
+ drivers/i2c/i2c-core-base.c                   |    2 +-
+ drivers/md/raid5.c                            |    7 +-
+ drivers/md/raid5.h                            |    1 +
+ drivers/mfd/ezx-pcap.c                        |    4 +-
+ drivers/misc/hi6421v600-irq.c                 |    6 +-
+ drivers/mmc/core/block.c                      |   22 +-
+ drivers/net/usb/lan78xx.c                     |    7 +-
+ drivers/scsi/fcoe/fcoe.c                      |   16 +-
+ drivers/scsi/fcoe/fcoe_ctlr.c                 |    4 +-
+ drivers/scsi/libfc/fc_exch.c                  |    4 +-
+ drivers/staging/greybus/gpio.c                |    5 +-
+ drivers/tty/serial/8250/8250.h                |   47 +-
+ drivers/tty/serial/8250/8250_core.c           |   17 +-
+ drivers/tty/serial/8250/8250_fsl.c            |    9 +
+ drivers/tty/serial/8250/8250_ingenic.c        |    7 +
+ drivers/tty/serial/8250/8250_mtk.c            |   29 +-
+ drivers/tty/serial/8250/8250_port.c           |   92 +-
+ drivers/tty/serial/8250/Kconfig               |    1 +
+ drivers/tty/serial/amba-pl011.c               |   17 +-
+ drivers/tty/serial/omap-serial.c              |   12 +-
+ drivers/tty/vt/vt.c                           |    2 +-
+ drivers/virt/acrn/irqfd.c                     |    1 -
+ fs/afs/dir_silly.c                            |    2 +-
+ fs/cifs/readdir.c                             |    2 +-
+ fs/dcache.c                                   |   37 +-
+ fs/fscache/internal.h                         |    1 -
+ fs/fscache/main.c                             |    6 -
+ fs/fscache/object.c                           |   13 +-
+ fs/fuse/readdir.c                             |    2 +-
+ fs/namei.c                                    |    4 +-
+ fs/namespace.c                                |   20 +-
+ fs/nfs/dir.c                                  |    4 +-
+ fs/nfs/unlink.c                               |    4 +-
+ fs/proc/base.c                                |    3 +-
+ fs/proc/proc_sysctl.c                         |    2 +-
+ include/asm-generic/softirq_stack.h           |    2 +-
+ include/linux/blk-mq.h                        |   11 +
+ include/linux/console.h                       |   36 +-
+ include/linux/dcache.h                        |    4 +-
+ include/linux/entry-common.h                  |    8 +-
+ include/linux/interrupt.h                     |   16 +
+ include/linux/irqdesc.h                       |    1 +
+ include/linux/irqflags.h                      |   23 +-
+ include/linux/local_lock_internal.h           |    6 +-
+ include/linux/netdevice.h                     |   13 +-
+ include/linux/nfs_xdr.h                       |    2 +-
+ include/linux/preempt.h                       |   63 +-
+ include/linux/printk.h                        |   59 +-
+ include/linux/random.h                        |    3 +-
+ include/linux/ratelimit_types.h               |    2 +-
+ include/linux/rcupdate.h                      |    7 +
+ include/linux/rtmutex.h                       |    9 +
+ include/linux/rwlock.h                        |    6 +
+ include/linux/rwlock_api_smp.h                |    8 +
+ include/linux/rwlock_rt.h                     |   10 +
+ include/linux/sched.h                         |  130 +-
+ include/linux/sched/task_stack.h              |   10 +
+ include/linux/serial_8250.h                   |    5 +
+ include/linux/smp.h                           |    3 +
+ include/linux/spinlock_api_up.h               |    1 +
+ include/linux/spinlock_types_up.h             |    2 +-
+ include/linux/thread_info.h                   |   12 +-
+ include/linux/trace_events.h                  |    5 +-
+ include/linux/u64_stats_sync.h                |   42 +-
+ include/trace/events/net.h                    |   14 -
+ init/Kconfig                                  |    4 +
+ init/main.c                                   |    1 +
+ kernel/Kconfig.preempt                        |    6 +
+ kernel/cgroup/rstat.c                         |    5 +-
+ kernel/entry/common.c                         |   12 +-
+ kernel/exit.c                                 |    2 +
+ kernel/fork.c                                 |  262 ++--
+ kernel/irq/chip.c                             |    4 +-
+ kernel/irq/handle.c                           |   11 +-
+ kernel/irq/internals.h                        |    2 +-
+ kernel/irq/irqdesc.c                          |   21 +
+ kernel/irq/manage.c                           |    3 +
+ kernel/ksysfs.c                               |   12 +
+ kernel/locking/lockdep.c                      |    2 +
+ kernel/locking/rtmutex.c                      |    5 +-
+ kernel/locking/rtmutex_api.c                  |   30 +-
+ kernel/locking/spinlock.c                     |   10 +
+ kernel/locking/spinlock_rt.c                  |   18 +-
+ kernel/locking/ww_rt_mutex.c                  |    2 +-
+ kernel/panic.c                                |   25 +-
+ kernel/printk/printk.c                        | 1088 +++++++++++----
+ kernel/ptrace.c                               |   38 +-
+ kernel/rcu/tasks.h                            |    9 +-
+ kernel/rcu/tree.c                             |    7 +-
+ kernel/sched/core.c                           |   93 +-
+ kernel/sched/fair.c                           |   16 +-
+ kernel/sched/features.h                       |    3 +
+ kernel/sched/sched.h                          |    9 +
+ kernel/sched/swait.c                          |    1 +
+ kernel/signal.c                               |   36 +-
+ kernel/smp.c                                  |   14 +-
+ kernel/softirq.c                              |   76 +
+ kernel/time/hrtimer.c                         |    4 +-
+ kernel/time/timer.c                           |    2 +-
+ kernel/trace/trace.c                          |   46 +-
+ kernel/trace/trace_events.c                   |    1 +
+ kernel/trace/trace_output.c                   |   14 +-
+ lib/dump_stack.c                              |    4 +-
+ lib/irq_poll.c                                |    2 +
+ lib/locking-selftest.c                        |  172 ++-
+ lib/nmi_backtrace.c                           |    4 +-
+ mm/memcontrol.c                               | 1241 +++++++++--------
+ mm/vmalloc.c                                  |   10 +-
+ mm/workingset.c                               |    5 +-
+ mm/zsmalloc.c                                 |  529 +++----
+ net/core/dev.c                                |  139 +-
+ net/core/link_watch.c                         |    4 +-
+ net/core/rtnetlink.c                          |    8 +-
+ net/hsr/hsr_device.c                          |    6 +-
+ net/ipv4/inet_hashtables.c                    |   53 +-
+ net/ipv6/inet6_hashtables.c                   |    5 +-
+ net/sunrpc/svc_xprt.c                         |    4 +-
+ 182 files changed, 3652 insertions(+), 1933 deletions(-)
 
-diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
-index d2c4c27e1702..d83c9ab49427 100644
---- a/Documentation/dev-tools/kcov.rst
-+++ b/Documentation/dev-tools/kcov.rst
-@@ -50,6 +50,7 @@ program using kcov:
-     #include <sys/mman.h>
-     #include <unistd.h>
-     #include <fcntl.h>
-+    #include <linux/types.h>
- 
-     #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
-     #define KCOV_ENABLE			_IO('c', 100)
-@@ -177,6 +178,8 @@ Comparison operands collection is similar to coverage collection:
- 	/* Read number of comparisons collected. */
- 	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
- 	for (i = 0; i < n; i++) {
-+		uint64_t ip;
-+
- 		type = cover[i * KCOV_WORDS_PER_CMP + 1];
- 		/* arg1 and arg2 - operands of the comparison. */
- 		arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
-@@ -251,6 +254,8 @@ selectively from different subsystems.
- 
- .. code-block:: c
- 
-+    /* Same includes and defines as above. */
-+
-     struct kcov_remote_arg {
- 	__u32		trace_mode;
- 	__u32		area_size;
+diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
+index faac50149a22..2cc502a75ef6 100644
+--- a/Documentation/admin-guide/cgroup-v1/memory.rst
++++ b/Documentation/admin-guide/cgroup-v1/memory.rst
+@@ -64,6 +64,7 @@ Brief summary of control files.
+ 				     threads
+  cgroup.procs			     show list of processes
+  cgroup.event_control		     an interface for event_fd()
++				     This knob is not available on CONFIG_PREEMPT_RT systems.
+  memory.usage_in_bytes		     show current usage for memory
+ 				     (See 5.5 for details)
+  memory.memsw.usage_in_bytes	     show current usage for memory+Swap
+@@ -75,6 +76,7 @@ Brief summary of control files.
+  memory.max_usage_in_bytes	     show max memory usage recorded
+  memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
+  memory.soft_limit_in_bytes	     set/show soft limit of memory usage
++				     This knob is not available on CONFIG_PREEMPT_RT systems.
+  memory.stat			     show various statistics
+  memory.use_hierarchy		     set/show hierarchical account enabled
+                                      This knob is deprecated and shouldn't be
 diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
 index 1d5716bc060b..2526fd3be5fd 100644
 --- a/arch/alpha/include/asm/spinlock_types.h
@@ -294,10 +225,10 @@ index 1d5716bc060b..2526fd3be5fd 100644
  #endif
  
 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
-index 4ebd512043be..5ac2009727bd 100644
+index c2724d986fa0..7496417526be 100644
 --- a/arch/arm/Kconfig
 +++ b/arch/arm/Kconfig
-@@ -32,6 +32,7 @@ config ARM
+@@ -33,6 +33,7 @@ config ARM
  	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
  	select ARCH_SUPPORTS_ATOMIC_RMW
  	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
@@ -329,7 +260,7 @@ index 4ebd512043be..5ac2009727bd 100644
 +	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
  	select RTC_LIB
  	select SYS_SUPPORTS_APM_EMULATION
- 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
+ 	select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO
 diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
 index 5976958647fe..0c14b36ef101 100644
 --- a/arch/arm/include/asm/spinlock_types.h
@@ -344,18 +275,18 @@ index 5976958647fe..0c14b36ef101 100644
  #endif
  
 diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
-index 9a18da3e10cc..2fa63d96a4f0 100644
+index 164e15f26485..666da94ed9b7 100644
 --- a/arch/arm/include/asm/thread_info.h
 +++ b/arch/arm/include/asm/thread_info.h
-@@ -52,6 +52,7 @@ struct cpu_context_save {
+@@ -54,6 +54,7 @@ struct cpu_context_save {
  struct thread_info {
  	unsigned long		flags;		/* low level flags */
  	int			preempt_count;	/* 0 => preemptable, <0 => bug */
 +	int			preempt_lazy_count; /* 0 => preemptable, <0 => bug */
+ #ifndef CONFIG_THREAD_INFO_IN_TASK
  	struct task_struct	*task;		/* main task structure */
- 	__u32			cpu;		/* cpu */
- 	__u32			cpu_domain;	/* cpu domain */
-@@ -134,6 +135,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #endif
+@@ -152,6 +153,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  #define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
  #define TIF_SECCOMP		7	/* seccomp syscall filtering active */
  #define TIF_NOTIFY_SIGNAL	8	/* signal notifications exist */
@@ -363,7 +294,7 @@ index 9a18da3e10cc..2fa63d96a4f0 100644
  
  #define TIF_USING_IWMMXT	17
  #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+@@ -166,6 +168,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
  #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
@@ -371,7 +302,7 @@ index 9a18da3e10cc..2fa63d96a4f0 100644
  #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
  
  /* Checks for any syscall work in entry-common.S */
-@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+@@ -175,7 +178,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
  /*
   * Change these and you break ASM code in entry-common.S
   */
@@ -382,7 +313,7 @@ index 9a18da3e10cc..2fa63d96a4f0 100644
  				 _TIF_NOTIFY_SIGNAL)
  
 diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
-index a646a3f6440f..beb09d74684f 100644
+index 645845e4982a..73e321c6d152 100644
 --- a/arch/arm/kernel/asm-offsets.c
 +++ b/arch/arm/kernel/asm-offsets.c
 @@ -43,6 +43,7 @@ int main(void)
@@ -390,14 +321,14 @@ index a646a3f6440f..beb09d74684f 100644
    DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
    DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
 +  DEFINE(TI_PREEMPT_LAZY,	offsetof(struct thread_info, preempt_lazy_count));
+ #ifndef CONFIG_THREAD_INFO_IN_TASK
    DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
-   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
+ #endif
 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
-index 241b73d64df7..f3a9dd2e98c6 100644
+index 5cd057859fe9..4db90d80d175 100644
 --- a/arch/arm/kernel/entry-armv.S
 +++ b/arch/arm/kernel/entry-armv.S
-@@ -206,11 +206,18 @@ __irq_svc:
+@@ -203,11 +203,18 @@ __irq_svc:
  
  #ifdef CONFIG_PREEMPTION
  	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
@@ -418,7 +349,7 @@ index 241b73d64df7..f3a9dd2e98c6 100644
  #endif
  
  	svc_exit r5, irq = 1			@ return from exception
-@@ -225,8 +232,14 @@ svc_preempt:
+@@ -222,8 +229,14 @@ svc_preempt:
  1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
  	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
  	tst	r0, #_TIF_NEED_RESCHED
@@ -448,25 +379,11 @@ index a41e27ace391..1e29cec7716f 100644
  			schedule();
  		} else {
  			if (unlikely(!user_mode(regs)))
-diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
-index 842427ff2b3c..b943e2df9540 100644
---- a/arch/arm/kernel/smp.c
-+++ b/arch/arm/kernel/smp.c
-@@ -667,9 +667,7 @@ static void do_handle_IPI(int ipinr)
- 		break;
- 
- 	case IPI_CPU_BACKTRACE:
--		printk_deferred_enter();
- 		nmi_cpu_backtrace(get_irq_regs());
--		printk_deferred_exit();
- 		break;
- 
- 	default:
 diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
-index efa402025031..59487ee9fd61 100644
+index bc8779d54a64..12dba4284b21 100644
 --- a/arch/arm/mm/fault.c
 +++ b/arch/arm/mm/fault.c
-@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+@@ -407,6 +407,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  	if (addr < TASK_SIZE)
  		return do_page_fault(addr, fsr, regs);
  
@@ -476,7 +393,7 @@ index efa402025031..59487ee9fd61 100644
  	if (user_mode(regs))
  		goto bad_area;
  
-@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+@@ -477,6 +480,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
  static int
  do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
  {
@@ -487,14 +404,14 @@ index efa402025031..59487ee9fd61 100644
  	return 0;
  }
 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
-index fee914c716aa..aeaa2c2ecc23 100644
+index c4207cf9bb17..260866cf53c9 100644
 --- a/arch/arm64/Kconfig
 +++ b/arch/arm64/Kconfig
-@@ -88,6 +88,7 @@ config ARM64
+@@ -89,6 +89,7 @@ config ARM64
  	select ARCH_SUPPORTS_ATOMIC_RMW
  	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
  	select ARCH_SUPPORTS_NUMA_BALANCING
-+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
++	select ARCH_SUPPORTS_RT
  	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
  	select ARCH_WANT_DEFAULT_BPF_JIT
  	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -503,19 +420,11 @@ index fee914c716aa..aeaa2c2ecc23 100644
  	select HAVE_PERF_USER_STACK_DUMP
  	select HAVE_REGS_AND_STACK_ACCESS_API
 +	select HAVE_PREEMPT_LAZY
+ 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
  	select HAVE_FUNCTION_ARG_ACCESS_API
  	select HAVE_FUTEX_CMPXCHG if FUTEX
- 	select MMU_GATHER_RCU_TABLE_FREE
-@@ -213,6 +215,7 @@ config ARM64
- 	select PCI_DOMAINS_GENERIC if PCI
- 	select PCI_ECAM if (ACPI && PCI)
- 	select PCI_SYSCALL if PCI
-+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
- 	select POWER_RESET
- 	select POWER_SUPPLY
- 	select SPARSE_IRQ
 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
-index 72f95c6a7051..30fe8c324ce6 100644
+index c4ba047a82d2..7c83a6655d1c 100644
 --- a/arch/arm64/include/asm/pgtable.h
 +++ b/arch/arm64/include/asm/pgtable.h
 @@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
@@ -596,7 +505,7 @@ index 18782f0c4721..11ab1c077697 100644
  #endif
  
 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
-index 6623c99f0984..c55ccec33a5a 100644
+index e1317b7c4525..861594d9662d 100644
 --- a/arch/arm64/include/asm/thread_info.h
 +++ b/arch/arm64/include/asm/thread_info.h
 @@ -26,6 +26,7 @@ struct thread_info {
@@ -607,7 +516,7 @@ index 6623c99f0984..c55ccec33a5a 100644
  	union {
  		u64		preempt_count;	/* 0 => preemptible, <0 => bug */
  		struct {
-@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst,
+@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst,
  #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
  #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
  #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
@@ -615,7 +524,7 @@ index 6623c99f0984..c55ccec33a5a 100644
  #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
  #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
  #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
-@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst,
+@@ -98,8 +100,10 @@ int arch_dup_task_struct(struct task_struct *dst,
  #define _TIF_SVE		(1 << TIF_SVE)
  #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
@@ -627,7 +536,7 @@ index 6623c99f0984..c55ccec33a5a 100644
  				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
  				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
  				 _TIF_NOTIFY_SIGNAL)
-@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst,
+@@ -108,6 +112,8 @@ int arch_dup_task_struct(struct task_struct *dst,
  				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
  				 _TIF_SYSCALL_EMU)
  
@@ -637,11 +546,11 @@ index 6623c99f0984..c55ccec33a5a 100644
  #define INIT_SCS							\
  	.scs_base	= init_shadow_call_stack,			\
 diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
-index 551427ae8cc5..96a4f6c9eb78 100644
+index 6d0c3afd36b8..9b11f996b8ea 100644
 --- a/arch/arm64/kernel/asm-offsets.c
 +++ b/arch/arm64/kernel/asm-offsets.c
-@@ -31,6 +31,7 @@ int main(void)
-   BLANK();
+@@ -32,6 +32,7 @@ int main(void)
+   DEFINE(TSK_TI_CPU,		offsetof(struct task_struct, thread_info.cpu));
    DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
    DEFINE(TSK_TI_PREEMPT,	offsetof(struct task_struct, thread_info.preempt_count));
 +  DEFINE(TSK_TI_PREEMPT_LAZY,	offsetof(struct task_struct, thread_info.preempt_lazy_count));
@@ -649,10 +558,10 @@ index 551427ae8cc5..96a4f6c9eb78 100644
    DEFINE(TSK_TI_TTBR0,		offsetof(struct task_struct, thread_info.ttbr0));
  #endif
 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
-index ff4962750b3d..99484e8bbade 100644
+index fa244c426f61..38ebf3382002 100644
 --- a/arch/arm64/kernel/fpsimd.c
 +++ b/arch/arm64/kernel/fpsimd.c
-@@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(void)
+@@ -201,10 +201,19 @@ static void __get_cpu_fpsimd_context(void)
   *
   * The double-underscore version must only be called if you know the task
   * can't be preempted.
@@ -673,7 +582,7 @@ index ff4962750b3d..99484e8bbade 100644
  	__get_cpu_fpsimd_context();
  }
  
-@@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(void)
+@@ -225,7 +234,10 @@ static void __put_cpu_fpsimd_context(void)
  static void put_cpu_fpsimd_context(void)
  {
  	__put_cpu_fpsimd_context();
@@ -685,15 +594,16 @@ index ff4962750b3d..99484e8bbade 100644
  }
  
  static bool have_cpu_fpsimd_context(void)
-@@ -1033,6 +1045,7 @@ void fpsimd_thread_switch(struct task_struct *next)
+@@ -1125,6 +1137,8 @@ static void fpsimd_flush_thread_vl(enum vec_type type)
+ 
  void fpsimd_flush_thread(void)
  {
- 	int vl, supported_vl;
 +	void *sve_state = NULL;
- 
++
  	if (!system_supports_fpsimd())
  		return;
-@@ -1045,7 +1058,10 @@ void fpsimd_flush_thread(void)
+ 
+@@ -1136,11 +1150,16 @@ void fpsimd_flush_thread(void)
  
  	if (system_supports_sve()) {
  		clear_thread_flag(TIF_SVE);
@@ -702,10 +612,8 @@ index ff4962750b3d..99484e8bbade 100644
 +		/* Defer kfree() while in atomic context */
 +		sve_state = current->thread.sve_state;
 +		current->thread.sve_state = NULL;
- 
- 		/*
- 		 * Reset the task vector length as required.
-@@ -1079,6 +1095,7 @@ void fpsimd_flush_thread(void)
++
+ 		fpsimd_flush_thread_vl(ARM64_VEC_SVE);
  	}
  
  	put_cpu_fpsimd_context();
@@ -714,7 +622,7 @@ index ff4962750b3d..99484e8bbade 100644
  
  /*
 diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
-index c287b9407f28..1d65f2801e13 100644
+index 8f6372b44b65..ab23598fdeb0 100644
 --- a/arch/arm64/kernel/signal.c
 +++ b/arch/arm64/kernel/signal.c
 @@ -920,7 +920,7 @@ static void do_signal(struct pt_regs *regs)
@@ -742,10 +650,10 @@ index c287b9407f28..1d65f2801e13 100644
  				uprobe_notify_resume(regs);
  
 diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
-index 9b328bb05596..12b29d851012 100644
+index e4727dc771bf..08508fc5fa17 100644
 --- a/arch/arm64/kvm/arm.c
 +++ b/arch/arm64/kvm/arm.c
-@@ -811,7 +811,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+@@ -820,7 +820,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  		 * involves poking the GIC, which must be done in a
  		 * non-preemptible context.
  		 */
@@ -754,7 +662,7 @@ index 9b328bb05596..12b29d851012 100644
  
  		kvm_pmu_flush_hwstate(vcpu);
  
-@@ -835,7 +835,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+@@ -844,7 +844,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  				kvm_timer_sync_user(vcpu);
  			kvm_vgic_sync_hwstate(vcpu);
  			local_irq_enable();
@@ -763,7 +671,7 @@ index 9b328bb05596..12b29d851012 100644
  			continue;
  		}
  
-@@ -907,7 +907,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+@@ -916,7 +916,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  		/* Exit types that need handling before we can be preempted */
  		handle_exit_early(vcpu, ret);
  
@@ -811,11 +719,34 @@ index 6e345fefcdca..14b8a161c165 100644
  # error "please don't include this file directly"
  #endif
  
+diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
+index 51d20cb37706..1684716f0820 100644
+--- a/arch/ia64/include/asm/thread_info.h
++++ b/arch/ia64/include/asm/thread_info.h
+@@ -55,15 +55,15 @@ struct thread_info {
+ #ifndef ASM_OFFSETS_C
+ /* how to get the thread information struct from C */
+ #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
+-#define alloc_thread_stack_node(tsk, node)	\
++#define arch_alloc_thread_stack_node(tsk, node)	\
+ 		((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
+ #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+ #else
+ #define current_thread_info()	((struct thread_info *) 0)
+-#define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
++#define arch_alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
+ #define task_thread_info(tsk)	((struct thread_info *) 0)
+ #endif
+-#define free_thread_stack(tsk)	/* nothing */
++#define arch_free_thread_stack(tsk)	/* nothing */
+ #define task_stack_page(tsk)	((void *)(tsk))
+ 
+ #define __HAVE_THREAD_FUNCTIONS
 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
-index 6b9f523882c5..3eec2a6395e4 100644
+index dea74d7717c0..6255e4d37539 100644
 --- a/arch/powerpc/Kconfig
 +++ b/arch/powerpc/Kconfig
-@@ -151,6 +151,7 @@ config PPC
+@@ -153,6 +153,7 @@ config PPC
  	select ARCH_STACKWALK
  	select ARCH_SUPPORTS_ATOMIC_RMW
  	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
@@ -823,7 +754,7 @@ index 6b9f523882c5..3eec2a6395e4 100644
  	select ARCH_USE_BUILTIN_BSWAP
  	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
  	select ARCH_USE_MEMTEST
-@@ -219,6 +220,7 @@ config PPC
+@@ -221,6 +222,7 @@ config PPC
  	select HAVE_IOREMAP_PROT
  	select HAVE_IRQ_EXIT_ON_IRQ_STACK
  	select HAVE_IRQ_TIME_ACCOUNTING
@@ -831,7 +762,7 @@ index 6b9f523882c5..3eec2a6395e4 100644
  	select HAVE_KERNEL_GZIP
  	select HAVE_KERNEL_LZMA			if DEFAULT_UIMAGE
  	select HAVE_KERNEL_LZO			if DEFAULT_UIMAGE
-@@ -235,6 +237,7 @@ config PPC
+@@ -237,6 +239,7 @@ config PPC
  	select HAVE_PERF_EVENTS_NMI		if PPC64
  	select HAVE_PERF_REGS
  	select HAVE_PERF_USER_STACK_DUMP
@@ -852,18 +783,6 @@ index 0f3cdd8faa95..08243338069d 100644
  # error "please don't include this file directly"
  #endif
  
-diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
-index 7ef1cd8168a0..f9e63cacd220 100644
---- a/arch/powerpc/include/asm/smp.h
-+++ b/arch/powerpc/include/asm/smp.h
-@@ -62,6 +62,7 @@ struct smp_ops_t {
- 
- extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
- extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
-+extern void smp_send_debugger_break_cpu(unsigned int cpu);
- extern void smp_send_debugger_break(void);
- extern void start_secondary_resume(void);
- extern void smp_generic_give_timebase(void);
 diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
 index c5d742f18021..d5f8a74ed2e8 100644
 --- a/arch/powerpc/include/asm/spinlock_types.h
@@ -894,19 +813,27 @@ index 1c8460e23583..b1653c160bab 100644
  	canary ^= LINUX_VERSION_CODE;
  	canary &= CANARY_MASK;
 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
-index b4ec6c7dd72e..07df83231ec2 100644
+index 5725029aaa29..829315ee9c56 100644
 --- a/arch/powerpc/include/asm/thread_info.h
 +++ b/arch/powerpc/include/asm/thread_info.h
 @@ -47,6 +47,8 @@
  struct thread_info {
  	int		preempt_count;		/* 0 => preemptable,
  						   <0 => BUG */
-+	int             preempt_lazy_count;	/* 0 => preemptable,
++	int		preempt_lazy_count;	/* 0 => preemptable,
 +						   <0 => BUG */
- 	unsigned long	local_flags;		/* private flags for thread */
- #ifdef CONFIG_LIVEPATCH
- 	unsigned long *livepatch_sp;
-@@ -93,6 +95,7 @@ void arch_setup_new_exec(void);
+ #ifdef CONFIG_SMP
+ 	unsigned int	cpu;
+ #endif
+@@ -71,6 +73,7 @@ struct thread_info {
+ #define INIT_THREAD_INFO(tsk)			\
+ {						\
+ 	.preempt_count = INIT_PREEMPT_COUNT,	\
++	.preempt_lazy_count = 0,		\
+ 	.flags =	0,			\
+ }
+ 
+@@ -96,6 +99,7 @@ void arch_setup_new_exec(void);
  #define TIF_PATCH_PENDING	6	/* pending live patching update */
  #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
  #define TIF_SINGLESTEP		8	/* singlestepping active */
@@ -914,7 +841,7 @@ index b4ec6c7dd72e..07df83231ec2 100644
  #define TIF_SECCOMP		10	/* secure computing */
  #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
  #define TIF_NOERROR		12	/* Force successful syscall return */
-@@ -108,6 +111,7 @@ void arch_setup_new_exec(void);
+@@ -111,6 +115,7 @@ void arch_setup_new_exec(void);
  #define TIF_POLLING_NRFLAG	19	/* true if poll_idle() is polling TIF_NEED_RESCHED */
  #define TIF_32BIT		20	/* 32 bit binary */
  
@@ -922,7 +849,7 @@ index b4ec6c7dd72e..07df83231ec2 100644
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
  #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-@@ -119,6 +123,7 @@ void arch_setup_new_exec(void);
+@@ -122,6 +127,7 @@ void arch_setup_new_exec(void);
  #define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
  #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
  #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
@@ -930,7 +857,7 @@ index b4ec6c7dd72e..07df83231ec2 100644
  #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
  #define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
  #define _TIF_NOERROR		(1<<TIF_NOERROR)
-@@ -132,10 +137,12 @@ void arch_setup_new_exec(void);
+@@ -135,10 +141,12 @@ void arch_setup_new_exec(void);
  				 _TIF_SYSCALL_EMU)
  
  #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
@@ -1006,47 +933,6 @@ index c4f1d6b7d992..02e17a57da83 100644
  
  irq_hw_number_t virq_to_hw(unsigned int virq)
  {
-diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
-index bdee7262c080..d57d37497862 100644
---- a/arch/powerpc/kernel/kgdb.c
-+++ b/arch/powerpc/kernel/kgdb.c
-@@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
- 
- static int kgdb_debugger_ipi(struct pt_regs *regs)
- {
--	kgdb_nmicallback(raw_smp_processor_id(), regs);
-+	int cpu = raw_smp_processor_id();
-+
-+	if (!kgdb_roundup_delay(cpu))
-+		kgdb_nmicallback(cpu, regs);
- 	return 0;
- }
- 
- #ifdef CONFIG_SMP
-+void kgdb_roundup_cpu(unsigned int cpu)
-+{
-+	smp_send_debugger_break_cpu(cpu);
-+}
-+
- void kgdb_roundup_cpus(void)
- {
- 	smp_send_debugger_break();
-diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
-index 605bab448f84..26c2179cf64a 100644
---- a/arch/powerpc/kernel/smp.c
-+++ b/arch/powerpc/kernel/smp.c
-@@ -589,6 +589,11 @@ static void debugger_ipi_callback(struct pt_regs *regs)
- 	debugger_ipi(regs);
- }
- 
-+void smp_send_debugger_break_cpu(unsigned int cpu)
-+{
-+	smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
-+}
-+
- void smp_send_debugger_break(void)
- {
- 	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
 index 11741703d26e..7e4e1f489f56 100644
 --- a/arch/powerpc/kernel/traps.c
@@ -1070,20 +956,6 @@ index 11741703d26e..7e4e1f489f56 100644
  	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
  	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
  	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
-diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
-index 22ceeeb705ab..d5359701f787 100644
---- a/arch/powerpc/kexec/crash.c
-+++ b/arch/powerpc/kexec/crash.c
-@@ -312,9 +312,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
- 	unsigned int i;
- 	int (*old_handler)(struct pt_regs *regs);
- 
--	/* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
--	printk_deferred_enter();
--
- 	/*
- 	 * This function is only called after the system
- 	 * has panicked or is otherwise in a critical state.
 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
 index ff581d70f20c..e5c84d55bdfb 100644
 --- a/arch/powerpc/kvm/Kconfig
@@ -1097,7 +969,7 @@ index ff581d70f20c..e5c84d55bdfb 100644
  	select HAVE_KVM_IRQFD
  	select HAVE_KVM_IRQ_ROUTING
 diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
-index 8322ca86d5ac..f524145d7dd3 100644
+index 8f998e55735b..637b015d6900 100644
 --- a/arch/powerpc/platforms/pseries/iommu.c
 +++ b/arch/powerpc/platforms/pseries/iommu.c
 @@ -24,6 +24,7 @@
@@ -1271,10 +1143,10 @@ index c8848bb681a1..41fa1be980a3 100644
  #ifdef CONFIG_HOTPLUG_CPU
  void fixup_irqs(void)
 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
-index 1f96809606ac..3493d2b94530 100644
+index 5c2ccb85f2ef..34ce7f969e28 100644
 --- a/arch/x86/Kconfig
 +++ b/arch/x86/Kconfig
-@@ -107,6 +107,7 @@ config X86
+@@ -108,6 +108,7 @@ config X86
  	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
  	select ARCH_SUPPORTS_LTO_CLANG
  	select ARCH_SUPPORTS_LTO_CLANG_THIN
@@ -1282,7 +1154,7 @@ index 1f96809606ac..3493d2b94530 100644
  	select ARCH_USE_BUILTIN_BSWAP
  	select ARCH_USE_MEMTEST
  	select ARCH_USE_QUEUED_RWLOCKS
-@@ -230,6 +231,7 @@ config X86
+@@ -234,6 +235,7 @@ config X86
  	select HAVE_PCI
  	select HAVE_PERF_REGS
  	select HAVE_PERF_USER_STACK_DUMP
@@ -1290,27 +1162,18 @@ index 1f96809606ac..3493d2b94530 100644
  	select MMU_GATHER_RCU_TABLE_FREE		if PARAVIRT
  	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
  	select HAVE_REGS_AND_STACK_ACCESS_API
-diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
-index 8d55bd11848c..f755c217f67b 100644
---- a/arch/x86/include/asm/irq_stack.h
-+++ b/arch/x86/include/asm/irq_stack.h
-@@ -201,6 +201,7 @@
- 			      IRQ_CONSTRAINTS, regs, vector);		\
- }
- 
-+#ifndef CONFIG_PREEMPT_RT
- /*
-  * Macro to invoke __do_softirq on the irq stack. This is only called from
-  * task context when bottom halves are about to be reenabled and soft
-@@ -214,6 +215,8 @@
- 	__this_cpu_write(hardirq_stack_inuse, false);			\
- }
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 448cd01eb3ec..a34430b7af4a 100644
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -22,6 +22,7 @@
+ #define pgprot_decrypted(prot)	__pgprot(__sme_clr(pgprot_val(prot)))
  
-+#endif
-+
- #else /* CONFIG_X86_64 */
- /* System vector handlers always run on the stack they interrupted. */
- #define run_sysvec_on_irqstack_cond(func, regs)				\
+ #ifndef __ASSEMBLY__
++#include <linux/spinlock.h>
+ #include <asm/x86_init.h>
+ #include <asm/pkru.h>
+ #include <asm/fpu/api.h>
 diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
 index fe5efbcba824..ab8cb5fc2329 100644
 --- a/arch/x86/include/asm/preempt.h
@@ -1389,46 +1252,20 @@ index 2dfb5fea13af..fc03f4f7ed84 100644
  #ifndef CONFIG_COMPAT
  #define compat_sigset_t compat_sigset_t
  typedef sigset_t compat_sigset_t;
-diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
-index 24a8d6c4fb18..2fc22c27df18 100644
---- a/arch/x86/include/asm/stackprotector.h
-+++ b/arch/x86/include/asm/stackprotector.h
-@@ -50,7 +50,7 @@
-  */
- static __always_inline void boot_init_stack_canary(void)
- {
--	u64 canary;
-+	u64 canary = 0;
- 	u64 tsc;
- 
- #ifdef CONFIG_X86_64
-@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void)
- 	 * of randomness. The TSC only matters for very early init,
- 	 * there it already has some randomness on most systems. Later
- 	 * on during the bootup the random pool has true entropy too.
-+	 * For preempt-rt we need to weaken the randomness a bit, as
-+	 * we can't call into the random generator from atomic context
-+	 * due to locking constraints. We just leave canary
-+	 * uninitialized and use the TSC based randomness on top of it.
- 	 */
-+#ifndef CONFIG_PREEMPT_RT
- 	get_random_bytes(&canary, sizeof(canary));
-+#endif
- 	tsc = rdtsc();
- 	canary += tsc + (tsc << 32UL);
- 	canary &= CANARY_MASK;
 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
-index cf132663c219..75dc786e6365 100644
+index ebec69c35e95..39005bff5b8f 100644
 --- a/arch/x86/include/asm/thread_info.h
 +++ b/arch/x86/include/asm/thread_info.h
-@@ -57,11 +57,14 @@ struct thread_info {
+@@ -57,6 +57,8 @@ struct thread_info {
  	unsigned long		flags;		/* low level flags */
  	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
  	u32			status;		/* thread synchronous flags */
 +	int			preempt_lazy_count;	/* 0 => lazy preemptable
-+							   <0 => BUG */
- };
- 
++							  <0 => BUG */
+ #ifdef CONFIG_SMP
+ 	u32			cpu;		/* current CPU */
+ #endif
+@@ -65,6 +67,7 @@ struct thread_info {
  #define INIT_THREAD_INFO(tsk)			\
  {						\
  	.flags		= 0,			\
@@ -1436,7 +1273,7 @@ index cf132663c219..75dc786e6365 100644
  }
  
  #else /* !__ASSEMBLY__ */
-@@ -90,6 +93,7 @@ struct thread_info {
+@@ -93,6 +96,7 @@ struct thread_info {
  #define TIF_NOTSC		16	/* TSC is not accessible in userland */
  #define TIF_NOTIFY_SIGNAL	17	/* signal notifications exist */
  #define TIF_SLD			18	/* Restore split lock detection on context switch */
@@ -1444,7 +1281,7 @@ index cf132663c219..75dc786e6365 100644
  #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
  #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
  #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
-@@ -114,6 +118,7 @@ struct thread_info {
+@@ -117,6 +121,7 @@ struct thread_info {
  #define _TIF_NOTSC		(1 << TIF_NOTSC)
  #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
  #define _TIF_SLD		(1 << TIF_SLD)
@@ -1453,185 +1290,35 @@ index cf132663c219..75dc786e6365 100644
  #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
  #define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
-index ef6316fef99f..86974cd60942 100644
+index ff55df60228f..2a0f83678911 100644
 --- a/arch/x86/kernel/cpu/mshyperv.c
 +++ b/arch/x86/kernel/cpu/mshyperv.c
-@@ -75,11 +75,12 @@ void hv_remove_vmbus_handler(void)
- DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
- {
- 	struct pt_regs *old_regs = set_irq_regs(regs);
-+	u64 ip = regs ? instruction_pointer(regs) : 0;
- 
+@@ -79,7 +79,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
  	inc_irq_stat(hyperv_stimer0_count);
  	if (hv_stimer0_handler)
  		hv_stimer0_handler();
 -	add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
-+	add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0, ip);
++	add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
  	ack_APIC_irq();
  
  	set_irq_regs(old_regs);
-diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
-index 722fd712e1cf..82cc3a7be6bd 100644
---- a/arch/x86/kernel/dumpstack_32.c
-+++ b/arch/x86/kernel/dumpstack_32.c
-@@ -141,7 +141,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
- 	 */
- 	if (visit_mask) {
- 		if (*visit_mask & (1UL << info->type)) {
--			printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
-+			pr_warn_once("WARNING: stack recursion on stack type %d\n", info->type);
- 			goto unknown;
- 		}
- 		*visit_mask |= 1UL << info->type;
-diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
-index 6c5defd6569a..5f725b0ceb29 100644
---- a/arch/x86/kernel/dumpstack_64.c
-+++ b/arch/x86/kernel/dumpstack_64.c
-@@ -207,7 +207,8 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
- 	if (visit_mask) {
- 		if (*visit_mask & (1UL << info->type)) {
- 			if (task == current)
--				printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
-+				pr_warn_once("WARNING: stack recursion on stack type %d\n",
-+					     info->type);
- 			goto unknown;
- 		}
- 		*visit_mask |= 1UL << info->type;
-diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
-index 15aefa3f3e18..52af9a89ad47 100644
---- a/arch/x86/kernel/i8259.c
-+++ b/arch/x86/kernel/i8259.c
-@@ -207,8 +207,7 @@ static void mask_and_ack_8259A(struct irq_data *data)
- 		 * lets ACK and report it. [once per IRQ]
- 		 */
- 		if (!(spurious_irq_mask & irqmask)) {
--			printk_deferred(KERN_DEBUG
--			       "spurious 8259A interrupt: IRQ%d.\n", irq);
-+			printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq);
- 			spurious_irq_mask |= irqmask;
- 		}
- 		atomic_inc(&irq_err_count);
-diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
-index 044902d5a3c4..e5dd6da78713 100644
---- a/arch/x86/kernel/irq_32.c
-+++ b/arch/x86/kernel/irq_32.c
-@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
- 	return 0;
- }
- 
-+#ifndef CONFIG_PREEMPT_RT
- void do_softirq_own_stack(void)
- {
- 	struct irq_stack *irqstk;
-@@ -148,6 +149,7 @@ void do_softirq_own_stack(void)
- 
- 	call_on_stack(__do_softirq, isp);
- }
-+#endif
- 
- void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
- {
-diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
-index 3a43a2dee658..37bd37cdf2b6 100644
---- a/arch/x86/kernel/kgdb.c
-+++ b/arch/x86/kernel/kgdb.c
-@@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
- 		if (atomic_read(&kgdb_active) != -1) {
- 			/* KGDB CPU roundup */
- 			cpu = raw_smp_processor_id();
--			kgdb_nmicallback(cpu, regs);
--			set_bit(cpu, was_in_debug_nmi);
--			touch_nmi_watchdog();
-+
-+			if (!kgdb_roundup_delay(cpu)) {
-+				kgdb_nmicallback(cpu, regs);
-+				set_bit(cpu, was_in_debug_nmi);
-+				touch_nmi_watchdog();
-+			}
- 
- 			return NMI_HANDLED;
- 		}
-diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
-index d7c44b257f7f..2d0361cd304f 100644
---- a/arch/x86/kernel/unwind_frame.c
-+++ b/arch/x86/kernel/unwind_frame.c
-@@ -41,9 +41,9 @@ static void unwind_dump(struct unwind_state *state)
- 
- 	dumped_before = true;
- 
--	printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
--			state->stack_info.type, state->stack_info.next_sp,
--			state->stack_mask, state->graph_idx);
-+	printk("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
-+	       state->stack_info.type, state->stack_info.next_sp,
-+	       state->stack_mask, state->graph_idx);
- 
- 	for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
- 	     sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
-@@ -59,13 +59,11 @@ static void unwind_dump(struct unwind_state *state)
- 
- 			if (zero) {
- 				if (!prev_zero)
--					printk_deferred("%p: %0*x ...\n",
--							sp, BITS_PER_LONG/4, 0);
-+					printk("%p: %0*x ...\n", sp, BITS_PER_LONG/4, 0);
- 				continue;
- 			}
- 
--			printk_deferred("%p: %0*lx (%pB)\n",
--					sp, BITS_PER_LONG/4, word, (void *)word);
-+			printk("%p: %0*lx (%pB)\n", sp, BITS_PER_LONG/4, word, (void *)word);
- 		}
- 	}
- }
-@@ -342,13 +340,13 @@ bool unwind_next_frame(struct unwind_state *state)
- 		goto the_end;
- 
- 	if (state->regs) {
--		printk_deferred_once(KERN_WARNING
-+		pr_warn_once(
- 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
- 			state->regs, state->task->comm,
- 			state->task->pid, next_bp);
- 		unwind_dump(state);
- 	} else {
--		printk_deferred_once(KERN_WARNING
-+		pr_warn_once(
- 			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
- 			state->bp, state->task->comm,
- 			state->task->pid, next_bp);
-diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
-index a1202536fc57..a26a7c3849f5 100644
---- a/arch/x86/kernel/unwind_orc.c
-+++ b/arch/x86/kernel/unwind_orc.c
-@@ -9,7 +9,7 @@
- #include <asm/orc_lookup.h>
- 
- #define orc_warn(fmt, ...) \
--	printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
-+	pr_warn_once("WARNING: " fmt, ##__VA_ARGS__)
- 
- #define orc_warn_current(args...)					\
- ({									\
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
-index eff065ce6f8e..d47927a4e8c3 100644
+index 0b5c61bb24a1..9dbf870229bd 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
-@@ -8511,6 +8511,14 @@ int kvm_arch_init(void *opaque)
+@@ -8655,6 +8655,12 @@ int kvm_arch_init(void *opaque)
  		goto out;
  	}
  
-+#ifdef CONFIG_PREEMPT_RT
-+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
++	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 +		pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
 +		r = -EOPNOTSUPP;
 +		goto out;
 +	}
-+#endif
 +
  	r = -ENOMEM;
- 	x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
- 					  __alignof__(struct fpu), SLAB_ACCOUNT,
+ 
+ 	x86_emulator_cache = kvm_alloc_emulator_cache();
 diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
 index 64c9389254f1..797aed7df3dd 100644
 --- a/arch/xtensa/include/asm/spinlock_types.h
@@ -1646,10 +1333,10 @@ index 64c9389254f1..797aed7df3dd 100644
  #endif
  
 diff --git a/block/blk-mq.c b/block/blk-mq.c
-index 82de39926a9f..330b6274bf6b 100644
+index 8874a63ae952..1f7569d135fa 100644
 --- a/block/blk-mq.c
 +++ b/block/blk-mq.c
-@@ -1563,14 +1563,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
+@@ -1857,14 +1857,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
  		return;
  
  	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1728,29 +1415,8 @@ index a1bea0f4baa8..5f8ca8c1f59c 100644
  
  	if (!req)
  		return;
-diff --git a/crypto/testmgr.c b/crypto/testmgr.c
-index 70f69f0910c9..58eee8eab4bf 100644
---- a/crypto/testmgr.c
-+++ b/crypto/testmgr.c
-@@ -1061,14 +1061,14 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
- 
- static void crypto_disable_simd_for_test(void)
- {
--	preempt_disable();
-+	migrate_disable();
- 	__this_cpu_write(crypto_simd_disabled_for_test, true);
- }
- 
- static void crypto_reenable_simd_for_test(void)
- {
- 	__this_cpu_write(crypto_simd_disabled_for_test, false);
--	preempt_enable();
-+	migrate_enable();
- }
- 
- /*
 diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
-index 6383c81ac5b3..abb695f5f5e4 100644
+index 25071126995b..6ff2bcfb9d0e 100644
 --- a/drivers/block/zram/zram_drv.c
 +++ b/drivers/block/zram/zram_drv.c
 @@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
@@ -1802,7 +1468,7 @@ index 6383c81ac5b3..abb695f5f5e4 100644
  
  static inline bool init_done(struct zram *zram)
  {
-@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
+@@ -1199,6 +1234,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
  
  	if (!huge_class_size)
  		huge_class_size = zs_huge_class_size(zram->mem_pool);
@@ -1823,53 +1489,126 @@ index 80c3b43b4828..d8f6d880f915 100644
  	ktime_t ac_time;
  #endif
 diff --git a/drivers/char/random.c b/drivers/char/random.c
-index 605969ed0f96..56b2d5a7e2a0 100644
+index 7470ee24db2f..4b93ca6ecef9 100644
 --- a/drivers/char/random.c
 +++ b/drivers/char/random.c
-@@ -1242,26 +1242,25 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
+@@ -200,7 +200,7 @@
+  *	void add_device_randomness(const void *buf, unsigned int size);
+  * 	void add_input_randomness(unsigned int type, unsigned int code,
+  *                                unsigned int value);
+- *	void add_interrupt_randomness(int irq, int irq_flags);
++ *	void add_interrupt_randomness(int irq);
+  * 	void add_disk_randomness(struct gendisk *disk);
+  *
+  * add_device_randomness() is for adding data to the random pool that
+@@ -1260,9 +1260,65 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs)
  	return *ptr;
  }
  
 -void add_interrupt_randomness(int irq, int irq_flags)
-+void add_interrupt_randomness(int irq, int irq_flags, __u64 ip)
++static bool process_interrupt_randomness_pool(struct fast_pool *fast_pool)
  {
  	struct entropy_store	*r;
++
++	if (unlikely(crng_init == 0)) {
++		bool pool_reset = false;
++
++		if ((fast_pool->count >= 64) &&
++		    crng_fast_load((char *) fast_pool->pool,
++				   sizeof(fast_pool->pool)))
++			pool_reset = true;
++
++		return pool_reset;
++	}
++
++	if ((fast_pool->count < 64) &&
++	    !time_after(jiffies, fast_pool->last + HZ))
++		return false;
++
++	r = &input_pool;
++	if (!spin_trylock(&r->lock))
++		return false;
++
++	__mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
++	spin_unlock(&r->lock);
++
++	/* award one bit for the contents of the fast pool */
++	credit_entropy_bits(r, 1);
++	return true;
++}
++
++#ifdef CONFIG_PREEMPT_RT
++void process_interrupt_randomness(void)
++{
++	struct fast_pool *cpu_pool;
++	struct fast_pool fast_pool;
++
++	lockdep_assert_irqs_enabled();
++
++	migrate_disable();
++	cpu_pool = this_cpu_ptr(&irq_randomness);
++
++	local_irq_disable();
++	memcpy(&fast_pool, cpu_pool, sizeof(fast_pool));
++	local_irq_enable();
++
++	if (process_interrupt_randomness_pool(&fast_pool)) {
++		local_irq_disable();
++		cpu_pool->last = jiffies;
++		cpu_pool->count = 0;
++		local_irq_enable();
++	}
++	memzero_explicit(&fast_pool, sizeof(fast_pool));
++	migrate_enable();
++}
++#endif
++
++void add_interrupt_randomness(int irq)
++{
  	struct fast_pool	*fast_pool = this_cpu_ptr(&irq_randomness);
--	struct pt_regs		*regs = get_irq_regs();
+ 	struct pt_regs		*regs = get_irq_regs();
  	unsigned long		now = jiffies;
- 	cycles_t		cycles = random_get_entropy();
- 	__u32			c_high, j_high;
--	__u64			ip;
- 
- 	if (cycles == 0)
--		cycles = get_reg(fast_pool, regs);
-+		cycles = get_reg(fast_pool, NULL);
- 	c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0;
- 	j_high = (sizeof(now) > 4) ? now >> 32 : 0;
- 	fast_pool->pool[0] ^= cycles ^ j_high ^ irq;
- 	fast_pool->pool[1] ^= now ^ c_high;
--	ip = regs ? instruction_pointer(regs) : _RET_IP_;
-+	if (!ip)
-+		ip = _RET_IP_;
- 	fast_pool->pool[2] ^= ip;
- 	fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 :
--		get_reg(fast_pool, regs);
-+		get_reg(fast_pool, NULL);
- 
+@@ -1284,32 +1340,17 @@ void add_interrupt_randomness(int irq, int irq_flags)
  	fast_mix(fast_pool);
  	add_interrupt_bench(cycles);
-@@ -1507,9 +1506,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
- 	print_once = true;
- #endif
- 	if (__ratelimit(&unseeded_warning))
--		printk_deferred(KERN_NOTICE "random: %s called from %pS "
--				"with crng_init=%d\n", func_name, caller,
--				crng_init);
-+		pr_notice("random: %s called from %pS with crng_init=%d\n",
-+			  func_name, caller, crng_init);
+ 
+-	if (unlikely(crng_init == 0)) {
+-		if ((fast_pool->count >= 64) &&
+-		    crng_fast_load((char *) fast_pool->pool,
+-				   sizeof(fast_pool->pool))) {
+-			fast_pool->count = 0;
++	/*
++	 * On PREEMPT_RT the entropy cannot be fed into the input_pool because
++	 * it needs to acquire sleeping locks with disabled interrupts.
++	 * This is deferred to the threaded handler.
++	 */
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
++		if (process_interrupt_randomness_pool(fast_pool)) {
+ 			fast_pool->last = now;
++			fast_pool->count = 0;
+ 		}
+-		return;
+ 	}
+-
+-	if ((fast_pool->count < 64) &&
+-	    !time_after(now, fast_pool->last + HZ))
+-		return;
+-
+-	r = &input_pool;
+-	if (!spin_trylock(&r->lock))
+-		return;
+-
+-	fast_pool->last = now;
+-	__mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool));
+-	spin_unlock(&r->lock);
+-
+-	fast_pool->count = 0;
+-
+-	/* award one bit for the contents of the fast pool */
+-	credit_entropy_bits(r, 1);
  }
+ EXPORT_SYMBOL_GPL(add_interrupt_randomness);
  
- /*
 diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
 index d3f2e5364c27..9c4a99757afd 100644
 --- a/drivers/char/tpm/tpm_tis.c
@@ -1924,29 +1663,6 @@ index d3f2e5364c27..9c4a99757afd 100644
  
  	return 0;
  }
-diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
-index 847f33ffc4ae..ae79c3300129 100644
---- a/drivers/firmware/efi/efi.c
-+++ b/drivers/firmware/efi/efi.c
-@@ -66,7 +66,7 @@ struct mm_struct efi_mm = {
- 
- struct workqueue_struct *efi_rts_wq;
- 
--static bool disable_runtime;
-+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
- static int __init setup_noefi(char *arg)
- {
- 	disable_runtime = true;
-@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str)
- 	if (parse_option_str(str, "noruntime"))
- 		disable_runtime = true;
- 
-+	if (parse_option_str(str, "runtime"))
-+		disable_runtime = false;
-+
- 	if (parse_option_str(str, "nosoftreserve"))
- 		set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
- 
 diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
 index 254e67141a77..7a39029b083f 100644
 --- a/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -2015,33 +1731,33 @@ index 209cf265bf74..6e1b9068d944 100644
  }
  
 diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
-index c41098950746..601274ba86e4 100644
+index 246c37d72cd7..d8c74bbf9aae 100644
 --- a/drivers/gpu/drm/i915/gt/intel_context.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context.h
-@@ -163,7 +163,8 @@ static inline void intel_context_enter(struct intel_context *ce)
+@@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce)
  
  static inline void intel_context_mark_active(struct intel_context *ce)
  {
 -	lockdep_assert_held(&ce->timeline->mutex);
 +	lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
-+		       test_bit(CONTEXT_IS_PARKED, &ce->flags));
++		       test_bit(CONTEXT_IS_PARKING, &ce->flags));
  	++ce->active_count;
  }
  
 diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
-index e54351a170e2..1022be795e68 100644
+index 9e0177dc5484..30cd81ad8911 100644
 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
 +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
-@@ -112,6 +112,7 @@ struct intel_context {
- #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
- #define CONTEXT_NOPREEMPT		8
+@@ -118,6 +118,7 @@ struct intel_context {
  #define CONTEXT_LRCA_DIRTY		9
-+#define CONTEXT_IS_PARKED		10
+ #define CONTEXT_GUC_INIT		10
+ #define CONTEXT_PERMA_PIN		11
++#define CONTEXT_IS_PARKING		12
  
  	struct {
  		u64 timeout_us;
 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
-index 1f07ac4e0672..e84f03a276d1 100644
+index a1334b48dde7..a8a2ad44b7e3 100644
 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
 +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
 @@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
@@ -2091,30 +1807,30 @@ index 1f07ac4e0672..e84f03a276d1 100644
 -	unsigned long flags;
  	bool result = true;
  
- 	/* GPU is pointing to the void, as good as in the kernel context. */
-@@ -201,7 +167,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+ 	/*
+@@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
  	 * engine->wakeref.count, we may see the request completion and retire
  	 * it causing an underflow of the engine->wakeref.
  	 */
 -	flags = __timeline_mark_lock(ce);
-+	set_bit(CONTEXT_IS_PARKED, &ce->flags);
++	set_bit(CONTEXT_IS_PARKING, &ce->flags);
  	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
  
  	rq = __i915_request_create(ce, GFP_NOWAIT);
-@@ -233,7 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+@@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
  
  	result = false;
  out_unlock:
 -	__timeline_mark_unlock(ce, flags);
-+	clear_bit(CONTEXT_IS_PARKED, &ce->flags);
++	clear_bit(CONTEXT_IS_PARKING, &ce->flags);
  	return result;
  }
  
 diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
-index cafb0608ffb4..07156996fc82 100644
+index bedb80057046..1dbcac05f44e 100644
 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
 +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
-@@ -1283,7 +1283,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+@@ -1284,7 +1284,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  	 * and context switches) submission.
  	 */
  
@@ -2123,7 +1839,7 @@ index cafb0608ffb4..07156996fc82 100644
  
  	/*
  	 * If the queue is higher priority than the last
-@@ -1383,7 +1383,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+@@ -1384,7 +1384,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  				 * Even if ELSP[1] is occupied and not worthy
  				 * of timeslices, our queue might be.
  				 */
@@ -2132,7 +1848,7 @@ index cafb0608ffb4..07156996fc82 100644
  				return;
  			}
  		}
-@@ -1409,7 +1409,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+@@ -1410,7 +1410,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  
  		if (last && !can_merge_rq(last, rq)) {
  			spin_unlock(&ve->base.sched_engine->lock);
@@ -2141,7 +1857,7 @@ index cafb0608ffb4..07156996fc82 100644
  			return; /* leave this for another sibling */
  		}
  
-@@ -1571,7 +1571,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+@@ -1572,7 +1572,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  	 */
  	sched_engine->queue_priority_hint = queue_prio(sched_engine);
  	i915_sched_engine_reset_on_empty(sched_engine);
@@ -2150,7 +1866,7 @@ index cafb0608ffb4..07156996fc82 100644
  
  	/*
  	 * We can skip poking the HW if we ended up with exactly the same set
-@@ -1597,13 +1597,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+@@ -1598,13 +1598,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  	}
  }
  
@@ -2164,7 +1880,7 @@ index cafb0608ffb4..07156996fc82 100644
  static void clear_ports(struct i915_request **ports, int count)
  {
  	memset_p((void **)ports, NULL, count);
-@@ -2423,7 +2416,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
+@@ -2424,7 +2417,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
  	}
  
  	if (!engine->execlists.pending[0]) {
@@ -2174,10 +1890,10 @@ index cafb0608ffb4..07156996fc82 100644
  	}
  
 diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
-index 9bc4f4a8e12e..547347241a47 100644
+index 77680bca46ee..be8faaaa6022 100644
 --- a/drivers/gpu/drm/i915/i915_irq.c
 +++ b/drivers/gpu/drm/i915/i915_irq.c
-@@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+@@ -916,7 +916,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
  	 */
  	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
  
@@ -2187,7 +1903,7 @@ index 9bc4f4a8e12e..547347241a47 100644
  
  	/* Get optional system timestamp before query. */
  	if (stime)
-@@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+@@ -980,7 +981,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
  	if (etime)
  		*etime = ktime_get();
  
@@ -2198,10 +1914,10 @@ index 9bc4f4a8e12e..547347241a47 100644
  	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
  
 diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
-index 79da5eca60af..b9dd6100c6d1 100644
+index 89cccefeea63..4665a4d4924e 100644
 --- a/drivers/gpu/drm/i915/i915_request.c
 +++ b/drivers/gpu/drm/i915/i915_request.c
-@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
+@@ -560,7 +560,6 @@ bool __i915_request_submit(struct i915_request *request)
  
  	RQ_TRACE(request, "\n");
  
@@ -2209,7 +1925,7 @@ index 79da5eca60af..b9dd6100c6d1 100644
  	lockdep_assert_held(&engine->sched_engine->lock);
  
  	/*
-@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request)
+@@ -669,7 +668,6 @@ void __i915_request_unsubmit(struct i915_request *request)
  	 */
  	RQ_TRACE(request, "\n");
  
@@ -2218,21 +1934,21 @@ index 79da5eca60af..b9dd6100c6d1 100644
  
  	/*
 diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
-index 1bc1349ba3c2..a2f713b4ac2f 100644
+index dc359242d1ae..b7fe67405fd3 100644
 --- a/drivers/gpu/drm/i915/i915_request.h
 +++ b/drivers/gpu/drm/i915/i915_request.h
-@@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq)
+@@ -642,7 +642,8 @@ i915_request_timeline(const struct i915_request *rq)
  {
  	/* Valid only while the request is being constructed (or retired). */
  	return rcu_dereference_protected(rq->timeline,
 -					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
 +					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
-+					 test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
++					 test_bit(CONTEXT_IS_PARKING, &rq->context->flags));
  }
  
  static inline struct i915_gem_context *
 diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
-index 63fec1c3c132..f345a0f12bf6 100644
+index 8104981a6604..89a4089bc4ba 100644
 --- a/drivers/gpu/drm/i915/i915_trace.h
 +++ b/drivers/gpu/drm/i915/i915_trace.h
 @@ -2,6 +2,10 @@
@@ -2256,10 +1972,10 @@ index 63fec1c3c132..f345a0f12bf6 100644
  	     TP_PROTO(struct i915_request *rq),
  	     TP_ARGS(rq)
 diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
-index 5259edacde38..b36b27c09049 100644
+index 7a5925072466..b7b56fb1e2fc 100644
 --- a/drivers/gpu/drm/i915/i915_utils.h
 +++ b/drivers/gpu/drm/i915/i915_utils.h
-@@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
+@@ -344,7 +344,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
  #define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
  
  /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
@@ -2268,62 +1984,56 @@ index 5259edacde38..b36b27c09049 100644
  # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
  #else
  # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
-diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
-index d030577ad6a2..ef1db3367df7 100644
---- a/drivers/hv/hyperv_vmbus.h
-+++ b/drivers/hv/hyperv_vmbus.h
-@@ -19,6 +19,7 @@
- #include <linux/atomic.h>
- #include <linux/hyperv.h>
- #include <linux/interrupt.h>
-+#include <linux/irq.h>
- 
- #include "hv_trace.h"
- 
 diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
-index 392c1ac4f819..c5e9725fb5ff 100644
+index 392c1ac4f819..7ae04ccb1043 100644
 --- a/drivers/hv/vmbus_drv.c
 +++ b/drivers/hv/vmbus_drv.c
-@@ -22,6 +22,7 @@
- #include <linux/clockchips.h>
- #include <linux/cpu.h>
- #include <linux/sched/task_stack.h>
-+#include <linux/irq.h>
- 
- #include <linux/delay.h>
- #include <linux/notifier.h>
-@@ -1337,6 +1338,8 @@ static void vmbus_isr(void)
- 	void *page_addr = hv_cpu->synic_event_page;
- 	struct hv_message *msg;
- 	union hv_synic_event_flags *event;
-+	struct pt_regs *regs = get_irq_regs();
-+	u64 ip = regs ? instruction_pointer(regs) : 0;
- 	bool handled = false;
- 
- 	if (unlikely(page_addr == NULL))
-@@ -1381,7 +1384,7 @@ static void vmbus_isr(void)
+@@ -1381,7 +1381,7 @@ static void vmbus_isr(void)
  			tasklet_schedule(&hv_cpu->msg_dpc);
  	}
  
 -	add_interrupt_randomness(vmbus_interrupt, 0);
-+	add_interrupt_randomness(vmbus_interrupt, 0, ip);
++	add_interrupt_randomness(vmbus_interrupt);
  }
  
  static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
-diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
-index 1f1d57288085..dc6816d36d06 100644
---- a/drivers/leds/trigger/Kconfig
-+++ b/drivers/leds/trigger/Kconfig
-@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT
- 
- config LEDS_TRIGGER_CPU
- 	bool "LED CPU Trigger"
-+	depends on !PREEMPT_RT
- 	help
- 	  This allows LEDs to be controlled by active CPUs. This shows
- 	  the active CPUs across an array of LEDs so you can see which
+diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
+index 1cf68f85b2e1..8ccf0c928bb4 100644
+--- a/drivers/i2c/busses/i2c-cht-wc.c
++++ b/drivers/i2c/busses/i2c-cht-wc.c
+@@ -99,15 +99,8 @@ static irqreturn_t cht_wc_i2c_adap_thread_handler(int id, void *data)
+ 	 * interrupt handler as well, so running the client irq handler from
+ 	 * this thread will cause things to lock up.
+ 	 */
+-	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) {
+-		/*
+-		 * generic_handle_irq expects local IRQs to be disabled
+-		 * as normally it is called from interrupt context.
+-		 */
+-		local_irq_disable();
+-		generic_handle_irq(adap->client_irq);
+-		local_irq_enable();
+-	}
++	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ)
++		generic_handle_irq_safe(adap->client_irq);
+ 
+ 	return IRQ_HANDLED;
+ }
+diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
+index 73253e667de1..698f3e928fcf 100644
+--- a/drivers/i2c/i2c-core-base.c
++++ b/drivers/i2c/i2c-core-base.c
+@@ -1423,7 +1423,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
+ 	if (irq <= 0)
+ 		return -ENXIO;
+ 
+-	generic_handle_irq(irq);
++	generic_handle_irq_safe(irq);
+ 
+ 	return 0;
+ }
 diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
-index 02ed53b20654..e459744c7a0d 100644
+index 9c1a5877cf9f..e748c0e33349 100644
 --- a/drivers/md/raid5.c
 +++ b/drivers/md/raid5.c
 @@ -2217,8 +2217,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
@@ -2367,21 +2077,138 @@ index 5c05acf20e1f..665fe138ab4f 100644
  		struct page	*spare_page; /* Used when checking P/Q in raid6 */
  		void		*scribble;  /* space for constructing buffer
  					     * lists and performing address
-diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
-index 2473fb5f75e5..2a5cc64227e9 100644
---- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
-+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
-@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
- static void
- nfp_abm_stats_calculate(struct nfp_alink_stats *new,
- 			struct nfp_alink_stats *old,
--			struct gnet_stats_basic_packed *bstats,
-+			struct gnet_stats_basic_sync *bstats,
- 			struct gnet_stats_queue *qstats)
+diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
+index 70fa18b04ad2..b14d3f98e1eb 100644
+--- a/drivers/mfd/ezx-pcap.c
++++ b/drivers/mfd/ezx-pcap.c
+@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
+ 		ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
+ 		ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
+ 
+-		local_irq_disable();
+ 		service = isr & ~msr;
+ 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
+ 			if (service & 1)
+-				generic_handle_irq(irq);
++				generic_handle_irq_safe(irq);
+ 		}
+-		local_irq_enable();
+ 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
+ 	} while (gpio_get_value(pdata->gpio));
+ }
+diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
+index 1c763796cf1f..caa3de37698b 100644
+--- a/drivers/misc/hi6421v600-irq.c
++++ b/drivers/misc/hi6421v600-irq.c
+@@ -117,8 +117,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
+ 			 * If both powerkey down and up IRQs are received,
+ 			 * handle them at the right order
+ 			 */
+-			generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
+-			generic_handle_irq(priv->irqs[POWERKEY_UP]);
++			generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
++			generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
+ 			pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
+ 		}
+ 
+@@ -126,7 +126,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
+ 			continue;
+ 
+ 		for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
+-			generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
++			generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
+ 		}
+ 	}
+ 
+diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
+index 90e1bcd03b46..52309b84be88 100644
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -2051,7 +2051,8 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
+ 		mmc_put_card(mq->card, &mq->ctx);
+ }
+ 
+-static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
++static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
++				bool can_sleep)
  {
- 	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+ 	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+ 	struct mmc_request *mrq = &mqrq->brq.mrq;
+@@ -2063,10 +2064,14 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req)
+ 	 * Block layer timeouts race with completions which means the normal
+ 	 * completion path cannot be used during recovery.
+ 	 */
+-	if (mq->in_recovery)
++	if (mq->in_recovery) {
+ 		mmc_blk_mq_complete_rq(mq, req);
+-	else if (likely(!blk_should_fake_timeout(req->q)))
+-		blk_mq_complete_request(req);
++	} else if (likely(!blk_should_fake_timeout(req->q))) {
++		if (can_sleep)
++			blk_mq_complete_request_direct(req, mmc_blk_mq_complete);
++		else
++			blk_mq_complete_request(req);
++	}
+ 
+ 	mmc_blk_mq_dec_in_flight(mq, req);
+ }
+@@ -2087,7 +2092,7 @@ void mmc_blk_mq_recovery(struct mmc_queue *mq)
+ 
+ 	mmc_blk_urgent_bkops(mq, mqrq);
+ 
+-	mmc_blk_mq_post_req(mq, req);
++	mmc_blk_mq_post_req(mq, req, true);
+ }
+ 
+ static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq,
+@@ -2106,7 +2111,7 @@ static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq,
+ 	if (prev_req)
+ 		*prev_req = mq->complete_req;
+ 	else
+-		mmc_blk_mq_post_req(mq, mq->complete_req);
++		mmc_blk_mq_post_req(mq, mq->complete_req, true);
+ 
+ 	mq->complete_req = NULL;
+ 
+@@ -2178,7 +2183,8 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq)
+ 	mq->rw_wait = false;
+ 	wake_up(&mq->wait);
+ 
+-	mmc_blk_mq_post_req(mq, req);
++	/* context unknown */
++	mmc_blk_mq_post_req(mq, req, false);
+ }
+ 
+ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err)
+@@ -2238,7 +2244,7 @@ static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq,
+ 	err = mmc_start_request(host, &mqrq->brq.mrq);
+ 
+ 	if (prev_req)
+-		mmc_blk_mq_post_req(mq, prev_req);
++		mmc_blk_mq_post_req(mq, prev_req, true);
+ 
+ 	if (err)
+ 		mq->rw_wait = false;
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 075f8abde5cd..6cf28f688190 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1367,11 +1367,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
+ 		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
+ 		lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+ 
+-		if (dev->domain_data.phyirq > 0) {
+-			local_irq_disable();
+-			generic_handle_irq(dev->domain_data.phyirq);
+-			local_irq_enable();
+-		}
++		if (dev->domain_data.phyirq > 0)
++			generic_handle_irq_safe(dev->domain_data.phyirq);
+ 	} else {
+ 		netdev_warn(dev->net,
+ 			    "unexpected interrupt: 0x%08x\n", intdata);
 diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
-index 5ae6c207d3ac..660908027dc5 100644
+index 6415f88738ad..556284ea978b 100644
 --- a/drivers/scsi/fcoe/fcoe.c
 +++ b/drivers/scsi/fcoe/fcoe.c
 @@ -1450,11 +1450,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
@@ -2477,8 +2304,24 @@ index 841000445b9a..26d661ddc950 100644
  
  	/* peek cache of free slot */
  	if (pool->left != FC_XID_UNKNOWN) {
+diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
+index 7e6347fe93f9..8a7cf1d0e968 100644
+--- a/drivers/staging/greybus/gpio.c
++++ b/drivers/staging/greybus/gpio.c
+@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
+ 		return -EINVAL;
+ 	}
+ 
+-	local_irq_disable();
+-	ret = generic_handle_irq(irq);
+-	local_irq_enable();
+-
++	ret = generic_handle_irq_safe(irq);
+ 	if (ret)
+ 		dev_err(dev, "failed to invoke irq handler\n");
+ 
 diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
-index 6473361525d1..2321d02e9b7a 100644
+index 6473361525d1..7b1a88934d6f 100644
 --- a/drivers/tty/serial/8250/8250.h
 +++ b/drivers/tty/serial/8250/8250.h
 @@ -132,12 +132,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
@@ -2495,12 +2338,12 @@ index 6473361525d1..2321d02e9b7a 100644
 +	is_console = uart_console(port);
 +
 +	if (is_console)
-+		console_atomic_lock(flags);
++		printk_cpu_sync_get_irqsave(flags);
 +
 +	serial_out(up, UART_IER, ier);
 +
 +	if (is_console)
-+		console_atomic_unlock(flags);
++		printk_cpu_sync_put_irqrestore(flags);
 +}
 +
 +static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
@@ -2517,13 +2360,13 @@ index 6473361525d1..2321d02e9b7a 100644
 +		clearval = UART_IER_UUE;
 +
 +	if (is_console)
-+		console_atomic_lock(flags);
++		printk_cpu_sync_get_irqsave(flags);
 +
 +	prior = serial_port_in(port, UART_IER);
 +	serial_port_out(port, UART_IER, clearval);
 +
 +	if (is_console)
-+		console_atomic_unlock(flags);
++		printk_cpu_sync_put_irqrestore(flags);
 +
 +	return prior;
 +}
@@ -2597,10 +2440,10 @@ index 1ce193daea7f..fad00c0414e3 100644
  	.device		= uart_console_device,
  	.setup		= univ8250_console_setup,
 diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
-index fc65a2293ce9..19a92530040f 100644
+index 9c01c531349d..d9b651290e1c 100644
 --- a/drivers/tty/serial/8250/8250_fsl.c
 +++ b/drivers/tty/serial/8250/8250_fsl.c
-@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port)
+@@ -56,9 +56,18 @@ int fsl8250_handle_irq(struct uart_port *port)
  
  	/* Stop processing interrupts on input overrun */
  	if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
@@ -2611,16 +2454,16 @@ index fc65a2293ce9..19a92530040f 100644
 +		is_console = uart_console(port);
 +
 +		if (is_console)
-+			console_atomic_lock(flags);
++			printk_cpu_sync_get_irqsave(flags);
  		up->ier = port->serial_in(port, UART_IER);
 +		if (is_console)
-+			console_atomic_unlock(flags);
++			printk_cpu_sync_put_irqrestore(flags);
 +
  		if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
  			port->ops->stop_rx(port);
  		} else {
 diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c
-index 65402d05eff9..8122645ab05c 100644
+index 65402d05eff9..061d8e4072c6 100644
 --- a/drivers/tty/serial/8250/8250_ingenic.c
 +++ b/drivers/tty/serial/8250/8250_ingenic.c
 @@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart",
@@ -2638,15 +2481,15 @@ index 65402d05eff9..8122645ab05c 100644
  		 */
 +		is_console = uart_console(p);
 +		if (is_console)
-+			console_atomic_lock(flags);
++			printk_cpu_sync_get_irqsave(flags);
  		ier = p->serial_in(p, UART_IER);
 +		if (is_console)
-+			console_atomic_unlock(flags);
++			printk_cpu_sync_put_irqrestore(flags);
  
  		if (ier & UART_IER_MSI)
  			value |= UART_MCR_MDCE | UART_MCR_FCM;
 diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
-index fb65dc601b23..9af18b5d8296 100644
+index fb65dc601b23..e5032e5abd8e 100644
 --- a/drivers/tty/serial/8250/8250_mtk.c
 +++ b/drivers/tty/serial/8250/8250_mtk.c
 @@ -218,12 +218,37 @@ static void mtk8250_shutdown(struct uart_port *port)
@@ -2662,13 +2505,13 @@ index fb65dc601b23..9af18b5d8296 100644
 +	is_console = uart_console(port);
 +
 +	if (is_console)
-+		console_atomic_lock(flags);
++		printk_cpu_sync_get_irqsave(flags);
 +
 +	ier = serial_in(up, UART_IER);
 +	serial_out(up, UART_IER, ier & (~mask));
 +
 +	if (is_console)
-+		console_atomic_unlock(flags);
++		printk_cpu_sync_put_irqrestore(flags);
  }
  
  static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
@@ -2679,18 +2522,18 @@ index fb65dc601b23..9af18b5d8296 100644
 +	unsigned int ier;
 +
 +	if (uart_console(port))
-+		console_atomic_lock(flags);
++		printk_cpu_sync_get_irqsave(flags);
 +
 +	ier = serial_in(up, UART_IER);
 +	serial_out(up, UART_IER, ier | mask);
 +
 +	if (uart_console(port))
-+		console_atomic_unlock(flags);
++		printk_cpu_sync_put_irqrestore(flags);
  }
  
  static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
 diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
-index ec88b706e882..7774c63ce53d 100644
+index 46e2079ad1aa..49883a0a58a5 100644
 --- a/drivers/tty/serial/8250/8250_port.c
 +++ b/drivers/tty/serial/8250/8250_port.c
 @@ -762,7 +762,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
@@ -2801,9 +2644,9 @@ index ec88b706e882..7774c63ce53d 100644
 +
 +	wait_for_xmitr(up, UART_LSR_THRE);
 +
-+	console_atomic_lock(flags);
++	printk_cpu_sync_get_irqsave(flags);
 +	serial8250_console_putchar_locked(port, ch);
-+	console_atomic_unlock(flags);
++	printk_cpu_sync_put_irqrestore(flags);
 +}
 +
  /*
@@ -2820,7 +2663,7 @@ index ec88b706e882..7774c63ce53d 100644
 +	unsigned long flags;
 +	unsigned int ier;
 +
-+	console_atomic_lock(flags);
++	printk_cpu_sync_get_irqsave(flags);
 +
 +	touch_nmi_watchdog();
 +
@@ -2836,7 +2679,7 @@ index ec88b706e882..7774c63ce53d 100644
 +	wait_for_xmitr(up, BOTH_EMPTY);
 +	serial8250_set_IER(up, ier);
 +
-+	console_atomic_unlock(flags);
++	printk_cpu_sync_put_irqrestore(flags);
 +}
 +
  /*
@@ -2916,6 +2759,18 @@ index ec88b706e882..7774c63ce53d 100644
  	if (options)
  		uart_parse_options(options, &baud, &parity, &bits, &flow);
  	else if (probe)
+diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
+index 8cd11aa63ed5..9b6695bdafc9 100644
+--- a/drivers/tty/serial/8250/Kconfig
++++ b/drivers/tty/serial/8250/Kconfig
+@@ -9,6 +9,7 @@ config SERIAL_8250
+ 	depends on !S390
+ 	select SERIAL_CORE
+ 	select SERIAL_MCTRL_GPIO if GPIOLIB
++	select HAVE_ATOMIC_CONSOLE
+ 	help
+ 	  This selects whether you want to include the driver for the standard
+ 	  serial ports.  The standard answer is Y.  People who might say N
 diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
 index 52518a606c06..1ca270b9857a 100644
 --- a/drivers/tty/serial/amba-pl011.c
@@ -2990,6 +2845,19 @@ index 0862941862c8..10970632f0e4 100644
  }
  
  static int __init
+diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
+index 7359c3e80d63..ab4712cc9327 100644
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -3161,7 +3161,7 @@ static struct console vt_console_driver = {
+ 	.write		= vt_console_print,
+ 	.device		= vt_console_device,
+ 	.unblank	= unblank_screen,
+-	.flags		= CON_PRINTBUFFER,
++	.flags		= CON_PRINTBUFFER|CON_MIGHT_SLEEP,
+ 	.index		= -1,
+ };
+ #endif
 diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
 index df5184979b28..d4ad211dce7a 100644
 --- a/drivers/virt/acrn/irqfd.c
@@ -3192,7 +3060,7 @@ index 6a675652129b..7a972d144b54 100644
  	return fscache_object_congested();
  }
 diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
-index bc267832310c..3176913fae6c 100644
+index b4e565711045..5ef0c106fb9d 100644
 --- a/fs/fuse/readdir.c
 +++ b/fs/fuse/readdir.c
 @@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file,
@@ -3205,7 +3073,7 @@ index bc267832310c..3176913fae6c 100644
  	if (!o->nodeid) {
  		/*
 diff --git a/fs/namei.c b/fs/namei.c
-index 1946d9667790..d89890a17f1b 100644
+index 1f9d2187c765..49552c066ce5 100644
 --- a/fs/namei.c
 +++ b/fs/namei.c
 @@ -1633,7 +1633,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
@@ -3217,7 +3085,7 @@ index 1946d9667790..d89890a17f1b 100644
  
  	/* Don't go there if it's already dead */
  	if (unlikely(IS_DEADDIR(inode)))
-@@ -3194,7 +3194,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
+@@ -3192,7 +3192,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
  	struct dentry *dentry;
  	int error, create_error = 0;
  	umode_t mode = op->mode;
@@ -3227,7 +3095,7 @@ index 1946d9667790..d89890a17f1b 100644
  	if (unlikely(IS_DEADDIR(dir_inode)))
  		return ERR_PTR(-ENOENT);
 diff --git a/fs/namespace.c b/fs/namespace.c
-index db9936562011..8a9c40376d94 100644
+index d3d750635610..92bbf3d86e00 100644
 --- a/fs/namespace.c
 +++ b/fs/namespace.c
 @@ -343,8 +343,24 @@ int __mnt_want_write(struct vfsmount *m)
@@ -3258,10 +3126,10 @@ index db9936562011..8a9c40376d94 100644
  	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  	 * be set to match its requirements. So we must not load that until
 diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
-index 5b68c44848ca..85a1006e0a85 100644
+index 731d31015b6a..d7c2571391b7 100644
 --- a/fs/nfs/dir.c
 +++ b/fs/nfs/dir.c
-@@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
+@@ -638,7 +638,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
  		unsigned long dir_verifier)
  {
  	struct qstr filename = QSTR_INIT(entry->name, entry->len);
@@ -3270,7 +3138,7 @@ index 5b68c44848ca..85a1006e0a85 100644
  	struct dentry *dentry;
  	struct dentry *alias;
  	struct inode *inode;
-@@ -1875,7 +1875,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+@@ -1860,7 +1860,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
  		    struct file *file, unsigned open_flags,
  		    umode_t mode)
  {
@@ -3302,7 +3170,7 @@ index 5fa11e1aca4c..984f26eb888c 100644
  	status = -EBUSY;
  	spin_lock(&dentry->d_lock);
 diff --git a/fs/proc/base.c b/fs/proc/base.c
-index 93f2479ef319..3e5c11507c91 100644
+index 24fd5e986cb7..a9a3dd989e3d 100644
 --- a/fs/proc/base.c
 +++ b/fs/proc/base.c
 @@ -96,6 +96,7 @@
@@ -3313,7 +3181,7 @@ index 93f2479ef319..3e5c11507c91 100644
  #include <linux/cn_proc.h>
  #include <trace/events/oom.h>
  #include "internal.h"
-@@ -2043,7 +2044,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
+@@ -2045,7 +2046,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
  
  	child = d_hash_and_lookup(dir, &qname);
  	if (!child) {
@@ -3348,54 +3216,102 @@ index eceeecf6a5bd..d3e2d81656e0 100644
  void do_softirq_own_stack(void);
  #else
  static inline void do_softirq_own_stack(void)
+diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
+index 2949d9ac7484..131b45dfec67 100644
+--- a/include/linux/blk-mq.h
++++ b/include/linux/blk-mq.h
+@@ -752,6 +752,17 @@ static inline void blk_mq_set_request_complete(struct request *rq)
+ 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+ }
+ 
++/*
++ * Complete the request directly instead of deferring it to softirq or
++ * completing it on another CPU. Useful in preemptible context instead of interrupt context.
++ */
++static inline void blk_mq_complete_request_direct(struct request *rq,
++		   void (*complete)(struct request *rq))
++{
++	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
++	complete(rq);
++}
++
+ void blk_mq_start_request(struct request *rq);
+ void blk_mq_end_request(struct request *rq, blk_status_t error);
+ void __blk_mq_end_request(struct request *rq, blk_status_t error);
 diff --git a/include/linux/console.h b/include/linux/console.h
-index a97f277cfdfa..487a4266ab2c 100644
+index a97f277cfdfa..15432b6e11a4 100644
 --- a/include/linux/console.h
 +++ b/include/linux/console.h
-@@ -16,6 +16,13 @@
+@@ -16,6 +16,7 @@
  
  #include <linux/atomic.h>
  #include <linux/types.h>
-+#include <linux/printk.h>
-+#include <linux/seqlock.h>
-+
-+struct latched_seq {
-+	seqcount_latch_t	latch;
-+	u64			val[2];
-+};
++#include <linux/mutex.h>
  
  struct vc_data;
  struct console_font_op;
-@@ -136,10 +143,12 @@ static inline int con_debug_leave(void)
- #define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
+@@ -133,13 +134,25 @@ static inline int con_debug_leave(void)
+ #define CON_CONSDEV	(2) /* Preferred console, /dev/console */
+ #define CON_ENABLED	(4)
+ #define CON_BOOT	(8)
+-#define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
++#define CON_ANYTIME	(16) /* Safe to call before per-cpu resources ready */
  #define CON_BRL		(32) /* Used for a braille device */
  #define CON_EXTENDED	(64) /* Use the extended output format a la /dev/kmsg */
-+#define CON_HANDOVER	(128) /* Device was previously a boot console. */
++#define CON_PAUSED	(128) /* Sleep while console is locked */
++#define CON_MIGHT_SLEEP	(256) /* Can only be called from sleepable context */
++
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++struct console_atomic_data {
++	u64	seq;
++	char	*text;
++	char	*ext_text;
++	char	*dropped_text;
++};
++#endif
  
  struct console {
  	char	name[16];
  	void	(*write)(struct console *, const char *, unsigned);
-+	void	(*write_atomic)(struct console *co, const char *s, unsigned int count);
++	void	(*write_atomic)(struct console *, const char *, unsigned);
  	int	(*read)(struct console *, char *, unsigned);
  	struct tty_driver *(*device)(struct console *, int *);
  	void	(*unblank)(void);
-@@ -149,6 +158,16 @@ struct console {
- 	short	flags;
- 	short	index;
+@@ -151,6 +164,26 @@ struct console {
  	int	cflag;
-+#ifdef CONFIG_PRINTK
-+	char	sync_buf[CONSOLE_LOG_MAX];
-+	struct latched_seq printk_seq;
-+	struct latched_seq printk_sync_seq;
-+#ifdef CONFIG_HAVE_NMI
-+	struct latched_seq printk_sync_nmi_seq;
-+#endif
-+#endif /* CONFIG_PRINTK */
-+
-+	struct task_struct *thread;
  	uint	ispeed;
  	uint	ospeed;
++	u64	seq;
++	atomic_long_t dropped;
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	struct console_atomic_data *atomic_data;
++#endif
++	struct task_struct *thread;
++
++	/*
++	 * The per-console lock is used by printing kthreads to synchronize
++	 * this console with callers of console_lock(). This is necessary in
++	 * order to allow printing kthreads to run in parallel to each other,
++	 * while each safely accesses its own @flags and synchronizes
++	 * against direct printing via console_lock/console_unlock.
++	 *
++	 * Note: For synchronizing against direct printing via
++	 *       console_trylock/console_unlock, see the static global
++	 *       variable @console_lock_count.
++	 */
++	struct mutex lock;
++
  	void	*data;
+ 	struct	 console *next;
+ };
+@@ -165,6 +198,7 @@ extern int console_set_on_cmdline;
+ extern struct console *early_console;
+ 
+ enum con_flush_mode {
++	CONSOLE_ATOMIC_FLUSH_PENDING,
+ 	CONSOLE_FLUSH_PENDING,
+ 	CONSOLE_REPLAY_ALL,
+ };
 diff --git a/include/linux/dcache.h b/include/linux/dcache.h
 index 9e23d33bb6f1..9f89d4887e35 100644
 --- a/include/linux/dcache.h
@@ -3439,56 +3355,45 @@ index 2e2b8d6140ed..71064a2c2caf 100644
  	 ARCH_EXIT_TO_USER_MODE_WORK)
  
  /**
-diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
-index ec2a47a81e42..8cd11a223260 100644
---- a/include/linux/irq_work.h
-+++ b/include/linux/irq_work.h
-@@ -3,6 +3,7 @@
- #define _LINUX_IRQ_WORK_H
- 
- #include <linux/smp_types.h>
-+#include <linux/rcuwait.h>
- 
- /*
-  * An entry can be in one of four states:
-@@ -16,11 +17,13 @@
- struct irq_work {
- 	struct __call_single_node node;
- 	void (*func)(struct irq_work *);
-+	struct rcuwait irqwait;
- };
- 
- #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
- 	.node = { .u_flags = (_flags), },			\
- 	.func = (_func),					\
-+	.irqwait = __RCUWAIT_INITIALIZER(irqwait),		\
- }
+diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
+index 1f22a30c0963..9c35024be942 100644
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -554,6 +554,22 @@ extern void __raise_softirq_irqoff(unsigned int nr);
+ extern void raise_softirq_irqoff(unsigned int nr);
+ extern void raise_softirq(unsigned int nr);
  
- #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
-@@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
- 	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
- }
- 
-+static inline bool irq_work_is_hard(struct irq_work *work)
++#ifdef CONFIG_PREEMPT_RT
++extern void raise_timer_softirq(void);
++extern void raise_hrtimer_softirq(void);
++
++#else
++static inline void raise_timer_softirq(void)
 +{
-+	return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
++	raise_softirq(TIMER_SOFTIRQ);
 +}
 +
- bool irq_work_queue(struct irq_work *work);
- bool irq_work_queue_on(struct irq_work *work, int cpu);
++static inline void raise_hrtimer_softirq(void)
++{
++	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++}
++#endif
++
+ DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
  
+ static inline struct task_struct *this_cpu_ksoftirqd(void)
 diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
-index 59aea39785bf..ab70314af3d5 100644
+index 93d270ca0c56..a77584593f7d 100644
 --- a/include/linux/irqdesc.h
 +++ b/include/linux/irqdesc.h
-@@ -68,6 +68,7 @@ struct irq_desc {
- 	unsigned int		irqs_unhandled;
- 	atomic_t		threads_handled;
- 	int			threads_handled_last;
-+	u64			random_ip;
- 	raw_spinlock_t		lock;
- 	struct cpumask		*percpu_enabled;
- 	const struct cpumask	*percpu_affinity;
+@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
+ 
+ int handle_irq_desc(struct irq_desc *desc);
+ int generic_handle_irq(unsigned int irq);
++int generic_handle_irq_safe(unsigned int irq);
+ 
+ #ifdef CONFIG_IRQ_DOMAIN
+ /*
 diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
 index 600c10da321a..4b140938b03e 100644
 --- a/include/linux/irqflags.h
@@ -3530,125 +3435,52 @@ index 600c10da321a..4b140938b03e 100644
  #if defined(CONFIG_IRQSOFF_TRACER) || \
  	defined(CONFIG_PREEMPT_TRACER)
   extern void stop_critical_timings(void);
-diff --git a/include/linux/kernel.h b/include/linux/kernel.h
-index 2776423a587e..e8696e4a45aa 100644
---- a/include/linux/kernel.h
-+++ b/include/linux/kernel.h
-@@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
- #endif /* CONFIG_PREEMPT_* */
- 
- #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
--extern void ___might_sleep(const char *file, int line, int preempt_offset);
--extern void __might_sleep(const char *file, int line, int preempt_offset);
-+extern void __might_resched(const char *file, int line, unsigned int offsets);
-+extern void __might_sleep(const char *file, int line);
- extern void __cant_sleep(const char *file, int line, int preempt_offset);
- extern void __cant_migrate(const char *file, int line);
- 
-@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
-  * supposed to.
-  */
- # define might_sleep() \
--	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
-+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
- /**
-  * cant_sleep - annotation for functions that cannot sleep
-  *
-@@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
-  */
- # define non_block_end() WARN_ON(current->non_block_count-- == 0)
- #else
--  static inline void ___might_sleep(const char *file, int line,
--				   int preempt_offset) { }
--  static inline void __might_sleep(const char *file, int line,
--				   int preempt_offset) { }
-+  static inline void __might_resched(const char *file, int line,
-+				     unsigned int offsets) { }
-+static inline void __might_sleep(const char *file, int line) { }
- # define might_sleep() do { might_resched(); } while (0)
- # define cant_sleep() do { } while (0)
- # define cant_migrate()		do { } while (0)
-diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
-index 258cdde8d356..9bca0d98db5a 100644
---- a/include/linux/kgdb.h
-+++ b/include/linux/kgdb.h
-@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored);
-  */
- extern void kgdb_roundup_cpus(void);
- 
-+extern void kgdb_roundup_cpu(unsigned int cpu);
-+
- /**
-  *	kgdb_arch_set_pc - Generic call back to the program counter
-  *	@regs: Current &struct pt_regs.
-@@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void);
- #define dbg_late_init()
- static inline void kgdb_panic(const char *msg) {}
- static inline void kgdb_free_init_mem(void) { }
-+static inline void kgdb_roundup_cpu(unsigned int cpu) {}
- #endif /* ! CONFIG_KGDB */
- #endif /* _KGDB_H_ */
-diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
-index 3a9a798a4ae1..3ea692eeb8d3 100644
---- a/include/linux/mm_types.h
-+++ b/include/linux/mm_types.h
-@@ -12,6 +12,7 @@
- #include <linux/completion.h>
- #include <linux/cpumask.h>
- #include <linux/uprobes.h>
-+#include <linux/rcupdate.h>
- #include <linux/page-flags-layout.h>
- #include <linux/workqueue.h>
- #include <linux/seqlock.h>
-@@ -574,6 +575,9 @@ struct mm_struct {
- 		bool tlb_flush_batched;
- #endif
- 		struct uprobes_state uprobes_state;
-+#ifdef CONFIG_PREEMPT_RT
-+		struct rcu_head delayed_drop;
-+#endif
- #ifdef CONFIG_HUGETLB_PAGE
- 		atomic_long_t hugetlb_usage;
- #endif
+diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
+index 975e33b793a7..6d635e8306d6 100644
+--- a/include/linux/local_lock_internal.h
++++ b/include/linux/local_lock_internal.h
+@@ -44,9 +44,9 @@ static inline void local_lock_debug_init(local_lock_t *l)
+ }
+ #else /* CONFIG_DEBUG_LOCK_ALLOC */
+ # define LOCAL_LOCK_DEBUG_INIT(lockname)
+-static inline void local_lock_acquire(local_lock_t *l) { }
+-static inline void local_lock_release(local_lock_t *l) { }
+-static inline void local_lock_debug_init(local_lock_t *l) { }
++# define local_lock_acquire(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
++# define local_lock_release(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
++# define local_lock_debug_init(__ll)  do { typecheck(local_lock_t *, __ll); } while (0)
+ #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
+ 
+ #define INIT_LOCAL_LOCK(lockname)	{ LOCAL_LOCK_DEBUG_INIT(lockname) }
 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
-index ce81cc96a98d..4230c0fe2dcb 100644
+index 6aadcc0ecb5b..4b041364ee2b 100644
 --- a/include/linux/netdevice.h
 +++ b/include/linux/netdevice.h
-@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
-  *	@sfp_bus:	attached &struct sfp_bus structure.
-  *
-  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
-- *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
-  *
-  *	@proto_down:	protocol port state information can be sent to the
-  *			switch driver and used to set the phys state of the
-@@ -2250,7 +2249,6 @@ struct net_device {
- 	struct phy_device	*phydev;
- 	struct sfp_bus		*sfp_bus;
- 	struct lock_class_key	*qdisc_tx_busylock;
--	struct lock_class_key	*qdisc_running_key;
- 	bool			proto_down;
- 	unsigned		wol_enabled:1;
- 	unsigned		threaded:1;
-@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
- #define netdev_lockdep_set_classes(dev)				\
- {								\
- 	static struct lock_class_key qdisc_tx_busylock_key;	\
--	static struct lock_class_key qdisc_running_key;		\
- 	static struct lock_class_key qdisc_xmit_lock_key;	\
- 	static struct lock_class_key dev_addr_list_lock_key;	\
- 	unsigned int i;						\
- 								\
- 	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
--	(dev)->qdisc_running_key = &qdisc_running_key;		\
- 	lockdep_set_class(&(dev)->addr_list_lock,		\
- 			  &dev_addr_list_lock_key);		\
- 	for (i = 0; i < (dev)->num_tx_queues; i++)		\
+@@ -4003,8 +4003,17 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
+ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
+ int netif_rx(struct sk_buff *skb);
+-int netif_rx_ni(struct sk_buff *skb);
+-int netif_rx_any_context(struct sk_buff *skb);
++
++static inline int netif_rx_ni(struct sk_buff *skb)
++{
++	return netif_rx(skb);
++}
++
++static inline int netif_rx_any_context(struct sk_buff *skb)
++{
++	return netif_rx(skb);
++}
++
+ int netif_receive_skb(struct sk_buff *skb);
+ int netif_receive_skb_core(struct sk_buff *skb);
+ void netif_receive_skb_list(struct list_head *head);
 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
-index e9698b6278a5..1c8393c1280c 100644
+index 967a0098f0a9..57979c3dc4a7 100644
 --- a/include/linux/nfs_xdr.h
 +++ b/include/linux/nfs_xdr.h
-@@ -1692,7 +1692,7 @@ struct nfs_unlinkdata {
+@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata {
  	struct nfs_removeargs args;
  	struct nfs_removeres res;
  	struct dentry *dentry;
@@ -3658,23 +3490,10 @@ index e9698b6278a5..1c8393c1280c 100644
  	struct nfs_fattr dir_attr;
  	long timeout;
 diff --git a/include/linux/preempt.h b/include/linux/preempt.h
-index 4d244e295e85..3da73c968211 100644
+index b4381f255a5c..c05c5247986f 100644
 --- a/include/linux/preempt.h
 +++ b/include/linux/preempt.h
-@@ -122,9 +122,10 @@
-  * The preempt_count offset after spin_lock()
-  */
- #if !defined(CONFIG_PREEMPT_RT)
--#define PREEMPT_LOCK_OFFSET	PREEMPT_DISABLE_OFFSET
-+#define PREEMPT_LOCK_OFFSET		PREEMPT_DISABLE_OFFSET
- #else
--#define PREEMPT_LOCK_OFFSET	0
-+/* Locks on RT do not disable preemption */
-+#define PREEMPT_LOCK_OFFSET		0
- #endif
- 
- /*
-@@ -174,6 +175,20 @@ extern void preempt_count_sub(int val);
+@@ -196,6 +196,20 @@ extern void preempt_count_sub(int val);
  #define preempt_count_inc() preempt_count_add(1)
  #define preempt_count_dec() preempt_count_sub(1)
  
@@ -3695,7 +3514,7 @@ index 4d244e295e85..3da73c968211 100644
  #ifdef CONFIG_PREEMPT_COUNT
  
  #define preempt_disable() \
-@@ -182,13 +197,25 @@ do { \
+@@ -204,13 +218,25 @@ do { \
  	barrier(); \
  } while (0)
  
@@ -3722,7 +3541,7 @@ index 4d244e295e85..3da73c968211 100644
  
  #define preemptible()	(preempt_count() == 0 && !irqs_disabled())
  
-@@ -213,6 +240,18 @@ do { \
+@@ -235,6 +261,18 @@ do { \
  		__preempt_schedule(); \
  } while (0)
  
@@ -3741,7 +3560,7 @@ index 4d244e295e85..3da73c968211 100644
  #else /* !CONFIG_PREEMPTION */
  #define preempt_enable() \
  do { \
-@@ -220,6 +259,12 @@ do { \
+@@ -242,6 +280,12 @@ do { \
  	preempt_count_dec(); \
  } while (0)
  
@@ -3754,7 +3573,7 @@ index 4d244e295e85..3da73c968211 100644
  #define preempt_enable_notrace() \
  do { \
  	barrier(); \
-@@ -258,8 +303,12 @@ do { \
+@@ -280,8 +324,12 @@ do { \
  #define preempt_disable_notrace()		barrier()
  #define preempt_enable_no_resched_notrace()	barrier()
  #define preempt_enable_notrace()		barrier()
@@ -3767,7 +3586,7 @@ index 4d244e295e85..3da73c968211 100644
  #endif /* CONFIG_PREEMPT_COUNT */
  
  #ifdef MODULE
-@@ -278,7 +327,7 @@ do { \
+@@ -300,7 +348,7 @@ do { \
  } while (0)
  #define preempt_fold_need_resched() \
  do { \
@@ -3776,7 +3595,7 @@ index 4d244e295e85..3da73c968211 100644
  		set_preempt_need_resched(); \
  } while (0)
  
-@@ -394,8 +443,15 @@ extern void migrate_enable(void);
+@@ -416,8 +464,15 @@ extern void migrate_enable(void);
  
  #else
  
@@ -3795,174 +3614,113 @@ index 4d244e295e85..3da73c968211 100644
  #endif /* CONFIG_SMP */
  
 diff --git a/include/linux/printk.h b/include/linux/printk.h
-index 9497f6b98339..f1b9cd8d11d6 100644
+index 9497f6b98339..6596f02d1f05 100644
 --- a/include/linux/printk.h
 +++ b/include/linux/printk.h
-@@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer)
+@@ -170,6 +170,8 @@ extern void __printk_safe_exit(void);
+ #define printk_deferred_enter __printk_safe_enter
+ #define printk_deferred_exit __printk_safe_exit
  
- #define CONSOLE_EXT_LOG_MAX	8192
- 
-+/*
-+ * The maximum size of a record formatted for console printing
-+ * (i.e. with the prefix prepended to every line).
-+ */
-+#define CONSOLE_LOG_MAX		1024
++extern bool pr_flush(int timeout_ms, bool reset_on_progress);
 +
- /* printk's without a loglevel use this.. */
- #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
- 
-@@ -155,20 +161,7 @@ int vprintk(const char *fmt, va_list args);
- asmlinkage __printf(1, 2) __cold
- int _printk(const char *fmt, ...);
- 
--/*
-- * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
-- */
--__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
--
--extern void __printk_safe_enter(void);
--extern void __printk_safe_exit(void);
--/*
-- * The printk_deferred_enter/exit macros are available only as a hack for
-- * some code paths that need to defer all printk console printing. Interrupts
-- * must be disabled for the deferred duration.
-- */
--#define printk_deferred_enter __printk_safe_enter
--#define printk_deferred_exit __printk_safe_exit
-+bool pr_flush(int timeout_ms, bool reset_on_progress);
- 
  /*
   * Please don't use printk_ratelimit(), because it shares ratelimiting state
-@@ -210,18 +203,10 @@ int _printk(const char *s, ...)
+  * with all other unrelated printk_ratelimit() callsites.  Instead use
+@@ -224,6 +226,11 @@ static inline void printk_deferred_exit(void)
  {
- 	return 0;
  }
--static inline __printf(1, 2) __cold
--int _printk_deferred(const char *s, ...)
--{
--	return 0;
--}
--
--static inline void printk_deferred_enter(void)
--{
--}
  
--static inline void printk_deferred_exit(void)
 +static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
- {
++{
 +	return true;
- }
- 
++}
++
  static inline int printk_ratelimit(void)
-@@ -284,17 +269,30 @@ static inline void printk_trigger_flush(void)
- extern int __printk_cpu_trylock(void);
- extern void __printk_wait_on_cpu_lock(void);
- extern void __printk_cpu_unlock(void);
-+extern bool kgdb_roundup_delay(unsigned int cpu);
+ {
+ 	return 0;
+@@ -281,45 +288,45 @@ static inline void printk_trigger_flush(void)
+ #endif
+ 
+ #ifdef CONFIG_SMP
+-extern int __printk_cpu_trylock(void);
+-extern void __printk_wait_on_cpu_lock(void);
+-extern void __printk_cpu_unlock(void);
++extern int __printk_cpu_sync_try_get(void);
++extern void __printk_cpu_sync_wait(void);
++extern void __printk_cpu_sync_put(void);
 +
 +#else
 +
-+#define __printk_cpu_trylock()		1
-+#define __printk_wait_on_cpu_lock()
-+#define __printk_cpu_unlock()
-+
-+static inline bool kgdb_roundup_delay(unsigned int cpu)
-+{
-+	return false;
-+}
++#define __printk_cpu_sync_try_get() true
++#define __printk_cpu_sync_wait()
++#define __printk_cpu_sync_put()
 +#endif /* CONFIG_SMP */
  
  /**
 - * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
 - *                             lock and disable interrupts.
-+ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
-+ *                                 lock and disable interrupts.
++ * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk
++ *                                 cpu-reentrant spinning lock.
   * @flags: Stack-allocated storage for saving local interrupt state,
 - *         to be passed to printk_cpu_unlock_irqrestore().
-+ *         to be passed to raw_printk_cpu_unlock_irqrestore().
++ *         to be passed to printk_cpu_sync_put_irqrestore().
   *
   * If the lock is owned by another CPU, spin until it becomes available.
   * Interrupts are restored while spinning.
   */
 -#define printk_cpu_lock_irqsave(flags)		\
-+#define raw_printk_cpu_lock_irqsave(flags)	\
- 	for (;;) {				\
- 		local_irq_save(flags);		\
- 		if (__printk_cpu_trylock())	\
-@@ -304,22 +302,30 @@ extern void __printk_cpu_unlock(void);
+-	for (;;) {				\
+-		local_irq_save(flags);		\
+-		if (__printk_cpu_trylock())	\
+-			break;			\
+-		local_irq_restore(flags);	\
+-		__printk_wait_on_cpu_lock();	\
++#define printk_cpu_sync_get_irqsave(flags)		\
++	for (;;) {					\
++		local_irq_save(flags);			\
++		if (__printk_cpu_sync_try_get())	\
++			break;				\
++		local_irq_restore(flags);		\
++		__printk_cpu_sync_wait();		\
  	}
  
  /**
 - * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
 - *                                  lock and restore interrupts.
 - * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
-+ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
-+ *                                      spinning lock and restore interrupts.
-+ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
++ * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning
++ *                                    lock and restore interrupts.
++ * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave().
   */
 -#define printk_cpu_unlock_irqrestore(flags)	\
-+#define raw_printk_cpu_unlock_irqrestore(flags)	\
++#define printk_cpu_sync_put_irqrestore(flags)	\
  	do {					\
- 		__printk_cpu_unlock();		\
+-		__printk_cpu_unlock();		\
++		__printk_cpu_sync_put();	\
  		local_irq_restore(flags);	\
 -	} while (0)				\
 -
 -#else
-+	} while (0)
- 
+-
 -#define printk_cpu_lock_irqsave(flags) ((void)flags)
 -#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
-+/*
-+ * Used to synchronize atomic consoles.
-+ *
-+ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
-+ * are _not_ restored while spinning.
-+ */
-+#define console_atomic_lock(flags)		\
-+	do {					\
-+		local_irq_save(flags);		\
-+		while (!__printk_cpu_trylock())	\
-+			cpu_relax();		\
-+	} while (0)
- 
+-
 -#endif /* CONFIG_SMP */
-+#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
++	} while (0)
  
  extern int kptr_restrict;
  
-@@ -448,8 +454,6 @@ struct pi_entry {
-  * See the vsnprintf() documentation for format string extensions over C99.
-  */
- #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
--#define printk_deferred(fmt, ...)					\
--	printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__)
- 
- /**
-  * pr_emerg - Print an emergency-level message
-@@ -587,13 +591,9 @@ struct pi_entry {
- #ifdef CONFIG_PRINTK
- #define printk_once(fmt, ...)					\
- 	DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__)
--#define printk_deferred_once(fmt, ...)				\
--	DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__)
- #else
- #define printk_once(fmt, ...)					\
- 	no_printk(fmt, ##__VA_ARGS__)
--#define printk_deferred_once(fmt, ...)				\
--	no_printk(fmt, ##__VA_ARGS__)
- #endif
- 
- #define pr_emerg_once(fmt, ...)					\
 diff --git a/include/linux/random.h b/include/linux/random.h
-index f45b8be3e3c4..0e41d0527809 100644
+index f45b8be3e3c4..a02c285a5ee5 100644
 --- a/include/linux/random.h
 +++ b/include/linux/random.h
-@@ -35,7 +35,7 @@ static inline void add_latent_entropy(void) {}
+@@ -35,7 +35,8 @@ static inline void add_latent_entropy(void) {}
  
  extern void add_input_randomness(unsigned int type, unsigned int code,
  				 unsigned int value) __latent_entropy;
 -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
-+extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy;
++extern void add_interrupt_randomness(int irq) __latent_entropy;
++extern void process_interrupt_randomness(void);
  
  extern void get_random_bytes(void *buf, int nbytes);
  extern int wait_for_random_bytes(void);
@@ -3980,10 +3738,10 @@ index b676aa419eef..c21c7f8103e2 100644
  #define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
  #define DEFAULT_RATELIMIT_BURST		10
 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
-index 434d12fe2d4f..de6d1a21f113 100644
+index 5e0beb5c5659..3c61f246966d 100644
 --- a/include/linux/rcupdate.h
 +++ b/include/linux/rcupdate.h
-@@ -94,6 +94,13 @@ void rcu_init_tasks_generic(void);
+@@ -95,6 +95,13 @@ void rcu_init_tasks_generic(void);
  static inline void rcu_init_tasks_generic(void) { }
  #endif
  
@@ -4024,8 +3782,79 @@ index 9deedfeec2b1..7d049883a08a 100644
  extern int rt_mutex_trylock(struct rt_mutex *lock);
  
  extern void rt_mutex_unlock(struct rt_mutex *lock);
+diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
+index 2c0ad417ce3c..8f416c5e929e 100644
+--- a/include/linux/rwlock.h
++++ b/include/linux/rwlock.h
+@@ -55,6 +55,12 @@ do {								\
+ #define write_lock(lock)	_raw_write_lock(lock)
+ #define read_lock(lock)		_raw_read_lock(lock)
+ 
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++#define write_lock_nested(lock, subclass)	_raw_write_lock_nested(lock, subclass)
++#else
++#define write_lock_nested(lock, subclass)	_raw_write_lock(lock)
++#endif
++
+ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ 
+ #define read_lock_irqsave(lock, flags)			\
+diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
+index f1db6f17c4fb..dceb0a59b692 100644
+--- a/include/linux/rwlock_api_smp.h
++++ b/include/linux/rwlock_api_smp.h
+@@ -17,6 +17,7 @@
+ 
+ void __lockfunc _raw_read_lock(rwlock_t *lock)		__acquires(lock);
+ void __lockfunc _raw_write_lock(rwlock_t *lock)		__acquires(lock);
++void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)	__acquires(lock);
+ void __lockfunc _raw_read_lock_bh(rwlock_t *lock)	__acquires(lock);
+ void __lockfunc _raw_write_lock_bh(rwlock_t *lock)	__acquires(lock);
+ void __lockfunc _raw_read_lock_irq(rwlock_t *lock)	__acquires(lock);
+@@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock)
+ 	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
+ }
+ 
++static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass)
++{
++	preempt_disable();
++	rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
++}
++
+ #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */
+ 
+ static inline void __raw_write_unlock(rwlock_t *lock)
+diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
+index 49c1f3842ed5..8544ff05e594 100644
+--- a/include/linux/rwlock_rt.h
++++ b/include/linux/rwlock_rt.h
+@@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock);
+ extern int rt_read_trylock(rwlock_t *rwlock);
+ extern void rt_read_unlock(rwlock_t *rwlock);
+ extern void rt_write_lock(rwlock_t *rwlock);
++extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
+ extern int rt_write_trylock(rwlock_t *rwlock);
+ extern void rt_write_unlock(rwlock_t *rwlock);
+ 
+@@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock)
+ 	rt_write_lock(rwlock);
+ }
+ 
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass)
++{
++	rt_write_lock_nested(rwlock, subclass);
++}
++#else
++#define write_lock_nested(lock, subclass)	rt_write_lock(((void)(subclass), (lock)))
++#endif
++
+ static __always_inline void write_lock_bh(rwlock_t *rwlock)
+ {
+ 	local_bh_disable();
 diff --git a/include/linux/sched.h b/include/linux/sched.h
-index c1a927ddec64..4401d0f05cb3 100644
+index 78c351e35fec..971d20337ad3 100644
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -118,12 +118,8 @@ struct task_group;
@@ -4041,7 +3870,7 @@ index c1a927ddec64..4401d0f05cb3 100644
  /*
   * Special states are those that do not use the normal wait-loop pattern. See
   * the comment with set_special_state().
-@@ -1084,6 +1080,10 @@ struct task_struct {
+@@ -1082,6 +1078,10 @@ struct task_struct {
  	/* Restored if set_restore_sigmask() was used: */
  	sigset_t			saved_sigmask;
  	struct sigpending		pending;
@@ -4052,7 +3881,7 @@ index c1a927ddec64..4401d0f05cb3 100644
  	unsigned long			sas_ss_sp;
  	size_t				sas_ss_size;
  	unsigned int			sas_ss_flags;
-@@ -1730,6 +1730,16 @@ static __always_inline bool is_percpu_thread(void)
+@@ -1727,6 +1727,16 @@ static __always_inline bool is_percpu_thread(void)
  #endif
  }
  
@@ -4069,7 +3898,7 @@ index c1a927ddec64..4401d0f05cb3 100644
  /* Per-process atomic flags. */
  #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
  #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
-@@ -2005,6 +2015,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
+@@ -1999,6 +2009,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
  }
  
@@ -4188,94 +4017,36 @@ index c1a927ddec64..4401d0f05cb3 100644
  /*
   * cond_resched() and cond_resched_lock(): latency reduction via
   * explicit rescheduling in places that are safe. The return
-@@ -2039,7 +2161,7 @@ static inline int _cond_resched(void) { return 0; }
- #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
+diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
+index d10150587d81..ccd1336aa7f4 100644
+--- a/include/linux/sched/task_stack.h
++++ b/include/linux/sched/task_stack.h
+@@ -70,6 +70,7 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
+ }
  
- #define cond_resched() ({			\
--	___might_sleep(__FILE__, __LINE__, 0);	\
-+	__might_resched(__FILE__, __LINE__, 0);	\
- 	_cond_resched();			\
- })
+ extern void put_task_stack(struct task_struct *tsk);
++extern void put_task_stack_sched(struct task_struct *tsk);
+ #else
+ static inline void *try_get_task_stack(struct task_struct *tsk)
+ {
+@@ -77,8 +78,17 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
+ }
  
-@@ -2047,19 +2169,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
- extern int __cond_resched_rwlock_read(rwlock_t *lock);
- extern int __cond_resched_rwlock_write(rwlock_t *lock);
+ static inline void put_task_stack(struct task_struct *tsk) {}
++static inline void put_task_stack_sched(struct task_struct *tsk) {}
+ #endif
  
--#define cond_resched_lock(lock) ({				\
--	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
--	__cond_resched_lock(lock);				\
-+#define MIGHT_RESCHED_RCU_SHIFT		8
-+#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
-+
-+#ifndef CONFIG_PREEMPT_RT
-+/*
-+ * Non RT kernels have an elevated preempt count due to the held lock,
-+ * but are not allowed to be inside a RCU read side critical section
-+ */
-+# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
++#ifdef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
++static inline void task_stack_cleanup(struct task_struct *tsk) {}
 +#else
-+/*
-+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
-+ * cond_resched*lock() has to take that into account because it checks for
-+ * preempt_count() and rcu_preempt_depth().
-+ */
-+# define PREEMPT_LOCK_RESCHED_OFFSETS	\
-+	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
++extern void task_stack_cleanup(struct task_struct *tsk);
 +#endif
 +
-+#define cond_resched_lock(lock) ({						\
-+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
-+	__cond_resched_lock(lock);						\
- })
- 
--#define cond_resched_rwlock_read(lock) ({			\
--	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
--	__cond_resched_rwlock_read(lock);			\
-+#define cond_resched_rwlock_read(lock) ({					\
-+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
-+	__cond_resched_rwlock_read(lock);					\
- })
- 
--#define cond_resched_rwlock_write(lock) ({			\
--	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
--	__cond_resched_rwlock_write(lock);			\
-+#define cond_resched_rwlock_write(lock) ({					\
-+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
-+	__cond_resched_rwlock_write(lock);					\
- })
- 
- static inline void cond_resched_rcu(void)
-diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
-index 5561486fddef..8358352428d4 100644
---- a/include/linux/sched/mm.h
-+++ b/include/linux/sched/mm.h
-@@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_struct *mm)
- 		__mmdrop(mm);
- }
- 
-+#ifdef CONFIG_PREEMPT_RT
-+extern void __mmdrop_delayed(struct rcu_head *rhp);
-+
-+/*
-+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
-+ * kernels via RCU.
-+ */
-+static inline void mmdrop_sched(struct mm_struct *mm)
-+{
-+	/* Provides a full memory barrier. See mmdrop() */
-+	if (atomic_dec_and_test(&mm->mm_count))
-+		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
-+}
-+#else
-+static inline void mmdrop_sched(struct mm_struct *mm)
-+{
-+	mmdrop(mm);
-+}
-+#endif
++void exit_task_stack_account(struct task_struct *tsk);
 +
- /**
-  * mmget() - Pin the address space associated with a &struct mm_struct.
-  * @mm: The address space to pin.
+ #define task_stack_end_corrupted(task) \
+ 		(*(end_of_stack(task)) != STACK_END_MAGIC)
+ 
 diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
 index 5db211f43b29..aa011f668705 100644
 --- a/include/linux/serial_8250.h
@@ -4306,36 +4077,11 @@ index 5db211f43b29..aa011f668705 100644
  int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
  int serial8250_console_exit(struct uart_port *port);
  
-diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
-index b8c273af2910..a66f6ddbdd56 100644
---- a/include/linux/skbuff.h
-+++ b/include/linux/skbuff.h
-@@ -297,6 +297,7 @@ struct sk_buff_head {
- 
- 	__u32		qlen;
- 	spinlock_t	lock;
-+	raw_spinlock_t	raw_lock;
- };
- 
- struct sk_buff;
-@@ -1932,6 +1933,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
- 	__skb_queue_head_init(list);
- }
- 
-+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
-+{
-+	raw_spin_lock_init(&list->raw_lock);
-+	__skb_queue_head_init(list);
-+}
-+
- static inline void skb_queue_head_init_class(struct sk_buff_head *list,
- 		struct lock_class_key *class)
- {
 diff --git a/include/linux/smp.h b/include/linux/smp.h
-index 510519e8a1eb..7ac9fdb5ad09 100644
+index a80ab58ae3f1..dd3441d8af44 100644
 --- a/include/linux/smp.h
 +++ b/include/linux/smp.h
-@@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void)
+@@ -267,6 +267,9 @@ static inline int get_boot_cpu_id(void)
  #define get_cpu()		({ preempt_disable(); __smp_processor_id(); })
  #define put_cpu()		preempt_enable()
  
@@ -4345,6 +4091,18 @@ index 510519e8a1eb..7ac9fdb5ad09 100644
  /*
   * Callback to arch code if there's nosmp or maxcpus=0 on the
   * boot command line:
+diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
+index d0d188861ad6..b8ba00ccccde 100644
+--- a/include/linux/spinlock_api_up.h
++++ b/include/linux/spinlock_api_up.h
+@@ -59,6 +59,7 @@
+ #define _raw_spin_lock_nested(lock, subclass)	__LOCK(lock)
+ #define _raw_read_lock(lock)			__LOCK(lock)
+ #define _raw_write_lock(lock)			__LOCK(lock)
++#define _raw_write_lock_nested(lock, subclass)	__LOCK(lock)
+ #define _raw_spin_lock_bh(lock)			__LOCK_BH(lock)
+ #define _raw_read_lock_bh(lock)			__LOCK_BH(lock)
+ #define _raw_write_lock_bh(lock)		__LOCK_BH(lock)
 diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
 index c09b6407ae1b..7f86a2016ac5 100644
 --- a/include/linux/spinlock_types_up.h
@@ -4358,38 +4116,8 @@ index c09b6407ae1b..7f86a2016ac5 100644
  # error "please don't include this file directly"
  #endif
  
-diff --git a/include/linux/suspend.h b/include/linux/suspend.h
-index 8af13ba60c7e..79b6933ef8a0 100644
---- a/include/linux/suspend.h
-+++ b/include/linux/suspend.h
-@@ -550,23 +550,17 @@ static inline void unlock_system_sleep(void) {}
- #ifdef CONFIG_PM_SLEEP_DEBUG
- extern bool pm_print_times_enabled;
- extern bool pm_debug_messages_on;
--extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...);
-+extern __printf(1, 2) void pm_pr_dbg(const char *fmt, ...);
- #else
- #define pm_print_times_enabled	(false)
- #define pm_debug_messages_on	(false)
- 
- #include <linux/printk.h>
- 
--#define __pm_pr_dbg(defer, fmt, ...) \
-+#define pm_pr_dbg(fmt, ...) \
- 	no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
- #endif
- 
--#define pm_pr_dbg(fmt, ...) \
--	__pm_pr_dbg(false, fmt, ##__VA_ARGS__)
--
--#define pm_deferred_pr_dbg(fmt, ...) \
--	__pm_pr_dbg(true, fmt, ##__VA_ARGS__)
--
- #ifdef CONFIG_PM_AUTOSLEEP
- 
- /* kernel/power/autosleep.c */
 diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
-index 0999f6317978..7af834b7c114 100644
+index ad0c4e041030..3033c8f05298 100644
 --- a/include/linux/thread_info.h
 +++ b/include/linux/thread_info.h
 @@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
@@ -4412,7 +4140,7 @@ index 0999f6317978..7af834b7c114 100644
  #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
  static inline int arch_within_stack_frames(const void * const stack,
 diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
-index 57113190448c..827725f41149 100644
+index 2d167ac3452c..3f80b9da186e 100644
 --- a/include/linux/trace_events.h
 +++ b/include/linux/trace_events.h
 @@ -69,6 +69,7 @@ struct trace_entry {
@@ -4444,7 +4172,7 @@ index 57113190448c..827725f41149 100644
  
  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
-index e81856c0ba13..81dc1f5e181a 100644
+index e8ec116c916b..6ad4e9032d53 100644
 --- a/include/linux/u64_stats_sync.h
 +++ b/include/linux/u64_stats_sync.h
 @@ -66,7 +66,7 @@
@@ -4452,35 +4180,11 @@ index e81856c0ba13..81dc1f5e181a 100644
  
  struct u64_stats_sync {
 -#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-+#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
  	seqcount_t	seq;
  #endif
  };
-@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
- 	return local64_read(&p->v);
- }
- 
-+static inline void u64_stats_set(u64_stats_t *p, u64 val)
-+{
-+	local64_set(&p->v, val);
-+}
-+
- static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
- {
- 	local64_add(val, &p->v);
-@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
- 	return p->v;
- }
- 
-+static inline void u64_stats_set(u64_stats_t *p, u64 val)
-+{
-+	p->v = val;
-+}
-+
- static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
- {
- 	p->v += val;
-@@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
+@@ -125,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
  }
  #endif
  
@@ -4489,7 +4193,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  #define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
  #else
  static inline void u64_stats_init(struct u64_stats_sync *syncp)
-@@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
+@@ -135,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
  
  static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
  {
@@ -4511,7 +4215,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  #endif
  }
  
-@@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+@@ -152,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
  {
  	unsigned long flags = 0;
  
@@ -4525,7 +4229,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	write_seqcount_begin(&syncp->seq);
  #endif
  	return flags;
-@@ -153,15 +170,18 @@ static inline void
+@@ -163,15 +170,18 @@ static inline void
  u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
  				unsigned long flags)
  {
@@ -4547,7 +4251,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	return read_seqcount_begin(&syncp->seq);
  #else
  	return 0;
-@@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
+@@ -180,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
  
  static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
  {
@@ -4556,7 +4260,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	preempt_disable();
  #endif
  	return __u64_stats_fetch_begin(syncp);
-@@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
+@@ -189,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
  static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
  					 unsigned int start)
  {
@@ -4565,7 +4269,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	return read_seqcount_retry(&syncp->seq, start);
  #else
  	return false;
-@@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+@@ -199,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
  static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
  					 unsigned int start)
  {
@@ -4574,7 +4278,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	preempt_enable();
  #endif
  	return __u64_stats_fetch_retry(syncp, start);
-@@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+@@ -213,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
   */
  static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
  {
@@ -4585,7 +4289,7 @@ index e81856c0ba13..81dc1f5e181a 100644
  	local_irq_disable();
  #endif
  	return __u64_stats_fetch_begin(syncp);
-@@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
+@@ -222,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
  static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
  					     unsigned int start)
  {
@@ -4596,400 +4300,58 @@ index e81856c0ba13..81dc1f5e181a 100644
  	local_irq_enable();
  #endif
  	return __u64_stats_fetch_retry(syncp, start);
-diff --git a/include/net/act_api.h b/include/net/act_api.h
-index f19f7f4a463c..b5b624c7e488 100644
---- a/include/net/act_api.h
-+++ b/include/net/act_api.h
-@@ -30,13 +30,13 @@ struct tc_action {
- 	atomic_t			tcfa_bindcnt;
- 	int				tcfa_action;
- 	struct tcf_t			tcfa_tm;
--	struct gnet_stats_basic_packed	tcfa_bstats;
--	struct gnet_stats_basic_packed	tcfa_bstats_hw;
-+	struct gnet_stats_basic_sync	tcfa_bstats;
-+	struct gnet_stats_basic_sync	tcfa_bstats_hw;
- 	struct gnet_stats_queue		tcfa_qstats;
- 	struct net_rate_estimator __rcu *tcfa_rate_est;
- 	spinlock_t			tcfa_lock;
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
- 	struct gnet_stats_queue __percpu *cpu_qstats;
- 	struct tc_cookie	__rcu *act_cookie;
- 	struct tcf_chain	__rcu *goto_chain;
-@@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
- 					    struct sk_buff *skb)
- {
- 	if (likely(a->cpu_bstats)) {
--		bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
-+		bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
- 		return;
- 	}
- 	spin_lock(&a->tcfa_lock);
-diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
-index 1424e02cef90..7aa2b8e1fb29 100644
---- a/include/net/gen_stats.h
-+++ b/include/net/gen_stats.h
-@@ -7,14 +7,17 @@
- #include <linux/rtnetlink.h>
- #include <linux/pkt_sched.h>
- 
--/* Note: this used to be in include/uapi/linux/gen_stats.h */
--struct gnet_stats_basic_packed {
--	__u64	bytes;
--	__u64	packets;
--};
--
--struct gnet_stats_basic_cpu {
--	struct gnet_stats_basic_packed bstats;
-+/* Throughput stats.
-+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
-+ *
-+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
-+ * bstats), then the internal stat values can be written to and read
-+ * from directly. Otherwise, use _bstats_set/update() for writes and
-+ * gnet_stats_add_basic() for reads.
-+ */
-+struct gnet_stats_basic_sync {
-+	u64_stats_t bytes;
-+	u64_stats_t packets;
- 	struct u64_stats_sync syncp;
- } __aligned(2 * sizeof(u64));
- 
-@@ -34,6 +37,7 @@ struct gnet_dump {
- 	struct tc_stats   tc_stats;
- };
- 
-+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
- int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
- 			  struct gnet_dump *d, int padattr);
- 
-@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
- 				 spinlock_t *lock, struct gnet_dump *d,
- 				 int padattr);
- 
--int gnet_stats_copy_basic(const seqcount_t *running,
--			  struct gnet_dump *d,
--			  struct gnet_stats_basic_cpu __percpu *cpu,
--			  struct gnet_stats_basic_packed *b);
--void __gnet_stats_copy_basic(const seqcount_t *running,
--			     struct gnet_stats_basic_packed *bstats,
--			     struct gnet_stats_basic_cpu __percpu *cpu,
--			     struct gnet_stats_basic_packed *b);
--int gnet_stats_copy_basic_hw(const seqcount_t *running,
--			     struct gnet_dump *d,
--			     struct gnet_stats_basic_cpu __percpu *cpu,
--			     struct gnet_stats_basic_packed *b);
-+int gnet_stats_copy_basic(struct gnet_dump *d,
-+			  struct gnet_stats_basic_sync __percpu *cpu,
-+			  struct gnet_stats_basic_sync *b, bool running);
-+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
-+			  struct gnet_stats_basic_sync __percpu *cpu,
-+			  struct gnet_stats_basic_sync *b, bool running);
-+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
-+			     struct gnet_stats_basic_sync __percpu *cpu,
-+			     struct gnet_stats_basic_sync *b, bool running);
- int gnet_stats_copy_rate_est(struct gnet_dump *d,
- 			     struct net_rate_estimator __rcu **ptr);
- int gnet_stats_copy_queue(struct gnet_dump *d,
- 			  struct gnet_stats_queue __percpu *cpu_q,
- 			  struct gnet_stats_queue *q, __u32 qlen);
--void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
--			     const struct gnet_stats_queue __percpu *cpu_q,
--			     const struct gnet_stats_queue *q, __u32 qlen);
-+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
-+			  const struct gnet_stats_queue __percpu *cpu_q,
-+			  const struct gnet_stats_queue *q);
- int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
- 
- int gnet_stats_finish_copy(struct gnet_dump *d);
- 
--int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
--		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
-+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
-+		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
- 		      struct net_rate_estimator __rcu **rate_est,
- 		      spinlock_t *lock,
--		      seqcount_t *running, struct nlattr *opt);
-+		      bool running, struct nlattr *opt);
- void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
--int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
--			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
-+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
-+			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
- 			  struct net_rate_estimator __rcu **ptr,
- 			  spinlock_t *lock,
--			  seqcount_t *running, struct nlattr *opt);
-+			  bool running, struct nlattr *opt);
- bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
- bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
- 			struct gnet_stats_rate_est64 *sample);
-diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
-index 832ab69efda5..4c3809e141f4 100644
---- a/include/net/netfilter/xt_rateest.h
-+++ b/include/net/netfilter/xt_rateest.h
-@@ -6,7 +6,7 @@
- 
- struct xt_rateest {
- 	/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
--	struct gnet_stats_basic_packed	bstats;
-+	struct gnet_stats_basic_sync	bstats;
- 	spinlock_t			lock;
- 
- 
-diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
-index 83a6d0792180..4a5833108083 100644
---- a/include/net/pkt_cls.h
-+++ b/include/net/pkt_cls.h
-@@ -765,7 +765,7 @@ struct tc_cookie {
- };
- 
- struct tc_qopt_offload_stats {
--	struct gnet_stats_basic_packed *bstats;
-+	struct gnet_stats_basic_sync *bstats;
- 	struct gnet_stats_queue *qstats;
- };
- 
-@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
- };
- 
- struct tc_gred_qopt_offload_stats {
--	struct gnet_stats_basic_packed bstats[MAX_DPs];
-+	struct gnet_stats_basic_sync bstats[MAX_DPs];
- 	struct gnet_stats_queue qstats[MAX_DPs];
- 	struct red_stats *xstats[MAX_DPs];
- };
-diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
-index 8c2d611639fc..73c76ffdf803 100644
---- a/include/net/sch_generic.h
-+++ b/include/net/sch_generic.h
-@@ -40,6 +40,13 @@ enum qdisc_state_t {
- 	__QDISC_STATE_DRAINING,
- };
- 
-+enum qdisc_state2_t {
-+	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
-+	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
-+	 */
-+	__QDISC_STATE2_RUNNING,
-+};
-+
- #define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
- #define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
- 
-@@ -97,7 +104,7 @@ struct Qdisc {
- 	struct netdev_queue	*dev_queue;
- 
- 	struct net_rate_estimator __rcu *rate_est;
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
- 	struct gnet_stats_queue	__percpu *cpu_qstats;
- 	int			pad;
- 	refcount_t		refcnt;
-@@ -107,10 +114,10 @@ struct Qdisc {
- 	 */
- 	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
- 	struct qdisc_skb_head	q;
--	struct gnet_stats_basic_packed bstats;
--	seqcount_t		running;
-+	struct gnet_stats_basic_sync bstats;
- 	struct gnet_stats_queue	qstats;
- 	unsigned long		state;
-+	unsigned long		state2; /* must be written under qdisc spinlock */
- 	struct Qdisc            *next_sched;
- 	struct sk_buff_head	skb_bad_txq;
- 
-@@ -143,11 +150,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
- 	return NULL;
- }
- 
-+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
-+ * root_lock section, or provide their own memory barriers -- ordering
-+ * against qdisc_run_begin/end() atomic bit operations.
-+ */
- static inline bool qdisc_is_running(struct Qdisc *qdisc)
- {
- 	if (qdisc->flags & TCQ_F_NOLOCK)
- 		return spin_is_locked(&qdisc->seqlock);
--	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
-+	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
- }
- 
- static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
-@@ -167,6 +178,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
- 	return !READ_ONCE(qdisc->q.qlen);
- }
- 
-+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
-+ * the qdisc root lock acquired.
-+ */
- static inline bool qdisc_run_begin(struct Qdisc *qdisc)
- {
- 	if (qdisc->flags & TCQ_F_NOLOCK) {
-@@ -206,15 +220,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
- 		 * after it releases the lock at the end of qdisc_run_end().
- 		 */
- 		return spin_trylock(&qdisc->seqlock);
--	} else if (qdisc_is_running(qdisc)) {
--		return false;
- 	}
--	/* Variant of write_seqcount_begin() telling lockdep a trylock
--	 * was attempted.
--	 */
--	raw_write_seqcount_begin(&qdisc->running);
--	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
--	return true;
-+	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
- }
- 
- static inline void qdisc_run_end(struct Qdisc *qdisc)
-@@ -226,7 +233,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
- 				      &qdisc->state)))
- 			__netif_schedule(qdisc);
- 	} else {
--		write_seqcount_end(&qdisc->running);
-+		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
- 	}
- }
- 
-@@ -592,14 +599,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
- 	return qdisc_lock(root);
- }
+diff --git a/include/trace/events/net.h b/include/trace/events/net.h
+index 78c448c6ab4c..032b431b987b 100644
+--- a/include/trace/events/net.h
++++ b/include/trace/events/net.h
+@@ -260,13 +260,6 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry,
+ 	TP_ARGS(skb)
+ );
  
--static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
--{
--	struct Qdisc *root = qdisc_root_sleeping(qdisc);
+-DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry,
 -
--	ASSERT_RTNL();
--	return &root->running;
--}
+-	TP_PROTO(const struct sk_buff *skb),
 -
- static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
- {
- 	return qdisc->dev_queue->dev;
-@@ -849,14 +848,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- 	return sch->enqueue(skb, sch, to_free);
- }
- 
--static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
-+static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
- 				  __u64 bytes, __u32 packets)
- {
--	bstats->bytes += bytes;
--	bstats->packets += packets;
-+	u64_stats_update_begin(&bstats->syncp);
-+	u64_stats_add(&bstats->bytes, bytes);
-+	u64_stats_add(&bstats->packets, packets);
-+	u64_stats_update_end(&bstats->syncp);
- }
+-	TP_ARGS(skb)
+-);
+-
+ DECLARE_EVENT_CLASS(net_dev_rx_exit_template,
  
--static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
-+static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
- 				 const struct sk_buff *skb)
- {
- 	_bstats_update(bstats,
-@@ -864,26 +865,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
- 		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
- }
+ 	TP_PROTO(int ret),
+@@ -312,13 +305,6 @@ DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_exit,
+ 	TP_ARGS(ret)
+ );
  
--static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
--				      __u64 bytes, __u32 packets)
--{
--	u64_stats_update_begin(&bstats->syncp);
--	_bstats_update(&bstats->bstats, bytes, packets);
--	u64_stats_update_end(&bstats->syncp);
--}
+-DEFINE_EVENT(net_dev_rx_exit_template, netif_rx_ni_exit,
 -
--static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
--				     const struct sk_buff *skb)
--{
--	u64_stats_update_begin(&bstats->syncp);
--	bstats_update(&bstats->bstats, skb);
--	u64_stats_update_end(&bstats->syncp);
--}
+-	TP_PROTO(int ret),
 -
- static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
- 					   const struct sk_buff *skb)
- {
--	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
- }
- 
- static inline void qdisc_bstats_update(struct Qdisc *sch,
-@@ -972,10 +957,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
- 					     __u32 *backlog)
- {
- 	struct gnet_stats_queue qstats = { 0 };
--	__u32 len = qdisc_qlen_sum(sch);
- 
--	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
--	*qlen = qstats.qlen;
-+	gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
-+	*qlen = qstats.qlen + qdisc_qlen(sch);
- 	*backlog = qstats.backlog;
- }
- 
-@@ -1316,7 +1300,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
- struct mini_Qdisc {
- 	struct tcf_proto *filter_list;
- 	struct tcf_block *block;
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
- 	struct gnet_stats_queue	__percpu *cpu_qstats;
- 	struct rcu_head rcu;
- };
-@@ -1324,7 +1308,7 @@ struct mini_Qdisc {
- static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
- 						const struct sk_buff *skb)
- {
--	bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
- }
+-	TP_ARGS(ret)
+-);
+-
+ DEFINE_EVENT(net_dev_rx_exit_template, netif_receive_skb_list_exit,
  
- static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+ 	TP_PROTO(int ret),
 diff --git a/init/Kconfig b/init/Kconfig
-index 11f8a845f259..0b8a65ae1d72 100644
+index 4b7bac10c72d..732dbb61ec7e 100644
 --- a/init/Kconfig
 +++ b/init/Kconfig
-@@ -901,7 +901,7 @@ config NUMA_BALANCING
- 	bool "Memory placement aware NUMA scheduler"
- 	depends on ARCH_SUPPORTS_NUMA_BALANCING
- 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
--	depends on SMP && NUMA && MIGRATION
-+	depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
- 	help
- 	  This option adds support for automatic NUMA aware memory/task placement.
- 	  The mechanism is quite primitive and is based on migrating memory when
-@@ -938,6 +938,7 @@ config PAGE_COUNTER
- 
- config MEMCG
- 	bool "Memory controller"
-+	depends on !PREEMPT_RT
- 	select PAGE_COUNTER
- 	select EVENTFD
- 	help
-@@ -1896,6 +1897,7 @@ choice
+@@ -1542,6 +1542,10 @@ config PRINTK
+ 	  very difficult to diagnose system problems, saying N here is
+ 	  strongly discouraged.
  
- config SLAB
- 	bool "SLAB"
-+	depends on !PREEMPT_RT
- 	select HAVE_HARDENED_USERCOPY_ALLOCATOR
- 	help
- 	  The regular slab allocator that is established and known to work
-@@ -1916,6 +1918,7 @@ config SLUB
- config SLOB
- 	depends on EXPERT
- 	bool "SLOB (Simple Allocator)"
-+	depends on !PREEMPT_RT
- 	help
- 	   SLOB replaces the stock allocator with a drastically simpler
- 	   allocator. SLOB is generally more space efficient but
++config HAVE_ATOMIC_CONSOLE
++	bool
++	default n
++
+ config BUG
+ 	bool "BUG() support" if EXPERT
+ 	default y
 diff --git a/init/main.c b/init/main.c
-index bcd132d4e7bd..af4c7f963955 100644
+index bb984ed79de0..eb30d1f729e9 100644
 --- a/init/main.c
 +++ b/init/main.c
-@@ -1604,6 +1604,7 @@ static noinline void __init kernel_init_freeable(void)
+@@ -1597,6 +1597,7 @@ static noinline void __init kernel_init_freeable(void)
  
  	rcu_init_tasks_generic();
  	do_pre_smp_initcalls();
@@ -4998,7 +4360,7 @@ index bcd132d4e7bd..af4c7f963955 100644
  
  	smp_init();
 diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
-index 5876e30c5740..5df0776264c2 100644
+index ce77f0265660..5d3e650cdf48 100644
 --- a/kernel/Kconfig.preempt
 +++ b/kernel/Kconfig.preempt
 @@ -1,5 +1,11 @@
@@ -5010,9 +4372,9 @@ index 5876e30c5740..5df0776264c2 100644
 +config PREEMPT_LAZY
 +	def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
 +
- choice
- 	prompt "Preemption Model"
- 	default PREEMPT_NONE
+ config PREEMPT_NONE_BUILD
+ 	bool
+ 
 diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
 index 1486768f2318..bb3b805436c4 100644
 --- a/kernel/cgroup/rstat.c
@@ -5037,106 +4399,6 @@ index 1486768f2318..bb3b805436c4 100644
  
  		/* if @may_sleep, play nice and yield if necessary */
  		if (may_sleep && (need_resched() ||
-diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
-index da06a5553835..3e39636da842 100644
---- a/kernel/debug/debug_core.c
-+++ b/kernel/debug/debug_core.c
-@@ -238,35 +238,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
- static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
- 	CSD_INIT(kgdb_call_nmi_hook, NULL);
- 
--void __weak kgdb_roundup_cpus(void)
-+void __weak kgdb_roundup_cpu(unsigned int cpu)
- {
- 	call_single_data_t *csd;
-+	int ret;
-+
-+	csd = &per_cpu(kgdb_roundup_csd, cpu);
-+
-+	/*
-+	 * If it didn't round up last time, don't try again
-+	 * since smp_call_function_single_async() will block.
-+	 *
-+	 * If rounding_up is false then we know that the
-+	 * previous call must have at least started and that
-+	 * means smp_call_function_single_async() won't block.
-+	 */
-+	if (kgdb_info[cpu].rounding_up)
-+		return;
-+	kgdb_info[cpu].rounding_up = true;
-+
-+	ret = smp_call_function_single_async(cpu, csd);
-+	if (ret)
-+		kgdb_info[cpu].rounding_up = false;
-+}
-+NOKPROBE_SYMBOL(kgdb_roundup_cpu);
-+
-+void __weak kgdb_roundup_cpus(void)
-+{
- 	int this_cpu = raw_smp_processor_id();
- 	int cpu;
--	int ret;
- 
- 	for_each_online_cpu(cpu) {
- 		/* No need to roundup ourselves */
- 		if (cpu == this_cpu)
- 			continue;
- 
--		csd = &per_cpu(kgdb_roundup_csd, cpu);
--
--		/*
--		 * If it didn't round up last time, don't try again
--		 * since smp_call_function_single_async() will block.
--		 *
--		 * If rounding_up is false then we know that the
--		 * previous call must have at least started and that
--		 * means smp_call_function_single_async() won't block.
--		 */
--		if (kgdb_info[cpu].rounding_up)
--			continue;
--		kgdb_info[cpu].rounding_up = true;
--
--		ret = smp_call_function_single_async(cpu, csd);
--		if (ret)
--			kgdb_info[cpu].rounding_up = false;
-+		kgdb_roundup_cpu(cpu);
- 	}
- }
- NOKPROBE_SYMBOL(kgdb_roundup_cpus);
-diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
-index 6735ac36b718..539a2f0dc89d 100644
---- a/kernel/debug/kdb/kdb_io.c
-+++ b/kernel/debug/kdb/kdb_io.c
-@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len)
- 		cp++;
- 	}
- 
-+	/* mirror output on atomic consoles */
- 	for_each_console(c) {
- 		if (!(c->flags & CON_ENABLED))
- 			continue;
- 		if (c == dbg_io_ops->cons)
- 			continue;
--		/*
--		 * Set oops_in_progress to encourage the console drivers to
--		 * disregard their internal spin locks: in the current calling
--		 * context the risk of deadlock is a bigger problem than risks
--		 * due to re-entering the console driver. We operate directly on
--		 * oops_in_progress rather than using bust_spinlocks() because
--		 * the calls bust_spinlocks() makes on exit are not appropriate
--		 * for this calling context.
--		 */
--		++oops_in_progress;
--		c->write(c, msg, msg_len);
--		--oops_in_progress;
-+
-+		if (!c->write_atomic)
-+			continue;
-+		c->write_atomic(c, msg, msg_len);
-+
- 		touch_nmi_watchdog();
- 	}
- }
 diff --git a/kernel/entry/common.c b/kernel/entry/common.c
 index d5a61d565ad5..a9579f8bf4f0 100644
 --- a/kernel/entry/common.c
@@ -5170,531 +4432,570 @@ index d5a61d565ad5..a9579f8bf4f0 100644
  	}
  }
 diff --git a/kernel/exit.c b/kernel/exit.c
-index 91a43e57a32e..1d099609568d 100644
+index f702a6a63686..383a56795e82 100644
 --- a/kernel/exit.c
 +++ b/kernel/exit.c
-@@ -64,6 +64,7 @@
- #include <linux/rcuwait.h>
- #include <linux/compat.h>
- #include <linux/io_uring.h>
-+#include <linux/kprobes.h>
- 
- #include <linux/uaccess.h>
- #include <asm/unistd.h>
-@@ -168,8 +169,14 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
- {
- 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
- 
-+	kprobe_flush_task(tsk);
+@@ -171,6 +171,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
+ 	kprobe_flush_task(tsk);
  	perf_event_delayed_put(tsk);
  	trace_sched_process_free(tsk);
-+
-+	/* RT enabled kernels delay freeing the VMAP'ed task stack */
-+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-+		put_task_stack(tsk);
-+
++	task_stack_cleanup(tsk);
  	put_task_struct(tsk);
  }
  
+@@ -871,6 +872,7 @@ void __noreturn do_exit(long code)
+ 		put_page(tsk->task_frag.page);
+ 
+ 	validate_creds_for_do_exit(tsk);
++	exit_task_stack_account(tsk);
+ 
+ 	check_stack_usage();
+ 	preempt_disable();
 diff --git a/kernel/fork.c b/kernel/fork.c
-index f3a9cd12011b..0de4abdd7059 100644
+index ae63cce182fd..3e587c69ed26 100644
 --- a/kernel/fork.c
 +++ b/kernel/fork.c
-@@ -289,7 +289,10 @@ static inline void free_thread_stack(struct task_struct *tsk)
- 			return;
- 		}
+@@ -178,13 +178,23 @@ static inline void free_task_struct(struct task_struct *tsk)
  
--		vfree_atomic(tsk->stack);
-+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+			vfree_atomic(tsk->stack);
-+		else
-+			vfree(tsk->stack);
- 		return;
- 	}
- #endif
-@@ -705,6 +708,19 @@ void __mmdrop(struct mm_struct *mm)
- }
- EXPORT_SYMBOL_GPL(__mmdrop);
+ #ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
  
-+#ifdef CONFIG_PREEMPT_RT
-+/*
-+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
-+ * by far the least expensive way to do that.
-+ */
-+void __mmdrop_delayed(struct rcu_head *rhp)
++#define THREAD_STACK_DELAYED_FREE	1UL
++
++static void thread_stack_mark_delayed_free(struct task_struct *tsk)
 +{
-+	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
++	unsigned long val = (unsigned long)tsk->stack;
 +
-+	__mmdrop(mm);
++	val |= THREAD_STACK_DELAYED_FREE;
++	WRITE_ONCE(tsk->stack, (void *)val);
 +}
-+#endif
 +
- static void mmdrop_async_fn(struct work_struct *work)
- {
- 	struct mm_struct *mm;
-diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
-index 221d80c31e94..1543934f26d2 100644
---- a/kernel/irq/handle.c
-+++ b/kernel/irq/handle.c
-@@ -190,12 +190,18 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
- 
- irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
- {
--	irqreturn_t retval;
-+	struct pt_regs *regs = get_irq_regs();
-+	u64 ip = regs ? instruction_pointer(regs) : 0;
- 	unsigned int flags = 0;
-+	irqreturn_t retval;
- 
- 	retval = __handle_irq_event_percpu(desc, &flags);
+ /*
+  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+  * kmemcache based allocator.
+  */
+ # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
  
--	add_interrupt_randomness(desc->irq_data.irq, flags);
-+#ifdef CONFIG_PREEMPT_RT
-+	desc->random_ip = ip;
-+#else
-+	add_interrupt_randomness(desc->irq_data.irq, flags, ip);
-+#endif
+-#ifdef CONFIG_VMAP_STACK
++#  ifdef CONFIG_VMAP_STACK
+ /*
+  * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
+  * flush.  Try to minimize the number of calls by caching stacks.
+@@ -209,11 +219,35 @@ static int free_vm_stack_cache(unsigned int cpu)
  
- 	if (!irq_settings_no_debug(desc))
- 		note_interrupt(desc, retval);
-diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
-index 27667e82ecc9..894e4db1fffc 100644
---- a/kernel/irq/manage.c
-+++ b/kernel/irq/manage.c
-@@ -1259,6 +1259,8 @@ static int irq_thread(void *data)
- 	irqreturn_t (*handler_fn)(struct irq_desc *desc,
- 			struct irqaction *action);
+ 	return 0;
+ }
+-#endif
  
-+	sched_set_fifo(current);
+-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
++static int memcg_charge_kernel_stack(struct vm_struct *vm)
+ {
+-#ifdef CONFIG_VMAP_STACK
++	int i;
++	int ret;
 +
- 	if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
- 					   &action->thread_flags))
- 		handler_fn = irq_forced_thread_fn;
-@@ -1279,6 +1281,12 @@ static int irq_thread(void *data)
- 		if (action_ret == IRQ_WAKE_THREAD)
- 			irq_wake_secondary(desc, action);
++	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
++	BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
++
++	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
++		ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
++		if (ret)
++			goto err;
++	}
++	return 0;
++err:
++	/*
++	 * If memcg_kmem_charge_page() fails, page's memory cgroup pointer is
++	 * NULL, and memcg_kmem_uncharge_page() in free_thread_stack() will
++	 * ignore this page.
++	 */
++	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
++		memcg_kmem_uncharge_page(vm->pages[i], 0);
++	return ret;
++}
++
++static int alloc_thread_stack_node(struct task_struct *tsk, int node)
++{
++	struct vm_struct *vm;
+ 	void *stack;
+ 	int i;
  
-+		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-+			migrate_disable();
-+			add_interrupt_randomness(action->irq, 0,
-+				 desc->random_ip ^ (unsigned long) action);
-+			migrate_enable();
+@@ -231,9 +265,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+ 		/* Clear stale pointers from reused stack. */
+ 		memset(s->addr, 0, THREAD_SIZE);
+ 
++		if (memcg_charge_kernel_stack(s)) {
++			vfree(s->addr);
++			return -ENOMEM;
 +		}
- 		wake_threads_waitq(desc);
++
+ 		tsk->stack_vm_area = s;
+ 		tsk->stack = s->addr;
+-		return s->addr;
++		return 0;
  	}
  
-@@ -1424,8 +1432,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
- 	if (IS_ERR(t))
- 		return PTR_ERR(t);
- 
--	sched_set_fifo(t);
--
  	/*
- 	 * We keep the reference to the task struct even if
- 	 * the thread dies to avoid that the interrupt code
-@@ -2827,7 +2833,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
-  *	This call sets the internal irqchip state of an interrupt,
-  *	depending on the value of @which.
-  *
-- *	This function should be called with preemption disabled if the
-+ *	This function should be called with migration disabled if the
-  *	interrupt controller has per-cpu registers.
-  */
- int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
-diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
-index c481d8458325..02b2daf07441 100644
---- a/kernel/irq/spurious.c
-+++ b/kernel/irq/spurious.c
-@@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
- 
- static int __init irqfixup_setup(char *str)
- {
-+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-+		pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
-+		return 1;
-+	}
- 	irqfixup = 1;
- 	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
- 	printk(KERN_WARNING "This may impact system performance.\n");
-@@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644);
- 
- static int __init irqpoll_setup(char *str)
- {
-+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-+		pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
-+		return 1;
+@@ -246,71 +285,93 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
+ 				     THREADINFO_GFP & ~__GFP_ACCOUNT,
+ 				     PAGE_KERNEL,
+ 				     0, node, __builtin_return_address(0));
++	if (!stack)
++		return -ENOMEM;
+ 
++	vm = find_vm_area(stack);
++	if (memcg_charge_kernel_stack(vm)) {
++		vfree(stack);
++		return -ENOMEM;
 +	}
- 	irqfixup = 2;
- 	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
- 				"enabled\n");
-diff --git a/kernel/irq_work.c b/kernel/irq_work.c
-index db8c248ebc8c..f7df715ec28e 100644
---- a/kernel/irq_work.c
-+++ b/kernel/irq_work.c
-@@ -18,11 +18,36 @@
- #include <linux/cpu.h>
- #include <linux/notifier.h>
- #include <linux/smp.h>
-+#include <linux/smpboot.h>
- #include <asm/processor.h>
- #include <linux/kasan.h>
- 
- static DEFINE_PER_CPU(struct llist_head, raised_list);
- static DEFINE_PER_CPU(struct llist_head, lazy_list);
-+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
-+
-+static void wake_irq_workd(void)
-+{
-+	struct task_struct *tsk = __this_cpu_read(irq_workd);
-+
-+	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
-+		wake_up_process(tsk);
+ 	/*
+ 	 * We can't call find_vm_area() in interrupt context, and
+ 	 * free_thread_stack() can be called in interrupt context,
+ 	 * so cache the vm_struct.
+ 	 */
+-	if (stack) {
+-		tsk->stack_vm_area = find_vm_area(stack);
+-		tsk->stack = stack;
++	tsk->stack_vm_area = vm;
++	tsk->stack = stack;
++	return 0;
 +}
 +
-+#ifdef CONFIG_SMP
-+static void irq_work_wake(struct irq_work *entry)
++static void free_thread_stack(struct task_struct *tsk, bool cache_only)
 +{
-+	wake_irq_workd();
-+}
-+
-+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
-+	IRQ_WORK_INIT_HARD(irq_work_wake);
-+#endif
++	int i;
 +
-+static int irq_workd_should_run(unsigned int cpu)
-+{
-+	return !llist_empty(this_cpu_ptr(&lazy_list));
-+}
- 
- /*
-  * Claim the entry so that no one else will poke at it.
-@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
- /* Enqueue on current CPU, work must already be claimed and preempt disabled */
- static void __irq_work_queue_local(struct irq_work *work)
- {
-+	struct llist_head *list;
-+	bool rt_lazy_work = false;
-+	bool lazy_work = false;
-+	int work_flags;
-+
-+	work_flags = atomic_read(&work->node.a_flags);
-+	if (work_flags & IRQ_WORK_LAZY)
-+		lazy_work = true;
-+	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+		 !(work_flags & IRQ_WORK_HARD_IRQ))
-+		rt_lazy_work = true;
-+
-+	if (lazy_work || rt_lazy_work)
-+		list = this_cpu_ptr(&lazy_list);
-+	else
-+		list = this_cpu_ptr(&raised_list);
++	for (i = 0; i < NR_CACHED_STACKS; i++) {
++		if (this_cpu_cmpxchg(cached_stacks[i], NULL,
++				     tsk->stack_vm_area) != NULL)
++			continue;
 +
-+	if (!llist_add(&work->node.llist, list))
++		tsk->stack = NULL;
++		tsk->stack_vm_area = NULL;
 +		return;
+ 	}
+-	return stack;
+-#else
++	if (cache_only) {
++		thread_stack_mark_delayed_free(tsk);
++		return;
++	}
 +
- 	/* If the work is "lazy", handle it from next tick if any */
--	if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
--		if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
--		    tick_nohz_tick_stopped())
--			arch_irq_work_raise();
--	} else {
--		if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
--			arch_irq_work_raise();
--	}
-+	if (!lazy_work || tick_nohz_tick_stopped())
-+		arch_irq_work_raise();
- }
- 
- /* Enqueue the irq work @work on the current CPU */
-@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
- 	if (cpu != smp_processor_id()) {
- 		/* Arch remote IPI send/receive backend aren't NMI safe */
- 		WARN_ON_ONCE(in_nmi());
-+
-+		/*
-+		 * On PREEMPT_RT the items which are not marked as
-+		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
-+		 * item is used on the remote CPU to wake the thread.
-+		 */
-+		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
-+
-+			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
-+				goto out;
++	vfree(tsk->stack);
++	tsk->stack = NULL;
++	tsk->stack_vm_area = NULL;
++}
 +
-+			work = &per_cpu(irq_work_wakeup, cpu);
-+			if (!irq_work_claim(work))
-+				goto out;
-+		}
++#  else /* !CONFIG_VMAP_STACK */
 +
- 		__smp_call_single_queue(cpu, &work->node.llist);
- 	} else {
- 		__irq_work_queue_local(work);
- 	}
-+out:
- 	preempt_enable();
++static int alloc_thread_stack_node(struct task_struct *tsk, int node)
++{
+ 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ 					     THREAD_SIZE_ORDER);
  
- 	return true;
- #endif /* CONFIG_SMP */
+ 	if (likely(page)) {
+ 		tsk->stack = kasan_reset_tag(page_address(page));
+-		return tsk->stack;
++		return 0;
+ 	}
+-	return NULL;
+-#endif
++	return -ENOMEM;
  }
  
--
- bool irq_work_needs_cpu(void)
+-static inline void free_thread_stack(struct task_struct *tsk)
++static void free_thread_stack(struct task_struct *tsk, bool cache_only)
  {
- 	struct llist_head *raised, *lazy;
-@@ -160,6 +216,10 @@ void irq_work_single(void *arg)
- 	 * else claimed it meanwhile.
- 	 */
- 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
-+
-+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
-+	    !arch_irq_work_has_interrupt())
-+		rcuwait_wake_up(&work->irqwait);
- }
- 
- static void irq_work_run_list(struct llist_head *list)
-@@ -167,7 +227,12 @@ static void irq_work_run_list(struct llist_head *list)
- 	struct irq_work *work, *tmp;
- 	struct llist_node *llnode;
- 
--	BUG_ON(!irqs_disabled());
-+	/*
-+	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
-+	 * in a per-CPU thread in preemptible context. Only the items which are
-+	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
-+	 */
-+	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
- 
- 	if (llist_empty(list))
+-#ifdef CONFIG_VMAP_STACK
+-	struct vm_struct *vm = task_stack_vm_area(tsk);
+-
+-	if (vm) {
+-		int i;
+-
+-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+-			memcg_kmem_uncharge_page(vm->pages[i], 0);
+-
+-		for (i = 0; i < NR_CACHED_STACKS; i++) {
+-			if (this_cpu_cmpxchg(cached_stacks[i],
+-					NULL, tsk->stack_vm_area) != NULL)
+-				continue;
+-
+-			return;
+-		}
+-
+-		vfree_atomic(tsk->stack);
++	if (cache_only) {
++		thread_stack_mark_delayed_free(tsk);
  		return;
-@@ -184,7 +249,10 @@ static void irq_work_run_list(struct llist_head *list)
- void irq_work_run(void)
- {
- 	irq_work_run_list(this_cpu_ptr(&raised_list));
--	irq_work_run_list(this_cpu_ptr(&lazy_list));
-+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+		irq_work_run_list(this_cpu_ptr(&lazy_list));
-+	else
-+		wake_irq_workd();
+ 	}
+-#endif
+-
+ 	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
++	tsk->stack = NULL;
  }
- EXPORT_SYMBOL_GPL(irq_work_run);
- 
-@@ -194,7 +262,11 @@ void irq_work_tick(void)
- 
- 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
- 		irq_work_run_list(raised);
--	irq_work_run_list(this_cpu_ptr(&lazy_list));
+-# else
 +
-+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+		irq_work_run_list(this_cpu_ptr(&lazy_list));
-+	else
-+		wake_irq_workd();
++#  endif /* CONFIG_VMAP_STACK */
++# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
++
+ static struct kmem_cache *thread_stack_cache;
+ 
+-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
+-						  int node)
++static int alloc_thread_stack_node(struct task_struct *tsk, int node)
+ {
+ 	unsigned long *stack;
+ 	stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+ 	stack = kasan_reset_tag(stack);
+ 	tsk->stack = stack;
+-	return stack;
++	return stack ? 0 : -ENOMEM;
  }
  
- /*
-@@ -204,8 +276,42 @@ void irq_work_tick(void)
- void irq_work_sync(struct irq_work *work)
+-static void free_thread_stack(struct task_struct *tsk)
++static void free_thread_stack(struct task_struct *tsk, bool cache_only)
  {
- 	lockdep_assert_irqs_enabled();
-+	might_sleep();
-+
-+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
-+	    !arch_irq_work_has_interrupt()) {
-+		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
-+				   TASK_UNINTERRUPTIBLE);
++	if (cache_only) {
++		thread_stack_mark_delayed_free(tsk);
 +		return;
 +	}
+ 	kmem_cache_free(thread_stack_cache, tsk->stack);
++	tsk->stack = NULL;
+ }
  
- 	while (irq_work_is_busy(work))
- 		cpu_relax();
+ void thread_stack_cache_init(void)
+@@ -320,8 +381,36 @@ void thread_stack_cache_init(void)
+ 					THREAD_SIZE, NULL);
+ 	BUG_ON(thread_stack_cache == NULL);
  }
- EXPORT_SYMBOL_GPL(irq_work_sync);
+-# endif
+-#endif
++
++# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
 +
-+static void run_irq_workd(unsigned int cpu)
++void task_stack_cleanup(struct task_struct *tsk)
 +{
-+	irq_work_run_list(this_cpu_ptr(&lazy_list));
++	unsigned long val = (unsigned long)tsk->stack;
++
++	if (!(val & THREAD_STACK_DELAYED_FREE))
++		return;
++
++	WRITE_ONCE(tsk->stack, (void *)(val & ~THREAD_STACK_DELAYED_FREE));
++	free_thread_stack(tsk, false);
 +}
 +
-+static void irq_workd_setup(unsigned int cpu)
++#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
++static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 +{
-+	sched_set_fifo_low(current);
-+}
++	unsigned long *stack;
 +
-+static struct smp_hotplug_thread irqwork_threads = {
-+	.store                  = &irq_workd,
-+	.setup			= irq_workd_setup,
-+	.thread_should_run      = irq_workd_should_run,
-+	.thread_fn              = run_irq_workd,
-+	.thread_comm            = "irq_work/%u",
-+};
++	stack = arch_alloc_thread_stack_node(tsk, node);
++	tsk->stack = stack;
++	return stack ? 0 : -ENOMEM;
++}
 +
-+static __init int irq_work_init_threads(void)
++static void free_thread_stack(struct task_struct *tsk, bool cache_only)
 +{
-+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-+		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
-+	return 0;
++	arch_free_thread_stack(tsk);
 +}
-+early_initcall(irq_work_init_threads);
-diff --git a/kernel/kcov.c b/kernel/kcov.c
-index 80bfe71bbe13..36ca640c4f8e 100644
---- a/kernel/kcov.c
-+++ b/kernel/kcov.c
-@@ -88,6 +88,7 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
- 
- struct kcov_percpu_data {
- 	void			*irq_area;
-+	local_lock_t		lock;
- 
- 	unsigned int		saved_mode;
- 	unsigned int		saved_size;
-@@ -96,7 +97,9 @@ struct kcov_percpu_data {
- 	int			saved_sequence;
- };
- 
--static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data);
-+static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
-+	.lock = INIT_LOCAL_LOCK(lock),
-+};
- 
- /* Must be called with kcov_remote_lock locked. */
- static struct kcov_remote *kcov_remote_find(u64 handle)
-@@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle)
- 	if (!in_task() && !in_serving_softirq())
- 		return;
++
++#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */
  
--	local_irq_save(flags);
-+	local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ /* SLAB cache for signal_struct structures (tsk->signal) */
+ static struct kmem_cache *signal_cachep;
+@@ -376,70 +465,55 @@ void vm_area_free(struct vm_area_struct *vma)
  
- 	/*
- 	 * Check that kcov_remote_start() is not called twice in background
-@@ -832,7 +835,7 @@ void kcov_remote_start(u64 handle)
- 	 */
- 	mode = READ_ONCE(t->kcov_mode);
- 	if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
--		local_irq_restore(flags);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
- 	/*
-@@ -841,14 +844,15 @@ void kcov_remote_start(u64 handle)
- 	 * happened while collecting coverage from a background thread.
- 	 */
- 	if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
--		local_irq_restore(flags);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
+ static void account_kernel_stack(struct task_struct *tsk, int account)
+ {
+-	void *stack = task_stack_page(tsk);
+-	struct vm_struct *vm = task_stack_vm_area(tsk);
+-
+-	if (vm) {
++	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
++		struct vm_struct *vm = task_stack_vm_area(tsk);
+ 		int i;
  
- 	spin_lock(&kcov_remote_lock);
- 	remote = kcov_remote_find(handle);
- 	if (!remote) {
--		spin_unlock_irqrestore(&kcov_remote_lock, flags);
-+		spin_unlock(&kcov_remote_lock);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
- 	kcov_debug("handle = %llx, context: %s\n", handle,
-@@ -869,19 +873,19 @@ void kcov_remote_start(u64 handle)
- 		size = CONFIG_KCOV_IRQ_AREA_SIZE;
- 		area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
- 	}
--	spin_unlock_irqrestore(&kcov_remote_lock, flags);
-+	spin_unlock(&kcov_remote_lock);
- 
- 	/* Can only happen when in_task(). */
- 	if (!area) {
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		area = vmalloc(size * sizeof(unsigned long));
- 		if (!area) {
- 			kcov_put(kcov);
- 			return;
- 		}
-+		local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ 			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
+ 					      account * (PAGE_SIZE / 1024));
+ 	} else {
++		void *stack = task_stack_page(tsk);
++
+ 		/* All stack pages are in the same node. */
+ 		mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
+ 				      account * (THREAD_SIZE / 1024));
  	}
+ }
  
--	local_irq_save(flags);
+-static int memcg_charge_kernel_stack(struct task_struct *tsk)
++void exit_task_stack_account(struct task_struct *tsk)
+ {
+-#ifdef CONFIG_VMAP_STACK
+-	struct vm_struct *vm = task_stack_vm_area(tsk);
+-	int ret;
 -
- 	/* Reset coverage size. */
- 	*(u64 *)area = 0;
+-	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
++	account_kernel_stack(tsk, -1);
+ 
+-	if (vm) {
++	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
++		struct vm_struct *vm;
+ 		int i;
  
-@@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle)
+-		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+-
+-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+-			/*
+-			 * If memcg_kmem_charge_page() fails, page's
+-			 * memory cgroup pointer is NULL, and
+-			 * memcg_kmem_uncharge_page() in free_thread_stack()
+-			 * will ignore this page.
+-			 */
+-			ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
+-						     0);
+-			if (ret)
+-				return ret;
+-		}
++		vm = task_stack_vm_area(tsk);
++		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
++			memcg_kmem_uncharge_page(vm->pages[i], 0);
  	}
- 	kcov_start(t, kcov, size, area, mode, sequence);
+-#endif
+-	return 0;
+ }
  
--	local_irq_restore(flags);
-+	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+-static void release_task_stack(struct task_struct *tsk)
++static void release_task_stack(struct task_struct *tsk, bool cache_only)
+ {
+ 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
+ 		return;  /* Better to leak the stack than to free prematurely */
  
+-	account_kernel_stack(tsk, -1);
+-	free_thread_stack(tsk);
+-	tsk->stack = NULL;
+-#ifdef CONFIG_VMAP_STACK
+-	tsk->stack_vm_area = NULL;
+-#endif
++	free_thread_stack(tsk, cache_only);
  }
- EXPORT_SYMBOL(kcov_remote_start);
-@@ -965,12 +969,12 @@ void kcov_remote_stop(void)
- 	if (!in_task() && !in_serving_softirq())
- 		return;
  
--	local_irq_save(flags);
-+	local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ void put_task_stack(struct task_struct *tsk)
+ {
+ 	if (refcount_dec_and_test(&tsk->stack_refcount))
+-		release_task_stack(tsk);
++		release_task_stack(tsk, false);
++}
++
++void put_task_stack_sched(struct task_struct *tsk)
++{
++	if (refcount_dec_and_test(&tsk->stack_refcount))
++		release_task_stack(tsk, true);
+ }
+ #endif
  
- 	mode = READ_ONCE(t->kcov_mode);
- 	barrier();
- 	if (!kcov_mode_enabled(mode)) {
--		local_irq_restore(flags);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
- 	/*
-@@ -978,12 +982,12 @@ void kcov_remote_stop(void)
- 	 * actually found the remote handle and started collecting coverage.
+@@ -453,7 +527,7 @@ void free_task(struct task_struct *tsk)
+ 	 * The task is finally done with both the stack and thread_info,
+ 	 * so free both.
  	 */
- 	if (in_serving_softirq() && !t->kcov_softirq) {
--		local_irq_restore(flags);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
- 	/* Make sure that kcov_softirq is only set when in softirq. */
- 	if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
--		local_irq_restore(flags);
-+		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
- 		return;
- 	}
+-	release_task_stack(tsk);
++	release_task_stack(tsk, false);
+ #else
+ 	/*
+ 	 * If the task had a separate stack allocation, it should be gone
+@@ -873,8 +947,6 @@ void set_task_stack_end_magic(struct task_struct *tsk)
+ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ {
+ 	struct task_struct *tsk;
+-	unsigned long *stack;
+-	struct vm_struct *stack_vm_area __maybe_unused;
+ 	int err;
  
-@@ -1013,7 +1017,7 @@ void kcov_remote_stop(void)
- 		spin_unlock(&kcov_remote_lock);
- 	}
+ 	if (node == NUMA_NO_NODE)
+@@ -883,32 +955,18 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	if (!tsk)
+ 		return NULL;
  
--	local_irq_restore(flags);
-+	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+-	stack = alloc_thread_stack_node(tsk, node);
+-	if (!stack)
++	err = arch_dup_task_struct(tsk, orig);
++	if (err)
+ 		goto free_tsk;
  
- 	/* Get in kcov_remote_start(). */
- 	kcov_put(kcov);
-@@ -1034,8 +1038,8 @@ static int __init kcov_init(void)
- 	int cpu;
+-	if (memcg_charge_kernel_stack(tsk))
+-		goto free_stack;
+-
+-	stack_vm_area = task_stack_vm_area(tsk);
+-
+-	err = arch_dup_task_struct(tsk, orig);
++	err = alloc_thread_stack_node(tsk, node);
++	if (err)
++		goto free_tsk;
  
- 	for_each_possible_cpu(cpu) {
--		void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE *
--				sizeof(unsigned long));
-+		void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE *
-+				sizeof(unsigned long), cpu_to_node(cpu));
- 		if (!area)
- 			return -ENOMEM;
- 		per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
-diff --git a/kernel/kprobes.c b/kernel/kprobes.c
-index 2ef90d15699f..2ab883d856b5 100644
---- a/kernel/kprobes.c
-+++ b/kernel/kprobes.c
-@@ -1250,10 +1250,10 @@ void kprobe_busy_end(void)
- }
+-	/*
+-	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
+-	 * sure they're properly initialized before using any stack-related
+-	 * functions again.
+-	 */
+-	tsk->stack = stack;
+-#ifdef CONFIG_VMAP_STACK
+-	tsk->stack_vm_area = stack_vm_area;
+-#endif
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ 	refcount_set(&tsk->stack_refcount, 1);
+ #endif
+-
+-	if (err)
+-		goto free_stack;
++	account_kernel_stack(tsk, 1);
  
- /*
-- * This function is called from finish_task_switch when task tk becomes dead,
-- * so that we can recycle any function-return probe instances associated
-- * with this task. These left over instances represent probed functions
-- * that have been called but will never return.
-+ * This function is called from delayed_put_task_struct() when a task is
-+ * dead and cleaned up to recycle any function-return probe instances
-+ * associated with this task. These left over instances represent probed
-+ * functions that have been called but will never return.
+ 	err = scs_prepare(tsk, node);
+ 	if (err)
+@@ -952,8 +1010,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	tsk->wake_q.next = NULL;
+ 	tsk->pf_io_worker = NULL;
+ 
+-	account_kernel_stack(tsk, 1);
+-
+ 	kcov_task_init(tsk);
+ 	kmap_local_fork(tsk);
+ 
+@@ -972,7 +1028,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	return tsk;
+ 
+ free_stack:
+-	free_thread_stack(tsk);
++	exit_task_stack_account(tsk);
++	free_thread_stack(tsk, false);
+ free_tsk:
+ 	free_task_struct(tsk);
+ 	return NULL;
+@@ -2468,6 +2525,7 @@ static __latent_entropy struct task_struct *copy_process(
+ 	exit_creds(p);
+ bad_fork_free:
+ 	WRITE_ONCE(p->__state, TASK_DEAD);
++	exit_task_stack_account(p);
+ 	put_task_stack(p);
+ 	delayed_free_task(p);
+ fork_out:
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index f895265d7548..c09324663088 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -575,8 +575,6 @@ EXPORT_SYMBOL_GPL(handle_simple_irq);
   */
- void kprobe_flush_task(struct task_struct *tk)
+ void handle_untracked_irq(struct irq_desc *desc)
+ {
+-	unsigned int flags = 0;
+-
+ 	raw_spin_lock(&desc->lock);
+ 
+ 	if (!irq_may_run(desc))
+@@ -593,7 +591,7 @@ void handle_untracked_irq(struct irq_desc *desc)
+ 	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+ 	raw_spin_unlock(&desc->lock);
+ 
+-	__handle_irq_event_percpu(desc, &flags);
++	__handle_irq_event_percpu(desc);
+ 
+ 	raw_spin_lock(&desc->lock);
+ 	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
+index 27182003b879..9489f93b3db3 100644
+--- a/kernel/irq/handle.c
++++ b/kernel/irq/handle.c
+@@ -136,7 +136,7 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
+ 	wake_up_process(action->thread);
+ }
+ 
+-irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
++irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
  {
+ 	irqreturn_t retval = IRQ_NONE;
+ 	unsigned int irq = desc->irq_data.irq;
+@@ -174,10 +174,6 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
+ 			}
+ 
+ 			__irq_wake_thread(desc, action);
+-
+-			fallthrough;	/* to add to randomness */
+-		case IRQ_HANDLED:
+-			*flags |= action->flags;
+ 			break;
+ 
+ 		default:
+@@ -193,11 +189,10 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
+ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
+ {
+ 	irqreturn_t retval;
+-	unsigned int flags = 0;
+ 
+-	retval = __handle_irq_event_percpu(desc, &flags);
++	retval = __handle_irq_event_percpu(desc);
+ 
+-	add_interrupt_randomness(desc->irq_data.irq, flags);
++	add_interrupt_randomness(desc->irq_data.irq);
+ 
+ 	if (!irq_settings_no_debug(desc))
+ 		note_interrupt(desc, retval);
+diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
+index 54363527feea..99cbdf55a8bd 100644
+--- a/kernel/irq/internals.h
++++ b/kernel/irq/internals.h
+@@ -103,7 +103,7 @@ extern int __irq_get_irqchip_state(struct irq_data *data,
+ 
+ extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
+ 
+-irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
++irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
+ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc);
+ irqreturn_t handle_irq_event(struct irq_desc *desc);
+ 
+diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
+index 2267e6527db3..97223df2f460 100644
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -662,6 +662,27 @@ int generic_handle_irq(unsigned int irq)
+ }
+ EXPORT_SYMBOL_GPL(generic_handle_irq);
+ 
++/**
++ * generic_handle_irq_safe - Invoke the handler for a particular irq
++ * @irq:	The irq number to handle
++ *
++ * Returns:	0 on success, or -EINVAL if conversion has failed
++ *
++ * This function must be called either from an IRQ context with irq regs
++ * initialized or with care from any context.
++ */
++int generic_handle_irq_safe(unsigned int irq)
++{
++	unsigned long flags;
++	int ret;
++
++	local_irq_save(flags);
++	ret = handle_irq_desc(irq_to_desc(irq));
++	local_irq_restore(flags);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
++
+ #ifdef CONFIG_IRQ_DOMAIN
+ /**
+  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 7405e384e5ed..d641de1f879f 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -1281,6 +1281,9 @@ static int irq_thread(void *data)
+ 		if (action_ret == IRQ_WAKE_THREAD)
+ 			irq_wake_secondary(desc, action);
+ 
++		if (IS_ENABLED(CONFIG_PREEMPT_RT))
++			process_interrupt_randomness();
++
+ 		wake_threads_waitq(desc);
+ 	}
+ 
 diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
 index 35859da8bd4f..dfff31ed644a 100644
 --- a/kernel/ksysfs.c
@@ -5725,59 +5026,11 @@ index 35859da8bd4f..dfff31ed644a 100644
  #endif
  	NULL
  };
-diff --git a/kernel/kthread.c b/kernel/kthread.c
-index 5b37a8567168..4a4d7092a2d8 100644
---- a/kernel/kthread.c
-+++ b/kernel/kthread.c
-@@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
- 
- static int kthread(void *_create)
- {
-+	static const struct sched_param param = { .sched_priority = 0 };
- 	/* Copy data: it's on kthread's stack */
- 	struct kthread_create_info *create = _create;
- 	int (*threadfn)(void *data) = create->threadfn;
-@@ -300,6 +301,13 @@ static int kthread(void *_create)
- 	init_completion(&self->parked);
- 	current->vfork_done = &self->exited;
- 
-+	/*
-+	 * The new thread inherited kthreadd's priority and CPU mask. Reset
-+	 * back to default in case they have been changed.
-+	 */
-+	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
-+	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
-+
- 	/* OK, tell user we're spawned, wait for stop or wakeup */
- 	__set_current_state(TASK_UNINTERRUPTIBLE);
- 	create->result = current;
-@@ -397,7 +405,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
- 	}
- 	task = create->result;
- 	if (!IS_ERR(task)) {
--		static const struct sched_param param = { .sched_priority = 0 };
- 		char name[TASK_COMM_LEN];
- 
- 		/*
-@@ -406,13 +413,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
- 		 */
- 		vsnprintf(name, sizeof(name), namefmt, args);
- 		set_task_comm(task, name);
--		/*
--		 * root may have changed our (kthreadd's) priority or CPU mask.
--		 * The kernel thread should not inherit these properties.
--		 */
--		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
--		set_cpus_allowed_ptr(task,
--				     housekeeping_cpumask(HK_FLAG_KTHREAD));
- 	}
- 	kfree(create);
- 	return task;
 diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
-index 0627584f7872..5bef051a55c5 100644
+index 49c4d11b0893..c3d5a9cbf54d 100644
 --- a/kernel/locking/lockdep.c
 +++ b/kernel/locking/lockdep.c
-@@ -5475,6 +5475,7 @@ static noinstr void check_flags(unsigned long flags)
+@@ -5487,6 +5487,7 @@ static noinstr void check_flags(unsigned long flags)
  		}
  	}
  
@@ -5785,7 +5038,7 @@ index 0627584f7872..5bef051a55c5 100644
  	/*
  	 * We dont accurately track softirq state in e.g.
  	 * hardirq contexts (such as on 4KSTACKS), so only
-@@ -5489,6 +5490,7 @@ static noinstr void check_flags(unsigned long flags)
+@@ -5501,6 +5502,7 @@ static noinstr void check_flags(unsigned long flags)
  			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
  		}
  	}
@@ -5794,37 +5047,22 @@ index 0627584f7872..5bef051a55c5 100644
  	if (!debug_locks)
  		print_irqtrace_events(current);
 diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
-index 6bb116c559b4..3665583361c0 100644
+index 1f25a4d7de27..e85d5df3f42c 100644
 --- a/kernel/locking/rtmutex.c
 +++ b/kernel/locking/rtmutex.c
-@@ -1097,8 +1097,26 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+@@ -1103,8 +1103,11 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+ 	 * the other will detect the deadlock and return -EDEADLOCK,
  	 * which is wrong, as the other waiter is not in a deadlock
  	 * situation.
++	 *
++	 * Except for ww_mutex, in that case the chain walk must already deal
++	 * with spurious cycles, see the comments at [3] and [6].
  	 */
 -	if (owner == task)
-+	if (owner == task) {
-+#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS)
-+		/*
-+		 * The lockdep selftest for ww-mutex assumes in a few cases
-+		 * the ww_ctx->contending_lock assignment via
-+		 * __ww_mutex_check_kill() which does not happen if the rtmutex
-+		 * detects the deadlock early.
-+		 */
-+		if (build_ww_mutex() && ww_ctx) {
-+			struct rt_mutex *rtm;
-+
-+			/* Check whether the waiter should backout immediately */
-+			rtm = container_of(lock, struct rt_mutex, rtmutex);
-+
-+			__ww_mutex_add_waiter(waiter, rtm, ww_ctx);
-+			__ww_mutex_check_kill(rtm, waiter, ww_ctx);
-+		}
-+#endif
++	if (owner == task && !(build_ww_mutex() && ww_ctx))
  		return -EDEADLK;
-+	}
  
  	raw_spin_lock(&task->pi_lock);
- 	waiter->task = task;
 diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
 index 5c9299aaabae..900220941caa 100644
 --- a/kernel/locking/rtmutex_api.c
@@ -5898,56 +5136,51 @@ index 5c9299aaabae..900220941caa 100644
  /**
   * rt_mutex_trylock - try to lock a rt_mutex
   *
-diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
-index d2912e44d61f..9e396a09fe0f 100644
---- a/kernel/locking/spinlock_rt.c
-+++ b/kernel/locking/spinlock_rt.c
-@@ -24,6 +24,17 @@
- #define RT_MUTEX_BUILD_SPINLOCKS
- #include "rtmutex.c"
- 
-+/*
-+ * __might_resched() skips the state check as rtlocks are state
-+ * preserving. Take RCU nesting into account as spin/read/write_lock() can
-+ * legitimately nest into an RCU read side critical section.
-+ */
-+#define RTLOCK_RESCHED_OFFSETS						\
-+	(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)
+diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
+index b562f9289372..7f49baaa4979 100644
+--- a/kernel/locking/spinlock.c
++++ b/kernel/locking/spinlock.c
+@@ -300,6 +300,16 @@ void __lockfunc _raw_write_lock(rwlock_t *lock)
+ 	__raw_write_lock(lock);
+ }
+ EXPORT_SYMBOL(_raw_write_lock);
 +
-+#define rtlock_might_resched()						\
-+	__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
++#ifndef CONFIG_DEBUG_LOCK_ALLOC
++#define __raw_write_lock_nested(lock, subclass)	__raw_write_lock(((void)(subclass), (lock)))
++#endif
 +
- static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
- {
- 	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
-@@ -32,7 +43,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
++void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass)
++{
++	__raw_write_lock_nested(lock, subclass);
++}
++EXPORT_SYMBOL(_raw_write_lock_nested);
+ #endif
  
- static __always_inline void __rt_spin_lock(spinlock_t *lock)
- {
--	___might_sleep(__FILE__, __LINE__, 0);
-+	rtlock_might_resched();
- 	rtlock_lock(&lock->lock);
- 	rcu_read_lock();
- 	migrate_disable();
-@@ -210,7 +221,7 @@ EXPORT_SYMBOL(rt_write_trylock);
+ #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
+diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
+index b2e553f9255b..48a19ed8486d 100644
+--- a/kernel/locking/spinlock_rt.c
++++ b/kernel/locking/spinlock_rt.c
+@@ -239,6 +239,18 @@ void __sched rt_write_lock(rwlock_t *rwlock)
+ }
+ EXPORT_SYMBOL(rt_write_lock);
  
- void __sched rt_read_lock(rwlock_t *rwlock)
- {
--	___might_sleep(__FILE__, __LINE__, 0);
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass)
++{
 +	rtlock_might_resched();
- 	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
- 	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
- 	rcu_read_lock();
-@@ -220,7 +231,7 @@ EXPORT_SYMBOL(rt_read_lock);
- 
- void __sched rt_write_lock(rwlock_t *rwlock)
++	rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_);
++	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
++	rcu_read_lock();
++	migrate_disable();
++}
++EXPORT_SYMBOL(rt_write_lock_nested);
++#endif
++
+ void __sched rt_read_unlock(rwlock_t *rwlock)
  {
--	___might_sleep(__FILE__, __LINE__, 0);
-+	rtlock_might_resched();
- 	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- 	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
- 	rcu_read_lock();
-@@ -246,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock)
+ 	rwlock_release(&rwlock->dep_map, _RET_IP_);
+@@ -257,12 +269,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock)
  }
  EXPORT_SYMBOL(rt_write_unlock);
  
@@ -5960,260 +5193,179 @@ index d2912e44d61f..9e396a09fe0f 100644
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
  		      struct lock_class_key *key)
+diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
+index 0e00205cf467..d1473c624105 100644
+--- a/kernel/locking/ww_rt_mutex.c
++++ b/kernel/locking/ww_rt_mutex.c
+@@ -26,7 +26,7 @@ int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
+ 
+ 	if (__rt_mutex_trylock(&rtm->rtmutex)) {
+ 		ww_mutex_set_context_fastpath(lock, ww_ctx);
+-		mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_);
++		mutex_acquire_nest(&rtm->dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_);
+ 		return 1;
+ 	}
+ 
 diff --git a/kernel/panic.c b/kernel/panic.c
-index cefd7d82366f..d509c0694af9 100644
+index cefd7d82366f..556665ef1152 100644
 --- a/kernel/panic.c
 +++ b/kernel/panic.c
-@@ -178,12 +178,28 @@ static void panic_print_sys_info(void)
- void panic(const char *fmt, ...)
- {
- 	static char buf[1024];
-+	va_list args2;
- 	va_list args;
- 	long i, i_next = 0, len;
- 	int state = 0;
- 	int old_cpu, this_cpu;
- 	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
- 
-+	console_verbose();
-+	pr_emerg("Kernel panic - not syncing:\n");
-+	va_start(args2, fmt);
-+	va_copy(args, args2);
-+	vprintk(fmt, args2);
-+	va_end(args2);
-+#ifdef CONFIG_DEBUG_BUGVERBOSE
-+	/*
-+	 * Avoid nested stack-dumping if a panic occurs during oops processing
-+	 */
-+	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
-+		dump_stack();
-+#endif
-+	pr_flush(1000, true);
-+
- 	/*
- 	 * Disable local interrupts. This will prevent panic_smp_self_stop
- 	 * from deadlocking the first cpu that invokes the panic, since
-@@ -214,24 +230,13 @@ void panic(const char *fmt, ...)
- 	if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
+@@ -215,7 +215,6 @@ void panic(const char *fmt, ...)
  		panic_smp_self_stop();
  
--	console_verbose();
- 	bust_spinlocks(1);
--	va_start(args, fmt);
+ 	console_verbose();
+-	bust_spinlocks(1);
+ 	va_start(args, fmt);
  	len = vscnprintf(buf, sizeof(buf), fmt, args);
  	va_end(args);
+@@ -239,6 +238,11 @@ void panic(const char *fmt, ...)
+ 	 */
+ 	kgdb_panic(buf);
  
- 	if (len && buf[len - 1] == '\n')
- 		buf[len - 1] = '\0';
- 
--	pr_emerg("Kernel panic - not syncing: %s\n", buf);
--#ifdef CONFIG_DEBUG_BUGVERBOSE
--	/*
--	 * Avoid nested stack-dumping if a panic occurs during oops processing
--	 */
--	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
--		dump_stack();
--#endif
--
++	/* Use atomic consoles to dump the kernel log. */
++	console_flush_on_panic(CONSOLE_ATOMIC_FLUSH_PENDING);
++
++	bust_spinlocks(1);
++
  	/*
- 	 * If kgdb is enabled, give it a chance to run before we stop all
- 	 * the other CPUs or else we won't be able to debug processes left
-@@ -540,9 +545,11 @@ static u64 oops_id;
- 
- static int init_oops_id(void)
- {
-+#ifndef CONFIG_PREEMPT_RT
- 	if (!oops_id)
- 		get_random_bytes(&oops_id, sizeof(oops_id));
- 	else
-+#endif
- 		oops_id++;
- 
- 	return 0;
-@@ -553,6 +560,7 @@ static void print_oops_end_marker(void)
- {
- 	init_oops_id();
- 	pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
-+	pr_flush(1000, true);
- }
- 
- /*
-diff --git a/kernel/power/main.c b/kernel/power/main.c
-index 44169f3081fd..eaa725ca079c 100644
---- a/kernel/power/main.c
-+++ b/kernel/power/main.c
-@@ -543,14 +543,13 @@ static int __init pm_debug_messages_setup(char *str)
- __setup("pm_debug_messages", pm_debug_messages_setup);
- 
- /**
-- * __pm_pr_dbg - Print a suspend debug message to the kernel log.
-- * @defer: Whether or not to use printk_deferred() to print the message.
-+ * pm_pr_dbg - Print a suspend debug message to the kernel log.
-  * @fmt: Message format.
-  *
-  * The message will be emitted if enabled through the pm_debug_messages
-  * sysfs attribute.
-  */
--void __pm_pr_dbg(bool defer, const char *fmt, ...)
-+void pm_pr_dbg(const char *fmt, ...)
- {
- 	struct va_format vaf;
- 	va_list args;
-@@ -563,10 +562,7 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...)
- 	vaf.fmt = fmt;
- 	vaf.va = &args;
- 
--	if (defer)
--		printk_deferred(KERN_DEBUG "PM: %pV", &vaf);
--	else
--		printk(KERN_DEBUG "PM: %pV", &vaf);
-+	printk(KERN_DEBUG "PM: %pV", &vaf);
- 
- 	va_end(args);
+ 	 * If we have crashed and we have a crash kernel loaded let it handle
+ 	 * everything else.
+@@ -533,26 +537,9 @@ void oops_enter(void)
+ 		trigger_all_cpu_backtrace();
  }
-diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
-index d118739874c0..bc6b856a0ff4 100644
---- a/kernel/printk/Makefile
-+++ b/kernel/printk/Makefile
-@@ -1,6 +1,5 @@
- # SPDX-License-Identifier: GPL-2.0-only
- obj-y	= printk.o
--obj-$(CONFIG_PRINTK)	+= printk_safe.o
- obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)	+= braille.o
- obj-$(CONFIG_PRINTK)	+= printk_ringbuffer.o
- obj-$(CONFIG_PRINTK_INDEX)	+= index.o
-diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
-index 9f3ed2fdb721..de8ab059dd96 100644
---- a/kernel/printk/internal.h
-+++ b/kernel/printk/internal.h
-@@ -2,7 +2,6 @@
- /*
-  * internal.h - printk internal definitions
-  */
--#include <linux/percpu.h>
- 
- #ifdef CONFIG_PRINTK
- 
-@@ -12,41 +11,6 @@ enum printk_info_flags {
- 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
- };
  
--__printf(4, 0)
--int vprintk_store(int facility, int level,
--		  const struct dev_printk_info *dev_info,
--		  const char *fmt, va_list args);
--
--__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
--__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
--
--bool printk_percpu_data_ready(void);
--
--#define printk_safe_enter_irqsave(flags)	\
--	do {					\
--		local_irq_save(flags);		\
--		__printk_safe_enter();		\
--	} while (0)
--
--#define printk_safe_exit_irqrestore(flags)	\
--	do {					\
--		__printk_safe_exit();		\
--		local_irq_restore(flags);	\
--	} while (0)
--
--void defer_console_output(void);
--
- u16 printk_parse_prefix(const char *text, int *level,
- 			enum printk_info_flags *flags);
--#else
--
 -/*
-- * In !PRINTK builds we still export console_sem
-- * semaphore and some of console functions (console_unlock()/etc.), so
-- * printk-safe must preserve the existing local IRQ guarantees.
+- * 64-bit random ID for oopses:
 - */
--#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
--#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
+-static u64 oops_id;
 -
--static inline bool printk_percpu_data_ready(void) { return false; }
- #endif /* CONFIG_PRINTK */
+-static int init_oops_id(void)
+-{
+-	if (!oops_id)
+-		get_random_bytes(&oops_id, sizeof(oops_id));
+-	else
+-		oops_id++;
+-
+-	return 0;
+-}
+-late_initcall(init_oops_id);
+-
+ static void print_oops_end_marker(void)
+ {
+-	init_oops_id();
+-	pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
++	pr_warn("---[ end trace %016llx ]---\n", 0ULL);
+ }
+ 
+ /*
 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
-index 99221b016c68..0cc8e8acf545 100644
+index 57b132b658e1..20453a3bc429 100644
 --- a/kernel/printk/printk.c
 +++ b/kernel/printk/printk.c
-@@ -44,6 +44,10 @@
+@@ -44,6 +44,7 @@
  #include <linux/irq_work.h>
  #include <linux/ctype.h>
  #include <linux/uio.h>
-+#include <linux/kdb.h>
-+#include <linux/kgdb.h>
-+#include <linux/kthread.h>
 +#include <linux/clocksource.h>
  #include <linux/sched/clock.h>
  #include <linux/sched/debug.h>
  #include <linux/sched/task_stack.h>
-@@ -225,19 +229,7 @@ static int nr_ext_console_drivers;
- 
- static int __down_trylock_console_sem(unsigned long ip)
- {
--	int lock_failed;
--	unsigned long flags;
--
--	/*
--	 * Here and in __up_console_sem() we need to be in safe mode,
--	 * because spindump/WARN/etc from under console ->lock will
--	 * deadlock in printk()->down_trylock_console_sem() otherwise.
--	 */
--	printk_safe_enter_irqsave(flags);
--	lock_failed = down_trylock(&console_sem);
--	printk_safe_exit_irqrestore(flags);
--
--	if (lock_failed)
-+	if (down_trylock(&console_sem))
- 		return 1;
- 	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
- 	return 0;
-@@ -246,13 +238,9 @@ static int __down_trylock_console_sem(unsigned long ip)
- 
- static void __up_console_sem(unsigned long ip)
- {
--	unsigned long flags;
--
- 	mutex_release(&console_lock_dep_map, ip);
+@@ -214,6 +215,26 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
+ /* Number of registered extended console drivers. */
+ static int nr_ext_console_drivers;
  
--	printk_safe_enter_irqsave(flags);
- 	up(&console_sem);
--	printk_safe_exit_irqrestore(flags);
- }
++/*
++ * Used to synchronize printing kthreads against direct printing via
++ * console_trylock/console_unlock.
++ *
++ * Values:
++ * -1 = console locked (via trylock), kthreads will not print
++ *  0 = no kthread printing, console not locked (via trylock)
++ * >0 = kthread(s) actively printing
++ *
++ * Note: For synchronizing against direct printing via
++ *       console_lock/console_unlock, see the @lock variable in
++ *       struct console.
++ */
++static atomic_t console_lock_count = ATOMIC_INIT(0);
++
++#define console_excl_trylock() (atomic_cmpxchg(&console_lock_count, 0, -1) == 0)
++#define console_excl_unlock() atomic_cmpxchg(&console_lock_count, -1, 0)
++#define console_printer_tryenter() atomic_inc_unless_negative(&console_lock_count)
++#define console_printer_exit() atomic_dec(&console_lock_count)
++
+ /*
+  * Helper macros to handle lockdep when locking/unlocking console_sem. We use
+  * macros instead of functions so that _RET_IP_ contains useful information.
+@@ -257,19 +278,37 @@ static void __up_console_sem(unsigned long ip)
  #define up_console_sem() __up_console_sem(_RET_IP_)
  
-@@ -266,11 +254,6 @@ static void __up_console_sem(unsigned long ip)
+ /*
+- * This is used for debugging the mess that is the VT code by
+- * keeping track if we have the console semaphore held. It's
+- * definitely not the perfect debug tool (we don't know if _WE_
+- * hold it and are racing, but it helps tracking those weird code
+- * paths in the console code where we end up in places I want
+- * locked without the console semaphore held).
++ * Tracks whether kthread printers are all paused. A value of true implies
++ * that the console is locked via console_lock() or the console is suspended.
++ * Reading and writing to this variable requires holding @console_sem.
   */
- static int console_locked, console_suspended;
+-static int console_locked, console_suspended;
++static bool consoles_paused;
  
--/*
+ /*
 - * If exclusive_console is non-NULL then only this console is to be printed to.
-- */
++ * Pause or unpause all kthread printers.
++ *
++ * Requires the console_lock.
+  */
 -static struct console *exclusive_console;
--
++static void __pause_all_consoles(bool do_pause)
++{
++	struct console *con;
++
++	for_each_console(con) {
++		mutex_lock(&con->lock);
++		if (do_pause)
++			con->flags |= CON_PAUSED;
++		else
++			con->flags &= ~CON_PAUSED;
++		mutex_unlock(&con->lock);
++	}
++
++	consoles_paused = do_pause;
++}
++
++#define pause_all_consoles() __pause_all_consoles(true)
++#define unpause_all_consoles() __pause_all_consoles(false)
++
++static int console_suspended;
+ 
  /*
   *	Array of consoles built from command line options (console=)
-  */
-@@ -350,10 +333,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
-  * non-prinatable characters are escaped in the "\xff" notation.
-  */
- 
-+#ifdef CONFIG_PRINTK
+@@ -353,6 +392,20 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
  /* syslog_lock protects syslog_* variables and write access to clear_seq. */
  static DEFINE_MUTEX(syslog_lock);
  
--#ifdef CONFIG_PRINTK
-+/* Set to enable sync mode. Once set, it is never cleared. */
-+static bool sync_mode;
++/*
++ * A flag to signify if printk_late_init() has already started the kthread
++ * printers. If true, any later registered consoles must start their own
++ * kthread directly. The flag is write protected by the console_lock.
++ */
++static bool kthreads_started;
++
++static inline bool kthread_printers_active(void)
++{
++	return (kthreads_started &&
++		system_state == SYSTEM_RUNNING &&
++		!oops_in_progress);
++}
 +
+ #ifdef CONFIG_PRINTK
  DECLARE_WAIT_QUEUE_HEAD(log_wait);
  /* All 3 protected by @syslog_lock. */
- /* the next printk record to read by syslog(READ) or /proc/kmsg */
-@@ -361,17 +347,6 @@ static u64 syslog_seq;
+@@ -361,12 +414,6 @@ static u64 syslog_seq;
  static size_t syslog_partial;
  static bool syslog_time;
  
@@ -6223,303 +5375,72 @@ index 99221b016c68..0cc8e8acf545 100644
 -static u64 exclusive_console_stop_seq;
 -static unsigned long console_dropped;
 -
--struct latched_seq {
--	seqcount_latch_t	latch;
--	u64			val[2];
--};
--
- /*
-  * The next printk record to read after the last 'clear' command. There are
-  * two copies (updated with seqcount_latch) so that reads can locklessly
-@@ -389,9 +364,6 @@ static struct latched_seq clear_seq = {
- #define PREFIX_MAX		32
- #endif
+ struct latched_seq {
+ 	seqcount_latch_t	latch;
+ 	u64			val[2];
+@@ -392,6 +439,9 @@ static struct latched_seq clear_seq = {
+ /* the maximum size of a formatted record (i.e. with prefix added per line) */
+ #define CONSOLE_LOG_MAX		1024
  
--/* the maximum size of a formatted record (i.e. with prefix added per line) */
--#define CONSOLE_LOG_MAX		1024
--
++/* the maximum size for a dropped text message */
++#define DROPPED_TEXT_MAX	64
++
  /* the maximum size allowed to be reserved for a record */
  #define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)
  
-@@ -430,12 +402,12 @@ static struct printk_ringbuffer *prb = &printk_rb_static;
-  */
- static bool __printk_percpu_data_ready __read_mostly;
- 
--bool printk_percpu_data_ready(void)
-+static bool printk_percpu_data_ready(void)
- {
- 	return __printk_percpu_data_ready;
- }
- 
--/* Must be called under syslog_lock. */
-+/* Must be called under associated write-protection lock. */
- static void latched_seq_write(struct latched_seq *ls, u64 val)
- {
- 	raw_write_seqcount_latch(&ls->latch);
-@@ -1747,188 +1719,152 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
- 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
+@@ -1823,6 +1873,7 @@ static int console_lock_spinning_disable_and_check(void)
+ 	return 1;
  }
  
--/*
-- * Special console_lock variants that help to reduce the risk of soft-lockups.
-- * They allow to pass console_lock to another printk() call using a busy wait.
-- */
-+int printk_delay_msec __read_mostly;
- 
--#ifdef CONFIG_LOCKDEP
--static struct lockdep_map console_owner_dep_map = {
--	.name = "console_owner"
--};
--#endif
-+static inline void printk_delay(int level)
-+{
-+	boot_delay_msec(level);
++#if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ /**
+  * console_trylock_spinning - try to get console_lock by busy waiting
+  *
+@@ -1886,49 +1937,38 @@ static int console_trylock_spinning(void)
  
--static DEFINE_RAW_SPINLOCK(console_owner_lock);
--static struct task_struct *console_owner;
--static bool console_waiter;
-+	if (unlikely(printk_delay_msec)) {
-+		int m = printk_delay_msec;
+ 	return 1;
+ }
++#endif /* CONFIG_PREEMPT_RT */
  
--/**
-- * console_lock_spinning_enable - mark beginning of code where another
-- *	thread might safely busy wait
-- *
-- * This basically converts console_lock into a spinlock. This marks
-- * the section where the console_lock owner can not sleep, because
-- * there may be a waiter spinning (like a spinlock). Also it must be
-- * ready to hand over the lock at the end of the section.
-- */
--static void console_lock_spinning_enable(void)
-+		while (m--) {
-+			mdelay(1);
-+			touch_nmi_watchdog();
-+		}
-+	}
-+}
-+
-+static bool kernel_sync_mode(void)
- {
--	raw_spin_lock(&console_owner_lock);
--	console_owner = current;
--	raw_spin_unlock(&console_owner_lock);
-+	return (oops_in_progress || sync_mode);
-+}
- 
--	/* The waiter may spin on us after setting console_owner */
--	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-+static bool console_may_sync(struct console *con)
-+{
-+	if (!(con->flags & CON_ENABLED))
-+		return false;
-+	if (con->write_atomic && kernel_sync_mode())
-+		return true;
-+	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
-+		return true;
-+	if (con->write && (con->flags & CON_BOOT) && !con->thread)
-+		return true;
-+	return false;
- }
- 
--/**
-- * console_lock_spinning_disable_and_check - mark end of code where another
-- *	thread was able to busy wait and check if there is a waiter
-- *
-- * This is called at the end of the section where spinning is allowed.
-- * It has two functions. First, it is a signal that it is no longer
-- * safe to start busy waiting for the lock. Second, it checks if
-- * there is a busy waiter and passes the lock rights to her.
-- *
-- * Important: Callers lose the lock if there was a busy waiter.
-- *	They must not touch items synchronized by console_lock
-- *	in this case.
-- *
-- * Return: 1 if the lock rights were passed, 0 otherwise.
-- */
--static int console_lock_spinning_disable_and_check(void)
-+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
- {
--	int waiter;
-+	if (!(con->flags & CON_ENABLED))
-+		return false;
- 
--	raw_spin_lock(&console_owner_lock);
--	waiter = READ_ONCE(console_waiter);
--	console_owner = NULL;
--	raw_spin_unlock(&console_owner_lock);
-+	if (con->write_atomic && kernel_sync_mode()) {
-+		con->write_atomic(con, text, text_len);
-+		return true;
-+	}
- 
--	if (!waiter) {
--		spin_release(&console_owner_dep_map, _THIS_IP_);
--		return 0;
-+	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) {
-+		if (console_trylock()) {
-+			con->write_atomic(con, text, text_len);
-+			console_unlock();
-+			return true;
-+		}
-+
-+	} else if (con->write && (con->flags & CON_BOOT) && !con->thread) {
-+		if (console_trylock()) {
-+			con->write(con, text, text_len);
-+			console_unlock();
-+			return true;
-+		}
- 	}
- 
--	/* The waiter is now free to continue */
--	WRITE_ONCE(console_waiter, false);
-+	return false;
-+}
- 
--	spin_release(&console_owner_dep_map, _THIS_IP_);
-+static bool have_atomic_console(void)
-+{
-+	struct console *con;
- 
--	/*
--	 * Hand off console_lock to waiter. The waiter will perform
--	 * the up(). After this, the waiter is the console_lock owner.
--	 */
--	mutex_release(&console_lock_dep_map, _THIS_IP_);
--	return 1;
-+	for_each_console(con) {
-+		if (!(con->flags & CON_ENABLED))
-+			continue;
-+		if (con->write_atomic)
-+			return true;
-+	}
-+	return false;
- }
- 
--/**
-- * console_trylock_spinning - try to get console_lock by busy waiting
-- *
-- * This allows to busy wait for the console_lock when the current
-- * owner is running in specially marked sections. It means that
-- * the current owner is running and cannot reschedule until it
-- * is ready to lose the lock.
-- *
-- * Return: 1 if we got the lock, 0 othrewise
-- */
--static int console_trylock_spinning(void)
-+static bool print_sync(struct console *con, u64 *seq)
- {
--	struct task_struct *owner = NULL;
--	bool waiter;
--	bool spin = false;
--	unsigned long flags;
-+	struct printk_info info;
-+	struct printk_record r;
-+	size_t text_len;
- 
--	if (console_trylock())
--		return 1;
-+	prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
- 
--	printk_safe_enter_irqsave(flags);
-+	if (!prb_read_valid(prb, *seq, &r))
-+		return false;
- 
--	raw_spin_lock(&console_owner_lock);
--	owner = READ_ONCE(console_owner);
--	waiter = READ_ONCE(console_waiter);
--	if (!waiter && owner && owner != current) {
--		WRITE_ONCE(console_waiter, true);
--		spin = true;
--	}
--	raw_spin_unlock(&console_owner_lock);
-+	text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
- 
--	/*
--	 * If there is an active printk() writing to the
--	 * consoles, instead of having it write our data too,
--	 * see if we can offload that load from the active
--	 * printer, and do some printing ourselves.
--	 * Go into a spin only if there isn't already a waiter
--	 * spinning, and there is an active printer, and
--	 * that active printer isn't us (recursive printk?).
--	 */
--	if (!spin) {
--		printk_safe_exit_irqrestore(flags);
--		return 0;
--	}
-+	if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
-+		return false;
- 
--	/* We spin waiting for the owner to release us */
--	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
--	/* Owner will clear console_waiter on hand off */
--	while (READ_ONCE(console_waiter))
--		cpu_relax();
--	spin_release(&console_owner_dep_map, _THIS_IP_);
-+	*seq = r.info->seq;
- 
--	printk_safe_exit_irqrestore(flags);
--	/*
--	 * The owner passed the console lock to us.
--	 * Since we did not spin on console lock, annotate
--	 * this as a trylock. Otherwise lockdep will
--	 * complain.
--	 */
--	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
-+	touch_softlockup_watchdog_sync();
-+	clocksource_touch_watchdog();
-+	rcu_cpu_stall_reset();
-+	touch_nmi_watchdog();
- 
--	return 1;
-+	if (text_len)
-+		printk_delay(r.info->level);
-+
-+	return true;
- }
- 
--/*
+ /*
 - * Call the console drivers, asking them to write out
 - * log_buf[start] to log_buf[end - 1].
 - * The console_lock must be held.
-- */
++ * Call the specified console driver, asking it to write out the specified
++ * text and length. If @dropped_text is non-NULL and any records have been
++ * dropped, a dropped message will be written out first.
+  */
 -static void call_console_drivers(const char *ext_text, size_t ext_len,
 -				 const char *text, size_t len)
-+static u64 read_console_seq(struct console *con)
++static void call_console_driver(struct console *con, const char *text, size_t len,
++				char *dropped_text, bool atomic_printing)
  {
 -	static char dropped_text[64];
 -	size_t dropped_len = 0;
 -	struct console *con;
-+	u64 seq2;
-+	u64 seq;
++	unsigned long dropped = 0;
++	size_t dropped_len;
  
--	trace_console_rcuidle(text, len);
-+	seq = latched_seq_read_nolock(&con->printk_seq);
-+	seq2 = latched_seq_read_nolock(&con->printk_sync_seq);
-+	if (seq2 > seq)
-+		seq = seq2;
-+#ifdef CONFIG_HAVE_NMI
-+	seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq);
-+	if (seq2 > seq)
-+		seq = seq2;
-+#endif
-+	return seq;
-+}
+ 	trace_console_rcuidle(text, len);
  
 -	if (!console_drivers)
 -		return;
-+static void print_sync_until(struct console *con, u64 seq, bool is_locked)
-+{
-+	u64 printk_seq;
++	if (dropped_text)
++		dropped = atomic_long_xchg_relaxed(&con->dropped, 0);
  
 -	if (console_dropped) {
 -		dropped_len = snprintf(dropped_text, sizeof(dropped_text),
--				       "** %lu printk messages dropped **\n",
++	if (dropped) {
++		dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
+ 				       "** %lu printk messages dropped **\n",
 -				       console_dropped);
 -		console_dropped = 0;
--	}
-+	while (!__printk_cpu_trylock())
-+		cpu_relax();
++				       dropped);
++		if (atomic_printing)
++			con->write_atomic(con, dropped_text, dropped_len);
++		else
++			con->write(con, dropped_text, dropped_len);
+ 	}
  
 -	for_each_console(con) {
 -		if (exclusive_console && con != exclusive_console)
@@ -6538,419 +5459,641 @@ index 99221b016c68..0cc8e8acf545 100644
 -				con->write(con, dropped_text, dropped_len);
 -			con->write(con, text, len);
 -		}
-+	for (;;) {
-+		printk_seq = read_console_seq(con);
-+		if (printk_seq >= seq)
-+			break;
-+		if (!print_sync(con, &printk_seq))
-+			break;
-+
-+		if (is_locked)
-+			latched_seq_write(&con->printk_seq, printk_seq + 1);
-+#ifdef CONFIG_PRINTK_NMI
-+		else if (in_nmi())
-+			latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1);
-+#endif
-+		else
-+			latched_seq_write(&con->printk_sync_seq, printk_seq + 1);
- 	}
-+
-+	__printk_cpu_unlock();
+-	}
++	if (atomic_printing)
++		con->write_atomic(con, text, len);
++	else
++		con->write(con, text, len);
  }
  
  /*
-@@ -2001,20 +1937,6 @@ static u8 *__printk_recursion_counter(void)
- 		local_irq_restore(flags);		\
- 	} while (0)
- 
--int printk_delay_msec __read_mostly;
--
--static inline void printk_delay(void)
--{
--	if (unlikely(printk_delay_msec)) {
--		int m = printk_delay_msec;
--
--		while (m--) {
--			mdelay(1);
--			touch_nmi_watchdog();
--		}
--	}
--}
--
+@@ -2018,7 +2058,7 @@ static inline void printk_delay(void)
  static inline u32 printk_caller_id(void)
  {
  	return in_task() ? task_pid_nr(current) :
-@@ -2095,13 +2017,14 @@ static u16 printk_sprint(char *text, u16 size, int facility,
+-		0x80000000 + raw_smp_processor_id();
++		0x80000000 + smp_processor_id();
  }
  
- __printf(4, 0)
--int vprintk_store(int facility, int level,
--		  const struct dev_printk_info *dev_info,
--		  const char *fmt, va_list args)
-+static int vprintk_store(int facility, int level,
-+			 const struct dev_printk_info *dev_info,
-+			 const char *fmt, va_list args)
+ /**
+@@ -2100,7 +2140,6 @@ int vprintk_store(int facility, int level,
+ 		  const struct dev_printk_info *dev_info,
+ 		  const char *fmt, va_list args)
  {
- 	const u32 caller_id = printk_caller_id();
+-	const u32 caller_id = printk_caller_id();
  	struct prb_reserved_entry e;
  	enum printk_info_flags flags = 0;
-+	bool final_commit = false;
  	struct printk_record r;
- 	unsigned long irqflags;
- 	u16 trunc_msg_len = 0;
-@@ -2112,6 +2035,7 @@ int vprintk_store(int facility, int level,
+@@ -2110,10 +2149,14 @@ int vprintk_store(int facility, int level,
+ 	u8 *recursion_ptr;
+ 	u16 reserve_size;
+ 	va_list args2;
++	u32 caller_id;
  	u16 text_len;
  	int ret = 0;
  	u64 ts_nsec;
-+	u64 seq;
  
++	if (!printk_enter_irqsave(recursion_ptr, irqflags))
++		return 0;
++
  	/*
  	 * Since the duration of printk() can vary depending on the message
-@@ -2150,6 +2074,7 @@ int vprintk_store(int facility, int level,
- 	if (flags & LOG_CONT) {
- 		prb_rec_init_wr(&r, reserve_size);
- 		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
-+			seq = r.info->seq;
- 			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
- 						 facility, &flags, fmt, args);
- 			r.info->text_len += text_len;
-@@ -2157,6 +2082,7 @@ int vprintk_store(int facility, int level,
- 			if (flags & LOG_NEWLINE) {
- 				r.info->flags |= LOG_NEWLINE;
- 				prb_final_commit(&e);
-+				final_commit = true;
- 			} else {
- 				prb_commit(&e);
- 			}
-@@ -2180,6 +2106,7 @@ int vprintk_store(int facility, int level,
- 		if (!prb_reserve(&e, prb, &r))
- 			goto out;
- 	}
-+	seq = r.info->seq;
- 
- 	/* fill message */
- 	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
-@@ -2195,13 +2122,25 @@ int vprintk_store(int facility, int level,
- 		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
- 
- 	/* A message without a trailing newline can be continued. */
--	if (!(flags & LOG_NEWLINE))
-+	if (!(flags & LOG_NEWLINE)) {
- 		prb_commit(&e);
--	else
-+	} else {
- 		prb_final_commit(&e);
-+		final_commit = true;
-+	}
+ 	 * and state of the ringbuffer, grab the timestamp now so that it is
+@@ -2122,8 +2165,7 @@ int vprintk_store(int facility, int level,
+ 	 */
+ 	ts_nsec = local_clock();
  
- 	ret = text_len + trunc_msg_len;
- out:
-+	/* only the kernel may perform synchronous printing */
-+	if (facility == 0 && final_commit) {
-+		struct console *con;
-+
-+		for_each_console(con) {
-+			if (console_may_sync(con))
-+				print_sync_until(con, seq + 1, false);
-+		}
-+	}
-+
- 	printk_exit_irqrestore(recursion_ptr, irqflags);
- 	return ret;
- }
-@@ -2211,50 +2150,43 @@ asmlinkage int vprintk_emit(int facility, int level,
- 			    const char *fmt, va_list args)
- {
- 	int printed_len;
--	bool in_sched = false;
+-	if (!printk_enter_irqsave(recursion_ptr, irqflags))
+-		return 0;
++	caller_id = printk_caller_id();
  
- 	/* Suppress unimportant messages after panic happens */
- 	if (unlikely(suppress_printk))
- 		return 0;
+ 	/*
+ 	 * The sprintf needs to come first since the syslog prefix might be
+@@ -2223,27 +2265,36 @@ asmlinkage int vprintk_emit(int facility, int level,
+ 		in_sched = true;
+ 	}
  
--	if (level == LOGLEVEL_SCHED) {
-+	if (level == LOGLEVEL_SCHED)
- 		level = LOGLEVEL_DEFAULT;
--		in_sched = true;
--	}
--
 -	boot_delay_msec(level);
 -	printk_delay();
- 
+-
  	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
  
--	/* If called from the scheduler, we can not call up(). */
+ 	/* If called from the scheduler, we can not call up(). */
 -	if (!in_sched) {
--		/*
--		 * Disable preemption to avoid being preempted while holding
--		 * console_sem which would prevent anyone from printing to
--		 * console
--		 */
--		preempt_disable();
++	if (!in_sched && !kthread_printers_active()) {
++		/*
++		 * Try to acquire and then immediately release the console
++		 * semaphore.  The release will print out buffers.
++		 */
++#if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		/*
++		 * Use the non-spinning trylock since PREEMPT_RT does not
++		 * support console lock handovers.
++		 *
++		 * Direct printing will most likely involve taking spinlocks.
++		 * For PREEMPT_RT, this is only allowed if in a preemptible
++		 * context.
++		 */
++		if (preemptible() && console_trylock())
++			console_unlock();
++#else
+ 		/*
+ 		 * Disable preemption to avoid being preempted while holding
+ 		 * console_sem which would prevent anyone from printing to
+ 		 * console
+ 		 */
+ 		preempt_disable();
 -		/*
 -		 * Try to acquire and then immediately release the console
 -		 * semaphore.  The release will print out buffers and wake up
 -		 * /dev/kmsg and syslog() users.
 -		 */
--		if (console_trylock_spinning())
--			console_unlock();
--		preempt_enable();
--	}
--
- 	wake_up_klogd();
- 	return printed_len;
- }
- EXPORT_SYMBOL(vprintk_emit);
- 
--int vprintk_default(const char *fmt, va_list args)
-+__printf(1, 0)
-+static int vprintk_default(const char *fmt, va_list args)
- {
- 	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
- }
--EXPORT_SYMBOL_GPL(vprintk_default);
-+
-+__printf(1, 0)
-+static int vprintk_func(const char *fmt, va_list args)
-+{
-+#ifdef CONFIG_KGDB_KDB
-+	/* Allow to pass printk() to kdb but avoid a recursion. */
-+	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
-+		return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
+ 		if (console_trylock_spinning())
+ 			console_unlock();
+ 		preempt_enable();
 +#endif
-+	return vprintk_default(fmt, args);
-+}
-+
-+asmlinkage int vprintk(const char *fmt, va_list args)
-+{
-+	return vprintk_func(fmt, args);
-+}
-+EXPORT_SYMBOL(vprintk);
+ 	}
  
- asmlinkage __visible int _printk(const char *fmt, ...)
- {
-@@ -2269,37 +2201,162 @@ asmlinkage __visible int _printk(const char *fmt, ...)
+ 	wake_up_klogd();
+@@ -2270,18 +2321,91 @@ asmlinkage __visible int _printk(const char *fmt, ...)
  }
  EXPORT_SYMBOL(_printk);
  
--#else /* CONFIG_PRINTK */
-+static int printk_kthread_func(void *data)
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++static void __free_atomic_data(struct console_atomic_data *d)
 +{
-+	struct console *con = data;
-+	unsigned long dropped = 0;
-+	char *dropped_text = NULL;
-+	struct printk_info info;
-+	struct printk_record r;
-+	char *ext_text = NULL;
-+	size_t dropped_len;
-+	int ret = -ENOMEM;
-+	char *text = NULL;
-+	char *write_text;
-+	size_t len;
-+	int error;
-+	u64 seq;
- 
--#define CONSOLE_LOG_MAX		0
--#define printk_time		false
-+	if (con->flags & CON_EXTENDED) {
-+		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
-+		if (!ext_text)
-+			goto out;
-+	}
-+	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
-+	dropped_text = kmalloc(64, GFP_KERNEL);
-+	if (!text || !dropped_text)
-+		goto out;
-+	if (con->flags & CON_EXTENDED)
-+		write_text = ext_text;
-+	else
-+		write_text = text;
- 
--#define prb_read_valid(rb, seq, r)	false
--#define prb_first_valid_seq(rb)		0
-+	seq = read_console_seq(con);
- 
--static u64 syslog_seq;
--static u64 console_seq;
--static u64 exclusive_console_stop_seq;
--static unsigned long console_dropped;
-+	prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
-+
-+	for (;;) {
-+		error = wait_event_interruptible(log_wait,
-+						 prb_read_valid(prb, seq, &r) || kthread_should_stop());
-+
-+		if (kthread_should_stop())
-+			break;
++	kfree(d->text);
++	kfree(d->ext_text);
++	kfree(d->dropped_text);
++}
 +
-+		if (error)
-+			continue;
++static void free_atomic_data(struct console_atomic_data *d)
++{
++	int count = 1;
++	int i;
 +
-+		if (seq != r.info->seq) {
-+			dropped += r.info->seq - seq;
-+			seq = r.info->seq;
-+		}
++	if (!d)
++		return;
 +
-+		seq++;
++#ifdef CONFIG_HAVE_NMI
++	count = 2;
++#endif
 +
-+		if (!(con->flags & CON_ENABLED))
-+			continue;
++	for (i = 0; i < count; i++)
++		__free_atomic_data(&d[i]);
++	kfree(d);
++}
 +
-+		if (suppress_message_printing(r.info->level))
-+			continue;
++static int __alloc_atomic_data(struct console_atomic_data *d, short flags)
++{
++	d->text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
++	if (!d->text)
++		return -1;
++
++	if (flags & CON_EXTENDED) {
++		d->ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
++		if (!d->ext_text)
++			return -1;
++	} else {
++		d->dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
++		if (!d->dropped_text)
++			return -1;
++	}
 +
-+		if (con->flags & CON_EXTENDED) {
-+			len = info_print_ext_header(ext_text,
-+						    CONSOLE_EXT_LOG_MAX,
-+						    r.info);
-+			len += msg_print_ext_body(ext_text + len,
-+						  CONSOLE_EXT_LOG_MAX - len,
-+						  &r.text_buf[0], r.info->text_len,
-+						  &r.info->dev_info);
-+		} else {
-+			len = record_print_text(&r,
-+						console_msg_format & MSG_FORMAT_SYSLOG,
-+						printk_time);
-+		}
++	return 0;
++}
 +
-+		console_lock();
++static struct console_atomic_data *alloc_atomic_data(short flags)
++{
++	struct console_atomic_data *d;
++	int count = 1;
++	int i;
 +
-+		/*
-+		 * Even though the printk kthread is always preemptible, it is
-+		 * still not allowed to call cond_resched() from within
-+		 * console drivers. The task may become non-preemptible in the
-+		 * console driver call chain. For example, vt_console_print()
-+		 * takes a spinlock and then can call into fbcon_redraw(),
-+		 * which can conditionally invoke cond_resched().
-+		 */
-+		console_may_schedule = 0;
++#ifdef CONFIG_HAVE_NMI
++	count = 2;
++#endif
 +
-+		if (kernel_sync_mode() && con->write_atomic) {
-+			console_unlock();
-+			break;
-+		}
++	d = kzalloc(sizeof(*d) * count, GFP_KERNEL);
++	if (!d)
++		goto err_out;
 +
-+		if (!(con->flags & CON_EXTENDED) && dropped) {
-+			dropped_len = snprintf(dropped_text, 64,
-+					       "** %lu printk messages dropped **\n",
-+					       dropped);
-+			dropped = 0;
++	for (i = 0; i < count; i++) {
++		if (__alloc_atomic_data(&d[i], flags) != 0)
++			goto err_out;
++	}
 +
-+			con->write(con, dropped_text, dropped_len);
-+			printk_delay(r.info->level);
-+		}
++	return d;
++err_out:
++	free_atomic_data(d);
++	return NULL;
++}
++#endif /* CONFIG_HAVE_ATOMIC_CONSOLE */
 +
-+		con->write(con, write_text, len);
-+		if (len)
-+			printk_delay(r.info->level);
++static void start_printk_kthread(struct console *con);
 +
-+		latched_seq_write(&con->printk_seq, seq);
+ #else /* CONFIG_PRINTK */
+ 
+ #define CONSOLE_LOG_MAX		0
++#define DROPPED_TEXT_MAX	0
+ #define printk_time		false
+ 
+ #define prb_read_valid(rb, seq, r)	false
+ #define prb_first_valid_seq(rb)		0
++#define prb_next_seq(rb)		0
 +
-+		console_unlock();
-+	}
-+	ret = 0;
-+out:
-+	kfree(dropped_text);
-+	kfree(text);
-+	kfree(ext_text);
-+	pr_info("%sconsole [%s%d]: printing thread stopped\n",
-+		(con->flags & CON_BOOT) ? "boot" : "",
-+		con->name, con->index);
-+	return ret;
-+}
++#define free_atomic_data(d)
  
--static size_t record_print_text(const struct printk_record *r,
--				bool syslog, bool time)
-+/* Must be called within console_lock(). */
-+static void start_printk_kthread(struct console *con)
- {
--	return 0;
-+	con->thread = kthread_run(printk_kthread_func, con,
-+				  "pr/%s%d", con->name, con->index);
-+	if (IS_ERR(con->thread)) {
-+		pr_err("%sconsole [%s%d]: unable to start printing thread\n",
-+		       (con->flags & CON_BOOT) ? "boot" : "",
-+		       con->name, con->index);
-+		return;
-+	}
-+	pr_info("%sconsole [%s%d]: printing thread started\n",
-+		(con->flags & CON_BOOT) ? "boot" : "",
-+		con->name, con->index);
- }
--static ssize_t info_print_ext_header(char *buf, size_t size,
--				     struct printk_info *info)
-+
-+/* protected by console_lock */
-+static bool kthreads_started;
-+
-+/* Must be called within console_lock(). */
-+static void console_try_thread(struct console *con)
- {
--	return 0;
-+	if (kthreads_started) {
-+		start_printk_kthread(con);
-+		return;
-+	}
-+
-+	/*
-+	 * The printing threads have not been started yet. If this console
-+	 * can print synchronously, print all unprinted messages.
-+	 */
-+	if (console_may_sync(con)) {
-+		unsigned long flags;
-+
-+		local_irq_save(flags);
-+		print_sync_until(con, prb_next_seq(prb), true);
-+		local_irq_restore(flags);
-+	}
- }
--static ssize_t msg_print_ext_body(char *buf, size_t size,
--				  char *text, size_t text_len,
--				  struct dev_printk_info *dev_info) { return 0; }
--static void console_lock_spinning_enable(void) { }
--static int console_lock_spinning_disable_and_check(void) { return 0; }
+ static u64 syslog_seq;
+-static u64 console_seq;
+-static u64 exclusive_console_stop_seq;
+-static unsigned long console_dropped;
+ 
+ static size_t record_print_text(const struct printk_record *r,
+ 				bool syslog, bool time)
+@@ -2298,9 +2422,10 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
+ 				  struct dev_printk_info *dev_info) { return 0; }
+ static void console_lock_spinning_enable(void) { }
+ static int console_lock_spinning_disable_and_check(void) { return 0; }
 -static void call_console_drivers(const char *ext_text, size_t ext_len,
 -				 const char *text, size_t len) {}
--static bool suppress_message_printing(int level) { return false; }
++static void call_console_driver(struct console *con, const char *text, size_t len,
++				char *dropped_text, bool atomic_printing) {}
+ static bool suppress_message_printing(int level) { return false; }
++static void start_printk_kthread(struct console *con) {}
  
  #endif /* CONFIG_PRINTK */
  
-@@ -2556,34 +2613,6 @@ int is_console_locked(void)
+@@ -2476,6 +2601,7 @@ void suspend_console(void)
+ 	if (!console_suspend_enabled)
+ 		return;
+ 	pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
++	pr_flush(1000, true);
+ 	console_lock();
+ 	console_suspended = 1;
+ 	up_console_sem();
+@@ -2488,6 +2614,7 @@ void resume_console(void)
+ 	down_console_sem();
+ 	console_suspended = 0;
+ 	console_unlock();
++	pr_flush(1000, true);
+ }
+ 
+ /**
+@@ -2524,7 +2651,7 @@ void console_lock(void)
+ 	down_console_sem();
+ 	if (console_suspended)
+ 		return;
+-	console_locked = 1;
++	pause_all_consoles();
+ 	console_may_schedule = 1;
+ }
+ EXPORT_SYMBOL(console_lock);
+@@ -2545,46 +2672,387 @@ int console_trylock(void)
+ 		up_console_sem();
+ 		return 0;
+ 	}
+-	console_locked = 1;
++	if (!console_excl_trylock()) {
++		up_console_sem();
++		return 0;
++	}
+ 	console_may_schedule = 0;
+ 	return 1;
+ }
+ EXPORT_SYMBOL(console_trylock);
+ 
++/*
++ * This is used to help to make sure that certain paths within the VT code are
++ * running with the console lock held. It is definitely not the perfect debug
++ * tool (it is not known if the VT code is the task holding the console lock),
++ * but it helps tracking those weird code paths in the console code such as
++ * when the console is suspended: where the console is not locked but no
++ * console printing may occur.
++ *
++ * Note: This returns true when the console is suspended but is not locked.
++ *       This is intentional because the VT code must consider that situation
++ *       the same as if the console was locked.
++ */
+ int is_console_locked(void)
+ {
+-	return console_locked;
++	return (consoles_paused || atomic_read(&console_lock_count));
  }
  EXPORT_SYMBOL(is_console_locked);
  
--/*
+ /*
 - * Check if we have any console that is capable of printing while cpu is
 - * booting or shutting down. Requires console_sem.
-- */
++ * Check if the given console is currently capable and allowed to print
++ * records.
++ *
++ * Requires the console_lock.
+  */
 -static int have_callable_console(void)
--{
++static inline bool console_is_usable(struct console *con, bool atomic_printing)
+ {
 -	struct console *con;
--
++	if (!(con->flags & CON_ENABLED))
++		return false;
+ 
 -	for_each_console(con)
 -		if ((con->flags & CON_ENABLED) &&
 -				(con->flags & CON_ANYTIME))
 -			return 1;
--
++	if (atomic_printing) {
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++		if (!con->write_atomic)
++			return false;
++		if (!con->atomic_data)
++			return false;
++#else
++		return false;
++#endif
++	} else if (!con->write) {
++		return false;
++	}
+ 
 -	return 0;
--}
--
--/*
++	/*
++	 * Console drivers may assume that per-cpu resources have been
++	 * allocated. So unless they're explicitly marked as being able to
++	 * cope (CON_ANYTIME) don't call them until per-cpu resources have
++	 * been allocated.
++	 */
++	if (!printk_percpu_data_ready() &&
++	    !(con->flags & CON_ANYTIME))
++		return false;
++
++	return true;
++}
++
++static void __console_unlock(void)
++{
++	/*
++	 * Depending on whether console_lock() or console_trylock() was used,
++	 * appropriately allow the kthread printers to continue.
++	 */
++	if (consoles_paused)
++		unpause_all_consoles();
++	else
++		console_excl_unlock();
++
++	/* Wake the kthread printers. */
++	wake_up_klogd();
++
++	up_console_sem();
++}
++
++static u64 read_console_seq(struct console *con)
++{
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	unsigned long flags;
++	u64 seq2;
++	u64 seq;
++
++	if (!con->atomic_data)
++		return con->seq;
++
++	printk_cpu_sync_get_irqsave(flags);
++
++	seq = con->seq;
++	seq2 = con->atomic_data[0].seq;
++	if (seq2 > seq)
++		seq = seq2;
++#ifdef CONFIG_HAVE_NMI
++	seq2 = con->atomic_data[1].seq;
++	if (seq2 > seq)
++		seq = seq2;
++#endif
++
++	printk_cpu_sync_put_irqrestore(flags);
++
++	return seq;
++#else /* CONFIG_HAVE_ATOMIC_CONSOLE */
++	return con->seq;
++#endif
++}
++
++static void write_console_seq(struct console *con, u64 val, bool atomic_printing)
++{
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	unsigned long flags;
++	u64 *seq;
++
++	if (!con->atomic_data) {
++		con->seq = val;
++		return;
++	}
++
++	printk_cpu_sync_get_irqsave(flags);
++
++	if (atomic_printing) {
++		seq = &con->atomic_data[0].seq;
++#ifdef CONFIG_HAVE_NMI
++		if (in_nmi())
++			seq = &con->atomic_data[1].seq;
++#endif
++	} else {
++		seq = &con->seq;
++	}
++	*seq = val;
++
++	printk_cpu_sync_put_irqrestore(flags);
++#else /* CONFIG_HAVE_ATOMIC_CONSOLE */
++	con->seq = val;
++#endif
+ }
+ 
+ /*
 - * Can we actually use the console at this time on this cpu?
-- *
++ * Print one record for the given console. The record printed is whatever
++ * record is the next available record for the given console.
++ *
++ * @text is a buffer of size CONSOLE_LOG_MAX.
++ *
++ * If extended messages should be printed, @ext_text is a buffer of size
++ * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL.
++ *
++ * If dropped messages should be printed, @dropped_text is a buffer of size
++ * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
++ *
++ * @atomic_printing specifies if atomic printing should be used.
++ *
++ * Requires the console_lock.
+  *
 - * Console drivers may assume that per-cpu resources have been allocated. So
 - * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
 - * call them until this CPU is officially up.
-- */
++ * Returns false if the given console has no next record to print, otherwise
++ * true.
++ *
++ * @handover will be set to true if a printk waiter has taken over the
++ * console_lock, in which case the caller is no longer holding the
++ * console_lock. A NULL pointer may be provided to disable allowing
++ * the console_lock to be taken over by a printk waiter.
+  */
 -static inline int can_use_console(void)
--{
++static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
++				     char *dropped_text, bool atomic_printing,
++				     bool *handover)
+ {
 -	return cpu_online(raw_smp_processor_id()) || have_callable_console();
--}
--
++	struct printk_info info;
++	struct printk_record r;
++	unsigned long flags;
++	bool allow_handover;
++	char *write_text;
++	size_t len;
++	u64 seq;
++
++	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
++
++	if (handover)
++		*handover = false;
++
++	seq = read_console_seq(con);
++
++	if (!prb_read_valid(prb, seq, &r))
++		return false;
++
++	if (seq != r.info->seq) {
++		atomic_long_add((unsigned long)(r.info->seq - seq), &con->dropped);
++		write_console_seq(con, r.info->seq, atomic_printing);
++		seq = r.info->seq;
++	}
++
++	/* Skip record that has level above the console loglevel. */
++	if (suppress_message_printing(r.info->level)) {
++		write_console_seq(con, seq + 1, atomic_printing);
++		goto skip;
++	}
++
++	if (ext_text) {
++		write_text = ext_text;
++		len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info);
++		len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len,
++					  &r.text_buf[0], r.info->text_len, &r.info->dev_info);
++	} else {
++		write_text = text;
++		len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
++	}
++
++#if (IS_ENABLED(CONFIG_PREEMPT_RT))
++	/* PREEMPT_RT does not support console lock handovers. */
++	allow_handover = false;
++#else
++	/* Handovers may only happen between trylock contexts. */
++	allow_handover = (handover && atomic_read(&console_lock_count) == -1);
++#endif
++
++	if (allow_handover) {
++		/*
++		 * While actively printing out messages, if another printk()
++		 * were to occur on another CPU, it may wait for this one to
++		 * finish. This task can not be preempted if there is a
++		 * waiter waiting to take over.
++		 *
++		 * Interrupts are disabled because the hand over to a waiter
++		 * must not be interrupted until the hand over is completed
++		 * (@console_waiter is cleared).
++		 */
++		printk_safe_enter_irqsave(flags);
++		console_lock_spinning_enable();
++	}
++
++	stop_critical_timings();	/* don't trace print latency */
++	call_console_driver(con, write_text, len, dropped_text, atomic_printing);
++	start_critical_timings();
++
++	write_console_seq(con, seq + 1, atomic_printing);
++
++	if (allow_handover) {
++		*handover = console_lock_spinning_disable_and_check();
++		printk_safe_exit_irqrestore(flags);
++	}
++
++	boot_delay_msec(r.info->level);
++	printk_delay();
++skip:
++	return true;
+ }
+ 
++/*
++ * Print out all remaining records to all consoles.
++ *
++ * Requires the console_lock.
++ *
++ * Returns true if a console was available for flushing, otherwise false.
++ *
++ * @next_seq is set to the highest sequence number of all of the consoles that
++ * were flushed.
++ *
++ * @handover will be set to true if a printk waiter has taken over the
++ * console_lock, in which case the caller is no longer holding the
++ * console_lock.
++ */
++static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
++{
++	static char dropped_text[DROPPED_TEXT_MAX];
++	static char ext_text[CONSOLE_EXT_LOG_MAX];
++	static char text[CONSOLE_LOG_MAX];
++	bool any_usable = false;
++	struct console *con;
++	bool any_progress;
++
++	*next_seq = 0;
++	*handover = false;
++
++	do {
++		/* Let the kthread printers do the work if they can. */
++		if (kthread_printers_active())
++			return false;
++
++		any_progress = false;
++
++		for_each_console(con) {
++			bool progress;
++
++			if (!console_is_usable(con, false))
++				continue;
++			if ((con->flags & CON_MIGHT_SLEEP) && !do_cond_resched)
++				continue;
++			any_usable = true;
++
++			if (con->flags & CON_EXTENDED) {
++				/* Extended consoles do not print "dropped messages". */
++				progress = console_emit_next_record(con, &text[0],
++								    &ext_text[0], NULL,
++								    false, handover);
++			} else {
++				progress = console_emit_next_record(con, &text[0],
++								    NULL, &dropped_text[0],
++								    false, handover);
++			}
++			if (*handover)
++				return true;
++
++			/* Track the highest seq flushed. */
++			if (con->seq > *next_seq)
++				*next_seq = con->seq;
++
++			if (!progress)
++				continue;
++			any_progress = true;
++
++			if (do_cond_resched)
++				cond_resched();
++		}
++	} while (any_progress);
++
++	return any_usable;
++}
++
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++static void atomic_console_flush_all(void)
++{
++	bool any_usable = false;
++	unsigned long flags;
++	struct console *con;
++	bool any_progress;
++	int index = 0;
++
++	if (console_suspended)
++		return;
++
++#ifdef CONFIG_HAVE_NMI
++	if (in_nmi())
++		index = 1;
++#endif
++
++	printk_cpu_sync_get_irqsave(flags);
++
++	do {
++		any_progress = false;
++
++		for_each_console(con) {
++			bool progress;
++
++			if (!console_is_usable(con, true))
++				continue;
++			any_usable = true;
++
++			if (con->flags & CON_EXTENDED) {
++				/* Extended consoles do not print "dropped messages". */
++				progress = console_emit_next_record(con,
++							&con->atomic_data->text[index],
++							&con->atomic_data->ext_text[index],
++							NULL,
++							true, NULL);
++			} else {
++				progress = console_emit_next_record(con,
++							&con->atomic_data->text[index],
++							NULL,
++							&con->atomic_data->dropped_text[index],
++							true, NULL);
++			}
++
++			if (!progress)
++				continue;
++			any_progress = true;
++
++			touch_softlockup_watchdog_sync();
++			clocksource_touch_watchdog();
++			rcu_cpu_stall_reset();
++			touch_nmi_watchdog();
++		}
++	} while (any_progress);
++
++	printk_cpu_sync_put_irqrestore(flags);
++}
++#else /* CONFIG_HAVE_ATOMIC_CONSOLE */
++#define atomic_console_flush_all()
++#endif
++
  /**
   * console_unlock - unlock the console system
   *
-@@ -2600,140 +2629,13 @@ static inline int can_use_console(void)
+@@ -2601,21 +3069,16 @@ static inline int can_use_console(void)
   */
  void console_unlock(void)
  {
@@ -6961,7 +6104,11 @@ index 99221b016c68..0cc8e8acf545 100644
 -	struct printk_info info;
 -	struct printk_record r;
 -	u64 __maybe_unused next_seq;
--
++	bool do_cond_resched;
++	bool handover;
++	bool flushed;
++	u64 next_seq;
+ 
  	if (console_suspended) {
  		up_console_sem();
  		return;
@@ -6969,24 +6116,16 @@ index 99221b016c68..0cc8e8acf545 100644
  
 -	prb_rec_init_rd(&r, &info, text, sizeof(text));
 -
--	/*
--	 * Console drivers are called with interrupts disabled, so
--	 * @console_may_schedule should be cleared before; however, we may
--	 * end up dumping a lot of lines, for example, if called from
--	 * console registration path, and should invoke cond_resched()
--	 * between lines if allowable.  Not doing so can cause a very long
--	 * scheduling stall on a slow console leading to RCU stall and
--	 * softlockup warnings which exacerbate the issue with more
--	 * messages practically incapacitating the system.
--	 *
--	 * console_trylock() is not able to detect the preemptive
--	 * context reliably. Therefore the value must be stored before
--	 * and cleared after the "again" goto label.
--	 */
--	do_cond_resched = console_may_schedule;
+ 	/*
+ 	 * Console drivers are called with interrupts disabled, so
+ 	 * @console_may_schedule should be cleared before; however, we may
+@@ -2631,110 +3094,27 @@ void console_unlock(void)
+ 	 * and cleared after the "again" goto label.
+ 	 */
+ 	do_cond_resched = console_may_schedule;
 -again:
 -	console_may_schedule = 0;
--
+ 
 -	/*
 -	 * We released the console_sem lock, so we need to recheck if
 -	 * cpu is online and (if not) is there at least one CON_ANYTIME
@@ -7002,11 +6141,15 @@ index 99221b016c68..0cc8e8acf545 100644
 -		size_t ext_len = 0;
 -		int handover;
 -		size_t len;
--
++	do {
++		console_may_schedule = 0;
+ 
 -skip:
 -		if (!prb_read_valid(prb, console_seq, &r))
--			break;
--
++		flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
++		if (handover)
+ 			break;
+ 
 -		if (console_seq != r.info->seq) {
 -			console_dropped += r.info->seq - console_seq;
 -			console_seq = r.info->seq;
@@ -7027,7 +6170,8 @@ index 99221b016c68..0cc8e8acf545 100644
 -			     console_seq >= exclusive_console_stop_seq)) {
 -			exclusive_console = NULL;
 -		}
--
++		__console_unlock();
+ 
 -		/*
 -		 * Handle extended console text first because later
 -		 * record_print_text() will modify the record buffer in-place.
@@ -7046,9 +6190,12 @@ index 99221b016c68..0cc8e8acf545 100644
 -				console_msg_format & MSG_FORMAT_SYSLOG,
 -				printk_time);
 -		console_seq++;
--
--		/*
--		 * While actively printing out messages, if another printk()
++		/* Were there any consoles available for flushing? */
++		if (!flushed)
++			break;
+ 
+ 		/*
+-		 * While actively printing out messages, if another printk()
 -		 * were to occur on another CPU, it may wait for this one to
 -		 * finish. This task can not be preempted if there is a
 -		 * waiter waiting to take over.
@@ -7056,7 +6203,11 @@ index 99221b016c68..0cc8e8acf545 100644
 -		 * Interrupts are disabled because the hand over to a waiter
 -		 * must not be interrupted until the hand over is completed
 -		 * (@console_waiter is cleared).
--		 */
++		 * Some context may have added new records after
++		 * console_flush_all() but before unlocking the console.
++		 * Re-check if there is a new record to flush. If the trylock
++		 * fails, another context is already handling the printing.
+ 		 */
 -		printk_safe_enter_irqsave(flags);
 -		console_lock_spinning_enable();
 -
@@ -7076,8 +6227,8 @@ index 99221b016c68..0cc8e8acf545 100644
 -	/* Get consistent value of the next-to-be-used sequence number. */
 -	next_seq = console_seq;
 -
- 	console_locked = 0;
- 	up_console_sem();
+-	console_locked = 0;
+-	up_console_sem();
 -
 -	/*
 -	 * Someone could have filled up the buffer again, so re-check if there's
@@ -7088,71 +6239,74 @@ index 99221b016c68..0cc8e8acf545 100644
 -	retry = prb_read_valid(prb, next_seq, NULL);
 -	if (retry && console_trylock())
 -		goto again;
++	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
  }
  EXPORT_SYMBOL(console_unlock);
  
-@@ -2783,18 +2685,20 @@ void console_unblank(void)
+@@ -2765,10 +3145,15 @@ void console_unblank(void)
+ 	if (oops_in_progress) {
+ 		if (down_trylock_console_sem() != 0)
+ 			return;
+-	} else
++		if (!console_excl_trylock()) {
++			up_console_sem();
++			return;
++		}
++	} else {
++		pr_flush(1000, true);
+ 		console_lock();
++	}
+ 
+-	console_locked = 1;
+ 	console_may_schedule = 0;
+ 	for_each_console(c)
+ 		if ((c->flags & CON_ENABLED) && c->unblank)
+@@ -2784,6 +3169,11 @@ void console_unblank(void)
   */
  void console_flush_on_panic(enum con_flush_mode mode)
  {
--	/*
--	 * If someone else is holding the console lock, trylock will fail
--	 * and may_schedule may be set.  Ignore and proceed to unlock so
--	 * that messages are flushed out.  As this can be called from any
--	 * context and we don't want to get preempted while flushing,
--	 * ensure may_schedule is cleared.
--	 */
--	console_trylock();
--	console_may_schedule = 0;
-+	if (!console_trylock())
++	if (mode == CONSOLE_ATOMIC_FLUSH_PENDING) {
++		atomic_console_flush_all();
 +		return;
++	}
 +
-+#ifdef CONFIG_PRINTK
+ 	/*
+ 	 * If someone else is holding the console lock, trylock will fail
+ 	 * and may_schedule may be set.  Ignore and proceed to unlock so
+@@ -2794,8 +3184,14 @@ void console_flush_on_panic(enum con_flush_mode mode)
+ 	console_trylock();
+ 	console_may_schedule = 0;
+ 
+-	if (mode == CONSOLE_REPLAY_ALL)
+-		console_seq = prb_first_valid_seq(prb);
 +	if (mode == CONSOLE_REPLAY_ALL) {
 +		struct console *c;
 +		u64 seq;
 +
 +		seq = prb_first_valid_seq(prb);
 +		for_each_console(c)
-+			latched_seq_write(&c->printk_seq, seq);
++			write_console_seq(c, seq, false);
 +	}
-+#endif
- 
--	if (mode == CONSOLE_REPLAY_ALL)
--		console_seq = prb_first_valid_seq(prb);
  	console_unlock();
  }
  
-@@ -2930,6 +2834,7 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
- void register_console(struct console *newcon)
+@@ -2826,6 +3222,7 @@ struct tty_driver *console_device(int *index)
+  */
+ void console_stop(struct console *console)
  {
- 	struct console *bcon = NULL;
-+	u64 __maybe_unused seq = 0;
- 	int err;
- 
- 	for_each_console(bcon) {
-@@ -2952,6 +2857,8 @@ void register_console(struct console *newcon)
- 		}
- 	}
- 
-+	newcon->thread = NULL;
-+
- 	if (console_drivers && console_drivers->flags & CON_BOOT)
- 		bcon = console_drivers;
- 
-@@ -2993,8 +2900,10 @@ void register_console(struct console *newcon)
- 	 * the real console are the same physical device, it's annoying to
- 	 * see the beginning boot messages twice
- 	 */
--	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
-+	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
- 		newcon->flags &= ~CON_PRINTBUFFER;
-+		newcon->flags |= CON_HANDOVER;
-+	}
++	pr_flush(1000, true);
+ 	console_lock();
+ 	console->flags &= ~CON_ENABLED;
+ 	console_unlock();
+@@ -2837,6 +3234,7 @@ void console_start(struct console *console)
+ 	console_lock();
+ 	console->flags |= CON_ENABLED;
+ 	console_unlock();
++	pr_flush(1000, true);
+ }
+ EXPORT_SYMBOL(console_start);
  
- 	/*
- 	 *	Put this console in the list - keep the
-@@ -3016,27 +2925,21 @@ void register_console(struct console *newcon)
+@@ -3017,27 +3415,25 @@ void register_console(struct console *newcon)
  	if (newcon->flags & CON_EXTENDED)
  		nr_ext_console_drivers++;
  
@@ -7171,213 +6325,68 @@ index 99221b016c68..0cc8e8acf545 100644
 -		 */
 -		exclusive_console = newcon;
 -		exclusive_console_stop_seq = console_seq;
-+#ifdef CONFIG_PRINTK
-+	if (!(newcon->flags & CON_PRINTBUFFER))
-+		seq = prb_next_seq(prb);
++	if (consoles_paused)
++		newcon->flags |= CON_PAUSED;
  
--		/* Get a consistent copy of @syslog_seq. */
--		mutex_lock(&syslog_lock);
--		console_seq = syslog_seq;
--		mutex_unlock(&syslog_lock);
--	}
-+	seqcount_latch_init(&newcon->printk_seq.latch);
-+	latched_seq_write(&newcon->printk_seq, seq);
-+	seqcount_latch_init(&newcon->printk_sync_seq.latch);
-+	latched_seq_write(&newcon->printk_sync_seq, seq);
-+#ifdef CONFIG_HAVE_NMI
-+	seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch);
-+	latched_seq_write(&newcon->printk_sync_nmi_seq, seq);
++	atomic_long_set(&newcon->dropped, 0);
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	newcon->atomic_data = NULL;
 +#endif
-+
-+	console_try_thread(newcon);
-+#endif /* CONFIG_PRINTK */
++	mutex_init(&newcon->lock);
++	if (newcon->flags & CON_PRINTBUFFER) {
+ 		/* Get a consistent copy of @syslog_seq. */
+ 		mutex_lock(&syslog_lock);
+-		console_seq = syslog_seq;
++		write_console_seq(newcon, syslog_seq, false);
+ 		mutex_unlock(&syslog_lock);
++	} else {
++		/* Begin with next message. */
++		write_console_seq(newcon, prb_next_seq(prb), false);
+ 	}
++	if (kthreads_started)
++		start_printk_kthread(newcon);
  	console_unlock();
  	console_sysfs_notify();
  
-@@ -3110,6 +3013,9 @@ int unregister_console(struct console *console)
+@@ -3094,6 +3490,11 @@ int unregister_console(struct console *console)
+ 		}
+ 	}
+ 
++	if (console->thread) {
++		kthread_stop(console->thread);
++		console->thread = NULL;
++	}
++
+ 	if (res)
+ 		goto out_disable_unlock;
+ 
+@@ -3111,6 +3512,10 @@ int unregister_console(struct console *console)
  	console_unlock();
  	console_sysfs_notify();
  
-+	if (console->thread && !IS_ERR(console->thread))
-+		kthread_stop(console->thread);
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	free_atomic_data(console->atomic_data);
++#endif
 +
  	if (console->exit)
  		res = console->exit(console);
  
-@@ -3192,6 +3098,15 @@ static int __init printk_late_init(void)
- 			unregister_console(con);
- 		}
- 	}
+@@ -3199,11 +3604,205 @@ static int __init printk_late_init(void)
+ 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
+ 					console_cpu_notify, NULL);
+ 	WARN_ON(ret < 0);
 +
-+#ifdef CONFIG_PRINTK
 +	console_lock();
 +	for_each_console(con)
 +		start_printk_kthread(con);
 +	kthreads_started = true;
 +	console_unlock();
-+#endif
 +
- 	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
- 					console_cpu_notify);
- 	WARN_ON(ret < 0);
-@@ -3207,7 +3122,6 @@ late_initcall(printk_late_init);
-  * Delayed printk version, for scheduler-internal messages:
-  */
- #define PRINTK_PENDING_WAKEUP	0x01
--#define PRINTK_PENDING_OUTPUT	0x02
- 
- static DEFINE_PER_CPU(int, printk_pending);
- 
-@@ -3215,14 +3129,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
- {
- 	int pending = __this_cpu_xchg(printk_pending, 0);
- 
--	if (pending & PRINTK_PENDING_OUTPUT) {
--		/* If trylock fails, someone else is doing the printing */
--		if (console_trylock())
--			console_unlock();
--	}
--
- 	if (pending & PRINTK_PENDING_WAKEUP)
--		wake_up_interruptible(&log_wait);
-+		wake_up_interruptible_all(&log_wait);
- }
- 
- static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
-@@ -3241,42 +3149,9 @@ void wake_up_klogd(void)
- 	preempt_enable();
- }
- 
--void defer_console_output(void)
--{
--	if (!printk_percpu_data_ready())
--		return;
--
--	preempt_disable();
--	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
--	irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
--	preempt_enable();
--}
--
- void printk_trigger_flush(void)
- {
--	defer_console_output();
--}
--
--int vprintk_deferred(const char *fmt, va_list args)
--{
--	int r;
--
--	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
--	defer_console_output();
--
--	return r;
--}
--
--int _printk_deferred(const char *fmt, ...)
--{
--	va_list args;
--	int r;
--
--	va_start(args, fmt);
--	r = vprintk_deferred(fmt, args);
--	va_end(args);
--
--	return r;
-+	wake_up_klogd();
+ 	return 0;
  }
+ late_initcall(printk_late_init);
  
- /*
-@@ -3405,6 +3280,24 @@ void kmsg_dump(enum kmsg_dump_reason reason)
- {
- 	struct kmsg_dumper *dumper;
- 
-+	if (!oops_in_progress) {
-+		/*
-+		 * If atomic consoles are available, activate kernel sync mode
-+		 * to make sure any final messages are visible. The trailing
-+		 * printk message is important to flush any pending messages.
-+		 */
-+		if (have_atomic_console()) {
-+			sync_mode = true;
-+			pr_info("enabled sync mode\n");
-+		}
-+
-+		/*
-+		 * Give the printing threads time to flush, allowing up to
-+		 * 1s of no printing forward progress before giving up.
-+		 */
-+		pr_flush(1000, true);
-+	}
-+
- 	rcu_read_lock();
- 	list_for_each_entry_rcu(dumper, &dump_list, list) {
- 		enum kmsg_dump_reason max_reason = dumper->max_reason;
-@@ -3587,6 +3480,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
- #ifdef CONFIG_SMP
- static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
- static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
-+static unsigned int kgdb_cpu = -1;
- 
- /**
-  * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
-@@ -3666,6 +3560,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock);
-  */
- void __printk_cpu_unlock(void)
- {
-+	bool trigger_kgdb = false;
-+	unsigned int cpu;
-+
- 	if (atomic_read(&printk_cpulock_nested)) {
- 		atomic_dec(&printk_cpulock_nested);
- 		return;
-@@ -3676,6 +3573,12 @@ void __printk_cpu_unlock(void)
- 	 * LMM(__printk_cpu_unlock:A)
- 	 */
- 
-+	cpu = smp_processor_id();
-+	if (kgdb_cpu == cpu) {
-+		trigger_kgdb = true;
-+		kgdb_cpu = -1;
-+	}
-+
- 	/*
- 	 * Guarantee loads and stores from this CPU when it was the
- 	 * lock owner are visible to the next lock owner. This pairs
-@@ -3696,6 +3599,98 @@ void __printk_cpu_unlock(void)
- 	 */
- 	atomic_set_release(&printk_cpulock_owner,
- 			   -1); /* LMM(__printk_cpu_unlock:B) */
-+
-+	if (trigger_kgdb) {
-+		pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu);
-+		kgdb_roundup_cpu(cpu);
-+	}
- }
- EXPORT_SYMBOL(__printk_cpu_unlock);
-+
-+bool kgdb_roundup_delay(unsigned int cpu)
-+{
-+	if (cpu != atomic_read(&printk_cpulock_owner))
-+		return false;
-+
-+	kgdb_cpu = cpu;
-+	return true;
-+}
-+EXPORT_SYMBOL(kgdb_roundup_delay);
- #endif /* CONFIG_SMP */
-+
-+#ifdef CONFIG_PRINTK
-+static void pr_msleep(bool may_sleep, int ms)
-+{
-+	if (may_sleep) {
-+		msleep(ms);
-+	} else {
-+		while (ms--)
-+			udelay(1000);
-+	}
-+}
-+
+ #if defined CONFIG_PRINTK
 +/**
 + * pr_flush() - Wait for printing threads to catch up.
 + *
@@ -7390,7 +6399,7 @@ index 99221b016c68..0cc8e8acf545 100644
 + * If @reset_on_progress is true, the timeout will be reset whenever any
 + * printer has been seen to make some forward progress.
 + *
-+ * Context: Any context.
++ * Context: Process context. May sleep while acquiring console lock.
 + * Return: true if all enabled printers are caught up.
 + */
 +bool pr_flush(int timeout_ms, bool reset_on_progress)
@@ -7398,27 +6407,26 @@ index 99221b016c68..0cc8e8acf545 100644
 +	int remaining = timeout_ms;
 +	struct console *con;
 +	u64 last_diff = 0;
-+	bool may_sleep;
 +	u64 printk_seq;
 +	u64 diff;
 +	u64 seq;
 +
-+	may_sleep = (preemptible() &&
-+		     !in_softirq() &&
-+		     system_state >= SYSTEM_RUNNING);
++	might_sleep();
 +
 +	seq = prb_next_seq(prb);
 +
 +	for (;;) {
 +		diff = 0;
 +
++		console_lock();
 +		for_each_console(con) {
-+			if (!(con->flags & CON_ENABLED))
++			if (!console_is_usable(con, false))
 +				continue;
-+			printk_seq = read_console_seq(con);
++			printk_seq = con->seq;
 +			if (printk_seq < seq)
 +				diff += seq - printk_seq;
 +		}
++		console_unlock();
 +
 +		if (diff != last_diff && reset_on_progress)
 +			remaining = timeout_ms;
@@ -7427,12 +6435,13 @@ index 99221b016c68..0cc8e8acf545 100644
 +			break;
 +
 +		if (remaining < 0) {
-+			pr_msleep(may_sleep, 100);
++			/* no timeout limit */
++			msleep(100);
 +		} else if (remaining < 100) {
-+			pr_msleep(may_sleep, remaining);
++			msleep(remaining);
 +			remaining = 0;
 +		} else {
-+			pr_msleep(may_sleep, 100);
++			msleep(100);
 +			remaining -= 100;
 +		}
 +
@@ -7442,143 +6451,370 @@ index 99221b016c68..0cc8e8acf545 100644
 +	return (diff == 0);
 +}
 +EXPORT_SYMBOL(pr_flush);
-+#endif /* CONFIG_PRINTK */
-diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
-deleted file mode 100644
-index ef0f9a2044da..000000000000
---- a/kernel/printk/printk_safe.c
-+++ /dev/null
-@@ -1,52 +0,0 @@
--// SPDX-License-Identifier: GPL-2.0-or-later
--/*
-- * printk_safe.c - Safe printk for printk-deadlock-prone contexts
-- */
--
--#include <linux/preempt.h>
--#include <linux/kdb.h>
--#include <linux/smp.h>
--#include <linux/cpumask.h>
--#include <linux/printk.h>
--#include <linux/kprobes.h>
--
--#include "internal.h"
--
--static DEFINE_PER_CPU(int, printk_context);
--
--/* Can be preempted by NMI. */
--void __printk_safe_enter(void)
--{
--	this_cpu_inc(printk_context);
--}
--
--/* Can be preempted by NMI. */
--void __printk_safe_exit(void)
--{
--	this_cpu_dec(printk_context);
--}
--
--asmlinkage int vprintk(const char *fmt, va_list args)
--{
--#ifdef CONFIG_KGDB_KDB
--	/* Allow to pass printk() to kdb but avoid a recursion. */
--	if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
--		return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
--#endif
--
--	/*
--	 * Use the main logbuf even in NMI. But avoid calling console
--	 * drivers that might have their own locks.
--	 */
--	if (this_cpu_read(printk_context) || in_nmi()) {
--		int len;
--
--		len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
--		defer_console_output();
--		return len;
--	}
--
--	/* No obstacles. */
--	return vprintk_default(fmt, args);
--}
--EXPORT_SYMBOL(vprintk);
-diff --git a/kernel/ptrace.c b/kernel/ptrace.c
-index f8589bf8d7dc..df08e8e64a83 100644
---- a/kernel/ptrace.c
-+++ b/kernel/ptrace.c
-@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
- 	spin_lock_irq(&task->sighand->siglock);
- 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
- 	    !__fatal_signal_pending(task)) {
-+#ifdef CONFIG_PREEMPT_RT
-+		unsigned long flags;
 +
-+		raw_spin_lock_irqsave(&task->pi_lock, flags);
-+		if (READ_ONCE(task->__state) & __TASK_TRACED)
-+			WRITE_ONCE(task->__state, __TASK_TRACED);
-+		else
-+			task->saved_state = __TASK_TRACED;
-+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
-+#else
- 		WRITE_ONCE(task->__state, __TASK_TRACED);
-+#endif
- 		ret = true;
- 	}
- 	spin_unlock_irq(&task->sighand->siglock);
-@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
- 
- static void ptrace_unfreeze_traced(struct task_struct *task)
- {
--	if (READ_ONCE(task->__state) != __TASK_TRACED)
-+	unsigned long flags;
-+	bool frozen = true;
++static bool printer_should_wake(struct console *con, u64 seq)
++{
++	short flags;
 +
-+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
-+	    READ_ONCE(task->__state) != __TASK_TRACED)
- 		return;
- 
- 	WARN_ON(!task->ptrace || task->parent != current);
-@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
- 	 * Recheck state under the lock to close this race.
- 	 */
- 	spin_lock_irq(&task->sighand->siglock);
--	if (READ_ONCE(task->__state) == __TASK_TRACED) {
--		if (__fatal_signal_pending(task))
--			wake_up_state(task, __TASK_TRACED);
--		else
--			WRITE_ONCE(task->__state, TASK_TRACED);
--	}
-+	raw_spin_lock_irqsave(&task->pi_lock, flags);
-+	if (READ_ONCE(task->__state) == __TASK_TRACED)
-+		WRITE_ONCE(task->__state, TASK_TRACED);
++	if (kthread_should_stop())
++		return true;
 +
-+#ifdef CONFIG_PREEMPT_RT
-+	else if (task->saved_state == __TASK_TRACED)
-+		task->saved_state = TASK_TRACED;
++	/*
++	 * This is an unsafe read to con->flags, but false positives
++	 * are not an issue as long as they are rare.
++	 */
++	flags = data_race(READ_ONCE(con->flags));
++
++	if (!(flags & CON_ENABLED) ||
++	    (flags & CON_PAUSED) ||
++	    atomic_read(&console_lock_count) == -1) {
++		return false;
++	}
++
++	return prb_read_valid(prb, seq, NULL);
++}
++
++static int printk_kthread_func(void *data)
++{
++	struct console *con = data;
++	char *dropped_text = NULL;
++	char *ext_text = NULL;
++	bool progress;
++	u64 seq = 0;
++	char *text;
++	int error;
++
++	pr_info("%sconsole [%s%d]: printing thread started\n",
++		(con->flags & CON_BOOT) ? "boot" : "",
++		con->name, con->index);
++
++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE
++	if (con->write_atomic)
++		con->atomic_data = alloc_atomic_data(con->flags);
 +#endif
-+	else
-+		frozen = false;
-+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 +
-+	if (frozen && __fatal_signal_pending(task))
-+		wake_up_state(task, __TASK_TRACED);
++	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
++	if (!text)
++		goto out;
 +
- 	spin_unlock_irq(&task->sighand->siglock);
- }
- 
-diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
-index 6591914af486..a404897d826f 100644
---- a/kernel/rcu/tasks.h
-+++ b/kernel/rcu/tasks.h
-@@ -1347,7 +1347,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
- 	rttd->notrun = true;
- }
- 
--static void rcu_tasks_initiate_self_tests(void)
-+void rcu_tasks_initiate_self_tests(void)
++	if (con->flags & CON_EXTENDED) {
++		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
++		if (!ext_text)
++			goto out;
++	} else {
++		dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
++		if (!dropped_text)
++			goto out;
++	}
++
++	for (;;) {
++		error = wait_event_interruptible(log_wait, printer_should_wake(con, seq));
++
++		if (kthread_should_stop())
++			break;
++
++		if (error)
++			continue;
++
++		do {
++			error = mutex_lock_interruptible(&con->lock);
++			if (error)
++				break;
++
++			if (!console_is_usable(con, false)) {
++				mutex_unlock(&con->lock);
++				break;
++			}
++
++			if ((con->flags & CON_PAUSED) || !console_printer_tryenter()) {
++				mutex_unlock(&con->lock);
++				break;
++			}
++
++			/*
++			 * Even though the printk kthread is always preemptible, it is
++			 * still not allowed to call cond_resched() from within
++			 * console drivers. The task may become non-preemptible in the
++			 * console driver call chain. For example, vt_console_print()
++			 * takes a spinlock and then can call into fbcon_redraw(),
++			 * which can conditionally invoke cond_resched().
++			 */
++			console_may_schedule = 0;
++			progress = console_emit_next_record(con, text, ext_text,
++							    dropped_text, false, NULL);
++
++			seq = con->seq;
++
++			console_printer_exit();
++
++			mutex_unlock(&con->lock);
++		} while (progress);
++	}
++out:
++	kfree(dropped_text);
++	kfree(ext_text);
++	kfree(text);
++	pr_info("%sconsole [%s%d]: printing thread stopped\n",
++		(con->flags & CON_BOOT) ? "boot" : "",
++		con->name, con->index);
++	return 0;
++}
++
++/* Must be called within console_lock(). */
++static void start_printk_kthread(struct console *con)
++{
++	con->thread = kthread_run(printk_kthread_func, con,
++				  "pr/%s%d", con->name, con->index);
++	if (IS_ERR(con->thread)) {
++		con->thread = NULL;
++		pr_err("%sconsole [%s%d]: unable to start printing thread\n",
++			(con->flags & CON_BOOT) ? "boot" : "",
++			con->name, con->index);
++		return;
++	}
++}
++
+ /*
+  * Delayed printk version, for scheduler-internal messages:
+  */
+@@ -3223,7 +3822,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
+ 	}
+ 
+ 	if (pending & PRINTK_PENDING_WAKEUP)
+-		wake_up_interruptible(&log_wait);
++		wake_up_interruptible_all(&log_wait);
+ }
+ 
+ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
+@@ -3586,26 +4185,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
+ #endif
+ 
+ #ifdef CONFIG_SMP
+-static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
+-static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
++static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1);
++static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0);
+ 
+ /**
+- * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
+- *                               spinning lock is not owned by any CPU.
++ * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant
++ *                            spinning lock is not owned by any CPU.
+  *
+  * Context: Any context.
+  */
+-void __printk_wait_on_cpu_lock(void)
++void __printk_cpu_sync_wait(void)
+ {
+ 	do {
+ 		cpu_relax();
+-	} while (atomic_read(&printk_cpulock_owner) != -1);
++	} while (atomic_read(&printk_cpu_sync_owner) != -1);
+ }
+-EXPORT_SYMBOL(__printk_wait_on_cpu_lock);
++EXPORT_SYMBOL(__printk_cpu_sync_wait);
+ 
+ /**
+- * __printk_cpu_trylock() - Try to acquire the printk cpu-reentrant
+- *                          spinning lock.
++ * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant
++ *                               spinning lock.
+  *
+  * If no processor has the lock, the calling processor takes the lock and
+  * becomes the owner. If the calling processor is already the owner of the
+@@ -3614,7 +4213,7 @@ EXPORT_SYMBOL(__printk_wait_on_cpu_lock);
+  * Context: Any context. Expects interrupts to be disabled.
+  * Return: 1 on success, otherwise 0.
+  */
+-int __printk_cpu_trylock(void)
++int __printk_cpu_sync_try_get(void)
+ {
+ 	int cpu;
+ 	int old;
+@@ -3624,79 +4223,80 @@ int __printk_cpu_trylock(void)
+ 	/*
+ 	 * Guarantee loads and stores from this CPU when it is the lock owner
+ 	 * are _not_ visible to the previous lock owner. This pairs with
+-	 * __printk_cpu_unlock:B.
++	 * __printk_cpu_sync_put:B.
+ 	 *
+ 	 * Memory barrier involvement:
+ 	 *
+-	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, then
+-	 * __printk_cpu_unlock:A can never read from __printk_cpu_trylock:B.
++	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
++	 * then __printk_cpu_sync_put:A can never read from
++	 * __printk_cpu_sync_try_get:B.
+ 	 *
+ 	 * Relies on:
+ 	 *
+-	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
++	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
+ 	 * of the previous CPU
+ 	 *    matching
+-	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+-	 * of this CPU
++	 * ACQUIRE from __printk_cpu_sync_try_get:A to
++	 * __printk_cpu_sync_try_get:B of this CPU
+ 	 */
+-	old = atomic_cmpxchg_acquire(&printk_cpulock_owner, -1,
+-				     cpu); /* LMM(__printk_cpu_trylock:A) */
++	old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1,
++				     cpu); /* LMM(__printk_cpu_sync_try_get:A) */
+ 	if (old == -1) {
+ 		/*
+ 		 * This CPU is now the owner and begins loading/storing
+-		 * data: LMM(__printk_cpu_trylock:B)
++		 * data: LMM(__printk_cpu_sync_try_get:B)
+ 		 */
+ 		return 1;
+ 
+ 	} else if (old == cpu) {
+ 		/* This CPU is already the owner. */
+-		atomic_inc(&printk_cpulock_nested);
++		atomic_inc(&printk_cpu_sync_nested);
+ 		return 1;
+ 	}
+ 
+ 	return 0;
+ }
+-EXPORT_SYMBOL(__printk_cpu_trylock);
++EXPORT_SYMBOL(__printk_cpu_sync_try_get);
+ 
+ /**
+- * __printk_cpu_unlock() - Release the printk cpu-reentrant spinning lock.
++ * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock.
+  *
+  * The calling processor must be the owner of the lock.
+  *
+  * Context: Any context. Expects interrupts to be disabled.
+  */
+-void __printk_cpu_unlock(void)
++void __printk_cpu_sync_put(void)
+ {
+-	if (atomic_read(&printk_cpulock_nested)) {
+-		atomic_dec(&printk_cpulock_nested);
++	if (atomic_read(&printk_cpu_sync_nested)) {
++		atomic_dec(&printk_cpu_sync_nested);
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * This CPU is finished loading/storing data:
+-	 * LMM(__printk_cpu_unlock:A)
++	 * LMM(__printk_cpu_sync_put:A)
+ 	 */
+ 
+ 	/*
+ 	 * Guarantee loads and stores from this CPU when it was the
+ 	 * lock owner are visible to the next lock owner. This pairs
+-	 * with __printk_cpu_trylock:A.
++	 * with __printk_cpu_sync_try_get:A.
+ 	 *
+ 	 * Memory barrier involvement:
+ 	 *
+-	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B,
+-	 * then __printk_cpu_trylock:B reads from __printk_cpu_unlock:A.
++	 * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
++	 * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A.
+ 	 *
+ 	 * Relies on:
+ 	 *
+-	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
++	 * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
+ 	 * of this CPU
+ 	 *    matching
+-	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+-	 * of the next CPU
++	 * ACQUIRE from __printk_cpu_sync_try_get:A to
++	 * __printk_cpu_sync_try_get:B of the next CPU
+ 	 */
+-	atomic_set_release(&printk_cpulock_owner,
+-			   -1); /* LMM(__printk_cpu_unlock:B) */
++	atomic_set_release(&printk_cpu_sync_owner,
++			   -1); /* LMM(__printk_cpu_sync_put:B) */
+ }
+-EXPORT_SYMBOL(__printk_cpu_unlock);
++EXPORT_SYMBOL(__printk_cpu_sync_put);
+ #endif /* CONFIG_SMP */
+diff --git a/kernel/ptrace.c b/kernel/ptrace.c
+index f8589bf8d7dc..df08e8e64a83 100644
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
+ 	spin_lock_irq(&task->sighand->siglock);
+ 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+ 	    !__fatal_signal_pending(task)) {
++#ifdef CONFIG_PREEMPT_RT
++		unsigned long flags;
++
++		raw_spin_lock_irqsave(&task->pi_lock, flags);
++		if (READ_ONCE(task->__state) & __TASK_TRACED)
++			WRITE_ONCE(task->__state, __TASK_TRACED);
++		else
++			task->saved_state = __TASK_TRACED;
++		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++#else
+ 		WRITE_ONCE(task->__state, __TASK_TRACED);
++#endif
+ 		ret = true;
+ 	}
+ 	spin_unlock_irq(&task->sighand->siglock);
+@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
+ 
+ static void ptrace_unfreeze_traced(struct task_struct *task)
+ {
+-	if (READ_ONCE(task->__state) != __TASK_TRACED)
++	unsigned long flags;
++	bool frozen = true;
++
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
++	    READ_ONCE(task->__state) != __TASK_TRACED)
+ 		return;
+ 
+ 	WARN_ON(!task->ptrace || task->parent != current);
+@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
+ 	 * Recheck state under the lock to close this race.
+ 	 */
+ 	spin_lock_irq(&task->sighand->siglock);
+-	if (READ_ONCE(task->__state) == __TASK_TRACED) {
+-		if (__fatal_signal_pending(task))
+-			wake_up_state(task, __TASK_TRACED);
+-		else
+-			WRITE_ONCE(task->__state, TASK_TRACED);
+-	}
++	raw_spin_lock_irqsave(&task->pi_lock, flags);
++	if (READ_ONCE(task->__state) == __TASK_TRACED)
++		WRITE_ONCE(task->__state, TASK_TRACED);
++
++#ifdef CONFIG_PREEMPT_RT
++	else if (task->saved_state == __TASK_TRACED)
++		task->saved_state = TASK_TRACED;
++#endif
++	else
++		frozen = false;
++	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++
++	if (frozen && __fatal_signal_pending(task))
++		wake_up_state(task, __TASK_TRACED);
++
+ 	spin_unlock_irq(&task->sighand->siglock);
+ }
+ 
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index 7da3c81c3f59..7f9d3df35854 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1345,7 +1345,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
+ 	rttd->notrun = true;
+ }
+ 
+-static void rcu_tasks_initiate_self_tests(void)
++void rcu_tasks_initiate_self_tests(void)
  {
  	pr_info("Running RCU-tasks wait API self tests\n");
  #ifdef CONFIG_TASKS_RCU
-@@ -1384,9 +1384,7 @@ static int rcu_tasks_verify_self_tests(void)
+@@ -1382,9 +1382,7 @@ static int rcu_tasks_verify_self_tests(void)
  	return ret;
  }
  late_initcall(rcu_tasks_verify_self_tests);
@@ -7589,7 +6825,7 @@ index 6591914af486..a404897d826f 100644
  
  void __init rcu_init_tasks_generic(void)
  {
-@@ -1401,9 +1399,6 @@ void __init rcu_init_tasks_generic(void)
+@@ -1399,9 +1397,6 @@ void __init rcu_init_tasks_generic(void)
  #ifdef CONFIG_TASKS_TRACE_RCU
  	rcu_spawn_tasks_trace_kthread();
  #endif
@@ -7600,10 +6836,10 @@ index 6591914af486..a404897d826f 100644
  
  #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
-index bdd1dc6de71a..9a04550cc54b 100644
+index ef8d36f580fc..44fb12fc7b82 100644
 --- a/kernel/rcu/tree.c
 +++ b/kernel/rcu/tree.c
-@@ -2278,13 +2278,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
+@@ -2276,13 +2276,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
  {
  	unsigned long flags;
  	unsigned long mask;
@@ -7619,7 +6855,7 @@ index bdd1dc6de71a..9a04550cc54b 100644
  	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
  	    rdp->gpwrap) {
  
-@@ -2446,7 +2446,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+@@ -2444,7 +2444,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
  	int div;
  	bool __maybe_unused empty;
  	unsigned long flags;
@@ -7628,7 +6864,7 @@ index bdd1dc6de71a..9a04550cc54b 100644
  	struct rcu_head *rhp;
  	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
  	long bl, count = 0;
-@@ -2472,6 +2472,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+@@ -2470,6 +2470,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
  	rcu_nocb_lock(rdp);
  	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
  	pending = rcu_segcblist_n_cbs(&rdp->cblist);
@@ -7637,22 +6873,10 @@ index bdd1dc6de71a..9a04550cc54b 100644
  	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
  	bl = max(rdp->blimit, pending >> div);
 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
-index 0d12ec7be301..39adf3a8067b 100644
+index 77563109c0ea..04165fa6ff25 100644
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
-@@ -74,7 +74,11 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
-  * Number of tasks to iterate in a single balance run.
-  * Limited because this is done with IRQs disabled.
-  */
-+#ifdef CONFIG_PREEMPT_RT
-+const_debug unsigned int sysctl_sched_nr_migrate = 8;
-+#else
- const_debug unsigned int sysctl_sched_nr_migrate = 32;
-+#endif
- 
- /*
-  * period over which we measure -rt task CPU usage in us.
-@@ -982,6 +986,46 @@ void resched_curr(struct rq *rq)
+@@ -986,6 +986,46 @@ void resched_curr(struct rq *rq)
  		trace_sched_wake_idle_without_ipi(cpu);
  }
  
@@ -7699,7 +6923,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  void resched_cpu(int cpu)
  {
  	struct rq *rq = cpu_rq(cpu);
-@@ -2137,6 +2181,7 @@ void migrate_disable(void)
+@@ -2160,6 +2200,7 @@ void migrate_disable(void)
  	preempt_disable();
  	this_rq()->nr_pinned++;
  	p->migration_disabled = 1;
@@ -7707,7 +6931,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	preempt_enable();
  }
  EXPORT_SYMBOL_GPL(migrate_disable);
-@@ -2148,6 +2193,8 @@ void migrate_enable(void)
+@@ -2171,6 +2212,8 @@ void migrate_enable(void)
  	if (p->migration_disabled > 1) {
  		p->migration_disabled--;
  		return;
@@ -7716,7 +6940,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	}
  
  	/*
-@@ -2165,6 +2212,7 @@ void migrate_enable(void)
+@@ -2188,6 +2231,7 @@ void migrate_enable(void)
  	barrier();
  	p->migration_disabled = 0;
  	this_rq()->nr_pinned--;
@@ -7724,19 +6948,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	preempt_enable();
  }
  EXPORT_SYMBOL_GPL(migrate_enable);
-@@ -2944,9 +2992,8 @@ void force_compatible_cpus_allowed_ptr(struct task_struct *p)
- 
- out_set_mask:
- 	if (printk_ratelimit()) {
--		printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
--				task_pid_nr(p), p->comm,
--				cpumask_pr_args(override_mask));
-+		printk("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
-+		       task_pid_nr(p), p->comm, cpumask_pr_args(override_mask));
- 	}
- 
- 	WARN_ON(set_cpus_allowed_ptr(p, override_mask));
-@@ -3202,7 +3249,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
+@@ -3225,7 +3269,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
  		 * is actually now running somewhere else!
  		 */
  		while (task_running(rq, p)) {
@@ -7745,7 +6957,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  				return 0;
  			cpu_relax();
  		}
-@@ -3217,7 +3264,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
+@@ -3240,7 +3284,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
  		running = task_running(rq, p);
  		queued = task_on_rq_queued(p);
  		ncsw = 0;
@@ -7754,27 +6966,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
  		task_rq_unlock(rq, p, &rf);
  
-@@ -3251,7 +3298,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
- 			ktime_t to = NSEC_PER_SEC / HZ;
- 
- 			set_current_state(TASK_UNINTERRUPTIBLE);
--			schedule_hrtimeout(&to, HRTIMER_MODE_REL);
-+			schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
- 			continue;
- 		}
- 
-@@ -3376,8 +3423,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
- 		 * leave kernel.
- 		 */
- 		if (p->mm && printk_ratelimit()) {
--			printk_deferred("process %d (%s) no longer affine to cpu%d\n",
--					task_pid_nr(p), p->comm, cpu);
-+			printk("process %d (%s) no longer affine to cpu%d\n",
-+			       task_pid_nr(p), p->comm, cpu);
- 		}
- 	}
- 
-@@ -4384,6 +4431,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+@@ -4418,6 +4462,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
  	p->on_cpu = 0;
  #endif
  	init_task_preempt_count(p);
@@ -7784,33 +6976,21 @@ index 0d12ec7be301..39adf3a8067b 100644
  #ifdef CONFIG_SMP
  	plist_node_init(&p->pushable_tasks, MAX_PRIO);
  	RB_CLEAR_NODE(&p->pushable_dl_tasks);
-@@ -4840,20 +4890,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
- 	 */
- 	if (mm) {
- 		membarrier_mm_sync_core_before_usermode(mm);
--		mmdrop(mm);
-+		mmdrop_sched(mm);
- 	}
- 	if (unlikely(prev_state == TASK_DEAD)) {
+@@ -4880,8 +4927,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
  		if (prev->sched_class->task_dead)
  			prev->sched_class->task_dead(prev);
  
- 		/*
--		 * Remove function-return probe instances associated with this
--		 * task and put them back on the free list.
-+		 * Release VMAP'ed task stack immediate for reuse. On RT
-+		 * enabled kernels this is delayed for latency reasons.
- 		 */
--		kprobe_flush_task(prev);
--
 -		/* Task is done with its stack. */
 -		put_task_stack(prev);
-+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
-+			put_task_stack(prev);
++		/*
++		 * Cache only the VMAP stack. The final deallocation is in
++		 * delayed_put_task_struct.
++		 */
++		put_task_stack_sched(prev);
  
  		put_task_struct_rcu_user(prev);
  	}
-@@ -6254,6 +6302,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
+@@ -6216,6 +6266,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
  
  	next = pick_next_task(rq, prev, &rf);
  	clear_tsk_need_resched(prev);
@@ -7818,7 +6998,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	clear_preempt_need_resched();
  #ifdef CONFIG_SCHED_DEBUG
  	rq->last_seen_need_resched_ns = 0;
-@@ -6471,6 +6520,30 @@ static void __sched notrace preempt_schedule_common(void)
+@@ -6427,6 +6478,30 @@ static void __sched notrace preempt_schedule_common(void)
  	} while (need_resched());
  }
  
@@ -7849,7 +7029,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  #ifdef CONFIG_PREEMPTION
  /*
   * This is the entry point to schedule() from in-kernel preemption
-@@ -6484,7 +6557,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
+@@ -6440,7 +6515,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
  	 */
  	if (likely(!preemptible()))
  		return;
@@ -7859,7 +7039,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	preempt_schedule_common();
  }
  NOKPROBE_SYMBOL(preempt_schedule);
-@@ -6517,6 +6591,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
+@@ -6473,6 +6549,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
  	if (likely(!preemptible()))
  		return;
  
@@ -7869,7 +7049,7 @@ index 0d12ec7be301..39adf3a8067b 100644
  	do {
  		/*
  		 * Because the function tracer can trace preempt_count_sub()
-@@ -8675,7 +8752,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
+@@ -8653,7 +8732,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
  
  	/* Set the preempt count _outside_ the spinlocks! */
  	init_idle_preempt_count(idle, cpu);
@@ -7880,144 +7060,11 @@ index 0d12ec7be301..39adf3a8067b 100644
  	/*
  	 * The idle tasks have their own, simple scheduling class:
  	 */
-@@ -9469,14 +9548,8 @@ void __init sched_init(void)
- }
- 
- #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
--static inline int preempt_count_equals(int preempt_offset)
--{
--	int nested = preempt_count() + rcu_preempt_depth();
--
--	return (nested == preempt_offset);
--}
- 
--void __might_sleep(const char *file, int line, int preempt_offset)
-+void __might_sleep(const char *file, int line)
- {
- 	unsigned int state = get_current_state();
- 	/*
-@@ -9490,11 +9563,32 @@ void __might_sleep(const char *file, int line, int preempt_offset)
- 			(void *)current->task_state_change,
- 			(void *)current->task_state_change);
- 
--	___might_sleep(file, line, preempt_offset);
-+	__might_resched(file, line, 0);
- }
- EXPORT_SYMBOL(__might_sleep);
- 
--void ___might_sleep(const char *file, int line, int preempt_offset)
-+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
-+{
-+	if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT))
-+		return;
-+
-+	if (preempt_count() == preempt_offset)
-+		return;
-+
-+	pr_err("Preemption disabled at:");
-+	print_ip_sym(KERN_ERR, ip);
-+}
-+
-+static inline bool resched_offsets_ok(unsigned int offsets)
-+{
-+	unsigned int nested = preempt_count();
-+
-+	nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT;
-+
-+	return nested == offsets;
-+}
-+
-+void __might_resched(const char *file, int line, unsigned int offsets)
- {
- 	/* Ratelimiting timestamp: */
- 	static unsigned long prev_jiffy;
-@@ -9504,7 +9598,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
- 	/* WARN_ON_ONCE() by default, no rate limit required: */
- 	rcu_sleep_check();
- 
--	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-+	if ((resched_offsets_ok(offsets) && !irqs_disabled() &&
- 	     !is_idle_task(current) && !current->non_block_count) ||
- 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
- 	    oops_in_progress)
-@@ -9517,29 +9611,33 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
- 	/* Save this before calling printk(), since that will clobber it: */
- 	preempt_disable_ip = get_preempt_disable_ip(current);
- 
--	printk(KERN_ERR
--		"BUG: sleeping function called from invalid context at %s:%d\n",
--			file, line);
--	printk(KERN_ERR
--		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
--			in_atomic(), irqs_disabled(), current->non_block_count,
--			current->pid, current->comm);
-+	pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
-+	       file, line);
-+	pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
-+	       in_atomic(), irqs_disabled(), current->non_block_count,
-+	       current->pid, current->comm);
-+	pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
-+	       offsets & MIGHT_RESCHED_PREEMPT_MASK);
-+
-+	if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
-+		pr_err("RCU nest depth: %d, expected: %u\n",
-+		       rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT);
-+	}
- 
- 	if (task_stack_end_corrupted(current))
--		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
-+		pr_emerg("Thread overran stack, or stack corrupted\n");
- 
- 	debug_show_held_locks(current);
- 	if (irqs_disabled())
- 		print_irqtrace_events(current);
--	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
--	    && !preempt_count_equals(preempt_offset)) {
--		pr_err("Preemption disabled at:");
--		print_ip_sym(KERN_ERR, preempt_disable_ip);
--	}
-+
-+	print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK,
-+				 preempt_disable_ip);
-+
- 	dump_stack();
- 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
- }
--EXPORT_SYMBOL(___might_sleep);
-+EXPORT_SYMBOL(__might_resched);
- 
- void __cant_sleep(const char *file, int line, int preempt_offset)
- {
-diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
-index e94314633b39..fd7c4f972aaf 100644
---- a/kernel/sched/deadline.c
-+++ b/kernel/sched/deadline.c
-@@ -800,7 +800,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
- 	 * entity.
- 	 */
- 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
--		printk_deferred_once("sched: DL replenish lagged too much\n");
-+		printk_once("sched: DL replenish lagged too much\n");
- 		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
- 		dl_se->runtime = pi_of(dl_se)->dl_runtime;
- 	}
 diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 6f16dfb74246..f2d0fb3ac43f 100644
+index 6e476f6d9435..22a0d3a8c760 100644
 --- a/kernel/sched/fair.c
 +++ b/kernel/sched/fair.c
-@@ -4237,10 +4237,7 @@ static inline void check_schedstat_required(void)
- 			trace_sched_stat_iowait_enabled()  ||
- 			trace_sched_stat_blocked_enabled() ||
- 			trace_sched_stat_runtime_enabled())  {
--		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
--			     "stat_blocked and stat_runtime require the "
--			     "kernel parameter schedstats=enable or "
--			     "kernel.sched_schedstats=1\n");
-+		printk_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
- 	}
- #endif
- }
-@@ -4448,7 +4445,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+@@ -4393,7 +4393,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  	ideal_runtime = sched_slice(cfs_rq, curr);
  	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
  	if (delta_exec > ideal_runtime) {
@@ -8026,7 +7073,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  		/*
  		 * The current task ran long enough, ensure it doesn't get
  		 * re-elected due to buddy favours.
-@@ -4472,7 +4469,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+@@ -4417,7 +4417,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  		return;
  
  	if (delta > ideal_runtime)
@@ -8035,7 +7082,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  }
  
  static void
-@@ -4615,7 +4612,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
+@@ -4563,7 +4563,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
  	 * validating it and just reschedule.
  	 */
  	if (queued) {
@@ -8044,7 +7091,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  		return;
  	}
  	/*
-@@ -4755,7 +4752,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
+@@ -4712,7 +4712,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
  	 * hierarchy can be throttled
  	 */
  	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
@@ -8053,7 +7100,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  }
  
  static __always_inline
-@@ -5518,7 +5515,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
+@@ -5475,7 +5475,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
  
  		if (delta < 0) {
  			if (task_current(rq, p))
@@ -8062,7 +7109,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  			return;
  		}
  		hrtick_start(rq, delta);
-@@ -7208,7 +7205,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+@@ -7172,7 +7172,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
  	return;
  
  preempt:
@@ -8071,7 +7118,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  	/*
  	 * Only set the backward buddy when the current task is still
  	 * on the rq. This can happen when a wakeup gets interleaved
-@@ -11109,7 +11106,7 @@ static void task_fork_fair(struct task_struct *p)
+@@ -11207,7 +11207,7 @@ static void task_fork_fair(struct task_struct *p)
  		 * 'current' within the tree based on its new key value.
  		 */
  		swap(curr->vruntime, se->vruntime);
@@ -8080,7 +7127,7 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  	}
  
  	se->vruntime -= cfs_rq->min_vruntime;
-@@ -11136,7 +11133,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
+@@ -11234,7 +11234,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
  	 */
  	if (task_current(rq, p)) {
  		if (p->prio > oldprio)
@@ -8090,79 +7137,24 @@ index 6f16dfb74246..f2d0fb3ac43f 100644
  		check_preempt_curr(rq, p, 0);
  }
 diff --git a/kernel/sched/features.h b/kernel/sched/features.h
-index 7f8dace0964c..d5cee51819bf 100644
+index 1cf435bbcd9c..d5cee51819bf 100644
 --- a/kernel/sched/features.h
 +++ b/kernel/sched/features.h
-@@ -46,11 +46,19 @@ SCHED_FEAT(DOUBLE_TICK, false)
-  */
- SCHED_FEAT(NONTASK_CAPACITY, true)
+@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  
-+#ifdef CONFIG_PREEMPT_RT
-+SCHED_FEAT(TTWU_QUEUE, false)
+ #ifdef CONFIG_PREEMPT_RT
+ SCHED_FEAT(TTWU_QUEUE, false)
 +# ifdef CONFIG_PREEMPT_LAZY
 +SCHED_FEAT(PREEMPT_LAZY, true)
 +# endif
-+#else
-+
- /*
-  * Queue remote wakeups on the target CPU and process them
-  * using the scheduler IPI. Reduces rq->lock contention/bounces.
-  */
- SCHED_FEAT(TTWU_QUEUE, true)
-+#endif
+ #else
  
  /*
-  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
-diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
-index 1652f2bb54b7..ab54b2012469 100644
---- a/kernel/sched/psi.c
-+++ b/kernel/sched/psi.c
-@@ -710,10 +710,10 @@ static void psi_group_change(struct psi_group *group, int cpu,
- 		if (groupc->tasks[t]) {
- 			groupc->tasks[t]--;
- 		} else if (!psi_bug) {
--			printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
--					cpu, t, groupc->tasks[0],
--					groupc->tasks[1], groupc->tasks[2],
--					groupc->tasks[3], clear, set);
-+			pr_err("psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n",
-+			       cpu, t, groupc->tasks[0],
-+			       groupc->tasks[1], groupc->tasks[2],
-+			       groupc->tasks[3], clear, set);
- 			psi_bug = 1;
- 		}
- 	}
-@@ -779,9 +779,9 @@ static void psi_flags_change(struct task_struct *task, int clear, int set)
- 	if (((task->psi_flags & set) ||
- 	     (task->psi_flags & clear) != clear) &&
- 	    !psi_bug) {
--		printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
--				task->pid, task->comm, task_cpu(task),
--				task->psi_flags, clear, set);
-+		pr_err("psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
-+		       task->pid, task->comm, task_cpu(task),
-+		       task->psi_flags, clear, set);
- 		psi_bug = 1;
- 	}
- 
-diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
-index bfef3f39b555..ef8228d19382 100644
---- a/kernel/sched/rt.c
-+++ b/kernel/sched/rt.c
-@@ -977,7 +977,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
- 		 */
- 		if (likely(rt_b->rt_runtime)) {
- 			rt_rq->rt_throttled = 1;
--			printk_deferred_once("sched: RT throttling activated\n");
-+			printk_once("sched: RT throttling activated\n");
- 		} else {
- 			/*
- 			 * In case we did anyway, make it go away,
 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 4f432826933d..8df6227922aa 100644
+index 0e66749486e7..2a8f54801263 100644
 --- a/kernel/sched/sched.h
 +++ b/kernel/sched/sched.h
-@@ -2318,6 +2318,15 @@ extern void reweight_task(struct task_struct *p, int prio);
+@@ -2300,6 +2300,15 @@ extern void reweight_task(struct task_struct *p, int prio);
  extern void resched_curr(struct rq *rq);
  extern void resched_cpu(int cpu);
  
@@ -8190,21 +7182,8 @@ index e1c655f928c7..f230b1ac7f91 100644
  	raw_spin_lock_irq(&q->lock);
  	list_splice_init(&q->task_list, &tmp);
  	while (!list_empty(&tmp)) {
-diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
-index 4e8698e62f07..3d0157bd4e14 100644
---- a/kernel/sched/topology.c
-+++ b/kernel/sched/topology.c
-@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd)
- #ifdef HAVE_RT_PUSH_IPI
- 	rd->rto_cpu = -1;
- 	raw_spin_lock_init(&rd->rto_lock);
--	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
-+	rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
- #endif
- 
- 	rd->visit_gen = 0;
 diff --git a/kernel/signal.c b/kernel/signal.c
-index 5892c91696f8..d3a69e89b9ee 100644
+index dfcee3888b00..1424f77d3b95 100644
 --- a/kernel/signal.c
 +++ b/kernel/signal.c
 @@ -1324,6 +1324,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
@@ -8242,7 +7221,7 @@ index 5892c91696f8..d3a69e89b9ee 100644
  	spin_lock_irqsave(&t->sighand->siglock, flags);
  	action = &t->sighand->action[sig-1];
  	ignored = action->sa.sa_handler == SIG_IGN;
-@@ -2296,16 +2324,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
+@@ -2271,16 +2299,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
  		if (gstop_done && ptrace_reparented(current))
  			do_notify_parent_cldstop(current, false, why);
  
@@ -8260,7 +7239,7 @@ index 5892c91696f8..d3a69e89b9ee 100644
  		cgroup_leave_frozen(true);
  	} else {
 diff --git a/kernel/smp.c b/kernel/smp.c
-index f43ede0ab183..f0f26e1a0031 100644
+index 01a7c1706a58..250311c2009f 100644
 --- a/kernel/smp.c
 +++ b/kernel/smp.c
 @@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(void)
@@ -8286,159 +7265,152 @@ index f43ede0ab183..f0f26e1a0031 100644
  
  	local_irq_restore(flags);
  }
-diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
-index 003ccf338d20..00fc43605c6b 100644
---- a/kernel/time/clockevents.c
-+++ b/kernel/time/clockevents.c
-@@ -203,8 +203,7 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
- {
- 	/* Nothing to do if we already reached the limit */
- 	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
--		printk_deferred(KERN_WARNING
--				"CE: Reprogramming failure. Giving up\n");
-+		pr_warn("CE: Reprogramming failure. Giving up\n");
- 		dev->next_event = KTIME_MAX;
- 		return -ETIME;
- 	}
-@@ -217,10 +216,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)
- 	if (dev->min_delta_ns > MIN_DELTA_LIMIT)
- 		dev->min_delta_ns = MIN_DELTA_LIMIT;
- 
--	printk_deferred(KERN_WARNING
--			"CE: %s increased min_delta_ns to %llu nsec\n",
--			dev->name ? dev->name : "?",
--			(unsigned long long) dev->min_delta_ns);
-+	pr_warn("CE: %s increased min_delta_ns to %llu nsec\n",
-+		dev->name ? dev->name : "?", (unsigned long long) dev->min_delta_ns);
- 	return 0;
+diff --git a/kernel/softirq.c b/kernel/softirq.c
+index 41f470929e99..22948c2109f5 100644
+--- a/kernel/softirq.c
++++ b/kernel/softirq.c
+@@ -624,6 +624,22 @@ static inline void tick_irq_exit(void)
+ #endif
  }
  
-diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
-index 406dccb79c2b..829d7797811f 100644
---- a/kernel/time/ntp.c
-+++ b/kernel/time/ntp.c
-@@ -939,9 +939,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
- 		time_status |= STA_PPSERROR;
- 		pps_errcnt++;
- 		pps_dec_freq_interval();
--		printk_deferred(KERN_ERR
--			"hardpps: PPSERROR: interval too long - %lld s\n",
--			freq_norm.sec);
-+		pr_err("hardpps: PPSERROR: interval too long - %lld s\n", freq_norm.sec);
- 		return 0;
- 	}
++static DEFINE_PER_CPU(struct task_struct *, timersd);
++static DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
++
++static unsigned int local_pending_timers(void)
++{
++        return __this_cpu_read(pending_timer_softirq);
++}
++
++static void wake_timersd(void)
++{
++        struct task_struct *tsk = __this_cpu_read(timersd);
++
++        if (tsk)
++                wake_up_process(tsk);
++}
++
+ static inline void __irq_exit_rcu(void)
+ {
+ #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
+@@ -635,6 +651,8 @@ static inline void __irq_exit_rcu(void)
+ 	preempt_count_sub(HARDIRQ_OFFSET);
+ 	if (!in_interrupt() && local_softirq_pending())
+ 		invoke_softirq();
++	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !in_interrupt() && local_pending_timers())
++		wake_timersd();
  
-@@ -954,8 +952,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
- 	delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);
- 	pps_freq = ftemp;
- 	if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) {
--		printk_deferred(KERN_WARNING
--				"hardpps: PPSWANDER: change=%ld\n", delta);
-+		pr_warn("hardpps: PPSWANDER: change=%ld\n", delta);
- 		time_status |= STA_PPSWANDER;
- 		pps_stbcnt++;
- 		pps_dec_freq_interval();
-@@ -999,9 +996,8 @@ static void hardpps_update_phase(long error)
- 	 * the time offset is updated.
- 	 */
- 	if (jitter > (pps_jitter << PPS_POPCORN)) {
--		printk_deferred(KERN_WARNING
--				"hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
--				jitter, (pps_jitter << PPS_POPCORN));
-+		pr_warn("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n",
-+			jitter, (pps_jitter << PPS_POPCORN));
- 		time_status |= STA_PPSJITTER;
- 		pps_jitcnt++;
- 	} else if (time_status & STA_PPSTIME) {
-@@ -1058,7 +1054,7 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t
- 		time_status |= STA_PPSJITTER;
- 		/* restart the frequency calibration interval */
- 		pps_fbase = *raw_ts;
--		printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");
-+		pr_err("hardpps: PPSJITTER: bad pulse\n");
- 		return;
- 	}
+ 	tick_irq_exit();
+ }
+@@ -963,11 +981,69 @@ static struct smp_hotplug_thread softirq_threads = {
+ 	.thread_comm		= "ksoftirqd/%u",
+ };
  
-diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
-index b348749a9fc6..a81beb312038 100644
---- a/kernel/time/timekeeping.c
-+++ b/kernel/time/timekeeping.c
-@@ -203,22 +203,23 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
- 	const char *name = tk->tkr_mono.clock->name;
- 
- 	if (offset > max_cycles) {
--		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
--				offset, name, max_cycles);
--		printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
-+		printk("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
-+		       offset, name, max_cycles);
-+		printk("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
- 	} else {
- 		if (offset > (max_cycles >> 1)) {
--			printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
--					offset, name, max_cycles >> 1);
--			printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
-+			printk("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
-+			       offset, name, max_cycles >> 1);
-+			printk("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
- 		}
++static void timersd_setup(unsigned int cpu)
++{
++        sched_set_fifo_low(current);
++}
++
++static int timersd_should_run(unsigned int cpu)
++{
++        return local_pending_timers();
++}
++
++static void run_timersd(unsigned int cpu)
++{
++	unsigned int timer_si;
++
++	ksoftirqd_run_begin();
++
++	timer_si = local_pending_timers();
++	__this_cpu_write(pending_timer_softirq, 0);
++	or_softirq_pending(timer_si);
++
++	__do_softirq();
++
++	ksoftirqd_run_end();
++}
++
++#ifdef CONFIG_PREEMPT_RT
++static void raise_ktimers_thread(unsigned int nr)
++{
++	trace_softirq_raise(nr);
++	__this_cpu_or(pending_timer_softirq, 1 << nr);
++}
++
++void raise_hrtimer_softirq(void)
++{
++	raise_ktimers_thread(HRTIMER_SOFTIRQ);
++}
++
++void raise_timer_softirq(void)
++{
++	unsigned long flags;
++
++	local_irq_save(flags);
++	raise_ktimers_thread(TIMER_SOFTIRQ);
++	wake_timersd();
++	local_irq_restore(flags);
++}
++#endif
++
++static struct smp_hotplug_thread timer_threads = {
++        .store                  = &timersd,
++        .setup                  = timersd_setup,
++        .thread_should_run      = timersd_should_run,
++        .thread_fn              = run_timersd,
++        .thread_comm            = "ktimers/%u",
++};
++
+ static __init int spawn_ksoftirqd(void)
+ {
+ 	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+ 				  takeover_tasklets);
+ 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		BUG_ON(smpboot_register_percpu_thread(&timer_threads));
+ 
+ 	return 0;
+ }
+diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
+index 0ea8702eb516..dead5e738ecf 100644
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
+ 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+ 		cpu_base->softirq_expires_next = KTIME_MAX;
+ 		cpu_base->softirq_activated = 1;
+-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++		raise_hrtimer_softirq();
  	}
  
- 	if (tk->underflow_seen) {
- 		if (jiffies - tk->last_warning > WARNING_FREQ) {
--			printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
--			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
--			printk_deferred("         Your kernel is probably still fine.\n");
-+			printk("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n",
-+			       name);
-+			printk("         Please report this, consider using a different clocksource, if possible.\n");
-+			printk("         Your kernel is probably still fine.\n");
- 			tk->last_warning = jiffies;
- 		}
- 		tk->underflow_seen = 0;
-@@ -226,9 +227,10 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
- 
- 	if (tk->overflow_seen) {
- 		if (jiffies - tk->last_warning > WARNING_FREQ) {
--			printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
--			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
--			printk_deferred("         Your kernel is probably still fine.\n");
-+			printk("WARNING: Overflow in clocksource '%s' observed, time update capped.\n",
-+			       name);
-+			printk("         Please report this, consider using a different clocksource, if possible.\n");
-+			printk("         Your kernel is probably still fine.\n");
- 			tk->last_warning = jiffies;
- 		}
- 		tk->overflow_seen = 0;
-@@ -1669,9 +1671,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
- 					   const struct timespec64 *delta)
- {
- 	if (!timespec64_valid_strict(delta)) {
--		printk_deferred(KERN_WARNING
--				"__timekeeping_inject_sleeptime: Invalid "
--				"sleep delta value!\n");
-+		pr_warn("%s: Invalid sleep delta value!\n", __func__);
- 		return;
+ 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
+@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void)
+ 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
+ 		cpu_base->softirq_expires_next = KTIME_MAX;
+ 		cpu_base->softirq_activated = 1;
+-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
++		raise_hrtimer_softirq();
  	}
- 	tk_xtime_add(tk, delta);
-diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
-index b73e8850e58d..149cc4b08d8e 100644
---- a/kernel/time/timekeeping_debug.c
-+++ b/kernel/time/timekeeping_debug.c
-@@ -49,7 +49,7 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
- 	int bin = min(fls(t->tv_sec), NUM_BINS-1);
  
- 	sleep_time_bin[bin]++;
--	pm_deferred_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n",
-+	pm_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n",
- 			   (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
+ 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index 85f1021ad459..beb4b1cc7c48 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1766,7 +1766,7 @@ static void run_local_timers(void)
+ 		if (time_before(jiffies, base->next_expiry))
+ 			return;
+ 	}
+-	raise_softirq(TIMER_SOFTIRQ);
++	raise_timer_softirq();
  }
  
+ /*
 diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
-index 18db461f77cd..547de22e8942 100644
+index 78ea542ce3bc..52fd4bbc36a4 100644
 --- a/kernel/trace/trace.c
 +++ b/kernel/trace/trace.c
-@@ -2630,7 +2630,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
+@@ -2606,7 +2606,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
  		trace_flags |= TRACE_FLAG_NEED_RESCHED;
  	if (test_preempt_need_resched())
  		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
@@ -8453,7 +7425,7 @@ index 18db461f77cd..547de22e8942 100644
  		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
  }
  
-@@ -4206,15 +4212,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
+@@ -4182,15 +4188,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
  
  static void print_lat_help_header(struct seq_file *m)
  {
@@ -8480,7 +7452,7 @@ index 18db461f77cd..547de22e8942 100644
  }
  
  static void print_event_info(struct array_buffer *buf, struct seq_file *m)
-@@ -4248,14 +4256,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
+@@ -4224,14 +4232,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
  
  	print_event_info(buf, m);
  
@@ -8506,7 +7478,7 @@ index 18db461f77cd..547de22e8942 100644
  
  void
 diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
-index 44d031ffe511..01165b0ed6aa 100644
+index 92be9cb1d7d4..b900902bf1b6 100644
 --- a/kernel/trace/trace_events.c
 +++ b/kernel/trace/trace_events.c
 @@ -184,6 +184,7 @@ static int trace_define_common_fields(void)
@@ -8518,10 +7490,10 @@ index 44d031ffe511..01165b0ed6aa 100644
  	return ret;
  }
 diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
-index c2ca40e8595b..be070d258c3b 100644
+index 3547e7176ff7..2745a023173a 100644
 --- a/kernel/trace/trace_output.c
 +++ b/kernel/trace/trace_output.c
-@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  {
  	char hardsoft_irq;
  	char need_resched;
@@ -8529,7 +7501,7 @@ index c2ca40e8595b..be070d258c3b 100644
  	char irqs_off;
  	int hardirq;
  	int softirq;
-@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+@@ -472,6 +473,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  		break;
  	}
  
@@ -8539,7 +7511,7 @@ index c2ca40e8595b..be070d258c3b 100644
  	hardsoft_irq =
  		(nmi && hardirq)     ? 'Z' :
  		nmi                  ? 'z' :
-@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+@@ -480,14 +484,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
  		softirq              ? 's' :
  		                       '.' ;
  
@@ -8562,50 +7534,8 @@ index c2ca40e8595b..be070d258c3b 100644
  	if (entry->preempt_count & 0xf0)
  		trace_seq_printf(s, "%x", entry->preempt_count >> 4);
  	else
-diff --git a/kernel/workqueue.c b/kernel/workqueue.c
-index 76988f39ed5a..86b6c5a9b274 100644
---- a/kernel/workqueue.c
-+++ b/kernel/workqueue.c
-@@ -4836,9 +4836,7 @@ void show_workqueue_state(void)
- 				 * drivers that queue work while holding locks
- 				 * also taken in their write paths.
- 				 */
--				printk_deferred_enter();
- 				show_pwq(pwq);
--				printk_deferred_exit();
- 			}
- 			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
- 			/*
-@@ -4862,7 +4860,6 @@ void show_workqueue_state(void)
- 		 * queue work while holding locks also taken in their write
- 		 * paths.
- 		 */
--		printk_deferred_enter();
- 		pr_info("pool %d:", pool->id);
- 		pr_cont_pool_info(pool);
- 		pr_cont(" hung=%us workers=%d",
-@@ -4877,7 +4874,6 @@ void show_workqueue_state(void)
- 			first = false;
- 		}
- 		pr_cont("\n");
--		printk_deferred_exit();
- 	next_pool:
- 		raw_spin_unlock_irqrestore(&pool->lock, flags);
- 		/*
-diff --git a/lib/bug.c b/lib/bug.c
-index 45a0584f6541..03a87df69ed2 100644
---- a/lib/bug.c
-+++ b/lib/bug.c
-@@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
- 	else
- 		pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
- 			(void *)bugaddr);
-+	pr_flush(1000, true);
- 
- 	return BUG_TRAP_TYPE_BUG;
- }
 diff --git a/lib/dump_stack.c b/lib/dump_stack.c
-index 6b7f1bf6715d..6e8ae42c7e27 100644
+index 6b7f1bf6715d..83471e81501a 100644
 --- a/lib/dump_stack.c
 +++ b/lib/dump_stack.c
 @@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
@@ -8613,10 +7543,10 @@ index 6b7f1bf6715d..6e8ae42c7e27 100644
  	 * against other CPUs
  	 */
 -	printk_cpu_lock_irqsave(flags);
-+	raw_printk_cpu_lock_irqsave(flags);
++	printk_cpu_sync_get_irqsave(flags);
  	__dump_stack(log_lvl);
 -	printk_cpu_unlock_irqrestore(flags);
-+	raw_printk_cpu_unlock_irqrestore(flags);
++	printk_cpu_sync_put_irqrestore(flags);
  }
  EXPORT_SYMBOL(dump_stack_lvl);
  
@@ -8639,7 +7569,7 @@ index 2f17b488d58e..2b9f797642f6 100644
  	return 0;
  }
 diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
-index 161108e5d2fe..1266ea3726d7 100644
+index 71652e1c397c..8d24279fad05 100644
 --- a/lib/locking-selftest.c
 +++ b/lib/locking-selftest.c
 @@ -26,6 +26,12 @@
@@ -8907,18 +7837,20 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  #define DO_TESTCASE_2x3(desc, name)				\
  	DO_TESTCASE_3(desc, name, 12);				\
-@@ -1651,6 +1700,20 @@ static void ww_test_fail_acquire(void)
+@@ -1651,6 +1700,22 @@ static void ww_test_fail_acquire(void)
  #endif
  }
  
 +#ifdef CONFIG_PREEMPT_RT
 +#define ww_mutex_base_lock(b)			rt_mutex_lock(b)
++#define ww_mutex_base_trylock(b)		rt_mutex_trylock(b)
 +#define ww_mutex_base_lock_nest_lock(b, b2)	rt_mutex_lock_nest_lock(b, b2)
 +#define ww_mutex_base_lock_interruptible(b)	rt_mutex_lock_interruptible(b)
 +#define ww_mutex_base_lock_killable(b)		rt_mutex_lock_killable(b)
 +#define ww_mutex_base_unlock(b)			rt_mutex_unlock(b)
 +#else
 +#define ww_mutex_base_lock(b)			mutex_lock(b)
++#define ww_mutex_base_trylock(b)		mutex_trylock(b)
 +#define ww_mutex_base_lock_nest_lock(b, b2)	mutex_lock_nest_lock(b, b2)
 +#define ww_mutex_base_lock_interruptible(b)	mutex_lock_interruptible(b)
 +#define ww_mutex_base_lock_killable(b)		mutex_lock_killable(b)
@@ -8928,7 +7860,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  static void ww_test_normal(void)
  {
  	int ret;
-@@ -1665,50 +1728,50 @@ static void ww_test_normal(void)
+@@ -1665,50 +1730,50 @@ static void ww_test_normal(void)
  
  	/* mutex_lock (and indirectly, mutex_lock_nested) */
  	o.ctx = (void *)~0UL;
@@ -8992,7 +7924,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	WARN_ON(o.ctx != (void *)~0UL);
  }
  
-@@ -1721,7 +1784,7 @@ static void ww_test_two_contexts(void)
+@@ -1721,7 +1786,7 @@ static void ww_test_two_contexts(void)
  static void ww_test_diff_class(void)
  {
  	WWAI(&t);
@@ -9001,7 +7933,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	t.ww_class = NULL;
  #endif
  	WWL(&o, &t);
-@@ -1785,7 +1848,7 @@ static void ww_test_edeadlk_normal(void)
+@@ -1785,7 +1850,7 @@ static void ww_test_edeadlk_normal(void)
  {
  	int ret;
  
@@ -9010,7 +7942,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	o2.ctx = &t2;
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  
-@@ -1801,7 +1864,7 @@ static void ww_test_edeadlk_normal(void)
+@@ -1801,7 +1866,7 @@ static void ww_test_edeadlk_normal(void)
  
  	o2.ctx = NULL;
  	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
@@ -9019,7 +7951,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	WWU(&o);
  
  	WWL(&o2, &t);
-@@ -1811,7 +1874,7 @@ static void ww_test_edeadlk_normal_slow(void)
+@@ -1811,7 +1876,7 @@ static void ww_test_edeadlk_normal_slow(void)
  {
  	int ret;
  
@@ -9028,7 +7960,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -1827,7 +1890,7 @@ static void ww_test_edeadlk_normal_slow(void)
+@@ -1827,7 +1892,7 @@ static void ww_test_edeadlk_normal_slow(void)
  
  	o2.ctx = NULL;
  	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
@@ -9037,7 +7969,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	WWU(&o);
  
  	ww_mutex_lock_slow(&o2, &t);
-@@ -1837,7 +1900,7 @@ static void ww_test_edeadlk_no_unlock(void)
+@@ -1837,7 +1902,7 @@ static void ww_test_edeadlk_no_unlock(void)
  {
  	int ret;
  
@@ -9046,7 +7978,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	o2.ctx = &t2;
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  
-@@ -1853,7 +1916,7 @@ static void ww_test_edeadlk_no_unlock(void)
+@@ -1853,7 +1918,7 @@ static void ww_test_edeadlk_no_unlock(void)
  
  	o2.ctx = NULL;
  	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
@@ -9055,7 +7987,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  	WWL(&o2, &t);
  }
-@@ -1862,7 +1925,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
+@@ -1862,7 +1927,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
  {
  	int ret;
  
@@ -9064,7 +7996,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -1878,7 +1941,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
+@@ -1878,7 +1943,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
  
  	o2.ctx = NULL;
  	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
@@ -9073,7 +8005,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  	ww_mutex_lock_slow(&o2, &t);
  }
-@@ -1887,7 +1950,7 @@ static void ww_test_edeadlk_acquire_more(void)
+@@ -1887,7 +1952,7 @@ static void ww_test_edeadlk_acquire_more(void)
  {
  	int ret;
  
@@ -9082,7 +8014,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -1908,7 +1971,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
+@@ -1908,7 +1973,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
  {
  	int ret;
  
@@ -9091,7 +8023,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -1929,11 +1992,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
+@@ -1929,11 +1994,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
  {
  	int ret;
  
@@ -9105,7 +8037,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o3.base.dep_map, _THIS_IP_);
  	o3.ctx = &t2;
  
-@@ -1955,11 +2018,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
+@@ -1955,11 +2020,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
  {
  	int ret;
  
@@ -9119,7 +8051,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o3.base.dep_map, _THIS_IP_);
  	o3.ctx = &t2;
  
-@@ -1980,7 +2043,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
+@@ -1980,7 +2045,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
  {
  	int ret;
  
@@ -9128,7 +8060,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -2005,7 +2068,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
+@@ -2005,7 +2070,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
  {
  	int ret;
  
@@ -9137,7 +8069,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	mutex_release(&o2.base.dep_map, _THIS_IP_);
  	o2.ctx = &t2;
  
-@@ -2646,8 +2709,8 @@ static void wait_context_tests(void)
+@@ -2646,8 +2711,8 @@ static void wait_context_tests(void)
  
  static void local_lock_2(void)
  {
@@ -9148,7 +8080,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  	HARDIRQ_ENTER();
  	spin_lock(&lock_A);		/* IN-IRQ */
-@@ -2656,18 +2719,18 @@ static void local_lock_2(void)
+@@ -2656,18 +2721,18 @@ static void local_lock_2(void)
  
  	HARDIRQ_DISABLE();
  	spin_lock(&lock_A);
@@ -9171,7 +8103,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  	HARDIRQ_ENTER();
  	spin_lock(&lock_A);		/* IN-IRQ */
-@@ -2676,18 +2739,18 @@ static void local_lock_3A(void)
+@@ -2676,18 +2741,18 @@ static void local_lock_3A(void)
  
  	HARDIRQ_DISABLE();
  	spin_lock(&lock_A);
@@ -9194,7 +8126,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  
  	HARDIRQ_ENTER();
  	spin_lock(&lock_A);		/* IN-IRQ */
-@@ -2696,8 +2759,8 @@ static void local_lock_3B(void)
+@@ -2696,8 +2761,8 @@ static void local_lock_3B(void)
  
  	HARDIRQ_DISABLE();
  	spin_lock(&lock_A);
@@ -9205,7 +8137,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	spin_unlock(&lock_A);
  	HARDIRQ_ENABLE();
  
-@@ -2812,7 +2875,7 @@ void locking_selftest(void)
+@@ -2812,7 +2877,7 @@ void locking_selftest(void)
  	printk("------------------------\n");
  	printk("| Locking API testsuite:\n");
  	printk("----------------------------------------------------------------------------\n");
@@ -9214,7 +8146,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	printk("  --------------------------------------------------------------------------\n");
  
  	init_shared_classes();
-@@ -2885,12 +2948,11 @@ void locking_selftest(void)
+@@ -2885,12 +2950,11 @@ void locking_selftest(void)
  	DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
  
  	printk("  --------------------------------------------------------------------------\n");
@@ -9229,7 +8161,7 @@ index 161108e5d2fe..1266ea3726d7 100644
  	DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
  	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
 diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
-index 199ab201d501..06410209197a 100644
+index 199ab201d501..d01aec6ae15c 100644
 --- a/lib/nmi_backtrace.c
 +++ b/lib/nmi_backtrace.c
 @@ -99,7 +99,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
@@ -9237,7 +8169,7 @@ index 199ab201d501..06410209197a 100644
  		 * against other CPUs.
  		 */
 -		printk_cpu_lock_irqsave(flags);
-+		raw_printk_cpu_lock_irqsave(flags);
++		printk_cpu_sync_get_irqsave(flags);
  		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
  			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
  				cpu, (void *)instruction_pointer(regs));
@@ -9246,115 +8178,1650 @@ index 199ab201d501..06410209197a 100644
  				dump_stack();
  		}
 -		printk_cpu_unlock_irqrestore(flags);
-+		raw_printk_cpu_unlock_irqrestore(flags);
++		printk_cpu_sync_put_irqrestore(flags);
  		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
  		return true;
  	}
-diff --git a/lib/ratelimit.c b/lib/ratelimit.c
-index e01a93f46f83..524cf65dce53 100644
---- a/lib/ratelimit.c
-+++ b/lib/ratelimit.c
-@@ -47,9 +47,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
- 	if (time_is_before_jiffies(rs->begin + rs->interval)) {
- 		if (rs->missed) {
- 			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
--				printk_deferred(KERN_WARNING
--						"%s: %d callbacks suppressed\n",
--						func, rs->missed);
-+				pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
- 				rs->missed = 0;
- 			}
- 		}
-diff --git a/lib/scatterlist.c b/lib/scatterlist.c
-index abb3432ed744..d5e82e4a57ad 100644
---- a/lib/scatterlist.c
-+++ b/lib/scatterlist.c
-@@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
-  *   stops @miter.
-  *
-  * Context:
-- *   Don't care if @miter is stopped, or not proceeded yet.
-- *   Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
-+ *   Don't care.
-  *
-  * Returns:
-  *   true if @miter contains the valid mapping.  false if end of sg
-@@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip);
-  *   @miter->addr and @miter->length point to the current mapping.
-  *
-  * Context:
-- *   Preemption disabled if SG_MITER_ATOMIC.  Preemption must stay disabled
-- *   till @miter is stopped.  May sleep if !SG_MITER_ATOMIC.
-+ *   May sleep if !SG_MITER_ATOMIC.
-  *
-  * Returns:
-  *   true if @miter contains the next mapping.  false if end of sg
-@@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next);
-  *   need to be released during iteration.
-  *
-  * Context:
-- *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
-- *   otherwise.
-+ *   Don't care otherwise.
-  */
- void sg_miter_stop(struct sg_mapping_iter *miter)
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 2ed5f2a0879d..eb6873f43ef5 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -169,7 +169,6 @@ struct mem_cgroup_event {
+ 	struct work_struct remove;
+ };
+ 
+-static void mem_cgroup_threshold(struct mem_cgroup *memcg);
+ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
+ 
+ /* Stuffs for move charges at task migration. */
+@@ -261,8 +260,10 @@ bool mem_cgroup_kmem_disabled(void)
+ 	return cgroup_memory_nokmem;
+ }
+ 
++struct memcg_stock_pcp;
+ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
+-				      unsigned int nr_pages);
++				      unsigned int nr_pages,
++				      bool stock_lock_acquried);
+ 
+ static void obj_cgroup_release(struct percpu_ref *ref)
  {
-@@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
- 			flush_dcache_page(miter->page);
+@@ -296,7 +297,7 @@ static void obj_cgroup_release(struct percpu_ref *ref)
+ 	nr_pages = nr_bytes >> PAGE_SHIFT;
  
- 		if (miter->__flags & SG_MITER_ATOMIC) {
--			WARN_ON_ONCE(preemptible());
-+			WARN_ON_ONCE(!pagefault_disabled());
- 			kunmap_atomic(miter->addr);
- 		} else
- 			kunmap(miter->page);
-diff --git a/mm/Kconfig b/mm/Kconfig
-index c048dea7e342..88778414465b 100644
---- a/mm/Kconfig
-+++ b/mm/Kconfig
-@@ -371,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
- 
- config TRANSPARENT_HUGEPAGE
- 	bool "Transparent Hugepage Support"
--	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
-+	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
- 	select COMPACTION
- 	select XARRAY_MULTI
- 	help
-diff --git a/mm/memory.c b/mm/memory.c
-index c52be6d6b605..e2c623027e32 100644
---- a/mm/memory.c
-+++ b/mm/memory.c
-@@ -5265,7 +5265,7 @@ void __might_fault(const char *file, int line)
+ 	if (nr_pages)
+-		obj_cgroup_uncharge_pages(objcg, nr_pages);
++		obj_cgroup_uncharge_pages(objcg, nr_pages, false);
+ 
+ 	spin_lock_irqsave(&css_set_lock, flags);
+ 	list_del(&objcg->list);
+@@ -521,43 +522,6 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
+ 	return excess;
+ }
+ 
+-static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
+-{
+-	unsigned long excess;
+-	struct mem_cgroup_per_node *mz;
+-	struct mem_cgroup_tree_per_node *mctz;
+-
+-	mctz = soft_limit_tree.rb_tree_per_node[nid];
+-	if (!mctz)
+-		return;
+-	/*
+-	 * Necessary to update all ancestors when hierarchy is used.
+-	 * because their event counter is not touched.
+-	 */
+-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+-		mz = memcg->nodeinfo[nid];
+-		excess = soft_limit_excess(memcg);
+-		/*
+-		 * We have to update the tree if mz is on RB-tree or
+-		 * mem is over its softlimit.
+-		 */
+-		if (excess || mz->on_tree) {
+-			unsigned long flags;
+-
+-			spin_lock_irqsave(&mctz->lock, flags);
+-			/* if on-tree, remove it */
+-			if (mz->on_tree)
+-				__mem_cgroup_remove_exceeded(mz, mctz);
+-			/*
+-			 * Insert again. mz->usage_in_excess will be updated.
+-			 * If excess is 0, no tree ops.
+-			 */
+-			__mem_cgroup_insert_exceeded(mz, mctz, excess);
+-			spin_unlock_irqrestore(&mctz->lock, flags);
+-		}
+-	}
+-}
+-
+ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
+ {
+ 	struct mem_cgroup_tree_per_node *mctz;
+@@ -699,6 +663,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ 	memcg = pn->memcg;
+ 
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
+ 	/* Update memcg */
+ 	__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+ 
+@@ -706,6 +672,8 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ 	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+ 
+ 	memcg_rstat_updated(memcg);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
+ }
+ 
+ /**
+@@ -788,8 +756,12 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ 	if (mem_cgroup_disabled())
  		return;
- 	if (pagefault_disabled())
+ 
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
+ 	__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+ 	memcg_rstat_updated(memcg);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
+ }
+ 
+ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
+@@ -821,50 +793,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
+ 	__this_cpu_add(memcg->vmstats_percpu->nr_page_events, nr_pages);
+ }
+ 
+-static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
+-				       enum mem_cgroup_events_target target)
+-{
+-	unsigned long val, next;
+-
+-	val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events);
+-	next = __this_cpu_read(memcg->vmstats_percpu->targets[target]);
+-	/* from time_after() in jiffies.h */
+-	if ((long)(next - val) < 0) {
+-		switch (target) {
+-		case MEM_CGROUP_TARGET_THRESH:
+-			next = val + THRESHOLDS_EVENTS_TARGET;
+-			break;
+-		case MEM_CGROUP_TARGET_SOFTLIMIT:
+-			next = val + SOFTLIMIT_EVENTS_TARGET;
+-			break;
+-		default:
+-			break;
+-		}
+-		__this_cpu_write(memcg->vmstats_percpu->targets[target], next);
+-		return true;
+-	}
+-	return false;
+-}
+-
+-/*
+- * Check events in order.
+- *
+- */
+-static void memcg_check_events(struct mem_cgroup *memcg, int nid)
+-{
+-	/* threshold event is triggered in finer grain than soft limit */
+-	if (unlikely(mem_cgroup_event_ratelimit(memcg,
+-						MEM_CGROUP_TARGET_THRESH))) {
+-		bool do_softlimit;
+-
+-		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+-						MEM_CGROUP_TARGET_SOFTLIMIT);
+-		mem_cgroup_threshold(memcg);
+-		if (unlikely(do_softlimit))
+-			mem_cgroup_update_tree(memcg, nid);
+-	}
+-}
+-
+ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
+ {
+ 	/*
+@@ -2091,26 +2019,40 @@ struct obj_stock {
+ };
+ 
+ struct memcg_stock_pcp {
++	/* Protects memcg_stock_pcp */
++	local_lock_t stock_lock;
+ 	struct mem_cgroup *cached; /* this never be root cgroup */
+ 	unsigned int nr_pages;
++#ifndef CONFIG_PREEMPTION
++	/* Protects only task_obj */
++	local_lock_t task_obj_lock;
+ 	struct obj_stock task_obj;
++#endif
+ 	struct obj_stock irq_obj;
+ 
+ 	struct work_struct work;
+ 	unsigned long flags;
+ #define FLUSHING_CACHED_CHARGE	0
+ };
+-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
++static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
++	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
++#ifndef CONFIG_PREEMPTION
++	.task_obj_lock = INIT_LOCAL_LOCK(task_obj_lock),
++#endif
++};
+ static DEFINE_MUTEX(percpu_charge_mutex);
+ 
+ #ifdef CONFIG_MEMCG_KMEM
+-static void drain_obj_stock(struct obj_stock *stock);
++static struct obj_cgroup *drain_obj_stock(struct obj_stock *stock,
++					  bool stock_lock_acquried);
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ 				     struct mem_cgroup *root_memcg);
+ 
+ #else
+-static inline void drain_obj_stock(struct obj_stock *stock)
++static inline struct obj_cgroup *drain_obj_stock(struct obj_stock *stock,
++						 bool stock_lock_acquried)
+ {
++	return NULL;
+ }
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ 				     struct mem_cgroup *root_memcg)
+@@ -2139,7 +2081,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 	if (nr_pages > MEMCG_CHARGE_BATCH)
+ 		return ret;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ 
+ 	stock = this_cpu_ptr(&memcg_stock);
+ 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+@@ -2147,7 +2089,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 		ret = true;
+ 	}
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ 
+ 	return ret;
+ }
+@@ -2175,38 +2117,43 @@ static void drain_stock(struct memcg_stock_pcp *stock)
+ 
+ static void drain_local_stock(struct work_struct *dummy)
+ {
+-	struct memcg_stock_pcp *stock;
+-	unsigned long flags;
++	struct memcg_stock_pcp *stock_pcp;
++	struct obj_cgroup *old;
+ 
+ 	/*
+ 	 * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
+ 	 * drain_stock races is that we always operate on local CPU stock
+ 	 * here with IRQ disabled
+ 	 */
+-	local_irq_save(flags);
++#ifndef CONFIG_PREEMPTION
++	local_lock(&memcg_stock.task_obj_lock);
++	old = drain_obj_stock(&this_cpu_ptr(&memcg_stock)->task_obj, NULL);
++	local_unlock(&memcg_stock.task_obj_lock);
++	if (old)
++		obj_cgroup_put(old);
++#endif
+ 
+-	stock = this_cpu_ptr(&memcg_stock);
+-	drain_obj_stock(&stock->irq_obj);
+-	if (in_task())
+-		drain_obj_stock(&stock->task_obj);
+-	drain_stock(stock);
+-	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
++	local_lock_irq(&memcg_stock.stock_lock);
++	stock_pcp = this_cpu_ptr(&memcg_stock);
++	old = drain_obj_stock(&stock_pcp->irq_obj, stock_pcp);
+ 
+-	local_irq_restore(flags);
++	drain_stock(stock_pcp);
++	clear_bit(FLUSHING_CACHED_CHARGE, &stock_pcp->flags);
++
++	local_unlock_irq(&memcg_stock.stock_lock);
++	if (old)
++		obj_cgroup_put(old);
+ }
+ 
+ /*
+  * Cache charges(val) to local per_cpu area.
+  * This will be consumed by consume_stock() function, later.
+  */
+-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
++static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ {
+-	struct memcg_stock_pcp *stock;
+-	unsigned long flags;
+-
+-	local_irq_save(flags);
++	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+ 
+-	stock = this_cpu_ptr(&memcg_stock);
++	lockdep_assert_held(&stock->stock_lock);
+ 	if (stock->cached != memcg) { /* reset if necessary */
+ 		drain_stock(stock);
+ 		css_get(&memcg->css);
+@@ -2216,8 +2163,20 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 
+ 	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+ 		drain_stock(stock);
++}
+ 
+-	local_irq_restore(flags);
++static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
++			 bool stock_lock_acquried)
++{
++	unsigned long flags;
++
++	if (stock_lock_acquried) {
++		__refill_stock(memcg, nr_pages);
++		return;
++	}
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
++	__refill_stock(memcg, nr_pages);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ }
+ 
+ /*
+@@ -2226,7 +2185,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+  */
+ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ {
+-	int cpu, curcpu;
++	int cpu;
+ 
+ 	/* If someone's already draining, avoid adding running more workers. */
+ 	if (!mutex_trylock(&percpu_charge_mutex))
+@@ -2237,7 +2196,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ 	 * as well as workers from this path always operate on the local
+ 	 * per-cpu data. CPU up doesn't touch memcg_stock at all.
+ 	 */
+-	curcpu = get_cpu();
++	cpus_read_lock();
+ 	for_each_online_cpu(cpu) {
+ 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
+ 		struct mem_cgroup *memcg;
+@@ -2253,14 +2212,10 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ 		rcu_read_unlock();
+ 
+ 		if (flush &&
+-		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+-			if (cpu == curcpu)
+-				drain_local_stock(&stock->work);
+-			else
+-				schedule_work_on(cpu, &stock->work);
+-		}
++		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
++			schedule_work_on(cpu, &stock->work);
+ 	}
+-	put_cpu();
++	cpus_read_unlock();
+ 	mutex_unlock(&percpu_charge_mutex);
+ }
+ 
+@@ -2661,7 +2616,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
+ 
+ done_restock:
+ 	if (batch > nr_pages)
+-		refill_stock(memcg, batch - nr_pages);
++		refill_stock(memcg, batch - nr_pages, false);
+ 
+ 	/*
+ 	 * If the hierarchy is above the normal consumption range, schedule
+@@ -2774,28 +2729,36 @@ static struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg)
+  * can only be accessed after disabling interrupt. User context code can
+  * access interrupt object stock, but not vice versa.
+  */
+-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
++static inline struct obj_stock *get_obj_stock(unsigned long *pflags,
++					      bool *stock_lock_acquried)
+ {
+ 	struct memcg_stock_pcp *stock;
+ 
++#ifndef CONFIG_PREEMPTION
+ 	if (likely(in_task())) {
+ 		*pflags = 0UL;
+-		preempt_disable();
++		*stock_lock_acquried = false;
++		local_lock(&memcg_stock.task_obj_lock);
+ 		stock = this_cpu_ptr(&memcg_stock);
+ 		return &stock->task_obj;
+ 	}
+-
+-	local_irq_save(*pflags);
++#endif
++	*stock_lock_acquried = true;
++	local_lock_irqsave(&memcg_stock.stock_lock, *pflags);
+ 	stock = this_cpu_ptr(&memcg_stock);
+ 	return &stock->irq_obj;
+ }
+ 
+-static inline void put_obj_stock(unsigned long flags)
++static inline void put_obj_stock(unsigned long flags,
++				 bool stock_lock_acquried)
+ {
+-	if (likely(in_task()))
+-		preempt_enable();
+-	else
+-		local_irq_restore(flags);
++#ifndef CONFIG_PREEMPTION
++	if (likely(!stock_lock_acquried)) {
++		local_unlock(&memcg_stock.task_obj_lock);
++		return;
++	}
++#endif
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ }
+ 
+ /*
+@@ -2973,7 +2936,8 @@ static void memcg_free_cache_id(int id)
+  * @nr_pages: number of pages to uncharge
+  */
+ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
+-				      unsigned int nr_pages)
++				      unsigned int nr_pages,
++				      bool stock_lock_acquried)
+ {
+ 	struct mem_cgroup *memcg;
+ 
+@@ -2981,7 +2945,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
+ 
+ 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ 		page_counter_uncharge(&memcg->kmem, nr_pages);
+-	refill_stock(memcg, nr_pages);
++	refill_stock(memcg, nr_pages, stock_lock_acquried);
+ 
+ 	css_put(&memcg->css);
+ }
+@@ -3055,7 +3019,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
  		return;
--	__might_sleep(file, line, 0);
-+	__might_sleep(file, line);
- #if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
- 	if (current->mm)
- 		might_lock_read(&current->mm->mmap_lock);
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index 23d3339ac4e8..e71b9634a321 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -3149,9 +3149,9 @@ static void drain_local_pages_wq(struct work_struct *work)
- 	 * cpu which is alright but we also have to make sure to not move to
- 	 * a different one.
+ 
+ 	objcg = __folio_objcg(folio);
+-	obj_cgroup_uncharge_pages(objcg, nr_pages);
++	obj_cgroup_uncharge_pages(objcg, nr_pages, false);
+ 	folio->memcg_data = 0;
+ 	obj_cgroup_put(objcg);
+ }
+@@ -3063,17 +3027,21 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
+ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+ 		     enum node_stat_item idx, int nr)
+ {
++	bool stock_lock_acquried;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
++	struct obj_cgroup *old = NULL;
++	struct obj_stock *stock;
+ 	int *bytes;
+ 
++	stock = get_obj_stock(&flags, &stock_lock_acquried);
+ 	/*
+ 	 * Save vmstat data in stock and skip vmstat array update unless
+ 	 * accumulating over a page of vmstat data or when pgdat or idx
+ 	 * changes.
  	 */
--	preempt_disable();
-+	migrate_disable();
- 	drain_local_pages(drain->zone);
--	preempt_enable();
-+	migrate_enable();
+ 	if (stock->cached_objcg != objcg) {
+-		drain_obj_stock(stock);
++		old = drain_obj_stock(stock, stock_lock_acquried);
++
+ 		obj_cgroup_get(objcg);
+ 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+ 				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+@@ -3117,38 +3085,43 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+ 	if (nr)
+ 		mod_objcg_mlstate(objcg, pgdat, idx, nr);
+ 
+-	put_obj_stock(flags);
++	put_obj_stock(flags, stock_lock_acquried);
++	if (old)
++		obj_cgroup_put(old);
+ }
+ 
+ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ {
++	bool stock_lock_acquried;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
++	struct obj_stock *stock;
+ 	bool ret = false;
+ 
++	stock = get_obj_stock(&flags, &stock_lock_acquried);
+ 	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+ 		stock->nr_bytes -= nr_bytes;
+ 		ret = true;
+ 	}
+ 
+-	put_obj_stock(flags);
++	put_obj_stock(flags, stock_lock_acquried);
+ 
+ 	return ret;
+ }
+ 
+-static void drain_obj_stock(struct obj_stock *stock)
++static struct obj_cgroup *drain_obj_stock(struct obj_stock *stock,
++					  bool stock_lock_acquried)
+ {
+ 	struct obj_cgroup *old = stock->cached_objcg;
+ 
+ 	if (!old)
+-		return;
++		return NULL;
+ 
+ 	if (stock->nr_bytes) {
+ 		unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
+ 		unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
+ 
+ 		if (nr_pages)
+-			obj_cgroup_uncharge_pages(old, nr_pages);
++			obj_cgroup_uncharge_pages(old, nr_pages, stock_lock_acquried);
+ 
+ 		/*
+ 		 * The leftover is flushed to the centralized per-memcg value.
+@@ -3183,8 +3156,8 @@ static void drain_obj_stock(struct obj_stock *stock)
+ 		stock->cached_pgdat = NULL;
+ 	}
+ 
+-	obj_cgroup_put(old);
+ 	stock->cached_objcg = NULL;
++	return old;
+ }
+ 
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+@@ -3192,11 +3165,13 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ {
+ 	struct mem_cgroup *memcg;
+ 
++#ifndef CONFIG_PREEMPTION
+ 	if (in_task() && stock->task_obj.cached_objcg) {
+ 		memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
+ 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+ 			return true;
+ 	}
++#endif
+ 	if (stock->irq_obj.cached_objcg) {
+ 		memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
+ 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+@@ -3209,12 +3184,15 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
+ 			     bool allow_uncharge)
+ {
++	bool stock_lock_acquried;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
++	struct obj_stock *stock;
+ 	unsigned int nr_pages = 0;
++	struct obj_cgroup *old = NULL;
+ 
++	stock = get_obj_stock(&flags, &stock_lock_acquried);
+ 	if (stock->cached_objcg != objcg) { /* reset if necessary */
+-		drain_obj_stock(stock);
++		old = drain_obj_stock(stock, stock_lock_acquried);
+ 		obj_cgroup_get(objcg);
+ 		stock->cached_objcg = objcg;
+ 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+@@ -3228,10 +3206,12 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
+ 		stock->nr_bytes &= (PAGE_SIZE - 1);
+ 	}
+ 
+-	put_obj_stock(flags);
++	put_obj_stock(flags, stock_lock_acquried);
++	if (old)
++		obj_cgroup_put(old);
+ 
+ 	if (nr_pages)
+-		obj_cgroup_uncharge_pages(objcg, nr_pages);
++		obj_cgroup_uncharge_pages(objcg, nr_pages, false);
+ }
+ 
+ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
+@@ -3751,8 +3731,12 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+ 		}
+ 		break;
+ 	case RES_SOFT_LIMIT:
++#ifndef CONFIG_PREEMPT_RT
+ 		memcg->soft_limit = nr_pages;
+ 		ret = 0;
++#else
++		ret = -EOPNOTSUPP;
++#endif
+ 		break;
+ 	}
+ 	return ret ?: nbytes;
+@@ -4057,119 +4041,454 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
+ 	return 0;
+ }
+ 
+-static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
++static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
+ {
+-	struct mem_cgroup_threshold_ary *t;
+-	unsigned long usage;
+-	int i;
+-
+-	rcu_read_lock();
+-	if (!swap)
+-		t = rcu_dereference(memcg->thresholds.primary);
+-	else
+-		t = rcu_dereference(memcg->memsw_thresholds.primary);
+-
+-	if (!t)
+-		goto unlock;
+-
+-	usage = mem_cgroup_usage(memcg, swap);
++	struct mem_cgroup_eventfd_list *ev;
+ 
+-	/*
+-	 * current_threshold points to threshold just below or equal to usage.
+-	 * If it's not true, a threshold was crossed after last
+-	 * call of __mem_cgroup_threshold().
+-	 */
+-	i = t->current_threshold;
++	spin_lock(&memcg_oom_lock);
+ 
+-	/*
+-	 * Iterate backward over array of thresholds starting from
+-	 * current_threshold and check if a threshold is crossed.
+-	 * If none of thresholds below usage is crossed, we read
+-	 * only one element of the array here.
+-	 */
+-	for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
+-		eventfd_signal(t->entries[i].eventfd, 1);
++	list_for_each_entry(ev, &memcg->oom_notify, list)
++		eventfd_signal(ev->eventfd, 1);
+ 
+-	/* i = current_threshold + 1 */
+-	i++;
++	spin_unlock(&memcg_oom_lock);
++	return 0;
++}
+ 
+-	/*
+-	 * Iterate forward over array of thresholds starting from
+-	 * current_threshold+1 and check if a threshold is crossed.
+-	 * If none of thresholds above usage is crossed, we read
+-	 * only one element of the array here.
+-	 */
+-	for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
+-		eventfd_signal(t->entries[i].eventfd, 1);
++static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
++{
++	struct mem_cgroup *iter;
+ 
+-	/* Update current_threshold */
+-	t->current_threshold = i - 1;
+-unlock:
+-	rcu_read_unlock();
++	for_each_mem_cgroup_tree(iter, memcg)
++		mem_cgroup_oom_notify_cb(iter);
+ }
+ 
+-static void mem_cgroup_threshold(struct mem_cgroup *memcg)
++static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
+ {
+-	while (memcg) {
+-		__mem_cgroup_threshold(memcg, false);
+-		if (do_memsw_account())
+-			__mem_cgroup_threshold(memcg, true);
++	struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
+ 
+-		memcg = parent_mem_cgroup(memcg);
+-	}
++	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
++	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
++	seq_printf(sf, "oom_kill %lu\n",
++		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
++	return 0;
+ }
+ 
+-static int compare_thresholds(const void *a, const void *b)
++static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
++	struct cftype *cft, u64 val)
+ {
+-	const struct mem_cgroup_threshold *_a = a;
+-	const struct mem_cgroup_threshold *_b = b;
++	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ 
+-	if (_a->threshold > _b->threshold)
+-		return 1;
++	/* cannot set to root cgroup and only 0 and 1 are allowed */
++	if (mem_cgroup_is_root(memcg) || !((val == 0) || (val == 1)))
++		return -EINVAL;
+ 
+-	if (_a->threshold < _b->threshold)
+-		return -1;
++	memcg->oom_kill_disable = val;
++	if (!val)
++		memcg_oom_recover(memcg);
+ 
+ 	return 0;
+ }
+ 
+-static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
+-{
+-	struct mem_cgroup_eventfd_list *ev;
+-
+-	spin_lock(&memcg_oom_lock);
++#ifdef CONFIG_CGROUP_WRITEBACK
+ 
+-	list_for_each_entry(ev, &memcg->oom_notify, list)
+-		eventfd_signal(ev->eventfd, 1);
++#include <trace/events/writeback.h>
+ 
+-	spin_unlock(&memcg_oom_lock);
+-	return 0;
++static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
++{
++	return wb_domain_init(&memcg->cgwb_domain, gfp);
+ }
+ 
+-static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
++static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+ {
+-	struct mem_cgroup *iter;
++	wb_domain_exit(&memcg->cgwb_domain);
++}
+ 
+-	for_each_mem_cgroup_tree(iter, memcg)
+-		mem_cgroup_oom_notify_cb(iter);
++static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
++{
++	wb_domain_size_changed(&memcg->cgwb_domain);
+ }
+ 
+-static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
+-	struct eventfd_ctx *eventfd, const char *args, enum res_type type)
++struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+ {
+-	struct mem_cgroup_thresholds *thresholds;
+-	struct mem_cgroup_threshold_ary *new;
+-	unsigned long threshold;
+-	unsigned long usage;
+-	int i, size, ret;
++	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+ 
+-	ret = page_counter_memparse(args, "-1", &threshold);
+-	if (ret)
+-		return ret;
++	if (!memcg->css.parent)
++		return NULL;
+ 
+-	mutex_lock(&memcg->thresholds_lock);
++	return &memcg->cgwb_domain;
++}
+ 
+-	if (type == _MEM) {
++/**
++ * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
++ * @wb: bdi_writeback in question
++ * @pfilepages: out parameter for number of file pages
++ * @pheadroom: out parameter for number of allocatable pages according to memcg
++ * @pdirty: out parameter for number of dirty pages
++ * @pwriteback: out parameter for number of pages under writeback
++ *
++ * Determine the numbers of file, headroom, dirty, and writeback pages in
++ * @wb's memcg.  File, dirty and writeback are self-explanatory.  Headroom
++ * is a bit more involved.
++ *
++ * A memcg's headroom is "min(max, high) - used".  In the hierarchy, the
++ * headroom is calculated as the lowest headroom of itself and the
++ * ancestors.  Note that this doesn't consider the actual amount of
++ * available memory in the system.  The caller should further cap
++ * *@pheadroom accordingly.
++ */
++void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
++			 unsigned long *pheadroom, unsigned long *pdirty,
++			 unsigned long *pwriteback)
++{
++	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
++	struct mem_cgroup *parent;
++
++	mem_cgroup_flush_stats();
++
++	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
++	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
++	*pfilepages = memcg_page_state(memcg, NR_INACTIVE_FILE) +
++			memcg_page_state(memcg, NR_ACTIVE_FILE);
++
++	*pheadroom = PAGE_COUNTER_MAX;
++	while ((parent = parent_mem_cgroup(memcg))) {
++		unsigned long ceiling = min(READ_ONCE(memcg->memory.max),
++					    READ_ONCE(memcg->memory.high));
++		unsigned long used = page_counter_read(&memcg->memory);
++
++		*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
++		memcg = parent;
++	}
++}
++
++/*
++ * Foreign dirty flushing
++ *
++ * There's an inherent mismatch between memcg and writeback.  The former
++ * tracks ownership per-page while the latter per-inode.  This was a
++ * deliberate design decision because honoring per-page ownership in the
++ * writeback path is complicated, may lead to higher CPU and IO overheads
++ * and deemed unnecessary given that write-sharing an inode across
++ * different cgroups isn't a common use-case.
++ *
++ * Combined with inode majority-writer ownership switching, this works well
++ * enough in most cases but there are some pathological cases.  For
++ * example, let's say there are two cgroups A and B which keep writing to
++ * different but confined parts of the same inode.  B owns the inode and
++ * A's memory is limited far below B's.  A's dirty ratio can rise enough to
++ * trigger balance_dirty_pages() sleeps but B's can be low enough to avoid
++ * triggering background writeback.  A will be slowed down without a way to
++ * make writeback of the dirty pages happen.
++ *
++ * Conditions like the above can lead to a cgroup getting repeatedly and
++ * severely throttled after making some progress after each
++ * dirty_expire_interval while the underlying IO device is almost
++ * completely idle.
++ *
++ * Solving this problem completely requires matching the ownership tracking
++ * granularities between memcg and writeback in either direction.  However,
++ * the more egregious behaviors can be avoided by simply remembering the
++ * most recent foreign dirtying events and initiating remote flushes on
++ * them when local writeback isn't enough to keep the memory clean enough.
++ *
++ * The following two functions implement such mechanism.  When a foreign
++ * page - a page whose memcg and writeback ownerships don't match - is
++ * dirtied, mem_cgroup_track_foreign_dirty() records the inode owning
++ * bdi_writeback on the page owning memcg.  When balance_dirty_pages()
++ * decides that the memcg needs to sleep due to high dirty ratio, it calls
++ * mem_cgroup_flush_foreign() which queues writeback on the recorded
++ * foreign bdi_writebacks which haven't expired.  Both the numbers of
++ * recorded bdi_writebacks and concurrent in-flight foreign writebacks are
++ * limited to MEMCG_CGWB_FRN_CNT.
++ *
++ * The mechanism only remembers IDs and doesn't hold any object references.
++ * As being wrong occasionally doesn't matter, updates and accesses to the
++ * records are lockless and racy.
++ */
++void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
++					     struct bdi_writeback *wb)
++{
++	struct mem_cgroup *memcg = folio_memcg(folio);
++	struct memcg_cgwb_frn *frn;
++	u64 now = get_jiffies_64();
++	u64 oldest_at = now;
++	int oldest = -1;
++	int i;
++
++	trace_track_foreign_dirty(folio, wb);
++
++	/*
++	 * Pick the slot to use.  If there is already a slot for @wb, keep
++	 * using it.  If not replace the oldest one which isn't being
++	 * written out.
++	 */
++	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
++		frn = &memcg->cgwb_frn[i];
++		if (frn->bdi_id == wb->bdi->id &&
++		    frn->memcg_id == wb->memcg_css->id)
++			break;
++		if (time_before64(frn->at, oldest_at) &&
++		    atomic_read(&frn->done.cnt) == 1) {
++			oldest = i;
++			oldest_at = frn->at;
++		}
++	}
++
++	if (i < MEMCG_CGWB_FRN_CNT) {
++		/*
++		 * Re-using an existing one.  Update timestamp lazily to
++		 * avoid making the cacheline hot.  We want them to be
++		 * reasonably up-to-date and significantly shorter than
++		 * dirty_expire_interval as that's what expires the record.
++		 * Use the shorter of 1s and dirty_expire_interval / 8.
++		 */
++		unsigned long update_intv =
++			min_t(unsigned long, HZ,
++			      msecs_to_jiffies(dirty_expire_interval * 10) / 8);
++
++		if (time_before64(frn->at, now - update_intv))
++			frn->at = now;
++	} else if (oldest >= 0) {
++		/* replace the oldest free one */
++		frn = &memcg->cgwb_frn[oldest];
++		frn->bdi_id = wb->bdi->id;
++		frn->memcg_id = wb->memcg_css->id;
++		frn->at = now;
++	}
++}
++
++/* issue foreign writeback flushes for recorded foreign dirtying events */
++void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
++{
++	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
++	unsigned long intv = msecs_to_jiffies(dirty_expire_interval * 10);
++	u64 now = jiffies_64;
++	int i;
++
++	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
++		struct memcg_cgwb_frn *frn = &memcg->cgwb_frn[i];
++
++		/*
++		 * If the record is older than dirty_expire_interval,
++		 * writeback on it has already started.  No need to kick it
++		 * off again.  Also, don't start a new one if there's
++		 * already one in flight.
++		 */
++		if (time_after64(frn->at, now - intv) &&
++		    atomic_read(&frn->done.cnt) == 1) {
++			frn->at = 0;
++			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
++			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
++					       WB_REASON_FOREIGN_FLUSH,
++					       &frn->done);
++		}
++	}
++}
++
++#else	/* CONFIG_CGROUP_WRITEBACK */
++
++static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
++{
++	return 0;
++}
++
++static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
++{
++}
++
++static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
++{
++}
++
++#endif	/* CONFIG_CGROUP_WRITEBACK */
++
++#ifndef CONFIG_PREEMPT_RT
++/*
++ * DO NOT USE IN NEW FILES.
++ *
++ * "cgroup.event_control" implementation.
++ *
++ * This is way over-engineered.  It tries to support fully configurable
++ * events for each user.  Such level of flexibility is completely
++ * unnecessary especially in the light of the planned unified hierarchy.
++ *
++ * Please deprecate this and replace with something simpler if at all
++ * possible.
++ */
++
++static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
++				       enum mem_cgroup_events_target target)
++{
++	unsigned long val, next;
++
++	val = __this_cpu_read(memcg->vmstats_percpu->nr_page_events);
++	next = __this_cpu_read(memcg->vmstats_percpu->targets[target]);
++	/* from time_after() in jiffies.h */
++	if ((long)(next - val) < 0) {
++		switch (target) {
++		case MEM_CGROUP_TARGET_THRESH:
++			next = val + THRESHOLDS_EVENTS_TARGET;
++			break;
++		case MEM_CGROUP_TARGET_SOFTLIMIT:
++			next = val + SOFTLIMIT_EVENTS_TARGET;
++			break;
++		default:
++			break;
++		}
++		__this_cpu_write(memcg->vmstats_percpu->targets[target], next);
++		return true;
++	}
++	return false;
++}
++
++static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
++{
++	unsigned long excess;
++	struct mem_cgroup_per_node *mz;
++	struct mem_cgroup_tree_per_node *mctz;
++
++	mctz = soft_limit_tree.rb_tree_per_node[nid];
++	if (!mctz)
++		return;
++	/*
++	 * Necessary to update all ancestors when hierarchy is used.
++	 * because their event counter is not touched.
++	 */
++	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
++		mz = memcg->nodeinfo[nid];
++		excess = soft_limit_excess(memcg);
++		/*
++		 * We have to update the tree if mz is on RB-tree or
++		 * mem is over its softlimit.
++		 */
++		if (excess || mz->on_tree) {
++			unsigned long flags;
++
++			spin_lock_irqsave(&mctz->lock, flags);
++			/* if on-tree, remove it */
++			if (mz->on_tree)
++				__mem_cgroup_remove_exceeded(mz, mctz);
++			/*
++			 * Insert again. mz->usage_in_excess will be updated.
++			 * If excess is 0, no tree ops.
++			 */
++			__mem_cgroup_insert_exceeded(mz, mctz, excess);
++			spin_unlock_irqrestore(&mctz->lock, flags);
++		}
++	}
++}
++
++static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
++{
++	struct mem_cgroup_threshold_ary *t;
++	unsigned long usage;
++	int i;
++
++	rcu_read_lock();
++	if (!swap)
++		t = rcu_dereference(memcg->thresholds.primary);
++	else
++		t = rcu_dereference(memcg->memsw_thresholds.primary);
++
++	if (!t)
++		goto unlock;
++
++	usage = mem_cgroup_usage(memcg, swap);
++
++	/*
++	 * current_threshold points to threshold just below or equal to usage.
++	 * If it's not true, a threshold was crossed after last
++	 * call of __mem_cgroup_threshold().
++	 */
++	i = t->current_threshold;
++
++	/*
++	 * Iterate backward over array of thresholds starting from
++	 * current_threshold and check if a threshold is crossed.
++	 * If none of thresholds below usage is crossed, we read
++	 * only one element of the array here.
++	 */
++	for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
++		eventfd_signal(t->entries[i].eventfd, 1);
++
++	/* i = current_threshold + 1 */
++	i++;
++
++	/*
++	 * Iterate forward over array of thresholds starting from
++	 * current_threshold+1 and check if a threshold is crossed.
++	 * If none of thresholds above usage is crossed, we read
++	 * only one element of the array here.
++	 */
++	for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
++		eventfd_signal(t->entries[i].eventfd, 1);
++
++	/* Update current_threshold */
++	t->current_threshold = i - 1;
++unlock:
++	rcu_read_unlock();
++}
++
++static void mem_cgroup_threshold(struct mem_cgroup *memcg)
++{
++	while (memcg) {
++		__mem_cgroup_threshold(memcg, false);
++		if (do_memsw_account())
++			__mem_cgroup_threshold(memcg, true);
++
++		memcg = parent_mem_cgroup(memcg);
++	}
++}
++
++/*
++ * Check events in order.
++ *
++ */
++static void memcg_check_events(struct mem_cgroup *memcg, int nid)
++{
++	/* threshold event is triggered in finer grain than soft limit */
++	if (unlikely(mem_cgroup_event_ratelimit(memcg,
++						MEM_CGROUP_TARGET_THRESH))) {
++		bool do_softlimit;
++
++		do_softlimit = mem_cgroup_event_ratelimit(memcg,
++						MEM_CGROUP_TARGET_SOFTLIMIT);
++		mem_cgroup_threshold(memcg);
++		if (unlikely(do_softlimit))
++			mem_cgroup_update_tree(memcg, nid);
++	}
++}
++
++static int compare_thresholds(const void *a, const void *b)
++{
++	const struct mem_cgroup_threshold *_a = a;
++	const struct mem_cgroup_threshold *_b = b;
++
++	if (_a->threshold > _b->threshold)
++		return 1;
++
++	if (_a->threshold < _b->threshold)
++		return -1;
++
++	return 0;
++}
++
++static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
++	struct eventfd_ctx *eventfd, const char *args, enum res_type type)
++{
++	struct mem_cgroup_thresholds *thresholds;
++	struct mem_cgroup_threshold_ary *new;
++	unsigned long threshold;
++	unsigned long usage;
++	int i, size, ret;
++
++	ret = page_counter_memparse(args, "-1", &threshold);
++	if (ret)
++		return ret;
++
++	mutex_lock(&memcg->thresholds_lock);
++
++	if (type == _MEM) {
+ 		thresholds = &memcg->thresholds;
+ 		usage = mem_cgroup_usage(memcg, false);
+ 	} else if (type == _MEMSWAP) {
+@@ -4256,384 +4575,131 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
+ 
+ 	mutex_lock(&memcg->thresholds_lock);
+ 
+-	if (type == _MEM) {
+-		thresholds = &memcg->thresholds;
+-		usage = mem_cgroup_usage(memcg, false);
+-	} else if (type == _MEMSWAP) {
+-		thresholds = &memcg->memsw_thresholds;
+-		usage = mem_cgroup_usage(memcg, true);
+-	} else
+-		BUG();
+-
+-	if (!thresholds->primary)
+-		goto unlock;
+-
+-	/* Check if a threshold crossed before removing */
+-	__mem_cgroup_threshold(memcg, type == _MEMSWAP);
+-
+-	/* Calculate new number of threshold */
+-	size = entries = 0;
+-	for (i = 0; i < thresholds->primary->size; i++) {
+-		if (thresholds->primary->entries[i].eventfd != eventfd)
+-			size++;
+-		else
+-			entries++;
+-	}
+-
+-	new = thresholds->spare;
+-
+-	/* If no items related to eventfd have been cleared, nothing to do */
+-	if (!entries)
+-		goto unlock;
+-
+-	/* Set thresholds array to NULL if we don't have thresholds */
+-	if (!size) {
+-		kfree(new);
+-		new = NULL;
+-		goto swap_buffers;
+-	}
+-
+-	new->size = size;
+-
+-	/* Copy thresholds and find current threshold */
+-	new->current_threshold = -1;
+-	for (i = 0, j = 0; i < thresholds->primary->size; i++) {
+-		if (thresholds->primary->entries[i].eventfd == eventfd)
+-			continue;
+-
+-		new->entries[j] = thresholds->primary->entries[i];
+-		if (new->entries[j].threshold <= usage) {
+-			/*
+-			 * new->current_threshold will not be used
+-			 * until rcu_assign_pointer(), so it's safe to increment
+-			 * it here.
+-			 */
+-			++new->current_threshold;
+-		}
+-		j++;
+-	}
+-
+-swap_buffers:
+-	/* Swap primary and spare array */
+-	thresholds->spare = thresholds->primary;
+-
+-	rcu_assign_pointer(thresholds->primary, new);
+-
+-	/* To be sure that nobody uses thresholds */
+-	synchronize_rcu();
+-
+-	/* If all events are unregistered, free the spare array */
+-	if (!new) {
+-		kfree(thresholds->spare);
+-		thresholds->spare = NULL;
+-	}
+-unlock:
+-	mutex_unlock(&memcg->thresholds_lock);
+-}
+-
+-static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
+-	struct eventfd_ctx *eventfd)
+-{
+-	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM);
+-}
+-
+-static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
+-	struct eventfd_ctx *eventfd)
+-{
+-	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP);
+-}
+-
+-static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
+-	struct eventfd_ctx *eventfd, const char *args)
+-{
+-	struct mem_cgroup_eventfd_list *event;
+-
+-	event = kmalloc(sizeof(*event),	GFP_KERNEL);
+-	if (!event)
+-		return -ENOMEM;
+-
+-	spin_lock(&memcg_oom_lock);
+-
+-	event->eventfd = eventfd;
+-	list_add(&event->list, &memcg->oom_notify);
+-
+-	/* already in OOM ? */
+-	if (memcg->under_oom)
+-		eventfd_signal(eventfd, 1);
+-	spin_unlock(&memcg_oom_lock);
+-
+-	return 0;
+-}
+-
+-static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
+-	struct eventfd_ctx *eventfd)
+-{
+-	struct mem_cgroup_eventfd_list *ev, *tmp;
+-
+-	spin_lock(&memcg_oom_lock);
+-
+-	list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
+-		if (ev->eventfd == eventfd) {
+-			list_del(&ev->list);
+-			kfree(ev);
+-		}
+-	}
+-
+-	spin_unlock(&memcg_oom_lock);
+-}
+-
+-static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
+-{
+-	struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
+-
+-	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
+-	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
+-	seq_printf(sf, "oom_kill %lu\n",
+-		   atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
+-	return 0;
+-}
+-
+-static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
+-	struct cftype *cft, u64 val)
+-{
+-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+-
+-	/* cannot set to root cgroup and only 0 and 1 are allowed */
+-	if (mem_cgroup_is_root(memcg) || !((val == 0) || (val == 1)))
+-		return -EINVAL;
+-
+-	memcg->oom_kill_disable = val;
+-	if (!val)
+-		memcg_oom_recover(memcg);
+-
+-	return 0;
+-}
+-
+-#ifdef CONFIG_CGROUP_WRITEBACK
+-
+-#include <trace/events/writeback.h>
+-
+-static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
+-{
+-	return wb_domain_init(&memcg->cgwb_domain, gfp);
+-}
+-
+-static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
+-{
+-	wb_domain_exit(&memcg->cgwb_domain);
+-}
+-
+-static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
+-{
+-	wb_domain_size_changed(&memcg->cgwb_domain);
+-}
+-
+-struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
+-{
+-	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+-
+-	if (!memcg->css.parent)
+-		return NULL;
+-
+-	return &memcg->cgwb_domain;
+-}
+-
+-/**
+- * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
+- * @wb: bdi_writeback in question
+- * @pfilepages: out parameter for number of file pages
+- * @pheadroom: out parameter for number of allocatable pages according to memcg
+- * @pdirty: out parameter for number of dirty pages
+- * @pwriteback: out parameter for number of pages under writeback
+- *
+- * Determine the numbers of file, headroom, dirty, and writeback pages in
+- * @wb's memcg.  File, dirty and writeback are self-explanatory.  Headroom
+- * is a bit more involved.
+- *
+- * A memcg's headroom is "min(max, high) - used".  In the hierarchy, the
+- * headroom is calculated as the lowest headroom of itself and the
+- * ancestors.  Note that this doesn't consider the actual amount of
+- * available memory in the system.  The caller should further cap
+- * *@pheadroom accordingly.
+- */
+-void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
+-			 unsigned long *pheadroom, unsigned long *pdirty,
+-			 unsigned long *pwriteback)
+-{
+-	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+-	struct mem_cgroup *parent;
+-
+-	mem_cgroup_flush_stats();
++	if (type == _MEM) {
++		thresholds = &memcg->thresholds;
++		usage = mem_cgroup_usage(memcg, false);
++	} else if (type == _MEMSWAP) {
++		thresholds = &memcg->memsw_thresholds;
++		usage = mem_cgroup_usage(memcg, true);
++	} else
++		BUG();
+ 
+-	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+-	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+-	*pfilepages = memcg_page_state(memcg, NR_INACTIVE_FILE) +
+-			memcg_page_state(memcg, NR_ACTIVE_FILE);
++	if (!thresholds->primary)
++		goto unlock;
+ 
+-	*pheadroom = PAGE_COUNTER_MAX;
+-	while ((parent = parent_mem_cgroup(memcg))) {
+-		unsigned long ceiling = min(READ_ONCE(memcg->memory.max),
+-					    READ_ONCE(memcg->memory.high));
+-		unsigned long used = page_counter_read(&memcg->memory);
++	/* Check if a threshold crossed before removing */
++	__mem_cgroup_threshold(memcg, type == _MEMSWAP);
+ 
+-		*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
+-		memcg = parent;
++	/* Calculate new number of threshold */
++	size = entries = 0;
++	for (i = 0; i < thresholds->primary->size; i++) {
++		if (thresholds->primary->entries[i].eventfd != eventfd)
++			size++;
++		else
++			entries++;
+ 	}
+-}
+ 
+-/*
+- * Foreign dirty flushing
+- *
+- * There's an inherent mismatch between memcg and writeback.  The former
+- * tracks ownership per-page while the latter per-inode.  This was a
+- * deliberate design decision because honoring per-page ownership in the
+- * writeback path is complicated, may lead to higher CPU and IO overheads
+- * and deemed unnecessary given that write-sharing an inode across
+- * different cgroups isn't a common use-case.
+- *
+- * Combined with inode majority-writer ownership switching, this works well
+- * enough in most cases but there are some pathological cases.  For
+- * example, let's say there are two cgroups A and B which keep writing to
+- * different but confined parts of the same inode.  B owns the inode and
+- * A's memory is limited far below B's.  A's dirty ratio can rise enough to
+- * trigger balance_dirty_pages() sleeps but B's can be low enough to avoid
+- * triggering background writeback.  A will be slowed down without a way to
+- * make writeback of the dirty pages happen.
+- *
+- * Conditions like the above can lead to a cgroup getting repeatedly and
+- * severely throttled after making some progress after each
+- * dirty_expire_interval while the underlying IO device is almost
+- * completely idle.
+- *
+- * Solving this problem completely requires matching the ownership tracking
+- * granularities between memcg and writeback in either direction.  However,
+- * the more egregious behaviors can be avoided by simply remembering the
+- * most recent foreign dirtying events and initiating remote flushes on
+- * them when local writeback isn't enough to keep the memory clean enough.
+- *
+- * The following two functions implement such mechanism.  When a foreign
+- * page - a page whose memcg and writeback ownerships don't match - is
+- * dirtied, mem_cgroup_track_foreign_dirty() records the inode owning
+- * bdi_writeback on the page owning memcg.  When balance_dirty_pages()
+- * decides that the memcg needs to sleep due to high dirty ratio, it calls
+- * mem_cgroup_flush_foreign() which queues writeback on the recorded
+- * foreign bdi_writebacks which haven't expired.  Both the numbers of
+- * recorded bdi_writebacks and concurrent in-flight foreign writebacks are
+- * limited to MEMCG_CGWB_FRN_CNT.
+- *
+- * The mechanism only remembers IDs and doesn't hold any object references.
+- * As being wrong occasionally doesn't matter, updates and accesses to the
+- * records are lockless and racy.
+- */
+-void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
+-					     struct bdi_writeback *wb)
+-{
+-	struct mem_cgroup *memcg = folio_memcg(folio);
+-	struct memcg_cgwb_frn *frn;
+-	u64 now = get_jiffies_64();
+-	u64 oldest_at = now;
+-	int oldest = -1;
+-	int i;
++	new = thresholds->spare;
+ 
+-	trace_track_foreign_dirty(folio, wb);
++	/* If no items related to eventfd have been cleared, nothing to do */
++	if (!entries)
++		goto unlock;
+ 
+-	/*
+-	 * Pick the slot to use.  If there is already a slot for @wb, keep
+-	 * using it.  If not replace the oldest one which isn't being
+-	 * written out.
+-	 */
+-	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+-		frn = &memcg->cgwb_frn[i];
+-		if (frn->bdi_id == wb->bdi->id &&
+-		    frn->memcg_id == wb->memcg_css->id)
+-			break;
+-		if (time_before64(frn->at, oldest_at) &&
+-		    atomic_read(&frn->done.cnt) == 1) {
+-			oldest = i;
+-			oldest_at = frn->at;
+-		}
++	/* Set thresholds array to NULL if we don't have thresholds */
++	if (!size) {
++		kfree(new);
++		new = NULL;
++		goto swap_buffers;
+ 	}
+ 
+-	if (i < MEMCG_CGWB_FRN_CNT) {
+-		/*
+-		 * Re-using an existing one.  Update timestamp lazily to
+-		 * avoid making the cacheline hot.  We want them to be
+-		 * reasonably up-to-date and significantly shorter than
+-		 * dirty_expire_interval as that's what expires the record.
+-		 * Use the shorter of 1s and dirty_expire_interval / 8.
+-		 */
+-		unsigned long update_intv =
+-			min_t(unsigned long, HZ,
+-			      msecs_to_jiffies(dirty_expire_interval * 10) / 8);
++	new->size = size;
+ 
+-		if (time_before64(frn->at, now - update_intv))
+-			frn->at = now;
+-	} else if (oldest >= 0) {
+-		/* replace the oldest free one */
+-		frn = &memcg->cgwb_frn[oldest];
+-		frn->bdi_id = wb->bdi->id;
+-		frn->memcg_id = wb->memcg_css->id;
+-		frn->at = now;
++	/* Copy thresholds and find current threshold */
++	new->current_threshold = -1;
++	for (i = 0, j = 0; i < thresholds->primary->size; i++) {
++		if (thresholds->primary->entries[i].eventfd == eventfd)
++			continue;
++
++		new->entries[j] = thresholds->primary->entries[i];
++		if (new->entries[j].threshold <= usage) {
++			/*
++			 * new->current_threshold will not be used
++			 * until rcu_assign_pointer(), so it's safe to increment
++			 * it here.
++			 */
++			++new->current_threshold;
++		}
++		j++;
+ 	}
+-}
+ 
+-/* issue foreign writeback flushes for recorded foreign dirtying events */
+-void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
+-{
+-	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+-	unsigned long intv = msecs_to_jiffies(dirty_expire_interval * 10);
+-	u64 now = jiffies_64;
+-	int i;
++swap_buffers:
++	/* Swap primary and spare array */
++	thresholds->spare = thresholds->primary;
+ 
+-	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+-		struct memcg_cgwb_frn *frn = &memcg->cgwb_frn[i];
++	rcu_assign_pointer(thresholds->primary, new);
+ 
+-		/*
+-		 * If the record is older than dirty_expire_interval,
+-		 * writeback on it has already started.  No need to kick it
+-		 * off again.  Also, don't start a new one if there's
+-		 * already one in flight.
+-		 */
+-		if (time_after64(frn->at, now - intv) &&
+-		    atomic_read(&frn->done.cnt) == 1) {
+-			frn->at = 0;
+-			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
+-			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
+-					       WB_REASON_FOREIGN_FLUSH,
+-					       &frn->done);
+-		}
++	/* To be sure that nobody uses thresholds */
++	synchronize_rcu();
++
++	/* If all events are unregistered, free the spare array */
++	if (!new) {
++		kfree(thresholds->spare);
++		thresholds->spare = NULL;
+ 	}
++unlock:
++	mutex_unlock(&memcg->thresholds_lock);
+ }
+ 
+-#else	/* CONFIG_CGROUP_WRITEBACK */
+-
+-static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
++static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
++	struct eventfd_ctx *eventfd)
+ {
+-	return 0;
++	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM);
+ }
+ 
+-static void memcg_wb_domain_exit(struct mem_cgroup *memcg)
++static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
++	struct eventfd_ctx *eventfd)
+ {
++	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP);
+ }
+ 
+-static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
++static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
++	struct eventfd_ctx *eventfd, const char *args)
+ {
++	struct mem_cgroup_eventfd_list *event;
++
++	event = kmalloc(sizeof(*event),	GFP_KERNEL);
++	if (!event)
++		return -ENOMEM;
++
++	spin_lock(&memcg_oom_lock);
++
++	event->eventfd = eventfd;
++	list_add(&event->list, &memcg->oom_notify);
++
++	/* already in OOM ? */
++	if (memcg->under_oom)
++		eventfd_signal(eventfd, 1);
++	spin_unlock(&memcg_oom_lock);
++
++	return 0;
+ }
+ 
+-#endif	/* CONFIG_CGROUP_WRITEBACK */
++static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
++	struct eventfd_ctx *eventfd)
++{
++	struct mem_cgroup_eventfd_list *ev, *tmp;
+ 
+-/*
+- * DO NOT USE IN NEW FILES.
+- *
+- * "cgroup.event_control" implementation.
+- *
+- * This is way over-engineered.  It tries to support fully configurable
+- * events for each user.  Such level of flexibility is completely
+- * unnecessary especially in the light of the planned unified hierarchy.
+- *
+- * Please deprecate this and replace with something simpler if at all
+- * possible.
+- */
++	spin_lock(&memcg_oom_lock);
++
++	list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
++		if (ev->eventfd == eventfd) {
++			list_del(&ev->list);
++			kfree(ev);
++		}
++	}
++
++	spin_unlock(&memcg_oom_lock);
++}
+ 
+ /*
+  * Unregister event and free resources.
+@@ -4845,6 +4911,18 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ 	return ret;
+ }
+ 
++#else
++
++static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
++					 char *buf, size_t nbytes, loff_t off)
++{
++	return -EOPNOTSUPP;
++}
++
++static void memcg_check_events(struct mem_cgroup *memcg, int nid) { }
++
++#endif
++
+ static struct cftype mem_cgroup_legacy_files[] = {
+ 	{
+ 		.name = "usage_in_bytes",
+@@ -7017,7 +7095,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 
+ 	mod_memcg_state(memcg, MEMCG_SOCK, -nr_pages);
+ 
+-	refill_stock(memcg, nr_pages);
++	refill_stock(memcg, nr_pages, false);
  }
  
- /*
+ static int __init cgroup_memory(char *s)
+@@ -7157,9 +7235,18 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ 	 * i_pages lock which is taken with interrupts-off. It is
+ 	 * important here to have the interrupts disabled because it is the
+ 	 * only synchronisation we have for updating the per-CPU variables.
++	 * On PREEMPT_RT interrupts are never disabled and the updates to per-CPU
++	 * variables are synchronised by keeping preemption disabled.
+ 	 */
+-	VM_BUG_ON(!irqs_disabled());
+-	mem_cgroup_charge_statistics(memcg, -nr_entries);
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
++		VM_BUG_ON(!irqs_disabled());
++		mem_cgroup_charge_statistics(memcg, -nr_entries);
++	} else {
++		preempt_disable();
++		mem_cgroup_charge_statistics(memcg, -nr_entries);
++		preempt_enable();
++	}
++
+ 	memcg_check_events(memcg, page_to_nid(page));
+ 
+ 	css_put(&memcg->css);
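[editorial illustration] The memcontrol.c changes above move the cgroup v1 eventfd notification interface ("cgroup.event_control", usage thresholds, OOM notify) under #ifndef CONFIG_PREEMPT_RT and stub memcg_write_event_control() out with -EOPNOTSUPP on RT kernels. For context, a minimal userspace consumer of that interface looks roughly like the sketch below. The interface string format is the documented cgroup v1 memory-threshold API; the mount point /sys/fs/cgroup/memory/demo and the 64 MiB threshold are assumptions about the local setup, not part of this patch.

/* memcg_thresh.c - register a usage threshold on a v1 memory cgroup and
 * wait for it to fire.  On a PREEMPT_RT kernel with this patch applied the
 * write to cgroup.event_control is expected to fail with EOPNOTSUPP.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	const char *cg = "/sys/fs/cgroup/memory/demo";	/* assumed cgroup */
	char path[256], cmd[128];
	uint64_t count;

	int efd = eventfd(0, 0);
	snprintf(path, sizeof(path), "%s/memory.usage_in_bytes", cg);
	int ufd = open(path, O_RDONLY);
	snprintf(path, sizeof(path), "%s/cgroup.event_control", cg);
	int cfd = open(path, O_WRONLY);
	if (efd < 0 || ufd < 0 || cfd < 0) {
		perror("open");
		return 1;
	}

	/* "<event_fd> <fd of memory.usage_in_bytes> <threshold in bytes>" */
	snprintf(cmd, sizeof(cmd), "%d %d %llu", efd, ufd, 64ULL << 20);
	if (write(cfd, cmd, strlen(cmd)) < 0) {
		perror("cgroup.event_control");	/* EOPNOTSUPP on PREEMPT_RT */
		return 1;
	}

	/* blocks until the cgroup's usage crosses the threshold */
	if (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("memory threshold crossed %llu time(s)\n",
		       (unsigned long long)count);
	return 0;
}

Built with "gcc memcg_thresh.c", it blocks in read() until the assumed cgroup crosses 64 MiB, or fails immediately where the interface is stubbed out.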
 diff --git a/mm/vmalloc.c b/mm/vmalloc.c
-index e8a807c78110..3c5be440ab30 100644
+index d2a00ad4e1dd..ce8dcad7639f 100644
 --- a/mm/vmalloc.c
 +++ b/mm/vmalloc.c
-@@ -1918,11 +1918,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
+@@ -1922,11 +1922,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
  		return ERR_PTR(err);
  	}
  
@@ -9369,7 +9836,7 @@ index e8a807c78110..3c5be440ab30 100644
  
  	return vaddr;
  }
-@@ -2001,7 +2002,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+@@ -2005,7 +2006,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  	order = get_order(size);
  
  	rcu_read_lock();
@@ -9379,7 +9846,7 @@ index e8a807c78110..3c5be440ab30 100644
  	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
  		unsigned long pages_off;
  
-@@ -2024,7 +2026,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+@@ -2028,7 +2030,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
  		break;
  	}
  
@@ -9389,7 +9856,7 @@ index e8a807c78110..3c5be440ab30 100644
  
  	/* Allocate new block if nothing was found */
 diff --git a/mm/workingset.c b/mm/workingset.c
-index d5b81e4f4cbe..d7ceee62e3c6 100644
+index 8c03afe1d67c..4579883eb109 100644
 --- a/mm/workingset.c
 +++ b/mm/workingset.c
 @@ -433,6 +433,8 @@ static struct list_lru shadow_nodes;
@@ -9412,10 +9879,25 @@ index d5b81e4f4cbe..d7ceee62e3c6 100644
  	if (node->count && node->count == node->nr_values) {
  		if (list_empty(&node->private_list)) {
 diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
-index b897ce3b399a..6a58c1df0cc7 100644
+index b897ce3b399a..7e03cc9363bb 100644
 --- a/mm/zsmalloc.c
 +++ b/mm/zsmalloc.c
-@@ -57,6 +57,7 @@
+@@ -30,6 +30,14 @@
+ 
+ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+ 
++/*
++ * lock ordering:
++ *	page_lock
++ *	pool->migrate_lock
++ *	class->lock
++ *	zspage->lock
++ */
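[editorial illustration] The ordering comment above is easiest to see with a toy model. The sketch below is not zsmalloc code; it just uses pthread mutexes named after the documented locks to show why two paths that take locks in a suffix of the same global order cannot deadlock against each other.

/* zs_lock_order.c - toy model of the documented lock ordering.  The mutexes
 * stand in for page_lock, pool->migrate_lock, class->lock and zspage->lock;
 * any path taking several of them respects this order, which rules out
 * ABBA deadlocks between e.g. the migration and free paths.  Illustrative
 * only, names are invented.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t migrate_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t class_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t zspage_lock  = PTHREAD_MUTEX_INITIALIZER;

/* "migration" path: needs all four, taken strictly in the documented order */
static void *migrate_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&page_lock);
	pthread_mutex_lock(&migrate_lock);
	pthread_mutex_lock(&class_lock);
	pthread_mutex_lock(&zspage_lock);
	puts("migrate: page -> migrate -> class -> zspage");
	pthread_mutex_unlock(&zspage_lock);
	pthread_mutex_unlock(&class_lock);
	pthread_mutex_unlock(&migrate_lock);
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

/* "free" path: takes only a later part of the order and never a lock that
 * sits above one it already holds, so it cannot deadlock with the above */
static void *free_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&migrate_lock);
	pthread_mutex_lock(&class_lock);
	puts("free:    migrate -> class");
	pthread_mutex_unlock(&class_lock);
	pthread_mutex_unlock(&migrate_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, migrate_path, NULL);
	pthread_create(&b, NULL, free_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Build with "gcc -pthread zs_lock_order.c"; both threads complete regardless of scheduling because neither ever acquires against the documented order.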
++
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
+@@ -57,6 +65,7 @@
  #include <linux/wait.h>
  #include <linux/pagemap.h>
  #include <linux/fs.h>
@@ -9423,28 +9905,82 @@ index b897ce3b399a..6a58c1df0cc7 100644
  
  #define ZSPAGE_MAGIC	0x58
  
-@@ -77,6 +78,20 @@
+@@ -100,15 +109,6 @@
  
- #define ZS_HANDLE_SIZE (sizeof(unsigned long))
+ #define _PFN_BITS		(MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
  
-+#ifdef CONFIG_PREEMPT_RT
-+
-+struct zsmalloc_handle {
-+	unsigned long addr;
-+	spinlock_t lock;
-+};
-+
-+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
-+
-+#else
-+
-+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
-+#endif
-+
+-/*
+- * Memory for allocating for handle keeps object position by
+- * encoding <page, obj_idx> and the encoded value has a room
+- * in least bit(ie, look at obj_to_location).
+- * We use the bit to synchronize between object access by
+- * user and migration.
+- */
+-#define HANDLE_PIN_BIT	0
+-
+ /*
+  * Head in allocated object should have OBJ_ALLOCATED_TAG
+  * to identify the object was allocated or not.
+@@ -121,6 +121,7 @@
+ #define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
+ #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+ 
++#define HUGE_BITS	1
+ #define FULLNESS_BITS	2
+ #define CLASS_BITS	8
+ #define ISOLATED_BITS	3
+@@ -158,7 +159,7 @@ enum fullness_group {
+ 	NR_ZS_FULLNESS,
+ };
+ 
+-enum zs_stat_type {
++enum class_stat_type {
+ 	CLASS_EMPTY,
+ 	CLASS_ALMOST_EMPTY,
+ 	CLASS_ALMOST_FULL,
+@@ -213,22 +214,6 @@ struct size_class {
+ 	struct zs_size_stat stats;
+ };
+ 
+-/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
+-static void SetPageHugeObject(struct page *page)
+-{
+-	SetPageOwnerPriv1(page);
+-}
+-
+-static void ClearPageHugeObject(struct page *page)
+-{
+-	ClearPageOwnerPriv1(page);
+-}
+-
+-static int PageHugeObject(struct page *page)
+-{
+-	return PageOwnerPriv1(page);
+-}
+-
  /*
-  * Object location (<PFN>, <obj_idx>) is encoded as
-  * a single (unsigned long) handle value.
-@@ -293,6 +308,7 @@ struct zspage {
+  * Placed within free objects to form a singly linked list.
+  * For every zspage, zspage->freeobj gives head of this list.
+@@ -269,15 +254,14 @@ struct zs_pool {
+ #ifdef CONFIG_COMPACTION
+ 	struct inode *inode;
+ 	struct work_struct free_work;
+-	/* A wait queue for when migration races with async_free_zspage() */
+-	struct wait_queue_head migration_wait;
+-	atomic_long_t isolated_pages;
+-	bool destroying;
+ #endif
++	/* protect page/zspage migration */
++	rwlock_t migrate_lock;
+ };
+ 
+ struct zspage {
+ 	struct {
++		unsigned int huge:HUGE_BITS;
+ 		unsigned int fullness:FULLNESS_BITS;
+ 		unsigned int class:CLASS_BITS + 1;
+ 		unsigned int isolated:ISOLATED_BITS;
+@@ -293,17 +277,32 @@ struct zspage {
  };
  
  struct mapping_area {
@@ -9452,190 +9988,1094 @@ index b897ce3b399a..6a58c1df0cc7 100644
  	char *vm_buf; /* copy buffer for objects that span pages */
  	char *vm_addr; /* address of kmap_atomic()'ed pages */
  	enum zs_mapmode vm_mm; /* mapping mode */
-@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
+ };
  
- static int create_cache(struct zs_pool *pool)
++/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
++static void SetZsHugePage(struct zspage *zspage)
++{
++	zspage->huge = 1;
++}
++
++static bool ZsHugePage(struct zspage *zspage)
++{
++	return zspage->huge;
++}
++
+ #ifdef CONFIG_COMPACTION
+ static int zs_register_migration(struct zs_pool *pool);
+ static void zs_unregister_migration(struct zs_pool *pool);
+ static void migrate_lock_init(struct zspage *zspage);
+ static void migrate_read_lock(struct zspage *zspage);
+ static void migrate_read_unlock(struct zspage *zspage);
++static void migrate_write_lock(struct zspage *zspage);
++static void migrate_write_lock_nested(struct zspage *zspage);
++static void migrate_write_unlock(struct zspage *zspage);
+ static void kick_deferred_free(struct zs_pool *pool);
+ static void init_deferred_free(struct zs_pool *pool);
+ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
+@@ -315,6 +314,9 @@ static void zs_unregister_migration(struct zs_pool *pool) {}
+ static void migrate_lock_init(struct zspage *zspage) {}
+ static void migrate_read_lock(struct zspage *zspage) {}
+ static void migrate_read_unlock(struct zspage *zspage) {}
++static void migrate_write_lock(struct zspage *zspage) {}
++static void migrate_write_lock_nested(struct zspage *zspage) {}
++static void migrate_write_unlock(struct zspage *zspage) {}
+ static void kick_deferred_free(struct zs_pool *pool) {}
+ static void init_deferred_free(struct zs_pool *pool) {}
+ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
+@@ -366,14 +368,10 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+ 	kmem_cache_free(pool->zspage_cachep, zspage);
+ }
+ 
++/* class->lock(which owns the handle) synchronizes races */
+ static void record_obj(unsigned long handle, unsigned long obj)
  {
--	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
-+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
- 					0, 0, NULL);
- 	if (!pool->handle_cachep)
- 		return 1;
-@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool)
+-	/*
+-	 * lsb of @obj represents handle lock while other bits
+-	 * represent object value the handle is pointing so
+-	 * updating shouldn't do store tearing.
+-	 */
+-	WRITE_ONCE(*(unsigned long *)handle, obj);
++	*(unsigned long *)handle = obj;
+ }
+ 
+ /* zpool driver */
+@@ -455,12 +453,9 @@ MODULE_ALIAS("zpool-zsmalloc");
+ #endif /* CONFIG_ZPOOL */
  
- static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
+ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
+-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+-
+-static bool is_zspage_isolated(struct zspage *zspage)
+-{
+-	return zspage->isolated;
+-}
++static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
++	.lock	= INIT_LOCAL_LOCK(lock),
++};
+ 
+ static __maybe_unused int is_first_page(struct page *page)
  {
--	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
--			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-+	void *p;
+@@ -517,6 +512,12 @@ static void get_zspage_mapping(struct zspage *zspage,
+ 	*class_idx = zspage->class;
+ }
+ 
++static struct size_class *zspage_class(struct zs_pool *pool,
++					     struct zspage *zspage)
++{
++	return pool->size_class[zspage->class];
++}
 +
-+	p = kmem_cache_alloc(pool->handle_cachep,
-+			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
-+#ifdef CONFIG_PREEMPT_RT
-+	if (p) {
-+		struct zsmalloc_handle *zh = p;
+ static void set_zspage_mapping(struct zspage *zspage,
+ 				unsigned int class_idx,
+ 				enum fullness_group fullness)
+@@ -543,21 +544,21 @@ static int get_size_class_index(int size)
+ 	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
+ }
+ 
+-/* type can be of enum type zs_stat_type or fullness_group */
+-static inline void zs_stat_inc(struct size_class *class,
++/* type can be of enum type class_stat_type or fullness_group */
++static inline void class_stat_inc(struct size_class *class,
+ 				int type, unsigned long cnt)
+ {
+ 	class->stats.objs[type] += cnt;
+ }
+ 
+-/* type can be of enum type zs_stat_type or fullness_group */
+-static inline void zs_stat_dec(struct size_class *class,
++/* type can be of enum type class_stat_type or fullness_group */
++static inline void class_stat_dec(struct size_class *class,
+ 				int type, unsigned long cnt)
+ {
+ 	class->stats.objs[type] -= cnt;
+ }
+ 
+-/* type can be of enum type zs_stat_type or fullness_group */
++/* type can be of enum type class_stat_type or fullness_group */
+ static inline unsigned long zs_stat_get(struct size_class *class,
+ 				int type)
+ {
+@@ -719,7 +720,7 @@ static void insert_zspage(struct size_class *class,
+ {
+ 	struct zspage *head;
+ 
+-	zs_stat_inc(class, fullness, 1);
++	class_stat_inc(class, fullness, 1);
+ 	head = list_first_entry_or_null(&class->fullness_list[fullness],
+ 					struct zspage, list);
+ 	/*
+@@ -741,10 +742,9 @@ static void remove_zspage(struct size_class *class,
+ 				enum fullness_group fullness)
+ {
+ 	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
+-	VM_BUG_ON(is_zspage_isolated(zspage));
+ 
+ 	list_del_init(&zspage->list);
+-	zs_stat_dec(class, fullness, 1);
++	class_stat_dec(class, fullness, 1);
+ }
+ 
+ /*
+@@ -767,13 +767,9 @@ static enum fullness_group fix_fullness_group(struct size_class *class,
+ 	if (newfg == currfg)
+ 		goto out;
+ 
+-	if (!is_zspage_isolated(zspage)) {
+-		remove_zspage(class, zspage, currfg);
+-		insert_zspage(class, zspage, newfg);
+-	}
+-
++	remove_zspage(class, zspage, currfg);
++	insert_zspage(class, zspage, newfg);
+ 	set_zspage_mapping(zspage, class_idx, newfg);
+-
+ out:
+ 	return newfg;
+ }
+@@ -824,7 +820,9 @@ static struct zspage *get_zspage(struct page *page)
+ 
+ static struct page *get_next_page(struct page *page)
+ {
+-	if (unlikely(PageHugeObject(page)))
++	struct zspage *zspage = get_zspage(page);
 +
-+		spin_lock_init(&zh->lock);
-+	}
-+#endif
-+	return (unsigned long)p;
++	if (unlikely(ZsHugePage(zspage)))
+ 		return NULL;
+ 
+ 	return page->freelist;
+@@ -844,6 +842,12 @@ static void obj_to_location(unsigned long obj, struct page **page,
+ 	*obj_idx = (obj & OBJ_INDEX_MASK);
  }
  
-+#ifdef CONFIG_PREEMPT_RT
-+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
++static void obj_to_page(unsigned long obj, struct page **page)
 +{
-+	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
++	obj >>= OBJ_TAG_BITS;
++	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
++}
++
+ /**
+  * location_to_obj - get obj value encoded from (<page>, <obj_idx>)
+  * @page: page object resides in zspage
+@@ -865,33 +869,22 @@ static unsigned long handle_to_obj(unsigned long handle)
+ 	return *(unsigned long *)handle;
+ }
+ 
+-static unsigned long obj_to_head(struct page *page, void *obj)
++static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle)
+ {
+-	if (unlikely(PageHugeObject(page))) {
++	unsigned long handle;
++	struct zspage *zspage = get_zspage(page);
++
++	if (unlikely(ZsHugePage(zspage))) {
+ 		VM_BUG_ON_PAGE(!is_first_page(page), page);
+-		return page->index;
++		handle = page->index;
+ 	} else
+-		return *(unsigned long *)obj;
+-}
+-
+-static inline int testpin_tag(unsigned long handle)
+-{
+-	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+-}
+-
+-static inline int trypin_tag(unsigned long handle)
+-{
+-	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+-}
++		handle = *(unsigned long *)obj;
+ 
+-static void pin_tag(unsigned long handle) __acquires(bitlock)
+-{
+-	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+-}
++	if (!(handle & OBJ_ALLOCATED_TAG))
++		return false;
+ 
+-static void unpin_tag(unsigned long handle) __releases(bitlock)
+-{
+-	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
++	*phandle = handle & ~OBJ_ALLOCATED_TAG;
++	return true;
+ }
+ 
+ static void reset_page(struct page *page)
+@@ -900,7 +893,6 @@ static void reset_page(struct page *page)
+ 	ClearPagePrivate(page);
+ 	set_page_private(page, 0);
+ 	page_mapcount_reset(page);
+-	ClearPageHugeObject(page);
+ 	page->freelist = NULL;
+ }
+ 
+@@ -952,7 +944,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
+ 
+ 	cache_free_zspage(pool, zspage);
+ 
+-	zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
++	class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
+ 	atomic_long_sub(class->pages_per_zspage,
+ 					&pool->pages_allocated);
+ }
+@@ -963,6 +955,11 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
+ 	VM_BUG_ON(get_zspage_inuse(zspage));
+ 	VM_BUG_ON(list_empty(&zspage->list));
+ 
++	/*
++	 * Since zs_free couldn't be sleepable, this function cannot call
++	 * lock_page. The page locks trylock_zspage got will be released
++	 * by __free_zspage.
++	 */
+ 	if (!trylock_zspage(zspage)) {
+ 		kick_deferred_free(pool);
+ 		return;
+@@ -1042,7 +1039,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
+ 			SetPagePrivate(page);
+ 			if (unlikely(class->objs_per_zspage == 1 &&
+ 					class->pages_per_zspage == 1))
+-				SetPageHugeObject(page);
++				SetZsHugePage(zspage);
+ 		} else {
+ 			prev_page->freelist = page;
+ 		}
+@@ -1246,8 +1243,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 	unsigned long obj, off;
+ 	unsigned int obj_idx;
+ 
+-	unsigned int class_idx;
+-	enum fullness_group fg;
+ 	struct size_class *class;
+ 	struct mapping_area *area;
+ 	struct page *pages[2];
+@@ -1260,21 +1255,26 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 	 */
+ 	BUG_ON(in_interrupt());
+ 
+-	/* From now on, migration cannot move the object */
+-	pin_tag(handle);
+-
++	/* It guarantees it can get zspage from handle safely */
++	read_lock(&pool->migrate_lock);
+ 	obj = handle_to_obj(handle);
+ 	obj_to_location(obj, &page, &obj_idx);
+ 	zspage = get_zspage(page);
+ 
+-	/* migration cannot move any subpage in this zspage */
++	/*
++	 * migration cannot move any zpages in this zspage. Here, class->lock
++	 * is too heavy since callers would take some time until they calls
++	 * zs_unmap_object API so delegate the locking from class to zspage
++	 * which is smaller granularity.
++	 */
+ 	migrate_read_lock(zspage);
++	read_unlock(&pool->migrate_lock);
+ 
+-	get_zspage_mapping(zspage, &class_idx, &fg);
+-	class = pool->size_class[class_idx];
++	class = zspage_class(pool, zspage);
+ 	off = (class->size * obj_idx) & ~PAGE_MASK;
+ 
+-	area = &get_cpu_var(zs_map_area);
++	local_lock(&zs_map_area.lock);
++	area = this_cpu_ptr(&zs_map_area);
+ 	area->vm_mm = mm;
+ 	if (off + class->size <= PAGE_SIZE) {
+ 		/* this object is contained entirely within a page */
+@@ -1290,7 +1290,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 
+ 	ret = __zs_map_object(area, pages, off, class->size);
+ out:
+-	if (likely(!PageHugeObject(page)))
++	if (likely(!ZsHugePage(zspage)))
+ 		ret += ZS_HANDLE_SIZE;
+ 
+ 	return ret;
+@@ -1304,16 +1304,13 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+ 	unsigned long obj, off;
+ 	unsigned int obj_idx;
+ 
+-	unsigned int class_idx;
+-	enum fullness_group fg;
+ 	struct size_class *class;
+ 	struct mapping_area *area;
+ 
+ 	obj = handle_to_obj(handle);
+ 	obj_to_location(obj, &page, &obj_idx);
+ 	zspage = get_zspage(page);
+-	get_zspage_mapping(zspage, &class_idx, &fg);
+-	class = pool->size_class[class_idx];
++	class = zspage_class(pool, zspage);
+ 	off = (class->size * obj_idx) & ~PAGE_MASK;
+ 
+ 	area = this_cpu_ptr(&zs_map_area);
+@@ -1328,10 +1325,9 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+ 
+ 		__zs_unmap_object(area, pages, off, class->size);
+ 	}
+-	put_cpu_var(zs_map_area);
++	local_unlock(&zs_map_area.lock);
+ 
+ 	migrate_read_unlock(zspage);
+-	unpin_tag(handle);
+ }
+ EXPORT_SYMBOL_GPL(zs_unmap_object);
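[editorial illustration] The zs_map_object()/zs_unmap_object() hunks above replace the implicit get_cpu_var()/put_cpu_var() pair around the per-CPU zs_map_area with an explicit local_lock. On !PREEMPT_RT a local_lock still just disables preemption, preserving the old behaviour; on PREEMPT_RT it becomes a per-CPU sleeping lock so the mapped section stays preemptible. The module below is a hypothetical, minimal sketch of that same pattern (it assumes a tree new enough to have linux/local_lock.h); it is not part of this patch.

/* locallock_demo.c - minimal sketch of the local_lock pattern used for
 * zs_map_area: a per-CPU scratch area whose access section is serialized
 * with a local_lock_t instead of get_cpu_var()/put_cpu_var().
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/local_lock.h>

struct scratch_area {
	local_lock_t lock;
	char buf[64];
};

static DEFINE_PER_CPU(struct scratch_area, scratch) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static int __init locallock_demo_init(void)
{
	struct scratch_area *area;

	/* replaces the old get_cpu_var()/put_cpu_var() pair */
	local_lock(&scratch.lock);
	area = this_cpu_ptr(&scratch);
	snprintf(area->buf, sizeof(area->buf), "cpu %d", smp_processor_id());
	pr_info("local_lock demo: %s\n", area->buf);
	local_unlock(&scratch.lock);

	return 0;
}

static void __exit locallock_demo_exit(void)
{
}

module_init(locallock_demo_init);
module_exit(locallock_demo_exit);
MODULE_LICENSE("GPL");

It would build out of tree with the usual "obj-m += locallock_demo.o" makefile line; on an RT kernel the locked section may sleep, which is exactly what the zs_map_area conversion relies on.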
+ 
+@@ -1354,17 +1350,19 @@ size_t zs_huge_class_size(struct zs_pool *pool)
+ }
+ EXPORT_SYMBOL_GPL(zs_huge_class_size);
+ 
+-static unsigned long obj_malloc(struct size_class *class,
++static unsigned long obj_malloc(struct zs_pool *pool,
+ 				struct zspage *zspage, unsigned long handle)
+ {
+ 	int i, nr_page, offset;
+ 	unsigned long obj;
+ 	struct link_free *link;
++	struct size_class *class;
+ 
+ 	struct page *m_page;
+ 	unsigned long m_offset;
+ 	void *vaddr;
+ 
++	class = pool->size_class[zspage->class];
+ 	handle |= OBJ_ALLOCATED_TAG;
+ 	obj = get_freeobj(zspage);
+ 
+@@ -1379,7 +1377,7 @@ static unsigned long obj_malloc(struct size_class *class,
+ 	vaddr = kmap_atomic(m_page);
+ 	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
+ 	set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
+-	if (likely(!PageHugeObject(m_page)))
++	if (likely(!ZsHugePage(zspage)))
+ 		/* record handle in the header of allocated chunk */
+ 		link->handle = handle;
+ 	else
+@@ -1388,7 +1386,6 @@ static unsigned long obj_malloc(struct size_class *class,
+ 
+ 	kunmap_atomic(vaddr);
+ 	mod_zspage_inuse(zspage, 1);
+-	zs_stat_inc(class, OBJ_USED, 1);
+ 
+ 	obj = location_to_obj(m_page, obj);
+ 
+@@ -1424,13 +1421,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+ 	size += ZS_HANDLE_SIZE;
+ 	class = pool->size_class[get_size_class_index(size)];
+ 
++	/* class->lock effectively protects the zpage migration */
+ 	spin_lock(&class->lock);
+ 	zspage = find_get_zspage(class);
+ 	if (likely(zspage)) {
+-		obj = obj_malloc(class, zspage, handle);
++		obj = obj_malloc(pool, zspage, handle);
+ 		/* Now move the zspage to another fullness group, if required */
+ 		fix_fullness_group(class, zspage);
+ 		record_obj(handle, obj);
++		class_stat_inc(class, OBJ_USED, 1);
+ 		spin_unlock(&class->lock);
+ 
+ 		return handle;
+@@ -1445,14 +1444,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+ 	}
+ 
+ 	spin_lock(&class->lock);
+-	obj = obj_malloc(class, zspage, handle);
++	obj = obj_malloc(pool, zspage, handle);
+ 	newfg = get_fullness_group(class, zspage);
+ 	insert_zspage(class, zspage, newfg);
+ 	set_zspage_mapping(zspage, class->index, newfg);
+ 	record_obj(handle, obj);
+ 	atomic_long_add(class->pages_per_zspage,
+ 				&pool->pages_allocated);
+-	zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
++	class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
++	class_stat_inc(class, OBJ_USED, 1);
+ 
+ 	/* We completely set up zspage so mark them as movable */
+ 	SetZsPageMovable(pool, zspage);
+@@ -1462,7 +1462,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
+ }
+ EXPORT_SYMBOL_GPL(zs_malloc);
+ 
+-static void obj_free(struct size_class *class, unsigned long obj)
++static void obj_free(int class_size, unsigned long obj)
+ {
+ 	struct link_free *link;
+ 	struct zspage *zspage;
+@@ -1472,18 +1472,20 @@ static void obj_free(struct size_class *class, unsigned long obj)
+ 	void *vaddr;
+ 
+ 	obj_to_location(obj, &f_page, &f_objidx);
+-	f_offset = (class->size * f_objidx) & ~PAGE_MASK;
++	f_offset = (class_size * f_objidx) & ~PAGE_MASK;
+ 	zspage = get_zspage(f_page);
+ 
+ 	vaddr = kmap_atomic(f_page);
+ 
+ 	/* Insert this object in containing zspage's freelist */
+ 	link = (struct link_free *)(vaddr + f_offset);
+-	link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
++	if (likely(!ZsHugePage(zspage)))
++		link->next = get_freeobj(zspage) << OBJ_TAG_BITS;
++	else
++		f_page->index = 0;
+ 	kunmap_atomic(vaddr);
+ 	set_freeobj(zspage, f_objidx);
+ 	mod_zspage_inuse(zspage, -1);
+-	zs_stat_dec(class, OBJ_USED, 1);
+ }
+ 
+ void zs_free(struct zs_pool *pool, unsigned long handle)
+@@ -1491,42 +1493,33 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
+ 	struct zspage *zspage;
+ 	struct page *f_page;
+ 	unsigned long obj;
+-	unsigned int f_objidx;
+-	int class_idx;
+ 	struct size_class *class;
+ 	enum fullness_group fullness;
+-	bool isolated;
+ 
+ 	if (unlikely(!handle))
+ 		return;
+ 
+-	pin_tag(handle);
++	/*
++	 * The pool->migrate_lock protects the race with zpage's migration
++	 * so it's safe to get the page from handle.
++	 */
++	read_lock(&pool->migrate_lock);
+ 	obj = handle_to_obj(handle);
+-	obj_to_location(obj, &f_page, &f_objidx);
++	obj_to_page(obj, &f_page);
+ 	zspage = get_zspage(f_page);
+-
+-	migrate_read_lock(zspage);
+-
+-	get_zspage_mapping(zspage, &class_idx, &fullness);
+-	class = pool->size_class[class_idx];
+-
++	class = zspage_class(pool, zspage);
+ 	spin_lock(&class->lock);
+-	obj_free(class, obj);
++	read_unlock(&pool->migrate_lock);
++
++	obj_free(class->size, obj);
++	class_stat_dec(class, OBJ_USED, 1);
+ 	fullness = fix_fullness_group(class, zspage);
+-	if (fullness != ZS_EMPTY) {
+-		migrate_read_unlock(zspage);
++	if (fullness != ZS_EMPTY)
+ 		goto out;
+-	}
+ 
+-	isolated = is_zspage_isolated(zspage);
+-	migrate_read_unlock(zspage);
+-	/* If zspage is isolated, zs_page_putback will free the zspage */
+-	if (likely(!isolated))
+-		free_zspage(pool, class, zspage);
++	free_zspage(pool, class, zspage);
+ out:
+-
+ 	spin_unlock(&class->lock);
+-	unpin_tag(handle);
+ 	cache_free_handle(pool, handle);
+ }
+ EXPORT_SYMBOL_GPL(zs_free);
+@@ -1601,7 +1594,6 @@ static void zs_object_copy(struct size_class *class, unsigned long dst,
+ static unsigned long find_alloced_obj(struct size_class *class,
+ 					struct page *page, int *obj_idx)
+ {
+-	unsigned long head;
+ 	int offset = 0;
+ 	int index = *obj_idx;
+ 	unsigned long handle = 0;
+@@ -1611,13 +1603,8 @@ static unsigned long find_alloced_obj(struct size_class *class,
+ 	offset += class->size * index;
+ 
+ 	while (offset < PAGE_SIZE) {
+-		head = obj_to_head(page, addr + offset);
+-		if (head & OBJ_ALLOCATED_TAG) {
+-			handle = head & ~OBJ_ALLOCATED_TAG;
+-			if (trypin_tag(handle))
+-				break;
+-			handle = 0;
+-		}
++		if (obj_allocated(page, addr + offset, &handle))
++			break;
+ 
+ 		offset += class->size;
+ 		index++;
+@@ -1663,25 +1650,16 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
+ 
+ 		/* Stop if there is no more space */
+ 		if (zspage_full(class, get_zspage(d_page))) {
+-			unpin_tag(handle);
+ 			ret = -ENOMEM;
+ 			break;
+ 		}
+ 
+ 		used_obj = handle_to_obj(handle);
+-		free_obj = obj_malloc(class, get_zspage(d_page), handle);
++		free_obj = obj_malloc(pool, get_zspage(d_page), handle);
+ 		zs_object_copy(class, free_obj, used_obj);
+ 		obj_idx++;
+-		/*
+-		 * record_obj updates handle's value to free_obj and it will
+-		 * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
+-		 * breaks synchronization using pin_tag(e,g, zs_free) so
+-		 * let's keep the lock bit.
+-		 */
+-		free_obj |= BIT(HANDLE_PIN_BIT);
+ 		record_obj(handle, free_obj);
+-		unpin_tag(handle);
+-		obj_free(class, used_obj);
++		obj_free(class->size, used_obj);
+ 	}
+ 
+ 	/* Remember last position in this iteration */
+@@ -1706,7 +1684,6 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)
+ 		zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
+ 							struct zspage, list);
+ 		if (zspage) {
+-			VM_BUG_ON(is_zspage_isolated(zspage));
+ 			remove_zspage(class, zspage, fg[i]);
+ 			return zspage;
+ 		}
+@@ -1727,8 +1704,6 @@ static enum fullness_group putback_zspage(struct size_class *class,
+ {
+ 	enum fullness_group fullness;
+ 
+-	VM_BUG_ON(is_zspage_isolated(zspage));
+-
+ 	fullness = get_fullness_group(class, zspage);
+ 	insert_zspage(class, zspage, fullness);
+ 	set_zspage_mapping(zspage, class->index, fullness);
+@@ -1797,6 +1772,11 @@ static void migrate_write_lock(struct zspage *zspage)
+ 	write_lock(&zspage->lock);
+ }
+ 
++static void migrate_write_lock_nested(struct zspage *zspage)
++{
++	write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
 +}
-+#endif
 +
- static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+ static void migrate_write_unlock(struct zspage *zspage)
+ {
+ 	write_unlock(&zspage->lock);
+@@ -1810,35 +1790,10 @@ static void inc_zspage_isolation(struct zspage *zspage)
+ 
+ static void dec_zspage_isolation(struct zspage *zspage)
+ {
++	VM_BUG_ON(zspage->isolated == 0);
+ 	zspage->isolated--;
+ }
+ 
+-static void putback_zspage_deferred(struct zs_pool *pool,
+-				    struct size_class *class,
+-				    struct zspage *zspage)
+-{
+-	enum fullness_group fg;
+-
+-	fg = putback_zspage(class, zspage);
+-	if (fg == ZS_EMPTY)
+-		schedule_work(&pool->free_work);
+-
+-}
+-
+-static inline void zs_pool_dec_isolated(struct zs_pool *pool)
+-{
+-	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
+-	atomic_long_dec(&pool->isolated_pages);
+-	/*
+-	 * Checking pool->destroying must happen after atomic_long_dec()
+-	 * for pool->isolated_pages above. Paired with the smp_mb() in
+-	 * zs_unregister_migration().
+-	 */
+-	smp_mb__after_atomic();
+-	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
+-		wake_up_all(&pool->migration_wait);
+-}
+-
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -1857,19 +1812,14 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 
+ 	create_page_chain(class, zspage, pages);
+ 	set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
+-	if (unlikely(PageHugeObject(oldpage)))
++	if (unlikely(ZsHugePage(zspage)))
+ 		newpage->index = oldpage->index;
+ 	__SetPageMovable(newpage, page_mapping(oldpage));
+ }
+ 
+ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
  {
- 	kmem_cache_free(pool->handle_cachep, (void *)handle);
-@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+-	struct zs_pool *pool;
+-	struct size_class *class;
+-	int class_idx;
+-	enum fullness_group fullness;
+ 	struct zspage *zspage;
+-	struct address_space *mapping;
+ 
+ 	/*
+ 	 * Page is locked so zspage couldn't be destroyed. For detail, look at
+@@ -1879,41 +1829,9 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
+ 	VM_BUG_ON_PAGE(PageIsolated(page), page);
+ 
+ 	zspage = get_zspage(page);
+-
+-	/*
+-	 * Without class lock, fullness could be stale while class_idx is okay
+-	 * because class_idx is constant unless page is freed so we should get
+-	 * fullness again under class lock.
+-	 */
+-	get_zspage_mapping(zspage, &class_idx, &fullness);
+-	mapping = page_mapping(page);
+-	pool = mapping->private_data;
+-	class = pool->size_class[class_idx];
+-
+-	spin_lock(&class->lock);
+-	if (get_zspage_inuse(zspage) == 0) {
+-		spin_unlock(&class->lock);
+-		return false;
+-	}
+-
+-	/* zspage is isolated for object migration */
+-	if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+-		spin_unlock(&class->lock);
+-		return false;
+-	}
+-
+-	/*
+-	 * If this is first time isolation for the zspage, isolate zspage from
+-	 * size_class to prevent further object allocation from the zspage.
+-	 */
+-	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+-		get_zspage_mapping(zspage, &class_idx, &fullness);
+-		atomic_long_inc(&pool->isolated_pages);
+-		remove_zspage(class, zspage, fullness);
+-	}
+-
++	migrate_write_lock(zspage);
+ 	inc_zspage_isolation(zspage);
+-	spin_unlock(&class->lock);
++	migrate_write_unlock(zspage);
+ 
+ 	return true;
+ }
+@@ -1923,16 +1841,13 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
+ {
+ 	struct zs_pool *pool;
+ 	struct size_class *class;
+-	int class_idx;
+-	enum fullness_group fullness;
+ 	struct zspage *zspage;
+ 	struct page *dummy;
+ 	void *s_addr, *d_addr, *addr;
+-	int offset, pos;
+-	unsigned long handle, head;
++	int offset;
++	unsigned long handle;
+ 	unsigned long old_obj, new_obj;
+ 	unsigned int obj_idx;
+-	int ret = -EAGAIN;
+ 
+ 	/*
+ 	 * We cannot support the _NO_COPY case here, because copy needs to
+@@ -1945,35 +1860,25 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
+ 	VM_BUG_ON_PAGE(!PageMovable(page), page);
+ 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
  
- static void record_obj(unsigned long handle, unsigned long obj)
- {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+-	zspage = get_zspage(page);
+-
+-	/* Concurrent compactor cannot migrate any subpage in zspage */
+-	migrate_write_lock(zspage);
+-	get_zspage_mapping(zspage, &class_idx, &fullness);
+ 	pool = mapping->private_data;
+-	class = pool->size_class[class_idx];
+-	offset = get_first_obj_offset(page);
+ 
++	/*
++	 * The pool migrate_lock protects the race between zpage migration
++	 * and zs_free.
++	 */
++	write_lock(&pool->migrate_lock);
++	zspage = get_zspage(page);
++	class = zspage_class(pool, zspage);
 +
-+	WRITE_ONCE(zh->addr, obj);
-+#else
++	/*
++	 * the class lock protects zpage alloc/free in the zspage.
++	 */
+ 	spin_lock(&class->lock);
+-	if (!get_zspage_inuse(zspage)) {
+-		/*
+-		 * Set "offset" to end of the page so that every loops
+-		 * skips unnecessary object scanning.
+-		 */
+-		offset = PAGE_SIZE;
+-	}
++	/* the migrate_write_lock protects zpage access via zs_map_object */
++	migrate_write_lock(zspage);
+ 
+-	pos = offset;
++	offset = get_first_obj_offset(page);
+ 	s_addr = kmap_atomic(page);
+-	while (pos < PAGE_SIZE) {
+-		head = obj_to_head(page, s_addr + pos);
+-		if (head & OBJ_ALLOCATED_TAG) {
+-			handle = head & ~OBJ_ALLOCATED_TAG;
+-			if (!trypin_tag(handle))
+-				goto unpin_objects;
+-		}
+-		pos += class->size;
+-	}
+ 
  	/*
- 	 * lsb of @obj represents handle lock while other bits
- 	 * represent object value the handle is pointing so
- 	 * updating shouldn't do store tearing.
- 	 */
- 	WRITE_ONCE(*(unsigned long *)handle, obj);
-+#endif
- }
+ 	 * Here, any user cannot access all objects in the zspage so let's move.
+@@ -1982,42 +1887,30 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
+ 	memcpy(d_addr, s_addr, PAGE_SIZE);
+ 	kunmap_atomic(d_addr);
+ 
+-	for (addr = s_addr + offset; addr < s_addr + pos;
++	for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE;
+ 					addr += class->size) {
+-		head = obj_to_head(page, addr);
+-		if (head & OBJ_ALLOCATED_TAG) {
+-			handle = head & ~OBJ_ALLOCATED_TAG;
+-			BUG_ON(!testpin_tag(handle));
++		if (obj_allocated(page, addr, &handle)) {
+ 
+ 			old_obj = handle_to_obj(handle);
+ 			obj_to_location(old_obj, &dummy, &obj_idx);
+ 			new_obj = (unsigned long)location_to_obj(newpage,
+ 								obj_idx);
+-			new_obj |= BIT(HANDLE_PIN_BIT);
+ 			record_obj(handle, new_obj);
+ 		}
+ 	}
++	kunmap_atomic(s_addr);
  
- /* zpool driver */
-@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
- #endif /* CONFIG_ZPOOL */
+ 	replace_sub_page(class, zspage, newpage, page);
+-	get_page(newpage);
+-
+-	dec_zspage_isolation(zspage);
+-
+ 	/*
+-	 * Page migration is done so let's putback isolated zspage to
+-	 * the list if @page is final isolated subpage in the zspage.
++	 * Since we complete the data copy and set up new zspage structure,
++	 * it's okay to release migration_lock.
+ 	 */
+-	if (!is_zspage_isolated(zspage)) {
+-		/*
+-		 * We cannot race with zs_destroy_pool() here because we wait
+-		 * for isolation to hit zero before we start destroying.
+-		 * Also, we ensure that everyone can see pool->destroying before
+-		 * we start waiting.
+-		 */
+-		putback_zspage_deferred(pool, class, zspage);
+-		zs_pool_dec_isolated(pool);
+-	}
++	write_unlock(&pool->migrate_lock);
++	spin_unlock(&class->lock);
++	dec_zspage_isolation(zspage);
++	migrate_write_unlock(zspage);
+ 
++	get_page(newpage);
+ 	if (page_zone(newpage) != page_zone(page)) {
+ 		dec_zone_page_state(page, NR_ZSPAGES);
+ 		inc_zone_page_state(newpage, NR_ZSPAGES);
+@@ -2025,55 +1918,21 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
+ 
+ 	reset_page(page);
+ 	put_page(page);
+-	page = newpage;
+-
+-	ret = MIGRATEPAGE_SUCCESS;
+-unpin_objects:
+-	for (addr = s_addr + offset; addr < s_addr + pos;
+-						addr += class->size) {
+-		head = obj_to_head(page, addr);
+-		if (head & OBJ_ALLOCATED_TAG) {
+-			handle = head & ~OBJ_ALLOCATED_TAG;
+-			BUG_ON(!testpin_tag(handle));
+-			unpin_tag(handle);
+-		}
+-	}
+-	kunmap_atomic(s_addr);
+-	spin_unlock(&class->lock);
+-	migrate_write_unlock(zspage);
  
- /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
--static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
-+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
-+	.lock	= INIT_LOCAL_LOCK(lock),
-+};
+-	return ret;
++	return MIGRATEPAGE_SUCCESS;
+ }
  
- static bool is_zspage_isolated(struct zspage *zspage)
+ static void zs_page_putback(struct page *page)
  {
-@@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
+-	struct zs_pool *pool;
+-	struct size_class *class;
+-	int class_idx;
+-	enum fullness_group fg;
+-	struct address_space *mapping;
+ 	struct zspage *zspage;
  
- static unsigned long handle_to_obj(unsigned long handle)
- {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-+
-+	return zh->addr;
-+#else
- 	return *(unsigned long *)handle;
-+#endif
- }
+ 	VM_BUG_ON_PAGE(!PageMovable(page), page);
+ 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
  
- static unsigned long obj_to_head(struct page *page, void *obj)
-@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
+ 	zspage = get_zspage(page);
+-	get_zspage_mapping(zspage, &class_idx, &fg);
+-	mapping = page_mapping(page);
+-	pool = mapping->private_data;
+-	class = pool->size_class[class_idx];
+-
+-	spin_lock(&class->lock);
++	migrate_write_lock(zspage);
+ 	dec_zspage_isolation(zspage);
+-	if (!is_zspage_isolated(zspage)) {
+-		/*
+-		 * Due to page_lock, we cannot free zspage immediately
+-		 * so let's defer.
+-		 */
+-		putback_zspage_deferred(pool, class, zspage);
+-		zs_pool_dec_isolated(pool);
+-	}
+-	spin_unlock(&class->lock);
++	migrate_write_unlock(zspage);
+ }
  
- static inline int testpin_tag(unsigned long handle)
- {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-+
-+	return spin_is_locked(&zh->lock);
-+#else
- 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
-+#endif
+ static const struct address_space_operations zsmalloc_aops = {
+@@ -2095,36 +1954,8 @@ static int zs_register_migration(struct zs_pool *pool)
+ 	return 0;
  }
  
- static inline int trypin_tag(unsigned long handle)
+-static bool pool_isolated_are_drained(struct zs_pool *pool)
+-{
+-	return atomic_long_read(&pool->isolated_pages) == 0;
+-}
+-
+-/* Function for resolving migration */
+-static void wait_for_isolated_drain(struct zs_pool *pool)
+-{
+-
+-	/*
+-	 * We're in the process of destroying the pool, so there are no
+-	 * active allocations. zs_page_isolate() fails for completely free
+-	 * zspages, so we need only wait for the zs_pool's isolated
+-	 * count to hit zero.
+-	 */
+-	wait_event(pool->migration_wait,
+-		   pool_isolated_are_drained(pool));
+-}
+-
+ static void zs_unregister_migration(struct zs_pool *pool)
  {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
-+
-+	return spin_trylock(&zh->lock);
-+#else
- 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
-+#endif
+-	pool->destroying = true;
+-	/*
+-	 * We need a memory barrier here to ensure global visibility of
+-	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
+-	 * case we don't care, or it will be > 0 and pool->destroying will
+-	 * ensure that we wake up once isolation hits 0.
+-	 */
+-	smp_mb();
+-	wait_for_isolated_drain(pool); /* This can block */
+ 	flush_work(&pool->free_work);
+ 	iput(pool->inode);
  }
+@@ -2154,7 +1985,6 @@ static void async_free_zspage(struct work_struct *work)
+ 		spin_unlock(&class->lock);
+ 	}
  
- static void pin_tag(unsigned long handle) __acquires(bitlock)
- {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+-
+ 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
+ 		list_del(&zspage->list);
+ 		lock_zspage(zspage);
+@@ -2218,8 +2048,13 @@ static unsigned long __zs_compact(struct zs_pool *pool,
+ 	struct zspage *dst_zspage = NULL;
+ 	unsigned long pages_freed = 0;
+ 
++	/* protect the race between zpage migration and zs_free */
++	write_lock(&pool->migrate_lock);
++	/* protect zpage allocation/free */
+ 	spin_lock(&class->lock);
+ 	while ((src_zspage = isolate_zspage(class, true))) {
++		/* protect someone accessing the zspage(i.e., zs_map_object) */
++		migrate_write_lock(src_zspage);
+ 
+ 		if (!zs_can_compact(class))
+ 			break;
+@@ -2228,6 +2063,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
+ 		cc.s_page = get_first_page(src_zspage);
+ 
+ 		while ((dst_zspage = isolate_zspage(class, false))) {
++			migrate_write_lock_nested(dst_zspage);
 +
-+	return spin_lock(&zh->lock);
-+#else
- 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
-+#endif
- }
+ 			cc.d_page = get_first_page(dst_zspage);
+ 			/*
+ 			 * If there is no more space in dst_page, resched
+@@ -2237,6 +2074,10 @@ static unsigned long __zs_compact(struct zs_pool *pool,
+ 				break;
+ 
+ 			putback_zspage(class, dst_zspage);
++			migrate_write_unlock(dst_zspage);
++			dst_zspage = NULL;
++			if (rwlock_is_contended(&pool->migrate_lock))
++				break;
+ 		}
  
- static void unpin_tag(unsigned long handle) __releases(bitlock)
- {
-+#ifdef CONFIG_PREEMPT_RT
-+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+ 		/* Stop if we couldn't find slot */
+@@ -2244,19 +2085,28 @@ static unsigned long __zs_compact(struct zs_pool *pool,
+ 			break;
+ 
+ 		putback_zspage(class, dst_zspage);
++		migrate_write_unlock(dst_zspage);
 +
-+	return spin_unlock(&zh->lock);
-+#else
- 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
-+#endif
- }
+ 		if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
++			migrate_write_unlock(src_zspage);
+ 			free_zspage(pool, class, src_zspage);
+ 			pages_freed += class->pages_per_zspage;
+-		}
++		} else
++			migrate_write_unlock(src_zspage);
+ 		spin_unlock(&class->lock);
++		write_unlock(&pool->migrate_lock);
+ 		cond_resched();
++		write_lock(&pool->migrate_lock);
+ 		spin_lock(&class->lock);
+ 	}
  
- static void reset_page(struct page *page)
-@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
- 	class = pool->size_class[class_idx];
- 	off = (class->size * obj_idx) & ~PAGE_MASK;
+-	if (src_zspage)
++	if (src_zspage) {
+ 		putback_zspage(class, src_zspage);
++		migrate_write_unlock(src_zspage);
++	}
  
--	area = &get_cpu_var(zs_map_area);
-+	local_lock(&zs_map_area.lock);
-+	area = this_cpu_ptr(&zs_map_area);
- 	area->vm_mm = mm;
- 	if (off + class->size <= PAGE_SIZE) {
- 		/* this object is contained entirely within a page */
-@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+ 	spin_unlock(&class->lock);
++	write_unlock(&pool->migrate_lock);
  
- 		__zs_unmap_object(area, pages, off, class->size);
- 	}
--	put_cpu_var(zs_map_area);
-+	local_unlock(&zs_map_area.lock);
+ 	return pages_freed;
+ }
+@@ -2362,15 +2212,12 @@ struct zs_pool *zs_create_pool(const char *name)
+ 		return NULL;
  
- 	migrate_read_unlock(zspage);
- 	unpin_tag(handle);
-diff --git a/net/Kconfig b/net/Kconfig
-index fb13460c6dab..074472dfa94a 100644
---- a/net/Kconfig
-+++ b/net/Kconfig
-@@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID
- 
- config NET_RX_BUSY_POLL
- 	bool
--	default y
-+	default y if !PREEMPT_RT
+ 	init_deferred_free(pool);
++	rwlock_init(&pool->migrate_lock);
+ 
+ 	pool->name = kstrdup(name, GFP_KERNEL);
+ 	if (!pool->name)
+ 		goto err;
+ 
+-#ifdef CONFIG_COMPACTION
+-	init_waitqueue_head(&pool->migration_wait);
+-#endif
+-
+ 	if (create_cache(pool))
+ 		goto err;
  
- config BQL
- 	bool
 diff --git a/net/core/dev.c b/net/core/dev.c
-index 91f53eeb0e79..f39077436e9d 100644
+index c4708e2487fb..daed8ce42db1 100644
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
-@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
- static inline void rps_lock(struct softnet_data *sd)
+@@ -222,18 +222,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
+ 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
+ }
+ 
+-static inline void rps_lock(struct softnet_data *sd)
++static inline void rps_lock_irqsave(struct softnet_data *sd,
++				    unsigned long *flags)
  {
- #ifdef CONFIG_RPS
+-#ifdef CONFIG_RPS
 -	spin_lock(&sd->input_pkt_queue.lock);
-+	raw_spin_lock(&sd->input_pkt_queue.raw_lock);
- #endif
+-#endif
++	if (IS_ENABLED(CONFIG_RPS))
++		spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
++	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_save(*flags);
  }
  
- static inline void rps_unlock(struct softnet_data *sd)
+-static inline void rps_unlock(struct softnet_data *sd)
++static inline void rps_lock_irq_disable(struct softnet_data *sd)
  {
- #ifdef CONFIG_RPS
+-#ifdef CONFIG_RPS
 -	spin_unlock(&sd->input_pkt_queue.lock);
-+	raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
- #endif
+-#endif
++	if (IS_ENABLED(CONFIG_RPS))
++		spin_lock_irq(&sd->input_pkt_queue.lock);
++	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_disable();
++}
++
++static inline void rps_unlock_irq_restore(struct softnet_data *sd,
++					  unsigned long *flags)
++{
++	if (IS_ENABLED(CONFIG_RPS))
++		spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
++	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_restore(*flags);
++}
++
++static inline void rps_unlock_irq_enable(struct softnet_data *sd)
++{
++	if (IS_ENABLED(CONFIG_RPS))
++		spin_unlock_irq(&sd->input_pkt_queue.lock);
++	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_enable();
+ }
+ 
+ static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
+@@ -371,12 +391,12 @@ static void list_netdevice(struct net_device *dev)
+ 
+ 	ASSERT_RTNL();
+ 
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
+ 	netdev_name_node_add(net, dev->name_node);
+ 	hlist_add_head_rcu(&dev->index_hlist,
+ 			   dev_index_hash(net, dev->ifindex));
+-	write_unlock_bh(&dev_base_lock);
++	write_unlock(&dev_base_lock);
+ 
+ 	dev_base_seq_inc(net);
  }
+@@ -389,11 +409,11 @@ static void unlist_netdevice(struct net_device *dev)
+ 	ASSERT_RTNL();
  
-@@ -3042,6 +3042,7 @@ static void __netif_reschedule(struct Qdisc *q)
+ 	/* Unlink dev from the device chain */
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 	list_del_rcu(&dev->dev_list);
+ 	netdev_name_node_del(dev->name_node);
+ 	hlist_del_rcu(&dev->index_hlist);
+-	write_unlock_bh(&dev_base_lock);
++	write_unlock(&dev_base_lock);
+ 
+ 	dev_base_seq_inc(dev_net(dev));
+ }
+@@ -1272,15 +1292,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
+ 
+ 	netdev_adjacent_rename_links(dev, oldname);
+ 
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 	netdev_name_node_del(dev->name_node);
+-	write_unlock_bh(&dev_base_lock);
++	write_unlock(&dev_base_lock);
+ 
+ 	synchronize_rcu();
+ 
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 	netdev_name_node_add(net, dev->name_node);
+-	write_unlock_bh(&dev_base_lock);
++	write_unlock(&dev_base_lock);
+ 
+ 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
+ 	ret = notifier_to_errno(ret);
+@@ -3048,6 +3068,7 @@ static void __netif_reschedule(struct Qdisc *q)
  	sd->output_queue_tailp = &q->next_sched;
  	raise_softirq_irqoff(NET_TX_SOFTIRQ);
  	local_irq_restore(flags);
@@ -9643,7 +11083,7 @@ index 91f53eeb0e79..f39077436e9d 100644
  }
  
  void __netif_schedule(struct Qdisc *q)
-@@ -3104,6 +3105,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+@@ -3110,6 +3131,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
  	__this_cpu_write(softnet_data.completion_queue, skb);
  	raise_softirq_irqoff(NET_TX_SOFTIRQ);
  	local_irq_restore(flags);
@@ -9651,41 +11091,80 @@ index 91f53eeb0e79..f39077436e9d 100644
  }
  EXPORT_SYMBOL(__dev_kfree_skb_irq);
  
-@@ -3831,7 +3833,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+@@ -3836,8 +3858,12 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+ 	 * separate lock before trying to get qdisc main lock.
  	 * This permits qdisc->running owner to get the lock more
  	 * often and dequeue packets faster.
++	 * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit
++	 * and then other tasks will only enqueue packets. The packets will be
++	 * sent after the qdisc owner is scheduled again. To prevent this
++	 * scenario the task always serialize on the lock.
  	 */
-+#ifdef CONFIG_PREEMPT_RT
-+	contended = true;
-+#else
- 	contended = qdisc_is_running(q);
-+#endif
+-	contended = qdisc_is_running(q);
++	contended = IS_ENABLED(CONFIG_PREEMPT_RT) || qdisc_is_running(q);
  	if (unlikely(contended))
  		spin_lock(&q->busylock);
  
-@@ -4656,6 +4662,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
- 	rps_unlock(sd);
+@@ -4640,9 +4666,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
  
- 	local_irq_restore(flags);
-+	preempt_check_resched_rt();
+ 	sd = &per_cpu(softnet_data, cpu);
+ 
+-	local_irq_save(flags);
+-
+-	rps_lock(sd);
++	rps_lock_irqsave(sd, &flags);
+ 	if (!netif_running(skb->dev))
+ 		goto drop;
+ 	qlen = skb_queue_len(&sd->input_pkt_queue);
+@@ -4651,26 +4675,30 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ enqueue:
+ 			__skb_queue_tail(&sd->input_pkt_queue, skb);
+ 			input_queue_tail_incr_save(sd, qtail);
+-			rps_unlock(sd);
+-			local_irq_restore(flags);
++			rps_unlock_irq_restore(sd, &flags);
+ 			return NET_RX_SUCCESS;
+ 		}
+ 
+ 		/* Schedule NAPI for backlog device
+ 		 * We can use non atomic operation since we own the queue lock
++		 * PREEMPT_RT needs to disable interrupts here for
++		 * synchronisation needed in napi_schedule.
+ 		 */
++		if (IS_ENABLED(CONFIG_PREEMPT_RT))
++			local_irq_disable();
++
+ 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+ 			if (!rps_ipi_queued(sd))
+ 				____napi_schedule(sd, &sd->backlog);
+ 		}
++		if (IS_ENABLED(CONFIG_PREEMPT_RT))
++			local_irq_enable();
+ 		goto enqueue;
+ 	}
+ 
+ drop:
+ 	sd->dropped++;
+-	rps_unlock(sd);
+-
+-	local_irq_restore(flags);
++	rps_unlock_irq_restore(sd, &flags);
  
  	atomic_long_inc(&skb->dev->rx_dropped);
  	kfree_skb(skb);
-@@ -4896,7 +4903,7 @@ static int netif_rx_internal(struct sk_buff *skb)
+@@ -4911,7 +4939,6 @@ static int netif_rx_internal(struct sk_buff *skb)
  		struct rps_dev_flow voidflow, *rflow = &voidflow;
  		int cpu;
  
 -		preempt_disable();
-+		migrate_disable();
  		rcu_read_lock();
  
  		cpu = get_rps_cpu(skb->dev, skb, &rflow);
-@@ -4906,14 +4913,14 @@ static int netif_rx_internal(struct sk_buff *skb)
+@@ -4921,14 +4948,12 @@ static int netif_rx_internal(struct sk_buff *skb)
  		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  
  		rcu_read_unlock();
 -		preempt_enable();
-+		migrate_enable();
  	} else
  #endif
  	{
@@ -9693,26 +11172,100 @@ index 91f53eeb0e79..f39077436e9d 100644
  
 -		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
 -		put_cpu();
-+		ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
-+		put_cpu_light();
++		ret = enqueue_to_backlog(skb, smp_processor_id(), &qtail);
  	}
  	return ret;
  }
-@@ -4952,11 +4959,9 @@ int netif_rx_ni(struct sk_buff *skb)
+@@ -4952,47 +4977,17 @@ int netif_rx(struct sk_buff *skb)
+ {
+ 	int ret;
+ 
++	local_bh_disable();
+ 	trace_netif_rx_entry(skb);
  
- 	trace_netif_rx_ni_entry(skb);
+ 	ret = netif_rx_internal(skb);
+ 	trace_netif_rx_exit(ret);
++	local_bh_enable();
+ 
+ 	return ret;
+ }
+ EXPORT_SYMBOL(netif_rx);
  
+-int netif_rx_ni(struct sk_buff *skb)
+-{
+-	int err;
+-
+-	trace_netif_rx_ni_entry(skb);
+-
 -	preempt_disable();
-+	local_bh_disable();
- 	err = netif_rx_internal(skb);
+-	err = netif_rx_internal(skb);
 -	if (local_softirq_pending())
 -		do_softirq();
 -	preempt_enable();
-+	local_bh_enable();
- 	trace_netif_rx_ni_exit(err);
+-	trace_netif_rx_ni_exit(err);
+-
+-	return err;
+-}
+-EXPORT_SYMBOL(netif_rx_ni);
+-
+-int netif_rx_any_context(struct sk_buff *skb)
+-{
+-	/*
+-	 * If invoked from contexts which do not invoke bottom half
+-	 * processing either at return from interrupt or when softrqs are
+-	 * reenabled, use netif_rx_ni() which invokes bottomhalf processing
+-	 * directly.
+-	 */
+-	if (in_interrupt())
+-		return netif_rx(skb);
+-	else
+-		return netif_rx_ni(skb);
+-}
+-EXPORT_SYMBOL(netif_rx_any_context);
+-
+ static __latent_entropy void net_tx_action(struct softirq_action *h)
+ {
+ 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+@@ -5764,8 +5759,7 @@ static void flush_backlog(struct work_struct *work)
+ 	local_bh_disable();
+ 	sd = this_cpu_ptr(&softnet_data);
  
- 	return err;
-@@ -6399,12 +6404,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+-	local_irq_disable();
+-	rps_lock(sd);
++	rps_lock_irq_disable(sd);
+ 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
+ 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ 			__skb_unlink(skb, &sd->input_pkt_queue);
+@@ -5773,8 +5767,7 @@ static void flush_backlog(struct work_struct *work)
+ 			input_queue_head_incr(sd);
+ 		}
+ 	}
+-	rps_unlock(sd);
+-	local_irq_enable();
++	rps_unlock_irq_enable(sd);
+ 
+ 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+ 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+@@ -5792,16 +5785,14 @@ static bool flush_required(int cpu)
+ 	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+ 	bool do_flush;
+ 
+-	local_irq_disable();
+-	rps_lock(sd);
++	rps_lock_irq_disable(sd);
+ 
+ 	/* as insertion into process_queue happens with the rps lock held,
+ 	 * process_queue access may race only with dequeue
+ 	 */
+ 	do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
+ 		   !skb_queue_empty_lockless(&sd->process_queue);
+-	rps_unlock(sd);
+-	local_irq_enable();
++	rps_unlock_irq_enable(sd);
+ 
+ 	return do_flush;
+ #endif
+@@ -6415,12 +6406,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
  		sd->rps_ipi_list = NULL;
  
  		local_irq_enable();
@@ -9720,493 +11273,124 @@ index 91f53eeb0e79..f39077436e9d 100644
  
  		/* Send pending IPI's to kick RPS processing on remote cpus. */
  		net_rps_send_ipi(remsd);
- 	} else
- #endif
- 		local_irq_enable();
-+	preempt_check_resched_rt();
- }
- 
- static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
-@@ -6482,6 +6489,7 @@ void __napi_schedule(struct napi_struct *n)
- 	local_irq_save(flags);
- 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
- 	local_irq_restore(flags);
-+	preempt_check_resched_rt();
- }
- EXPORT_SYMBOL(__napi_schedule);
- 
-@@ -11304,6 +11312,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
- 
- 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
- 	local_irq_enable();
-+	preempt_check_resched_rt();
- 
- #ifdef CONFIG_RPS
- 	remsd = oldsd->rps_ipi_list;
-@@ -11317,7 +11326,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
- 		netif_rx_ni(skb);
- 		input_queue_head_incr(oldsd);
- 	}
--	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
-+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
- 		netif_rx_ni(skb);
- 		input_queue_head_incr(oldsd);
- 	}
-@@ -11633,7 +11642,7 @@ static int __init net_dev_init(void)
- 
- 		INIT_WORK(flush, flush_backlog);
- 
--		skb_queue_head_init(&sd->input_pkt_queue);
-+		skb_queue_head_init_raw(&sd->input_pkt_queue);
- 		skb_queue_head_init(&sd->process_queue);
- #ifdef CONFIG_XFRM_OFFLOAD
- 		skb_queue_head_init(&sd->xfrm_backlog);
-diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
-index 8e582e29a41e..4fcbdd71c59f 100644
---- a/net/core/gen_estimator.c
-+++ b/net/core/gen_estimator.c
-@@ -40,10 +40,10 @@
-  */
- 
- struct net_rate_estimator {
--	struct gnet_stats_basic_packed	*bstats;
-+	struct gnet_stats_basic_sync	*bstats;
- 	spinlock_t		*stats_lock;
--	seqcount_t		*running;
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
-+	bool			running;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats;
- 	u8			ewma_log;
- 	u8			intvl_log; /* period : (250ms << intvl_log) */
- 
-@@ -60,13 +60,13 @@ struct net_rate_estimator {
- };
- 
- static void est_fetch_counters(struct net_rate_estimator *e,
--			       struct gnet_stats_basic_packed *b)
-+			       struct gnet_stats_basic_sync *b)
- {
--	memset(b, 0, sizeof(*b));
-+	gnet_stats_basic_sync_init(b);
- 	if (e->stats_lock)
- 		spin_lock(e->stats_lock);
- 
--	__gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
-+	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
- 
- 	if (e->stats_lock)
- 		spin_unlock(e->stats_lock);
-@@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e,
- static void est_timer(struct timer_list *t)
- {
- 	struct net_rate_estimator *est = from_timer(est, t, timer);
--	struct gnet_stats_basic_packed b;
-+	struct gnet_stats_basic_sync b;
-+	u64 b_bytes, b_packets;
- 	u64 rate, brate;
- 
- 	est_fetch_counters(est, &b);
--	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
-+	b_bytes = u64_stats_read(&b.bytes);
-+	b_packets = u64_stats_read(&b.packets);
-+
-+	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
- 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
- 
--	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
-+	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
- 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
- 
- 	write_seqcount_begin(&est->seq);
-@@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t)
- 	est->avpps += rate;
- 	write_seqcount_end(&est->seq);
- 
--	est->last_bytes = b.bytes;
--	est->last_packets = b.packets;
-+	est->last_bytes = b_bytes;
-+	est->last_packets = b_packets;
- 
- 	est->next_jiffies += ((HZ/4) << est->intvl_log);
- 
-@@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t)
-  * @cpu_bstats: bstats per cpu
-  * @rate_est: rate estimator statistics
-  * @lock: lock for statistics and control path
-- * @running: qdisc running seqcount
-+ * @running: true if @bstats represents a running qdisc, thus @bstats'
-+ *           internal values might change during basic reads. Only used
-+ *           if @bstats_cpu is NULL
-  * @opt: rate estimator configuration TLV
-  *
-  * Creates a new rate estimator with &bstats as source and &rate_est
-@@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t)
-  * Returns 0 on success or a negative error code.
-  *
-  */
--int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
--		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
-+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
-+		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
- 		      struct net_rate_estimator __rcu **rate_est,
- 		      spinlock_t *lock,
--		      seqcount_t *running,
-+		      bool running,
- 		      struct nlattr *opt)
- {
- 	struct gnet_estimator *parm = nla_data(opt);
- 	struct net_rate_estimator *old, *est;
--	struct gnet_stats_basic_packed b;
-+	struct gnet_stats_basic_sync b;
- 	int intvl_log;
- 
- 	if (nla_len(opt) < sizeof(*parm))
-@@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- 	est_fetch_counters(est, &b);
- 	if (lock)
- 		local_bh_enable();
--	est->last_bytes = b.bytes;
--	est->last_packets = b.packets;
-+	est->last_bytes = u64_stats_read(&b.bytes);
-+	est->last_packets = u64_stats_read(&b.packets);
- 
- 	if (lock)
- 		spin_lock_bh(lock);
-@@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
-  * @cpu_bstats: bstats per cpu
-  * @rate_est: rate estimator statistics
-  * @lock: lock for statistics and control path
-- * @running: qdisc running seqcount (might be NULL)
-+ * @running: true if @bstats represents a running qdisc, thus @bstats'
-+ *           internal values might change during basic reads. Only used
-+ *           if @cpu_bstats is NULL
-  * @opt: rate estimator configuration TLV
-  *
-  * Replaces the configuration of a rate estimator by calling
-@@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
-  *
-  * Returns 0 on success or a negative error code.
-  */
--int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
--			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
-+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
-+			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
- 			  struct net_rate_estimator __rcu **rate_est,
- 			  spinlock_t *lock,
--			  seqcount_t *running, struct nlattr *opt)
-+			  bool running, struct nlattr *opt)
- {
- 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
- 				 lock, running, opt);
-diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
-index e491b083b348..a10335b4ba2d 100644
---- a/net/core/gen_stats.c
-+++ b/net/core/gen_stats.c
-@@ -18,7 +18,7 @@
- #include <linux/gen_stats.h>
- #include <net/netlink.h>
- #include <net/gen_stats.h>
--
-+#include <net/sch_generic.h>
- 
- static inline int
- gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
-@@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
- }
- EXPORT_SYMBOL(gnet_stats_start_copy);
- 
--static void
--__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
--			    struct gnet_stats_basic_cpu __percpu *cpu)
-+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
-+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
- {
-+	u64_stats_set(&b->bytes, 0);
-+	u64_stats_set(&b->packets, 0);
-+	u64_stats_init(&b->syncp);
-+}
-+EXPORT_SYMBOL(gnet_stats_basic_sync_init);
-+
-+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
-+				     struct gnet_stats_basic_sync __percpu *cpu)
-+{
-+	u64 t_bytes = 0, t_packets = 0;
- 	int i;
- 
- 	for_each_possible_cpu(i) {
--		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
-+		struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
- 		unsigned int start;
- 		u64 bytes, packets;
- 
- 		do {
- 			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
--			bytes = bcpu->bstats.bytes;
--			packets = bcpu->bstats.packets;
-+			bytes = u64_stats_read(&bcpu->bytes);
-+			packets = u64_stats_read(&bcpu->packets);
- 		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
- 
--		bstats->bytes += bytes;
--		bstats->packets += packets;
-+		t_bytes += bytes;
-+		t_packets += packets;
-+	}
-+	_bstats_update(bstats, t_bytes, t_packets);
-+}
-+
-+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
-+			  struct gnet_stats_basic_sync __percpu *cpu,
-+			  struct gnet_stats_basic_sync *b, bool running)
-+{
-+	unsigned int start;
-+	u64 bytes = 0;
-+	u64 packets = 0;
-+
-+	WARN_ON_ONCE((cpu || running) && in_hardirq());
-+
-+	if (cpu) {
-+		gnet_stats_add_basic_cpu(bstats, cpu);
-+		return;
- 	}
-+	do {
-+		if (running)
-+			start = u64_stats_fetch_begin_irq(&b->syncp);
-+		bytes = u64_stats_read(&b->bytes);
-+		packets = u64_stats_read(&b->packets);
-+	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
-+
-+	_bstats_update(bstats, bytes, packets);
- }
-+EXPORT_SYMBOL(gnet_stats_add_basic);
- 
--void
--__gnet_stats_copy_basic(const seqcount_t *running,
--			struct gnet_stats_basic_packed *bstats,
--			struct gnet_stats_basic_cpu __percpu *cpu,
--			struct gnet_stats_basic_packed *b)
-+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
-+				  struct gnet_stats_basic_sync __percpu *cpu,
-+				  struct gnet_stats_basic_sync *b, bool running)
- {
--	unsigned int seq;
-+	unsigned int start;
- 
- 	if (cpu) {
--		__gnet_stats_copy_basic_cpu(bstats, cpu);
-+		u64 t_bytes = 0, t_packets = 0;
-+		int i;
-+
-+		for_each_possible_cpu(i) {
-+			struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
-+			unsigned int start;
-+			u64 bytes, packets;
-+
-+			do {
-+				start = u64_stats_fetch_begin_irq(&bcpu->syncp);
-+				bytes = u64_stats_read(&bcpu->bytes);
-+				packets = u64_stats_read(&bcpu->packets);
-+			} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
-+
-+			t_bytes += bytes;
-+			t_packets += packets;
-+		}
-+		*ret_bytes = t_bytes;
-+		*ret_packets = t_packets;
- 		return;
- 	}
- 	do {
- 		if (running)
--			seq = read_seqcount_begin(running);
--		bstats->bytes = b->bytes;
--		bstats->packets = b->packets;
--	} while (running && read_seqcount_retry(running, seq));
-+			start = u64_stats_fetch_begin_irq(&b->syncp);
-+		*ret_bytes = u64_stats_read(&b->bytes);
-+		*ret_packets = u64_stats_read(&b->packets);
-+	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
- }
--EXPORT_SYMBOL(__gnet_stats_copy_basic);
+ 	} else
+ #endif
+ 		local_irq_enable();
++	preempt_check_resched_rt();
+ }
  
- static int
--___gnet_stats_copy_basic(const seqcount_t *running,
--			 struct gnet_dump *d,
--			 struct gnet_stats_basic_cpu __percpu *cpu,
--			 struct gnet_stats_basic_packed *b,
--			 int type)
-+___gnet_stats_copy_basic(struct gnet_dump *d,
-+			 struct gnet_stats_basic_sync __percpu *cpu,
-+			 struct gnet_stats_basic_sync *b,
-+			 int type, bool running)
- {
--	struct gnet_stats_basic_packed bstats = {0};
-+	u64 bstats_bytes, bstats_packets;
- 
--	__gnet_stats_copy_basic(running, &bstats, cpu, b);
-+	gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
- 
- 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
--		d->tc_stats.bytes = bstats.bytes;
--		d->tc_stats.packets = bstats.packets;
-+		d->tc_stats.bytes = bstats_bytes;
-+		d->tc_stats.packets = bstats_packets;
- 	}
+ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+@@ -6460,8 +6453,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
  
- 	if (d->tail) {
-@@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
- 		int res;
- 
- 		memset(&sb, 0, sizeof(sb));
--		sb.bytes = bstats.bytes;
--		sb.packets = bstats.packets;
-+		sb.bytes = bstats_bytes;
-+		sb.packets = bstats_packets;
- 		res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
--		if (res < 0 || sb.packets == bstats.packets)
-+		if (res < 0 || sb.packets == bstats_packets)
- 			return res;
- 		/* emit 64bit stats only if needed */
--		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
--				       sizeof(bstats.packets), TCA_STATS_PAD);
-+		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
-+				       sizeof(bstats_packets), TCA_STATS_PAD);
+ 		}
+ 
+-		local_irq_disable();
+-		rps_lock(sd);
++		rps_lock_irq_disable(sd);
+ 		if (skb_queue_empty(&sd->input_pkt_queue)) {
+ 			/*
+ 			 * Inline a custom version of __napi_complete().
+@@ -6477,8 +6469,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
+ 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ 						   &sd->process_queue);
+ 		}
+-		rps_unlock(sd);
+-		local_irq_enable();
++		rps_unlock_irq_enable(sd);
  	}
- 	return 0;
+ 
+ 	return work;
+@@ -6498,6 +6489,7 @@ void __napi_schedule(struct napi_struct *n)
+ 	local_irq_save(flags);
+ 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ 	local_irq_restore(flags);
++	preempt_check_resched_rt();
  }
+ EXPORT_SYMBOL(__napi_schedule);
  
- /**
-  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
-- * @running: seqcount_t pointer
-  * @d: dumping handle
-  * @cpu: copy statistic per cpu
-  * @b: basic statistics
-+ * @running: true if @b represents a running qdisc, thus @b's
-+ *           internal values might change during basic reads.
-+ *           Only used if @cpu is NULL
-+ *
-+ * Context: task; must not be run from IRQ or BH contexts
-  *
-  * Appends the basic statistics to the top level TLV created by
-  * gnet_stats_start_copy().
-@@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
-  * if the room in the socket buffer was not sufficient.
-  */
- int
--gnet_stats_copy_basic(const seqcount_t *running,
--		      struct gnet_dump *d,
--		      struct gnet_stats_basic_cpu __percpu *cpu,
--		      struct gnet_stats_basic_packed *b)
-+gnet_stats_copy_basic(struct gnet_dump *d,
-+		      struct gnet_stats_basic_sync __percpu *cpu,
-+		      struct gnet_stats_basic_sync *b,
-+		      bool running)
- {
--	return ___gnet_stats_copy_basic(running, d, cpu, b,
--					TCA_STATS_BASIC);
-+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
- }
- EXPORT_SYMBOL(gnet_stats_copy_basic);
+@@ -11329,6 +11321,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
  
- /**
-  * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
-- * @running: seqcount_t pointer
-  * @d: dumping handle
-  * @cpu: copy statistic per cpu
-  * @b: basic statistics
-+ * @running: true if @b represents a running qdisc, thus @b's
-+ *           internal values might change during basic reads.
-+ *           Only used if @cpu is NULL
-+ *
-+ * Context: task; must not be run from IRQ or BH contexts
-  *
-  * Appends the basic statistics to the top level TLV created by
-  * gnet_stats_start_copy().
-@@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
-  * if the room in the socket buffer was not sufficient.
-  */
- int
--gnet_stats_copy_basic_hw(const seqcount_t *running,
--			 struct gnet_dump *d,
--			 struct gnet_stats_basic_cpu __percpu *cpu,
--			 struct gnet_stats_basic_packed *b)
-+gnet_stats_copy_basic_hw(struct gnet_dump *d,
-+			 struct gnet_stats_basic_sync __percpu *cpu,
-+			 struct gnet_stats_basic_sync *b,
-+			 bool running)
- {
--	return ___gnet_stats_copy_basic(running, d, cpu, b,
--					TCA_STATS_BASIC_HW);
-+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
- }
- EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+ 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ 	local_irq_enable();
++	preempt_check_resched_rt();
+ 
+ #ifdef CONFIG_RPS
+ 	remsd = oldsd->rps_ipi_list;
+diff --git a/net/core/link_watch.c b/net/core/link_watch.c
+index 1a455847da54..9599afd0862d 100644
+--- a/net/core/link_watch.c
++++ b/net/core/link_watch.c
+@@ -55,7 +55,7 @@ static void rfc2863_policy(struct net_device *dev)
+ 	if (operstate == dev->operstate)
+ 		return;
+ 
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 
+ 	switch(dev->link_mode) {
+ 	case IF_LINK_MODE_TESTING:
+@@ -74,7 +74,7 @@ static void rfc2863_policy(struct net_device *dev)
  
-@@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
+ 	dev->operstate = operstate;
+ 
+-	write_unlock_bh(&dev_base_lock);
++	write_unlock(&dev_base_lock);
  }
- EXPORT_SYMBOL(gnet_stats_copy_rate_est);
  
--static void
--__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
--			    const struct gnet_stats_queue __percpu *q)
-+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
-+				     const struct gnet_stats_queue __percpu *q)
- {
- 	int i;
  
- 	for_each_possible_cpu(i) {
- 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 2af8aeeadadf..716be2f88cd7 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -842,9 +842,9 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
+ 	}
  
--		qstats->qlen = 0;
-+		qstats->qlen += qcpu->backlog;
- 		qstats->backlog += qcpu->backlog;
- 		qstats->drops += qcpu->drops;
- 		qstats->requeues += qcpu->requeues;
-@@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
+ 	if (dev->operstate != operstate) {
+-		write_lock_bh(&dev_base_lock);
++		write_lock(&dev_base_lock);
+ 		dev->operstate = operstate;
+-		write_unlock_bh(&dev_base_lock);
++		write_unlock(&dev_base_lock);
+ 		netdev_state_change(dev);
  	}
  }
+@@ -2779,11 +2779,11 @@ static int do_setlink(const struct sk_buff *skb,
+ 	if (tb[IFLA_LINKMODE]) {
+ 		unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);
+ 
+-		write_lock_bh(&dev_base_lock);
++		write_lock(&dev_base_lock);
+ 		if (dev->link_mode ^ value)
+ 			status |= DO_SETLINK_NOTIFY;
+ 		dev->link_mode = value;
+-		write_unlock_bh(&dev_base_lock);
++		write_unlock(&dev_base_lock);
+ 	}
  
--void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
--			     const struct gnet_stats_queue __percpu *cpu,
--			     const struct gnet_stats_queue *q,
--			     __u32 qlen)
-+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
-+			  const struct gnet_stats_queue __percpu *cpu,
-+			  const struct gnet_stats_queue *q)
- {
- 	if (cpu) {
--		__gnet_stats_copy_queue_cpu(qstats, cpu);
-+		gnet_stats_add_queue_cpu(qstats, cpu);
+ 	if (tb[IFLA_VFINFO_LIST]) {
+diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
+index 737e4f17e1c6..e57fdad9ef94 100644
+--- a/net/hsr/hsr_device.c
++++ b/net/hsr/hsr_device.c
+@@ -30,13 +30,13 @@ static bool is_slave_up(struct net_device *dev)
+ 
+ static void __hsr_set_operstate(struct net_device *dev, int transition)
+ {
+-	write_lock_bh(&dev_base_lock);
++	write_lock(&dev_base_lock);
+ 	if (dev->operstate != transition) {
+ 		dev->operstate = transition;
+-		write_unlock_bh(&dev_base_lock);
++		write_unlock(&dev_base_lock);
+ 		netdev_state_change(dev);
  	} else {
--		qstats->qlen = q->qlen;
--		qstats->backlog = q->backlog;
--		qstats->drops = q->drops;
--		qstats->requeues = q->requeues;
--		qstats->overlimits = q->overlimits;
-+		qstats->qlen += q->qlen;
-+		qstats->backlog += q->backlog;
-+		qstats->drops += q->drops;
-+		qstats->requeues += q->requeues;
-+		qstats->overlimits += q->overlimits;
+-		write_unlock_bh(&dev_base_lock);
++		write_unlock(&dev_base_lock);
  	}
--
--	qstats->qlen = qlen;
  }
--EXPORT_SYMBOL(__gnet_stats_copy_queue);
-+EXPORT_SYMBOL(gnet_stats_add_queue);
- 
- /**
-  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
-@@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
- {
- 	struct gnet_stats_queue qstats = {0};
  
--	__gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
-+	gnet_stats_add_queue(&qstats, cpu_q, q);
-+	qstats.qlen = qlen;
- 
- 	if (d->compat_tc_stats) {
- 		d->tc_stats.drops = qstats.drops;
 diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
-index 75737267746f..e460c84b1f8e 100644
+index 75737267746f..7bd1e10086f0 100644
 --- a/net/ipv4/inet_hashtables.c
 +++ b/net/ipv4/inet_hashtables.c
 @@ -637,7 +637,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
@@ -10219,7 +11403,7 @@ index 75737267746f..e460c84b1f8e 100644
  		return 0;
  	}
  	WARN_ON(!sk_unhashed(sk));
-@@ -669,11 +671,8 @@ int inet_hash(struct sock *sk)
+@@ -669,45 +671,54 @@ int inet_hash(struct sock *sk)
  {
  	int err = 0;
  
@@ -10232,38 +11416,66 @@ index 75737267746f..e460c84b1f8e 100644
  
  	return err;
  }
-@@ -684,17 +683,20 @@ void inet_unhash(struct sock *sk)
- 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- 	struct inet_listen_hashbucket *ilb = NULL;
- 	spinlock_t *lock;
-+	bool state_listen;
+ EXPORT_SYMBOL_GPL(inet_hash);
  
+-void inet_unhash(struct sock *sk)
++static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
+ {
+-	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+-	struct inet_listen_hashbucket *ilb = NULL;
+-	spinlock_t *lock;
+-
  	if (sk_unhashed(sk))
  		return;
  
- 	if (sk->sk_state == TCP_LISTEN) {
-+		state_listen = true;
- 		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+-	if (sk->sk_state == TCP_LISTEN) {
+-		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
 -		lock = &ilb->lock;
-+		spin_lock(&ilb->lock);
- 	} else {
-+		state_listen = false;
- 		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-+		spin_lock_bh(lock);
- 	}
+-	} else {
+-		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+-	}
 -	spin_lock_bh(lock);
- 	if (sk_unhashed(sk))
- 		goto unlock;
- 
-@@ -707,7 +709,10 @@ void inet_unhash(struct sock *sk)
+-	if (sk_unhashed(sk))
+-		goto unlock;
+-
+ 	if (rcu_access_pointer(sk->sk_reuseport_cb))
+ 		reuseport_stop_listen_sock(sk);
+ 	if (ilb) {
++		struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
++
+ 		inet_unhash2(hashinfo, sk);
+ 		ilb->count--;
+ 	}
  	__sk_nulls_del_node_init_rcu(sk);
  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- unlock:
+-unlock:
 -	spin_unlock_bh(lock);
-+	if (state_listen)
++}
++
++void inet_unhash(struct sock *sk)
++{
++	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
++
++	if (sk_unhashed(sk))
++		return;
++
++	if (sk->sk_state == TCP_LISTEN) {
++		struct inet_listen_hashbucket *ilb;
++
++		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
++		/* Don't disable bottom halves while acquiring the lock to
++		 * avoid circular locking dependency on PREEMPT_RT.
++		 */
++		spin_lock(&ilb->lock);
++		__inet_unhash(sk, ilb);
 +		spin_unlock(&ilb->lock);
-+	else
++	} else {
++		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
++
++		spin_lock_bh(lock);
++		__inet_unhash(sk, NULL);
 +		spin_unlock_bh(lock);
++	}
  }
  EXPORT_SYMBOL_GPL(inet_unhash);
  
@@ -10284,1137 +11496,8 @@ index 67c9114835c8..0a2e7f228391 100644
  
  	return err;
  }
-diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
-index 0d5c422f8745..8aec1b529364 100644
---- a/net/netfilter/xt_RATEEST.c
-+++ b/net/netfilter/xt_RATEEST.c
-@@ -94,11 +94,11 @@ static unsigned int
- xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
- {
- 	const struct xt_rateest_target_info *info = par->targinfo;
--	struct gnet_stats_basic_packed *stats = &info->est->bstats;
-+	struct gnet_stats_basic_sync *stats = &info->est->bstats;
- 
- 	spin_lock_bh(&info->est->lock);
--	stats->bytes += skb->len;
--	stats->packets++;
-+	u64_stats_add(&stats->bytes, skb->len);
-+	u64_stats_inc(&stats->packets);
- 	spin_unlock_bh(&info->est->lock);
- 
- 	return XT_CONTINUE;
-@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
- 	if (!est)
- 		goto err1;
- 
-+	gnet_stats_basic_sync_init(&est->bstats);
- 	strlcpy(est->name, info->name, sizeof(est->name));
- 	spin_lock_init(&est->lock);
- 	est->refcnt		= 1;
-diff --git a/net/sched/act_api.c b/net/sched/act_api.c
-index 7dd3a2dc5fa4..3258da3d5bed 100644
---- a/net/sched/act_api.c
-+++ b/net/sched/act_api.c
-@@ -480,16 +480,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
- 		atomic_set(&p->tcfa_bindcnt, 1);
- 
- 	if (cpustats) {
--		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-+		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
- 		if (!p->cpu_bstats)
- 			goto err1;
--		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-+		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
- 		if (!p->cpu_bstats_hw)
- 			goto err2;
- 		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
- 		if (!p->cpu_qstats)
- 			goto err3;
- 	}
-+	gnet_stats_basic_sync_init(&p->tcfa_bstats);
-+	gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
- 	spin_lock_init(&p->tcfa_lock);
- 	p->tcfa_index = index;
- 	p->tcfa_tm.install = jiffies;
-@@ -499,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
- 	if (est) {
- 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
- 					&p->tcfa_rate_est,
--					&p->tcfa_lock, NULL, est);
-+					&p->tcfa_lock, false, est);
- 		if (err)
- 			goto err4;
- 	}
-@@ -1126,13 +1128,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
- 			     u64 drops, bool hw)
- {
- 	if (a->cpu_bstats) {
--		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-+		_bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
- 
- 		this_cpu_ptr(a->cpu_qstats)->drops += drops;
- 
- 		if (hw)
--			_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
--					   bytes, packets);
-+			_bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
-+				       bytes, packets);
- 		return;
- 	}
- 
-@@ -1171,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
- 	if (err < 0)
- 		goto errout;
- 
--	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
--	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
--				     &p->tcfa_bstats_hw) < 0 ||
-+	if (gnet_stats_copy_basic(&d, p->cpu_bstats,
-+				  &p->tcfa_bstats, false) < 0 ||
-+	    gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
-+				     &p->tcfa_bstats_hw, false) < 0 ||
- 	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
- 	    gnet_stats_copy_queue(&d, p->cpu_qstats,
- 				  &p->tcfa_qstats,
-diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
-index 5c36013339e1..f2bf896331a5 100644
---- a/net/sched/act_bpf.c
-+++ b/net/sched/act_bpf.c
-@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
- 	int action, filter_res;
- 
- 	tcf_lastuse_update(&prog->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- 
- 	filter = rcu_dereference(prog->filter);
- 	if (at_ingress) {
-diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
-index 7064a365a1a9..b757f90a2d58 100644
---- a/net/sched/act_ife.c
-+++ b/net/sched/act_ife.c
-@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
- 	u8 *tlv_data;
- 	u16 metalen;
- 
--	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
- 	tcf_lastuse_update(&ife->tcf_tm);
- 
- 	if (skb_at_tc_ingress(skb))
-@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
- 			exceed_mtu = true;
- 	}
- 
--	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
- 	tcf_lastuse_update(&ife->tcf_tm);
- 
- 	if (!metalen) {		/* no metadata to send */
-diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
-index e4529b428cf4..8faa4c58305e 100644
---- a/net/sched/act_mpls.c
-+++ b/net/sched/act_mpls.c
-@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
- 	int ret, mac_len;
- 
- 	tcf_lastuse_update(&m->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- 
- 	/* Ensure 'data' points at mac_header prior calling mpls manipulating
- 	 * functions.
-diff --git a/net/sched/act_police.c b/net/sched/act_police.c
-index 832157a840fc..9e77ba8401e5 100644
---- a/net/sched/act_police.c
-+++ b/net/sched/act_police.c
-@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
- 					    police->common.cpu_bstats,
- 					    &police->tcf_rate_est,
- 					    &police->tcf_lock,
--					    NULL, est);
-+					    false, est);
- 		if (err)
- 			goto failure;
- 	} else if (tb[TCA_POLICE_AVRATE] &&
-@@ -248,7 +248,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
- 	int ret;
- 
- 	tcf_lastuse_update(&police->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
- 
- 	ret = READ_ONCE(police->tcf_action);
- 	p = rcu_dereference_bh(police->params);
-diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
-index 230501eb9e06..ce859b0e0deb 100644
---- a/net/sched/act_sample.c
-+++ b/net/sched/act_sample.c
-@@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
- 	int retval;
- 
- 	tcf_lastuse_update(&s->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
- 	retval = READ_ONCE(s->tcf_action);
- 
- 	psample_group = rcu_dereference_bh(s->psample_group);
-diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
-index cbbe1861d3a2..e617ab4505ca 100644
---- a/net/sched/act_simple.c
-+++ b/net/sched/act_simple.c
-@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
- 	 * then it would look like "hello_3" (without quotes)
- 	 */
- 	pr_info("simple: %s_%llu\n",
--	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
-+		(char *)d->tcfd_defdata,
-+		u64_stats_read(&d->tcf_bstats.packets));
- 	spin_unlock(&d->tcf_lock);
- 	return d->tcf_action;
- }
-diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
-index 605418538347..d30ecbfc8f84 100644
---- a/net/sched/act_skbedit.c
-+++ b/net/sched/act_skbedit.c
-@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
- 	int action;
- 
- 	tcf_lastuse_update(&d->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- 
- 	params = rcu_dereference_bh(d->params);
- 	action = READ_ONCE(d->tcf_action);
-diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
-index ecb9ee666095..9b6b52c5e24e 100644
---- a/net/sched/act_skbmod.c
-+++ b/net/sched/act_skbmod.c
-@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
- 	u64 flags;
- 
- 	tcf_lastuse_update(&d->tcf_tm);
--	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
-+	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- 
- 	action = READ_ONCE(d->tcf_action);
- 	if (unlikely(action == TC_ACT_SHOT))
-diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
-index 12f39a2dffd4..ad0bdefb3205 100644
---- a/net/sched/sch_api.c
-+++ b/net/sched/sch_api.c
-@@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(struct net_device *dev,
- static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- 			 u32 portid, u32 seq, u16 flags, int event)
- {
--	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
-+	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
- 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
- 	struct tcmsg *tcm;
- 	struct nlmsghdr  *nlh;
-@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- 		cpu_qstats = q->cpu_qstats;
- 	}
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
--				  &d, cpu_bstats, &q->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
- 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
- 		goto nla_put_failure;
-@@ -1264,26 +1263,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
- 		rcu_assign_pointer(sch->stab, stab);
- 	}
- 	if (tca[TCA_RATE]) {
--		seqcount_t *running;
--
- 		err = -EOPNOTSUPP;
- 		if (sch->flags & TCQ_F_MQROOT) {
- 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
- 			goto err_out4;
- 		}
- 
--		if (sch->parent != TC_H_ROOT &&
--		    !(sch->flags & TCQ_F_INGRESS) &&
--		    (!p || !(p->flags & TCQ_F_MQROOT)))
--			running = qdisc_root_sleeping_running(sch);
--		else
--			running = &sch->running;
--
- 		err = gen_new_estimator(&sch->bstats,
- 					sch->cpu_bstats,
- 					&sch->rate_est,
- 					NULL,
--					running,
-+					true,
- 					tca[TCA_RATE]);
- 		if (err) {
- 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
-@@ -1359,7 +1349,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
- 				      sch->cpu_bstats,
- 				      &sch->rate_est,
- 				      NULL,
--				      qdisc_root_sleeping_running(sch),
-+				      true,
- 				      tca[TCA_RATE]);
- 	}
- out:
-diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
-index 7d8518176b45..4c8e994cf0a5 100644
---- a/net/sched/sch_atm.c
-+++ b/net/sched/sch_atm.c
-@@ -52,7 +52,7 @@ struct atm_flow_data {
- 	struct atm_qdisc_data	*parent;	/* parent qdisc */
- 	struct socket		*sock;		/* for closing */
- 	int			ref;		/* reference count */
--	struct gnet_stats_basic_packed	bstats;
-+	struct gnet_stats_basic_sync	bstats;
- 	struct gnet_stats_queue	qstats;
- 	struct list_head	list;
- 	struct atm_flow_data	*excess;	/* flow for excess traffic;
-@@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
- 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
- 	INIT_LIST_HEAD(&p->flows);
- 	INIT_LIST_HEAD(&p->link.list);
-+	gnet_stats_basic_sync_init(&p->link.bstats);
- 	list_add(&p->link.list, &p->flows);
- 	p->link.q = qdisc_create_dflt(sch->dev_queue,
- 				      &pfifo_qdisc_ops, sch->handle, extack);
-@@ -652,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- {
- 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &flow->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
- 	    gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
- 		return -1;
- 
-diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
-index e0da15530f0e..02d9f0dfe356 100644
---- a/net/sched/sch_cbq.c
-+++ b/net/sched/sch_cbq.c
-@@ -116,7 +116,7 @@ struct cbq_class {
- 	long			avgidle;
- 	long			deficit;	/* Saved deficit for WRR */
- 	psched_time_t		penalized;
--	struct gnet_stats_basic_packed bstats;
-+	struct gnet_stats_basic_sync bstats;
- 	struct gnet_stats_queue qstats;
- 	struct net_rate_estimator __rcu *rate_est;
- 	struct tc_cbq_xstats	xstats;
-@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q)
- 		long avgidle = cl->avgidle;
- 		long idle;
- 
--		cl->bstats.packets++;
--		cl->bstats.bytes += len;
-+		_bstats_update(&cl->bstats, len, 1);
- 
- 		/*
- 		 * (now - last) is total time between packet right edges.
-@@ -1384,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- 	if (cl->undertime != PSCHED_PASTPERFECT)
- 		cl->xstats.undertime = cl->undertime - q->now;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &cl->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
- 		return -1;
-@@ -1519,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
- 			err = gen_replace_estimator(&cl->bstats, NULL,
- 						    &cl->rate_est,
- 						    NULL,
--						    qdisc_root_sleeping_running(sch),
-+						    true,
- 						    tca[TCA_RATE]);
- 			if (err) {
- 				NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
-@@ -1611,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
- 	if (cl == NULL)
- 		goto failure;
- 
-+	gnet_stats_basic_sync_init(&cl->bstats);
- 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
- 	if (err) {
- 		kfree(cl);
-@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
- 
- 	if (tca[TCA_RATE]) {
- 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
--					NULL,
--					qdisc_root_sleeping_running(sch),
--					tca[TCA_RATE]);
-+					NULL, true, tca[TCA_RATE]);
- 		if (err) {
- 			NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
- 			tcf_block_put(cl->block);
-diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
-index 642cd179b7a7..18e4f7a0b291 100644
---- a/net/sched/sch_drr.c
-+++ b/net/sched/sch_drr.c
-@@ -19,7 +19,7 @@ struct drr_class {
- 	struct Qdisc_class_common	common;
- 	unsigned int			filter_cnt;
- 
--	struct gnet_stats_basic_packed		bstats;
-+	struct gnet_stats_basic_sync		bstats;
- 	struct gnet_stats_queue		qstats;
- 	struct net_rate_estimator __rcu *rate_est;
- 	struct list_head		alist;
-@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 		if (tca[TCA_RATE]) {
- 			err = gen_replace_estimator(&cl->bstats, NULL,
- 						    &cl->rate_est,
--						    NULL,
--						    qdisc_root_sleeping_running(sch),
-+						    NULL, true,
- 						    tca[TCA_RATE]);
- 			if (err) {
- 				NL_SET_ERR_MSG(extack, "Failed to replace estimator");
-@@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 	if (cl == NULL)
- 		return -ENOBUFS;
- 
-+	gnet_stats_basic_sync_init(&cl->bstats);
- 	cl->common.classid = classid;
- 	cl->quantum	   = quantum;
- 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
-@@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 
- 	if (tca[TCA_RATE]) {
- 		err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
--					    NULL,
--					    qdisc_root_sleeping_running(sch),
--					    tca[TCA_RATE]);
-+					    NULL, true, tca[TCA_RATE]);
- 		if (err) {
- 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
- 			qdisc_put(cl->qdisc);
-@@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- 	if (qlen)
- 		xstats.deficit = cl->deficit;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &cl->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- 	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
- 		return -1;
-diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
-index 92a686807971..e007fc75ef2f 100644
---- a/net/sched/sch_ets.c
-+++ b/net/sched/sch_ets.c
-@@ -41,7 +41,7 @@ struct ets_class {
- 	struct Qdisc *qdisc;
- 	u32 quantum;
- 	u32 deficit;
--	struct gnet_stats_basic_packed bstats;
-+	struct gnet_stats_basic_sync bstats;
- 	struct gnet_stats_queue qstats;
- };
- 
-@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
- 	struct ets_class *cl = ets_class_from_arg(sch, arg);
- 	struct Qdisc *cl_q = cl->qdisc;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &cl_q->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
- 	    qdisc_qstats_copy(d, cl_q) < 0)
- 		return -1;
- 
-@@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
- 
- 	q->nbands = nbands;
- 	for (i = nstrict; i < q->nstrict; i++) {
--		INIT_LIST_HEAD(&q->classes[i].alist);
- 		if (q->classes[i].qdisc->q.qlen) {
- 			list_add_tail(&q->classes[i].alist, &q->active);
- 			q->classes[i].deficit = quanta[i];
-@@ -689,7 +687,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
- 	ets_offload_change(sch);
- 	for (i = q->nbands; i < oldbands; i++) {
- 		qdisc_put(q->classes[i].qdisc);
--		memset(&q->classes[i], 0, sizeof(q->classes[i]));
-+		q->classes[i].qdisc = NULL;
-+		q->classes[i].quantum = 0;
-+		q->classes[i].deficit = 0;
-+		gnet_stats_basic_sync_init(&q->classes[i].bstats);
-+		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
- 	}
- 	return 0;
- }
-@@ -698,7 +700,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
- 			  struct netlink_ext_ack *extack)
- {
- 	struct ets_sched *q = qdisc_priv(sch);
--	int err;
-+	int err, i;
- 
- 	if (!opt)
- 		return -EINVAL;
-@@ -708,6 +710,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
- 		return err;
- 
- 	INIT_LIST_HEAD(&q->active);
-+	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
-+		INIT_LIST_HEAD(&q->classes[i].alist);
-+
- 	return ets_qdisc_change(sch, opt, extack);
- }
- 
-diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
-index 66d2fbe9ef50..2b9c1a42dca8 100644
---- a/net/sched/sch_generic.c
-+++ b/net/sched/sch_generic.c
-@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
- 
- /*
-  * Transmit possibly several skbs, and handle the return status as
-- * required. Owning running seqcount bit guarantees that
-- * only one CPU can execute this function.
-+ * required. Owning qdisc running bit guarantees that only one CPU
-+ * can execute this function.
-  *
-  * Returns to the caller:
-  *				false  - hardware queue frozen backoff
-@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
- 	.ops		=	&noop_qdisc_ops,
- 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
- 	.dev_queue	=	&noop_netdev_queue,
--	.running	=	SEQCNT_ZERO(noop_qdisc.running),
- 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
- 	.gso_skb = {
- 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
-@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
- EXPORT_SYMBOL(pfifo_fast_ops);
- 
- static struct lock_class_key qdisc_tx_busylock;
--static struct lock_class_key qdisc_running_key;
- 
- struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- 			  const struct Qdisc_ops *ops,
-@@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- 	__skb_queue_head_init(&sch->gso_skb);
- 	__skb_queue_head_init(&sch->skb_bad_txq);
- 	qdisc_skb_head_init(&sch->q);
-+	gnet_stats_basic_sync_init(&sch->bstats);
- 	spin_lock_init(&sch->q.lock);
- 
- 	if (ops->static_flags & TCQ_F_CPUSTATS) {
- 		sch->cpu_bstats =
--			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-+			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
- 		if (!sch->cpu_bstats)
- 			goto errout1;
- 
-@@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- 	lockdep_set_class(&sch->seqlock,
- 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
- 
--	seqcount_init(&sch->running);
--	lockdep_set_class(&sch->running,
--			  dev->qdisc_running_key ?: &qdisc_running_key);
--
- 	sch->ops = ops;
- 	sch->flags = ops->static_flags;
- 	sch->enqueue = ops->enqueue;
-diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
-index 621dc6afde8f..1073c76d05c4 100644
---- a/net/sched/sch_gred.c
-+++ b/net/sched/sch_gred.c
-@@ -56,6 +56,7 @@ struct gred_sched {
- 	u32 		DPs;
- 	u32 		def;
- 	struct red_vars wred_set;
-+	struct tc_gred_qopt_offload *opt;
- };
- 
- static inline int gred_wred_mode(struct gred_sched *table)
-@@ -311,48 +312,50 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
- {
- 	struct gred_sched *table = qdisc_priv(sch);
- 	struct net_device *dev = qdisc_dev(sch);
--	struct tc_gred_qopt_offload opt = {
--		.command	= command,
--		.handle		= sch->handle,
--		.parent		= sch->parent,
--	};
-+	struct tc_gred_qopt_offload *opt = table->opt;
- 
- 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
- 		return;
- 
-+	memset(opt, 0, sizeof(*opt));
-+	opt->command = command;
-+	opt->handle = sch->handle;
-+	opt->parent = sch->parent;
-+
- 	if (command == TC_GRED_REPLACE) {
- 		unsigned int i;
- 
--		opt.set.grio_on = gred_rio_mode(table);
--		opt.set.wred_on = gred_wred_mode(table);
--		opt.set.dp_cnt = table->DPs;
--		opt.set.dp_def = table->def;
-+		opt->set.grio_on = gred_rio_mode(table);
-+		opt->set.wred_on = gred_wred_mode(table);
-+		opt->set.dp_cnt = table->DPs;
-+		opt->set.dp_def = table->def;
- 
- 		for (i = 0; i < table->DPs; i++) {
- 			struct gred_sched_data *q = table->tab[i];
- 
- 			if (!q)
- 				continue;
--			opt.set.tab[i].present = true;
--			opt.set.tab[i].limit = q->limit;
--			opt.set.tab[i].prio = q->prio;
--			opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
--			opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
--			opt.set.tab[i].is_ecn = gred_use_ecn(q);
--			opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
--			opt.set.tab[i].probability = q->parms.max_P;
--			opt.set.tab[i].backlog = &q->backlog;
-+			opt->set.tab[i].present = true;
-+			opt->set.tab[i].limit = q->limit;
-+			opt->set.tab[i].prio = q->prio;
-+			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
-+			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
-+			opt->set.tab[i].is_ecn = gred_use_ecn(q);
-+			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
-+			opt->set.tab[i].probability = q->parms.max_P;
-+			opt->set.tab[i].backlog = &q->backlog;
- 		}
--		opt.set.qstats = &sch->qstats;
-+		opt->set.qstats = &sch->qstats;
- 	}
- 
--	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
-+	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
- }
- 
- static int gred_offload_dump_stats(struct Qdisc *sch)
- {
- 	struct gred_sched *table = qdisc_priv(sch);
- 	struct tc_gred_qopt_offload *hw_stats;
-+	u64 bytes = 0, packets = 0;
- 	unsigned int i;
- 	int ret;
- 
-@@ -364,9 +367,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
- 	hw_stats->handle = sch->handle;
- 	hw_stats->parent = sch->parent;
- 
--	for (i = 0; i < MAX_DPs; i++)
-+	for (i = 0; i < MAX_DPs; i++) {
-+		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
- 		if (table->tab[i])
- 			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
-+	}
- 
- 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
- 	/* Even if driver returns failure adjust the stats - in case offload
-@@ -375,19 +380,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
- 	for (i = 0; i < MAX_DPs; i++) {
- 		if (!table->tab[i])
- 			continue;
--		table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
--		table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
-+		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
-+		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
- 		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
- 
--		_bstats_update(&sch->bstats,
--			       hw_stats->stats.bstats[i].bytes,
--			       hw_stats->stats.bstats[i].packets);
-+		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
-+		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
- 		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
- 		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
- 		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
- 		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
- 		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
- 	}
-+	_bstats_update(&sch->bstats, bytes, packets);
- 
- 	kfree(hw_stats);
- 	return ret;
-@@ -728,6 +733,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
- static int gred_init(struct Qdisc *sch, struct nlattr *opt,
- 		     struct netlink_ext_ack *extack)
- {
-+	struct gred_sched *table = qdisc_priv(sch);
- 	struct nlattr *tb[TCA_GRED_MAX + 1];
- 	int err;
- 
-@@ -751,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
- 		sch->limit = qdisc_dev(sch)->tx_queue_len
- 		             * psched_mtu(qdisc_dev(sch));
- 
-+	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
-+		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
-+		if (!table->opt)
-+			return -ENOMEM;
-+	}
-+
- 	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
- }
- 
-@@ -907,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch)
- 			gred_destroy_vq(table->tab[i]);
- 	}
- 	gred_offload(sch, TC_GRED_DESTROY);
-+	kfree(table->opt);
- }
- 
- static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
-diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
-index b7ac30cca035..d3979a6000e7 100644
---- a/net/sched/sch_hfsc.c
-+++ b/net/sched/sch_hfsc.c
-@@ -111,7 +111,7 @@ enum hfsc_class_flags {
- struct hfsc_class {
- 	struct Qdisc_class_common cl_common;
- 
--	struct gnet_stats_basic_packed bstats;
-+	struct gnet_stats_basic_sync bstats;
- 	struct gnet_stats_queue qstats;
- 	struct net_rate_estimator __rcu *rate_est;
- 	struct tcf_proto __rcu *filter_list; /* filter list */
-@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 			err = gen_replace_estimator(&cl->bstats, NULL,
- 						    &cl->rate_est,
- 						    NULL,
--						    qdisc_root_sleeping_running(sch),
-+						    true,
- 						    tca[TCA_RATE]);
- 			if (err)
- 				return err;
-@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 
- 	if (tca[TCA_RATE]) {
- 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
--					NULL,
--					qdisc_root_sleeping_running(sch),
--					tca[TCA_RATE]);
-+					NULL, true, tca[TCA_RATE]);
- 		if (err) {
- 			tcf_block_put(cl->block);
- 			kfree(cl);
-@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- 	xstats.work    = cl->cl_total;
- 	xstats.rtwork  = cl->cl_cumul;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
- 		return -1;
-@@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
- 	if (err)
- 		return err;
- 
-+	gnet_stats_basic_sync_init(&q->root.bstats);
- 	q->root.cl_common.classid = sch->handle;
- 	q->root.sched   = q;
- 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
-index 5067a6e5d4fd..cf1d45db4e84 100644
---- a/net/sched/sch_htb.c
-+++ b/net/sched/sch_htb.c
-@@ -113,8 +113,8 @@ struct htb_class {
- 	/*
- 	 * Written often fields
- 	 */
--	struct gnet_stats_basic_packed bstats;
--	struct gnet_stats_basic_packed bstats_bias;
-+	struct gnet_stats_basic_sync bstats;
-+	struct gnet_stats_basic_sync bstats_bias;
- 	struct tc_htb_xstats	xstats;	/* our special stats */
- 
- 	/* token bucket parameters */
-@@ -1308,10 +1308,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
- static void htb_offload_aggregate_stats(struct htb_sched *q,
- 					struct htb_class *cl)
- {
-+	u64 bytes = 0, packets = 0;
- 	struct htb_class *c;
- 	unsigned int i;
- 
--	memset(&cl->bstats, 0, sizeof(cl->bstats));
-+	gnet_stats_basic_sync_init(&cl->bstats);
- 
- 	for (i = 0; i < q->clhash.hashsize; i++) {
- 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
-@@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
- 			if (p != cl)
- 				continue;
- 
--			cl->bstats.bytes += c->bstats_bias.bytes;
--			cl->bstats.packets += c->bstats_bias.packets;
-+			bytes += u64_stats_read(&c->bstats_bias.bytes);
-+			packets += u64_stats_read(&c->bstats_bias.packets);
- 			if (c->level == 0) {
--				cl->bstats.bytes += c->leaf.q->bstats.bytes;
--				cl->bstats.packets += c->leaf.q->bstats.packets;
-+				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
-+				packets += u64_stats_read(&c->leaf.q->bstats.packets);
- 			}
- 		}
- 	}
-+	_bstats_update(&cl->bstats, bytes, packets);
- }
- 
- static int
-@@ -1357,16 +1359,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
- 			if (cl->leaf.q)
- 				cl->bstats = cl->leaf.q->bstats;
- 			else
--				memset(&cl->bstats, 0, sizeof(cl->bstats));
--			cl->bstats.bytes += cl->bstats_bias.bytes;
--			cl->bstats.packets += cl->bstats_bias.packets;
-+				gnet_stats_basic_sync_init(&cl->bstats);
-+			_bstats_update(&cl->bstats,
-+				       u64_stats_read(&cl->bstats_bias.bytes),
-+				       u64_stats_read(&cl->bstats_bias.packets));
- 		} else {
- 			htb_offload_aggregate_stats(q, cl);
- 		}
- 	}
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &cl->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
- 		return -1;
-@@ -1578,8 +1580,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
- 		WARN_ON(old != q);
- 
- 	if (cl->parent) {
--		cl->parent->bstats_bias.bytes += q->bstats.bytes;
--		cl->parent->bstats_bias.packets += q->bstats.packets;
-+		_bstats_update(&cl->parent->bstats_bias,
-+			       u64_stats_read(&q->bstats.bytes),
-+			       u64_stats_read(&q->bstats.packets));
- 	}
- 
- 	offload_opt = (struct tc_htb_qopt_offload) {
-@@ -1849,6 +1852,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
- 		if (!cl)
- 			goto failure;
- 
-+		gnet_stats_basic_sync_init(&cl->bstats);
-+		gnet_stats_basic_sync_init(&cl->bstats_bias);
-+
- 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
- 		if (err) {
- 			kfree(cl);
-@@ -1858,7 +1864,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
- 			err = gen_new_estimator(&cl->bstats, NULL,
- 						&cl->rate_est,
- 						NULL,
--						qdisc_root_sleeping_running(sch),
-+						true,
- 						tca[TCA_RATE] ? : &est.nla);
- 			if (err)
- 				goto err_block_put;
-@@ -1922,8 +1928,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
- 				htb_graft_helper(dev_queue, old_q);
- 				goto err_kill_estimator;
- 			}
--			parent->bstats_bias.bytes += old_q->bstats.bytes;
--			parent->bstats_bias.packets += old_q->bstats.packets;
-+			_bstats_update(&parent->bstats_bias,
-+				       u64_stats_read(&old_q->bstats.bytes),
-+				       u64_stats_read(&old_q->bstats.packets));
- 			qdisc_put(old_q);
- 		}
- 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
-@@ -1983,7 +1990,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
- 			err = gen_replace_estimator(&cl->bstats, NULL,
- 						    &cl->rate_est,
- 						    NULL,
--						    qdisc_root_sleeping_running(sch),
-+						    true,
- 						    tca[TCA_RATE]);
- 			if (err)
- 				return err;
-diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
-index db18d8a860f9..24c5d97d88dd 100644
---- a/net/sched/sch_mq.c
-+++ b/net/sched/sch_mq.c
-@@ -153,10 +153,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
- 	struct net_device *dev = qdisc_dev(sch);
- 	struct Qdisc *qdisc;
- 	unsigned int ntx;
--	__u32 qlen = 0;
- 
- 	sch->q.qlen = 0;
--	memset(&sch->bstats, 0, sizeof(sch->bstats));
-+	gnet_stats_basic_sync_init(&sch->bstats);
- 	memset(&sch->qstats, 0, sizeof(sch->qstats));
- 
- 	/* MQ supports lockless qdiscs. However, statistics accounting needs
-@@ -168,25 +167,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
- 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
- 		spin_lock_bh(qdisc_lock(qdisc));
- 
--		if (qdisc_is_percpu_stats(qdisc)) {
--			qlen = qdisc_qlen_sum(qdisc);
--			__gnet_stats_copy_basic(NULL, &sch->bstats,
--						qdisc->cpu_bstats,
--						&qdisc->bstats);
--			__gnet_stats_copy_queue(&sch->qstats,
--						qdisc->cpu_qstats,
--						&qdisc->qstats, qlen);
--			sch->q.qlen		+= qlen;
--		} else {
--			sch->q.qlen		+= qdisc->q.qlen;
--			sch->bstats.bytes	+= qdisc->bstats.bytes;
--			sch->bstats.packets	+= qdisc->bstats.packets;
--			sch->qstats.qlen	+= qdisc->qstats.qlen;
--			sch->qstats.backlog	+= qdisc->qstats.backlog;
--			sch->qstats.drops	+= qdisc->qstats.drops;
--			sch->qstats.requeues	+= qdisc->qstats.requeues;
--			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
--		}
-+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-+				     &qdisc->bstats, false);
-+		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
-+				     &qdisc->qstats);
-+		sch->q.qlen += qdisc_qlen(qdisc);
- 
- 		spin_unlock_bh(qdisc_lock(qdisc));
- 	}
-@@ -269,8 +254,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
- 
- 	sch = dev_queue->qdisc_sleeping;
--	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
--				  &sch->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
- 	    qdisc_qstats_copy(d, sch) < 0)
- 		return -1;
- 	return 0;
-diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
-index 50e15add6068..42d4101e4f3d 100644
---- a/net/sched/sch_mqprio.c
-+++ b/net/sched/sch_mqprio.c
-@@ -412,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
- 	unsigned int ntx, tc;
- 
- 	sch->q.qlen = 0;
--	memset(&sch->bstats, 0, sizeof(sch->bstats));
-+	gnet_stats_basic_sync_init(&sch->bstats);
- 	memset(&sch->qstats, 0, sizeof(sch->qstats));
- 
- 	/* MQ supports lockless qdiscs. However, statistics accounting needs
-@@ -424,25 +424,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
- 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
- 		spin_lock_bh(qdisc_lock(qdisc));
- 
--		if (qdisc_is_percpu_stats(qdisc)) {
--			__u32 qlen = qdisc_qlen_sum(qdisc);
--
--			__gnet_stats_copy_basic(NULL, &sch->bstats,
--						qdisc->cpu_bstats,
--						&qdisc->bstats);
--			__gnet_stats_copy_queue(&sch->qstats,
--						qdisc->cpu_qstats,
--						&qdisc->qstats, qlen);
--			sch->q.qlen		+= qlen;
--		} else {
--			sch->q.qlen		+= qdisc->q.qlen;
--			sch->bstats.bytes	+= qdisc->bstats.bytes;
--			sch->bstats.packets	+= qdisc->bstats.packets;
--			sch->qstats.backlog	+= qdisc->qstats.backlog;
--			sch->qstats.drops	+= qdisc->qstats.drops;
--			sch->qstats.requeues	+= qdisc->qstats.requeues;
--			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
--		}
-+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
-+				     &qdisc->bstats, false);
-+		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
-+				     &qdisc->qstats);
-+		sch->q.qlen += qdisc_qlen(qdisc);
- 
- 		spin_unlock_bh(qdisc_lock(qdisc));
- 	}
-@@ -534,12 +520,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- {
- 	if (cl >= TC_H_MIN_PRIORITY) {
- 		int i;
--		__u32 qlen = 0;
-+		__u32 qlen;
- 		struct gnet_stats_queue qstats = {0};
--		struct gnet_stats_basic_packed bstats = {0};
-+		struct gnet_stats_basic_sync bstats;
- 		struct net_device *dev = qdisc_dev(sch);
- 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
- 
-+		gnet_stats_basic_sync_init(&bstats);
- 		/* Drop lock here it will be reclaimed before touching
- 		 * statistics this is required because the d->lock we
- 		 * hold here is the look on dev_queue->qdisc_sleeping
-@@ -554,40 +541,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- 
- 			spin_lock_bh(qdisc_lock(qdisc));
- 
--			if (qdisc_is_percpu_stats(qdisc)) {
--				qlen = qdisc_qlen_sum(qdisc);
--
--				__gnet_stats_copy_basic(NULL, &bstats,
--							qdisc->cpu_bstats,
--							&qdisc->bstats);
--				__gnet_stats_copy_queue(&qstats,
--							qdisc->cpu_qstats,
--							&qdisc->qstats,
--							qlen);
--			} else {
--				qlen		+= qdisc->q.qlen;
--				bstats.bytes	+= qdisc->bstats.bytes;
--				bstats.packets	+= qdisc->bstats.packets;
--				qstats.backlog	+= qdisc->qstats.backlog;
--				qstats.drops	+= qdisc->qstats.drops;
--				qstats.requeues	+= qdisc->qstats.requeues;
--				qstats.overlimits += qdisc->qstats.overlimits;
--			}
-+			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
-+					     &qdisc->bstats, false);
-+			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
-+					     &qdisc->qstats);
-+			sch->q.qlen += qdisc_qlen(qdisc);
-+
- 			spin_unlock_bh(qdisc_lock(qdisc));
- 		}
-+		qlen = qdisc_qlen(sch) + qstats.qlen;
- 
- 		/* Reclaim root sleeping lock before completing stats */
- 		if (d->lock)
- 			spin_lock_bh(d->lock);
--		if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
-+		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
- 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
- 			return -1;
- 	} else {
- 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
- 
- 		sch = dev_queue->qdisc_sleeping;
--		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
--					  sch->cpu_bstats, &sch->bstats) < 0 ||
-+		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
-+					  &sch->bstats, true) < 0 ||
- 		    qdisc_qstats_copy(d, sch) < 0)
- 			return -1;
- 	}
-diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
-index e282e7382117..cd8ab90c4765 100644
---- a/net/sched/sch_multiq.c
-+++ b/net/sched/sch_multiq.c
-@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- 	struct Qdisc *cl_q;
- 
- 	cl_q = q->queues[cl - 1];
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
- 	    qdisc_qstats_copy(d, cl_q) < 0)
- 		return -1;
- 
-diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
-index 03fdf31ccb6a..3b8d7197c06b 100644
---- a/net/sched/sch_prio.c
-+++ b/net/sched/sch_prio.c
-@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- 	struct Qdisc *cl_q;
- 
- 	cl_q = q->queues[cl - 1];
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
-+				  &cl_q->bstats, true) < 0 ||
- 	    qdisc_qstats_copy(d, cl_q) < 0)
- 		return -1;
- 
-diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
-index 58a9d42b52b8..0b7f9ba28deb 100644
---- a/net/sched/sch_qfq.c
-+++ b/net/sched/sch_qfq.c
-@@ -131,7 +131,7 @@ struct qfq_class {
- 
- 	unsigned int filter_cnt;
- 
--	struct gnet_stats_basic_packed bstats;
-+	struct gnet_stats_basic_sync bstats;
- 	struct gnet_stats_queue qstats;
- 	struct net_rate_estimator __rcu *rate_est;
- 	struct Qdisc *qdisc;
-@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 			err = gen_replace_estimator(&cl->bstats, NULL,
- 						    &cl->rate_est,
- 						    NULL,
--						    qdisc_root_sleeping_running(sch),
-+						    true,
- 						    tca[TCA_RATE]);
- 			if (err)
- 				return err;
-@@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 	if (cl == NULL)
- 		return -ENOBUFS;
- 
-+	gnet_stats_basic_sync_init(&cl->bstats);
- 	cl->common.classid = classid;
- 	cl->deficit = lmax;
- 
-@@ -477,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
- 		err = gen_new_estimator(&cl->bstats, NULL,
- 					&cl->rate_est,
- 					NULL,
--					qdisc_root_sleeping_running(sch),
-+					true,
- 					tca[TCA_RATE]);
- 		if (err)
- 			goto destroy_class;
-@@ -639,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- 	xstats.weight = cl->agg->class_weight;
- 	xstats.lmax = cl->agg->lmax;
- 
--	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
--				  d, NULL, &cl->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- 	    qdisc_qstats_copy(d, cl->qdisc) < 0)
- 		return -1;
-@@ -1234,8 +1234,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- 		return err;
- 	}
- 
--	cl->bstats.bytes += len;
--	cl->bstats.packets += gso_segs;
-+	_bstats_update(&cl->bstats, len, gso_segs);
- 	sch->qstats.backlog += len;
- 	++sch->q.qlen;
- 
-diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
-index a66398fb2d6d..377f896bdedc 100644
---- a/net/sched/sch_taprio.c
-+++ b/net/sched/sch_taprio.c
-@@ -1984,7 +1984,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
- 	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
- 
- 	sch = dev_queue->qdisc_sleeping;
--	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
-+	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
- 	    qdisc_qstats_copy(d, sch) < 0)
- 		return -1;
- 	return 0;
 diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
-index 6316bd2b8f37..dfc9d12caef8 100644
+index 1e99ba1b9d72..9b20e4d6bfe4 100644
 --- a/net/sunrpc/svc_xprt.c
 +++ b/net/sunrpc/svc_xprt.c
 @@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
@@ -11435,202 +11518,6 @@ index 6316bd2b8f37..dfc9d12caef8 100644
  	trace_svc_xprt_do_enqueue(xprt, rqstp);
  }
  EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
-diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
-index 5a90aa527877..642d0748c169 100644
---- a/samples/kfifo/bytestream-example.c
-+++ b/samples/kfifo/bytestream-example.c
-@@ -22,10 +22,10 @@
- #define	PROC_FIFO	"bytestream-fifo"
- 
- /* lock for procfs read access */
--static DEFINE_MUTEX(read_lock);
-+static DEFINE_MUTEX(read_access);
- 
- /* lock for procfs write access */
--static DEFINE_MUTEX(write_lock);
-+static DEFINE_MUTEX(write_access);
- 
- /*
-  * define DYNAMIC in this example for a dynamically allocated fifo.
-@@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&write_lock))
-+	if (mutex_lock_interruptible(&write_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_from_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&write_lock);
-+	mutex_unlock(&write_access);
- 	if (ret)
- 		return ret;
- 
-@@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&read_lock))
-+	if (mutex_lock_interruptible(&read_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_to_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&read_lock);
-+	mutex_unlock(&read_access);
- 	if (ret)
- 		return ret;
- 
-diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
-index e5403d8c971a..c61482ba94f4 100644
---- a/samples/kfifo/inttype-example.c
-+++ b/samples/kfifo/inttype-example.c
-@@ -22,10 +22,10 @@
- #define	PROC_FIFO	"int-fifo"
- 
- /* lock for procfs read access */
--static DEFINE_MUTEX(read_lock);
-+static DEFINE_MUTEX(read_access);
- 
- /* lock for procfs write access */
--static DEFINE_MUTEX(write_lock);
-+static DEFINE_MUTEX(write_access);
- 
- /*
-  * define DYNAMIC in this example for a dynamically allocated fifo.
-@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&write_lock))
-+	if (mutex_lock_interruptible(&write_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_from_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&write_lock);
-+	mutex_unlock(&write_access);
- 	if (ret)
- 		return ret;
- 
-@@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&read_lock))
-+	if (mutex_lock_interruptible(&read_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_to_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&read_lock);
-+	mutex_unlock(&read_access);
- 	if (ret)
- 		return ret;
- 
-diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
-index f64f3d62d6c2..e4087b2d3fc4 100644
---- a/samples/kfifo/record-example.c
-+++ b/samples/kfifo/record-example.c
-@@ -22,10 +22,10 @@
- #define	PROC_FIFO	"record-fifo"
- 
- /* lock for procfs read access */
--static DEFINE_MUTEX(read_lock);
-+static DEFINE_MUTEX(read_access);
- 
- /* lock for procfs write access */
--static DEFINE_MUTEX(write_lock);
-+static DEFINE_MUTEX(write_access);
- 
- /*
-  * define DYNAMIC in this example for a dynamically allocated fifo.
-@@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&write_lock))
-+	if (mutex_lock_interruptible(&write_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_from_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&write_lock);
-+	mutex_unlock(&write_access);
- 	if (ret)
- 		return ret;
- 
-@@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
- 	int ret;
- 	unsigned int copied;
- 
--	if (mutex_lock_interruptible(&read_lock))
-+	if (mutex_lock_interruptible(&read_access))
- 		return -ERESTARTSYS;
- 
- 	ret = kfifo_to_user(&test, buf, count, &copied);
- 
--	mutex_unlock(&read_lock);
-+	mutex_unlock(&read_access);
- 	if (ret)
- 		return ret;
- 
-diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
-index 21a0e7c3b8de..e7dd316da551 100644
---- a/security/smack/smack_lsm.c
-+++ b/security/smack/smack_lsm.c
-@@ -51,8 +51,10 @@
- #define SMK_RECEIVING	1
- #define SMK_SENDING	2
- 
-+#ifdef SMACK_IPV6_PORT_LABELING
- static DEFINE_MUTEX(smack_ipv6_lock);
- static LIST_HEAD(smk_ipv6_port_list);
-+#endif
- struct kmem_cache *smack_rule_cache;
- int smack_enabled __initdata;
- 
-@@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
- 	mutex_unlock(&smack_ipv6_lock);
- 	return;
- }
--#endif
- 
- /**
-  * smk_ipv6_port_check - check Smack port access
-@@ -2666,6 +2667,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address,
- 
- 	return smk_ipv6_check(skp, object, address, act);
- }
-+#endif
- 
- /**
-  * smack_inode_setsecurity - set smack xattrs
-@@ -2852,8 +2854,9 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap,
- 			rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
- 					    SMK_CONNECTING);
- 		}
--		if (__is_defined(SMACK_IPV6_PORT_LABELING))
--			rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
-+#ifdef SMACK_IPV6_PORT_LABELING
-+		rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
-+#endif
- 
- 		return rc;
- 	}
-diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
-index e95c7c018e7d..4f2c2379531b 100644
---- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c
-+++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
-@@ -288,7 +288,6 @@ const struct snd_soc_dai_ops mtk_afe_fe_ops = {
- };
- EXPORT_SYMBOL_GPL(mtk_afe_fe_ops);
- 
--static DEFINE_MUTEX(irqs_lock);
- int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe)
- {
- 	int i;
 -- 
 2.30.2
 
diff --git a/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-Device-Tree-Changes.patch b/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-Device-Tree-Changes.patch
index ff017cbf7..38e97090a 100644
--- a/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-Device-Tree-Changes.patch
+++ b/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-Device-Tree-Changes.patch
@@ -1,6 +1,6 @@
-From 347366afcfb36f7ba0b5f0d4cb1a55f81bb8439e Mon Sep 17 00:00:00 2001
+From 3be1a61a6e5ce784c29e0b89e4053ba6494d86a8 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Mon, 10 Jan 2022 15:17:52 -0600
+Date: Tue, 25 Jan 2022 09:33:07 -0600
 Subject: [PATCH] Add BeagleBoard.org Device Tree Changes
 
 https://github.com/beagleboard/BeagleBoard-DeviceTrees/tree/v5.16.x
diff --git a/patches/wireless_regdb/0001-Add-wireless-regdb-regulatory-database-file.patch b/patches/wireless_regdb/0001-Add-wireless-regdb-regulatory-database-file.patch
index 9489224f7..5091d4d17 100644
--- a/patches/wireless_regdb/0001-Add-wireless-regdb-regulatory-database-file.patch
+++ b/patches/wireless_regdb/0001-Add-wireless-regdb-regulatory-database-file.patch
@@ -1,6 +1,6 @@
-From 34d1cf39e5943f8840730ece14b337c94b8dd455 Mon Sep 17 00:00:00 2001
+From 4b48e2028eaf39c5088b0ba2ce8c910d897314dc Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Mon, 10 Jan 2022 15:17:27 -0600
+Date: Tue, 25 Jan 2022 09:32:42 -0600
 Subject: [PATCH] Add wireless-regdb regulatory database file
 
 https://git.kernel.org/pub/scm/linux/kernel/git/sforshee/wireless-regdb.git/commit/?id=2ce78ed90f71955f7b223c17b5cda6c8a7708efe
diff --git a/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch b/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch
index d5f84dce0..adcc2aabf 100644
--- a/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch
+++ b/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch
@@ -1,6 +1,6 @@
-From f4ef195954278e9107b9a7cd114991daee91b22b Mon Sep 17 00:00:00 2001
+From 0381e77e70e9a9714bcdc5da3b8e5d43bc0d0246 Mon Sep 17 00:00:00 2001
 From: Robert Nelson <robertcnelson@gmail.com>
-Date: Mon, 10 Jan 2022 15:00:23 -0600
+Date: Tue, 25 Jan 2022 09:29:52 -0600
 Subject: [PATCH] merge: wpanusb: https://github.com/statropy/wpanusb
 
 https://github.com/statropy/wpanusb/commit/251f0167545bf2dcaa3cad991a59dbf5ab05490a
diff --git a/version.sh b/version.sh
index 2331d1556..72b09d456 100644
--- a/version.sh
+++ b/version.sh
@@ -38,10 +38,10 @@ toolchain="gcc_11_arm"
 
 #Kernel
 KERNEL_REL=5.16
-KERNEL_TAG=${KERNEL_REL}
-kernel_rt=".X-rtY"
+KERNEL_TAG=${KERNEL_REL}.2
+kernel_rt=".2-rt19"
 #Kernel Build
-BUILD=${build_prefix}10
+BUILD=${build_prefix}10.1
 
 #v5.X-rcX + upto SHA
 #prev_KERNEL_SHA=""
-- 
GitLab
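
For reference, a minimal sketch (not part of the patch) of how the bumped version.sh values expand; build_prefix is defined elsewhere in version.sh and is not shown in this hunk, so "-bone" below is only an assumed stand-in:

    #!/bin/sh
    # Illustration only: shell expansion of the values changed in the hunk above.
    build_prefix="-bone"                  # assumed placeholder; real value set earlier in version.sh
    KERNEL_REL=5.16
    KERNEL_TAG=${KERNEL_REL}.2            # expands to 5.16.2, matching the v5.16.2 rebase
    kernel_rt=".2-rt19"                   # ${KERNEL_REL}${kernel_rt} expands to 5.16.2-rt19
    BUILD=${build_prefix}10.1             # e.g. -bone10.1 under the assumed prefix
    echo "${KERNEL_TAG}${BUILD}"          # prints 5.16.2-bone10.1 under these assumptions
    echo "${KERNEL_REL}${kernel_rt}"      # prints 5.16.2-rt19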