From 322359ca8121ca8e52b1c85ecdda945047999c42 Mon Sep 17 00:00:00 2001 From: Robert Nelson <robertcnelson@gmail.com> Date: Mon, 8 Mar 2021 12:18:37 -0600 Subject: [PATCH] kernel v5.12-rc2 rebase with: device-tree/etc BBDTBS: https://github.com/beagleboard/BeagleBoard-DeviceTrees/commit/eb25f2800649020dc1537f2e2f5c0f6e977344d6 TI_AMX3_CM3: http://git.ti.com/gitweb/?p=processor-firmware/ti-amx3-cm3-pm-firmware.git;a=commit;h=6a849767df85ce9399494f53fb5c753665396653 WPANUSB: https://github.com/statropy/wpanusb/commit/7ba5f3d24d95f804e80b2d8d28e35b34c15219c2 Signed-off-by: Robert Nelson <robertcnelson@gmail.com> --- patches/defconfig | 2 +- patches/ref_omap2plus_defconfig | 2 +- ...01-merge-CONFIG_PREEMPT_RT-Patch-Set.patch | 17876 ++++++---------- ...001-Add-BeagleBoard.org-DTBS-v5.12.x.patch | 4 +- ...sb-https-github.com-statropy-wpanusb.patch | 4 +- version.sh | 4 +- 6 files changed, 6826 insertions(+), 11066 deletions(-) diff --git a/patches/defconfig b/patches/defconfig index f736755a7..4c9d1003c 100644 --- a/patches/defconfig +++ b/patches/defconfig @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm 5.12.0-rc1 Kernel Configuration +# Linux/arm 5.12.0-rc2 Kernel Configuration # CONFIG_CC_VERSION_TEXT="arm-none-linux-gnueabihf-gcc (GNU Toolchain for the A-profile Architecture 10.2-2020.11 (arm-10.16)) 10.2.1 20201103" CONFIG_CC_IS_GCC=y diff --git a/patches/ref_omap2plus_defconfig b/patches/ref_omap2plus_defconfig index 3842da1ec..104770f4b 100644 --- a/patches/ref_omap2plus_defconfig +++ b/patches/ref_omap2plus_defconfig @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm 5.12.0-rc1 Kernel Configuration +# Linux/arm 5.12.0-rc2 Kernel Configuration # CONFIG_CC_VERSION_TEXT="arm-none-linux-gnueabihf-gcc (GNU Toolchain for the A-profile Architecture 10.2-2020.11 (arm-10.16)) 10.2.1 20201103" CONFIG_CC_IS_GCC=y diff --git a/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch b/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch index 1639484e6..12f6b9e69 100644 --- a/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch +++ b/patches/rt/0001-merge-CONFIG_PREEMPT_RT-Patch-Set.patch @@ -1,35 +1,33 @@ -From fd74741cc96dd0ea170a8a18a2c8801da9e704b2 Mon Sep 17 00:00:00 2001 +From cff641c4fc9d0b3cd3d1a7041277527581de4807 Mon Sep 17 00:00:00 2001 From: Robert Nelson <robertcnelson@gmail.com> -Date: Fri, 23 Oct 2020 10:38:09 -0500 +Date: Thu, 4 Mar 2021 11:48:09 -0600 Subject: [PATCH] merge: CONFIG_PREEMPT_RT Patch Set -patch-5.9.1-rt18.patch.xz +patch-5.11.2-rt9.patch.xz Signed-off-by: Robert Nelson <robertcnelson@gmail.com> --- - Documentation/RCU/checklist.rst | 4 +- + .../Expedited-Grace-Periods.rst | 4 +- + .../RCU/Design/Requirements/Requirements.rst | 26 +- + Documentation/RCU/checklist.rst | 2 +- + Documentation/RCU/rcubarrier.rst | 6 +- Documentation/RCU/stallwarn.rst | 4 +- - Documentation/admin-guide/kdump/gdbmacros.txt | 159 +- - .../admin-guide/kdump/vmcoreinfo.rst | 131 +- - Documentation/locking/seqlock.rst | 18 + - MAINTAINERS | 1 + - arch/Kconfig | 8 + + Documentation/RCU/whatisRCU.rst | 10 +- + .../admin-guide/kernel-parameters.txt | 11 + + arch/Kconfig | 1 + arch/alpha/include/asm/spinlock_types.h | 4 - arch/arm/Kconfig | 5 +- arch/arm/include/asm/spinlock_types.h | 4 - - arch/arm/include/asm/switch_to.h | 8 + - arch/arm/include/asm/thread_info.h | 8 +- + arch/arm/include/asm/thread_info.h | 6 +- arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kernel/entry-armv.S | 19 +- - arch/arm/kernel/entry-common.S | 9 +- 
arch/arm/kernel/signal.c | 3 +- arch/arm/kernel/smp.c | 2 - arch/arm/mm/fault.c | 6 + - arch/arm/mm/highmem.c | 55 +- arch/arm64/Kconfig | 3 + arch/arm64/include/asm/preempt.h | 28 +- arch/arm64/include/asm/spinlock_types.h | 4 - - arch/arm64/include/asm/thread_info.h | 6 +- + arch/arm64/include/asm/thread_info.h | 8 +- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 13 +- arch/arm64/kernel/fpsimd.c | 14 +- @@ -37,54 +35,57 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com> arch/arm64/kvm/arm.c | 6 +- arch/hexagon/include/asm/spinlock_types.h | 4 - arch/ia64/include/asm/spinlock_types.h | 4 - - arch/mips/Kconfig | 2 +- - arch/powerpc/Kconfig | 5 +- + arch/powerpc/Kconfig | 3 + + arch/powerpc/include/asm/cmpxchg.h | 2 +- + .../include/asm/simple_spinlock_types.h | 2 +- arch/powerpc/include/asm/spinlock_types.h | 4 - arch/powerpc/include/asm/stackprotector.h | 4 + - arch/powerpc/include/asm/thread_info.h | 16 +- + arch/powerpc/include/asm/thread_info.h | 15 +- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_32.S | 23 +- arch/powerpc/kernel/exceptions-64e.S | 16 +- arch/powerpc/kernel/irq.c | 2 + arch/powerpc/kernel/misc_32.S | 2 + arch/powerpc/kernel/misc_64.S | 2 + - arch/powerpc/kernel/syscall_64.c | 9 +- + arch/powerpc/kernel/nvram_64.c | 12 +- + arch/powerpc/kernel/syscall_64.c | 10 +- arch/powerpc/kernel/traps.c | 8 +- arch/powerpc/kernel/watchdog.c | 5 - arch/powerpc/kexec/crash.c | 3 - arch/powerpc/kvm/Kconfig | 1 + - arch/powerpc/platforms/pseries/iommu.c | 16 +- + arch/powerpc/mm/mem.c | 2 +- + arch/powerpc/platforms/powernv/opal-kmsg.c | 3 +- + arch/powerpc/platforms/pseries/iommu.c | 31 +- + arch/powerpc/xmon/xmon.c | 6 +- arch/s390/include/asm/spinlock_types.h | 4 - arch/sh/include/asm/spinlock_types.h | 4 - arch/sh/kernel/irq.c | 2 + arch/sparc/kernel/irq_64.c | 2 + + arch/um/kernel/kmsg_dump.c | 13 +- arch/x86/Kconfig | 2 + arch/x86/crypto/aesni-intel_glue.c | 22 +- arch/x86/crypto/cast5_avx_glue.c | 21 +- arch/x86/crypto/glue_helper.c | 26 +- - arch/x86/include/asm/fpu/api.h | 11 +- + arch/x86/include/asm/fpu/api.h | 1 + arch/x86/include/asm/preempt.h | 36 +- arch/x86/include/asm/signal.h | 13 + arch/x86/include/asm/stackprotector.h | 8 +- - arch/x86/include/asm/thread_info.h | 11 + + arch/x86/include/asm/thread_info.h | 7 + arch/x86/kernel/cpu/mshyperv.c | 3 +- arch/x86/kernel/fpu/core.c | 12 + arch/x86/kernel/irq_32.c | 2 + arch/x86/kernel/irq_64.c | 2 + - arch/x86/kernel/process_32.c | 32 + - arch/x86/kernel/tsc.c | 10 +- arch/x86/kvm/x86.c | 8 + - arch/x86/mm/highmem_32.c | 10 +- - arch/x86/mm/iomap_32.c | 15 +- arch/xtensa/include/asm/spinlock_types.h | 4 - - block/blk-mq.c | 8 +- + block/blk-mq.c | 115 +- crypto/cryptd.c | 19 +- - drivers/base/core.c | 46 +- - drivers/block/zram/zram_drv.c | 41 +- + drivers/atm/eni.c | 2 +- + drivers/block/zram/zram_drv.c | 36 + drivers/block/zram/zram_drv.h | 1 + drivers/char/random.c | 11 +- drivers/char/tpm/tpm-dev-common.c | 1 - drivers/char/tpm/tpm_tis.c | 29 +- + drivers/firewire/ohci.c | 4 +- drivers/firmware/efi/efi.c | 5 +- drivers/gpu/drm/i915/display/intel_sprite.c | 15 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +- @@ -92,10 +93,21 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com> drivers/gpu/drm/i915/i915_trace.h | 6 +- drivers/gpu/drm/radeon/radeon_display.c | 2 + drivers/hv/hyperv_vmbus.h | 1 + - drivers/hv/vmbus_drv.c | 5 +- + drivers/hv/vmbus_drv.c | 10 +- drivers/leds/trigger/Kconfig | 1 + drivers/md/raid5.c | 7 +- drivers/md/raid5.h | 1 + + drivers/mtd/mtdoops.c | 
5 +- + drivers/net/ethernet/chelsio/cxgb/common.h | 6 +- + drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 54 +- + drivers/net/ethernet/chelsio/cxgb/sge.c | 53 +- + drivers/net/ethernet/chelsio/cxgb/sge.h | 3 +- + drivers/net/ethernet/chelsio/cxgb/subr.c | 64 +- + drivers/net/ethernet/dlink/sundance.c | 2 +- + drivers/net/ethernet/jme.c | 10 +- + drivers/net/ethernet/jme.h | 2 +- + drivers/net/wireless/ath/ath9k/beacon.c | 2 +- + drivers/pci/controller/pci-hyperv.c | 2 +- drivers/scsi/fcoe/fcoe.c | 16 +- drivers/scsi/fcoe/fcoe_ctlr.c | 4 +- drivers/scsi/libfc/fc_exch.c | 4 +- @@ -110,62 +122,56 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com> fs/afs/dir_silly.c | 2 +- fs/cifs/readdir.c | 2 +- fs/dcache.c | 39 +- - fs/exec.c | 17 +- fs/fuse/readdir.c | 2 +- fs/inode.c | 2 +- - fs/io-wq.c | 52 +- fs/namei.c | 4 +- fs/namespace.c | 8 +- fs/nfs/dir.c | 4 +- fs/nfs/unlink.c | 4 +- - fs/proc/array.c | 4 +- fs/proc/base.c | 3 +- fs/proc/proc_sysctl.c | 2 +- + fs/pstore/platform.c | 5 +- include/asm-generic/preempt.h | 3 + + include/linux/blkdev.h | 2 +- include/linux/bottom_half.h | 8 +- - include/linux/console.h | 7 + - include/linux/cpuhotplug.h | 1 + - include/linux/cpumask.h | 6 + - include/linux/crash_core.h | 3 + + include/linux/console.h | 11 + include/linux/dcache.h | 4 +- include/linux/debug_locks.h | 3 +- - include/linux/delay.h | 6 + - include/linux/dev_printk.h | 8 + include/linux/entry-common.h | 2 +- include/linux/fs.h | 2 +- include/linux/hardirq.h | 3 +- - include/linux/highmem.h | 34 +- - include/linux/interrupt.h | 16 +- + include/linux/highmem-internal.h | 27 +- + include/linux/hrtimer.h | 6 + + include/linux/interrupt.h | 35 +- include/linux/irq_work.h | 6 + include/linux/irqdesc.h | 1 + include/linux/irqflags.h | 23 +- + include/linux/kcov.h | 1 + include/linux/kernel.h | 5 + - include/linux/local_lock_internal.h | 118 +- - include/linux/mhi.h | 3 +- + include/linux/kmsg_dump.h | 52 +- + include/linux/local_lock_internal.h | 126 +- include/linux/mm_types.h | 4 + include/linux/mutex.h | 34 +- - include/linux/mutex_rt.h | 131 + + include/linux/mutex_rt.h | 130 ++ include/linux/nfs_xdr.h | 2 +- + include/linux/notifier.h | 6 +- include/linux/pid.h | 1 + - include/linux/preempt.h | 160 +- - include/linux/printk.h | 26 +- + include/linux/preempt.h | 85 +- + include/linux/printk.h | 30 +- include/linux/random.h | 2 +- include/linux/rbtree.h | 27 +- - include/linux/rbtree_latch.h | 6 +- include/linux/rbtree_type.h | 31 + include/linux/rcupdate.h | 10 +- - include/linux/rtmutex.h | 47 +- - include/linux/rwlock_rt.h | 109 + + include/linux/rtmutex.h | 46 +- + include/linux/rwlock_rt.h | 109 ++ include/linux/rwlock_types.h | 4 + include/linux/rwlock_types_rt.h | 56 + - include/linux/rwsem-rt.h | 69 + + include/linux/rwsem-rt.h | 70 + include/linux/rwsem.h | 12 + - include/linux/sched.h | 117 +- - include/linux/sched/hotplug.h | 2 + + include/linux/sched.h | 106 +- include/linux/sched/mm.h | 11 + include/linux/sched/rt.h | 8 - include/linux/sched/wake_q.h | 13 +- - include/linux/seqlock.h | 368 ++- include/linux/serial_8250.h | 5 + include/linux/shmem_fs.h | 2 +- include/linux/signal.h | 1 + @@ -178,119 +184,125 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com> include/linux/spinlock_types_nort.h | 39 + include/linux/spinlock_types_raw.h | 65 + include/linux/spinlock_types_rt.h | 38 + - include/linux/spinlock_types_up.h | 4 - - include/linux/stop_machine.h | 5 + + include/linux/spinlock_types_up.h | 2 +- include/linux/thread_info.h | 12 +- - include/linux/trace_events.h | 2 
+ + include/linux/trace_events.h | 76 +- include/linux/u64_stats_sync.h | 42 +- include/linux/vmstat.h | 4 + include/linux/wait.h | 1 + include/linux/ww_mutex.h | 8 + + include/linux/zpool.h | 3 + include/net/gen_stats.h | 11 +- include/net/net_seq_lock.h | 15 + include/net/sch_generic.h | 27 +- - include/trace/events/sched.h | 12 + - init/Kconfig | 8 +- + init/Kconfig | 5 +- kernel/Kconfig.locks | 2 +- - kernel/Kconfig.preempt | 6 + + kernel/Kconfig.preempt | 7 + kernel/cgroup/cpuset.c | 70 +- kernel/cgroup/rstat.c | 5 +- - kernel/cpu.c | 9 +- + kernel/debug/kdb/kdb_main.c | 10 +- kernel/entry/common.c | 12 +- kernel/exit.c | 2 +- kernel/fork.c | 27 +- kernel/futex.c | 88 +- kernel/irq/handle.c | 8 +- - kernel/irq/manage.c | 8 +- + kernel/irq/manage.c | 12 +- kernel/irq/spurious.c | 8 + - kernel/irq_work.c | 58 +- + kernel/irq_work.c | 69 +- kernel/kexec_core.c | 1 - kernel/ksysfs.c | 12 + + kernel/kthread.c | 16 +- kernel/locking/Makefile | 10 +- kernel/locking/lockdep.c | 2 + - kernel/locking/mutex-rt.c | 222 ++ - kernel/locking/rtmutex-debug.c | 102 - + kernel/locking/mutex-rt.c | 224 +++ + kernel/locking/rtmutex-debug.c | 102 -- kernel/locking/rtmutex-debug.h | 11 - - kernel/locking/rtmutex.c | 936 ++++++- + kernel/locking/rtmutex.c | 939 +++++++++-- kernel/locking/rtmutex.h | 7 - kernel/locking/rtmutex_common.h | 36 +- - kernel/locking/rwlock-rt.c | 334 +++ - kernel/locking/rwsem-rt.c | 292 +++ + kernel/locking/rwlock-rt.c | 334 ++++ + kernel/locking/rwsem-rt.c | 318 ++++ kernel/locking/rwsem.c | 6 + kernel/locking/spinlock.c | 7 + kernel/locking/spinlock_debug.c | 5 + - kernel/panic.c | 5 +- - kernel/printk/Makefile | 2 +- + kernel/notifier.c | 12 +- + kernel/panic.c | 33 +- + kernel/printk/Makefile | 1 - kernel/printk/internal.h | 74 - - kernel/printk/printk.c | 2266 +++++++++-------- - kernel/printk/printk_ringbuffer.c | 2086 +++++++++++++++ - kernel/printk/printk_ringbuffer.h | 382 +++ - kernel/printk/printk_safe.c | 414 --- - kernel/ptrace.c | 9 +- + kernel/printk/printk.c | 1459 +++++++++-------- + kernel/printk/printk_safe.c | 414 ----- + kernel/ptrace.c | 32 +- kernel/rcu/Kconfig | 4 +- kernel/rcu/rcutorture.c | 97 +- kernel/rcu/tree.c | 4 +- kernel/rcu/update.c | 4 +- - kernel/sched/core.c | 1083 ++++++-- - kernel/sched/cpudeadline.c | 4 +- - kernel/sched/cpupri.c | 4 +- - kernel/sched/deadline.c | 47 +- + kernel/sched/core.c | 226 ++- + kernel/sched/cputime.c | 4 +- kernel/sched/fair.c | 16 +- kernel/sched/features.h | 8 + - kernel/sched/rt.c | 81 +- - kernel/sched/sched.h | 69 +- + kernel/sched/sched.h | 10 + kernel/sched/swait.c | 1 + - kernel/sched/topology.c | 1 + - kernel/signal.c | 105 +- - kernel/softirq.c | 372 ++- - kernel/stop_machine.c | 23 +- - kernel/time/hrtimer.c | 32 +- - kernel/time/sched_clock.c | 6 +- + kernel/sched/topology.c | 3 +- + kernel/signal.c | 103 +- + kernel/smp.c | 14 + + kernel/softirq.c | 309 +++- + kernel/time/hrtimer.c | 30 + kernel/time/tick-sched.c | 2 +- - kernel/time/timekeeping.c | 10 +- - kernel/time/timer.c | 2 + - kernel/trace/trace.c | 53 +- - kernel/trace/trace.h | 2 + - kernel/trace/trace_events.c | 2 + + kernel/time/timer.c | 8 +- + kernel/trace/blktrace.c | 17 +- + kernel/trace/trace.c | 231 +-- + kernel/trace/trace.h | 57 +- + kernel/trace/trace_branch.c | 6 +- + kernel/trace/trace_event_perf.c | 5 +- + kernel/trace/trace_events.c | 20 +- + kernel/trace/trace_events_inject.c | 6 +- + kernel/trace/trace_functions.c | 28 +- + kernel/trace/trace_functions_graph.c | 32 +- + kernel/trace/trace_hwlat.c | 7 +- + 
kernel/trace/trace_irqsoff.c | 86 +- + kernel/trace/trace_kprobe.c | 10 +- + kernel/trace/trace_mmiotrace.c | 14 +- kernel/trace/trace_output.c | 19 +- - kernel/workqueue.c | 4 + + kernel/trace/trace_sched_wakeup.c | 71 +- + kernel/trace/trace_syscalls.c | 20 +- + kernel/trace/trace_uprobe.c | 4 +- lib/Kconfig.debug | 2 +- - lib/cpumask.c | 18 + + lib/bug.c | 1 + lib/debugobjects.c | 5 +- - lib/dump_stack.c | 2 + lib/irq_poll.c | 5 + - lib/locking-selftest.c | 50 + + lib/locking-selftest.c | 51 + lib/nmi_backtrace.c | 6 - lib/scatterlist.c | 2 +- - lib/smp_processor_id.c | 5 + mm/Kconfig | 2 +- - mm/highmem.c | 5 +- - mm/memcontrol.c | 64 +- - mm/page_alloc.c | 180 +- + mm/memcontrol.c | 66 +- + mm/page_alloc.c | 53 +- mm/shmem.c | 31 +- mm/slab.c | 90 +- mm/slab.h | 2 +- - mm/slub.c | 145 +- - mm/swap.c | 65 +- + mm/slub.c | 249 ++- mm/vmalloc.c | 13 +- mm/vmstat.c | 12 + mm/workingset.c | 5 +- + mm/z3fold.c | 1 + + mm/zbud.c | 1 + + mm/zpool.c | 13 + mm/zsmalloc.c | 85 +- - mm/zswap.c | 41 +- + mm/zswap.c | 51 +- net/Kconfig | 2 +- net/core/dev.c | 33 +- net/core/gen_estimator.c | 6 +- net/core/gen_stats.c | 12 +- + net/core/skbuff.c | 1 + net/core/sock.c | 6 +- net/ipv4/inet_hashtables.c | 19 +- net/ipv6/inet6_hashtables.c | 5 +- + net/mac80211/iface.c | 1 + + net/mac80211/rx.c | 1 + net/sched/sch_api.c | 2 +- net/sched/sch_generic.c | 10 + net/sunrpc/svc_xprt.c | 4 +- net/xfrm/xfrm_state.c | 9 +- - scripts/gdb/linux/dmesg.py | 147 +- - scripts/gdb/linux/utils.py | 7 + - 283 files changed, 10833 insertions(+), 3450 deletions(-) + 295 files changed, 6753 insertions(+), 3061 deletions(-) create mode 100644 include/linux/mutex_rt.h create mode 100644 include/linux/rbtree_type.h create mode 100644 include/linux/rwlock_rt.h @@ -305,25 +317,168 @@ Signed-off-by: Robert Nelson <robertcnelson@gmail.com> create mode 100644 kernel/locking/rwlock-rt.c create mode 100644 kernel/locking/rwsem-rt.c delete mode 100644 kernel/printk/internal.h - create mode 100644 kernel/printk/printk_ringbuffer.c - create mode 100644 kernel/printk/printk_ringbuffer.h delete mode 100644 kernel/printk/printk_safe.c +diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +index 72f0f6fbd53c..6f89cf1e567d 100644 +--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst ++++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +@@ -38,7 +38,7 @@ sections. + RCU-preempt Expedited Grace Periods + =================================== + +-``CONFIG_PREEMPT=y`` kernels implement RCU-preempt. ++``CONFIG_PREEMPTION=y`` kernels implement RCU-preempt. + The overall flow of the handling of a given CPU by an RCU-preempt + expedited grace period is shown in the following diagram: + +@@ -112,7 +112,7 @@ things. + RCU-sched Expedited Grace Periods + --------------------------------- + +-``CONFIG_PREEMPT=n`` kernels implement RCU-sched. The overall flow of ++``CONFIG_PREEMPTION=n`` kernels implement RCU-sched. 
The overall flow of + the handling of a given CPU by an RCU-sched expedited grace period is + shown in the following diagram: + +diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst +index d4c9a016074b..4bb0aa36d4de 100644 +--- a/Documentation/RCU/Design/Requirements/Requirements.rst ++++ b/Documentation/RCU/Design/Requirements/Requirements.rst +@@ -78,7 +78,7 @@ RCU treats a nested set as one big RCU read-side critical section. + Production-quality implementations of ``rcu_read_lock()`` and + ``rcu_read_unlock()`` are extremely lightweight, and in fact have + exactly zero overhead in Linux kernels built for production use with +-``CONFIG_PREEMPT=n``. ++``CONFIG_PREEMPTION=n``. + + This guarantee allows ordering to be enforced with extremely low + overhead to readers, for example: +@@ -1182,7 +1182,7 @@ and has become decreasingly so as memory sizes have expanded and memory + costs have plummeted. However, as I learned from Matt Mackall's + `bloatwatch <http://elinux.org/Linux_Tiny-FAQ>`__ efforts, memory + footprint is critically important on single-CPU systems with +-non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny ++non-preemptible (``CONFIG_PREEMPTION=n``) kernels, and thus `tiny + RCU <https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com>`__ + was born. Josh Triplett has since taken over the small-memory banner + with his `Linux kernel tinification <https://tiny.wiki.kernel.org/>`__ +@@ -1498,7 +1498,7 @@ limitations. + + Implementations of RCU for which ``rcu_read_lock()`` and + ``rcu_read_unlock()`` generate no code, such as Linux-kernel RCU when +-``CONFIG_PREEMPT=n``, can be nested arbitrarily deeply. After all, there ++``CONFIG_PREEMPTION=n``, can be nested arbitrarily deeply. After all, there + is no overhead. Except that if all these instances of + ``rcu_read_lock()`` and ``rcu_read_unlock()`` are visible to the + compiler, compilation will eventually fail due to exhausting memory, +@@ -1771,7 +1771,7 @@ implementation can be a no-op. + + However, once the scheduler has spawned its first kthread, this early + boot trick fails for ``synchronize_rcu()`` (as well as for +-``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPT=y`` kernels. The ++``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPTION=y`` kernels. The + reason is that an RCU read-side critical section might be preempted, + which means that a subsequent ``synchronize_rcu()`` really does have to + wait for something, as opposed to simply returning immediately. +@@ -2040,7 +2040,7 @@ the following: + 5 rcu_read_unlock(); + 6 do_something_with(v, user_v); + +-If the compiler did make this transformation in a ``CONFIG_PREEMPT=n`` kernel ++If the compiler did make this transformation in a ``CONFIG_PREEMPTION=n`` kernel + build, and if ``get_user()`` did page fault, the result would be a quiescent + state in the middle of an RCU read-side critical section. This misplaced + quiescent state could result in line 4 being a use-after-free access, +@@ -2319,10 +2319,10 @@ decides to throw at it. + + The Linux kernel is used for real-time workloads, especially in + conjunction with the `-rt +-patchset <https://rt.wiki.kernel.org/index.php/Main_Page>`__. The ++patchset <https://wiki.linuxfoundation.org/realtime/>`__. The + real-time-latency response requirements are such that the traditional + approach of disabling preemption across RCU read-side critical sections +-is inappropriate. 
Kernels built with ``CONFIG_PREEMPT=y`` therefore use ++is inappropriate. Kernels built with ``CONFIG_PREEMPTION=y`` therefore use + an RCU implementation that allows RCU read-side critical sections to be + preempted. This requirement made its presence known after users made it + clear that an earlier `real-time +@@ -2444,7 +2444,7 @@ includes ``rcu_read_lock_bh()``, ``rcu_read_unlock_bh()``, + ``call_rcu_bh()``, ``rcu_barrier_bh()``, and + ``rcu_read_lock_bh_held()``. However, the update-side APIs are now + simple wrappers for other RCU flavors, namely RCU-sched in +-CONFIG_PREEMPT=n kernels and RCU-preempt otherwise. ++CONFIG_PREEMPTION=n kernels and RCU-preempt otherwise. + + Sched Flavor (Historical) + ~~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -2462,11 +2462,11 @@ not have this property, given that any point in the code outside of an + RCU read-side critical section can be a quiescent state. Therefore, + *RCU-sched* was created, which follows “classic†RCU in that an + RCU-sched grace period waits for pre-existing interrupt and NMI +-handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and ++handlers. In kernels built with ``CONFIG_PREEMPTION=n``, the RCU and + RCU-sched APIs have identical implementations, while kernels built with +-``CONFIG_PREEMPT=y`` provide a separate implementation for each. ++``CONFIG_PREEMPTION=y`` provide a separate implementation for each. + +-Note well that in ``CONFIG_PREEMPT=y`` kernels, ++Note well that in ``CONFIG_PREEMPTION=y`` kernels, + ``rcu_read_lock_sched()`` and ``rcu_read_unlock_sched()`` disable and + re-enable preemption, respectively. This means that if there was a + preemption attempt during the RCU-sched read-side critical section, +@@ -2629,10 +2629,10 @@ userspace execution also delimit tasks-RCU read-side critical sections. + + The tasks-RCU API is quite compact, consisting only of + ``call_rcu_tasks()``, ``synchronize_rcu_tasks()``, and +-``rcu_barrier_tasks()``. In ``CONFIG_PREEMPT=n`` kernels, trampolines ++``rcu_barrier_tasks()``. In ``CONFIG_PREEMPTION=n`` kernels, trampolines + cannot be preempted, so these APIs map to ``call_rcu()``, + ``synchronize_rcu()``, and ``rcu_barrier()``, respectively. In +-``CONFIG_PREEMPT=y`` kernels, trampolines can be preempted, and these ++``CONFIG_PREEMPTION=y`` kernels, trampolines can be preempted, and these + three APIs are therefore implemented by separate functions that check + for voluntary context switches. + diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst -index 2efed9926c3f..54a79d03438a 100644 +index bb7128eb322e..a56566549114 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst -@@ -214,8 +214,8 @@ over a rather long period of time, but improvements are always welcome! +@@ -214,7 +214,7 @@ over a rather long period of time, but improvements are always welcome! the rest of the system. 7. As of v4.20, a given kernel implements only one RCU flavor, - which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. -- If the updater uses call_rcu() or synchronize_rcu(), -+ which is RCU-sched for PREEMPTION=n and RCU-preempt for -+ PREEMPTION=y. If the updater uses call_rcu() or synchronize_rcu(), ++ which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y. 
+ If the updater uses call_rcu() or synchronize_rcu(), then the corresponding readers my use rcu_read_lock() and rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), - or any pair of primitives that disables and re-enables preemption, +diff --git a/Documentation/RCU/rcubarrier.rst b/Documentation/RCU/rcubarrier.rst +index f64f4413a47c..3b4a24877496 100644 +--- a/Documentation/RCU/rcubarrier.rst ++++ b/Documentation/RCU/rcubarrier.rst +@@ -9,7 +9,7 @@ RCU (read-copy update) is a synchronization mechanism that can be thought + of as a replacement for read-writer locking (among other things), but with + very low-overhead readers that are immune to deadlock, priority inversion, + and unbounded latency. RCU read-side critical sections are delimited +-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT ++by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPTION + kernels, generate no code whatsoever. + + This means that RCU writers are unaware of the presence of concurrent +@@ -329,10 +329,10 @@ Answer: This cannot happen. The reason is that on_each_cpu() has its last + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent +- a grace period from completing on non-CONFIG_PREEMPT kernels, ++ a grace period from completing on non-CONFIG_PREEMPTION kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is +- of no use in CONFIG_PREEMPT kernels. ++ of no use in CONFIG_PREEMPTION kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index c9ab6af4d3be..e97d1b4876ef 100644 --- a/Documentation/RCU/stallwarn.rst @@ -346,414 +501,84 @@ index c9ab6af4d3be..e97d1b4876ef 100644 happen to preempt a low-priority task in the middle of an RCU read-side critical section. This is especially damaging if that low-priority task is not permitted to run on any other CPU, -diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt b/Documentation/admin-guide/kdump/gdbmacros.txt -index 220d0a80ca2c..82aecdcae8a6 100644 ---- a/Documentation/admin-guide/kdump/gdbmacros.txt -+++ b/Documentation/admin-guide/kdump/gdbmacros.txt -@@ -170,57 +170,103 @@ document trapinfo - address the kernel panicked. 
- end - --define dump_log_idx -- set $idx = $arg0 -- if ($argc > 1) -- set $prev_flags = $arg1 -+define dump_record -+ set var $desc = $arg0 -+ set var $info = $arg1 -+ if ($argc > 2) -+ set var $prev_flags = $arg2 - else -- set $prev_flags = 0 -+ set var $prev_flags = 0 - end -- set $msg = ((struct printk_log *) (log_buf + $idx)) -- set $prefix = 1 -- set $newline = 1 -- set $log = log_buf + $idx + sizeof(*$msg) -- -- # prev & LOG_CONT && !(msg->flags & LOG_PREIX) -- if (($prev_flags & 8) && !($msg->flags & 4)) -- set $prefix = 0 -+ -+ set var $prefix = 1 -+ set var $newline = 1 -+ -+ set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits) -+ set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits) -+ -+ # handle data-less record -+ if ($begin & 1) -+ set var $text_len = 0 -+ set var $log = "" -+ else -+ # handle wrapping data block -+ if ($begin > $next) -+ set var $begin = 0 -+ end -+ -+ # skip over descriptor id -+ set var $begin = $begin + sizeof(long) -+ -+ # handle truncated message -+ if ($next - $begin < $info->text_len) -+ set var $text_len = $next - $begin -+ else -+ set var $text_len = $info->text_len -+ end -+ -+ set var $log = &prb->text_data_ring.data[$begin] -+ end -+ -+ # prev & LOG_CONT && !(info->flags & LOG_PREIX) -+ if (($prev_flags & 8) && !($info->flags & 4)) -+ set var $prefix = 0 - end - -- # msg->flags & LOG_CONT -- if ($msg->flags & 8) -+ # info->flags & LOG_CONT -+ if ($info->flags & 8) - # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) - if (($prev_flags & 8) && !($prev_flags & 2)) -- set $prefix = 0 -+ set var $prefix = 0 - end -- # (!(msg->flags & LOG_NEWLINE)) -- if (!($msg->flags & 2)) -- set $newline = 0 -+ # (!(info->flags & LOG_NEWLINE)) -+ if (!($info->flags & 2)) -+ set var $newline = 0 - end - end - - if ($prefix) -- printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 -+ printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000 - end -- if ($msg->text_len != 0) -- eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len -+ if ($text_len) -+ eval "printf \"%%%d.%ds\", $log", $text_len, $text_len - end - if ($newline) - printf "\n" - end -- if ($msg->dict_len > 0) -- set $dict = $log + $msg->text_len -- set $idx = 0 -- set $line = 1 -- while ($idx < $msg->dict_len) -- if ($line) -- printf " " -- set $line = 0 -+ -+ # handle dictionary data -+ -+ set var $dict = &$info->dev_info.subsystem[0] -+ set var $dict_len = sizeof($info->dev_info.subsystem) -+ if ($dict[0] != '\0') -+ printf " SUBSYSTEM=" -+ set var $idx = 0 -+ while ($idx < $dict_len) -+ set var $c = $dict[$idx] -+ if ($c == '\0') -+ loop_break -+ else -+ if ($c < ' ' || $c >= 127 || $c == '\\') -+ printf "\\x%02x", $c -+ else -+ printf "%c", $c -+ end - end -- set $c = $dict[$idx] -+ set var $idx = $idx + 1 -+ end -+ printf "\n" -+ end -+ -+ set var $dict = &$info->dev_info.device[0] -+ set var $dict_len = sizeof($info->dev_info.device) -+ if ($dict[0] != '\0') -+ printf " DEVICE=" -+ set var $idx = 0 -+ while ($idx < $dict_len) -+ set var $c = $dict[$idx] - if ($c == '\0') -- printf "\n" -- set $line = 1 -+ loop_break - else - if ($c < ' ' || $c >= 127 || $c == '\\') - printf "\\x%02x", $c -@@ -228,33 +274,46 @@ define dump_log_idx - printf "%c", $c - end - end -- set $idx = $idx + 1 -+ set var $idx = $idx + 1 - end - printf "\n" - end - end --document dump_log_idx -- Dump a single log given its index in the log buffer. 
The first -- parameter is the index into log_buf, the second is optional and -- specified the previous log buffer's flags, used for properly -- formatting continued lines. -+document dump_record -+ Dump a single record. The first parameter is the descriptor, -+ the second parameter is the info, the third parameter is -+ optional and specifies the previous record's flags, used for -+ properly formatting continued lines. - end - - define dmesg -- set $i = log_first_idx -- set $end_idx = log_first_idx -- set $prev_flags = 0 -+ # definitions from kernel/printk/printk_ringbuffer.h -+ set var $desc_committed = 1 -+ set var $desc_finalized = 2 -+ set var $desc_sv_bits = sizeof(long) * 8 -+ set var $desc_flags_shift = $desc_sv_bits - 2 -+ set var $desc_flags_mask = 3 << $desc_flags_shift -+ set var $id_mask = ~$desc_flags_mask -+ -+ set var $desc_count = 1U << prb->desc_ring.count_bits -+ set var $prev_flags = 0 -+ -+ set var $id = prb->desc_ring.tail_id.counter -+ set var $end_id = prb->desc_ring.head_id.counter - - while (1) -- set $msg = ((struct printk_log *) (log_buf + $i)) -- if ($msg->len == 0) -- set $i = 0 -- else -- dump_log_idx $i $prev_flags -- set $i = $i + $msg->len -- set $prev_flags = $msg->flags -+ set var $desc = &prb->desc_ring.descs[$id % $desc_count] -+ set var $info = &prb->desc_ring.infos[$id % $desc_count] -+ -+ # skip non-committed record -+ set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) -+ if ($state == $desc_committed || $state == $desc_finalized) -+ dump_record $desc $info $prev_flags -+ set var $prev_flags = $info->flags - end -- if ($i == $end_idx) -+ -+ set var $id = ($id + 1) & $id_mask -+ if ($id == $end_id) - loop_break - end - end -diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst -index 2baad0bfb09d..e44a6c01f336 100644 ---- a/Documentation/admin-guide/kdump/vmcoreinfo.rst -+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst -@@ -189,50 +189,123 @@ from this. - Free areas descriptor. User-space tools use this value to iterate the - free_area ranges. MAX_ORDER is used by the zone buddy allocator. - --log_first_idx -+prb -+--- -+ -+A pointer to the printk ringbuffer (struct printk_ringbuffer). This -+may be pointing to the static boot ringbuffer or the dynamically -+allocated ringbuffer, depending on when the the core dump occurred. -+Used by user-space tools to read the active kernel log buffer. -+ -+printk_rb_static -+---------------- -+ -+A pointer to the static boot printk ringbuffer. If @prb has a -+different value, this is useful for viewing the initial boot messages, -+which may have been overwritten in the dynamically allocated -+ringbuffer. -+ -+clear_seq -+--------- -+ -+The sequence number of the printk() record after the last clear -+command. It indicates the first record after the last -+SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space -+tools to dump a subset of the dmesg log. -+ -+printk_ringbuffer -+----------------- -+ -+The size of a printk_ringbuffer structure. This structure contains all -+information required for accessing the various components of the -+kernel log buffer. -+ -+(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail) -+----------------------------------------------------------------- -+ -+Offsets for the various components of the printk ringbuffer. Used by -+user-space tools to view the kernel log buffer without requiring the -+declaration of the structure. 
-+ -+prb_desc_ring - ------------- - --Index of the first record stored in the buffer log_buf. Used by --user-space tools to read the strings in the log_buf. -+The size of the prb_desc_ring structure. This structure contains -+information about the set of record descriptors. - --log_buf --------- -+(prb_desc_ring, count_bits|descs|head_id|tail_id) -+------------------------------------------------- -+ -+Offsets for the fields describing the set of record descriptors. Used -+by user-space tools to be able to traverse the descriptors without -+requiring the declaration of the structure. -+ -+prb_desc -+-------- -+ -+The size of the prb_desc structure. This structure contains -+information about a single record descriptor. -+ -+(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos) -+------------------------------------------------------ -+ -+Offsets for the fields describing a record descriptors. Used by -+user-space tools to be able to read descriptors without requiring -+the declaration of the structure. -+ -+prb_data_blk_lpos -+----------------- -+ -+The size of the prb_data_blk_lpos structure. This structure contains -+information about where the text or dictionary data (data block) is -+located within the respective data ring. -+ -+(prb_data_blk_lpos, begin|next) -+------------------------------- - --Console output is written to the ring buffer log_buf at index --log_first_idx. Used to get the kernel log. -+Offsets for the fields describing the location of a data block. Used -+by user-space tools to be able to locate data blocks without -+requiring the declaration of the structure. - --log_buf_len -+printk_info - ----------- - --log_buf's length. -+The size of the printk_info structure. This structure contains all -+the meta-data for a record. - --clear_idx ----------- -+(printk_info, seq|ts_nsec|text_len|dict_len|caller_id) -+------------------------------------------------------ - --The index that the next printk() record to read after the last clear --command. It indicates the first record after the last SYSLOG_ACTION --_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump --the dmesg log. -+Offsets for the fields providing the meta-data for a record. Used by -+user-space tools to be able to read the information without requiring -+the declaration of the structure. - --log_next_idx -------------- -+prb_data_ring -+------------- - --The index of the next record to store in the buffer log_buf. Used to --compute the index of the current buffer position. -+The size of the prb_data_ring structure. This structure contains -+information about a set of data blocks. - --printk_log ------------ -+(prb_data_ring, size_bits|data|head_lpos|tail_lpos) -+--------------------------------------------------- - --The size of a structure printk_log. Used to compute the size of --messages, and extract dmesg log. It encapsulates header information for --log_buf, such as timestamp, syslog level, etc. -+Offsets for the fields describing a set of data blocks. Used by -+user-space tools to be able to access the data blocks without -+requiring the declaration of the structure. - --(printk_log, ts_nsec|len|text_len|dict_len) --------------------------------------------- -+atomic_long_t -+------------- -+ -+The size of the atomic_long_t structure. Used by user-space tools to -+be able to copy the full structure, regardless of its -+architecture-specific implementation. -+ -+(atomic_long_t, counter) -+------------------------ - --It represents field offsets in struct printk_log. 
User space tools --parse it and check whether the values of printk_log's members have been --changed. -+Offset for the long value of an atomic_long_t variable. Used by -+user-space tools to access the long value without requiring the -+architecture-specific declaration. - - (free_area.free_list, MIGRATE_TYPES) - ------------------------------------ -diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst -index 62c5ad98c11c..a334b584f2b3 100644 ---- a/Documentation/locking/seqlock.rst -+++ b/Documentation/locking/seqlock.rst -@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired. - - Read path: same as in :ref:`seqcount_t`. - -+ -+.. _seqcount_latch_t: -+ -+Latch sequence counters (``seqcount_latch_t``) -+---------------------------------------------- -+ -+Latch sequence counters are a multiversion concurrency control mechanism -+where the embedded seqcount_t counter even/odd value is used to switch -+between two copies of protected data. This allows the sequence counter -+read path to safely interrupt its own write side critical section. -+ -+Use seqcount_latch_t when the write side sections cannot be protected -+from interruption by readers. This is typically the case when the read -+side can be invoked from NMI handlers. -+ -+Check `raw_write_seqcount_latch()` for more information. -+ -+ - .. _seqlock_t: - - Sequential locks (``seqlock_t``) -diff --git a/MAINTAINERS b/MAINTAINERS -index 867157311dc8..7ae63272d994 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -13960,6 +13960,7 @@ PRINTK - M: Petr Mladek <pmladek@suse.com> - M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> - R: Steven Rostedt <rostedt@goodmis.org> -+R: John Ogness <john.ogness@linutronix.de> - S: Maintained - F: include/linux/printk.h - F: kernel/printk/ +diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst +index 1a4723f48bd9..17e95ab2a201 100644 +--- a/Documentation/RCU/whatisRCU.rst ++++ b/Documentation/RCU/whatisRCU.rst +@@ -683,7 +683,7 @@ Quick Quiz #1: + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + This section presents a "toy" RCU implementation that is based on + "classic RCU". It is also short on performance (but only for updates) and +-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT ++on features such as hotplug CPU and the ability to run in CONFIG_PREEMPTION + kernels. The definitions of rcu_dereference() and rcu_assign_pointer() + are the same as those shown in the preceding section, so they are omitted. + :: +@@ -739,7 +739,7 @@ Quick Quiz #2: + Quick Quiz #3: + If it is illegal to block in an RCU read-side + critical section, what the heck do you do in +- PREEMPT_RT, where normal spinlocks can block??? ++ CONFIG_PREEMPT_RT, where normal spinlocks can block??? + + :ref:`Answers to Quick Quiz <8_whatisRCU>` + +@@ -1093,7 +1093,7 @@ Quick Quiz #2: + overhead is **negative**. + + Answer: +- Imagine a single-CPU system with a non-CONFIG_PREEMPT ++ Imagine a single-CPU system with a non-CONFIG_PREEMPTION + kernel where a routing table is used by process-context + code, but can be updated by irq-context code (for example, + by an "ICMP REDIRECT" packet). The usual way of handling +@@ -1120,10 +1120,10 @@ Answer: + Quick Quiz #3: + If it is illegal to block in an RCU read-side + critical section, what the heck do you do in +- PREEMPT_RT, where normal spinlocks can block??? ++ CONFIG_PREEMPT_RT, where normal spinlocks can block??? 
+ + Answer: +- Just as PREEMPT_RT permits preemption of spinlock ++ Just as CONFIG_PREEMPT_RT permits preemption of spinlock + critical sections, it permits preemption of RCU + read-side critical sections. It also permits + spinlocks blocking while in RCU read-side critical +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index a10b545c2070..9503320c6652 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4092,6 +4092,10 @@ + value, meaning that RCU_SOFTIRQ is used by default. + Specify rcutree.use_softirq=0 to use rcuc kthreads. + ++ But note that CONFIG_PREEMPT_RT=y kernels disable ++ this kernel boot parameter, forcibly setting it ++ to zero. ++ + rcutree.rcu_fanout_exact= [KNL] + Disable autobalancing of the rcu_node combining + tree. This is used by rcutorture, and might +@@ -4470,6 +4474,13 @@ + only normal grace-period primitives. No effect + on CONFIG_TINY_RCU kernels. + ++ But note that CONFIG_PREEMPT_RT=y kernels enables ++ this kernel boot parameter, forcibly setting ++ it to the value one, that is, converting any ++ post-boot attempt at an expedited RCU grace ++ period to instead use normal non-expedited ++ grace-period processing. ++ + rcupdate.rcu_task_ipi_delay= [KNL] + Set time in jiffies during which RCU tasks will + avoid sending IPIs, starting with the beginning diff --git a/arch/Kconfig b/arch/Kconfig -index af14a567b493..5c8e173dc7c2 100644 +index 24862d15f3a3..90c8f8518bb4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -34,6 +34,7 @@ config OPROFILE +@@ -37,6 +37,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE @@ -761,20 +586,6 @@ index af14a567b493..5c8e173dc7c2 100644 select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help -@@ -414,6 +415,13 @@ config MMU_GATHER_NO_GATHER - bool - depends on MMU_GATHER_TABLE_FREE - -+config ARCH_WANT_IRQS_OFF_ACTIVATE_MM -+ bool -+ help -+ Temporary select until all architectures can be converted to have -+ irqs disabled over activate_mm. Architectures that do IPI based TLB -+ shootdowns should enable this. 
-+ - config ARCH_HAVE_NMI_SAFE_CMPXCHG - bool - diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h index 1d5716bc060b..6883bc952d22 100644 --- a/arch/alpha/include/asm/spinlock_types.h @@ -791,10 +602,10 @@ index 1d5716bc060b..6883bc952d22 100644 volatile unsigned int lock; } arch_spinlock_t; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index e00d94b16658..b7c20565754d 100644 +index 138248999df7..321b8979222d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -31,6 +31,7 @@ config ARM +@@ -30,6 +30,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW @@ -802,16 +613,16 @@ index e00d94b16658..b7c20565754d 100644 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU -@@ -64,7 +65,7 @@ config ARM +@@ -66,7 +67,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_MMAP_RND_BITS if MMU - select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT -@@ -102,6 +103,7 @@ config ARM +@@ -107,6 +108,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -819,14 +630,14 @@ index e00d94b16658..b7c20565754d 100644 select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ -@@ -117,6 +119,7 @@ config ARM +@@ -122,6 +124,7 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM select RTC_LIB + select SET_FS select SYS_SUPPORTS_APM_EMULATION - # Above selects are sorted alphabetically; please add new ones diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index 5976958647fe..a37c0803954b 100644 --- a/arch/arm/include/asm/spinlock_types.h @@ -842,37 +653,11 @@ index 5976958647fe..a37c0803954b 100644 #define TICKET_SHIFT 16 typedef struct { -diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h -index 007d8fea7157..285e6248454f 100644 ---- a/arch/arm/include/asm/switch_to.h -+++ b/arch/arm/include/asm/switch_to.h -@@ -4,6 +4,13 @@ - - #include <linux/thread_info.h> - -+#if defined CONFIG_PREEMPT_RT && defined CONFIG_HIGHMEM -+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); -+#else -+static inline void -+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } -+#endif -+ - /* - * For v7 SMP cores running a preemptible kernel we may be pre-empted - * during a TLB maintenance operation, so execute an inner-shareable dsb -@@ -26,6 +33,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info - #define switch_to(prev,next,last) \ - do { \ - __complete_pending_tlbi(); \ -+ switch_kmaps(prev, next); \ - last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ - } while (0) - diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h -index 536b6b979f63..875aaf9af946 100644 +index 70d4cbc49ae1..b86418b4dfef 100644 --- a/arch/arm/include/asm/thread_info.h +++ 
b/arch/arm/include/asm/thread_info.h -@@ -46,6 +46,7 @@ struct cpu_context_save { +@@ -54,6 +54,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ @@ -880,39 +665,37 @@ index 536b6b979f63..875aaf9af946 100644 mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ -@@ -134,7 +135,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, - #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ - #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ --#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ -+#define TIF_NEED_RESCHED_LAZY 7 -+#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ + #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ + #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 9 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -@@ -143,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_UPROBE (1 << TIF_UPROBE) - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -158,7 +161,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) + + /* Checks for any syscall work in entry-common.S */ +@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + /* * Change these and you break ASM code in entry-common.S */ - #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ -- _TIF_NOTIFY_RESUME | _TIF_UPROBE) -+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -+ _TIF_NEED_RESCHED_LAZY) +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) - #endif /* __KERNEL__ */ - #endif /* __ASM_ARM_THREAD_INFO_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c -index a1570c8bab25..88a0eb048ca1 100644 +index be8050b0c3df..884e40a525ce 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c -@@ -41,6 +41,7 @@ int main(void) +@@ -42,6 +42,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -921,7 +704,7 @@ index a1570c8bab25..88a0eb048ca1 100644 DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S -index 55a47df04773..1e689a727cb9 100644 +index 0ea8529a4872..fa0d155d21b3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -206,11 +206,18 @@ __irq_svc: @@ -961,36 +744,8 @@ index 55a47df04773..1e689a727cb9 100644 #endif 
__und_fault: -diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S -index 271cb8a1eba1..fd039b1b3731 100644 ---- a/arch/arm/kernel/entry-common.S -+++ b/arch/arm/kernel/entry-common.S -@@ -53,7 +53,9 @@ __ret_fast_syscall: - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing -- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK -+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) -+ bne fast_work_pending -+ tst r1, #_TIF_SECCOMP - bne fast_work_pending - - -@@ -90,8 +92,11 @@ __ret_fast_syscall: - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing -- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK -+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) -+ bne do_slower_path -+ tst r1, #_TIF_SECCOMP - beq no_work_pending -+do_slower_path: - UNWIND(.fnend ) - ENDPROC(ret_fast_syscall) - diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c -index c1892f733f20..fb8c02954901 100644 +index a3a38d0a4c85..f04ccf19ab1f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) @@ -1004,17 +759,15 @@ index c1892f733f20..fb8c02954901 100644 } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c -index 5d9da61eff62..7f83a013dccc 100644 +index 5c48eb4fd0e5..77a720c1f402 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c -@@ -680,11 +680,9 @@ void handle_IPI(int ipinr, struct pt_regs *regs) +@@ -671,9 +671,7 @@ static void do_handle_IPI(int ipinr) break; case IPI_CPU_BACKTRACE: - printk_nmi_enter(); - irq_enter(); - nmi_cpu_backtrace(regs); - irq_exit(); + nmi_cpu_backtrace(get_irq_regs()); - printk_nmi_exit(); break; @@ -1043,134 +796,11 @@ index efa402025031..59487ee9fd61 100644 do_bad_area(addr, fsr, regs); return 0; } -diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c -index 187fab227b50..954a115f6205 100644 ---- a/arch/arm/mm/highmem.c -+++ b/arch/arm/mm/highmem.c -@@ -31,8 +31,14 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) - return *ptep; - } - -+static unsigned int fixmap_idx(int type) -+{ -+ return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); -+} -+ - void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) - { -+ pte_t pte = mk_pte(page, kmap_prot); - unsigned int idx; - unsigned long vaddr; - void *kmap; -@@ -53,7 +59,7 @@ void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) - - type = kmap_atomic_idx_push(); - -- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); -+ idx = fixmap_idx(type); - vaddr = __fix_to_virt(idx); - #ifdef CONFIG_DEBUG_HIGHMEM - /* -@@ -61,13 +67,16 @@ void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) - * Make sure it was indeed properly unmapped. - */ - BUG_ON(!pte_none(get_fixmap_pte(vaddr))); -+#endif -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = pte; - #endif - /* - * When debugging is off, kunmap_atomic leaves the previous mapping - * in place, so the contained TLB flush ensures the TLB is updated - * with the new mapping. 
- */ -- set_fixmap_pte(idx, mk_pte(page, prot)); -+ set_fixmap_pte(idx, pte); - - return (void *)vaddr; - } -@@ -80,16 +89,19 @@ void kunmap_atomic_high(void *kvaddr) - - if (kvaddr >= (void *)FIXADDR_START) { - type = kmap_atomic_idx(); -- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); -+ idx = fixmap_idx(type); - - if (cache_is_vivt()) - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = __pte(0); -+#endif - #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr != __fix_to_virt(idx)); -- set_fixmap_pte(idx, __pte(0)); - #else - (void) idx; /* to kill a warning */ - #endif -+ set_fixmap_pte(idx, __pte(0)); - kmap_atomic_idx_pop(); - } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { - /* this address was obtained through kmap_high_get() */ -@@ -100,22 +112,51 @@ EXPORT_SYMBOL(kunmap_atomic_high); - - void *kmap_atomic_pfn(unsigned long pfn) - { -+ pte_t pte = pfn_pte(pfn, kmap_prot); - unsigned long vaddr; - int idx, type; - struct page *page = pfn_to_page(pfn); - -- preempt_disable(); -+ migrate_disable(); - pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); -- idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); -+ idx = fixmap_idx(type); - vaddr = __fix_to_virt(idx); - #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(get_fixmap_pte(vaddr))); - #endif -- set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = pte; -+#endif -+ set_fixmap_pte(idx, pte); - - return (void *)vaddr; - } -+ -+#if defined CONFIG_PREEMPT_RT -+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) -+{ -+ int i; -+ -+ /* -+ * Clear @prev's kmap_atomic mappings -+ */ -+ for (i = 0; i < prev_p->kmap_idx; i++) { -+ int idx = fixmap_idx(i); -+ -+ set_fixmap_pte(idx, __pte(0)); -+ } -+ /* -+ * Restore @next_p's kmap_atomic mappings -+ */ -+ for (i = 0; i < next_p->kmap_idx; i++) { -+ int idx = fixmap_idx(i); -+ -+ if (!pte_none(next_p->kmap_pte[i])) -+ set_fixmap_pte(idx, next_p->kmap_pte[i]); -+ } -+} -+#endif diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index 6d232837cbee..4da217d5b84c 100644 +index f39568b28ec1..39bcde5ff5ec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -75,6 +75,7 @@ config ARM64 +@@ -76,6 +76,7 @@ config ARM64 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING @@ -1178,15 +808,15 @@ index 6d232837cbee..4da217d5b84c 100644 select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -@@ -169,6 +170,7 @@ config ARM64 - select HAVE_PERF_EVENTS +@@ -177,6 +178,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP -+ select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_PREEMPT_LAZY select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX -@@ -190,6 +192,7 @@ config ARM64 + select MMU_GATHER_RCU_TABLE_FREE +@@ -197,6 +199,7 @@ config ARM64 select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_SYSCALL if PCI @@ -1259,10 +889,10 @@ index 18782f0c4721..6672b05350b4 100644 #include <asm-generic/qrwlock_types.h> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h -index 5e784e16ee89..5d9f16f92c11 100644 +index 9f4e3b266f21..d3fa570c7235 100644 --- a/arch/arm64/include/asm/thread_info.h 
+++ b/arch/arm64/include/asm/thread_info.h -@@ -29,6 +29,7 @@ struct thread_info { +@@ -26,6 +26,7 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif @@ -1270,34 +900,37 @@ index 5e784e16ee89..5d9f16f92c11 100644 union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { -@@ -67,6 +68,7 @@ void arch_release_task_struct(struct task_struct *tsk); - #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +@@ -65,6 +66,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ - #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ -+#define TIF_NEED_RESCHED_LAZY 6 + #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ + #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 7 #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -93,14 +95,16 @@ void arch_release_task_struct(struct task_struct *tsk); - #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) - #define _TIF_UPROBE (1 << TIF_UPROBE) - #define _TIF_FSCHECK (1 << TIF_FSCHECK) -+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) - #define _TIF_32BIT (1 << TIF_32BIT) +@@ -95,8 +97,10 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ -- _TIF_UPROBE | _TIF_FSCHECK) -+ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) - -+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) - #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) +@@ -105,6 +109,8 @@ void arch_release_task_struct(struct task_struct *tsk); _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #ifdef CONFIG_SHADOW_CALL_STACK + #define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c -index 7d32fc959b1a..b2f29bd2ae87 100644 +index 301784463587..993589687994 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -30,6 +30,7 @@ int main(void) @@ -1305,14 +938,14 @@ index 7d32fc959b1a..b2f29bd2ae87 100644 DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); - DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); + #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S -index 55af8b504b65..de7cd345c633 100644 +index c9bae73f2621..1d3b9ceefb15 100644 --- a/arch/arm64/kernel/entry.S +++ 
b/arch/arm64/kernel/entry.S -@@ -624,9 +624,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING +@@ -678,9 +678,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs x0, daif orr x24, x24, x0 alternative_else_nop_endif @@ -1332,12 +965,12 @@ index 55af8b504b65..de7cd345c633 100644 +2: #endif - #ifdef CONFIG_ARM64_PSEUDO_NMI + mov x0, sp diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c -index 55c8f3ec6705..638a41b27feb 100644 +index 062b21f30f94..0ea2df6554e5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -224,6 +224,16 @@ static void sve_free(struct task_struct *task) +@@ -226,6 +226,16 @@ static void sve_free(struct task_struct *task) __sve_free(task); } @@ -1354,7 +987,7 @@ index 55c8f3ec6705..638a41b27feb 100644 /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored -@@ -1020,6 +1030,7 @@ void fpsimd_thread_switch(struct task_struct *next) +@@ -1022,6 +1032,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { int vl, supported_vl; @@ -1362,7 +995,7 @@ index 55c8f3ec6705..638a41b27feb 100644 if (!system_supports_fpsimd()) return; -@@ -1032,7 +1043,7 @@ void fpsimd_flush_thread(void) +@@ -1034,7 +1045,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); @@ -1371,7 +1004,7 @@ index 55c8f3ec6705..638a41b27feb 100644 /* * Reset the task vector length as required. -@@ -1066,6 +1077,7 @@ void fpsimd_flush_thread(void) +@@ -1068,6 +1079,7 @@ void fpsimd_flush_thread(void) } put_cpu_fpsimd_context(); @@ -1380,23 +1013,23 @@ index 55c8f3ec6705..638a41b27feb 100644 /* diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c -index 3b4f31f35e45..441a970fc7ce 100644 +index 6237486ff6bb..ab411f336c39 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -921,7 +921,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, - /* Check valid user FS if needed */ - addr_limit_user_check(); - +@@ -915,7 +915,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, + unsigned long thread_flags) + { + do { - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { /* Unmask Debug and SError for the next task */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c -index b588c3b5c2f0..19cb5b101226 100644 +index fe60d25c000e..c8ad6b98fbab 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c -@@ -681,7 +681,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +@@ -732,7 +732,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * involves poking the GIC, which must be done in a * non-preemptible context. 
*/ @@ -1405,7 +1038,7 @@ index b588c3b5c2f0..19cb5b101226 100644 kvm_pmu_flush_hwstate(vcpu); -@@ -730,7 +730,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +@@ -781,7 +781,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); @@ -1414,15 +1047,15 @@ index b588c3b5c2f0..19cb5b101226 100644 continue; } -@@ -802,7 +802,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +@@ -853,7 +853,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Exit types that need handling before we can be preempted */ handle_exit_early(vcpu, ret); - preempt_enable(); + migrate_enable(); - ret = handle_exit(vcpu, ret); - } + /* + * The ARMv8 architecture doesn't give the hypervisor diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h index 19d233497ba5..de72fb23016d 100644 --- a/arch/hexagon/include/asm/spinlock_types.h @@ -1453,32 +1086,19 @@ index 6e345fefcdca..681408d6816f 100644 typedef struct { volatile unsigned int lock; } arch_spinlock_t; -diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig -index 8f328298f8cc..7624e089b736 100644 ---- a/arch/mips/Kconfig -+++ b/arch/mips/Kconfig -@@ -2653,7 +2653,7 @@ config MIPS_CRC_SUPPORT - # - config HIGHMEM - bool "High Memory Support" -- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA -+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT - - config CPU_SUPPORTS_HIGHMEM - bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 787e829b6f25..7c4e90562f90 100644 +index 107bb4319e0e..876c0f683e2b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -143,6 +143,7 @@ config PPC - select ARCH_MIGHT_HAVE_PC_SERIO +@@ -147,6 +147,7 @@ config PPC select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW -+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS -@@ -225,6 +226,7 @@ config PPC +@@ -233,6 +234,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -1486,22 +1106,39 @@ index 787e829b6f25..7c4e90562f90 100644 select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API -@@ -246,6 +248,7 @@ config PPC - select OLD_SIGSUSPEND - select PCI_DOMAINS if PCI - select PCI_SYSCALL if PCI -+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select PPC_DAWR if PPC64 - select RTC_LIB - select SPARSE_IRQ -@@ -403,7 +406,7 @@ menu "Kernel options" - - config HIGHMEM - bool "High memory support" -- depends on PPC32 -+ depends on PPC32 && !PREEMPT_RT +@@ -240,6 +242,7 @@ config PPC + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select IOMMU_HELPER if PPC64 + select IRQ_DOMAIN +diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h +index cf091c4c22e5..7371f7e23c35 100644 +--- a/arch/powerpc/include/asm/cmpxchg.h ++++ b/arch/powerpc/include/asm/cmpxchg.h +@@ -5,7 +5,7 @@ + #ifdef __KERNEL__ + #include <linux/compiler.h> + 
#include <asm/synch.h> +-#include <linux/bug.h> ++#include <linux/bits.h> + + #ifdef __BIG_ENDIAN + #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h +index 0f3cdd8faa95..d45561e9e6ba 100644 +--- a/arch/powerpc/include/asm/simple_spinlock_types.h ++++ b/arch/powerpc/include/asm/simple_spinlock_types.h +@@ -2,7 +2,7 @@ + #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H - source "kernel/Kconfig.hz" +-#ifndef __LINUX_SPINLOCK_TYPES_H ++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H) + # error "please don't include this file directly" + #endif diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index c5d742f18021..cc6922a011ba 100644 @@ -1535,7 +1172,7 @@ index 1c8460e23583..b1653c160bab 100644 canary ^= LINUX_VERSION_CODE; canary &= CANARY_MASK; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h -index ca6c97025704..3eda5d38c418 100644 +index 3d8a47af7a25..03442b9afcfb 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -48,6 +48,8 @@ @@ -1547,7 +1184,7 @@ index ca6c97025704..3eda5d38c418 100644 unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -98,11 +100,12 @@ void arch_setup_new_exec(void); +@@ -96,11 +98,12 @@ void arch_setup_new_exec(void); #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ @@ -1563,7 +1200,7 @@ index ca6c97025704..3eda5d38c418 100644 #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? 
*/ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -@@ -111,6 +114,9 @@ void arch_setup_new_exec(void); +@@ -109,6 +112,9 @@ void arch_setup_new_exec(void); #endif #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -1573,30 +1210,31 @@ index ca6c97025704..3eda5d38c418 100644 /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) -@@ -130,6 +136,7 @@ void arch_setup_new_exec(void); +@@ -129,16 +135,19 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) - #define _TIF_FSCHECK (1<<TIF_FSCHECK) #define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ -@@ -139,8 +146,9 @@ void arch_setup_new_exec(void); + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ | _TIF_SYSCALL_EMU) + #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ ++ _TIF_NEED_RESCHED_LAZY | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ -- _TIF_FSCHECK) -+ _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) + _TIF_NOTIFY_SIGNAL) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c -index 8711c2164b45..3ded638add45 100644 +index b12d7c049bfe..c098f496bbee 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c -@@ -188,6 +188,7 @@ int main(void) +@@ -191,6 +191,7 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); OFFSET(TI_PREEMPT, thread_info, preempt_count); @@ -1605,10 +1243,10 @@ index 8711c2164b45..3ded638add45 100644 #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S -index f4d0af8e1136..22907e641938 100644 +index 1c9b0ccc2172..2650aea9d3c6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S -@@ -415,7 +415,9 @@ ret_from_syscall: +@@ -420,7 +420,9 @@ ret_from_syscall: mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO @@ -1619,7 +1257,7 @@ index f4d0af8e1136..22907e641938 100644 bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont -@@ -532,13 +534,13 @@ syscall_dotrace: +@@ -537,13 +539,13 @@ syscall_dotrace: b syscall_dotrace_cont syscall_exit_work: @@ -1635,7 +1273,7 @@ index f4d0af8e1136..22907e641938 100644 bne- 1f lwz r11,_CCR(r1) /* Load CR */ neg r3,r3 -@@ -547,12 +549,12 @@ syscall_exit_work: +@@ -552,12 +554,12 @@ syscall_exit_work: 1: stw r6,RESULT(r1) /* Save result */ stw r3,GPR3(r1) /* Update return value */ @@ -1650,7 +1288,7 @@ index f4d0af8e1136..22907e641938 100644 addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -@@ -942,7 +944,14 @@ resume_kernel: +@@ -940,7 +942,14 @@ resume_kernel: cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED @@ -1665,7 +1303,7 @@ index f4d0af8e1136..22907e641938 100644 lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? 
*/ beq restore_kuap /* don't schedule if so */ -@@ -1265,7 +1274,7 @@ global_dbcr0: +@@ -1258,7 +1267,7 @@ global_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ @@ -1674,8 +1312,8 @@ index f4d0af8e1136..22907e641938 100644 beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ -@@ -1286,7 +1295,7 @@ recheck: - SYNC +@@ -1277,7 +1286,7 @@ recheck: + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED @@ -1684,10 +1322,10 @@ index f4d0af8e1136..22907e641938 100644 andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S -index d9ed79415100..50f7a1b8a6c8 100644 +index 74d07dc0bb48..a241bb078aa9 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S -@@ -1081,7 +1081,7 @@ _GLOBAL(ret_from_except_lite) +@@ -1080,7 +1080,7 @@ _GLOBAL(ret_from_except_lite) li r10, -1 mtspr SPRN_DBSR,r10 b restore @@ -1696,7 +1334,7 @@ index d9ed79415100..50f7a1b8a6c8 100644 beq 2f bl restore_interrupts SCHEDULE_USER -@@ -1133,12 +1133,20 @@ resume_kernel: +@@ -1132,12 +1132,20 @@ resume_kernel: bne- 0b 1: @@ -1719,7 +1357,7 @@ index d9ed79415100..50f7a1b8a6c8 100644 cmpwi cr0,r8,0 bne restore ld r0,SOFTE(r1) -@@ -1159,7 +1167,7 @@ resume_kernel: +@@ -1158,7 +1166,7 @@ resume_kernel: * interrupted after loading SRR0/1. */ wrteei 0 @@ -1729,10 +1367,10 @@ index d9ed79415100..50f7a1b8a6c8 100644 restore: /* diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c -index bf21ebd36190..171e41ae389b 100644 +index cc7a6271b6b4..ef164a39cfa8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -784,10 +784,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly; +@@ -728,10 +728,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly; void *softirq_ctx[NR_CPUS] __read_mostly; void *hardirq_ctx[NR_CPUS] __read_mostly; @@ -1746,7 +1384,7 @@ index bf21ebd36190..171e41ae389b 100644 irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S -index b24f866fef81..a4b903822d50 100644 +index 717e658b90fd..08ee95ad6593 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -31,6 +31,7 @@ @@ -1766,7 +1404,7 @@ index b24f866fef81..a4b903822d50 100644 /* * void call_do_irq(struct pt_regs *regs, void *sp); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S -index 7bb46ad98207..442832bf599f 100644 +index 070465825c21..a6b33f7b3264 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,6 +27,7 @@ @@ -1785,11 +1423,52 @@ index 7bb46ad98207..442832bf599f 100644 _GLOBAL(call_do_irq) mflr r0 +diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c +index 532f22637783..1ef55f4b389a 100644 +--- a/arch/powerpc/kernel/nvram_64.c ++++ b/arch/powerpc/kernel/nvram_64.c +@@ -73,7 +73,8 @@ static const char *nvram_os_partitions[] = { + }; + + static void oops_to_nvram(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason); ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter); + + static struct kmsg_dumper nvram_kmsg_dumper = { + .dump = oops_to_nvram +@@ -643,7 +644,8 @@ void __init nvram_init_oops_partition(int rtas_partition_exists) + * partition. If that's too much, go back and capture uncompressed text. 
+ */ + static void oops_to_nvram(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + static unsigned int oops_count = 0; +@@ -681,13 +683,13 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, + return; + + if (big_oops_buf) { +- kmsg_dump_get_buffer(dumper, false, ++ kmsg_dump_get_buffer(iter, false, + big_oops_buf, big_oops_buf_sz, &text_len); + rc = zip_oops(text_len); + } + if (rc != 0) { +- kmsg_dump_rewind(dumper); +- kmsg_dump_get_buffer(dumper, false, ++ kmsg_dump_rewind(iter); ++ kmsg_dump_get_buffer(iter, false, + oops_data, oops_data_sz, &text_len); + err_type = ERR_TYPE_KERNEL_PANIC; + oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c -index 8e50818aa50b..4be309aedbb4 100644 +index 7c85ed04a164..092c014b0653 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c -@@ -193,7 +193,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, +@@ -217,7 +217,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = READ_ONCE(*ti_flagsp); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); @@ -1798,7 +1477,7 @@ index 8e50818aa50b..4be309aedbb4 100644 schedule(); } else { /* -@@ -277,7 +277,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned +@@ -307,7 +307,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned ti_flags = READ_ONCE(*ti_flagsp); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); /* returning to user: may enable */ @@ -1807,7 +1486,7 @@ index 8e50818aa50b..4be309aedbb4 100644 schedule(); } else { if (ti_flags & _TIF_SIGPENDING) -@@ -361,11 +361,14 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign +@@ -395,11 +395,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign /* Returning to a kernel context with local irqs enabled. 
*/ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: @@ -1818,13 +1497,14 @@ index 8e50818aa50b..4be309aedbb4 100644 if (preempt_count() == 0) preempt_schedule_irq(); + } else if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED_LAZY)) { -+ if (current_thread_info()->preempt_lazy_count == 0) ++ if ((preempt_count() == 0) && ++ (current_thread_info()->preempt_lazy_count == 0)) + preempt_schedule_irq(); } } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c -index d1ebe152f210..f5a1468b8d5b 100644 +index 3ec7b443fe6b..e7bb99775ffe 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -170,7 +170,6 @@ extern void panic_flush_kmsg_start(void) @@ -1896,8 +1576,42 @@ index 549591d9aaa2..efb5bfe93f70 100644 select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING +diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c +index afab328d0887..d6c3f0b79f1d 100644 +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -54,7 +54,6 @@ + + #include <mm/mmu_decl.h> + +-static DEFINE_MUTEX(linear_mapping_mutex); + unsigned long long memory_limit; + bool init_mem_is_free; + +@@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + EXPORT_SYMBOL(phys_mem_access_prot); + + #ifdef CONFIG_MEMORY_HOTPLUG ++static DEFINE_MUTEX(linear_mapping_mutex); + + #ifdef CONFIG_NUMA + int memory_add_physaddr_to_nid(u64 start) +diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c +index 6c3bc4b4da98..ec862846bc82 100644 +--- a/arch/powerpc/platforms/powernv/opal-kmsg.c ++++ b/arch/powerpc/platforms/powernv/opal-kmsg.c +@@ -20,7 +20,8 @@ + * message, it just ensures that OPAL completely flushes the console buffer. + */ + static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + /* + * Outside of a panic context the pollers will continue to run, diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c -index 6d47b4a3ce39..15eef4d607ed 100644 +index 9fc5217f0c8e..4fdb9370b913 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -24,6 +24,7 @@ @@ -1908,68 +1622,114 @@ index 6d47b4a3ce39..15eef4d607ed 100644 #include <asm/io.h> #include <asm/prom.h> #include <asm/rtas.h> -@@ -177,6 +178,7 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, +@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + return ret; } - static DEFINE_PER_CPU(__be64 *, tce_page); -+static DEFINE_LOCAL_IRQ_LOCK(tcp_page_lock); +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, -@@ -198,7 +200,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, +@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, direction, attrs); } - local_irq_save(flags); /* to protect tcep and the page behind it */ + /* to protect tcep and the page behind it */ -+ local_lock_irqsave(tcp_page_lock, flags); ++ local_lock_irqsave(&tce_page.lock, flags); - tcep = __this_cpu_read(tce_page); +- tcep = __this_cpu_read(tce_page); ++ tcep = 
__this_cpu_read(tce_page.page); -@@ -209,7 +212,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() +@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - local_irq_restore(flags); -+ local_unlock_irqrestore(tcp_page_lock, flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, tbl->it_page_shift, npages, uaddr, direction, attrs); -@@ -244,7 +247,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + rpn = __pa(uaddr) >> TCE_SHIFT; +@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, tcenum += limit; } while (npages > 0 && !rc); - local_irq_restore(flags); -+ local_unlock_irqrestore(tcp_page_lock, flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; -@@ -415,13 +418,14 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, +@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, DMA_BIDIRECTIONAL, 0); } - local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); + /* to protect tcep and the page behind it */ -+ local_lock_irq(tcp_page_lock); - tcep = __this_cpu_read(tce_page); ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { - local_irq_enable(); -+ local_unlock_irq(tcp_page_lock); ++ local_unlock_irq(&tce_page.lock); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); -@@ -467,7 +471,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, /* error cleanup: caller will clear whole range */ - local_irq_enable(); -+ local_unlock_irq(tcp_page_lock); ++ local_unlock_irq(&tce_page.lock); return rc; } +diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c +index dcd817ca2edf..f51367a3b231 100644 +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -3005,7 +3005,7 @@ print_address(unsigned long addr) + static void + dump_log_buf(void) + { +- struct kmsg_dumper dumper = { .active = 1 }; ++ struct kmsg_dumper_iter iter = { .active = 1 }; + unsigned char buf[128]; + size_t len; + +@@ -3017,9 +3017,9 @@ dump_log_buf(void) + catch_memory_errors = 1; + sync(); + +- kmsg_dump_rewind_nolock(&dumper); ++ kmsg_dump_rewind(&iter); + xmon_start_pagination(); +- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) { ++ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) { + buf[len] = '\0'; + printf("%s", buf); + } diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index cfed272e4fd5..8e28e8176ec8 100644 --- a/arch/s390/include/asm/spinlock_types.h @@ -2001,7 +1761,7 @@ index e82369f286a2..22ca9a98bbb8 100644 volatile unsigned int lock; } arch_spinlock_t; diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c -index 
5717c7cbdd97..c4e46252377e 100644 +index ab5f790b0cd2..5db7af565dec 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -148,6 +148,7 @@ void irq_ctx_exit(int cpu) @@ -2040,19 +1800,63 @@ index 3ec9f1402aad..eb21682abfcb 100644 #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) +diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c +index 6516ef1f8274..deab9b56b51f 100644 +--- a/arch/um/kernel/kmsg_dump.c ++++ b/arch/um/kernel/kmsg_dump.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + #include <linux/kmsg_dump.h> ++#include <linux/spinlock.h> + #include <linux/console.h> + #include <linux/string.h> + #include <shared/init.h> +@@ -7,10 +8,13 @@ + #include <os.h> + + static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { ++ static DEFINE_SPINLOCK(lock); + static char line[1024]; + struct console *con; ++ unsigned long flags; + size_t len = 0; + + /* only dump kmsg when no console is available */ +@@ -29,11 +33,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, + if (con) + return; + ++ if (!spin_trylock_irqsave(&lock, flags)) ++ return; ++ + printf("kmsg_dump:\n"); +- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { ++ while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) { + line[len] = '\0'; + printf("%s", line); + } ++ ++ spin_unlock_irqrestore(&lock, flags); + } + + static struct kmsg_dumper kmsg_dumper = { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 7101ac64bb20..13a1f43f6034 100644 +index 21f851179ff0..f4b80446b40a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -92,6 +92,7 @@ config X86 - select ARCH_SUPPORTS_ACPI - select ARCH_SUPPORTS_ATOMIC_RMW +@@ -96,6 +96,7 @@ config X86 + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 + select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS -@@ -208,6 +209,7 @@ config X86 +@@ -215,6 +216,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -2349,38 +2153,17 @@ index d3d91a0abf88..6d0774721514 100644 nbytes = walk.nbytes; } diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h -index b774c52e5411..7959a9c10e5a 100644 +index 67a4f1cb2aac..41d3be7da969 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h -@@ -23,6 +23,7 @@ extern void kernel_fpu_begin(void); +@@ -28,6 +28,7 @@ extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +extern void kernel_fpu_resched(void); - /* - * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. -@@ -33,12 +34,18 @@ extern void fpregs_mark_activate(void); - static inline void fpregs_lock(void) - { - preempt_disable(); -- local_bh_disable(); -+ /* -+ * On RT disabling preemption is good enough because bottom halfs -+ * are always running in thread context. 
-+ */ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_disable(); - } - - static inline void fpregs_unlock(void) - { -- local_bh_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_enable(); - preempt_enable(); - } - + /* Code that is unaware of kernel_fpu_begin_mask() can use this */ + static inline void kernel_fpu_begin(void) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 69485ca13665..471dec2d78e1 100644 --- a/arch/x86/include/asm/preempt.h @@ -2494,51 +2277,41 @@ index 7fb482f0f25b..3df0a95c9e13 100644 canary += tsc + (tsc << 32UL); canary &= CANARY_MASK; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 267701ae3d86..350e342d9f25 100644 +index 0d751d5da702..2e62434951fa 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -56,17 +56,24 @@ struct task_struct; - struct thread_info { +@@ -57,11 +57,14 @@ struct thread_info { unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ u32 status; /* thread synchronous flags */ + int preempt_lazy_count; /* 0 => lazy preemptable -+ <0 => BUG */ ++ <0 => BUG */ }; #define INIT_THREAD_INFO(tsk) \ { \ .flags = 0, \ -+ .preempt_lazy_count = 0, \ ++ .preempt_lazy_count = 0, \ } #else /* !__ASSEMBLY__ */ - - #include <asm/asm-offsets.h> - -+#define GET_THREAD_INFO(reg) \ -+ _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ -+ _ASM_SUB $(THREAD_SIZE),reg ; -+ - #endif - - /* -@@ -93,6 +100,7 @@ struct thread_info { +@@ -90,6 +93,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ - #define TIF_IA32 17 /* IA32 compatibility process */ + #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -123,6 +131,7 @@ struct thread_info { +@@ -113,6 +117,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) - #define _TIF_IA32 (1 << TIF_IA32) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -@@ -156,6 +165,8 @@ struct thread_info { +@@ -143,6 +148,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -2548,10 +2321,10 @@ index 267701ae3d86..350e342d9f25 100644 /* diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c -index 31125448b174..0a3270aadfce 100644 +index 43b54bef5448..2471e53b293f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c -@@ -75,11 +75,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +@@ -80,11 +80,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -2566,10 +2339,10 @@ index 31125448b174..0a3270aadfce 100644 set_irq_regs(old_regs); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c -index eb86a2b831b1..23aaf9e132e9 100644 +index 571220ac8bea..d315d45b64fa 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c -@@ -158,6 +158,18 @@ void kernel_fpu_end(void) +@@ -159,6 
+159,18 @@ void kernel_fpu_end(void) } EXPORT_SYMBOL_GPL(kernel_fpu_end); @@ -2622,116 +2395,11 @@ index 440eed558558..7cfc4e6b7c94 100644 run_on_irqstack_cond(__do_softirq, NULL); } +#endif -diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c -index 4f2f54e1281c..17189e47cee8 100644 ---- a/arch/x86/kernel/process_32.c -+++ b/arch/x86/kernel/process_32.c -@@ -38,6 +38,7 @@ - #include <linux/io.h> - #include <linux/kdebug.h> - #include <linux/syscalls.h> -+#include <linux/highmem.h> - - #include <asm/ldt.h> - #include <asm/processor.h> -@@ -126,6 +127,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) - } - EXPORT_SYMBOL_GPL(start_thread); - -+#ifdef CONFIG_PREEMPT_RT -+static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) -+{ -+ int i; -+ -+ /* -+ * Clear @prev's kmap_atomic mappings -+ */ -+ for (i = 0; i < prev_p->kmap_idx; i++) { -+ int idx = i + KM_TYPE_NR * smp_processor_id(); -+ pte_t *ptep = kmap_pte - idx; -+ -+ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); -+ } -+ /* -+ * Restore @next_p's kmap_atomic mappings -+ */ -+ for (i = 0; i < next_p->kmap_idx; i++) { -+ int idx = i + KM_TYPE_NR * smp_processor_id(); -+ -+ if (!pte_none(next_p->kmap_pte[i])) -+ set_pte(kmap_pte - idx, next_p->kmap_pte[i]); -+ } -+} -+#else -+static inline void -+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } -+#endif -+ - - /* - * switch_to(x,y) should switch tasks from x to y. -@@ -187,6 +217,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) - - switch_to_extra(prev_p, next_p); - -+ switch_kmaps(prev_p, next_p); -+ - /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so -diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c -index 49d925043171..f70dffc2771f 100644 ---- a/arch/x86/kernel/tsc.c -+++ b/arch/x86/kernel/tsc.c -@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource; - - struct cyc2ns { - struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ -- seqcount_t seq; /* 32 + 4 = 36 */ -+ seqcount_latch_t seq; /* 32 + 4 = 36 */ - - }; /* fits one cacheline */ - -@@ -73,14 +73,14 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) - preempt_disable_notrace(); - - do { -- seq = this_cpu_read(cyc2ns.seq.sequence); -+ seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); - idx = seq & 1; - - data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); - data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); - data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); - -- } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); -+ } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); - } - - __always_inline void cyc2ns_read_end(void) -@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu(void) - { - struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); - -- seqcount_init(&c2n->seq); -+ seqcount_latch_init(&c2n->seq); - __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc()); - } - -@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary_cpus(void) - - for_each_possible_cpu(cpu) { - if (cpu != this_cpu) { -- seqcount_init(&c2n->seq); -+ seqcount_latch_init(&c2n->seq); - c2n = per_cpu_ptr(&cyc2ns, cpu); - c2n->data[0] = data[0]; - c2n->data[1] = data[1]; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index ce856e0ece84..257dcd9ba1a0 100644 +index 1b404e4d7dd8..babf6c0a7685 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ 
-7513,6 +7513,14 @@ int kvm_arch_init(void *opaque) +@@ -7907,6 +7907,14 @@ int kvm_arch_init(void *opaque) goto out; } @@ -2746,84 +2414,6 @@ index ce856e0ece84..257dcd9ba1a0 100644 r = -ENOMEM; x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu), __alignof__(struct fpu), SLAB_ACCOUNT, -diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c -index 075fe51317b0..95da91a7c7af 100644 ---- a/arch/x86/mm/highmem_32.c -+++ b/arch/x86/mm/highmem_32.c -@@ -8,12 +8,17 @@ void *kmap_atomic_high_prot(struct page *page, pgprot_t prot) - { - unsigned long vaddr; - int idx, type; -+ pte_t pte; - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); -- set_pte(kmap_pte-idx, mk_pte(page, prot)); -+ pte = mk_pte(page, prot); -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = pte; -+#endif -+ set_pte(kmap_pte-idx, pte); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -@@ -50,6 +55,9 @@ void kunmap_atomic_high(void *kvaddr) - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. - */ -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = __pte(0); -+#endif - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - arch_flush_lazy_mmu_mode(); -diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c -index f60398aeb644..0ef360874c75 100644 ---- a/arch/x86/mm/iomap_32.c -+++ b/arch/x86/mm/iomap_32.c -@@ -46,16 +46,22 @@ EXPORT_SYMBOL_GPL(iomap_free); - - void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) - { -+ pte_t pte = pfn_pte(pfn, prot); - unsigned long vaddr; - int idx, type; - -- preempt_disable(); -+ migrate_disable(); - pagefault_disable(); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -- set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); -+ WARN_ON(!pte_none(*(kmap_pte - idx))); -+ -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = pte; -+#endif -+ set_pte(kmap_pte - idx, pte); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -@@ -106,11 +112,14 @@ iounmap_atomic(void __iomem *kvaddr) - * is a bad idea also, in case the page changes cacheability - * attributes or becomes a protected page in a hypervisor. 
- */ -+#ifdef CONFIG_PREEMPT_RT -+ current->kmap_pte[type] = __pte(0); -+#endif - kpte_clear_flush(kmap_pte-idx, vaddr); - kmap_atomic_idx_pop(); - } - - pagefault_enable(); -- preempt_enable(); -+ migrate_enable(); - } - EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h index 64c9389254f1..dc846323b1cd 100644 --- a/arch/xtensa/include/asm/spinlock_types.h @@ -2840,43 +2430,188 @@ index 64c9389254f1..dc846323b1cd 100644 #include <asm-generic/qrwlock_types.h> diff --git a/block/blk-mq.c b/block/blk-mq.c -index cdced4aca2e8..e37aa31332b7 100644 +index f285a9123a8b..e284fc612f10 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -604,6 +604,7 @@ static void blk_mq_trigger_softirq(struct request *rq) - if (list->next == &rq->ipi_list) - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_restore(flags); -+ preempt_check_resched_rt(); - } +@@ -41,7 +41,7 @@ + #include "blk-mq-sched.h" + #include "blk-rq-qos.h" - static int blk_softirq_cpu_dead(unsigned int cpu) -@@ -617,6 +618,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); -+ preempt_check_resched_rt(); +-static DEFINE_PER_CPU(struct list_head, blk_cpu_done); ++static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); - return 0; + static void blk_mq_poll_stats_start(struct request_queue *q); + static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +@@ -567,80 +567,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error) } -@@ -1603,14 +1605,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, - return; - - if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { -- int cpu = get_cpu(); -+ int cpu = get_cpu_light(); - if (cpumask_test_cpu(cpu, hctx->cpumask)) { - __blk_mq_run_hw_queue(hctx); -- put_cpu(); -+ put_cpu_light(); - return; - } + EXPORT_SYMBOL(blk_mq_end_request); -- put_cpu(); -+ put_cpu_light(); - } +-/* +- * Softirq action handler - move entries to local list and loop over them +- * while passing them to the queue registered handler. +- */ +-static __latent_entropy void blk_done_softirq(struct softirq_action *h) ++static void blk_complete_reqs(struct llist_head *list) + { +- struct list_head *cpu_list, local_list; +- +- local_irq_disable(); +- cpu_list = this_cpu_ptr(&blk_cpu_done); +- list_replace_init(cpu_list, &local_list); +- local_irq_enable(); +- +- while (!list_empty(&local_list)) { +- struct request *rq; ++ struct llist_node *entry = llist_reverse_order(llist_del_all(list)); ++ struct request *rq, *next; + +- rq = list_entry(local_list.next, struct request, ipi_list); +- list_del_init(&rq->ipi_list); ++ llist_for_each_entry_safe(rq, next, entry, ipi_list) + rq->q->mq_ops->complete(rq); +- } + } + +-static void blk_mq_trigger_softirq(struct request *rq) ++static __latent_entropy void blk_done_softirq(struct softirq_action *h) + { +- struct list_head *list; +- unsigned long flags; +- +- local_irq_save(flags); +- list = this_cpu_ptr(&blk_cpu_done); +- list_add_tail(&rq->ipi_list, list); +- +- /* +- * If the list only contains our just added request, signal a raise of +- * the softirq. If there are already entries there, someone already +- * raised the irq but it hasn't run yet. 
+- */ +- if (list->next == &rq->ipi_list) +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_restore(flags); ++ blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); + } + + static int blk_softirq_cpu_dead(unsigned int cpu) + { +- /* +- * If a CPU goes away, splice its entries to the current CPU +- * and trigger a run of the softirq +- */ +- local_irq_disable(); +- list_splice_init(&per_cpu(blk_cpu_done, cpu), +- this_cpu_ptr(&blk_cpu_done)); +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_enable(); +- ++ blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); + return 0; + } + +- + static void __blk_mq_complete_request_remote(void *data) + { +- struct request *rq = data; +- +- /* +- * For most of single queue controllers, there is only one irq vector +- * for handling I/O completion, and the only irq's affinity is set +- * to all possible CPUs. On most of ARCHs, this affinity means the irq +- * is handled on one specific CPU. +- * +- * So complete I/O requests in softirq context in case of single queue +- * devices to avoid degrading I/O performance due to irqsoff latency. +- */ +- if (rq->q->nr_hw_queues == 1) +- blk_mq_trigger_softirq(rq); +- else +- rq->q->mq_ops->complete(rq); ++ __raise_softirq_irqoff(BLOCK_SOFTIRQ); + } + + static inline bool blk_mq_complete_need_ipi(struct request *rq) +@@ -669,6 +618,30 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq) + return cpu_online(rq->mq_ctx->cpu); + } + ++static void blk_mq_complete_send_ipi(struct request *rq) ++{ ++ struct llist_head *list; ++ unsigned int cpu; ++ ++ cpu = rq->mq_ctx->cpu; ++ list = &per_cpu(blk_cpu_done, cpu); ++ if (llist_add(&rq->ipi_list, list)) { ++ INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); ++ smp_call_function_single_async(cpu, &rq->csd); ++ } ++} ++ ++static void blk_mq_raise_softirq(struct request *rq) ++{ ++ struct llist_head *list; ++ ++ preempt_disable(); ++ list = this_cpu_ptr(&blk_cpu_done); ++ if (llist_add(&rq->ipi_list, list)) ++ raise_softirq(BLOCK_SOFTIRQ); ++ preempt_enable(); ++} ++ + bool blk_mq_complete_request_remote(struct request *rq) + { + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); +@@ -681,15 +654,15 @@ bool blk_mq_complete_request_remote(struct request *rq) + return false; + + if (blk_mq_complete_need_ipi(rq)) { +- INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); +- smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); +- } else { +- if (rq->q->nr_hw_queues > 1) +- return false; +- blk_mq_trigger_softirq(rq); ++ blk_mq_complete_send_ipi(rq); ++ return true; + } + +- return true; ++ if (rq->q->nr_hw_queues == 1) { ++ blk_mq_raise_softirq(rq); ++ return true; ++ } ++ return false; + } + EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); + +@@ -1587,14 +1560,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, + return; + + if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { +- int cpu = get_cpu(); ++ int cpu = get_cpu_light(); + if (cpumask_test_cpu(cpu, hctx->cpumask)) { + __blk_mq_run_hw_queue(hctx); +- put_cpu(); ++ put_cpu_light(); + return; + } + +- put_cpu(); ++ put_cpu_light(); + } kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, +@@ -3904,7 +3877,7 @@ static int __init blk_mq_init(void) + int i; + + for_each_possible_cpu(i) +- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); ++ init_llist_head(&per_cpu(blk_cpu_done, i)); + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); + + cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 
a1bea0f4baa8..5f8ca8c1f59c 100644 --- a/crypto/cryptd.c @@ -2938,96 +2673,24 @@ index a1bea0f4baa8..5f8ca8c1f59c 100644 if (!req) return; -diff --git a/drivers/base/core.c b/drivers/base/core.c -index bb5806a2bd4c..f90e9f77bf8c 100644 ---- a/drivers/base/core.c -+++ b/drivers/base/core.c -@@ -4061,22 +4061,21 @@ void device_shutdown(void) - */ - - #ifdef CONFIG_PRINTK --static int --create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) -+static void -+set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) - { - const char *subsys; -- size_t pos = 0; -+ -+ memset(dev_info, 0, sizeof(*dev_info)); - - if (dev->class) - subsys = dev->class->name; - else if (dev->bus) - subsys = dev->bus->name; - else -- return 0; -+ return; - -- pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); -- if (pos >= hdrlen) -- goto overflow; -+ strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); - - /* - * Add device identifier DEVICE=: -@@ -4092,41 +4091,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) - c = 'b'; - else - c = 'c'; -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=%c%u:%u", -- c, MAJOR(dev->devt), MINOR(dev->devt)); -+ -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); - } else if (strcmp(subsys, "net") == 0) { - struct net_device *net = to_net_dev(dev); - -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=n%u", net->ifindex); -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "n%u", net->ifindex); - } else { -- pos++; -- pos += snprintf(hdr + pos, hdrlen - pos, -- "DEVICE=+%s:%s", subsys, dev_name(dev)); -+ snprintf(dev_info->device, sizeof(dev_info->device), -+ "+%s:%s", subsys, dev_name(dev)); +diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c +index 316a9947541f..e96a4e8a4a10 100644 +--- a/drivers/atm/eni.c ++++ b/drivers/atm/eni.c +@@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc,struct sk_buff *skb) } -- -- if (pos >= hdrlen) -- goto overflow; -- -- return pos; -- --overflow: -- dev_WARN(dev, "device/subsystem name too long"); -- return 0; - } - - int dev_vprintk_emit(int level, const struct device *dev, - const char *fmt, va_list args) - { -- char hdr[128]; -- size_t hdrlen; -+ struct dev_printk_info dev_info; - -- hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); -+ set_dev_info(dev, &dev_info); - -- return vprintk_emit(0, level, hdrlen ? 
hdr : NULL, hdrlen, fmt, args); -+ return vprintk_emit(0, level, &dev_info, fmt, args); - } - EXPORT_SYMBOL(dev_vprintk_emit); - + submitted++; + ATM_SKB(skb)->vcc = vcc; +- tasklet_disable(&ENI_DEV(vcc->dev)->task); ++ tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task); + res = do_tx(skb); + tasklet_enable(&ENI_DEV(vcc->dev)->task); + if (res == enq_ok) return 0; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c -index 9100ac36670a..4bd301dd22c3 100644 +index e2933cb7a82a..cc2aeefa20c8 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -56,6 +56,40 @@ static void zram_free_page(struct zram *zram, size_t index); +@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index); static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -3068,7 +2731,7 @@ index 9100ac36670a..4bd301dd22c3 100644 static int zram_slot_trylock(struct zram *zram, u32 index) { -@@ -71,6 +105,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) +@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) { bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); } @@ -3076,7 +2739,7 @@ index 9100ac36670a..4bd301dd22c3 100644 static inline bool init_done(struct zram *zram) { -@@ -1158,6 +1193,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) +@@ -1165,6 +1200,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); @@ -3084,40 +2747,8 @@ index 9100ac36670a..4bd301dd22c3 100644 return true; } -@@ -1220,6 +1256,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - unsigned long handle; - unsigned int size; - void *src, *dst; -+ struct zcomp_strm *zstrm; - - zram_slot_lock(zram, index); - if (zram_test_flag(zram, index, ZRAM_WB)) { -@@ -1250,6 +1287,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - - size = zram_get_obj_size(zram, index); - -+ zstrm = zcomp_stream_get(zram->comp); - src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); - if (size == PAGE_SIZE) { - dst = kmap_atomic(page); -@@ -1257,14 +1295,13 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - kunmap_atomic(dst); - ret = 0; - } else { -- struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - - dst = kmap_atomic(page); - ret = zcomp_decompress(zstrm, src, size, dst); - kunmap_atomic(dst); -- zcomp_stream_put(zram->comp); - } - zs_unmap_object(zram->mem_pool, handle); -+ zcomp_stream_put(zram->comp); - zram_slot_unlock(zram, index); - - /* Should NEVER happen. Return bio error if it does. 
*/ diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h -index f2fd46daa760..7e4dd447e1dd 100644 +index 419a7e8281ee..561c7ba1421f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -63,6 +63,7 @@ struct zram_table_entry { @@ -3129,7 +2760,7 @@ index f2fd46daa760..7e4dd447e1dd 100644 ktime_t ac_time; #endif diff --git a/drivers/char/random.c b/drivers/char/random.c -index d20ba1b104ca..081a1a4b9d70 100644 +index 5f3b8ac9d97b..ee92e44484a8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1252,28 +1252,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) @@ -3179,10 +2810,10 @@ index 1784530b8387..c08cbb306636 100644 static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space, u8 *buf, size_t bufsiz) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c -index 0b214963539d..39cee10c59f8 100644 +index 4ed6e660273a..c2bd0d40b5fc 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c -@@ -49,6 +49,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da +@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); } @@ -3211,10 +2842,10 @@ index 0b214963539d..39cee10c59f8 100644 + tpm_tis_flush(iobase); +} + - static bool interrupts = true; - module_param(interrupts, bool, 0444); + static int interrupts = -1; + module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -146,7 +171,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, +@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); while (len--) @@ -3223,7 +2854,7 @@ index 0b214963539d..39cee10c59f8 100644 return 0; } -@@ -173,7 +198,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value) +@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value) { struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); @@ -3232,11 +2863,33 @@ index 0b214963539d..39cee10c59f8 100644 return 0; } +diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c +index 9811c40956e5..17c9d825188b 100644 +--- a/drivers/firewire/ohci.c ++++ b/drivers/firewire/ohci.c +@@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) + struct driver_data *driver_data = packet->driver_data; + int ret = -ENOENT; + +- tasklet_disable(&ctx->tasklet); ++ tasklet_disable_in_atomic(&ctx->tasklet); + + if (packet->ack != 0) + goto out; +@@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(struct fw_iso_context *base) + struct iso_context *ctx = container_of(base, struct iso_context, base); + int ret = 0; + +- tasklet_disable(&ctx->context.tasklet); ++ tasklet_disable_in_atomic(&ctx->context.tasklet); + + if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) { + context_tasklet((unsigned long)&ctx->context); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c -index 3aa07c3b5136..25a8e81d1742 100644 +index df3f9bcab581..709c65c0a816 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -62,7 +62,7 @@ struct mm_struct efi_mm = { +@@ -66,7 +66,7 @@ struct mm_struct efi_mm = { struct workqueue_struct *efi_rts_wq; @@ -3245,7 +2898,7 @@ index 3aa07c3b5136..25a8e81d1742 100644 static int __init setup_noefi(char *arg) 
{ disable_runtime = true; -@@ -93,6 +93,9 @@ static int __init parse_efi_cmdline(char *str) +@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str) if (parse_option_str(str, "noruntime")) disable_runtime = true; @@ -3256,10 +2909,10 @@ index 3aa07c3b5136..25a8e81d1742 100644 set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c -index c89f5f7ccb06..1b9d5e690a9f 100644 +index 3da2544fa1c0..338ed08a2e0e 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c -@@ -118,7 +118,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) +@@ -122,7 +122,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) "PSR idle timed out 0x%x, atomic update may fail\n", psr_status); @@ -3269,7 +2922,7 @@ index c89f5f7ccb06..1b9d5e690a9f 100644 crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; -@@ -143,11 +144,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) +@@ -147,11 +148,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) break; } @@ -3285,7 +2938,7 @@ index c89f5f7ccb06..1b9d5e690a9f 100644 } finish_wait(wq, &wait); -@@ -180,7 +183,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) +@@ -184,7 +187,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) return; irq_disable: @@ -3295,7 +2948,7 @@ index c89f5f7ccb06..1b9d5e690a9f 100644 } /** -@@ -218,7 +222,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) +@@ -233,7 +237,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) new_crtc_state->uapi.event = NULL; } @@ -3306,10 +2959,10 @@ index c89f5f7ccb06..1b9d5e690a9f 100644 if (intel_vgpu_active(dev_priv)) return; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c -index 8ec3eecf3e39..31bb90153041 100644 +index 499b09cb4acf..777fd6010f48 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c -@@ -59,9 +59,10 @@ static int __engine_unpark(struct intel_wakeref *wf) +@@ -73,9 +73,10 @@ static int __engine_unpark(struct intel_wakeref *wf) static inline unsigned long __timeline_mark_lock(struct intel_context *ce) { @@ -3322,7 +2975,7 @@ index 8ec3eecf3e39..31bb90153041 100644 mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); return flags; -@@ -71,7 +72,8 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, +@@ -85,7 +86,8 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, unsigned long flags) { mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); @@ -3333,10 +2986,10 @@ index 8ec3eecf3e39..31bb90153041 100644 #else diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c -index 1fa67700d8f4..4ee735b91e97 100644 +index 6cdb052e3850..c9c39723bf49 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -865,6 +865,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, +@@ -878,6 +878,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -3344,7 +2997,7 @@ index 1fa67700d8f4..4ee735b91e97 100644 /* Get optional system timestamp before query. 
*/ if (stime) -@@ -916,6 +917,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, +@@ -929,6 +930,7 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ @@ -3377,10 +3030,10 @@ index a4addcc64978..396b6598694d 100644 TP_PROTO(struct i915_request *rq), TP_ARGS(rq) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c -index e0ae911ef427..781edf550436 100644 +index 3a6fedad002d..4800dfd8a5fb 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1822,6 +1822,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -3388,7 +3041,7 @@ index e0ae911ef427..781edf550436 100644 /* Get optional system timestamp before query. */ if (stime) -@@ -1914,6 +1915,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ @@ -3397,7 +3050,7 @@ index e0ae911ef427..781edf550436 100644 /* Decode into vertical and horizontal scanout position. */ *vpos = position & 0x1fff; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h -index 40e2b9f91163..d9de4813ffac 100644 +index 9416e09ebd58..4a5767a15544 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -18,6 +18,7 @@ @@ -3409,7 +3062,7 @@ index 40e2b9f91163..d9de4813ffac 100644 #include "hv_trace.h" diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index 946d0aba101f..9447315ccd7c 100644 +index d491fdcee61f..e25ffe09400a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -22,6 +22,7 @@ @@ -3420,7 +3073,7 @@ index 946d0aba101f..9447315ccd7c 100644 #include <linux/delay.h> #include <linux/notifier.h> -@@ -1303,6 +1304,8 @@ static void vmbus_isr(void) +@@ -1310,6 +1311,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -3429,15 +3082,34 @@ index 946d0aba101f..9447315ccd7c 100644 bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1347,7 +1350,7 @@ static void vmbus_isr(void) +@@ -1354,7 +1357,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } -- add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); -+ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); +- add_interrupt_randomness(hv_get_vector(), 0); ++ add_interrupt_randomness(hv_get_vector(), 0, ip); } /* +@@ -1362,7 +1365,8 @@ static void vmbus_isr(void) + * buffer and call into Hyper-V to transfer the data. + */ + static void hv_kmsg_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + size_t bytes_written; + phys_addr_t panic_pa; +@@ -1377,7 +1381,7 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper, + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. 
+ */ +- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, ++ kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, + &bytes_written); + if (bytes_written) + hyperv_report_panic_msg(panic_pa, bytes_written); diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index ce9429ca6dde..29ccbd6acf43 100644 --- a/drivers/leds/trigger/Kconfig @@ -3451,10 +3123,10 @@ index ce9429ca6dde..29ccbd6acf43 100644 This allows LEDs to be controlled by active CPUs. This shows the active CPUs across an array of LEDs so you can see which diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index 225380efd1e2..7ae33e2edd35 100644 +index 3a90cc0e43ca..6c20c6e9bf1f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2077,8 +2077,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) +@@ -2216,8 +2216,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) struct raid5_percpu *percpu; unsigned long cpu; @@ -3465,7 +3137,7 @@ index 225380efd1e2..7ae33e2edd35 100644 if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2137,7 +2138,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) +@@ -2276,7 +2277,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -3475,7 +3147,7 @@ index 225380efd1e2..7ae33e2edd35 100644 } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -6902,6 +6904,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) +@@ -7097,6 +7099,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) __func__, cpu); return -ENOMEM; } @@ -3484,10 +3156,10 @@ index 225380efd1e2..7ae33e2edd35 100644 } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index 16fc29472f5c..d16edbdcde6e 100644 +index 5c05acf20e1f..665fe138ab4f 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -627,6 +627,7 @@ struct r5conf { +@@ -635,6 +635,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { @@ -3495,8 +3167,495 @@ index 16fc29472f5c..d16edbdcde6e 100644 struct page *spare_page; /* Used when checking P/Q in raid6 */ void *scribble; /* space for constructing buffer * lists and performing address +diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c +index 774970bfcf85..6bc2c728adb7 100644 +--- a/drivers/mtd/mtdoops.c ++++ b/drivers/mtd/mtdoops.c +@@ -267,7 +267,8 @@ static void find_next_position(struct mtdoops_context *cxt) + } + + static void mtdoops_do_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + struct mtdoops_context *cxt = container_of(dumper, + struct mtdoops_context, dump); +@@ -276,7 +277,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, + if (reason == KMSG_DUMP_OOPS && !dump_oops) + return; + +- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, ++ kmsg_dump_get_buffer(iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, + record_size - MTDOOPS_HEADER_SIZE, NULL); + + if (reason != KMSG_DUMP_OOPS) { +diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h +index 6475060649e9..0321be77366c 100644 +--- a/drivers/net/ethernet/chelsio/cxgb/common.h ++++ b/drivers/net/ethernet/chelsio/cxgb/common.h +@@ -238,7 +238,6 @@ struct adapter { + 
int msg_enable; + u32 mmio_len; + +- struct work_struct ext_intr_handler_task; + struct adapter_params params; + + /* Terminator modules. */ +@@ -257,6 +256,7 @@ struct adapter { + + /* guards async operations */ + spinlock_t async_lock ____cacheline_aligned; ++ u32 pending_thread_intr; + u32 slow_intr_mask; + int t1powersave; + }; +@@ -334,8 +334,7 @@ void t1_interrupts_enable(adapter_t *adapter); + void t1_interrupts_disable(adapter_t *adapter); + void t1_interrupts_clear(adapter_t *adapter); + int t1_elmer0_ext_intr_handler(adapter_t *adapter); +-void t1_elmer0_ext_intr(adapter_t *adapter); +-int t1_slow_intr_handler(adapter_t *adapter); ++irqreturn_t t1_slow_intr_handler(adapter_t *adapter); + + int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); + const struct board_info *t1_get_board_info(unsigned int board_id); +@@ -347,7 +346,6 @@ int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, + int t1_init_hw_modules(adapter_t *adapter); + int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); + void t1_free_sw_modules(adapter_t *adapter); +-void t1_fatal_err(adapter_t *adapter); + void t1_link_changed(adapter_t *adapter, int port_id); + void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat, + int speed, int duplex, int pause); +diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +index 0e4a0f413960..512da98019c6 100644 +--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c ++++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +@@ -211,9 +211,10 @@ static int cxgb_up(struct adapter *adapter) + t1_interrupts_clear(adapter); + + adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev); +- err = request_irq(adapter->pdev->irq, t1_interrupt, +- adapter->params.has_msi ? 0 : IRQF_SHARED, +- adapter->name, adapter); ++ err = request_threaded_irq(adapter->pdev->irq, t1_interrupt, ++ t1_interrupt_thread, ++ adapter->params.has_msi ? 0 : IRQF_SHARED, ++ adapter->name, adapter); + if (err) { + if (adapter->params.has_msi) + pci_disable_msi(adapter->pdev); +@@ -916,51 +917,6 @@ static void mac_stats_task(struct work_struct *work) + spin_unlock(&adapter->work_lock); + } + +-/* +- * Processes elmer0 external interrupts in process context. +- */ +-static void ext_intr_task(struct work_struct *work) +-{ +- struct adapter *adapter = +- container_of(work, struct adapter, ext_intr_handler_task); +- +- t1_elmer0_ext_intr_handler(adapter); +- +- /* Now reenable external interrupts */ +- spin_lock_irq(&adapter->async_lock); +- adapter->slow_intr_mask |= F_PL_INTR_EXT; +- writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); +- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, +- adapter->regs + A_PL_ENABLE); +- spin_unlock_irq(&adapter->async_lock); +-} +- +-/* +- * Interrupt-context handler for elmer0 external interrupts. +- */ +-void t1_elmer0_ext_intr(struct adapter *adapter) +-{ +- /* +- * Schedule a task to handle external interrupts as we require +- * a process context. We disable EXT interrupts in the interim +- * and let the task reenable them when it's done. 
+- */ +- adapter->slow_intr_mask &= ~F_PL_INTR_EXT; +- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, +- adapter->regs + A_PL_ENABLE); +- schedule_work(&adapter->ext_intr_handler_task); +-} +- +-void t1_fatal_err(struct adapter *adapter) +-{ +- if (adapter->flags & FULL_INIT_DONE) { +- t1_sge_stop(adapter->sge); +- t1_interrupts_disable(adapter); +- } +- pr_alert("%s: encountered fatal error, operation suspended\n", +- adapter->name); +-} +- + static const struct net_device_ops cxgb_netdev_ops = { + .ndo_open = cxgb_open, + .ndo_stop = cxgb_close, +@@ -1062,8 +1018,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) + spin_lock_init(&adapter->async_lock); + spin_lock_init(&adapter->mac_lock); + +- INIT_WORK(&adapter->ext_intr_handler_task, +- ext_intr_task); + INIT_DELAYED_WORK(&adapter->stats_update_task, + mac_stats_task); + +diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c +index 2d9c2b5a690a..cda01f22c71c 100644 +--- a/drivers/net/ethernet/chelsio/cxgb/sge.c ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.c +@@ -940,10 +940,11 @@ void t1_sge_intr_clear(struct sge *sge) + /* + * SGE 'Error' interrupt handler + */ +-int t1_sge_intr_error_handler(struct sge *sge) ++bool t1_sge_intr_error_handler(struct sge *sge) + { + struct adapter *adapter = sge->adapter; + u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); ++ bool wake = false; + + if (adapter->port[0].dev->hw_features & NETIF_F_TSO) + cause &= ~F_PACKET_TOO_BIG; +@@ -967,11 +968,14 @@ int t1_sge_intr_error_handler(struct sge *sge) + sge->stats.pkt_mismatch++; + pr_alert("%s: SGE packet mismatch\n", adapter->name); + } +- if (cause & SGE_INT_FATAL) +- t1_fatal_err(adapter); ++ if (cause & SGE_INT_FATAL) { ++ t1_interrupts_disable(adapter); ++ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; ++ wake = true; ++ } + + writel(cause, adapter->regs + A_SG_INT_CAUSE); +- return 0; ++ return wake; + } + + const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge) +@@ -1619,11 +1623,46 @@ int t1_poll(struct napi_struct *napi, int budget) + return work_done; + } + ++irqreturn_t t1_interrupt_thread(int irq, void *data) ++{ ++ struct adapter *adapter = data; ++ u32 pending_thread_intr; ++ ++ spin_lock_irq(&adapter->async_lock); ++ pending_thread_intr = adapter->pending_thread_intr; ++ adapter->pending_thread_intr = 0; ++ spin_unlock_irq(&adapter->async_lock); ++ ++ if (!pending_thread_intr) ++ return IRQ_NONE; ++ ++ if (pending_thread_intr & F_PL_INTR_EXT) ++ t1_elmer0_ext_intr_handler(adapter); ++ ++ /* This error is fatal, interrupts remain off */ ++ if (pending_thread_intr & F_PL_INTR_SGE_ERR) { ++ pr_alert("%s: encountered fatal error, operation suspended\n", ++ adapter->name); ++ t1_sge_stop(adapter->sge); ++ return IRQ_HANDLED; ++ } ++ ++ spin_lock_irq(&adapter->async_lock); ++ adapter->slow_intr_mask |= F_PL_INTR_EXT; ++ ++ writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); ++ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, ++ adapter->regs + A_PL_ENABLE); ++ spin_unlock_irq(&adapter->async_lock); ++ ++ return IRQ_HANDLED; ++} ++ + irqreturn_t t1_interrupt(int irq, void *data) + { + struct adapter *adapter = data; + struct sge *sge = adapter->sge; +- int handled; ++ irqreturn_t handled; + + if (likely(responses_pending(adapter))) { + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); +@@ -1645,10 +1684,10 @@ irqreturn_t t1_interrupt(int irq, void *data) + handled = t1_slow_intr_handler(adapter); + spin_unlock(&adapter->async_lock); + +- 
if (!handled) ++ if (handled == IRQ_NONE) + sge->stats.unhandled_irqs++; + +- return IRQ_RETVAL(handled != 0); ++ return handled; + } + + /* +diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h +index a1ba591b3431..716705b96f26 100644 +--- a/drivers/net/ethernet/chelsio/cxgb/sge.h ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.h +@@ -74,6 +74,7 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *); + int t1_sge_configure(struct sge *, struct sge_params *); + int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); + void t1_sge_destroy(struct sge *); ++irqreturn_t t1_interrupt_thread(int irq, void *data); + irqreturn_t t1_interrupt(int irq, void *cookie); + int t1_poll(struct napi_struct *, int); + +@@ -81,7 +82,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev); + void t1_vlan_mode(struct adapter *adapter, netdev_features_t features); + void t1_sge_start(struct sge *); + void t1_sge_stop(struct sge *); +-int t1_sge_intr_error_handler(struct sge *); ++bool t1_sge_intr_error_handler(struct sge *sge); + void t1_sge_intr_enable(struct sge *); + void t1_sge_intr_disable(struct sge *); + void t1_sge_intr_clear(struct sge *); +diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c +index ea0f8741d7cf..310add28fcf5 100644 +--- a/drivers/net/ethernet/chelsio/cxgb/subr.c ++++ b/drivers/net/ethernet/chelsio/cxgb/subr.c +@@ -170,7 +170,7 @@ void t1_link_changed(adapter_t *adapter, int port_id) + t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc); + } + +-static int t1_pci_intr_handler(adapter_t *adapter) ++static bool t1_pci_intr_handler(adapter_t *adapter) + { + u32 pcix_cause; + +@@ -179,9 +179,13 @@ static int t1_pci_intr_handler(adapter_t *adapter) + if (pcix_cause) { + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, + pcix_cause); +- t1_fatal_err(adapter); /* PCI errors are fatal */ ++ /* PCI errors are fatal */ ++ t1_interrupts_disable(adapter); ++ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; ++ pr_alert("%s: PCI error encountered.\n", adapter->name); ++ return true; + } +- return 0; ++ return false; + } + + #ifdef CONFIG_CHELSIO_T1_1G +@@ -210,13 +214,16 @@ static int fpga_phy_intr_handler(adapter_t *adapter) + /* + * Slow path interrupt handler for FPGAs. + */ +-static int fpga_slow_intr(adapter_t *adapter) ++static irqreturn_t fpga_slow_intr(adapter_t *adapter) + { + u32 cause = readl(adapter->regs + A_PL_CAUSE); ++ irqreturn_t ret = IRQ_NONE; + + cause &= ~F_PL_INTR_SGE_DATA; +- if (cause & F_PL_INTR_SGE_ERR) +- t1_sge_intr_error_handler(adapter->sge); ++ if (cause & F_PL_INTR_SGE_ERR) { ++ if (t1_sge_intr_error_handler(adapter->sge)) ++ ret = IRQ_WAKE_THREAD; ++ } + + if (cause & FPGA_PCIX_INTERRUPT_GMAC) + fpga_phy_intr_handler(adapter); +@@ -231,14 +238,19 @@ static int fpga_slow_intr(adapter_t *adapter) + /* Clear TP interrupt */ + writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE); + } +- if (cause & FPGA_PCIX_INTERRUPT_PCIX) +- t1_pci_intr_handler(adapter); ++ if (cause & FPGA_PCIX_INTERRUPT_PCIX) { ++ if (t1_pci_intr_handler(adapter)) ++ ret = IRQ_WAKE_THREAD; ++ } + + /* Clear the interrupts just processed. */ + if (cause) + writel(cause, adapter->regs + A_PL_CAUSE); + +- return cause != 0; ++ if (ret != IRQ_NONE) ++ return ret; ++ ++ return cause == 0 ? IRQ_NONE : IRQ_HANDLED; + } + #endif + +@@ -842,31 +854,45 @@ void t1_interrupts_clear(adapter_t* adapter) + /* + * Slow path interrupt handler for ASICs. 
+ */ +-static int asic_slow_intr(adapter_t *adapter) ++static irqreturn_t asic_slow_intr(adapter_t *adapter) + { + u32 cause = readl(adapter->regs + A_PL_CAUSE); ++ irqreturn_t ret = IRQ_HANDLED; + + cause &= adapter->slow_intr_mask; + if (!cause) +- return 0; +- if (cause & F_PL_INTR_SGE_ERR) +- t1_sge_intr_error_handler(adapter->sge); ++ return IRQ_NONE; ++ if (cause & F_PL_INTR_SGE_ERR) { ++ if (t1_sge_intr_error_handler(adapter->sge)) ++ ret = IRQ_WAKE_THREAD; ++ } + if (cause & F_PL_INTR_TP) + t1_tp_intr_handler(adapter->tp); + if (cause & F_PL_INTR_ESPI) + t1_espi_intr_handler(adapter->espi); +- if (cause & F_PL_INTR_PCIX) +- t1_pci_intr_handler(adapter); +- if (cause & F_PL_INTR_EXT) +- t1_elmer0_ext_intr(adapter); ++ if (cause & F_PL_INTR_PCIX) { ++ if (t1_pci_intr_handler(adapter)) ++ ret = IRQ_WAKE_THREAD; ++ } ++ if (cause & F_PL_INTR_EXT) { ++ /* Wake the threaded interrupt to handle external interrupts as ++ * we require a process context. We disable EXT interrupts in ++ * the interim and let the thread reenable them when it's done. ++ */ ++ adapter->pending_thread_intr |= F_PL_INTR_EXT; ++ adapter->slow_intr_mask &= ~F_PL_INTR_EXT; ++ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, ++ adapter->regs + A_PL_ENABLE); ++ ret = IRQ_WAKE_THREAD; ++ } + + /* Clear the interrupts just processed. */ + writel(cause, adapter->regs + A_PL_CAUSE); + readl(adapter->regs + A_PL_CAUSE); /* flush writes */ +- return 1; ++ return ret; + } + +-int t1_slow_intr_handler(adapter_t *adapter) ++irqreturn_t t1_slow_intr_handler(adapter_t *adapter) + { + #ifdef CONFIG_CHELSIO_T1_1G + if (!t1_is_asic(adapter)) +diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c +index e3a8858915b3..df0eab479d51 100644 +--- a/drivers/net/ethernet/dlink/sundance.c ++++ b/drivers/net/ethernet/dlink/sundance.c +@@ -963,7 +963,7 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) + unsigned long flag; + + netif_stop_queue(dev); +- tasklet_disable(&np->tx_tasklet); ++ tasklet_disable_in_atomic(&np->tx_tasklet); + iowrite16(0, ioaddr + IntrEnable); + printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " + "TxFrameId %2.2x," +diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c +index e9efe074edc1..f1b9284e0bea 100644 +--- a/drivers/net/ethernet/jme.c ++++ b/drivers/net/ethernet/jme.c +@@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme) + jwrite32f(jme, JME_APMC, apmc); + } + +-static void jme_link_change_tasklet(struct tasklet_struct *t) ++static void jme_link_change_work(struct work_struct *work) + { +- struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); ++ struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); + struct net_device *netdev = jme->dev; + int rc; + +@@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u32 intrstat) + * all other events are ignored + */ + jwrite32(jme, JME_IEVE, intrstat); +- tasklet_schedule(&jme->linkch_task); ++ schedule_work(&jme->linkch_task); + goto out_reenable; + } + +@@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) + jme_clear_pm_disable_wol(jme); + JME_NAPI_ENABLE(jme); + +- tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); + tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); + tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); + tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); +@@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) + + JME_NAPI_DISABLE(jme); + +- tasklet_kill(&jme->linkch_task); ++ 
cancel_work_sync(&jme->linkch_task); + tasklet_kill(&jme->txclean_task); + tasklet_kill(&jme->rxclean_task); + tasklet_kill(&jme->rxempty_task); +@@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, + atomic_set(&jme->rx_empty, 1); + + tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); ++ INIT_WORK(&jme->linkch_task, jme_link_change_work); + jme->dpi.cur = PCC_P1; + + jme->reg_ghc = 0; +diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h +index a2c3b00d939d..2af76329b4a2 100644 +--- a/drivers/net/ethernet/jme.h ++++ b/drivers/net/ethernet/jme.h +@@ -411,7 +411,7 @@ struct jme_adapter { + struct tasklet_struct rxempty_task; + struct tasklet_struct rxclean_task; + struct tasklet_struct txclean_task; +- struct tasklet_struct linkch_task; ++ struct work_struct linkch_task; + struct tasklet_struct pcc_task; + unsigned long flags; + u32 reg_txcs; +diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c +index 71e2ada86793..72e2e71aac0e 100644 +--- a/drivers/net/wireless/ath/ath9k/beacon.c ++++ b/drivers/net/wireless/ath/ath9k/beacon.c +@@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(struct ath_softc *sc) + int first_slot = ATH_BCBUF; + int slot; + +- tasklet_disable(&sc->bcon_tasklet); ++ tasklet_disable_in_atomic(&sc->bcon_tasklet); + + /* Find first taken slot. */ + for (slot = 0; slot < ATH_BCBUF; slot++) { +diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c +index 6db8d96a78eb..316e8b9b1338 100644 +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + * Prevents hv_pci_onchannelcallback() from running concurrently + * in the tasklet. 
+ */ +- tasklet_disable(&channel->callback_event); ++ tasklet_disable_in_atomic(&channel->callback_event); + + /* + * Since this function is called with IRQ locks held, can't diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c -index 0f9274960dc6..dc97e4f1f4ad 100644 +index 03bf49adaafe..52e0a2486b43 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1452,11 +1452,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, @@ -3576,10 +3735,10 @@ index 5ea426effa60..0d6b9acc7cf8 100644 list_for_each_entry_safe(fcf, next, &del_list, list) { /* Removes fcf from current list */ diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c -index 96a2952cf626..1cb773a4a02e 100644 +index 841000445b9a..26d661ddc950 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c -@@ -826,10 +826,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, +@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, } memset(ep, 0, sizeof(*ep)); @@ -3712,10 +3871,10 @@ index cae61d1ebec5..47dd23056271 100644 .device = uart_console_device, .setup = univ8250_console_setup, diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c -index 0d0c80905c58..f8116a8839d8 100644 +index fbcc90c31ca1..b33cb454ce03 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -53,9 +53,18 @@ int fsl8250_handle_irq(struct uart_port *port) +@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port) /* Stop processing interrupts on input overrun */ if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { @@ -3735,7 +3894,7 @@ index 0d0c80905c58..f8116a8839d8 100644 port->ops->stop_rx(port); } else { diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c -index dde766fa465f..f4cceca82748 100644 +index 988bf6bcce42..bcd26d672539 100644 --- a/drivers/tty/serial/8250/8250_ingenic.c +++ b/drivers/tty/serial/8250/8250_ingenic.c @@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart", @@ -3761,7 +3920,7 @@ index dde766fa465f..f4cceca82748 100644 if (ier & UART_IER_MSI) value |= UART_MCR_MDCE | UART_MCR_FCM; diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c -index 7b0dec14c8b8..d323ff7051f6 100644 +index f7d3023f860f..8133713dcf5e 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -213,12 +213,37 @@ static void mtk8250_shutdown(struct uart_port *port) @@ -3805,7 +3964,7 @@ index 7b0dec14c8b8..d323ff7051f6 100644 static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c -index c71d647eb87a..4325a15be573 100644 +index b0af13074cd3..b05f8c34b291 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -757,7 +757,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) @@ -3887,7 +4046,7 @@ index c71d647eb87a..4325a15be573 100644 spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2768,7 +2761,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, +@@ -2771,7 +2764,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -3896,7 +4055,7 @@ index c71d647eb87a..4325a15be573 100644 if (up->capabilities & UART_CAP_EFR) { 
unsigned char efr = 0; -@@ -3234,7 +3227,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); +@@ -3237,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -3905,7 +4064,7 @@ index c71d647eb87a..4325a15be573 100644 { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3242,6 +3235,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch) +@@ -3245,6 +3238,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch) serial_port_out(port, UART_TX, ch); } @@ -3924,7 +4083,7 @@ index c71d647eb87a..4325a15be573 100644 /* * Restore serial console when h/w power-off detected */ -@@ -3263,6 +3268,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) +@@ -3266,6 +3271,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -3957,7 +4116,7 @@ index c71d647eb87a..4325a15be573 100644 /* * Print a string to the serial port trying not to disturb * any possible real use of the port... -@@ -3279,24 +3310,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, +@@ -3282,24 +3313,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_port *port = &up->port; unsigned long flags; unsigned int ier; @@ -3984,7 +4143,7 @@ index c71d647eb87a..4325a15be573 100644 /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3310,7 +3329,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, +@@ -3313,7 +3332,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } @@ -3994,7 +4153,7 @@ index c71d647eb87a..4325a15be573 100644 /* * Finally, wait for transmitter to become empty -@@ -3323,8 +3344,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, +@@ -3326,8 +3347,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -4004,7 +4163,7 @@ index c71d647eb87a..4325a15be573 100644 /* * The receive handling will happen properly because the -@@ -3336,8 +3356,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, +@@ -3339,8 +3359,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, if (up->msr_saved_flags) serial8250_modem_status(up); @@ -4014,7 +4173,7 @@ index c71d647eb87a..4325a15be573 100644 } static unsigned int probe_baud(struct uart_port *port) -@@ -3357,6 +3376,7 @@ static unsigned int probe_baud(struct uart_port *port) +@@ -3360,6 +3379,7 @@ static unsigned int probe_baud(struct uart_port *port) int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -4022,7 +4181,7 @@ index c71d647eb87a..4325a15be573 100644 int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3366,6 +3386,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) +@@ -3369,6 +3389,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) if (!port->iobase && !port->membase) return -ENODEV; @@ -4032,10 +4191,10 @@ index c71d647eb87a..4325a15be573 100644 uart_parse_options(options, &baud, &parity, &bits, &flow); else if (probe) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c -index 67498594d7d7..e5cf42bbabf3 100644 +index c255476cce28..8131baf3362c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ 
b/drivers/tty/serial/amba-pl011.c -@@ -2198,18 +2198,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) +@@ -2201,18 +2201,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -4064,7 +4223,7 @@ index 67498594d7d7..e5cf42bbabf3 100644 /* * First save the CR then disable the interrupts -@@ -2235,8 +2241,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) +@@ -2238,8 +2244,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) pl011_write(old_cr, uap, REG_CR); if (locked) @@ -4119,10 +4278,10 @@ index 04f75a44f243..60cbce1995a5 100644 _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c -index 6df0922e7e30..5b2371b26bf8 100644 +index 80bf4c6f4c7b..a975cfed14f5 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c -@@ -81,7 +81,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, +@@ -82,7 +82,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -4132,10 +4291,10 @@ index 6df0922e7e30..5b2371b26bf8 100644 cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); diff --git a/fs/dcache.c b/fs/dcache.c -index ea0485861d93..26a187abf13a 100644 +index d429c984133c..28474a11c8f3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2503,9 +2503,10 @@ EXPORT_SYMBOL(d_rehash); +@@ -2511,9 +2511,10 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -4148,7 +4307,7 @@ index ea0485861d93..26a187abf13a 100644 return n; cpu_relax(); } -@@ -2513,26 +2514,30 @@ static inline unsigned start_dir_add(struct inode *dir) +@@ -2521,26 +2522,30 @@ static inline unsigned start_dir_add(struct inode *dir) static inline void end_dir_add(struct inode *dir, unsigned n) { @@ -4191,7 +4350,7 @@ index ea0485861d93..26a187abf13a 100644 { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); -@@ -2546,7 +2551,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, +@@ -2554,7 +2559,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, retry: rcu_read_lock(); @@ -4200,7 +4359,7 @@ index ea0485861d93..26a187abf13a 100644 r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { -@@ -2574,7 +2579,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, +@@ -2582,7 +2587,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, } hlist_bl_lock(b); @@ -4209,7 +4368,7 @@ index ea0485861d93..26a187abf13a 100644 hlist_bl_unlock(b); rcu_read_unlock(); goto retry; -@@ -2647,7 +2652,7 @@ void __d_lookup_done(struct dentry *dentry) +@@ -2655,7 +2660,7 @@ void __d_lookup_done(struct dentry *dentry) hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); @@ -4218,39 +4377,8 @@ index ea0485861d93..26a187abf13a 100644 dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); -diff --git a/fs/exec.c b/fs/exec.c -index a91003e28eaa..d4fb18baf1fb 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -1130,11 +1130,24 @@ static int exec_mmap(struct mm_struct *mm) - } - - task_lock(tsk); -- active_mm = tsk->active_mm; - membarrier_exec_mmap(mm); -- tsk->mm = mm; -+ -+ local_irq_disable(); -+ active_mm = tsk->active_mm; - tsk->active_mm = mm; -+ tsk->mm = mm; -+ /* -+ * This 
prevents preemption while active_mm is being loaded and -+ * it and mm are being updated, which could cause problems for -+ * lazy tlb mm refcounting when these are updated by context -+ * switches. Not all architectures can handle irqs off over -+ * activate_mm yet. -+ */ -+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) -+ local_irq_enable(); - activate_mm(active_mm, mm); -+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) -+ local_irq_enable(); - tsk->mm->vmacache_seqnum = 0; - vmacache_flush(tsk); - task_unlock(tsk); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c -index 90e3f01bd796..ec8fd82117f4 100644 +index 3441ffa740f3..2fcae5cfd272 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file, @@ -4263,11 +4391,11 @@ index 90e3f01bd796..ec8fd82117f4 100644 if (!o->nodeid) { /* diff --git a/fs/inode.c b/fs/inode.c -index 72c4c347afb7..1b27d29265cf 100644 +index 497326faa124..c93acfaa84f8 100644 --- a/fs/inode.c +++ b/fs/inode.c -@@ -158,7 +158,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) - inode->i_bdev = NULL; +@@ -157,7 +157,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) + inode->i_pipe = NULL; inode->i_cdev = NULL; inode->i_link = NULL; - inode->i_dir_seq = 0; @@ -4275,214 +4403,8 @@ index 72c4c347afb7..1b27d29265cf 100644 inode->i_rdev = 0; inode->dirtied_when = 0; -diff --git a/fs/io-wq.c b/fs/io-wq.c -index 414beb543883..f1cf739d63c2 100644 ---- a/fs/io-wq.c -+++ b/fs/io-wq.c -@@ -87,7 +87,7 @@ enum { - */ - struct io_wqe { - struct { -- spinlock_t lock; -+ raw_spinlock_t lock; - struct io_wq_work_list work_list; - unsigned long hash_map; - unsigned flags; -@@ -148,7 +148,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) - - if (current->files != worker->restore_files) { - __acquire(&wqe->lock); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - - task_lock(current); -@@ -166,7 +166,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) - if (worker->mm) { - if (!dropped_lock) { - __acquire(&wqe->lock); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - dropped_lock = true; - } - __set_current_state(TASK_RUNNING); -@@ -220,17 +220,17 @@ static void io_worker_exit(struct io_worker *worker) - worker->flags = 0; - preempt_enable(); - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - hlist_nulls_del_rcu(&worker->nulls_node); - list_del_rcu(&worker->all_list); - if (__io_worker_unuse(wqe, worker)) { - __release(&wqe->lock); -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - } - acct->nr_workers--; - nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers + - wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - - /* all workers gone, wq exit can proceed */ - if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs)) -@@ -504,7 +504,7 @@ static void io_worker_handle_work(struct io_worker *worker) - else if (!wq_list_empty(&wqe->work_list)) - wqe->flags |= IO_WQE_FLAG_STALLED; - -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (!work) - break; - io_assign_current_work(worker, work); -@@ -538,17 +538,17 @@ static void io_worker_handle_work(struct io_worker *worker) - io_wqe_enqueue(wqe, linked); - - if (hash != -1U && !next_hashed) { -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - wqe->hash_map &= ~BIT_ULL(hash); 
- wqe->flags &= ~IO_WQE_FLAG_STALLED; - /* skip unnecessary unlock-lock wqe->lock */ - if (!work) - goto get_next; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - } while (work); - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - } while (1); - } - -@@ -563,7 +563,7 @@ static int io_wqe_worker(void *data) - while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { - set_current_state(TASK_INTERRUPTIBLE); - loop: -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (io_wqe_run_queue(wqe)) { - __set_current_state(TASK_RUNNING); - io_worker_handle_work(worker); -@@ -574,7 +574,7 @@ static int io_wqe_worker(void *data) - __release(&wqe->lock); - goto loop; - } -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (signal_pending(current)) - flush_signals(current); - if (schedule_timeout(WORKER_IDLE_TIMEOUT)) -@@ -586,11 +586,11 @@ static int io_wqe_worker(void *data) - } - - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (!wq_list_empty(&wqe->work_list)) - io_worker_handle_work(worker); - else -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - - io_worker_exit(worker); -@@ -630,9 +630,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk) - - worker->flags &= ~IO_WORKER_F_RUNNING; - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - io_wqe_dec_running(wqe, worker); -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - } - - static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) -@@ -656,7 +656,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) - return false; - } - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; -@@ -665,7 +665,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) - worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - - if (index == IO_WQ_ACCT_UNBOUND) - atomic_inc(&wq->user->processes); -@@ -720,12 +720,12 @@ static int io_wq_manager(void *data) - if (!node_online(node)) - continue; - -- spin_lock_irq(&wqe->lock); -+ raw_spin_lock_irq(&wqe->lock); - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) - fork_worker[IO_WQ_ACCT_BOUND] = true; - if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND)) - fork_worker[IO_WQ_ACCT_UNBOUND] = true; -- spin_unlock_irq(&wqe->lock); -+ raw_spin_unlock_irq(&wqe->lock); - if (fork_worker[IO_WQ_ACCT_BOUND]) - create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND); - if (fork_worker[IO_WQ_ACCT_UNBOUND]) -@@ -821,10 +821,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) - } - - work_flags = work->flags; -- spin_lock_irqsave(&wqe->lock, flags); -+ raw_spin_lock_irqsave(&wqe->lock, flags); - io_wqe_insert_work(wqe, work); - wqe->flags &= ~IO_WQE_FLAG_STALLED; -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - - if ((work_flags & IO_WQ_WORK_CONCURRENT) || - !atomic_read(&acct->nr_running)) -@@ -951,13 +951,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe, - unsigned long flags; - - retry: -- spin_lock_irqsave(&wqe->lock, flags); -+ raw_spin_lock_irqsave(&wqe->lock, flags); - wq_list_for_each(node, prev, 
&wqe->work_list) { - work = container_of(node, struct io_wq_work, list); - if (!match->fn(work, match->data)) - continue; - io_wqe_remove_pending(wqe, work, prev); -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - io_run_cancel(work, wqe); - match->nr_pending++; - if (!match->cancel_all) -@@ -966,7 +966,7 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe, - /* not safe to continue after unlock */ - goto retry; - } -- spin_unlock_irqrestore(&wqe->lock, flags); -+ raw_spin_unlock_irqrestore(&wqe->lock, flags); - } - - static void io_wqe_cancel_running_work(struct io_wqe *wqe, -@@ -1074,7 +1074,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) - } - atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); - wqe->wq = wq; -- spin_lock_init(&wqe->lock); -+ raw_spin_lock_init(&wqe->lock); - INIT_WQ_LIST(&wqe->work_list); - INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); - INIT_LIST_HEAD(&wqe->all_list); diff --git a/fs/namei.c b/fs/namei.c -index e99e2a9da0f7..8b7cbca327de 100644 +index 78443a85480a..ad687cecc61c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1520,7 +1520,7 @@ static struct dentry *__lookup_slow(const struct qstr *name, @@ -4494,7 +4416,7 @@ index e99e2a9da0f7..8b7cbca327de 100644 /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) -@@ -3018,7 +3018,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, +@@ -3021,7 +3021,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -4504,14 +4426,14 @@ index e99e2a9da0f7..8b7cbca327de 100644 if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); diff --git a/fs/namespace.c b/fs/namespace.c -index bae0e95b3713..c7e53f6ba7df 100644 +index eed3453ec40a..c073fa5754fc 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -14,6 +14,7 @@ #include <linux/mnt_namespace.h> #include <linux/user_namespace.h> #include <linux/namei.h> -+#include <linux/delay.h> ++#include <linux/hrtimer.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/idr.h> @@ -4530,10 +4452,10 @@ index bae0e95b3713..c7e53f6ba7df 100644 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * be set to match its requirements. 
So we must not load that until diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c -index cb52db9a0cfb..b5703ca9492c 100644 +index ef827ae193d2..87484e36ac50 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -484,7 +484,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, +@@ -635,7 +635,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, unsigned long dir_verifier) { struct qstr filename = QSTR_INIT(entry->name, entry->len); @@ -4542,7 +4464,7 @@ index cb52db9a0cfb..b5703ca9492c 100644 struct dentry *dentry; struct dentry *alias; struct inode *inode; -@@ -1665,7 +1665,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, +@@ -1859,7 +1859,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode) { @@ -4573,24 +4495,8 @@ index b27ebdccef70..f86c98a7ed04 100644 status = -EBUSY; spin_lock(&dentry->d_lock); -diff --git a/fs/proc/array.c b/fs/proc/array.c -index 65ec2029fa80..7052441be967 100644 ---- a/fs/proc/array.c -+++ b/fs/proc/array.c -@@ -382,9 +382,9 @@ static inline void task_context_switch_counts(struct seq_file *m, - static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) - { - seq_printf(m, "Cpus_allowed:\t%*pb\n", -- cpumask_pr_args(task->cpus_ptr)); -+ cpumask_pr_args(&task->cpus_mask)); - seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", -- cpumask_pr_args(task->cpus_ptr)); -+ cpumask_pr_args(&task->cpus_mask)); - } - - static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) diff --git a/fs/proc/base.c b/fs/proc/base.c -index 617db4e0faa0..9e4520113c02 100644 +index bda8e8ece720..c8698a7de321 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,7 @@ @@ -4601,7 +4507,7 @@ index 617db4e0faa0..9e4520113c02 100644 #include <trace/events/oom.h> #include "internal.h" #include "fd.h" -@@ -2033,7 +2034,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, +@@ -2038,7 +2039,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -4611,10 +4517,10 @@ index 617db4e0faa0..9e4520113c02 100644 if (IS_ERR(child)) goto end_instantiate; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index 6c1166ccdaea..dab5f1749ef1 100644 +index d2018f70d1fa..59d715b66468 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -685,7 +685,7 @@ static bool proc_sys_fill_cache(struct file *file, +@@ -683,7 +683,7 @@ static bool proc_sys_fill_cache(struct file *file, child = d_lookup(dir, &qname); if (!child) { @@ -4623,6 +4529,29 @@ index 6c1166ccdaea..dab5f1749ef1 100644 child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) return false; +diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c +index 32f64abc277c..7906d0a2fb8a 100644 +--- a/fs/pstore/platform.c ++++ b/fs/pstore/platform.c +@@ -383,7 +383,8 @@ void pstore_record_init(struct pstore_record *record, + * end of the buffer. + */ + static void pstore_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + unsigned long total = 0; + const char *why; +@@ -435,7 +436,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, + dst_size -= header_size; + + /* Write dump contents. 
*/ +- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, ++ if (!kmsg_dump_get_buffer(iter, true, dst + header_size, + dst_size, &dump_size)) + break; + diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d791..71c1535db56a 100644 --- a/include/asm-generic/preempt.h @@ -4637,6 +4566,19 @@ index d683f5e6d791..71c1535db56a 100644 extern asmlinkage void preempt_schedule(void); #define __preempt_schedule() preempt_schedule() extern asmlinkage void preempt_schedule_notrace(void); +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index f94ee3089e01..89a444c5a583 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -153,7 +153,7 @@ struct request { + */ + union { + struct hlist_node hash; /* merge hash */ +- struct list_head ipi_list; ++ struct llist_node ipi_list; + }; + + /* diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index a19519f4241d..eed86eb0a1de 100644 --- a/include/linux/bottom_half.h @@ -4662,10 +4604,18 @@ index a19519f4241d..eed86eb0a1de 100644 + #endif /* _LINUX_BH_H */ diff --git a/include/linux/console.h b/include/linux/console.h -index 0670d3491e0e..00d7437a92e1 100644 +index 20874db50bc8..69bfff368294 100644 --- a/include/linux/console.h +++ b/include/linux/console.h -@@ -137,10 +137,12 @@ static inline int con_debug_leave(void) +@@ -16,6 +16,7 @@ + + #include <linux/atomic.h> + #include <linux/types.h> ++#include <linux/printk.h> + + struct vc_data; + struct console_font_op; +@@ -136,10 +137,12 @@ static inline int con_debug_leave(void) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ @@ -4674,20 +4624,23 @@ index 0670d3491e0e..00d7437a92e1 100644 struct console { char name[16]; void (*write)(struct console *, const char *, unsigned); -+ void (*write_atomic)(struct console *, const char *, unsigned); ++ void (*write_atomic)(struct console *co, const char *s, unsigned int count); int (*read)(struct console *, char *, unsigned); struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); -@@ -150,6 +152,8 @@ struct console { +@@ -149,6 +152,11 @@ struct console { short flags; short index; int cflag; ++#ifdef CONFIG_PRINTK ++ char sync_buf[CONSOLE_LOG_MAX]; ++#endif + atomic64_t printk_seq; + struct task_struct *thread; void *data; struct console *next; }; -@@ -230,4 +234,7 @@ extern void console_init(void); +@@ -229,4 +237,7 @@ extern void console_init(void); void dummycon_register_output_notifier(struct notifier_block *nb); void dummycon_unregister_output_notifier(struct notifier_block *nb); @@ -4695,61 +4648,11 @@ index 0670d3491e0e..00d7437a92e1 100644 +extern void console_atomic_unlock(unsigned int flags); + #endif /* _LINUX_CONSOLE_H */ -diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h -index bf9181cef444..26b29e78dbc6 100644 ---- a/include/linux/cpuhotplug.h -+++ b/include/linux/cpuhotplug.h -@@ -151,6 +151,7 @@ enum cpuhp_state { - CPUHP_AP_ONLINE, - CPUHP_TEARDOWN_CPU, - CPUHP_AP_ONLINE_IDLE, -+ CPUHP_AP_SCHED_WAIT_EMPTY, - CPUHP_AP_SMPBOOT_THREADS, - CPUHP_AP_X86_VDSO_VMA_ONLINE, - CPUHP_AP_IRQ_AFFINITY_ONLINE, -diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h -index f0d895d6ac39..383684e30f12 100644 ---- a/include/linux/cpumask.h -+++ b/include/linux/cpumask.h -@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p, - return 
cpumask_next_and(-1, src1p, src2p); - } - -+static inline int cpumask_any_distribute(const struct cpumask *srcp) -+{ -+ return cpumask_first(srcp); -+} -+ - #define for_each_cpu(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) - #define for_each_cpu_not(cpu, mask) \ -@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); - unsigned int cpumask_local_spread(unsigned int i, int node); - int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p); -+int cpumask_any_distribute(const struct cpumask *srcp); - - /** - * for_each_cpu - iterate over every cpu in a mask -diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h -index 6594dbc34a37..206bde8308b2 100644 ---- a/include/linux/crash_core.h -+++ b/include/linux/crash_core.h -@@ -55,6 +55,9 @@ phys_addr_t paddr_vmcoreinfo_note(void); - #define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -+#define VMCOREINFO_TYPE_OFFSET(name, field) \ -+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ -+ (unsigned long)offsetof(name, field)) - #define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) - #define VMCOREINFO_NUMBER(name) \ diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index 65d975bf9390..1e23dd02ac4e 100644 +index d7b369fc15d3..d7d8d9a69ecf 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h -@@ -106,7 +106,7 @@ struct dentry { +@@ -107,7 +107,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list */ @@ -4758,7 +4661,7 @@ index 65d975bf9390..1e23dd02ac4e 100644 }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ -@@ -238,7 +238,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op +@@ -239,7 +239,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, @@ -4768,73 +4671,38 @@ index 65d975bf9390..1e23dd02ac4e 100644 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h -index e7e45f0cc7da..5a9e3e3769ce 100644 +index 2915f56ad421..5a9e3e3769ce 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h -@@ -2,9 +2,8 @@ - #ifndef __LINUX_DEBUG_LOCKING_H +@@ -3,8 +3,7 @@ #define __LINUX_DEBUG_LOCKING_H --#include <linux/kernel.h> #include <linux/atomic.h> -#include <linux/bug.h> +-#include <linux/printk.h> +#include <linux/cache.h> struct task_struct; -diff --git a/include/linux/delay.h b/include/linux/delay.h -index 1d0e2ce6b6d9..02b37178b54f 100644 ---- a/include/linux/delay.h -+++ b/include/linux/delay.h -@@ -76,4 +76,10 @@ static inline void fsleep(unsigned long usecs) - msleep(DIV_ROUND_UP(usecs, 1000)); - } - -+#ifdef CONFIG_PREEMPT_RT -+extern void cpu_chill(void); -+#else -+# define cpu_chill() cpu_relax() -+#endif -+ - #endif /* defined(_LINUX_DELAY_H) */ -diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h -index 3028b644b4fb..6f009559ee54 100644 ---- a/include/linux/dev_printk.h -+++ b/include/linux/dev_printk.h -@@ -21,6 +21,14 @@ - - 
struct device; - -+#define PRINTK_INFO_SUBSYSTEM_LEN 16 -+#define PRINTK_INFO_DEVICE_LEN 48 -+ -+struct dev_printk_info { -+ char subsystem[PRINTK_INFO_SUBSYSTEM_LEN]; -+ char device[PRINTK_INFO_DEVICE_LEN]; -+}; -+ - #ifdef CONFIG_PRINTK - - __printf(3, 0) __cold diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h -index 159c7476b11b..7f5b22e7c354 100644 +index a104b298019a..1176ed623406 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h -@@ -69,7 +69,7 @@ +@@ -58,7 +58,7 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \ -+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | \ +- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ++ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** diff --git a/include/linux/fs.h b/include/linux/fs.h -index 7519ae003a08..946b42e7c72d 100644 +index 91f3fbe5b57f..6b497344616b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -704,7 +704,7 @@ struct inode { - struct block_device *i_bdev; +@@ -698,7 +698,7 @@ struct inode { + struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; - unsigned i_dir_seq; @@ -4843,17 +4711,17 @@ index 7519ae003a08..946b42e7c72d 100644 __u32 i_generation; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h -index 754f67ac4326..41408d5e4014 100644 +index 7c9d6a2d7e90..76878b357ffa 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h -@@ -8,6 +8,7 @@ +@@ -6,6 +6,7 @@ + #include <linux/preempt.h> + #include <linux/lockdep.h> #include <linux/ftrace_irq.h> ++#include <linux/sched.h> #include <linux/vtime.h> #include <asm/hardirq.h> -+#include <linux/sched.h> - extern void synchronize_irq(unsigned int irq); - extern bool synchronize_hardirq(unsigned int irq); @@ -115,7 +116,6 @@ extern void rcu_nmi_exit(void); do { \ lockdep_off(); \ @@ -4870,106 +4738,92 @@ index 754f67ac4326..41408d5e4014 100644 arch_nmi_exit(); \ lockdep_on(); \ } while (0) -diff --git a/include/linux/highmem.h b/include/linux/highmem.h -index 14e6202ce47f..aa41a6b3d482 100644 ---- a/include/linux/highmem.h -+++ b/include/linux/highmem.h -@@ -8,6 +8,7 @@ - #include <linux/mm.h> - #include <linux/uaccess.h> - #include <linux/hardirq.h> -+#include <linux/sched.h> - - #include <asm/cacheflush.h> +diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h +index 1bbe96dc8be6..d8b7b42b13d4 100644 +--- a/include/linux/highmem-internal.h ++++ b/include/linux/highmem-internal.h +@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr) -@@ -83,7 +84,7 @@ static inline void kunmap(struct page *page) - */ static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) { - preempt_disable(); -+ migrate_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); ++ pagefault_disable(); - if (!PageHighMem(page)) - return page_address(page); -@@ -153,7 +154,7 @@ static inline void kunmap(struct page *page) + return __kmap_local_page_prot(page, prot); + } +@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page) - static inline void *kmap_atomic(struct page *page) + static inline void *kmap_atomic_pfn(unsigned long pfn) { - preempt_disable(); -+ migrate_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); ++ pagefault_disable(); - return page_address(page); + return __kmap_local_pfn_prot(pfn, kmap_prot); } 
-@@ -178,32 +179,51 @@ static inline void kunmap_atomic_high(void *addr) - - #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) - -+#ifndef CONFIG_PREEMPT_RT - DECLARE_PER_CPU(int, __kmap_atomic_idx); -+#endif - - static inline int kmap_atomic_idx_push(void) +@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr) { -+#ifndef CONFIG_PREEMPT_RT - int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; - --#ifdef CONFIG_DEBUG_HIGHMEM -+# ifdef CONFIG_DEBUG_HIGHMEM - WARN_ON_ONCE(in_irq() && !irqs_disabled()); - BUG_ON(idx >= KM_TYPE_NR); --#endif -+# endif - return idx; -+#else -+ current->kmap_idx++; -+ BUG_ON(current->kmap_idx > KM_TYPE_NR); -+ return current->kmap_idx - 1; -+#endif + kunmap_local_indexed(addr); + pagefault_enable(); +- preempt_enable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_enable(); ++ else ++ preempt_enable(); } - static inline int kmap_atomic_idx(void) - { -+#ifndef CONFIG_PREEMPT_RT - return __this_cpu_read(__kmap_atomic_idx) - 1; -+#else -+ return current->kmap_idx - 1; -+#endif - } + unsigned int __nr_free_highpages(void); +@@ -184,7 +195,10 @@ static inline void __kunmap_local(void *addr) - static inline void kmap_atomic_idx_pop(void) + static inline void *kmap_atomic(struct page *page) { --#ifdef CONFIG_DEBUG_HIGHMEM -+#ifndef CONFIG_PREEMPT_RT -+# ifdef CONFIG_DEBUG_HIGHMEM - int idx = __this_cpu_dec_return(__kmap_atomic_idx); - - BUG_ON(idx < 0); --#else -+# else - __this_cpu_dec(__kmap_atomic_idx); -+# endif -+#else -+ current->kmap_idx--; -+# ifdef CONFIG_DEBUG_HIGHMEM -+ BUG_ON(current->kmap_idx < 0); -+# endif +- preempt_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); + pagefault_disable(); + return page_address(page); + } +@@ -205,7 +219,10 @@ static inline void __kunmap_atomic(void *addr) + kunmap_flush_on_unmap(addr); #endif + pagefault_enable(); +- preempt_enable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_enable(); ++ else ++ preempt_enable(); } -@@ -218,7 +238,7 @@ do { \ - BUILD_BUG_ON(__same_type((addr), struct page *)); \ - kunmap_atomic_high(addr); \ - pagefault_enable(); \ -- preempt_enable(); \ -+ migrate_enable(); \ - } while (0) - + static inline unsigned int nr_free_highpages(void) { return 0; } +diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h +index bb5e7b0a4274..e425a26a5ed8 100644 +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu); + #define hrtimers_dead_cpu NULL + #endif ++#ifdef CONFIG_PREEMPT_RT ++extern void cpu_chill(void); ++#else ++# define cpu_chill() cpu_relax() ++#endif ++ + #endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index f9aee3538461..a490d9f801aa 100644 +index bb8ff9083e7d..ed6e49bceff1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -560,7 +560,7 @@ struct softirq_action +@@ -569,7 +569,7 @@ struct softirq_action asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); @@ -4978,7 +4832,7 @@ index f9aee3538461..a490d9f801aa 100644 void do_softirq_own_stack(void); #else static inline void do_softirq_own_stack(void) -@@ -654,7 +654,7 @@ enum +@@ -663,26 +663,20 @@ enum TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ }; @@ -4987,16 +4841,21 @@ index f9aee3538461..a490d9f801aa 100644 static inline int tasklet_trylock(struct tasklet_struct *t) { return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); -@@ -665,15 +665,11 @@ static inline void tasklet_unlock(struct tasklet_struct *t) 
- smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); } + +-static inline void tasklet_unlock(struct tasklet_struct *t) +-{ +- smp_mb__before_atomic(); +- clear_bit(TASKLET_STATE_RUN, &(t)->state); +-} - -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } -} ++void tasklet_unlock(struct tasklet_struct *t); +void tasklet_unlock_wait(struct tasklet_struct *t); ++void tasklet_unlock_spin_wait(struct tasklet_struct *t); #else -#define tasklet_trylock(t) 1 -#define tasklet_unlock_wait(t) do { } while (0) @@ -5004,14 +4863,33 @@ index f9aee3538461..a490d9f801aa 100644 +static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } +static inline void tasklet_unlock(struct tasklet_struct *t) { } +static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } ++static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } #endif extern void __tasklet_schedule(struct tasklet_struct *t); +@@ -707,6 +701,17 @@ static inline void tasklet_disable_nosync(struct tasklet_struct *t) + smp_mb__after_atomic(); + } + ++/* ++ * Do not use in new code. There is no real reason to invoke this from ++ * atomic contexts. ++ */ ++static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) ++{ ++ tasklet_disable_nosync(t); ++ tasklet_unlock_spin_wait(t); ++ smp_mb(); ++} ++ + static inline void tasklet_disable(struct tasklet_struct *t) + { + tasklet_disable_nosync(t); diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h -index 30823780c192..f941f2d7d71c 100644 +index ec2a47a81e42..dbbef9089789 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -55,4 +55,10 @@ static inline void irq_work_run(void) { } +@@ -64,4 +64,10 @@ static inline void irq_work_run(void) { } static inline void irq_work_single(void *arg) { } #endif @@ -5023,7 +4901,7 @@ index 30823780c192..f941f2d7d71c 100644 + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h -index 5745491303e0..2b9caf39fb07 100644 +index 891b323266df..45d8bda8fd53 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -68,6 +68,7 @@ struct irq_desc { @@ -5035,7 +4913,7 @@ index 5745491303e0..2b9caf39fb07 100644 struct cpumask *percpu_enabled; const struct cpumask *percpu_affinity; diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h -index 3ed4e8771b64..a437b2e70d37 100644 +index 8de0e1373de7..24dd1f92254c 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -71,14 +71,6 @@ do { \ @@ -5075,11 +4953,23 @@ index 3ed4e8771b64..a437b2e70d37 100644 #if defined(CONFIG_IRQSOFF_TRACER) || \ defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); +diff --git a/include/linux/kcov.h b/include/linux/kcov.h +index 4e3037dc1204..55dc338f6bcd 100644 +--- a/include/linux/kcov.h ++++ b/include/linux/kcov.h +@@ -2,6 +2,7 @@ + #ifndef _LINUX_KCOV_H + #define _LINUX_KCOV_H + ++#include <linux/sched.h> + #include <uapi/linux/kcov.h> + + struct task_struct; diff --git a/include/linux/kernel.h b/include/linux/kernel.h -index c25b8e41c0ea..b3b514a1f12f 100644 +index f7902d8c1048..df23f03b3eb9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h -@@ -218,6 +218,10 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); +@@ -107,6 +107,10 @@ extern void __cant_migrate(const char *file, int line); */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) 
@@ -5090,16 +4980,113 @@ index c25b8e41c0ea..b3b514a1f12f 100644 /** * cant_sleep - annotation for functions that cannot sleep * -@@ -249,6 +253,7 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset); +@@ -150,6 +154,7 @@ extern void __cant_migrate(const char *file, int line); static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define might_sleep_no_state_check() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) + # define cant_migrate() do { } while (0) # define sched_annotate_sleep() do { } while (0) - # define non_block_start() do { } while (0) +diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h +index 3378bcbe585e..86673930c8ea 100644 +--- a/include/linux/kmsg_dump.h ++++ b/include/linux/kmsg_dump.h +@@ -29,6 +29,18 @@ enum kmsg_dump_reason { + KMSG_DUMP_MAX + }; + ++/** ++ * struct kmsg_dumper_iter - iterator for kernel crash message dumper ++ * @active: Flag that specifies if this is currently dumping ++ * @cur_seq: Points to the oldest message to dump (private) ++ * @next_seq: Points after the newest message to dump (private) ++ */ ++struct kmsg_dumper_iter { ++ bool active; ++ u64 cur_seq; ++ u64 next_seq; ++}; ++ + /** + * struct kmsg_dumper - kernel crash message dumper structure + * @list: Entry in the dumper list (private) +@@ -39,33 +51,22 @@ enum kmsg_dump_reason { + */ + struct kmsg_dumper { + struct list_head list; +- void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); ++ void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter); + enum kmsg_dump_reason max_reason; +- bool active; + bool registered; +- +- /* private state of the kmsg iterator */ +- u32 cur_idx; +- u32 next_idx; +- u64 cur_seq; +- u64 next_seq; + }; + + #ifdef CONFIG_PRINTK + void kmsg_dump(enum kmsg_dump_reason reason); + +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +- char *line, size_t size, size_t *len); +- +-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len); + +-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +- char *buf, size_t size, size_t *len); +- +-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); ++bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, ++ char *buf, size_t size, size_t *len_out); + +-void kmsg_dump_rewind(struct kmsg_dumper *dumper); ++void kmsg_dump_rewind(struct kmsg_dumper_iter *iter); + + int kmsg_dump_register(struct kmsg_dumper *dumper); + +@@ -77,30 +78,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) + { + } + +-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, +- bool syslog, const char *line, +- size_t size, size_t *len) +-{ +- return false; +-} +- +-static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + const char *line, size_t size, size_t *len) + { + return false; + } + +-static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++static inline bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len) + { + return false; + } + +-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +-{ +-} +- +-static inline void 
kmsg_dump_rewind(struct kmsg_dumper *dumper) ++static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) + { + } + diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h -index 4a8795b21d77..998d2c34cf0c 100644 +index 4a8795b21d77..271f911f2803 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -7,33 +7,90 @@ @@ -5203,8 +5190,12 @@ index 4a8795b21d77..998d2c34cf0c 100644 static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); -@@ -55,26 +112,55 @@ static inline void local_lock_release(local_lock_t *l) { } +@@ -53,21 +110,50 @@ static inline void local_lock_acquire(local_lock_t *l) { } + static inline void local_lock_release(local_lock_t *l) { } + #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ ++#ifdef CONFIG_PREEMPT_RT ++ #define __local_lock(lock) \ do { \ - preempt_disable(); \ @@ -5217,8 +5208,6 @@ index 4a8795b21d77..998d2c34cf0c 100644 + local_lock_release(this_cpu_ptr(lock)); \ + migrate_enable(); \ + } while (0) -+ -+#ifdef CONFIG_PREEMPT_RT + #define __local_lock_irq(lock) \ do { \ @@ -5232,10 +5221,9 @@ index 4a8795b21d77..998d2c34cf0c 100644 - local_irq_save(flags); \ + migrate_disable(); \ + flags = 0; \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - --#define __local_unlock(lock) \ ++ local_lock_acquire(this_cpu_ptr(lock)); \ ++ } while (0) ++ +#define __local_unlock_irq(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ @@ -5243,14 +5231,23 @@ index 4a8795b21d77..998d2c34cf0c 100644 + } while (0) + +#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ -- preempt_enable(); \ ++ do { \ ++ local_lock_release(this_cpu_ptr(lock)); \ + migrate_enable(); \ + } while (0) + +#else + ++#define __local_lock(lock) \ ++ do { \ ++ preempt_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +@@ -77,6 +163,18 @@ static inline void local_lock_release(local_lock_t *l) { } + preempt_enable(); \ + } while (0) + +#define __local_lock_irq(lock) \ + do { \ + local_irq_disable(); \ @@ -5261,33 +5258,19 @@ index 4a8795b21d77..998d2c34cf0c 100644 + do { \ + local_irq_save(flags); \ + local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - ++ } while (0) ++ #define __local_unlock_irq(lock) \ -@@ -88,3 +174,5 @@ static inline void local_lock_release(local_lock_t *l) { } + do { \ + local_lock_release(this_cpu_ptr(lock)); \ +@@ -88,3 +186,5 @@ static inline void local_lock_release(local_lock_t *l) { } local_lock_release(this_cpu_ptr(lock)); \ local_irq_restore(flags); \ } while (0) + +#endif -diff --git a/include/linux/mhi.h b/include/linux/mhi.h -index c4a940d98912..2b4ed30d4729 100644 ---- a/include/linux/mhi.h -+++ b/include/linux/mhi.h -@@ -9,10 +9,9 @@ - #include <linux/device.h> - #include <linux/dma-direction.h> - #include <linux/mutex.h> --#include <linux/rwlock_types.h> - #include <linux/skbuff.h> - #include <linux/slab.h> --#include <linux/spinlock_types.h> -+#include <linux/spinlock.h> - #include <linux/wait.h> - #include <linux/workqueue.h> - diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index ed028af3cb19..cbdc39fea2ff 100644 +index 2f395ab624f3..8492582d486c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -12,6 +12,7 @@ @@ -5297,8 +5280,8 @@ index ed028af3cb19..cbdc39fea2ff 100644 +#include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> - -@@ -548,6 +549,9 @@ struct mm_struct { + #include <linux/seqlock.h> +@@ 
-555,6 +556,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; @@ -5374,10 +5357,10 @@ index dcd185cbfe79..90f090efcb58 100644 #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h new file mode 100644 -index 000000000000..7179367bfb5e +index 000000000000..f0b2e07cd5c5 --- /dev/null +++ b/include/linux/mutex_rt.h -@@ -0,0 +1,131 @@ +@@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_MUTEX_RT_H +#define __LINUX_MUTEX_RT_H @@ -5409,7 +5392,6 @@ index 000000000000..7179367bfb5e + +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); +extern void __lockfunc _mutex_lock(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io(struct mutex *lock); +extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); @@ -5426,7 +5408,7 @@ index 000000000000..7179367bfb5e +#define mutex_lock_killable(l) _mutex_lock_killable(l) +#define mutex_trylock(l) _mutex_trylock(l) +#define mutex_unlock(l) _mutex_unlock(l) -+#define mutex_lock_io(l) _mutex_lock_io(l); ++#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0); + +#define __mutex_owner(l) ((l)->lock.owner) + @@ -5457,7 +5439,7 @@ index 000000000000..7179367bfb5e +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable(l) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io(l) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) +#endif + +# define mutex_init(mutex) \ @@ -5510,10 +5492,10 @@ index 000000000000..7179367bfb5e + +#endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h -index 69cb46f7b8d2..fc682adab66a 100644 +index 3327239fa2f9..f596a16a5f7b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h -@@ -1670,7 +1670,7 @@ struct nfs_unlinkdata { +@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; @@ -5522,8 +5504,39 @@ index 69cb46f7b8d2..fc682adab66a 100644 const struct cred *cred; struct nfs_fattr dir_attr; long timeout; +diff --git a/include/linux/notifier.h b/include/linux/notifier.h +index 2fb373a5c1ed..723bc2df6388 100644 +--- a/include/linux/notifier.h ++++ b/include/linux/notifier.h +@@ -58,7 +58,7 @@ struct notifier_block { + }; + + struct atomic_notifier_head { +- spinlock_t lock; ++ raw_spinlock_t lock; + struct notifier_block __rcu *head; + }; + +@@ -78,7 +78,7 @@ struct srcu_notifier_head { + }; + + #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ +- spin_lock_init(&(name)->lock); \ ++ raw_spin_lock_init(&(name)->lock); \ + (name)->head = NULL; \ + } while (0) + #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ +@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); + cleanup_srcu_struct(&(name)->srcu); + + #define ATOMIC_NOTIFIER_INIT(name) { \ +- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .head = NULL } + #define BLOCKING_NOTIFIER_INIT(name) { \ + .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ diff --git a/include/linux/pid.h b/include/linux/pid.h -index 176d6cf80e7c..4daecc34c097 100644 +index fa10acb8d6a4..2f86f84e9fc1 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -3,6 +3,7 @@ @@ -5535,46 +5548,23 @@ index 176d6cf80e7c..4daecc34c097 100644 #include 
<linux/refcount.h> diff --git a/include/linux/preempt.h b/include/linux/preempt.h -index 7d9c1c0e149c..8a47b9b1bade 100644 +index 69cc8b64aa3a..af39859f02ee 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -77,10 +77,14 @@ - /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ - #include <asm/preempt.h> +@@ -79,7 +79,11 @@ --#define hardirq_count() (preempt_count() & HARDIRQ_MASK) + #define nmi_count() (preempt_count() & NMI_MASK) + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) --#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ -- | NMI_MASK)) -+#define pc_nmi_count() (preempt_count() & NMI_MASK) -+#define hardirq_count() (preempt_count() & HARDIRQ_MASK) +#ifdef CONFIG_PREEMPT_RT +# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) +#else +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) +#endif -+#define irq_count() (pc_nmi_count() | hardirq_count() | softirq_count()) - - /* - * Are we doing bottom half or hardware interrupt processing? -@@ -95,13 +99,12 @@ - * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really - * should not be used in new code. - */ -+#define in_nmi() (pc_nmi_count()) - #define in_irq() (hardirq_count()) --#define in_softirq() (softirq_count()) - #define in_interrupt() (irq_count()) -+#define in_softirq() (softirq_count()) - #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) --#define in_nmi() (preempt_count() & NMI_MASK) --#define in_task() (!(preempt_count() & \ -- (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) -+#define in_task() (!(irq_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + #define irq_count() (nmi_count() | hardirq_count() | softirq_count()) /* - * The preempt_count offset after preempt_disable(); -@@ -115,7 +118,11 @@ +@@ -117,7 +121,11 @@ /* * The preempt_count offset after spin_lock() */ @@ -5586,7 +5576,7 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 /* * The preempt_count offset needed for things like: -@@ -164,6 +171,20 @@ extern void preempt_count_sub(int val); +@@ -166,6 +174,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) @@ -5607,7 +5597,7 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -@@ -172,13 +193,25 @@ do { \ +@@ -174,13 +196,25 @@ do { \ barrier(); \ } while (0) @@ -5634,7 +5624,7 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -203,6 +236,18 @@ do { \ +@@ -205,6 +239,18 @@ do { \ __preempt_schedule(); \ } while (0) @@ -5653,7 +5643,7 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 #else /* !CONFIG_PREEMPTION */ #define preempt_enable() \ do { \ -@@ -210,6 +255,12 @@ do { \ +@@ -212,6 +258,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -5666,15 +5656,20 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -248,6 +299,7 @@ do { \ +@@ -250,8 +302,12 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() +#define preempt_check_resched_rt() barrier() #define preemptible() 0 ++#define preempt_lazy_disable() barrier() ++#define preempt_lazy_enable() barrier() ++ #endif /* CONFIG_PREEMPT_COUNT */ -@@ -268,10 +320,22 @@ do { \ + + #ifdef MODULE +@@ -270,10 +326,22 @@ do { \ } while (0) #define 
preempt_fold_need_resched() \ do { \ @@ -5698,72 +5693,12 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; -@@ -322,6 +386,80 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier, +@@ -386,8 +454,15 @@ extern void migrate_enable(void); - #endif + #else -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ -+/* -+ * Migrate-Disable and why it is undesired. -+ * -+ * When a preempted task becomes elegible to run under the ideal model (IOW it -+ * becomes one of the M highest priority tasks), it might still have to wait -+ * for the preemptee's migrate_disable() section to complete. Thereby suffering -+ * a reduction in bandwidth in the exact duration of the migrate_disable() -+ * section. -+ * -+ * Per this argument, the change from preempt_disable() to migrate_disable() -+ * gets us: -+ * -+ * - a higher priority tasks gains reduced wake-up latency; with preempt_disable() -+ * it would have had to wait for the lower priority task. -+ * -+ * - a lower priority tasks; which under preempt_disable() could've instantly -+ * migrated away when another CPU becomes available, is now constrained -+ * by the ability to push the higher priority task away, which might itself be -+ * in a migrate_disable() section, reducing it's available bandwidth. -+ * -+ * IOW it trades latency / moves the interference term, but it stays in the -+ * system, and as long as it remains unbounded, the system is not fully -+ * deterministic. -+ * -+ * -+ * The reason we have it anyway. -+ * -+ * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a -+ * number of primitives into becoming preemptible, they would also allow -+ * migration. This turns out to break a bunch of per-cpu usage. To this end, -+ * all these primitives employ migirate_disable() to restore this implicit -+ * assumption. -+ * -+ * This is a 'temporary' work-around at best. The correct solution is getting -+ * rid of the above assumptions and reworking the code to employ explicit -+ * per-cpu locking or short preempt-disable regions. -+ * -+ * The end goal must be to get rid of migrate_disable(), alternatively we need -+ * a schedulability theory that does not depend on abritrary migration. -+ * -+ * -+ * Notes on the implementation. -+ * -+ * The implementation is particularly tricky since existing code patterns -+ * dictate neither migrate_disable() nor migrate_enable() is allowed to block. -+ * This means that it cannot use cpus_read_lock() to serialize against hotplug, -+ * nor can it easily migrate itself into a pending affinity mask change on -+ * migrate_enable(). -+ * -+ * -+ * Note: even non-work-conserving schedulers like semi-partitioned depends on -+ * migration, so migrate_disable() is not only a problem for -+ * work-conserving schedulers. 
-+ * -+ */ -+extern void migrate_disable(void); -+extern void migrate_enable(void); -+ -+#elif defined(CONFIG_PREEMPT_RT) -+ +-static inline void migrate_disable(void) { } +-static inline void migrate_enable(void) { } +static inline void migrate_disable(void) +{ + preempt_lazy_disable(); @@ -5773,29 +5708,32 @@ index 7d9c1c0e149c..8a47b9b1bade 100644 +{ + preempt_lazy_enable(); +} -+ -+#else /* !CONFIG_PREEMPT_RT */ -+ - /** - * migrate_disable - Prevent migration of the current task - * -@@ -352,4 +490,6 @@ static __always_inline void migrate_enable(void) - preempt_enable(); - } -+#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */ -+ - #endif /* __LINUX_PREEMPT_H */ + #endif /* CONFIG_SMP */ + diff --git a/include/linux/printk.h b/include/linux/printk.h -index 34c1a7be3e01..c49d5bb3f8ff 100644 +index fe7eb2351610..7e4352467d83 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -147,22 +147,12 @@ static inline __printf(1, 2) __cold - void early_printk(const char *s, ...) { } - #endif +@@ -46,6 +46,12 @@ static inline const char *printk_skip_headers(const char *buffer) --#ifdef CONFIG_PRINTK_NMI --extern void printk_nmi_enter(void); + #define CONSOLE_EXT_LOG_MAX 8192 + ++/* ++ * The maximum size of a record formatted for console printing ++ * (i.e. with the prefix prepended to every line). ++ */ ++#define CONSOLE_LOG_MAX 4096 ++ + /* printk's without a loglevel use this.. */ + #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT + +@@ -149,18 +155,6 @@ static inline __printf(1, 2) __cold + void early_printk(const char *s, ...) { } + #endif + +-#ifdef CONFIG_PRINTK_NMI +-extern void printk_nmi_enter(void); -extern void printk_nmi_exit(void); -extern void printk_nmi_direct_enter(void); -extern void printk_nmi_direct_exit(void); @@ -5805,18 +5743,11 @@ index 34c1a7be3e01..c49d5bb3f8ff 100644 -static inline void printk_nmi_direct_enter(void) { } -static inline void printk_nmi_direct_exit(void) { } -#endif /* PRINTK_NMI */ -+struct dev_printk_info; +- + struct dev_printk_info; #ifdef CONFIG_PRINTK --asmlinkage __printf(5, 0) -+asmlinkage __printf(4, 0) - int vprintk_emit(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, - const char *fmt, va_list args); - - asmlinkage __printf(1, 0) -@@ -203,8 +193,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); +@@ -207,8 +201,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; @@ -5825,7 +5756,7 @@ index 34c1a7be3e01..c49d5bb3f8ff 100644 #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) -@@ -268,14 +256,6 @@ static inline void show_regs_print_info(const char *log_lvl) +@@ -272,14 +264,6 @@ static inline void show_regs_print_info(const char *log_lvl) static inline void dump_stack(void) { } @@ -5840,6 +5771,15 @@ index 34c1a7be3e01..c49d5bb3f8ff 100644 #endif extern int kptr_restrict; +@@ -497,6 +481,8 @@ extern int kptr_restrict; + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) + #endif + ++bool pr_flush(int timeout_ms, bool reset_on_progress); ++ + /* + * ratelimited messages with local ratelimit_state, + * no local ratelimit_state used in the !PRINTK case diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4..0e41d0527809 100644 --- a/include/linux/random.h @@ -5900,30 +5840,6 @@ index d7db17996322..c33b0e16d04b 100644 #define RB_ROOT_CACHED 
(struct rb_root_cached) { {NULL, }, NULL } /* Same as rb_first(), but O(1) */ -diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h -index 7d012faa509a..3d1a9e716b80 100644 ---- a/include/linux/rbtree_latch.h -+++ b/include/linux/rbtree_latch.h -@@ -42,8 +42,8 @@ struct latch_tree_node { - }; - - struct latch_tree_root { -- seqcount_t seq; -- struct rb_root tree[2]; -+ seqcount_latch_t seq; -+ struct rb_root tree[2]; - }; - - /** -@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root, - do { - seq = raw_read_seqcount_latch(&root->seq); - node = __lt_find(key, root, seq & 1, ops->comp); -- } while (read_seqcount_retry(&root->seq, seq)); -+ } while (read_seqcount_latch_retry(&root->seq, seq)); - - return node; - } diff --git a/include/linux/rbtree_type.h b/include/linux/rbtree_type.h new file mode 100644 index 000000000000..77a89dd2c7c6 @@ -5962,7 +5878,7 @@ index 000000000000..77a89dd2c7c6 + +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index d15d46db61f7..76d19f339419 100644 +index fd02c5fa60cb..8b06b9b16111 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,6 +52,11 @@ void __rcu_read_unlock(void); @@ -5977,7 +5893,7 @@ index d15d46db61f7..76d19f339419 100644 #else /* #ifdef CONFIG_PREEMPT_RCU */ -@@ -70,6 +75,8 @@ static inline int rcu_preempt_depth(void) +@@ -77,6 +82,8 @@ static inline int rcu_preempt_depth(void) return 0; } @@ -5986,7 +5902,7 @@ index d15d46db61f7..76d19f339419 100644 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ -@@ -312,7 +319,8 @@ static inline void rcu_preempt_sleep_check(void) { } +@@ -326,7 +333,8 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ @@ -5997,7 +5913,7 @@ index d15d46db61f7..76d19f339419 100644 RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ "Illegal context switch in RCU-sched read-side critical section"); \ diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h -index 6fd615a0eea9..5308cd7ddddf 100644 +index 6fd615a0eea9..b02009f53026 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -14,11 +14,15 @@ @@ -6091,14 +6007,13 @@ index 6fd615a0eea9..5308cd7ddddf 100644 #define DEFINE_RT_MUTEX(mutexname) \ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) -@@ -115,9 +112,7 @@ extern void rt_mutex_lock(struct rt_mutex *lock); +@@ -115,9 +112,6 @@ extern void rt_mutex_lock(struct rt_mutex *lock); #endif extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); -extern int rt_mutex_timed_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout); - -+extern int rt_mutex_lock_killable(struct rt_mutex *lock); extern int rt_mutex_trylock(struct rt_mutex *lock); extern void rt_mutex_unlock(struct rt_mutex *lock); @@ -6296,10 +6211,10 @@ index 000000000000..4762391d659b +#endif diff --git a/include/linux/rwsem-rt.h b/include/linux/rwsem-rt.h new file mode 100644 -index 000000000000..7f7e748ef522 +index 000000000000..0ba8aae9a198 --- /dev/null +++ b/include/linux/rwsem-rt.h -@@ -0,0 +1,69 @@ +@@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef _LINUX_RWSEM_RT_H +#define _LINUX_RWSEM_RT_H @@ -6359,6 +6274,7 @@ index 000000000000..7f7e748ef522 +} + +extern void __down_read(struct rw_semaphore *sem); ++extern int __down_read_interruptible(struct rw_semaphore *sem); +extern int __down_read_killable(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void 
__down_write(struct rw_semaphore *sem); @@ -6370,7 +6286,7 @@ index 000000000000..7f7e748ef522 + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h -index 25e3fde85617..9101367852bc 100644 +index 4c715be48717..9323af8a9244 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -16,6 +16,11 @@ @@ -6400,18 +6316,18 @@ index 25e3fde85617..9101367852bc 100644 * lock for reading */ diff --git a/include/linux/sched.h b/include/linux/sched.h -index afe01e232935..c72ae6627e96 100644 +index 6e3a5eeec509..183e9d90841c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -34,6 +34,7 @@ - #include <linux/rseq.h> - #include <linux/seqlock.h> - #include <linux/kcsan.h> -+#include <asm/kmap_types.h> - - /* task_struct member predeclarations (sorted alphabetically): */ - struct audit_context; -@@ -110,12 +111,8 @@ struct task_group; +@@ -14,7 +14,6 @@ + #include <linux/pid.h> + #include <linux/sem.h> + #include <linux/shm.h> +-#include <linux/kcov.h> + #include <linux/mutex.h> + #include <linux/plist.h> + #include <linux/hrtimer.h> +@@ -113,12 +112,8 @@ struct io_uring_task; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) @@ -6424,7 +6340,7 @@ index afe01e232935..c72ae6627e96 100644 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* -@@ -139,6 +136,9 @@ struct task_group; +@@ -142,6 +137,9 @@ struct io_uring_task; smp_store_mb(current->state, (state_value)); \ } while (0) @@ -6434,7 +6350,7 @@ index afe01e232935..c72ae6627e96 100644 #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ -@@ -192,6 +192,9 @@ struct task_group; +@@ -195,6 +193,9 @@ struct io_uring_task; #define set_current_state(state_value) \ smp_store_mb(current->state, (state_value)) @@ -6444,7 +6360,7 @@ index afe01e232935..c72ae6627e96 100644 /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must -@@ -638,6 +641,8 @@ struct task_struct { +@@ -656,6 +657,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; @@ -6453,19 +6369,7 @@ index afe01e232935..c72ae6627e96 100644 /* * This begins the randomizable portion of task_struct. 
Only -@@ -713,6 +718,11 @@ struct task_struct { - int nr_cpus_allowed; - const cpumask_t *cpus_ptr; - cpumask_t cpus_mask; -+ void *migration_pending; -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ unsigned short migration_disabled; -+#endif -+ unsigned short migration_flags; - - #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; -@@ -941,11 +951,16 @@ struct task_struct { +@@ -982,11 +985,16 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; @@ -6482,7 +6386,7 @@ index afe01e232935..c72ae6627e96 100644 unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; -@@ -972,6 +987,7 @@ struct task_struct { +@@ -1014,6 +1022,7 @@ struct task_struct { raw_spinlock_t pi_lock; struct wake_q_node wake_q; @@ -6490,7 +6394,7 @@ index afe01e232935..c72ae6627e96 100644 #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ -@@ -999,6 +1015,9 @@ struct task_struct { +@@ -1041,6 +1050,9 @@ struct task_struct { int softirq_context; int irq_config; #endif @@ -6500,20 +6404,7 @@ index afe01e232935..c72ae6627e96 100644 #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL -@@ -1280,6 +1299,12 @@ struct task_struct { - unsigned int sequential_io; - unsigned int sequential_io_avg; - #endif -+#ifdef CONFIG_PREEMPT_RT -+# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 -+ int kmap_idx; -+ pte_t kmap_pte[KM_TYPE_NR]; -+# endif -+#endif - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; - #endif -@@ -1722,6 +1747,7 @@ extern struct task_struct *find_get_task_by_vpid(pid_t nr); +@@ -1775,6 +1787,7 @@ extern struct task_struct *find_get_task_by_vpid(pid_t nr); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -6521,7 +6412,7 @@ index afe01e232935..c72ae6627e96 100644 extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP -@@ -1812,6 +1838,89 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) +@@ -1865,6 +1878,89 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -6611,23 +6502,8 @@ index afe01e232935..c72ae6627e96 100644 /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. 
The return -diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h -index 9a62ffdd296f..412cdaba33eb 100644 ---- a/include/linux/sched/hotplug.h -+++ b/include/linux/sched/hotplug.h -@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu); - extern int sched_cpu_deactivate(unsigned int cpu); - - #ifdef CONFIG_HOTPLUG_CPU -+extern int sched_cpu_wait_empty(unsigned int cpu); - extern int sched_cpu_dying(unsigned int cpu); - #else -+# define sched_cpu_wait_empty NULL - # define sched_cpu_dying NULL - #endif - diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h -index f889e332912f..1a08d3d41805 100644 +index 1ae08b8462a4..4c74089aea20 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm) @@ -6642,12 +6518,12 @@ index f889e332912f..1a08d3d41805 100644 + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else -+# define mmdrop_delayed(mm) mmdrop(mm) ++# define mmdrop_delayed(mm) mmdrop(mm) +#endif + - /* - * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_lock for writing before modifying the + /** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index e5af028c08b4..994c25640e15 100644 --- a/include/linux/sched/rt.h @@ -6696,868 +6572,149 @@ index 26a2013ac39c..6e2dff721547 100644 +} #endif /* _LINUX_SCHED_WAKE_Q_H */ -diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h -index 962d9768945f..f73c7eb68f27 100644 ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -17,6 +17,7 @@ - #include <linux/kcsan-checks.h> - #include <linux/lockdep.h> - #include <linux/mutex.h> -+#include <linux/ww_mutex.h> - #include <linux/preempt.h> - #include <linux/spinlock.h> +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 9e655055112d..ffef674deda7 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -7,6 +7,7 @@ + #ifndef _LINUX_SERIAL_8250_H + #define _LINUX_SERIAL_8250_H -@@ -53,7 +54,7 @@ - * - * If the write serialization mechanism is one of the common kernel - * locking primitives, use a sequence counter with associated lock -- * (seqcount_LOCKTYPE_t) instead. -+ * (seqcount_LOCKNAME_t) instead. - * - * If it's desired to automatically handle the sequence counter writer - * serialization and non-preemptibility requirements, use a sequential -@@ -117,7 +118,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) - #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } ++#include <linux/atomic.h> + #include <linux/serial_core.h> + #include <linux/serial_reg.h> + #include <linux/platform_device.h> +@@ -125,6 +126,8 @@ struct uart_8250_port { + #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; - /* -- * Sequence counters with associated locks (seqcount_LOCKTYPE_t) -+ * Sequence counters with associated locks (seqcount_LOCKNAME_t) - * - * A sequence counter which associates the lock used for writer - * serialization at initialization time. 
This enables lockdep to validate -@@ -131,37 +132,59 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) - * See Documentation/locking/seqlock.rst - */ ++ atomic_t console_printing; ++ + struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; --#ifdef CONFIG_LOCKDEP -+/* -+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot -+ * disable preemption. It can lead to higher latencies, and the write side -+ * sections will not be able to acquire locks which become sleeping locks -+ * (e.g. spinlock_t). -+ * -+ * To remain preemptible while avoiding a possible livelock caused by the -+ * reader preempting the writer, use a different technique: let the reader -+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the -+ * case, acquire then release the associated LOCKNAME writer serialization -+ * lock. This will allow any possibly-preempted writer to make progress -+ * until the end of its writer serialization lock critical section. -+ * -+ * This lock-unlock technique must be implemented for all of PREEMPT_RT -+ * sleeping locks. See Documentation/locking/locktypes.rst -+ */ -+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) - #define __SEQ_LOCK(expr) expr - #else - #define __SEQ_LOCK(expr) - #endif +@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up); + void serial8250_set_defaults(struct uart_8250_port *up); + void serial8250_console_write(struct uart_8250_port *up, const char *s, + unsigned int count); ++void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, ++ unsigned int count); + int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); - /** -- * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated -+ * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated - * @seqcount: The real sequence counter -- * @lock: Pointer to the associated spinlock -+ * @lock: Pointer to the associated lock - * -- * A plain sequence counter with external writer synchronization by a -- * spinlock. The spinlock is associated to the sequence count in the -+ * A plain sequence counter with external writer synchronization by -+ * LOCKNAME @lock. The lock is associated to the sequence counter in the - * static initializer or init function. This enables lockdep to validate - * that the write side critical section is properly serialized. -+ * -+ * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. 
- */ +diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h +index d82b6f396588..12b2e41d8f47 100644 +--- a/include/linux/shmem_fs.h ++++ b/include/linux/shmem_fs.h +@@ -31,7 +31,7 @@ struct shmem_sb_info { + struct percpu_counter used_blocks; /* How many are allocated */ + unsigned long max_inodes; /* How many inodes are allowed */ + unsigned long free_inodes; /* How many are left for allocation */ +- spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ ++ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ + umode_t mode; /* Mount mode for root directory */ + unsigned char huge; /* Whether to try for hugepages */ + kuid_t uid; /* Mount uid for root directory */ +diff --git a/include/linux/signal.h b/include/linux/signal.h +index 205526c4003a..d47a86790edc 100644 +--- a/include/linux/signal.h ++++ b/include/linux/signal.h +@@ -265,6 +265,7 @@ static inline void init_sigpending(struct sigpending *sig) + } --/** -+/* - * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t - * @s: Pointer to the seqcount_LOCKNAME_t instance -- * @lock: Pointer to the associated LOCKTYPE -+ * @lock: Pointer to the associated lock - */ + extern void flush_sigqueue(struct sigpending *queue); ++extern void flush_task_sigqueue(struct task_struct *tsk); - /* -- * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers -- * @locktype: actual typename -- * @lockname: name -+ * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers -+ * seqprop_LOCKNAME_*() - Property accessors for seqcount_LOCKNAME_t -+ * -+ * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t -+ * @locktype: LOCKNAME canonical C data type - * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() -+ * @lockbase: associated lock release function (prefix only) -+ * @lock_acquire: associated lock acquisition function (full call) - */ --#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember) \ -+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ - typedef struct seqcount_##lockname { \ - seqcount_t seqcount; \ - __SEQ_LOCK(locktype *lock); \ -@@ -175,19 +198,45 @@ seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ - } \ - \ - static __always_inline seqcount_t * \ --__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ - { \ - return &s->seqcount; \ - } \ - \ -+static __always_inline unsigned \ -+__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ -+{ \ -+ unsigned seq = READ_ONCE(s->seqcount.sequence); \ -+ \ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ -+ return seq; \ -+ \ -+ if (preemptible && unlikely(seq & 1)) { \ -+ __SEQ_LOCK(lock_acquire); \ -+ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ -+ \ -+ /* \ -+ * Re-read the sequence counter since the (possibly \ -+ * preempted) writer made progress. 
\ -+ */ \ -+ seq = READ_ONCE(s->seqcount.sequence); \ -+ } \ -+ \ -+ return seq; \ -+} \ -+ \ - static __always_inline bool \ --__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ - { \ -- return preemptible; \ -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ -+ return preemptible; \ -+ \ -+ /* PREEMPT_RT relies on the above LOCK+UNLOCK */ \ -+ return false; \ - } \ - \ - static __always_inline void \ --__seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ -+__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ - { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ - } -@@ -196,50 +245,56 @@ __seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ - * __seqprop() for seqcount_t - */ + /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ + static inline int valid_signal(unsigned long sig) +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 5f60c9e907c9..7800b6c2115e 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -295,6 +295,7 @@ struct sk_buff_head { --static inline seqcount_t *__seqcount_ptr(seqcount_t *s) -+static inline seqcount_t *__seqprop_ptr(seqcount_t *s) - { - return s; + __u32 qlen; + spinlock_t lock; ++ raw_spinlock_t raw_lock; + }; + + struct sk_buff; +@@ -1890,6 +1891,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) + __skb_queue_head_init(list); } --static inline bool __seqcount_preemptible(seqcount_t *s) -+static inline unsigned __seqprop_sequence(const seqcount_t *s) ++static inline void skb_queue_head_init_raw(struct sk_buff_head *list) +{ -+ return READ_ONCE(s->sequence); ++ raw_spin_lock_init(&list->raw_lock); ++ __skb_queue_head_init(list); +} + -+static inline bool __seqprop_preemptible(const seqcount_t *s) - { - return false; - } - --static inline void __seqcount_assert(seqcount_t *s) -+static inline void __seqprop_assert(const seqcount_t *s) + static inline void skb_queue_head_init_class(struct sk_buff_head *list, + struct lock_class_key *class) { - lockdep_assert_preemption_disabled(); - } - --SEQCOUNT_LOCKTYPE(raw_spinlock_t, raw_spinlock, false, s->lock) --SEQCOUNT_LOCKTYPE(spinlock_t, spinlock, false, s->lock) --SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) --SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) --SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) -+#define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) +diff --git a/include/linux/smp.h b/include/linux/smp.h +index 70c6f6284dcf..4c602ca3bc13 100644 +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -238,6 +238,9 @@ static inline int get_boot_cpu_id(void) + #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) + #define put_cpu() preempt_enable() --/** -+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -+SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) ++#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); }) ++#define put_cpu_light() migrate_enable() + -+/* - * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t - * @name: Name of the seqcount_LOCKNAME_t instance -- * @lock: Pointer to the 
associated LOCKTYPE -+ * @lock: Pointer to the associated LOCKNAME - */ - --#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ -+#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) { \ - .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCK(.lock = (assoc_lock)) \ - } - --#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) --#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) -- -+#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) -+#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) - - #define __seqprop_case(s, lockname, prop) \ -- seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) -+ seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) - - #define __seqprop(s, prop) _Generic(*(s), \ -- seqcount_t: __seqcount_##prop((void *)(s)), \ -+ seqcount_t: __seqprop_##prop((void *)(s)), \ - __seqprop_case((s), raw_spinlock, prop), \ - __seqprop_case((s), spinlock, prop), \ - __seqprop_case((s), rwlock, prop), \ -@@ -247,12 +302,13 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) - __seqprop_case((s), ww_mutex, prop)) - - #define __seqcount_ptr(s) __seqprop(s, ptr) -+#define __seqcount_sequence(s) __seqprop(s, sequence) - #define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) - #define __seqcount_assert_lock_held(s) __seqprop(s, assert) + /* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: +diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h +index 79897841a2cc..c3c70291b46c 100644 +--- a/include/linux/spinlock.h ++++ b/include/linux/spinlock.h +@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) + }) - /** - * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() - * barrier. 
Callers should ensure that smp_rmb() or equivalent ordering is -@@ -265,56 +321,45 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) - * Return: count to be passed to read_seqcount_retry() - */ - #define __read_seqcount_begin(s) \ -- __read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) --{ -- unsigned ret; -- --repeat: -- ret = READ_ONCE(s->sequence); -- if (unlikely(ret & 1)) { -- cpu_relax(); -- goto repeat; -- } -- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); -- return ret; --} -+({ \ -+ unsigned seq; \ -+ \ -+ while ((seq = __seqcount_sequence(s)) & 1) \ -+ cpu_relax(); \ -+ \ -+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ -+ seq; \ -+}) + /* Include rwlock functions */ +-#include <linux/rwlock.h> ++#ifdef CONFIG_PREEMPT_RT ++# include <linux/rwlock_rt.h> ++#else ++# include <linux/rwlock.h> ++#endif - /** - * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_read_seqcount_begin(s) \ -- raw_read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) --{ -- unsigned ret = __read_seqcount_t_begin(s); -- smp_rmb(); -- return ret; --} -+({ \ -+ unsigned seq = __read_seqcount_begin(s); \ -+ \ -+ smp_rmb(); \ -+ seq; \ -+}) + /* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: +@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) + # include <linux/spinlock_api_up.h> + #endif - /** - * read_seqcount_begin() - begin a seqcount_t read critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * Return: count to be passed to read_seqcount_retry() ++#ifdef CONFIG_PREEMPT_RT ++# include <linux/spinlock_rt.h> ++#else /* PREEMPT_RT */ ++ + /* + * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ - #define read_seqcount_begin(s) \ -- read_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned read_seqcount_t_begin(const seqcount_t *s) --{ -- seqcount_lockdep_reader_access(s); -- return raw_read_seqcount_t_begin(s); --} -+({ \ -+ seqcount_lockdep_reader_access(__seqcount_ptr(s)); \ -+ raw_read_seqcount_begin(s); \ -+}) +@@ -454,6 +462,8 @@ static __always_inline int spin_is_contended(spinlock_t *lock) - /** - * raw_read_seqcount() - read the raw seqcount_t counter value -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * raw_read_seqcount opens a read critical section of the given - * seqcount_t, without any lockdep checking, and without checking or -@@ -324,20 +369,18 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s) - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_read_seqcount(s) \ -- raw_read_seqcount_t(__seqcount_ptr(s)) -- --static inline unsigned raw_read_seqcount_t(const seqcount_t *s) --{ -- unsigned ret = READ_ONCE(s->sequence); -- smp_rmb(); -- kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); -- return ret; --} -+({ \ -+ unsigned seq = __seqcount_sequence(s); \ -+ \ -+ smp_rmb(); \ -+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ -+ seq; \ -+}) + #define 
assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - /** - * raw_seqcount_begin() - begin a seqcount_t read critical section w/o - * lockdep and w/o counter stabilization -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * raw_seqcount_begin opens a read critical section of the given - * seqcount_t. Unlike read_seqcount_begin(), this function will not wait -@@ -352,20 +395,17 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s) - * Return: count to be passed to read_seqcount_retry() - */ - #define raw_seqcount_begin(s) \ -- raw_seqcount_t_begin(__seqcount_ptr(s)) -- --static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) --{ -- /* -- * If the counter is odd, let read_seqcount_retry() fail -- * by decrementing the counter. -- */ -- return raw_read_seqcount_t(s) & ~1; --} -+({ \ -+ /* \ -+ * If the counter is odd, let read_seqcount_retry() fail \ -+ * by decrementing the counter. \ -+ */ \ -+ raw_read_seqcount(s) & ~1; \ -+}) ++#endif /* !PREEMPT_RT */ ++ + /* + * Pull the atomic_t declaration: + * (asm-mips/atomic.h needs above definitions) +diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h +index 19a9be9d97ee..da38149f2843 100644 +--- a/include/linux/spinlock_api_smp.h ++++ b/include/linux/spinlock_api_smp.h +@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) + return 0; + } - /** - * __read_seqcount_retry() - end a seqcount_t read section w/o barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @start: count, from read_seqcount_begin() - * - * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() -@@ -389,7 +429,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) - - /** - * read_seqcount_retry() - end a seqcount_t read critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @start: count, from read_seqcount_begin() - * - * read_seqcount_retry closes the read critical section of given -@@ -409,7 +449,7 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) - - /** - * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - */ - #define raw_write_seqcount_begin(s) \ - do { \ -@@ -428,7 +468,7 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) - - /** - * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - */ - #define raw_write_seqcount_end(s) \ - do { \ -@@ -448,7 +488,7 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) - /** - * write_seqcount_begin_nested() - start a seqcount_t write section with - * custom lockdep nesting level -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * @subclass: lockdep nesting level - * - * See Documentation/locking/lockdep-design.rst -@@ -471,7 +511,7 @@ static inline void write_seqcount_t_begin_nested(seqcount_t 
*s, int subclass) - - /** - * write_seqcount_begin() - start a seqcount_t write side critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * write_seqcount_begin opens a write side critical section of the given - * seqcount_t. -@@ -497,7 +537,7 @@ static inline void write_seqcount_t_begin(seqcount_t *s) - - /** - * write_seqcount_end() - end a seqcount_t write side critical section -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * The write section must've been opened with write_seqcount_begin(). - */ -@@ -517,7 +557,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) - - /** - * raw_write_seqcount_barrier() - do a seqcount_t write barrier -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * This can be used to provide an ordering guarantee instead of the usual - * consistency guarantee. It is one wmb cheaper, because it can collapse -@@ -571,7 +611,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) - /** - * write_seqcount_invalidate() - invalidate in-progress seqcount_t read - * side operations -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants - * - * After write_seqcount_invalidate, no seqcount_t read side operations - * will complete successfully and see data older than this. -@@ -587,34 +627,73 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s) - kcsan_nestable_atomic_end(); - } - --/** -- * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+/* -+ * Latch sequence counters (seqcount_latch_t) - * -- * Use seqcount_t latching to switch between two storage places protected -- * by a sequence counter. Doing so allows having interruptible, preemptible, -- * seqcount_t write side critical sections. -+ * A sequence counter variant where the counter even/odd value is used to -+ * switch between two copies of protected data. This allows the read path, -+ * typically NMIs, to safely interrupt the write side critical section. -+ * -+ * As the write sections are fully preemptible, no special handling for -+ * PREEMPT_RT is needed. -+ */ -+typedef struct { -+ seqcount_t seqcount; -+} seqcount_latch_t; -+ -+/** -+ * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t -+ * @seq_name: Name of the seqcount_latch_t instance -+ */ -+#define SEQCNT_LATCH_ZERO(seq_name) { \ -+ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ -+} -+ -+/** -+ * seqcount_latch_init() - runtime initializer for seqcount_latch_t -+ * @s: Pointer to the seqcount_latch_t instance -+ */ -+static inline void seqcount_latch_init(seqcount_latch_t *s) -+{ -+ seqcount_init(&s->seqcount); -+} -+ -+/** -+ * raw_read_seqcount_latch() - pick even/odd latch data copy -+ * @s: Pointer to seqcount_latch_t - * -- * Check raw_write_seqcount_latch() for more details and a full reader and -- * writer usage example. -+ * See raw_write_seqcount_latch() for details and a full reader/writer -+ * usage example. - * - * Return: sequence counter raw value. Use the lowest bit as an index for -- * picking which data copy to read. 
The full counter value must then be -- * checked with read_seqcount_retry(). -+ * picking which data copy to read. The full counter must then be checked -+ * with read_seqcount_latch_retry(). - */ --#define raw_read_seqcount_latch(s) \ -- raw_read_seqcount_t_latch(__seqcount_ptr(s)) -+static inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s) -+{ -+ /* -+ * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). -+ * Due to the dependent load, a full smp_rmb() is not needed. -+ */ -+ return READ_ONCE(s->seqcount.sequence); -+} - --static inline int raw_read_seqcount_t_latch(seqcount_t *s) -+/** -+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section -+ * @s: Pointer to seqcount_latch_t -+ * @start: count, from raw_read_seqcount_latch() -+ * -+ * Return: true if a read section retry is required, else false -+ */ -+static inline int -+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) - { -- /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ -- int seq = READ_ONCE(s->sequence); /* ^^^ */ -- return seq; -+ return read_seqcount_retry(&s->seqcount, start); - } - - /** -- * raw_write_seqcount_latch() - redirect readers to even/odd copy -- * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants -+ * raw_write_seqcount_latch() - redirect latch readers to even/odd copy -+ * @s: Pointer to seqcount_latch_t - * - * The latch technique is a multiversion concurrency control method that allows - * queries during non-atomic modifications. If you can guarantee queries never -@@ -633,7 +712,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) - * The basic form is a data structure like:: - * - * struct latch_struct { -- * seqcount_t seq; -+ * seqcount_latch_t seq; - * struct data_struct data[2]; - * }; - * -@@ -643,13 +722,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) - * void latch_modify(struct latch_struct *latch, ...) - * { - * smp_wmb(); // Ensure that the last data[1] update is visible -- * latch->seq++; -+ * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * - * modify(latch->data[0], ...); - * - * smp_wmb(); // Ensure that the data[0] update is visible -- * latch->seq++; -+ * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * - * modify(latch->data[1], ...); -@@ -668,8 +747,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); - * -- * // read_seqcount_retry() includes needed smp_rmb() -- * } while (read_seqcount_retry(&latch->seq, seq)); -+ * // This includes needed smp_rmb() -+ * } while (read_seqcount_latch_retry(&latch->seq, seq)); - * - * return entry; - * } -@@ -688,19 +767,16 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. - * -- * NOTE: -+ * NOTE2: - * - * When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. 
- */ --#define raw_write_seqcount_latch(s) \ -- raw_write_seqcount_t_latch(__seqcount_ptr(s)) -- --static inline void raw_write_seqcount_t_latch(seqcount_t *s) -+static inline void raw_write_seqcount_latch(seqcount_latch_t *s) - { -- smp_wmb(); /* prior stores before incrementing "sequence" */ -- s->sequence++; -- smp_wmb(); /* increment "sequence" before following stores */ -+ smp_wmb(); /* prior stores before incrementing "sequence" */ -+ s->seqcount.sequence++; -+ smp_wmb(); /* increment "sequence" before following stores */ - } - - /* -@@ -714,13 +790,17 @@ static inline void raw_write_seqcount_t_latch(seqcount_t *s) - * - Documentation/locking/seqlock.rst - */ - typedef struct { -- struct seqcount seqcount; -+ /* -+ * Make sure that readers don't starve writers on PREEMPT_RT: use -+ * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK(). -+ */ -+ seqcount_spinlock_t seqcount; - spinlock_t lock; - } seqlock_t; - - #define __SEQLOCK_UNLOCKED(lockname) \ - { \ -- .seqcount = SEQCNT_ZERO(lockname), \ -+ .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \ - .lock = __SPIN_LOCK_UNLOCKED(lockname) \ - } - -@@ -730,8 +810,8 @@ typedef struct { - */ - #define seqlock_init(sl) \ - do { \ -- seqcount_init(&(sl)->seqcount); \ - spin_lock_init(&(sl)->lock); \ -+ seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock); \ - } while (0) - - /** -@@ -778,6 +858,12 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) - return read_seqcount_retry(&sl->seqcount, start); - } - -+/* -+ * For all seqlock_t write side functions, use write_seqcount_*t*_begin() -+ * instead of the generic write_seqcount_begin(). This way, no redundant -+ * lockdep_assert_held() checks are added. -+ */ -+ - /** - * write_seqlock() - start a seqlock_t write side critical section - * @sl: Pointer to seqlock_t -@@ -794,7 +880,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) - static inline void write_seqlock(seqlock_t *sl) - { - spin_lock(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -806,7 +892,7 @@ static inline void write_seqlock(seqlock_t *sl) - */ - static inline void write_sequnlock(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock(&sl->lock); - } - -@@ -820,7 +906,7 @@ static inline void write_sequnlock(seqlock_t *sl) - static inline void write_seqlock_bh(seqlock_t *sl) - { - spin_lock_bh(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -833,7 +919,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) - */ - static inline void write_sequnlock_bh(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_bh(&sl->lock); - } - -@@ -847,7 +933,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) - static inline void write_seqlock_irq(seqlock_t *sl) - { - spin_lock_irq(&sl->lock); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - } - - /** -@@ -859,7 +945,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) - */ - static inline void write_sequnlock_irq(seqlock_t *sl) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_irq(&sl->lock); - } - -@@ -868,7 +954,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) - unsigned long flags; - - 
spin_lock_irqsave(&sl->lock, flags); -- write_seqcount_t_begin(&sl->seqcount); -+ write_seqcount_t_begin(&sl->seqcount.seqcount); - return flags; - } - -@@ -897,7 +983,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) - static inline void - write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) - { -- write_seqcount_t_end(&sl->seqcount); -+ write_seqcount_t_end(&sl->seqcount.seqcount); - spin_unlock_irqrestore(&sl->lock, flags); - } - -diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h -index 2b70f736b091..68d756373b53 100644 ---- a/include/linux/serial_8250.h -+++ b/include/linux/serial_8250.h -@@ -7,6 +7,7 @@ - #ifndef _LINUX_SERIAL_8250_H - #define _LINUX_SERIAL_8250_H - -+#include <linux/atomic.h> - #include <linux/serial_core.h> - #include <linux/serial_reg.h> - #include <linux/platform_device.h> -@@ -125,6 +126,8 @@ struct uart_8250_port { - #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA - unsigned char msr_saved_flags; - -+ atomic_t console_printing; -+ - struct uart_8250_dma *dma; - const struct uart_8250_ops *ops; - -@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up); - void serial8250_set_defaults(struct uart_8250_port *up); - void serial8250_console_write(struct uart_8250_port *up, const char *s, - unsigned int count); -+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, -+ unsigned int count); - int serial8250_console_setup(struct uart_port *port, char *options, bool probe); - int serial8250_console_exit(struct uart_port *port); - -diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h -index a5a5d1d4d7b1..0470d1582b09 100644 ---- a/include/linux/shmem_fs.h -+++ b/include/linux/shmem_fs.h -@@ -31,7 +31,7 @@ struct shmem_sb_info { - struct percpu_counter used_blocks; /* How many are allocated */ - unsigned long max_inodes; /* How many inodes are allowed */ - unsigned long free_inodes; /* How many are left for allocation */ -- spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ -+ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ - umode_t mode; /* Mount mode for root directory */ - unsigned char huge; /* Whether to try for hugepages */ - kuid_t uid; /* Mount uid for root directory */ -diff --git a/include/linux/signal.h b/include/linux/signal.h -index 7bbc0e9cf084..3030d984a144 100644 ---- a/include/linux/signal.h -+++ b/include/linux/signal.h -@@ -263,6 +263,7 @@ static inline void init_sigpending(struct sigpending *sig) - } - - extern void flush_sigqueue(struct sigpending *queue); -+extern void flush_task_sigqueue(struct task_struct *tsk); - - /* Test if 'sig' is valid signal. 
Use this instead of testing _NSIG directly */ - static inline int valid_signal(unsigned long sig) -diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 04a18e01b362..187ea2e1854e 100644 ---- a/include/linux/skbuff.h -+++ b/include/linux/skbuff.h -@@ -295,6 +295,7 @@ struct sk_buff_head { - - __u32 qlen; - spinlock_t lock; -+ raw_spinlock_t raw_lock; - }; - - struct sk_buff; -@@ -1884,6 +1885,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) - __skb_queue_head_init(list); - } - -+static inline void skb_queue_head_init_raw(struct sk_buff_head *list) -+{ -+ raw_spin_lock_init(&list->raw_lock); -+ __skb_queue_head_init(list); -+} -+ - static inline void skb_queue_head_init_class(struct sk_buff_head *list, - struct lock_class_key *class) - { -diff --git a/include/linux/smp.h b/include/linux/smp.h -index 80d557ef8a11..47d666fa4fba 100644 ---- a/include/linux/smp.h -+++ b/include/linux/smp.h -@@ -236,6 +236,9 @@ static inline int get_boot_cpu_id(void) - #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) - #define put_cpu() preempt_enable() - -+#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); }) -+#define put_cpu_light() migrate_enable() -+ - /* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: -diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h -index f2f12d746dbd..38ff58065dfb 100644 ---- a/include/linux/spinlock.h -+++ b/include/linux/spinlock.h -@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) - }) - - /* Include rwlock functions */ --#include <linux/rwlock.h> -+#ifdef CONFIG_PREEMPT_RT -+# include <linux/rwlock_rt.h> -+#else -+# include <linux/rwlock.h> -+#endif - - /* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: -@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) - # include <linux/spinlock_api_up.h> - #endif - -+#ifdef CONFIG_PREEMPT_RT -+# include <linux/spinlock_rt.h> -+#else /* PREEMPT_RT */ -+ - /* - * Map the spin_lock functions to the raw variants for PREEMPT_RT=n - */ -@@ -454,6 +462,8 @@ static __always_inline int spin_is_contended(spinlock_t *lock) - - #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) - -+#endif /* !PREEMPT_RT */ -+ - /* - * Pull the atomic_t declaration: - * (asm-mips/atomic.h needs above definitions) -diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h -index 19a9be9d97ee..da38149f2843 100644 ---- a/include/linux/spinlock_api_smp.h -+++ b/include/linux/spinlock_api_smp.h -@@ -187,6 +187,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) - return 0; - } - --#include <linux/rwlock_api_smp.h> -+#ifndef CONFIG_PREEMPT_RT -+# include <linux/rwlock_api_smp.h> -+#endif +-#include <linux/rwlock_api_smp.h> ++#ifndef CONFIG_PREEMPT_RT ++# include <linux/rwlock_api_smp.h> ++#endif #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h @@ -7987,64 +7144,32 @@ index 000000000000..446da786e5d5 + +#endif diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h -index c09b6407ae1b..b0243ba07fb7 100644 +index c09b6407ae1b..d9b371fa13e0 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h -@@ -1,10 +1,6 @@ +@@ -1,7 +1,7 @@ #ifndef __LINUX_SPINLOCK_TYPES_UP_H #define __LINUX_SPINLOCK_TYPES_UP_H -#ifndef __LINUX_SPINLOCK_TYPES_H --# error "please don't include this 
file directly" --#endif -- - /* - * include/linux/spinlock_types_up.h - spinlock type definitions for UP - * -diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h -index 76d8b09384a7..30577c3aecf8 100644 ---- a/include/linux/stop_machine.h -+++ b/include/linux/stop_machine.h -@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg); - struct cpu_stop_work { - struct list_head list; /* cpu_stopper->works */ - cpu_stop_fn_t fn; -+ unsigned long caller; - void *arg; - struct cpu_stop_done *done; - }; -@@ -36,6 +37,8 @@ void stop_machine_park(int cpu); - void stop_machine_unpark(int cpu); - void stop_machine_yield(const struct cpumask *cpumask); - -+extern void print_stop_info(const char *log_lvl, struct task_struct *task); -+ - #else /* CONFIG_SMP */ - - #include <linux/workqueue.h> -@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu, - return false; - } - -+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } -+ - #endif /* CONFIG_SMP */ ++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H) + # error "please don't include this file directly" + #endif - /* diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h -index e93e249a4e9b..c88b9cecc78a 100644 +index 9b2158c69275..8d1cac4052f2 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -97,7 +97,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) - #define test_thread_flag(flag) \ - test_ti_thread_flag(current_thread_info(), flag) +@@ -149,7 +149,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) + #endif /* !CONFIG_GENERIC_ENTRY */ -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef CONFIG_PREEMPT_LAZY +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) -+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) ++#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY) + +#else +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) @@ -8055,7 +7180,7 @@ index e93e249a4e9b..c88b9cecc78a 100644 #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h -index 5c6943354049..75e3acd59635 100644 +index d321fe5ad1a1..89c3f7162267 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -67,6 +67,8 @@ struct trace_entry { @@ -8067,6 +7192,100 @@ index 5c6943354049..75e3acd59635 100644 }; #define TRACE_EVENT_TYPE_MAX \ +@@ -148,17 +150,78 @@ enum print_line_t { + + enum print_line_t trace_handle_return(struct trace_seq *s); + +-void tracing_generic_entry_update(struct trace_entry *entry, +- unsigned short type, +- unsigned long flags, +- int pc); ++static inline void tracing_generic_entry_update(struct trace_entry *entry, ++ unsigned short type, ++ unsigned int trace_ctx) ++{ ++ entry->preempt_count = trace_ctx & 0xff; ++ entry->migrate_disable = (trace_ctx >> 8) & 0xff; ++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; ++ entry->pid = current->pid; ++ entry->type = type; ++ entry->flags = trace_ctx >> 24; ++} ++ ++unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); ++ ++enum trace_flag_type { ++ TRACE_FLAG_IRQS_OFF = 0x01, ++ TRACE_FLAG_IRQS_NOSUPPORT = 
0x02, ++ TRACE_FLAG_NEED_RESCHED = 0x04, ++ TRACE_FLAG_HARDIRQ = 0x08, ++ TRACE_FLAG_SOFTIRQ = 0x10, ++ TRACE_FLAG_PREEMPT_RESCHED = 0x20, ++ TRACE_FLAG_NMI = 0x40, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, ++}; ++ ++#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT ++static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) ++{ ++ unsigned int irq_status = irqs_disabled_flags(irqflags) ? ++ TRACE_FLAG_IRQS_OFF : 0; ++ return tracing_gen_ctx_irq_test(irq_status); ++} ++static inline unsigned int tracing_gen_ctx(void) ++{ ++ unsigned long irqflags; ++ ++ local_save_flags(irqflags); ++ return tracing_gen_ctx_flags(irqflags); ++} ++#else ++ ++static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) ++{ ++ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++} ++static inline unsigned int tracing_gen_ctx(void) ++{ ++ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++} ++#endif ++ ++static inline unsigned int tracing_gen_ctx_dec(void) ++{ ++ unsigned int trace_ctx; ++ ++ trace_ctx = tracing_gen_ctx(); ++ /* ++ * Subtract one from the preeption counter if preemption is enabled, ++ * see trace_event_buffer_reserve()for details. ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ trace_ctx--; ++ return trace_ctx; ++} ++ + struct trace_event_file; + + struct ring_buffer_event * + trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, + struct trace_event_file *trace_file, + int type, unsigned long len, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #define TRACE_RECORD_CMDLINE BIT(0) + #define TRACE_RECORD_TGID BIT(1) +@@ -232,8 +295,7 @@ struct trace_event_buffer { + struct ring_buffer_event *event; + struct trace_event_file *trace_file; + void *entry; +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + struct pt_regs *regs; + }; + diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79501b3..72bf38b97df8 100644 --- a/include/linux/u64_stats_sync.h @@ -8196,7 +7415,7 @@ index c6abb79501b3..72bf38b97df8 100644 #endif return __u64_stats_fetch_retry(syncp, start); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h -index 7557c1070fd7..ed4b63184327 100644 +index 773135fc6e19..1a2cedbc72e6 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); @@ -8220,7 +7439,7 @@ index 7557c1070fd7..ed4b63184327 100644 static inline void count_vm_events(enum vm_event_item item, long delta) diff --git a/include/linux/wait.h b/include/linux/wait.h -index 27fb99cfeb02..93b42387b4c6 100644 +index fe10e8570a52..e9ce878a4906 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,6 +10,7 @@ @@ -8250,6 +7469,33 @@ index 850424e5d030..8ef2feb0d8fe 100644 struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; +diff --git a/include/linux/zpool.h b/include/linux/zpool.h +index 51bf43076165..e8997010612a 100644 +--- a/include/linux/zpool.h ++++ b/include/linux/zpool.h +@@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *pool); + * @malloc: allocate mem from a pool. + * @free: free mem from a pool. + * @shrink: shrink the pool. ++ * @sleep_mapped: whether zpool driver can sleep during map. + * @map: map a handle. + * @unmap: unmap a handle. + * @total_size: get total size of a pool. 
+@@ -100,6 +101,7 @@ struct zpool_driver { + int (*shrink)(void *pool, unsigned int pages, + unsigned int *reclaimed); + ++ bool sleep_mapped; + void *(*map)(void *pool, unsigned long handle, + enum zpool_mapmode mm); + void (*unmap)(void *pool, unsigned long handle); +@@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_driver *driver); + int zpool_unregister_driver(struct zpool_driver *driver); + + bool zpool_evictable(struct zpool *pool); ++bool zpool_can_sleep_mapped(struct zpool *pool); + + #endif diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 1424e02cef90..163f8415e5db 100644 --- a/include/net/gen_stats.h @@ -8319,7 +7565,7 @@ index 000000000000..67710bace741 + +#endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h -index d60e7c39d60c..40be4443b6bd 100644 +index 5b490b5591df..b2aecc88c796 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ @@ -8339,7 +7585,7 @@ index d60e7c39d60c..40be4443b6bd 100644 struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -138,7 +139,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) +@@ -141,7 +142,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); @@ -8351,7 +7597,7 @@ index d60e7c39d60c..40be4443b6bd 100644 } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -@@ -162,17 +167,35 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) +@@ -165,17 +170,35 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) } else if (qdisc_is_running(qdisc)) { return false; } @@ -8362,7 +7608,7 @@ index d60e7c39d60c..40be4443b6bd 100644 + * Variant of write_seqcount_t_begin() telling lockdep that a + * trylock was attempted. 
+ */ -+ raw_write_seqcount_t_begin(s); ++ do_raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_); + return true; + } @@ -8387,7 +7633,7 @@ index d60e7c39d60c..40be4443b6bd 100644 if (qdisc->flags & TCQ_F_NOLOCK) spin_unlock(&qdisc->seqlock); } -@@ -547,7 +570,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) +@@ -538,7 +561,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } @@ -8396,44 +7642,11 @@ index d60e7c39d60c..40be4443b6bd 100644 { struct Qdisc *root = qdisc_root_sleeping(qdisc); -diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h -index fec25b9cfbaf..f9bf9d8f30b8 100644 ---- a/include/trace/events/sched.h -+++ b/include/trace/events/sched.h -@@ -646,6 +646,18 @@ DECLARE_TRACE(sched_update_nr_running_tp, - TP_PROTO(struct rq *rq, int change), - TP_ARGS(rq, change)); - -+DECLARE_TRACE(sched_migrate_disable_tp, -+ TP_PROTO(struct task_struct *p), -+ TP_ARGS(p)); -+ -+DECLARE_TRACE(sched_migrate_enable_tp, -+ TP_PROTO(struct task_struct *p), -+ TP_ARGS(p)); -+ -+DECLARE_TRACE(sched_migrate_pull_tp, -+ TP_PROTO(struct task_struct *p), -+ TP_ARGS(p)); -+ - #endif /* _TRACE_SCHED_H */ - - /* This part must be outside protection */ diff --git a/init/Kconfig b/init/Kconfig -index d6a0b31b13dc..c48887283f88 100644 +index 29ad68325028..77d356fa8668 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -682,7 +682,8 @@ config IKHEADERS - - config LOG_BUF_SHIFT - int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" -- range 12 25 -+ range 12 25 if !H8300 -+ range 12 19 if H8300 - default 17 - depends on PRINTK - help -@@ -964,6 +965,7 @@ config CFS_BANDWIDTH +@@ -968,6 +968,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED @@ -8441,7 +7654,7 @@ index d6a0b31b13dc..c48887283f88 100644 default n help This feature lets you explicitly allocate real CPU bandwidth -@@ -1871,6 +1873,7 @@ choice +@@ -1884,6 +1885,7 @@ choice config SLAB bool "SLAB" @@ -8449,7 +7662,7 @@ index d6a0b31b13dc..c48887283f88 100644 select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1891,6 +1894,7 @@ config SLUB +@@ -1904,6 +1906,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" @@ -8457,15 +7670,15 @@ index d6a0b31b13dc..c48887283f88 100644 help SLOB replaces the stock allocator with a drastically simpler allocator. SLOB is generally more space efficient but -@@ -1957,7 +1961,7 @@ config SHUFFLE_PAGE_ALLOCATOR +@@ -1969,7 +1972,7 @@ config SHUFFLE_PAGE_ALLOCATOR + Say Y if unsure. 
config SLUB_CPU_PARTIAL - default y -- depends on SLUB && SMP -+ depends on SLUB && SMP && !PREEMPT_RT +- default y ++ default y if !PREEMPT_RT + depends on SLUB && SMP bool "SLUB per cpu partial cache" help - Per cpu partial caches accelerate objects allocation and freeing diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 3de8fd11873b..4198f0273ecd 100644 --- a/kernel/Kconfig.locks @@ -8480,7 +7693,7 @@ index 3de8fd11873b..4198f0273ecd 100644 config ARCH_HAS_MMIOWB bool diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index bf82259cff96..c1203c14fee9 100644 +index bf82259cff96..b5cd1e278eb5 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,5 +1,11 @@ @@ -8495,8 +7708,16 @@ index bf82259cff96..c1203c14fee9 100644 choice prompt "Preemption Model" default PREEMPT_NONE +@@ -59,6 +65,7 @@ config PREEMPT_RT + bool "Fully Preemptible Kernel (Real-Time)" + depends on EXPERT && ARCH_SUPPORTS_RT + select PREEMPTION ++ select RT_MUTEXES + help + This option turns the kernel into a real-time kernel by replacing + various locking primitives (spinlocks, rwlocks, etc.) with diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 642415b8c3c9..daca5e74d75e 100644 +index 53c70c470a38..8f4b2b9aa06c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -345,7 +345,7 @@ void cpuset_read_unlock(void) @@ -8508,7 +7729,7 @@ index 642415b8c3c9..daca5e74d75e 100644 static struct workqueue_struct *cpuset_migrate_mm_wq; -@@ -1257,7 +1257,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, +@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, * Newly added CPUs will be removed from effective_cpus and * newly deleted ones will be added back to effective_cpus. 
*/ @@ -8517,7 +7738,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (adding) { cpumask_or(parent->subparts_cpus, parent->subparts_cpus, tmp->addmask); -@@ -1276,7 +1276,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, +@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, } parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); @@ -8526,7 +7747,7 @@ index 642415b8c3c9..daca5e74d75e 100644 return cmd == partcmd_update; } -@@ -1381,7 +1381,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) +@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) continue; rcu_read_unlock(); @@ -8535,7 +7756,7 @@ index 642415b8c3c9..daca5e74d75e 100644 cpumask_copy(cp->effective_cpus, tmp->new_cpus); if (cp->nr_subparts_cpus && -@@ -1412,7 +1412,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) +@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) = cpumask_weight(cp->subparts_cpus); } } @@ -8544,7 +7765,7 @@ index 642415b8c3c9..daca5e74d75e 100644 WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -1530,7 +1530,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, +@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, return -EINVAL; } @@ -8553,7 +7774,7 @@ index 642415b8c3c9..daca5e74d75e 100644 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); /* -@@ -1541,7 +1541,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, +@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } @@ -8562,7 +7783,7 @@ index 642415b8c3c9..daca5e74d75e 100644 update_cpumasks_hier(cs, &tmp); -@@ -1735,9 +1735,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) +@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) continue; rcu_read_unlock(); @@ -8574,7 +7795,7 @@ index 642415b8c3c9..daca5e74d75e 100644 WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1805,9 +1805,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, +@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; @@ -8586,7 +7807,7 @@ index 642415b8c3c9..daca5e74d75e 100644 /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1898,9 +1898,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, +@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -8598,7 +7819,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -2409,7 +2409,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) +@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -8607,7 +7828,7 @@ index 642415b8c3c9..daca5e74d75e 100644 switch (type) { case FILE_CPULIST: -@@ -2431,7 +2431,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) 
+@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) ret = -EINVAL; } @@ -8616,7 +7837,7 @@ index 642415b8c3c9..daca5e74d75e 100644 return ret; } -@@ -2744,14 +2744,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) +@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); @@ -8633,7 +7854,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2778,12 +2778,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) +@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) } rcu_read_unlock(); @@ -8648,7 +7869,7 @@ index 642415b8c3c9..daca5e74d75e 100644 out_unlock: percpu_up_write(&cpuset_rwsem); put_online_cpus(); -@@ -2839,7 +2839,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) +@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { percpu_down_write(&cpuset_rwsem); @@ -8657,7 +7878,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2850,7 +2850,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) +@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -8666,7 +7887,7 @@ index 642415b8c3c9..daca5e74d75e 100644 percpu_up_write(&cpuset_rwsem); } -@@ -2947,12 +2947,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, +@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, { bool is_empty; @@ -8681,7 +7902,7 @@ index 642415b8c3c9..daca5e74d75e 100644 /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, -@@ -2989,10 +2989,10 @@ hotplug_update_tasks(struct cpuset *cs, +@@ -3012,10 +3012,10 @@ hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; @@ -8694,7 +7915,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (cpus_updated) update_tasks_cpumask(cs); -@@ -3147,7 +3147,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) +@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -8703,7 +7924,7 @@ index 642415b8c3c9..daca5e74d75e 100644 if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* -@@ -3167,17 +3167,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work) +@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); @@ -8724,7 +7945,7 @@ index 642415b8c3c9..daca5e74d75e 100644 update_tasks_nodemask(&top_cpuset); } -@@ -3278,11 +3278,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) +@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; @@ -8738,7 +7959,7 @@ index 642415b8c3c9..daca5e74d75e 100644 } /** -@@ -3343,11 +3343,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) +@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) nodemask_t mask; unsigned long flags; @@ -8752,7 +7973,7 @@ index 642415b8c3c9..daca5e74d75e 100644 return mask; } -@@ -3439,14 +3439,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) +@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, 
gfp_t gfp_mask) return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ @@ -8793,38 +8014,46 @@ index d51175cedfca..b424f3157b34 100644 /* if @may_sleep, play nice and yield if necessary */ if (may_sleep && (need_resched() || -diff --git a/kernel/cpu.c b/kernel/cpu.c -index 6ff2578ecf17..fa535eaa4826 100644 ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { - .name = "ap:online", - }, - /* -- * Handled on controll processor until the plugged processor manages -+ * Handled on control processor until the plugged processor manages - * this itself. - */ - [CPUHP_TEARDOWN_CPU] = { -@@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states[] = { - .teardown.single = takedown_cpu, - .cant_stop = true, - }, -+ -+ [CPUHP_AP_SCHED_WAIT_EMPTY] = { -+ .name = "sched:waitempty", -+ .startup.single = NULL, -+ .teardown.single = sched_cpu_wait_empty, -+ }, -+ - /* Handle smpboot threads park/unpark */ - [CPUHP_AP_SMPBOOT_THREADS] = { - .name = "smpboot/threads:online", +diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c +index 930ac1b25ec7..dbf1d126ac5e 100644 +--- a/kernel/debug/kdb/kdb_main.c ++++ b/kernel/debug/kdb/kdb_main.c +@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const char **argv) + int adjust = 0; + int n = 0; + int skip = 0; +- struct kmsg_dumper dumper = { .active = 1 }; ++ struct kmsg_dumper_iter iter = { .active = 1 }; + size_t len; + char buf[201]; + +@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const char **argv) + kdb_set(2, setargs); + } + +- kmsg_dump_rewind_nolock(&dumper); +- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) ++ kmsg_dump_rewind(&iter); ++ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL)) + n++; + + if (lines < 0) { +@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const char **argv) + if (skip >= n || skip < 0) + return 0; + +- kmsg_dump_rewind_nolock(&dumper); +- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { ++ kmsg_dump_rewind(&iter); ++ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) { + if (skip) { + skip--; + continue; diff --git a/kernel/entry/common.c b/kernel/entry/common.c -index 6fdb6105e6d6..adbfcef76991 100644 +index f9d491b17b78..50ba2857c286 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c -@@ -148,9 +148,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, +@@ -158,9 +158,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, local_irq_enable_exit_to_user(ti_work); @@ -8843,7 +8072,7 @@ index 6fdb6105e6d6..adbfcef76991 100644 if (ti_work & _TIF_UPROBE) uprobe_notify_resume(regs); -@@ -354,7 +362,7 @@ void irqentry_exit_cond_resched(void) +@@ -381,7 +389,7 @@ void irqentry_exit_cond_resched(void) rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); @@ -8853,10 +8082,10 @@ index 6fdb6105e6d6..adbfcef76991 100644 } } diff --git a/kernel/exit.c b/kernel/exit.c -index 733e80f334e7..a1756dcc17f2 100644 +index 04029e35e69a..346f7b76ceca 100644 --- a/kernel/exit.c +++ b/kernel/exit.c -@@ -151,7 +151,7 @@ static void __exit_signal(struct task_struct *tsk) +@@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) * Do this under ->siglock, we can race with another thread * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 
*/ @@ -8866,7 +8095,7 @@ index 733e80f334e7..a1756dcc17f2 100644 spin_unlock(&sighand->siglock); diff --git a/kernel/fork.c b/kernel/fork.c -index da8d360fb032..2cf99526192e 100644 +index a2addc21d63f..11e5d05c9640 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ @@ -8877,7 +8106,7 @@ index da8d360fb032..2cf99526192e 100644 #include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> -@@ -287,7 +288,7 @@ static inline void free_thread_stack(struct task_struct *tsk) +@@ -288,7 +289,7 @@ static inline void free_thread_stack(struct task_struct *tsk) return; } @@ -8886,7 +8115,7 @@ index da8d360fb032..2cf99526192e 100644 return; } #endif -@@ -687,6 +688,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -689,6 +690,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -8906,7 +8135,7 @@ index da8d360fb032..2cf99526192e 100644 static void mmdrop_async_fn(struct work_struct *work) { struct mm_struct *mm; -@@ -728,6 +742,15 @@ void __put_task_struct(struct task_struct *tsk) +@@ -730,6 +744,15 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); @@ -8919,10 +8148,10 @@ index da8d360fb032..2cf99526192e 100644 + /* Task is done with its stack. */ + put_task_stack(tsk); + + io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); - security_task_free(tsk); -@@ -924,6 +947,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) +@@ -927,6 +950,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; @@ -8930,7 +8159,7 @@ index da8d360fb032..2cf99526192e 100644 account_kernel_stack(tsk, 1); -@@ -1970,6 +1994,7 @@ static __latent_entropy struct task_struct *copy_process( +@@ -1993,6 +2017,7 @@ static __latent_entropy struct task_struct *copy_process( spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); @@ -8939,10 +8168,10 @@ index da8d360fb032..2cf99526192e 100644 p->utime = p->stime = p->gtime = 0; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME diff --git a/kernel/futex.c b/kernel/futex.c -index a5876694a60e..1ca3581043fe 100644 +index 45a13eb8894e..a1af87a21c03 100644 --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -1479,6 +1479,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ +@@ -1497,6 +1497,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ struct task_struct *new_owner; bool postunlock = false; DEFINE_WAKE_Q(wake_q); @@ -8950,14 +8179,15 @@ index a5876694a60e..1ca3581043fe 100644 int ret = 0; new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1538,13 +1539,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); +@@ -1546,14 +1547,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ + * not fail. 
+ */ + pi_state_update_owner(pi_state, new_owner); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); + } -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -- -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); @@ -8967,7 +8197,7 @@ index a5876694a60e..1ca3581043fe 100644 return ret; } -@@ -2145,6 +2146,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, +@@ -2154,6 +2156,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, */ requeue_pi_wake_futex(this, &key2, hb2); continue; @@ -8984,7 +8214,7 @@ index a5876694a60e..1ca3581043fe 100644 } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -2830,7 +2841,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, +@@ -2847,7 +2859,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, goto no_block; } @@ -8993,16 +8223,16 @@ index a5876694a60e..1ca3581043fe 100644 /* * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3171,7 +3182,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -3172,7 +3184,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; + struct futex_hash_bucket *hb, *hb2; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3192,7 +3203,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -3193,7 +3205,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -9011,7 +8241,7 @@ index a5876694a60e..1ca3581043fe 100644 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) -@@ -3223,20 +3234,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -3224,20 +3236,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -9078,7 +8308,7 @@ index a5876694a60e..1ca3581043fe 100644 /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3245,7 +3291,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -3246,14 +3293,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -9086,18 +8316,18 @@ index a5876694a60e..1ca3581043fe 100644 + spin_lock(&hb2->lock); + BUG_ON(&hb2->lock != q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; -@@ -3256,7 +3303,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, + /* + * Drop the reference to the pi state which * the requeue_pi() code acquired for us. 
*/ put_pi_state(q.pi_state); - spin_unlock(q.lock_ptr); + spin_unlock(&hb2->lock); - } - } else { - struct rt_mutex *pi_mutex; -@@ -3270,7 +3317,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ++ + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. +@@ -3272,7 +3321,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); @@ -9130,10 +8360,19 @@ index 762a928e18f9..7929fcdb7817 100644 if (!noirqdebug) note_interrupt(desc, retval); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 5df903fccb60..881e13ec9709 100644 +index dec3f73e8db9..b279a8683c38 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1175,6 +1175,12 @@ static int irq_thread(void *data) +@@ -1225,6 +1225,8 @@ static int irq_thread(void *data) + irqreturn_t (*handler_fn)(struct irq_desc *desc, + struct irqaction *action); + ++ sched_set_fifo(current); ++ + if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) + handler_fn = irq_forced_thread_fn; +@@ -1245,6 +1247,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); @@ -9146,7 +8385,16 @@ index 5df903fccb60..881e13ec9709 100644 wake_threads_waitq(desc); } -@@ -2711,7 +2717,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); +@@ -1390,8 +1398,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) + if (IS_ERR(t)) + return PTR_ERR(t); + +- sched_set_fifo(t); +- + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code +@@ -2781,7 +2787,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. 
* @@ -9182,7 +8430,7 @@ index f865e5f4d382..dc7311dd74b1 100644 printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); diff --git a/kernel/irq_work.c b/kernel/irq_work.c -index eca83965b631..8183d30e1bb1 100644 +index e8da1e71583a..c3455910196f 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -18,6 +18,7 @@ @@ -9193,48 +8441,58 @@ index eca83965b631..8183d30e1bb1 100644 #include <asm/processor.h> -@@ -52,13 +53,19 @@ void __weak arch_irq_work_raise(void) +@@ -52,13 +53,27 @@ void __weak arch_irq_work_raise(void) /* Enqueue on current CPU, work must already be claimed and preempt disabled */ static void __irq_work_queue_local(struct irq_work *work) { -+ struct llist_head *list; -+ bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT); -+ -+ lazy_work = atomic_read(&work->flags) & IRQ_WORK_LAZY; -+ - /* If the work is "lazy", handle it from next tick if any */ -- if (atomic_read(&work->flags) & IRQ_WORK_LAZY) { -- if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && +- /* If the work is "lazy", handle it from next tick if any */ +- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { +- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && - tick_nohz_tick_stopped()) - arch_irq_work_raise(); - } else { -- if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) -+ if (lazy_work || (realtime && !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ))) +- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) ++ struct llist_head *list; ++ bool lazy_work; ++ int work_flags; ++ ++ work_flags = atomic_read(&work->node.a_flags); ++ if (work_flags & IRQ_WORK_LAZY) ++ lazy_work = true; ++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) && ++ !(work_flags & IRQ_WORK_HARD_IRQ)) ++ lazy_work = true; ++ else ++ lazy_work = false; ++ ++ if (lazy_work) + list = this_cpu_ptr(&lazy_list); + else + list = this_cpu_ptr(&raised_list); + -+ if (llist_add(&work->llnode, list)) { ++ if (llist_add(&work->node.llist, list)) { ++ /* If the work is "lazy", handle it from next tick if any */ + if (!lazy_work || tick_nohz_tick_stopped()) arch_irq_work_raise(); } } -@@ -102,7 +109,13 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) +@@ -102,7 +117,14 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); -- __smp_call_single_queue(cpu, &work->llnode); +- __smp_call_single_queue(cpu, &work->node.llist); + -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) { -+ if (llist_add(&work->llnode, &per_cpu(lazy_list, cpu))) ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { ++ if (llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) ++ /* && tick_nohz_tick_stopped_cpu(cpu) */ + arch_send_call_function_single_ipi(cpu); + } else { -+ __smp_call_single_queue(cpu, &work->llnode); ++ __smp_call_single_queue(cpu, &work->node.llist); + } } else { __irq_work_queue_local(work); } -@@ -120,9 +133,8 @@ bool irq_work_needs_cpu(void) +@@ -120,9 +142,8 @@ bool irq_work_needs_cpu(void) raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); @@ -9246,7 +8504,7 @@ index eca83965b631..8183d30e1bb1 100644 /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); -@@ -160,8 +172,12 @@ static void irq_work_run_list(struct llist_head *list) +@@ -165,8 +186,12 @@ static void irq_work_run_list(struct llist_head *list) struct 
irq_work *work, *tmp; struct llist_node *llnode; @@ -9260,7 +8518,7 @@ index eca83965b631..8183d30e1bb1 100644 if (llist_empty(list)) return; -@@ -177,7 +193,16 @@ static void irq_work_run_list(struct llist_head *list) +@@ -182,7 +207,16 @@ static void irq_work_run_list(struct llist_head *list) void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); @@ -9278,7 +8536,7 @@ index eca83965b631..8183d30e1bb1 100644 } EXPORT_SYMBOL_GPL(irq_work_run); -@@ -187,8 +212,17 @@ void irq_work_tick(void) +@@ -192,8 +226,17 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); @@ -9297,10 +8555,10 @@ index eca83965b631..8183d30e1bb1 100644 /* * Synchronize against the irq_work @entry, ensures the entry is not diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c -index c19c0dad1ebe..c85754463de6 100644 +index aa919585c24b..01935bb729de 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c -@@ -978,7 +978,6 @@ void crash_kexec(struct pt_regs *regs) +@@ -977,7 +977,6 @@ void crash_kexec(struct pt_regs *regs) old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); if (old_cpu == PANIC_CPU_INVALID) { /* This is the 1st CPU which comes here, so go ahead. */ @@ -9338,6 +8596,54 @@ index 35859da8bd4f..dfff31ed644a 100644 #endif NULL }; +diff --git a/kernel/kthread.c b/kernel/kthread.c +index 1578973c5740..bb0602597ffd 100644 +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -243,6 +243,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); + + static int kthread(void *_create) + { ++ static const struct sched_param param = { .sched_priority = 0 }; + /* Copy data: it's on kthread's stack */ + struct kthread_create_info *create = _create; + int (*threadfn)(void *data) = create->threadfn; +@@ -273,6 +274,13 @@ static int kthread(void *_create) + init_completion(&self->parked); + current->vfork_done = &self->exited; + ++ /* ++ * The new thread inherited kthreadd's priority and CPU mask. Reset ++ * back to default in case they have been changed. ++ */ ++ sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); ++ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); ++ + /* OK, tell user we're spawned, wait for stop or wakeup */ + __set_current_state(TASK_UNINTERRUPTIBLE); + create->result = current; +@@ -370,7 +378,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), + } + task = create->result; + if (!IS_ERR(task)) { +- static const struct sched_param param = { .sched_priority = 0 }; + char name[TASK_COMM_LEN]; + + /* +@@ -379,13 +386,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), + */ + vsnprintf(name, sizeof(name), namefmt, args); + set_task_comm(task, name); +- /* +- * root may have changed our (kthreadd's) priority or CPU mask. +- * The kernel thread should not inherit these properties. 
+- */ +- sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); +- set_cpus_allowed_ptr(task, +- housekeeping_cpumask(HK_FLAG_KTHREAD)); + } + kfree(create); + return task; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 6d11cfb9b41f..c7fbf737e16e 100644 --- a/kernel/locking/Makefile @@ -9378,10 +8684,10 @@ index 6d11cfb9b41f..c7fbf737e16e 100644 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c -index 2facbbd146ec..b870708af581 100644 +index ff0003146262..746ba441ed7a 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -4896,6 +4896,7 @@ static void check_flags(unsigned long flags) +@@ -5292,6 +5292,7 @@ static noinstr void check_flags(unsigned long flags) } } @@ -9389,7 +8695,7 @@ index 2facbbd146ec..b870708af581 100644 /* * We dont accurately track softirq state in e.g. * hardirq contexts (such as on 4KSTACKS), so only -@@ -4910,6 +4911,7 @@ static void check_flags(unsigned long flags) +@@ -5306,6 +5307,7 @@ static noinstr void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } @@ -9399,10 +8705,10 @@ index 2facbbd146ec..b870708af581 100644 print_irqtrace_events(current); diff --git a/kernel/locking/mutex-rt.c b/kernel/locking/mutex-rt.c new file mode 100644 -index 000000000000..35b06711997d +index 000000000000..2b849e6b9b4a --- /dev/null +++ b/kernel/locking/mutex-rt.c -@@ -0,0 +1,222 @@ +@@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Real-Time Preemption Support @@ -9470,6 +8776,7 @@ index 000000000000..35b06711997d +#include <linux/fs.h> +#include <linux/futex.h> +#include <linux/hrtimer.h> ++#include <linux/blkdev.h> + +#include "rtmutex_common.h" + @@ -9490,29 +8797,43 @@ index 000000000000..35b06711997d +} +EXPORT_SYMBOL(__mutex_do_init); + ++static int _mutex_lock_blk_flush(struct mutex *lock, int state) ++{ ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. 
++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ return __rt_mutex_lock_state(&lock->lock, state); ++} ++ +void __lockfunc _mutex_lock(struct mutex *lock) +{ + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(_mutex_lock); + -+void __lockfunc _mutex_lock_io(struct mutex *lock) ++void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) +{ + int token; + + token = io_schedule_prepare(); -+ _mutex_lock(lock); ++ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ + io_schedule_finish(token); +} -+EXPORT_SYMBOL_GPL(_mutex_lock_io); ++EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); + +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) +{ + int ret; + + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); + if (ret) + mutex_release(&lock->dep_map, _RET_IP_); + return ret; @@ -9524,7 +8845,7 @@ index 000000000000..35b06711997d + int ret; + + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); + if (ret) + mutex_release(&lock->dep_map, _RET_IP_); + return ret; @@ -9535,27 +8856,14 @@ index 000000000000..35b06711997d +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) +{ + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(_mutex_lock_nested); + -+void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); -+ -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); -+ +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(_mutex_lock_nest_lock); + @@ -9564,7 +8872,7 @@ index 000000000000..35b06711997d + int ret; + + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); + if (ret) + mutex_release(&lock->dep_map, _RET_IP_); + return ret; @@ -9576,7 +8884,7 @@ index 000000000000..35b06711997d + int ret; + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); + if (ret) + mutex_release(&lock->dep_map, _RET_IP_); + return ret; @@ -9790,7 +9098,7 @@ index fc549713bba3..659e93e256c6 100644 - debug_rt_mutex_print_deadlock(w); -} diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index cfdd5b93264d..ef22e1b52f8c 100644 +index 2f8cd616d3b2..4ea87d6c9ab7 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -8,6 +8,11 @@ @@ -10492,15 +9800,15 @@ index cfdd5b93264d..ef22e1b52f8c 100644 + * Not quite done after calling ww_acquire_done() ? 
+ */ + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); -+ + +- rt_mutex_init_waiter(&waiter); + if (ww_ctx->contending_lock) { + /* + * After -EDEADLK you tried to + * acquire a different ww_mutex? Bad! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); - -- rt_mutex_init_waiter(&waiter); ++ + /* + * You called ww_mutex_lock after receiving -EDEADLK, + * but 'forgot' to unlock everything else first? @@ -10791,42 +10099,37 @@ index cfdd5b93264d..ef22e1b52f8c 100644 } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -@@ -1535,35 +2035,31 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) +@@ -1534,36 +2034,17 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) + return __rt_mutex_slowtrylock(lock); } - /** +-/** - * rt_mutex_timed_lock - lock a rt_mutex interruptible - * the timeout structure is provided - * by the caller -+ * rt_mutex_lock_killable - lock a rt_mutex killable - * - * @lock: the rt_mutex to be locked +- * +- * @lock: the rt_mutex to be locked - * @timeout: timeout structure or NULL (no timeout) - * - * Returns: - * 0 on success - * -EINTR when interrupted by a signal +- * +- * Returns: +- * 0 on success +- * -EINTR when interrupted by a signal - * -ETIMEDOUT when the timeout expired - */ +- */ -int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) -+int __sched rt_mutex_lock_killable(struct rt_mutex *lock) ++int __sched __rt_mutex_trylock(struct rt_mutex *lock) { - int ret; - - might_sleep(); -+ return rt_mutex_lock_state(lock, 0, TASK_KILLABLE); -+} -+EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); - +- - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - RT_MUTEX_MIN_CHAINWALK, - rt_mutex_slowlock); - if (ret) - mutex_release(&lock->dep_map, _RET_IP_); -+int __sched __rt_mutex_trylock(struct rt_mutex *lock) -+{ +#ifdef CONFIG_PREEMPT_RT + if (WARN_ON_ONCE(in_irq() || in_nmi())) +#else @@ -10841,7 +10144,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 /** * rt_mutex_trylock - try to lock a rt_mutex -@@ -1580,10 +2076,7 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock) +@@ -1580,10 +2061,7 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock) { int ret; @@ -10853,7 +10156,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 if (ret) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -@@ -1591,6 +2084,11 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock) +@@ -1591,6 +2069,11 @@ int __sched rt_mutex_trylock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_trylock); @@ -10865,7 +10168,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 /** * rt_mutex_unlock - unlock a rt_mutex * -@@ -1599,16 +2097,13 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock); +@@ -1599,16 +2082,13 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock); void __sched rt_mutex_unlock(struct rt_mutex *lock) { mutex_release(&lock->dep_map, _RET_IP_); @@ -10886,7 +10189,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 { lockdep_assert_held(&lock->wait_lock); -@@ -1625,23 +2120,35 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, +@@ -1625,23 +2105,35 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, * avoid inversion prior to the wakeup. preempt_disable() * therein pairs with rt_mutex_postunlock(). 
*/ @@ -10925,7 +10228,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 } /** -@@ -1655,9 +2162,6 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) +@@ -1655,9 +2147,6 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) void rt_mutex_destroy(struct rt_mutex *lock) { WARN_ON(rt_mutex_is_locked(lock)); @@ -10935,7 +10238,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 } EXPORT_SYMBOL_GPL(rt_mutex_destroy); -@@ -1680,7 +2184,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name, +@@ -1680,7 +2169,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name, if (name && key) debug_rt_mutex_init(lock, name, key); } @@ -10944,7 +10247,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 /** * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1700,6 +2204,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, +@@ -1700,6 +2189,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) { __rt_mutex_init(lock, NULL, NULL); @@ -10959,7 +10262,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 debug_rt_mutex_proxy_lock(lock, proxy_owner); rt_mutex_set_owner(lock, proxy_owner); } -@@ -1723,6 +2235,26 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, +@@ -1722,6 +2219,26 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock) rt_mutex_set_owner(lock, NULL); } @@ -10986,7 +10289,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 /** * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task * @lock: the rt_mutex to take -@@ -1753,6 +2285,34 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, +@@ -1752,6 +2269,34 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -11021,7 +10324,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, RT_MUTEX_FULL_CHAINWALK); -@@ -1767,7 +2327,8 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, +@@ -1766,7 +2311,8 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } @@ -11031,7 +10334,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 return ret; } -@@ -1852,12 +2413,15 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, +@@ -1851,12 +2397,15 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); @@ -11048,7 +10351,7 @@ index cfdd5b93264d..ef22e1b52f8c 100644 raw_spin_unlock_irq(&lock->wait_lock); return ret; -@@ -1919,3 +2483,97 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, +@@ -1918,3 +2467,97 @@ bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, return cleanup; } @@ -11167,7 +10470,7 @@ index 732f96abf462..338ccd29119a 100644 enum rtmutex_chainwalk walk) { diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h -index d1d62f942be2..407f3da146cb 100644 +index ca6fb489007b..248a7d91583b 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -15,6 +15,7 @@ @@ -11192,7 +10495,7 @@ index d1d62f942be2..407f3da146cb 100644 u64 deadline; }; -@@ -130,12 +127,15 @@ enum rtmutex_chainwalk { +@@ -130,11 +127,14 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ @@ -11202,14 +10505,13 @@ index d1d62f942be2..407f3da146cb 100644 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); - extern void 
rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -@@ -153,9 +153,27 @@ extern int __rt_mutex_futex_trylock(struct rt_mutex *l); +@@ -152,9 +152,27 @@ extern int __rt_mutex_futex_trylock(struct rt_mutex *l); extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, @@ -11242,7 +10544,7 @@ index d1d62f942be2..407f3da146cb 100644 # include "rtmutex-debug.h" diff --git a/kernel/locking/rwlock-rt.c b/kernel/locking/rwlock-rt.c new file mode 100644 -index 000000000000..4cd72a2968a6 +index 000000000000..3d2d1f14b513 --- /dev/null +++ b/kernel/locking/rwlock-rt.c @@ -0,0 +1,334 @@ @@ -11309,7 +10611,7 @@ index 000000000000..4cd72a2968a6 + lock->rtmutex.save_state = 1; +} + -+int __read_rt_trylock(struct rt_rw_lock *lock) ++static int __read_rt_trylock(struct rt_rw_lock *lock) +{ + int r, old; + @@ -11582,15 +10884,16 @@ index 000000000000..4cd72a2968a6 +EXPORT_SYMBOL(__rt_rwlock_init); diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c new file mode 100644 -index 000000000000..bca7a448206d +index 000000000000..274172d5bb3a --- /dev/null +++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,292 @@ +@@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/rwsem.h> +#include <linux/sched/debug.h> +#include <linux/sched/signal.h> +#include <linux/export.h> ++#include <linux/blkdev.h> + +#include "rtmutex_common.h" + @@ -11675,6 +10978,13 @@ index 000000000000..bca7a448206d + if (__down_read_trylock(sem)) + return 0; + ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ + might_sleep(); + raw_spin_lock_irq(&m->wait_lock); + /* @@ -11744,6 +11054,17 @@ index 000000000000..bca7a448206d + WARN_ON_ONCE(ret); +} + ++int __down_read_interruptible(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_INTERRUPTIBLE); ++ if (likely(!ret)) ++ return ret; ++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); ++ return -EINTR; ++} ++ +int __down_read_killable(struct rw_semaphore *sem) +{ + int ret; @@ -11797,6 +11118,13 @@ index 000000000000..bca7a448206d + struct rt_mutex *m = &sem->rtmutex; + unsigned long flags; + ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. 
++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ + /* Take the rtmutex as a first step */ + if (__rt_mutex_lock_state(m, state)) + return -EINTR; @@ -11879,7 +11207,7 @@ index 000000000000..bca7a448206d + __up_write_unlock(sem, WRITER_BIAS - 1, flags); +} diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c -index f11b9bd3431d..fce8a6e3fa7c 100644 +index ba67600c7b2c..084948b9e03f 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -28,6 +28,7 @@ @@ -11890,7 +11218,7 @@ index f11b9bd3431d..fce8a6e3fa7c 100644 #include "lock_events.h" /* -@@ -1482,6 +1483,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) +@@ -1343,6 +1344,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) if (tmp & RWSEM_FLAG_WAITERS) rwsem_downgrade_wake(sem); } @@ -11898,7 +11226,7 @@ index f11b9bd3431d..fce8a6e3fa7c 100644 /* * lock for reading -@@ -1617,7 +1619,9 @@ void down_read_non_owner(struct rw_semaphore *sem) +@@ -1506,7 +1508,9 @@ void down_read_non_owner(struct rw_semaphore *sem) { might_sleep(); __down_read(sem); @@ -11908,7 +11236,7 @@ index f11b9bd3431d..fce8a6e3fa7c 100644 } EXPORT_SYMBOL(down_read_non_owner); -@@ -1646,7 +1650,9 @@ EXPORT_SYMBOL(down_write_killable_nested); +@@ -1535,7 +1539,9 @@ EXPORT_SYMBOL(down_write_killable_nested); void up_read_non_owner(struct rw_semaphore *sem) { @@ -11986,11 +11314,105 @@ index b9d93087ee66..72e306e0e8a3 100644 } + +#endif +diff --git a/kernel/notifier.c b/kernel/notifier.c +index 1b019cbca594..c20782f07643 100644 +--- a/kernel/notifier.c ++++ b/kernel/notifier.c +@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, + unsigned long flags; + int ret; + +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_register(&nh->head, n); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + return ret; + } + EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); +@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, + unsigned long flags; + int ret; + +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_unregister(&nh->head, n); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + synchronize_rcu(); + return ret; + } +@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, + * Musn't use RCU; because then the notifier list can + * change between the up and down traversal. + */ +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + + return ret; + } diff --git a/kernel/panic.c b/kernel/panic.c -index aef8872ba843..d563542bc7eb 100644 +index 332736a72a58..a14e2f5a9f55 100644 --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -247,7 +247,6 @@ void panic(const char *fmt, ...) +@@ -177,12 +177,28 @@ static void panic_print_sys_info(void) + void panic(const char *fmt, ...) 
+ { + static char buf[1024]; ++ va_list args2; + va_list args; + long i, i_next = 0, len; + int state = 0; + int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + ++ console_verbose(); ++ pr_emerg("Kernel panic - not syncing:\n"); ++ va_start(args2, fmt); ++ va_copy(args, args2); ++ vprintk(fmt, args2); ++ va_end(args2); ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ /* ++ * Avoid nested stack-dumping if a panic occurs during oops processing ++ */ ++ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) ++ dump_stack(); ++#endif ++ pr_flush(1000, true); ++ + /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since +@@ -213,24 +229,13 @@ void panic(const char *fmt, ...) + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + panic_smp_self_stop(); + +- console_verbose(); + bust_spinlocks(1); +- va_start(args, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + +- pr_emerg("Kernel panic - not syncing: %s\n", buf); +-#ifdef CONFIG_DEBUG_BUGVERBOSE +- /* +- * Avoid nested stack-dumping if a panic occurs during oops processing +- */ +- if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) +- dump_stack(); +-#endif +- + /* + * If kgdb is enabled, give it a chance to run before we stop all + * the other CPUs or else we won't be able to debug processes left +@@ -247,7 +252,6 @@ void panic(const char *fmt, ...) * Bypass the panic_cpu check and call __crash_kexec directly. */ if (!_crash_kexec_post_notifiers) { @@ -11998,7 +11420,7 @@ index aef8872ba843..d563542bc7eb 100644 __crash_kexec(NULL); /* -@@ -271,8 +270,6 @@ void panic(const char *fmt, ...) +@@ -271,8 +275,6 @@ void panic(const char *fmt, ...) 
*/ atomic_notifier_call_chain(&panic_notifier_list, 0, buf); @@ -12007,7 +11429,7 @@ index aef8872ba843..d563542bc7eb 100644 kmsg_dump(KMSG_DUMP_PANIC); /* -@@ -542,9 +539,11 @@ static u64 oops_id; +@@ -542,9 +544,11 @@ static u64 oops_id; static int init_oops_id(void) { @@ -12019,19 +11441,27 @@ index aef8872ba843..d563542bc7eb 100644 oops_id++; return 0; +@@ -555,6 +559,7 @@ static void print_oops_end_marker(void) + { + init_oops_id(); + pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); ++ pr_flush(1000, true); + } + + /* diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile -index 4d052fc6bcde..59cb24e25f00 100644 +index eee3dc9b60a9..59cb24e25f00 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile -@@ -1,4 +1,4 @@ +@@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y = printk.o -obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o -+obj-$(CONFIG_PRINTK) += printk_ringbuffer.o + obj-$(CONFIG_PRINTK) += printk_ringbuffer.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h deleted file mode 100644 -index 660f9a6bf73a..000000000000 +index 3a8fd491758c..000000000000 --- a/kernel/printk/internal.h +++ /dev/null @@ -1,74 +0,0 @@ @@ -12051,9 +11481,9 @@ index 660f9a6bf73a..000000000000 - -extern raw_spinlock_t logbuf_lock; - --__printf(5, 0) +-__printf(4, 0) -int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, +- const struct dev_printk_info *dev_info, - const char *fmt, va_list args); - -__printf(1, 0) int vprintk_default(const char *fmt, va_list args); @@ -12110,44 +11540,28 @@ index 660f9a6bf73a..000000000000 -static inline bool printk_percpu_data_ready(void) { return false; } -#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index 9b75f6bfc333..78a277ea5c35 100644 +index 5a95c688621f..a5fc854977bb 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -44,9 +44,12 @@ +@@ -44,6 +44,9 @@ #include <linux/irq_work.h> #include <linux/ctype.h> #include <linux/uio.h> +#include <linux/kthread.h> ++#include <linux/kdb.h> +#include <linux/clocksource.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -+#include <linux/kdb.h> - - #include <linux/uaccess.h> - #include <asm/sections.h> -@@ -55,9 +58,9 @@ - #define CREATE_TRACE_POINTS - #include <trace/events/printk.h> - -+#include "printk_ringbuffer.h" +@@ -58,7 +61,6 @@ + #include "printk_ringbuffer.h" #include "console_cmdline.h" #include "braille.h" -#include "internal.h" int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ -@@ -77,6 +80,9 @@ EXPORT_SYMBOL(ignore_console_lock_warning); - int oops_in_progress; - EXPORT_SYMBOL(oops_in_progress); - -+/* Set to enable sync mode. Once set, it is never cleared. 
*/ -+static bool sync_mode; -+ - /* - * console_sem protects the console_drivers list, and also - * provides serialisation for access to the entire console -@@ -224,19 +230,7 @@ static int nr_ext_console_drivers; +@@ -225,19 +227,7 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { @@ -12168,7 +11582,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; -@@ -245,13 +239,9 @@ static int __down_trylock_console_sem(unsigned long ip) +@@ -246,13 +236,9 @@ static int __down_trylock_console_sem(unsigned long ip) static void __up_console_sem(unsigned long ip) { @@ -12182,7 +11596,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 } #define up_console_sem() __up_console_sem(_RET_IP_) -@@ -265,11 +255,6 @@ static void __up_console_sem(unsigned long ip) +@@ -266,11 +252,6 @@ static void __up_console_sem(unsigned long ip) */ static int console_locked, console_suspended; @@ -12194,120 +11608,20 @@ index 9b75f6bfc333..78a277ea5c35 100644 /* * Array of consoles built from command line options (console=) */ -@@ -294,30 +279,22 @@ enum con_msg_format_flags { - static int console_msg_format = MSG_FORMAT_DEFAULT; - - /* -- * The printk log buffer consists of a chain of concatenated variable -- * length records. Every record starts with a record header, containing -- * the overall length of the record. -- * -- * The heads to the first and last entry in the buffer, as well as the -- * sequence numbers of these entries are maintained when messages are -- * stored. -+ * The printk log buffer consists of a sequenced collection of records, each -+ * containing variable length message text. Every record also contains its -+ * own meta-data (@info). - * -- * If the heads indicate available messages, the length in the header -- * tells the start next message. A length == 0 for the next message -- * indicates a wrap-around to the beginning of the buffer. -+ * Every record meta-data carries the timestamp in microseconds, as well as -+ * the standard userspace syslog level and syslog facility. The usual kernel -+ * messages use LOG_KERN; userspace-injected messages always carry a matching -+ * syslog facility, by default LOG_USER. The origin of every message can be -+ * reliably determined that way. - * -- * Every record carries the monotonic timestamp in microseconds, as well as -- * the standard userspace syslog level and syslog facility. The usual -- * kernel messages use LOG_KERN; userspace-injected messages always carry -- * a matching syslog facility, by default LOG_USER. The origin of every -- * message can be reliably determined that way. -+ * The human readable log message of a record is available in @text, the -+ * length of the message text in @text_len. The stored message is not -+ * terminated. - * -- * The human readable log message directly follows the message header. The -- * length of the message text is stored in the header, the stored message -- * is not terminated. -- * -- * Optionally, a message can carry a dictionary of properties (key/value pairs), -- * to provide userspace with a machine-readable message context. -+ * Optionally, a record can carry a dictionary of properties (key/value -+ * pairs), to provide userspace with a machine-readable message context. 
- * - * Examples for well-defined, commonly used property names are: - * DEVICE=b12:8 device identifier -@@ -327,25 +304,22 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; - * +sound:card0 subsystem:devname - * SUBSYSTEM=pci driver-core subsystem name - * -- * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value -- * follows directly after a '=' character. Every property is terminated by -- * a '\0' character. The last property is not terminated. -- * -- * Example of a message structure: -- * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec -- * 0008 34 00 record is 52 bytes long -- * 000a 0b 00 text is 11 bytes long -- * 000c 1f 00 dictionary is 23 bytes long -- * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) -- * 0010 69 74 27 73 20 61 20 6c "it's a l" -- * 69 6e 65 "ine" -- * 001b 44 45 56 49 43 "DEVIC" -- * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" -- * 52 49 56 45 52 3d 62 75 "RIVER=bu" -- * 67 "g" -- * 0032 00 00 00 padding to next message header -- * -- * The 'struct printk_log' buffer header must never be directly exported to -+ * Valid characters in property names are [a-zA-Z0-9.-_]. Property names -+ * and values are terminated by a '\0' character. -+ * -+ * Example of record values: -+ * record.text_buf = "it's a line" (unterminated) -+ * record.info.seq = 56 -+ * record.info.ts_nsec = 36863 -+ * record.info.text_len = 11 -+ * record.info.facility = 0 (LOG_KERN) -+ * record.info.flags = 0 -+ * record.info.level = 3 (LOG_ERR) -+ * record.info.caller_id = 299 (task 299) -+ * record.info.dev_info.subsystem = "pci" (terminated) -+ * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) -+ * -+ * The 'struct printk_info' buffer must never be directly exported to - * userspace, it is a kernel-private implementation detail that might - * need to be changed in the future, when the requirements change. - * -@@ -365,82 +339,23 @@ enum log_flags { +@@ -355,61 +336,43 @@ enum log_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; --struct printk_log { -- u64 ts_nsec; /* timestamp in nanoseconds */ -- u16 len; /* length of entire record */ -- u16 text_len; /* length of text buffer */ -- u16 dict_len; /* length of dictionary buffer */ -- u8 facility; /* syslog facility */ -- u8 flags:5; /* internal record flags */ -- u8 level:3; /* syslog level */ --#ifdef CONFIG_PRINTK_CALLER -- u32 caller_id; /* thread id or processor id */ --#endif --} --#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS --__packed __aligned(4) --#endif --; -- -/* - * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken - * within the scheduler's rq lock. It must be released before calling - * console_unlock() or anything else that might wake up a process. - */ -DEFINE_RAW_SPINLOCK(logbuf_lock); -- ++#ifdef CONFIG_PRINTK ++/* syslog_lock protects syslog_* variables and write access to clear_seq. */ ++static DEFINE_SPINLOCK(syslog_lock); + -/* - * Helper macros to lock/unlock logbuf_lock and switch between - * printk-safe/unsafe modes. @@ -12335,75 +11649,50 @@ index 9b75f6bfc333..78a277ea5c35 100644 - raw_spin_unlock(&logbuf_lock); \ - printk_safe_exit_irqrestore(flags); \ - } while (0) -+/* The syslog_lock protects syslog_* variables. 
*/ -+static DEFINE_SPINLOCK(syslog_lock); -+#define syslog_lock_irq() spin_lock_irq(&syslog_lock) -+#define syslog_unlock_irq() spin_unlock_irq(&syslog_lock) -+#define syslog_lock_irqsave(flags) spin_lock_irqsave(&syslog_lock, flags) -+#define syslog_unlock_irqrestore(flags) spin_unlock_irqrestore(&syslog_lock, flags) ++/* Set to enable sync mode. Once set, it is never cleared. */ ++static bool sync_mode; - #ifdef CONFIG_PRINTK +-#ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); +/* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; --static u32 syslog_idx; static size_t syslog_partial; static bool syslog_time; --/* index and sequence number of the first record stored in the buffer */ --static u64 log_first_seq; --static u32 log_first_idx; -- --/* index and sequence number of the next record to store in the buffer */ --static u64 log_next_seq; --static u32 log_next_idx; -- -/* the next printk record to write to the console */ -static u64 console_seq; --static u32 console_idx; -static u64 exclusive_console_stop_seq; -- - /* the next printk record to read after the last 'clear' command */ +-static unsigned long console_dropped; ++struct latched_seq { ++ seqcount_latch_t latch; ++ u64 val[2]; ++}; + +-/* the next printk record to read after the last 'clear' command */ -static u64 clear_seq; --static u32 clear_idx; -+static atomic64_t clear_seq = ATOMIC64_INIT(0); ++/* ++ * The next printk record to read after the last 'clear' command. There are ++ * two copies (updated with seqcount_latch) so that reads can locklessly ++ * access a valid value. Writers are synchronized by @syslog_lock. ++ */ ++static struct latched_seq clear_seq = { ++ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), ++ .val[0] = 0, ++ .val[1] = 0, ++}; #ifdef CONFIG_PRINTK_CALLER #define PREFIX_MAX 48 -@@ -453,13 +368,30 @@ static u32 clear_idx; - #define LOG_FACILITY(v) ((v) >> 3 & 0xff) - - /* record buffer */ --#define LOG_ALIGN __alignof__(struct printk_log) -+#define LOG_ALIGN __alignof__(unsigned long) - #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) - #define LOG_BUF_LEN_MAX (u32)(1 << 31) - static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); - static char *log_buf = __log_buf; - static u32 log_buf_len = __LOG_BUF_LEN; - -+/* -+ * Define the average message size. This only affects the number of -+ * descriptors that will be available. Underestimating is better than -+ * overestimating (too many available descriptors is better than not enough). -+ */ -+#define PRB_AVGBITS 5 /* 32 character average length */ -+ -+#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS -+#error CONFIG_LOG_BUF_SHIFT value too small. -+#endif -+_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, -+ PRB_AVGBITS, &__log_buf[0]); -+ -+static struct printk_ringbuffer printk_rb_dynamic; -+ -+static struct printk_ringbuffer *prb = &printk_rb_static; + #else + #define PREFIX_MAX 32 + #endif + - /* - * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before - * per_cpu_areas are initialised. 
This variable is set to true when -@@ -467,7 +399,7 @@ static u32 log_buf_len = __LOG_BUF_LEN; ++/* the maximum size allowed to be reserved for a record */ + #define LOG_LINE_MAX (1024 - PREFIX_MAX) + + #define LOG_LEVEL(v) ((v) & 0x07) +@@ -447,11 +410,36 @@ static struct printk_ringbuffer *prb = &printk_rb_static; */ static bool __printk_percpu_data_ready __read_mostly; @@ -12412,350 +11701,51 @@ index 9b75f6bfc333..78a277ea5c35 100644 { return __printk_percpu_data_ready; } -@@ -484,108 +416,6 @@ u32 log_buf_len_get(void) - return log_buf_len; - } --/* human readable text of the record */ --static char *log_text(const struct printk_log *msg) --{ -- return (char *)msg + sizeof(struct printk_log); --} -- --/* optional key/value pair dictionary attached to the record */ --static char *log_dict(const struct printk_log *msg) --{ -- return (char *)msg + sizeof(struct printk_log) + msg->text_len; --} -- --/* get record by index; idx must point to valid msg */ --static struct printk_log *log_from_idx(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer. -- */ -- if (!msg->len) -- return (struct printk_log *)log_buf; -- return msg; --} -- --/* get next record; idx must point to valid msg */ --static u32 log_next(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* length == 0 indicates the end of the buffer; wrap */ -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer as *this* one, and -- * return the one after that. -- */ -- if (!msg->len) { -- msg = (struct printk_log *)log_buf; -- return msg->len; -- } -- return idx + msg->len; --} -- --/* -- * Check whether there is enough free space for the given message. -- * -- * The same values of first_idx and next_idx mean that the buffer -- * is either empty or full. -- * -- * If the buffer is empty, we must respect the position of the indexes. -- * They cannot be reset to the beginning of the buffer. -- */ --static int logbuf_has_space(u32 msg_size, bool empty) --{ -- u32 free; -- -- if (log_next_idx > log_first_idx || empty) -- free = max(log_buf_len - log_next_idx, log_first_idx); -- else -- free = log_first_idx - log_next_idx; -- -- /* -- * We need space also for an empty header that signalizes wrapping -- * of the buffer. -- */ -- return free >= msg_size + sizeof(struct printk_log); --} -- --static int log_make_free_space(u32 msg_size) --{ -- while (log_first_seq < log_next_seq && -- !logbuf_has_space(msg_size, false)) { -- /* drop old messages until we have enough contiguous space */ -- log_first_idx = log_next(log_first_idx); -- log_first_seq++; -- } -- -- if (clear_seq < log_first_seq) { -- clear_seq = log_first_seq; -- clear_idx = log_first_idx; -- } -- -- /* sequence numbers are equal, so the log buffer is empty */ -- if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) -- return 0; -- -- return -ENOMEM; --} -- --/* compute the message size including the padding bytes */ --static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) --{ -- u32 size; -- -- size = sizeof(struct printk_log) + text_len + dict_len; -- *pad_len = (-size) & (LOG_ALIGN - 1); -- size += *pad_len; -- -- return size; --} -- - /* - * Define how much of the log buffer we could take at maximum. The value - * must be greater than two. 
Note that only half of the buffer is available -@@ -594,84 +424,23 @@ static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) - #define MAX_LOG_TAKE_PART 4 - static const char trunc_msg[] = "<truncated>"; - --static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, -- u16 *dict_len, u32 *pad_len) -+static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) - { - /* - * The message should not take the whole buffer. Otherwise, it might - * get removed too soon. - */ - u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; -+ - if (*text_len > max_text_len) - *text_len = max_text_len; -- /* enable the warning message */ -- *trunc_msg_len = strlen(trunc_msg); -- /* disable the "dict" completely */ -- *dict_len = 0; -- /* compute the size again, count also the warning message */ -- return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); --} -- --/* insert record into the buffer, discard old ones, update heads */ --static int log_store(u32 caller_id, int facility, int level, -- enum log_flags flags, u64 ts_nsec, -- const char *dict, u16 dict_len, -- const char *text, u16 text_len) --{ -- struct printk_log *msg; -- u32 size, pad_len; -- u16 trunc_msg_len = 0; -- -- /* number of '\0' padding bytes to next message */ -- size = msg_used_size(text_len, dict_len, &pad_len); -- -- if (log_make_free_space(size)) { -- /* truncate the message if it is too long for empty buffer */ -- size = truncate_msg(&text_len, &trunc_msg_len, -- &dict_len, &pad_len); -- /* survive when the log buffer is too small for trunc_msg */ -- if (log_make_free_space(size)) -- return 0; -- } -- -- if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { -- /* -- * This message + an additional empty header does not fit -- * at the end of the buffer. Add an empty header with len == 0 -- * to signify a wrap around. -- */ -- memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); -- log_next_idx = 0; -- } - -- /* fill message */ -- msg = (struct printk_log *)(log_buf + log_next_idx); -- memcpy(log_text(msg), text, text_len); -- msg->text_len = text_len; -- if (trunc_msg_len) { -- memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); -- msg->text_len += trunc_msg_len; -- } -- memcpy(log_dict(msg), dict, dict_len); -- msg->dict_len = dict_len; -- msg->facility = facility; -- msg->level = level & 7; -- msg->flags = flags & 0x1f; -- if (ts_nsec > 0) -- msg->ts_nsec = ts_nsec; -+ /* enable the warning message (if there is room) */ -+ *trunc_msg_len = strlen(trunc_msg); -+ if (*text_len >= *trunc_msg_len) -+ *text_len -= *trunc_msg_len; - else -- msg->ts_nsec = local_clock(); --#ifdef CONFIG_PRINTK_CALLER -- msg->caller_id = caller_id; --#endif -- memset(log_dict(msg) + dict_len, 0, pad_len); -- msg->len = size; -- -- /* insert message */ -- log_next_idx += msg->len; -- log_next_seq++; -- -- return msg->text_len; -+ *trunc_msg_len = 0; - } - - int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); -@@ -723,13 +492,13 @@ static void append_char(char **pp, char *e, char c) - *(*pp)++ = c; - } - --static ssize_t msg_print_ext_header(char *buf, size_t size, -- struct printk_log *msg, u64 seq) -+static ssize_t info_print_ext_header(char *buf, size_t size, -+ struct printk_info *info) - { -- u64 ts_usec = msg->ts_nsec; -+ u64 ts_usec = info->ts_nsec; - char caller[20]; - #ifdef CONFIG_PRINTK_CALLER -- u32 id = msg->caller_id; -+ u32 id = info->caller_id; - - snprintf(caller, sizeof(caller), ",caller=%c%u", - id & 0x80000000 ? 
'C' : 'T', id & ~0x80000000); -@@ -740,13 +509,13 @@ static ssize_t msg_print_ext_header(char *buf, size_t size, - do_div(ts_usec, 1000); - - return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", -- (msg->facility << 3) | msg->level, seq, ts_usec, -- msg->flags & LOG_CONT ? 'c' : '-', caller); -+ (info->facility << 3) | info->level, info->seq, -+ ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); - } - --static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *dict, size_t dict_len, -- char *text, size_t text_len) -+static ssize_t msg_add_ext_text(char *buf, size_t size, -+ const char *text, size_t text_len, -+ unsigned char endc) - { - char *p = buf, *e = buf + size; - size_t i; -@@ -760,45 +529,56 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, - else - append_char(&p, e, c); - } -- append_char(&p, e, '\n'); -+ append_char(&p, e, endc); - -- if (dict_len) { -- bool line = true; -+ return p - buf; -+} - -- for (i = 0; i < dict_len; i++) { -- unsigned char c = dict[i]; -+static ssize_t msg_add_dict_text(char *buf, size_t size, -+ const char *key, const char *val) ++/* Must be called under syslog_lock. */ ++static void latched_seq_write(struct latched_seq *ls, u64 val) +{ -+ size_t val_len = strlen(val); -+ ssize_t len; - -- if (line) { -- append_char(&p, e, ' '); -- line = false; -- } -+ if (!val_len) -+ return 0; - -- if (c == '\0') { -- append_char(&p, e, '\n'); -- line = true; -- continue; -- } -+ len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ -+ len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); -+ len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); - -- if (c < ' ' || c >= 127 || c == '\\') { -- p += scnprintf(p, e - p, "\\x%02x", c); -- continue; -- } -+ return len; ++ raw_write_seqcount_latch(&ls->latch); ++ ls->val[0] = val; ++ raw_write_seqcount_latch(&ls->latch); ++ ls->val[1] = val; +} - -- append_char(&p, e, c); -- } -- append_char(&p, e, '\n'); -- } -+static ssize_t msg_print_ext_body(char *buf, size_t size, -+ char *text, size_t text_len, -+ struct dev_printk_info *dev_info) ++ ++/* Can be called from any context. */ ++static u64 latched_seq_read_nolock(struct latched_seq *ls) +{ -+ ssize_t len; - -- return p - buf; -+ len = msg_add_ext_text(buf, size, text, text_len, '\n'); ++ unsigned int seq; ++ unsigned int idx; ++ u64 val; + -+ if (!dev_info) -+ goto out; ++ do { ++ seq = raw_read_seqcount_latch(&ls->latch); ++ idx = seq & 0x1; ++ val = ls->val[idx]; ++ } while (read_seqcount_latch_retry(&ls->latch, seq)); + -+ len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", -+ dev_info->subsystem); -+ len += msg_add_dict_text(buf + len, size - len, "DEVICE", -+ dev_info->device); -+out: -+ return len; - } ++ return val; ++} ++ + /* Return log buffer address */ + char *log_buf_addr_get(void) + { +@@ -619,7 +607,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { - u64 seq; -- u32 idx; +- u64 seq; ++ atomic64_t seq; struct ratelimit_state rs; struct mutex lock; char buf[CONSOLE_EXT_LOG_MAX]; -+ -+ struct printk_info info; -+ char text_buf[CONSOLE_EXT_LOG_MAX]; -+ struct printk_record record; - }; - - static __printf(3, 4) __cold -@@ -808,7 +588,7 @@ int devkmsg_emit(int facility, int level, const char *fmt, ...) 
- int r; - - va_start(args, fmt); -- r = vprintk_emit(facility, level, NULL, 0, fmt, args); -+ r = vprintk_emit(facility, level, NULL, fmt, args); - va_end(args); - - return r; -@@ -881,7 +661,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) - { - struct devkmsg_user *user = file->private_data; -- struct printk_log *msg; -+ struct printk_record *r = &user->record; - size_t len; - ssize_t ret; - -@@ -892,41 +672,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, +@@ -719,27 +707,22 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (ret) return ret; - logbuf_lock_irq(); -- while (user->seq == log_next_seq) { -+ if (!prb_read_valid(prb, user->seq, r)) { +- if (!prb_read_valid(prb, user->seq, r)) { ++ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; - logbuf_unlock_irq(); @@ -12764,42 +11754,34 @@ index 9b75f6bfc333..78a277ea5c35 100644 - logbuf_unlock_irq(); ret = wait_event_interruptible(log_wait, -- user->seq != log_next_seq); -+ prb_read_valid(prb, user->seq, r)); +- prb_read_valid(prb, user->seq, r)); ++ prb_read_valid(prb, atomic64_read(&user->seq), r)); if (ret) goto out; - logbuf_lock_irq(); } -- if (user->seq < log_first_seq) { -+ if (user->seq < prb_first_valid_seq(prb)) { +- if (user->seq < prb_first_valid_seq(prb)) { ++ if (r->info->seq != atomic64_read(&user->seq)) { /* our last seen message is gone, return error and reset */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ user->seq = prb_first_valid_seq(prb); +- user->seq = prb_first_valid_seq(prb); ++ atomic64_set(&user->seq, r->info->seq); ret = -EPIPE; - logbuf_unlock_irq(); goto out; } -- msg = log_from_idx(user->idx); -- len = msg_print_ext_header(user->buf, sizeof(user->buf), -- msg, user->seq); -+ len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); - len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, -- log_dict(msg), msg->dict_len, -- log_text(msg), msg->text_len); -+ &r->text_buf[0], r->info->text_len, -+ &r->info->dev_info); - -- user->idx = log_next(user->idx); -- user->seq++; +@@ -748,8 +731,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, + &r->text_buf[0], r->info->text_len, + &r->info->dev_info); + +- user->seq = r->info->seq + 1; - logbuf_unlock_irq(); -+ user->seq = r->info->seq + 1; ++ atomic64_set(&user->seq, r->info->seq + 1); if (len > count) { ret = -EINVAL; -@@ -961,12 +731,10 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) +@@ -784,11 +766,10 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) if (offset) return -ESPIPE; @@ -12807,25 +11789,22 @@ index 9b75f6bfc333..78a277ea5c35 100644 switch (whence) { case SEEK_SET: /* the first record */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ user->seq = prb_first_valid_seq(prb); +- user->seq = prb_first_valid_seq(prb); ++ atomic64_set(&user->seq, prb_first_valid_seq(prb)); break; case SEEK_DATA: /* -@@ -974,18 +742,15 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) +@@ -796,22 +777,22 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) * like issued by 'dmesg -c'. Reading /dev/kmsg itself * changes no global state, and does not clear anything. 
*/ -- user->idx = clear_idx; - user->seq = clear_seq; -+ user->seq = atomic64_read(&clear_seq); ++ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); break; case SEEK_END: /* after the last record */ -- user->idx = log_next_idx; -- user->seq = log_next_seq; -+ user->seq = prb_next_seq(prb); +- user->seq = prb_next_seq(prb); ++ atomic64_set(&user->seq, prb_next_seq(prb)); break; default: ret = -EINVAL; @@ -12834,16 +11813,23 @@ index 9b75f6bfc333..78a277ea5c35 100644 return ret; } -@@ -999,15 +764,13 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) + static __poll_t devkmsg_poll(struct file *file, poll_table *wait) + { + struct devkmsg_user *user = file->private_data; ++ struct printk_info info; + __poll_t ret = 0; + + if (!user) +@@ -819,15 +800,13 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); - logbuf_lock_irq(); -- if (user->seq < log_next_seq) { -+ if (prb_read_valid(prb, user->seq, NULL)) { +- if (prb_read_valid(prb, user->seq, NULL)) { ++ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { /* return error when data has vanished underneath us */ -- if (user->seq < log_first_seq) -+ if (user->seq < prb_first_valid_seq(prb)) +- if (user->seq < prb_first_valid_seq(prb)) ++ if (info.seq != atomic64_read(&user->seq)) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -12852,98 +11838,28 @@ index 9b75f6bfc333..78a277ea5c35 100644 return ret; } -@@ -1037,10 +800,10 @@ static int devkmsg_open(struct inode *inode, struct file *file) - - mutex_init(&user->lock); +@@ -860,9 +839,7 @@ static int devkmsg_open(struct inode *inode, struct file *file) + prb_rec_init_rd(&user->record, &user->info, + &user->text_buf[0], sizeof(user->text_buf)); - logbuf_lock_irq(); -- user->idx = log_first_idx; -- user->seq = log_first_seq; +- user->seq = prb_first_valid_seq(prb); - logbuf_unlock_irq(); -+ prb_rec_init_rd(&user->record, &user->info, -+ &user->text_buf[0], sizeof(user->text_buf)); -+ -+ user->seq = prb_first_valid_seq(prb); ++ atomic64_set(&user->seq, prb_first_valid_seq(prb)); file->private_data = user; return 0; -@@ -1080,23 +843,61 @@ const struct file_operations kmsg_fops = { - */ - void log_buf_vmcoreinfo_setup(void) - { -- VMCOREINFO_SYMBOL(log_buf); -- VMCOREINFO_SYMBOL(log_buf_len); -- VMCOREINFO_SYMBOL(log_first_idx); -- VMCOREINFO_SYMBOL(clear_idx); -- VMCOREINFO_SYMBOL(log_next_idx); -+ struct dev_printk_info *dev_info = NULL; -+ -+ VMCOREINFO_SYMBOL(prb); -+ VMCOREINFO_SYMBOL(printk_rb_static); -+ VMCOREINFO_SYMBOL(clear_seq); -+ - /* -- * Export struct printk_log size and field offsets. User space tools can -+ * Export struct size and field offsets. User space tools can - * parse it and detect any changes to structure down the line. 
- */ -- VMCOREINFO_STRUCT_SIZE(printk_log); -- VMCOREINFO_OFFSET(printk_log, ts_nsec); -- VMCOREINFO_OFFSET(printk_log, len); -- VMCOREINFO_OFFSET(printk_log, text_len); -- VMCOREINFO_OFFSET(printk_log, dict_len); --#ifdef CONFIG_PRINTK_CALLER -- VMCOREINFO_OFFSET(printk_log, caller_id); --#endif +@@ -954,6 +931,9 @@ void log_buf_vmcoreinfo_setup(void) + + VMCOREINFO_SIZE(atomic_long_t); + VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); + -+ VMCOREINFO_SIZE(atomic64_t); -+ VMCOREINFO_TYPE_OFFSET(atomic64_t, counter); -+ -+ VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); -+ VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); -+ VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); -+ VMCOREINFO_OFFSET(printk_ringbuffer, fail); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_desc_ring); -+ VMCOREINFO_OFFSET(prb_desc_ring, count_bits); -+ VMCOREINFO_OFFSET(prb_desc_ring, descs); -+ VMCOREINFO_OFFSET(prb_desc_ring, infos); -+ VMCOREINFO_OFFSET(prb_desc_ring, head_id); -+ VMCOREINFO_OFFSET(prb_desc_ring, tail_id); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_desc); -+ VMCOREINFO_OFFSET(prb_desc, state_var); -+ VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); -+ VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); -+ VMCOREINFO_OFFSET(prb_data_blk_lpos, next); -+ -+ VMCOREINFO_STRUCT_SIZE(printk_info); -+ VMCOREINFO_OFFSET(printk_info, seq); -+ VMCOREINFO_OFFSET(printk_info, ts_nsec); -+ VMCOREINFO_OFFSET(printk_info, text_len); -+ VMCOREINFO_OFFSET(printk_info, caller_id); -+ VMCOREINFO_OFFSET(printk_info, dev_info); -+ -+ VMCOREINFO_STRUCT_SIZE(dev_printk_info); -+ VMCOREINFO_OFFSET(dev_printk_info, subsystem); -+ VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); -+ VMCOREINFO_OFFSET(dev_printk_info, device); -+ VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); -+ -+ VMCOREINFO_STRUCT_SIZE(prb_data_ring); -+ VMCOREINFO_OFFSET(prb_data_ring, size_bits); -+ VMCOREINFO_OFFSET(prb_data_ring, data); -+ VMCOREINFO_OFFSET(prb_data_ring, head_lpos); -+ VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); -+ -+ VMCOREINFO_SIZE(atomic_long_t); -+ VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); ++ VMCOREINFO_STRUCT_SIZE(latched_seq); ++ VMCOREINFO_OFFSET(latched_seq, val); } #endif -@@ -1168,17 +969,48 @@ static inline void log_buf_add_cpu(void) {} +@@ -1025,9 +1005,6 @@ static inline void log_buf_add_cpu(void) {} static void __init set_percpu_data_ready(void) { @@ -12953,448 +11869,158 @@ index 9b75f6bfc333..78a277ea5c35 100644 __printk_percpu_data_ready = true; } -+static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, -+ struct printk_record *r) -+{ -+ struct prb_reserved_entry e; -+ struct printk_record dest_r; -+ -+ prb_rec_init_wr(&dest_r, r->info->text_len); -+ -+ if (!prb_reserve(&e, rb, &dest_r)) -+ return 0; -+ -+ memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); -+ dest_r.info->text_len = r->info->text_len; -+ dest_r.info->facility = r->info->facility; -+ dest_r.info->level = r->info->level; -+ dest_r.info->flags = r->info->flags; -+ dest_r.info->ts_nsec = r->info->ts_nsec; -+ dest_r.info->caller_id = r->info->caller_id; -+ memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); -+ -+ prb_final_commit(&e); -+ -+ return prb_record_text_space(&e); -+} -+ -+static char setup_text_buf[LOG_LINE_MAX] __initdata; -+ - void __init setup_log_buf(int early) - { +@@ -1067,7 +1044,6 @@ void __init setup_log_buf(int early) + struct printk_record r; + size_t new_descs_size; + size_t new_infos_size; - unsigned long flags; -+ 
struct printk_info *new_infos; -+ unsigned int new_descs_count; -+ struct prb_desc *new_descs; -+ struct printk_info info; -+ struct printk_record r; -+ size_t new_descs_size; -+ size_t new_infos_size; char *new_log_buf; unsigned int free; -+ u64 seq; - - /* - * Some archs call setup_log_buf() multiple times - first is very -@@ -1197,24 +1029,71 @@ void __init setup_log_buf(int early) - if (!new_log_buf_len) - return; - -+ new_descs_count = new_log_buf_len >> PRB_AVGBITS; -+ if (new_descs_count == 0) { -+ pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); -+ return; -+ } -+ - new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); - if (unlikely(!new_log_buf)) { -- pr_err("log_buf_len: %lu bytes not available\n", -- new_log_buf_len); -+ pr_err("log_buf_len: %lu text bytes not available\n", -+ new_log_buf_len); - return; - } + u64 seq; +@@ -1125,8 +1101,6 @@ void __init setup_log_buf(int early) + new_descs, ilog2(new_descs_count), + new_infos); -- logbuf_lock_irqsave(flags); -+ new_descs_size = new_descs_count * sizeof(struct prb_desc); -+ new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); -+ if (unlikely(!new_descs)) { -+ pr_err("log_buf_len: %zu desc bytes not available\n", -+ new_descs_size); -+ goto err_free_log_buf; -+ } -+ -+ new_infos_size = new_descs_count * sizeof(struct printk_info); -+ new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); -+ if (unlikely(!new_infos)) { -+ pr_err("log_buf_len: %zu info bytes not available\n", -+ new_infos_size); -+ goto err_free_descs; -+ } -+ -+ prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); -+ -+ prb_init(&printk_rb_dynamic, -+ new_log_buf, ilog2(new_log_buf_len), -+ new_descs, ilog2(new_descs_count), -+ new_infos); -+ +- printk_safe_enter_irqsave(flags); +- log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; -- free = __LOG_BUF_LEN - log_next_idx; -- memcpy(log_buf, __log_buf, __LOG_BUF_LEN); -- logbuf_unlock_irqrestore(flags); -+ -+ free = __LOG_BUF_LEN; -+ prb_for_each_record(0, &printk_rb_static, seq, &r) -+ free -= add_to_rb(&printk_rb_dynamic, &r); -+ -+ /* -+ * This is early enough that everything is still running on the -+ * boot CPU and interrupts are disabled. So no new messages will -+ * appear during the transition to the dynamic buffer. 
-+ */ -+ prb = &printk_rb_dynamic; -+ -+ if (seq != prb_next_seq(&printk_rb_static)) { -+ pr_err("dropped %llu messages\n", -+ prb_next_seq(&printk_rb_static) - seq); -+ } - - pr_info("log_buf_len: %u bytes\n", log_buf_len); - pr_info("early log buf free: %u(%u%%)\n", - free, (free * 100) / __LOG_BUF_LEN); -+ return; -+ -+err_free_descs: -+ memblock_free(__pa(new_descs), new_descs_size); -+err_free_log_buf: -+ memblock_free(__pa(new_log_buf), new_log_buf_len); - } - - static bool __read_mostly ignore_loglevel; -@@ -1321,18 +1200,18 @@ static size_t print_caller(u32 id, char *buf) - #define print_caller(id, buf) 0 - #endif - --static size_t print_prefix(const struct printk_log *msg, bool syslog, -- bool time, char *buf) -+static size_t info_print_prefix(const struct printk_info *info, bool syslog, -+ bool time, char *buf) - { - size_t len = 0; - - if (syslog) -- len = print_syslog((msg->facility << 3) | msg->level, buf); -+ len = print_syslog((info->facility << 3) | info->level, buf); - - if (time) -- len += print_time(msg->ts_nsec, buf + len); -+ len += print_time(info->ts_nsec, buf + len); - -- len += print_caller(msg->caller_id, buf + len); -+ len += print_caller(info->caller_id, buf + len); +@@ -1142,8 +1116,6 @@ void __init setup_log_buf(int early) + */ + prb = &printk_rb_dynamic; - if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { - buf[len++] = ' '; -@@ -1342,72 +1221,150 @@ static size_t print_prefix(const struct printk_log *msg, bool syslog, - return len; +- printk_safe_exit_irqrestore(flags); +- + if (seq != prb_next_seq(&printk_rb_static)) { + pr_err("dropped %llu messages\n", + prb_next_seq(&printk_rb_static) - seq); +@@ -1420,6 +1392,50 @@ static size_t get_record_print_text_size(struct printk_info *info, + return ((prefix_len * line_count) + info->text_len + 1); } --static size_t msg_print_text(const struct printk_log *msg, bool syslog, -- bool time, char *buf, size_t size) +/* -+ * Prepare the record for printing. The text is shifted within the given -+ * buffer to avoid a need for another one. The following operations are -+ * done: -+ * -+ * - Add prefix for each line. -+ * - Add the trailing newline that has been removed in vprintk_store(). -+ * - Drop truncated lines that do not longer fit into the buffer. ++ * Beginning with @start_seq, find the first record where it and all following ++ * records up to (but not including) @max_seq fit into @size. + * -+ * Return: The length of the updated/prepared text, including the added -+ * prefixes and the newline. The dropped line(s) are not counted. ++ * @max_seq is simply an upper bound and does not need to exist. If the caller ++ * does not require an upper bound, -1 can be used for @max_seq. + */ -+static size_t record_print_text(struct printk_record *r, bool syslog, -+ bool time) - { -- const char *text = log_text(msg); -- size_t text_size = msg->text_len; -- size_t len = 0; -+ size_t text_len = r->info->text_len; -+ size_t buf_size = r->text_buf_size; -+ char *text = r->text_buf; - char prefix[PREFIX_MAX]; -- const size_t prefix_len = print_prefix(msg, syslog, time, prefix); -+ bool truncated = false; -+ size_t prefix_len; -+ size_t line_len; ++static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, ++ bool syslog, bool time) ++{ ++ struct printk_info info; ++ unsigned int line_count; + size_t len = 0; -+ char *next; - -- do { -- const char *next = memchr(text, '\n', text_size); -- size_t text_len; ++ u64 seq; ++ ++ /* Determine the size of the records up to @max_seq. 
*/ ++ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { ++ if (info.seq >= max_seq) ++ break; ++ len += get_record_print_text_size(&info, line_count, syslog, time); ++ } ++ + /* -+ * If the message was truncated because the buffer was not large -+ * enough, treat the available text as if it were the full text. ++ * Adjust the upper bound for the next loop to avoid subtracting ++ * lengths that were never added. + */ -+ if (text_len > buf_size) -+ text_len = buf_size; - -+ prefix_len = info_print_prefix(r->info, syslog, time, prefix); ++ if (seq < max_seq) ++ max_seq = seq; + + /* -+ * @text_len: bytes of unprocessed text -+ * @line_len: bytes of current line _without_ newline -+ * @text: pointer to beginning of current line -+ * @len: number of bytes prepared in r->text_buf ++ * Move first record forward until length fits into the buffer. Ignore ++ * newest messages that were not counted in the above cycle. Messages ++ * might appear and get lost in the meantime. This is a best effort ++ * that prevents an infinite loop that could occur with a retry. + */ -+ for (;;) { -+ next = memchr(text, '\n', text_len); - if (next) { -- text_len = next - text; -- next++; -- text_size -= next - text; -+ line_len = next - text; - } else { -- text_len = text_size; -+ /* Drop truncated line(s). */ -+ if (truncated) -+ break; -+ line_len = text_len; - } - -- if (buf) { -- if (prefix_len + text_len + 1 >= size - len) -+ /* -+ * Truncate the text if there is not enough space to add the -+ * prefix and a trailing newline. -+ */ -+ if (len + prefix_len + text_len + 1 > buf_size) { -+ /* Drop even the current line if no space. */ -+ if (len + prefix_len + line_len + 1 > buf_size) - break; - -- memcpy(buf + len, prefix, prefix_len); -- len += prefix_len; -- memcpy(buf + len, text, text_len); -- len += text_len; -- buf[len++] = '\n'; -- } else { -- /* SYSLOG_ACTION_* buffer size only calculation */ -- len += prefix_len + text_len + 1; -+ text_len = buf_size - len - prefix_len - 1; -+ truncated = true; - } - -- text = next; -- } while (text); -+ memmove(text + prefix_len, text, text_len); -+ memcpy(text, prefix, prefix_len); -+ -+ len += prefix_len + line_len + 1; -+ -+ if (text_len == line_len) { -+ /* -+ * Add the trailing newline removed in -+ * vprintk_store(). -+ */ -+ text[prefix_len + line_len] = '\n'; ++ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { ++ if (len <= size || info.seq >= max_seq) + break; -+ } -+ -+ /* -+ * Advance beyond the added prefix and the related line with -+ * its newline. -+ */ -+ text += prefix_len + line_len + 1; -+ -+ /* -+ * The remaining text has only decreased by the line with its -+ * newline. -+ * -+ * Note that @text_len can become zero. It happens when @text -+ * ended with a newline (either due to truncation or the -+ * original string ending with "\n\n"). The loop is correctly -+ * repeated and (if not truncated) an empty line with a prefix -+ * will be prepared. -+ */ -+ text_len -= line_len + 1; ++ len -= get_record_print_text_size(&info, line_count, syslog, time); + } - - return len; - } - -+static size_t get_record_print_text_size(struct printk_info *info, -+ unsigned int line_count, -+ bool syslog, bool time) -+{ -+ char prefix[PREFIX_MAX]; -+ size_t prefix_len; + -+ prefix_len = info_print_prefix(info, syslog, time, prefix); -+ -+ /* -+ * Each line will be preceded with a prefix. The intermediate -+ * newlines are already within the text, but a final trailing -+ * newline will be added. 
-+ */ -+ return ((prefix_len * line_count) + info->text_len + 1); ++ return seq; +} + static int syslog_print(char __user *buf, int size) { -+ struct printk_info info; -+ struct printk_record r; + struct printk_info info; +@@ -1427,19 +1443,19 @@ static int syslog_print(char __user *buf, int size) char *text; -- struct printk_log *msg; int len = 0; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); +- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); if (!text) return -ENOMEM; -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); -+ +- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); ++ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + while (size > 0) { size_t n; size_t skip; - logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -- /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -- syslog_partial = 0; -- } -- if (syslog_seq == log_next_seq) { ++ spin_lock_irq(&syslog_lock); + if (!prb_read_valid(prb, syslog_seq, &r)) { - logbuf_unlock_irq(); -+ syslog_lock_irq(); -+ if (!prb_read_valid(prb, syslog_seq, &r)) { -+ syslog_unlock_irq(); ++ spin_unlock_irq(&syslog_lock); break; } -+ if (r.info->seq != syslog_seq) { -+ /* message is gone, move to next valid one */ -+ syslog_seq = r.info->seq; -+ syslog_partial = 0; -+ } - - /* - * To keep reading/counting partial line consistent, -@@ -1417,13 +1374,10 @@ static int syslog_print(char __user *buf, int size) - syslog_time = printk_time; - - skip = syslog_partial; -- msg = log_from_idx(syslog_idx); -- n = msg_print_text(msg, true, syslog_time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ n = record_print_text(&r, true, syslog_time); - if (n - syslog_partial <= size) { - /* message fits into buffer, move forward */ -- syslog_idx = log_next(syslog_idx); -- syslog_seq++; -+ syslog_seq = r.info->seq + 1; - n -= syslog_partial; - syslog_partial = 0; - } else if (!len){ -@@ -1432,7 +1386,7 @@ static int syslog_print(char __user *buf, int size) + if (r.info->seq != syslog_seq) { +@@ -1468,7 +1484,7 @@ static int syslog_print(char __user *buf, int size) syslog_partial += n; } else n = 0; - logbuf_unlock_irq(); -+ syslog_unlock_irq(); ++ spin_unlock_irq(&syslog_lock); if (!n) break; -@@ -1454,11 +1408,14 @@ static int syslog_print(char __user *buf, int size) - +@@ -1491,34 +1507,25 @@ static int syslog_print(char __user *buf, int size) static int syslog_print_all(char __user *buf, int size, bool clear) { -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; -+ u64 newest_seq; -+ u64 clr_seq; + struct printk_info info; +- unsigned int line_count; + struct printk_record r; char *text; int len = 0; -- u64 next_seq; u64 seq; -- u32 idx; bool time; - text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -@@ -1466,63 +1423,58 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) return -ENOMEM; time = printk_time; - logbuf_lock_irq(); -+ clr_seq = atomic64_read(&clear_seq); -+ /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. 
*/ -- seq = clear_seq; -- idx = clear_idx; -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); - -- len += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -- } -+ prb_for_each_info(clr_seq, prb, seq, &info, &line_count) -+ len += get_record_print_text_size(&info, line_count, true, time); - +- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) +- len += get_record_print_text_size(&info, line_count, true, time); +- - /* move first record forward until length fits into the buffer */ -- seq = clear_seq; -- idx = clear_idx; -- while (len > size && seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ /* -+ * Keep track of the latest in case new records are coming in fast -+ * and overwriting the older records. -+ */ -+ newest_seq = seq; - -- len -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ /* -+ * Move first record forward until length fits into the buffer. This -+ * is a best effort attempt. If @newest_seq is reached because the -+ * ringbuffer is wrapping too fast, just start filling the buffer -+ * from there. -+ */ -+ prb_for_each_info(clr_seq, prb, seq, &info, &line_count) { -+ if (len <= size || info.seq > newest_seq) -+ break; -+ len -= get_record_print_text_size(&info, line_count, true, time); - } +- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { +- if (len <= size) +- break; +- len -= get_record_print_text_size(&info, line_count, true, time); +- } ++ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, ++ size, true, time); -- /* last message fitting into this dump */ -- next_seq = log_next_seq; -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); +- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); ++ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); len = 0; -- while (len >= 0 && seq < next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- int textlen = msg_print_text(msg, true, time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ prb_for_each_record(seq, prb, seq, &r) { -+ int textlen; - -- idx = log_next(idx); -- seq++; -+ textlen = record_print_text(&r, true, time); -+ -+ if (len + textlen > size) { -+ seq--; -+ break; -+ } + prb_for_each_record(seq, prb, seq, &r) { +@@ -1531,20 +1538,20 @@ static int syslog_print_all(char __user *buf, int size, bool clear) + break; + } - logbuf_unlock_irq(); if (copy_to_user(buf + len, text, textlen)) @@ -13403,146 +12029,117 @@ index 9b75f6bfc333..78a277ea5c35 100644 len += textlen; - logbuf_lock_irq(); -- if (seq < log_first_seq) { -- /* messages are gone, move to next one */ -- seq = log_first_seq; -- idx = log_first_idx; -- } -+ if (len < 0) -+ break; + if (len < 0) + break; } -- if (clear) { -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; -- } +- if (clear) +- clear_seq = seq; - logbuf_unlock_irq(); -+ if (clear) -+ atomic64_set(&clear_seq, seq); ++ if (clear) { ++ spin_lock_irq(&syslog_lock); ++ latched_seq_write(&clear_seq, seq); ++ spin_unlock_irq(&syslog_lock); ++ } kfree(text); return len; -@@ -1530,10 +1482,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1552,13 +1559,26 @@ static int syslog_print_all(char __user *buf, int size, bool clear) static void syslog_clear(void) { - logbuf_lock_irq(); -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; +- clear_seq = prb_next_seq(prb); - logbuf_unlock_irq(); -+ atomic64_set(&clear_seq, prb_next_seq(prb)); ++ spin_lock_irq(&syslog_lock); ++ latched_seq_write(&clear_seq, 
prb_next_seq(prb)); ++ spin_unlock_irq(&syslog_lock); ++} ++ ++/* Return a consistent copy of @syslog_seq. */ ++static u64 read_syslog_seq_irq(void) ++{ ++ u64 seq; ++ ++ spin_lock_irq(&syslog_lock); ++ seq = syslog_seq; ++ spin_unlock_irq(&syslog_lock); ++ ++ return seq; } int do_syslog(int type, char __user *buf, int len, int source) -@@ -1541,6 +1490,7 @@ int do_syslog(int type, char __user *buf, int len, int source) + { ++ struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; -+ u64 seq; - - error = check_syslog_permissions(type, source); - if (error) -@@ -1558,8 +1508,11 @@ int do_syslog(int type, char __user *buf, int len, int source) +@@ -1579,8 +1599,9 @@ int do_syslog(int type, char __user *buf, int len, int source) return 0; if (!access_ok(buf, len)) return -EFAULT; -+ syslog_lock_irq(); -+ seq = syslog_seq; -+ syslog_unlock_irq(); ++ error = wait_event_interruptible(log_wait, -- syslog_seq != log_next_seq); -+ prb_read_valid(prb, seq, NULL)); +- prb_read_valid(prb, syslog_seq, NULL)); ++ prb_read_valid(prb, read_syslog_seq_irq(), NULL)); if (error) return error; error = syslog_print(buf, len); -@@ -1567,7 +1520,7 @@ int do_syslog(int type, char __user *buf, int len, int source) - /* Read/clear last kernel messages */ - case SYSLOG_ACTION_READ_CLEAR: - clear = true; -- /* FALL THRU */ -+ fallthrough; - /* Read last kernel messages */ - case SYSLOG_ACTION_READ_ALL: - if (!buf || len < 0) -@@ -1607,11 +1560,10 @@ int do_syslog(int type, char __user *buf, int len, int source) +@@ -1628,10 +1649,15 @@ int do_syslog(int type, char __user *buf, int len, int source) break; /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: - logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -+ syslog_lock_irq(); -+ if (syslog_seq < prb_first_valid_seq(prb)) { +- if (syslog_seq < prb_first_valid_seq(prb)) { ++ spin_lock_irq(&syslog_lock); ++ if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { ++ /* No unread messages. */ ++ spin_unlock_irq(&syslog_lock); ++ return 0; ++ } ++ if (info.seq != syslog_seq) { /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -+ syslog_seq = prb_first_valid_seq(prb); +- syslog_seq = prb_first_valid_seq(prb); ++ syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { -@@ -1620,24 +1572,22 @@ int do_syslog(int type, char __user *buf, int len, int source) - * for pending data, not the size; return the count of - * records, not the length. - */ -- error = log_next_seq - syslog_seq; -+ error = prb_next_seq(prb) - syslog_seq; +@@ -1643,7 +1669,6 @@ int do_syslog(int type, char __user *buf, int len, int source) + error = prb_next_seq(prb) - syslog_seq; } else { -- u64 seq = syslog_seq; -- u32 idx = syslog_idx; bool time = syslog_partial ? 
syslog_time : printk_time; -- -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- error += msg_print_text(msg, true, time, NULL, -- 0); -+ struct printk_info info; -+ unsigned int line_count; -+ u64 seq; -+ -+ prb_for_each_info(syslog_seq, prb, seq, &info, -+ &line_count) { -+ error += get_record_print_text_size(&info, line_count, -+ true, time); - time = printk_time; -- idx = log_next(idx); -- seq++; +- struct printk_info info; + unsigned int line_count; + u64 seq; + +@@ -1655,7 +1680,7 @@ int do_syslog(int type, char __user *buf, int len, int source) } error -= syslog_partial; } - logbuf_unlock_irq(); -+ syslog_unlock_irq(); ++ spin_unlock_irq(&syslog_lock); break; /* Size of the log buffer */ case SYSLOG_ACTION_SIZE_BUFFER: -@@ -1657,178 +1607,134 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) +@@ -1674,202 +1699,172 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) + return do_syslog(type, buf, len, SYSLOG_FROM_READER); } - /* +-/* - * Special console_lock variants that help to reduce the risk of soft-lockups. - * They allow to pass console_lock to another printk() call using a busy wait. -+ * The per-cpu sprint buffers are used with interrupts disabled, so each CPU -+ * only requires 2 buffers: for non-NMI and NMI contexts. Recursive printk() -+ * calls are handled by the global sprint buffers. - */ -+#define SPRINT_CTX_DEPTH 2 - +- */ +- -#ifdef CONFIG_LOCKDEP -static struct lockdep_map console_owner_dep_map = { - .name = "console_owner" -+/* Static sprint buffers for early boot (only 1 CPU) and recursion. */ -+static DECLARE_BITMAP(sprint_global_buffer_map, SPRINT_CTX_DEPTH); -+static char sprint_global_buffer[SPRINT_CTX_DEPTH][PREFIX_MAX + LOG_LINE_MAX]; -+ -+struct sprint_buffers { -+ char buf[SPRINT_CTX_DEPTH][PREFIX_MAX + LOG_LINE_MAX]; -+ atomic_t index; - }; +-}; -#endif - +- -static DEFINE_RAW_SPINLOCK(console_owner_lock); -static struct task_struct *console_owner; -static bool console_waiter; -+static DEFINE_PER_CPU(struct sprint_buffers, percpu_sprint_buffers); ++int printk_delay_msec __read_mostly; -/** - * console_lock_spinning_enable - mark beginning of code where another @@ -13552,31 +12149,25 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * the section where the console_lock owner can not sleep, because - * there may be a waiter spinning (like a spinlock). Also it must be - * ready to hand over the lock at the end of the section. -+/* -+ * Acquire an unused buffer, returning its index. If no buffer is -+ * available, @count is returned. - */ +- */ -static void console_lock_spinning_enable(void) -+static int _get_sprint_buf(unsigned long *map, int count) ++static inline void printk_delay(int level) { - raw_spin_lock(&console_owner_lock); - console_owner = current; - raw_spin_unlock(&console_owner_lock); -+ int index; -+ -+ do { -+ index = find_first_zero_bit(map, count); -+ if (index == count) -+ break; -+ /* -+ * Guarantee map changes are ordered for the other CPUs. -+ * Pairs with clear_bit() in _put_sprint_buf(). -+ */ -+ } while (test_and_set_bit(index, map)); ++ boot_delay_msec(level); - /* The waiter may spin on us after setting console_owner */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); -+ return index; ++ if (unlikely(printk_delay_msec)) { ++ int m = printk_delay_msec; ++ ++ while (m--) { ++ mdelay(1); ++ touch_nmi_watchdog(); ++ } ++ } } -/** @@ -13595,35 +12186,69 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * Return: 1 if the lock rights were passed, 0 otherwise. 
- */ -static int console_lock_spinning_disable_and_check(void) -+/* Mark the buffer @index as unused. */ -+static void _put_sprint_buf(unsigned long *map, unsigned int count, unsigned int index) ++static bool kernel_sync_mode(void) { - int waiter; -- ++ return (oops_in_progress || sync_mode); ++} + - raw_spin_lock(&console_owner_lock); - waiter = READ_ONCE(console_waiter); - console_owner = NULL; - raw_spin_unlock(&console_owner_lock); -- ++static bool console_can_sync(struct console *con) ++{ ++ if (!(con->flags & CON_ENABLED)) ++ return false; ++ if (con->write_atomic && kernel_sync_mode()) ++ return true; ++ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) ++ return true; ++ if (con->write && (con->flags & CON_BOOT) && !con->thread) ++ return true; ++ return false; ++} + - if (!waiter) { - spin_release(&console_owner_dep_map, _THIS_IP_); - return 0; - } -- ++static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len) ++{ ++ if (!(con->flags & CON_ENABLED)) ++ return false; ++ if (con->write_atomic && kernel_sync_mode()) ++ con->write_atomic(con, text, text_len); ++ else if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) ++ con->write_atomic(con, text, text_len); ++ else if (con->write && (con->flags & CON_BOOT) && !con->thread) ++ con->write(con, text, text_len); ++ else ++ return false; + - /* The waiter is now free to continue */ - WRITE_ONCE(console_waiter, false); -- ++ return true; ++} + - spin_release(&console_owner_dep_map, _THIS_IP_); -- - /* ++static bool have_atomic_console(void) ++{ ++ struct console *con; + +- /* - * Hand off console_lock to waiter. The waiter will perform - * the up(). After this, the waiter is the console_lock owner. -+ * Guarantee map changes are ordered for the other CPUs. -+ * Pairs with test_and_set_bit() in _get_sprint_buf(). - */ +- */ - mutex_release(&console_lock_dep_map, _THIS_IP_); - return 1; -+ clear_bit(index, map); ++ for_each_console(con) { ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ if (con->write_atomic) ++ return true; ++ } ++ return false; } -/** @@ -13633,31 +12258,27 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * owner is running in specially marked sections. It means that - * the current owner is running and cannot reschedule until it - * is ready to lose the lock. -+/* -+ * Get a buffer sized PREFIX_MAX+LOG_LINE_MAX for sprinting. On success, @id -+ * is set and interrupts are disabled. @id is used to put back the buffer. - * +- * - * Return: 1 if we got the lock, 0 othrewise -+ * @id is non-negative for per-cpu buffers, negative for global buffers. 
- */ +- */ -static int console_trylock_spinning(void) -+static char *get_sprint_buf(int *id, unsigned long *flags) ++static bool print_sync(struct console *con, u64 *seq) { - struct task_struct *owner = NULL; - bool waiter; - bool spin = false; - unsigned long flags; -+ struct sprint_buffers *bufs; -+ unsigned int index; -+ unsigned int cpu; ++ struct printk_info info; ++ struct printk_record r; ++ size_t text_len; - if (console_trylock()) - return 1; -+ local_irq_save(*flags); -+ cpu = get_cpu(); ++ prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf)); - printk_safe_enter_irqsave(flags); -+ if (printk_percpu_data_ready()) { ++ if (!prb_read_valid(prb, *seq, &r)) ++ return false; - raw_spin_lock(&console_owner_lock); - owner = READ_ONCE(console_owner); @@ -13665,22 +12286,11 @@ index 9b75f6bfc333..78a277ea5c35 100644 - if (!waiter && owner && owner != current) { - WRITE_ONCE(console_waiter, true); - spin = true; -+ /* -+ * First try with per-cpu pool. Note that the last -+ * buffer is reserved for NMI context. -+ */ -+ bufs = per_cpu_ptr(&percpu_sprint_buffers, cpu); -+ index = atomic_read(&bufs->index); -+ if (index < (SPRINT_CTX_DEPTH - 1) || -+ (in_nmi() && index < SPRINT_CTX_DEPTH)) { -+ atomic_set(&bufs->index, index + 1); -+ *id = cpu; -+ return &bufs->buf[index][0]; -+ } - } +- } - raw_spin_unlock(&console_owner_lock); ++ text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); - /* +- /* - * If there is an active printk() writing to the - * consoles, instead of having it write our data too, - * see if we can offload that load from the active @@ -13688,21 +12298,13 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * Go into a spin only if there isn't already a waiter - * spinning, and there is an active printer, and - * that active printer isn't us (recursive printk?). -+ * Fallback to global pool. -+ * -+ * The global pool will only ever be used if per-cpu data is not ready -+ * yet or printk recurses. Recursion will not occur unless printk is -+ * having internal issues. - */ +- */ - if (!spin) { - printk_safe_exit_irqrestore(flags); - return 0; -+ index = _get_sprint_buf(sprint_global_buffer_map, SPRINT_CTX_DEPTH); -+ if (index != SPRINT_CTX_DEPTH) { -+ /* Convert to global buffer representation. */ -+ *id = -index - 1; -+ return &sprint_global_buffer[index][0]; - } +- } ++ if (!call_sync_console_driver(con, &con->sync_buf[0], text_len)) ++ return false; - /* We spin waiting for the owner to release us */ - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); @@ -13710,7 +12312,8 @@ index 9b75f6bfc333..78a277ea5c35 100644 - while (READ_ONCE(console_waiter)) - cpu_relax(); - spin_release(&console_owner_dep_map, _THIS_IP_); -- ++ *seq = r.info->seq; + - printk_safe_exit_irqrestore(flags); - /* - * The owner passed the console lock to us. @@ -13719,12 +12322,16 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * complain. - */ - mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); -- ++ touch_softlockup_watchdog_sync(); ++ clocksource_touch_watchdog(); ++ rcu_cpu_stall_reset(); ++ touch_nmi_watchdog(); + - return 1; -+ /* Failed to get a buffer. */ -+ put_cpu(); -+ local_irq_restore(*flags); -+ return NULL; ++ if (text_len) ++ printk_delay(r.info->level); ++ ++ return true; } -/* @@ -13734,15 +12341,44 @@ index 9b75f6bfc333..78a277ea5c35 100644 - */ -static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) -+/* Put back an sprint buffer and restore interrupts. 
*/ -+static void put_sprint_buf(int id, unsigned long flags) ++static void print_sync_until(struct console *con, u64 seq) { +- static char dropped_text[64]; +- size_t dropped_len = 0; - struct console *con; -- ++ unsigned int flags; ++ u64 printk_seq; + - trace_console_rcuidle(text, len); -+ struct sprint_buffers *bufs; -+ unsigned int index; -+ unsigned int cpu; ++ console_atomic_lock(&flags); ++ for (;;) { ++ printk_seq = atomic64_read(&con->printk_seq); ++ if (printk_seq >= seq) ++ break; ++ if (!print_sync(con, &printk_seq)) ++ break; ++ atomic64_set(&con->printk_seq, printk_seq + 1); ++ } ++ console_atomic_unlock(flags); ++} + +- if (!console_drivers) +- return; ++#ifdef CONFIG_PRINTK_NMI ++#define NUM_RECURSION_CTX 2 ++#else ++#define NUM_RECURSION_CTX 1 ++#endif + +- if (console_dropped) { +- dropped_len = snprintf(dropped_text, sizeof(dropped_text), +- "** %lu printk messages dropped **\n", +- console_dropped); +- console_dropped = 0; +- } ++struct printk_recursion { ++ char count[NUM_RECURSION_CTX]; ++}; - for_each_console(con) { - if (exclusive_console && con != exclusive_console) @@ -13756,350 +12392,181 @@ index 9b75f6bfc333..78a277ea5c35 100644 - continue; - if (con->flags & CON_EXTENDED) - con->write(con, ext_text, ext_len); -- else +- else { +- if (dropped_len) +- con->write(con, dropped_text, dropped_len); - con->write(con, text, len); -+ if (id >= 0) { -+ cpu = id; -+ bufs = per_cpu_ptr(&percpu_sprint_buffers, cpu); -+ index = atomic_read(&bufs->index); -+ atomic_set(&bufs->index, index - 1); +- } ++static DEFINE_PER_CPU(struct printk_recursion, percpu_printk_recursion); ++static char printk_recursion_count[NUM_RECURSION_CTX]; ++ ++static char *printk_recursion_counter(void) ++{ ++ struct printk_recursion *rec; ++ char *count; ++ ++ if (!printk_percpu_data_ready()) { ++ count = &printk_recursion_count[0]; + } else { -+ /* Convert from global buffer representation. */ -+ index = -id - 1; -+ _put_sprint_buf(sprint_global_buffer_map, -+ SPRINT_CTX_DEPTH, index); - } ++ rec = this_cpu_ptr(&percpu_printk_recursion); + -+ put_cpu(); -+ local_irq_restore(flags); - } ++ count = &rec->count[0]; + } +-} - int printk_delay_msec __read_mostly; +-int printk_delay_msec __read_mostly; ++#ifdef CONFIG_PRINTK_NMI ++ if (in_nmi()) ++ count++; ++#endif ++ ++ return count; ++} -static inline void printk_delay(void) -+static inline void printk_delay(int level) ++static bool printk_enter_irqsave(unsigned long *flags) { -+ boot_delay_msec(level); -+ - if (unlikely(printk_delay_msec)) { - int m = printk_delay_msec; +- if (unlikely(printk_delay_msec)) { +- int m = printk_delay_msec; ++ char *count; -@@ -1839,115 +1745,155 @@ static inline void printk_delay(void) +- while (m--) { +- mdelay(1); +- touch_nmi_watchdog(); +- } ++ local_irq_save(*flags); ++ count = printk_recursion_counter(); ++ /* Only 1 level of recursion allowed. */ ++ if (*count > 1) { ++ local_irq_restore(*flags); ++ return false; } ++ (*count)++; ++ ++ return true; ++} ++ ++static void printk_exit_irqrestore(unsigned long flags) ++{ ++ char *count; ++ ++ count = printk_recursion_counter(); ++ (*count)--; ++ local_irq_restore(flags); } --static inline u32 printk_caller_id(void) -+static bool kernel_sync_mode(void) - { -- return in_task() ? task_pid_nr(current) : -- 0x80000000 + raw_smp_processor_id(); -+ return (oops_in_progress || sync_mode); - } - --/* -- * Continuation lines are buffered, and not committed to the record buffer -- * until the line is complete, or a race forces it. 
The line fragments -- * though, are printed immediately to the consoles to ensure everything has -- * reached the console in case of a kernel crash. -- */ --static struct cont { -- char buf[LOG_LINE_MAX]; -- size_t len; /* length == 0 means unused buffer */ -- u32 caller_id; /* printk_caller_id() of first print */ -- u64 ts_nsec; /* time of first print */ -- u8 level; /* log level of first message */ -- u8 facility; /* log facility of first message */ -- enum log_flags flags; /* prefix, newline flags */ --} cont; -- --static void cont_flush(void) --{ -- if (cont.len == 0) -- return; -- -- log_store(cont.caller_id, cont.facility, cont.level, cont.flags, -- cont.ts_nsec, NULL, 0, cont.buf, cont.len); -- cont.len = 0; -+static bool console_can_sync(struct console *con) -+{ -+ if (!(con->flags & CON_ENABLED)) -+ return false; -+ if (con->write_atomic && kernel_sync_mode()) -+ return true; -+ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) -+ return true; -+ if (con->write && (con->flags & CON_BOOT) && !con->thread) -+ return true; -+ return false; + static inline u32 printk_caller_id(void) +@@ -1950,20 +1945,24 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl } --static bool cont_add(u32 caller_id, int facility, int level, -- enum log_flags flags, const char *text, size_t len) -+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len) + __printf(4, 0) +-int vprintk_store(int facility, int level, +- const struct dev_printk_info *dev_info, +- const char *fmt, va_list args) ++static int vprintk_store(int facility, int level, ++ const struct dev_printk_info *dev_info, ++ const char *fmt, va_list args) { -- /* If the line gets too long, split it up in separate records. */ -- if (cont.len + len > sizeof(cont.buf)) { -- cont_flush(); -+ if (!(con->flags & CON_ENABLED)) - return false; -- } -+ if (con->write_atomic && kernel_sync_mode()) -+ con->write_atomic(con, text, text_len); -+ else if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) -+ con->write_atomic(con, text, text_len); -+ else if (con->write && (con->flags & CON_BOOT) && !con->thread) -+ con->write(con, text, text_len); -+ else -+ return false; -+ -+ return true; -+} + const u32 caller_id = printk_caller_id(); + struct prb_reserved_entry e; + enum log_flags lflags = 0; ++ bool final_commit = false; + struct printk_record r; ++ unsigned long irqflags; + u16 trunc_msg_len = 0; + char prefix_buf[8]; + u16 reserve_size; + va_list args2; + u16 text_len; ++ int ret = 0; + u64 ts_nsec; ++ u64 seq; + + /* + * Since the duration of printk() can vary depending on the message +@@ -1973,6 +1972,9 @@ int vprintk_store(int facility, int level, + */ + ts_nsec = local_clock(); + ++ if (!printk_enter_irqsave(&irqflags)) ++ return 0; + -+static bool any_console_can_sync(void) -+{ -+ struct console *con; + /* + * The sprintf needs to come first since the syslog prefix might be + * passed in as a parameter. 
An extra byte must be reserved so that +@@ -1999,6 +2001,7 @@ int vprintk_store(int facility, int level, + if (lflags & LOG_CONT) { + prb_rec_init_wr(&r, reserve_size); + if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { ++ seq = r.info->seq; + text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, + facility, &lflags, fmt, args); + r.info->text_len += text_len; +@@ -2006,11 +2009,13 @@ int vprintk_store(int facility, int level, + if (lflags & LOG_NEWLINE) { + r.info->flags |= LOG_NEWLINE; + prb_final_commit(&e); ++ final_commit = true; + } else { + prb_commit(&e); + } -- if (!cont.len) { -- cont.facility = facility; -- cont.level = level; -- cont.caller_id = caller_id; -- cont.ts_nsec = local_clock(); -- cont.flags = flags; -+ for_each_console(con) { -+ if (console_can_sync(con)) -+ return true; +- return text_len; ++ ret = text_len; ++ goto out; + } } -+ return false; -+} -- memcpy(cont.buf + cont.len, text, len); -- cont.len += len; -+static bool have_atomic_console(void) -+{ -+ struct console *con; - -- // The original flags come from the first line, -- // but later continuations can add a newline. -- if (flags & LOG_NEWLINE) { -- cont.flags |= LOG_NEWLINE; -- cont_flush(); -+ for_each_console(con) { -+ if (!(con->flags & CON_ENABLED)) -+ continue; -+ if (con->write_atomic) -+ return true; - } -+ return false; -+} -+ -+static bool print_sync(struct console *con, char *buf, size_t buf_size, u64 *seq) -+{ -+ struct printk_info info; -+ struct printk_record r; -+ size_t text_len; -+ -+ prb_rec_init_rd(&r, &info, buf, buf_size); -+ -+ if (!prb_read_valid(prb, *seq, &r)) -+ return false; -+ -+ text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); -+ -+ if (!call_sync_console_driver(con, buf, text_len)) -+ return false; -+ -+ *seq = r.info->seq; -+ -+ touch_softlockup_watchdog_sync(); -+ clocksource_touch_watchdog(); -+ rcu_cpu_stall_reset(); -+ touch_nmi_watchdog(); -+ -+ if (text_len) -+ printk_delay(r.info->level); - - return true; - } - --static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) -+static void print_sync_until(u64 seq, struct console *con, char *buf, size_t buf_size) - { -- const u32 caller_id = printk_caller_id(); -+ unsigned int flags; -+ u64 printk_seq; - -- /* -- * If an earlier line was buffered, and we're a continuation -- * write from the same context, try to add it to the buffer. 
-- */ -- if (cont.len) { -- if (cont.caller_id == caller_id && (lflags & LOG_CONT)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -- return text_len; -+ if (!con) { -+ for_each_console(con) { -+ if (console_can_sync(con)) -+ print_sync_until(seq, con, buf, buf_size); - } -- /* Otherwise, make sure it's flushed */ -- cont_flush(); -+ return; - } +@@ -2026,9 +2031,11 @@ int vprintk_store(int facility, int level, -- /* Skip empty continuation lines that couldn't be added - they just flush */ -- if (!text_len && (lflags & LOG_CONT)) -- return 0; -- -- /* If it doesn't end in a newline, try to buffer the current line */ -- if (!(lflags & LOG_NEWLINE)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -- return text_len; -+ console_atomic_lock(&flags); -+ for (;;) { -+ printk_seq = atomic64_read(&con->printk_seq); -+ if (printk_seq >= seq) -+ break; -+ if (!print_sync(con, buf, buf_size, &printk_seq)) -+ break; -+ atomic64_set(&con->printk_seq, printk_seq + 1); + prb_rec_init_wr(&r, reserve_size + trunc_msg_len); + if (!prb_reserve(&e, prb, &r)) +- return 0; ++ goto out; } -+ console_atomic_unlock(flags); -+} - -- /* Store it in the record log */ -- return log_store(caller_id, facility, level, lflags, 0, -- dict, dictlen, text, text_len); -+static inline u32 printk_caller_id(void) -+{ -+ return in_task() ? task_pid_nr(current) : -+ 0x80000000 + raw_smp_processor_id(); - } - --/* Must be called under logbuf_lock. */ --int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, -- const char *fmt, va_list args) -+__printf(4, 0) -+static int vprintk_store(int facility, int level, -+ const struct dev_printk_info *dev_info, -+ const char *fmt, va_list args) - { -- static char textbuf[LOG_LINE_MAX]; -- char *text = textbuf; -- size_t text_len; -+ const u32 caller_id = printk_caller_id(); -+ struct prb_reserved_entry e; - enum log_flags lflags = 0; -+ bool final_commit = false; -+ unsigned long irqflags; -+ struct printk_record r; -+ u16 trunc_msg_len = 0; -+ int sprint_id; -+ u16 text_len; -+ u64 ts_nsec; -+ int ret = 0; -+ char *text; -+ u64 seq; -+ -+ ts_nsec = local_clock(); -+ -+ /* No buffer is available if printk has recursed too much. */ -+ text = get_sprint_buf(&sprint_id, &irqflags); -+ if (!text) -+ return 0; - /* - * The printf needs to come first; we need the syslog - * prefix which might be passed-in as a parameter. 
- */ -- text_len = vscnprintf(text, sizeof(textbuf), fmt, args); -+ text_len = vscnprintf(text, LOG_LINE_MAX, fmt, args); - - /* mark and strip a trailing newline */ - if (text_len && text[text_len-1] == '\n') { -@@ -1977,76 +1923,115 @@ int vprintk_store(int facility, int level, - if (level == LOGLEVEL_DEFAULT) - level = default_message_loglevel; - -- if (dict) -+ if (dev_info) - lflags |= LOG_NEWLINE; - -- return log_output(facility, level, lflags, -- dict, dictlen, text, text_len); -+ if (lflags & LOG_CONT) { -+ prb_rec_init_wr(&r, text_len); -+ if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { -+ seq = r.info->seq; -+ memcpy(&r.text_buf[r.info->text_len], text, text_len); -+ r.info->text_len += text_len; -+ if (lflags & LOG_NEWLINE) { -+ r.info->flags |= LOG_NEWLINE; -+ prb_final_commit(&e); -+ final_commit = true; -+ } else { -+ prb_commit(&e); -+ } -+ ret = text_len; -+ goto out; -+ } -+ } -+ -+ /* Store it in the record log */ -+ -+ prb_rec_init_wr(&r, text_len); -+ -+ if (!prb_reserve(&e, prb, &r)) { -+ /* truncate the message if it is too long for empty buffer */ -+ truncate_msg(&text_len, &trunc_msg_len); -+ prb_rec_init_wr(&r, text_len + trunc_msg_len); -+ /* survive when the log buffer is too small for trunc_msg */ -+ if (!prb_reserve(&e, prb, &r)) -+ goto out; -+ } -+ + seq = r.info->seq; + -+ /* fill message */ -+ memcpy(&r.text_buf[0], text, text_len); -+ if (trunc_msg_len) -+ memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); -+ r.info->text_len = text_len + trunc_msg_len; -+ r.info->facility = facility; -+ r.info->level = level & 7; -+ r.info->flags = lflags & 0x1f; -+ r.info->ts_nsec = ts_nsec; -+ r.info->caller_id = caller_id; -+ if (dev_info) -+ memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); -+ -+ /* insert message */ -+ if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) { -+ prb_commit(&e); + /* fill message */ + text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); + if (trunc_msg_len) +@@ -2043,12 +2050,27 @@ int vprintk_store(int facility, int level, + memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); + + /* A message without a trailing newline can be continued. 
*/ +- if (!(lflags & LOG_NEWLINE)) ++ if (!(lflags & LOG_NEWLINE)) { + prb_commit(&e); +- else + } else { -+ prb_final_commit(&e); + prb_final_commit(&e); + final_commit = true; + } + + ret = text_len + trunc_msg_len; +out: + /* only the kernel may perform synchronous printing */ -+ if (facility == 0 && final_commit && any_console_can_sync()) -+ print_sync_until(seq + 1, NULL, text, PREFIX_MAX + LOG_LINE_MAX); ++ if (facility == 0 && final_commit) { ++ struct console *con; + -+ put_sprint_buf(sprint_id, irqflags); ++ for_each_console(con) { ++ if (console_can_sync(con)) ++ print_sync_until(con, seq + 1); ++ } ++ } + +- return (text_len + trunc_msg_len); ++ printk_exit_irqrestore(irqflags); + return ret; } asmlinkage int vprintk_emit(int facility, int level, -- const char *dict, size_t dictlen, -+ const struct dev_printk_info *dev_info, +@@ -2056,59 +2078,43 @@ asmlinkage int vprintk_emit(int facility, int level, const char *fmt, va_list args) { int printed_len; -- bool in_sched = false, pending_output; +- bool in_sched = false; - unsigned long flags; -- u64 curr_log_seq; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) @@ -14110,19 +12577,16 @@ index 9b75f6bfc333..78a277ea5c35 100644 level = LOGLEVEL_DEFAULT; - in_sched = true; - } - +- - boot_delay_msec(level); - printk_delay(); -- -- /* This stops the holder of console_sem just where we want him */ -- logbuf_lock_irqsave(flags); -- curr_log_seq = log_next_seq; -- printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); -- pending_output = (curr_log_seq != log_next_seq); -- logbuf_unlock_irqrestore(flags); + +- printk_safe_enter_irqsave(flags); + printed_len = vprintk_store(facility, level, dev_info, fmt, args); +- printk_safe_exit_irqrestore(flags); - - /* If called from the scheduler, we can not call up(). */ -- if (!in_sched && pending_output) { +- if (!in_sched) { - /* - * Disable preemption to avoid being preempted while holding - * console_sem which would prevent anyone from printing to @@ -14138,17 +12602,14 @@ index 9b75f6bfc333..78a277ea5c35 100644 - console_unlock(); - preempt_enable(); - } -+ printed_len = vprintk_store(facility, level, dev_info, fmt, args); -- if (pending_output) -- wake_up_klogd(); -+ wake_up_klogd(); + wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); -asmlinkage int vprintk(const char *fmt, va_list args) -+ __printf(1, 0) ++__printf(1, 0) +static int vprintk_default(const char *fmt, va_list args) { - return vprintk_func(fmt, args); @@ -14160,182 +12621,323 @@ index 9b75f6bfc333..78a277ea5c35 100644 +__printf(1, 0) +static int vprintk_func(const char *fmt, va_list args) { -- return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); +- return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +#ifdef CONFIG_KGDB_KDB + /* Allow to pass printk() to kdb but avoid a recursion. */ + if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) + return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); +#endif + return vprintk_default(fmt, args); - } --EXPORT_SYMBOL_GPL(vprintk_default); ++} + +asmlinkage int vprintk(const char *fmt, va_list args) +{ + return vprintk_func(fmt, args); -+} + } +-EXPORT_SYMBOL_GPL(vprintk_default); +EXPORT_SYMBOL(vprintk); /** * printk - print a kernel message -@@ -2088,30 +2073,26 @@ EXPORT_SYMBOL(printk); - #define PREFIX_MAX 0 - #define printk_time false +@@ -2144,42 +2150,162 @@ asmlinkage __visible int printk(const char *fmt, ...) 
+ } + EXPORT_SYMBOL(printk); -+#define prb_read_valid(rb, seq, r) false -+#define prb_first_valid_seq(rb) 0 -+ - static u64 syslog_seq; --static u32 syslog_idx; --static u64 console_seq; --static u32 console_idx; --static u64 exclusive_console_stop_seq; --static u64 log_first_seq; --static u32 log_first_idx; --static u64 log_next_seq; --static char *log_text(const struct printk_log *msg) { return NULL; } --static char *log_dict(const struct printk_log *msg) { return NULL; } --static struct printk_log *log_from_idx(u32 idx) { return NULL; } --static u32 log_next(u32 idx) { return 0; } --static ssize_t msg_print_ext_header(char *buf, size_t size, -- struct printk_log *msg, -- u64 seq) { return 0; } -+ -+static size_t record_print_text(const struct printk_record *r, -+ bool syslog, bool time) -+{ -+ return 0; -+} -+static ssize_t info_print_ext_header(char *buf, size_t size, -+ struct printk_info *info) +-#else /* CONFIG_PRINTK */ ++static int printk_kthread_func(void *data) +{ -+ return 0; -+} - static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *dict, size_t dict_len, -- char *text, size_t text_len) { return 0; } --static void console_lock_spinning_enable(void) { } --static int console_lock_spinning_disable_and_check(void) { return 0; } -+ char *text, size_t text_len, -+ struct dev_printk_info *dev_info) { return 0; } - static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) {} --static size_t msg_print_text(const struct printk_log *msg, bool syslog, -- bool time, char *buf, size_t size) { return 0; } - static bool suppress_message_printing(int level) { return false; } ++ struct console *con = data; ++ unsigned long dropped = 0; ++ char *dropped_text = NULL; ++ struct printk_info info; ++ struct printk_record r; ++ char *ext_text = NULL; ++ size_t dropped_len; ++ int ret = -ENOMEM; ++ char *text = NULL; ++ char *write_text; ++ u64 printk_seq; ++ size_t len; ++ int error; ++ u64 seq; - #endif /* CONFIG_PRINTK */ -@@ -2350,34 +2331,6 @@ int is_console_locked(void) - } - EXPORT_SYMBOL(is_console_locked); +-#define LOG_LINE_MAX 0 +-#define PREFIX_MAX 0 +-#define printk_time false ++ if (con->flags & CON_EXTENDED) { ++ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); ++ if (!ext_text) ++ goto out; ++ } ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ dropped_text = kmalloc(64, GFP_KERNEL); ++ if (!text || !dropped_text) ++ goto out; --/* -- * Check if we have any console that is capable of printing while cpu is -- * booting or shutting down. Requires console_sem. -- */ --static int have_callable_console(void) +-#define prb_read_valid(rb, seq, r) false +-#define prb_first_valid_seq(rb) 0 ++ if (con->flags & CON_EXTENDED) ++ write_text = ext_text; ++ else ++ write_text = text; + +-static u64 syslog_seq; +-static u64 console_seq; +-static u64 exclusive_console_stop_seq; +-static unsigned long console_dropped; ++ seq = atomic64_read(&con->printk_seq); + +-static size_t record_print_text(const struct printk_record *r, +- bool syslog, bool time) -{ -- struct console *con; -- -- for_each_console(con) -- if ((con->flags & CON_ENABLED) && -- (con->flags & CON_ANYTIME)) -- return 1; -- - return 0; -} -- --/* -- * Can we actually use the console at this time on this cpu? -- * -- * Console drivers may assume that per-cpu resources have been allocated. So -- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't -- * call them until this CPU is officially up. 
-- */ --static inline int can_use_console(void) +-static ssize_t info_print_ext_header(char *buf, size_t size, +- struct printk_info *info) -{ -- return cpu_online(raw_smp_processor_id()) || have_callable_console(); +- return 0; -} -- - /** - * console_unlock - unlock the console system - * -@@ -2394,143 +2347,14 @@ static inline int can_use_console(void) - */ - void console_unlock(void) - { -- static char ext_text[CONSOLE_EXT_LOG_MAX]; -- static char text[LOG_LINE_MAX + PREFIX_MAX]; -- unsigned long flags; -- bool do_cond_resched, retry; -- - if (console_suspended) { - up_console_sem(); - return; - } +-static ssize_t msg_print_ext_body(char *buf, size_t size, +- char *text, size_t text_len, +- struct dev_printk_info *dev_info) { return 0; } +-static void console_lock_spinning_enable(void) { } +-static int console_lock_spinning_disable_and_check(void) { return 0; } +-static void call_console_drivers(const char *ext_text, size_t ext_len, +- const char *text, size_t len) {} +-static bool suppress_message_printing(int level) { return false; } ++ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); -- /* -- * Console drivers are called with interrupts disabled, so -- * @console_may_schedule should be cleared before; however, we may -- * end up dumping a lot of lines, for example, if called from -- * console registration path, and should invoke cond_resched() -- * between lines if allowable. Not doing so can cause a very long -- * scheduling stall on a slow console leading to RCU stall and -- * softlockup warnings which exacerbate the issue with more -- * messages practically incapacitating the system. -- * -- * console_trylock() is not able to detect the preemptive -- * context reliably. Therefore the value must be stored before -- * and cleared after the the "again" goto label. -- */ -- do_cond_resched = console_may_schedule; --again: -- console_may_schedule = 0; -- -- /* -- * We released the console_sem lock, so we need to recheck if -- * cpu is online and (if not) is there at least one CON_ANYTIME -- * console. -- */ -- if (!can_use_console()) { -- console_locked = 0; -- up_console_sem(); -- return; -- } -- -- for (;;) { -- struct printk_log *msg; -- size_t ext_len = 0; -- size_t len; -- -- printk_safe_enter_irqsave(flags); -- raw_spin_lock(&logbuf_lock); -- if (console_seq < log_first_seq) { -- len = snprintf(text, sizeof(text), -- "** %llu printk messages dropped **\n", -- log_first_seq - console_seq); -- -- /* messages are gone, move to first one */ -- console_seq = log_first_seq; -- console_idx = log_first_idx; -- } else { -- len = 0; -- } --skip: -- if (console_seq == log_next_seq) -- break; -- -- msg = log_from_idx(console_idx); -- if (suppress_message_printing(msg->level)) { -- /* -- * Skip record we have buffered and already printed -- * directly to the console when we received it, and -- * record that has level above the console loglevel. 
-- */ -- console_idx = log_next(console_idx); -- console_seq++; -- goto skip; -- } +-#endif /* CONFIG_PRINTK */ ++ for (;;) { ++ error = wait_event_interruptible(log_wait, ++ prb_read_valid(prb, seq, &r) || kthread_should_stop()); + +-#ifdef CONFIG_EARLY_PRINTK ++ if (kthread_should_stop()) ++ break; ++ ++ if (error) ++ continue; ++ ++ if (seq != r.info->seq) { ++ dropped += r.info->seq - seq; ++ seq = r.info->seq; ++ } ++ ++ seq++; ++ ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ ++ if (suppress_message_printing(r.info->level)) ++ continue; ++ ++ if (con->flags & CON_EXTENDED) { ++ len = info_print_ext_header(ext_text, ++ CONSOLE_EXT_LOG_MAX, ++ r.info); ++ len += msg_print_ext_body(ext_text + len, ++ CONSOLE_EXT_LOG_MAX - len, ++ &r.text_buf[0], r.info->text_len, ++ &r.info->dev_info); ++ } else { ++ len = record_print_text(&r, ++ console_msg_format & MSG_FORMAT_SYSLOG, ++ printk_time); ++ } ++ ++ printk_seq = atomic64_read(&con->printk_seq); ++ ++ console_lock(); ++ console_may_schedule = 0; ++ ++ if (kernel_sync_mode() && con->write_atomic) { ++ console_unlock(); ++ break; ++ } ++ ++ if (!(con->flags & CON_EXTENDED) && dropped) { ++ dropped_len = snprintf(dropped_text, 64, ++ "** %lu printk messages dropped **\n", ++ dropped); ++ dropped = 0; ++ ++ con->write(con, dropped_text, dropped_len); ++ printk_delay(r.info->level); ++ } ++ ++ con->write(con, write_text, len); ++ if (len) ++ printk_delay(r.info->level); ++ ++ atomic64_cmpxchg_relaxed(&con->printk_seq, printk_seq, seq); ++ ++ console_unlock(); ++ } ++out: ++ kfree(dropped_text); ++ kfree(text); ++ kfree(ext_text); ++ pr_info("%sconsole [%s%d]: printing thread stopped\n", ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); ++ return ret; ++} ++ ++/* Must be called within console_lock(). */ ++static void start_printk_kthread(struct console *con) ++{ ++ con->thread = kthread_run(printk_kthread_func, con, ++ "pr/%s%d", con->name, con->index); ++ if (IS_ERR(con->thread)) { ++ pr_err("%sconsole [%s%d]: unable to start printing thread\n", ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); ++ return; ++ } ++ pr_info("%sconsole [%s%d]: printing thread started\n", ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); ++} ++ ++/* protected by console_lock */ ++static bool kthreads_started; ++ ++/* Must be called within console_lock(). */ ++static void console_try_thread(struct console *con) ++{ ++ if (kthreads_started) { ++ start_printk_kthread(con); ++ return; ++ } ++ ++ /* ++ * The printing threads have not been started yet. If this console ++ * can print synchronously, print all unprinted messages. ++ */ ++ if (console_can_sync(con)) ++ print_sync_until(con, prb_next_seq(prb)); ++} ++ ++#else /* CONFIG_PRINTK */ ++ ++#define prb_first_valid_seq(rb) 0 ++#define prb_next_seq(rb) 0 ++ ++#define console_try_thread(con) ++ ++#endif /* CONFIG_PRINTK */ ++ ++#ifdef CONFIG_EARLY_PRINTK + struct console *early_console; + + asmlinkage __visible void early_printk(const char *fmt, ...) +@@ -2420,34 +2546,6 @@ int is_console_locked(void) + } + EXPORT_SYMBOL(is_console_locked); + +-/* +- * Check if we have any console that is capable of printing while cpu is +- * booting or shutting down. Requires console_sem. +- */ +-static int have_callable_console(void) +-{ +- struct console *con; +- +- for_each_console(con) +- if ((con->flags & CON_ENABLED) && +- (con->flags & CON_ANYTIME)) +- return 1; +- +- return 0; +-} +- +-/* +- * Can we actually use the console at this time on this cpu? 
+- * +- * Console drivers may assume that per-cpu resources have been allocated. So +- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't +- * call them until this CPU is officially up. +- */ +-static inline int can_use_console(void) +-{ +- return cpu_online(raw_smp_processor_id()) || have_callable_console(); +-} +- + /** + * console_unlock - unlock the console system + * +@@ -2464,142 +2562,14 @@ static inline int can_use_console(void) + */ + void console_unlock(void) + { +- static char ext_text[CONSOLE_EXT_LOG_MAX]; +- static char text[LOG_LINE_MAX + PREFIX_MAX]; +- unsigned long flags; +- bool do_cond_resched, retry; +- struct printk_info info; +- struct printk_record r; +- + if (console_suspended) { + up_console_sem(); + return; + } + +- prb_rec_init_rd(&r, &info, text, sizeof(text)); +- +- /* +- * Console drivers are called with interrupts disabled, so +- * @console_may_schedule should be cleared before; however, we may +- * end up dumping a lot of lines, for example, if called from +- * console registration path, and should invoke cond_resched() +- * between lines if allowable. Not doing so can cause a very long +- * scheduling stall on a slow console leading to RCU stall and +- * softlockup warnings which exacerbate the issue with more +- * messages practically incapacitating the system. +- * +- * console_trylock() is not able to detect the preemptive +- * context reliably. Therefore the value must be stored before +- * and cleared after the "again" goto label. +- */ +- do_cond_resched = console_may_schedule; +-again: +- console_may_schedule = 0; +- +- /* +- * We released the console_sem lock, so we need to recheck if +- * cpu is online and (if not) is there at least one CON_ANYTIME +- * console. +- */ +- if (!can_use_console()) { +- console_locked = 0; +- up_console_sem(); +- return; +- } +- +- for (;;) { +- size_t ext_len = 0; +- size_t len; +- +- printk_safe_enter_irqsave(flags); +- raw_spin_lock(&logbuf_lock); +-skip: +- if (!prb_read_valid(prb, console_seq, &r)) +- break; +- +- if (console_seq != r.info->seq) { +- console_dropped += r.info->seq - console_seq; +- console_seq = r.info->seq; +- } +- +- if (suppress_message_printing(r.info->level)) { +- /* +- * Skip record we have buffered and already printed +- * directly to the console when we received it, and +- * record that has level above the console loglevel. +- */ +- console_seq++; +- goto skip; +- } - - /* Output to all consoles once old messages replayed. */ - if (unlikely(exclusive_console && @@ -14343,19 +12945,23 @@ index 9b75f6bfc333..78a277ea5c35 100644 - exclusive_console = NULL; - } - -- len += msg_print_text(msg, -- console_msg_format & MSG_FORMAT_SYSLOG, -- printk_time, text + len, sizeof(text) - len); +- /* +- * Handle extended console text first because later +- * record_print_text() will modify the record buffer in-place. +- */ - if (nr_ext_console_drivers) { -- ext_len = msg_print_ext_header(ext_text, +- ext_len = info_print_ext_header(ext_text, - sizeof(ext_text), -- msg, console_seq); +- r.info); - ext_len += msg_print_ext_body(ext_text + ext_len, - sizeof(ext_text) - ext_len, -- log_dict(msg), msg->dict_len, -- log_text(msg), msg->text_len); +- &r.text_buf[0], +- r.info->text_len, +- &r.info->dev_info); - } -- console_idx = log_next(console_idx); +- len = record_print_text(&r, +- console_msg_format & MSG_FORMAT_SYSLOG, +- printk_time); - console_seq++; - raw_spin_unlock(&logbuf_lock); - @@ -14395,7 +13001,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 - * flush, no worries. 
- */ - raw_spin_lock(&logbuf_lock); -- retry = console_seq != log_next_seq; +- retry = prb_read_valid(prb, console_seq, NULL); - raw_spin_unlock(&logbuf_lock); - printk_safe_exit_irqrestore(flags); - @@ -14404,7 +13010,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 } EXPORT_SYMBOL(console_unlock); -@@ -2580,24 +2404,20 @@ void console_unblank(void) +@@ -2649,23 +2619,20 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -14428,8 +13034,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 - unsigned long flags; - - logbuf_lock_irqsave(flags); -- console_seq = log_first_seq; -- console_idx = log_first_idx; +- console_seq = prb_first_valid_seq(prb); - logbuf_unlock_irqrestore(flags); + seq = prb_first_valid_seq(prb); + for_each_console(c) @@ -14439,16 +13044,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 console_unlock(); } -@@ -2711,6 +2531,8 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) - return -ENOENT; - } - -+static void console_try_thread(struct console *con); -+ - /* - * The console driver calls this routine during kernel initialization - * to register the console printing procedure with printk() and to -@@ -2732,7 +2554,6 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) +@@ -2800,7 +2767,6 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) */ void register_console(struct console *newcon) { @@ -14456,7 +13052,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 struct console *bcon = NULL; int err; -@@ -2756,6 +2577,8 @@ void register_console(struct console *newcon) +@@ -2824,6 +2790,8 @@ void register_console(struct console *newcon) } } @@ -14465,7 +13061,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 if (console_drivers && console_drivers->flags & CON_BOOT) bcon = console_drivers; -@@ -2797,8 +2620,10 @@ void register_console(struct console *newcon) +@@ -2865,8 +2833,10 @@ void register_console(struct console *newcon) * the real console are the same physical device, it's annoying to * see the beginning boot messages twice */ @@ -14477,7 +13073,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 /* * Put this console in the list - keep the -@@ -2820,27 +2645,12 @@ void register_console(struct console *newcon) +@@ -2888,26 +2858,12 @@ void register_console(struct console *newcon) if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -14499,7 +13095,6 @@ index 9b75f6bfc333..78a277ea5c35 100644 - exclusive_console = newcon; - exclusive_console_stop_seq = console_seq; - console_seq = syslog_seq; -- console_idx = syslog_idx; - logbuf_unlock_irqrestore(flags); - } + if (newcon->flags & CON_PRINTBUFFER) @@ -14511,7 +13106,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 console_unlock(); console_sysfs_notify(); -@@ -2914,6 +2724,9 @@ int unregister_console(struct console *console) +@@ -2981,6 +2937,9 @@ int unregister_console(struct console *console) console_unlock(); console_sysfs_notify(); @@ -14521,176 +13116,23 @@ index 9b75f6bfc333..78a277ea5c35 100644 if (console->exit) res = console->exit(console); -@@ -2957,6 +2770,154 @@ void __init console_init(void) +@@ -3063,6 +3022,15 @@ static int __init printk_late_init(void) + unregister_console(con); + } } - } - -+static int printk_kthread_func(void *data) -+{ -+ struct console *con = data; -+ unsigned long dropped = 0; -+ struct printk_info info; -+ struct printk_record r; -+ char *ext_text = NULL; -+ size_t dropped_len; -+ char *dropped_text; -+ int ret = -ENOMEM; -+ char *write_text; -+ u64 printk_seq; -+ size_t len; -+ 
char *text; -+ int error; -+ u64 seq; -+ -+ if (con->flags & CON_EXTENDED) { -+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); -+ if (!ext_text) -+ return ret; -+ } -+ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -+ dropped_text = kmalloc(64, GFP_KERNEL); -+ if (!text || !dropped_text) -+ goto out; -+ -+ if (con->flags & CON_EXTENDED) -+ write_text = ext_text; -+ else -+ write_text = text; -+ -+ seq = atomic64_read(&con->printk_seq); + -+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); -+ -+ for (;;) { -+ error = wait_event_interruptible(log_wait, -+ prb_read_valid(prb, seq, &r) || kthread_should_stop()); -+ -+ if (kthread_should_stop()) -+ break; -+ -+ if (error) -+ continue; -+ -+ if (seq != r.info->seq) { -+ dropped += r.info->seq - seq; -+ seq = r.info->seq; -+ } -+ -+ seq++; -+ -+ if (!(con->flags & CON_ENABLED)) -+ continue; -+ -+ if (suppress_message_printing(r.info->level)) -+ continue; -+ -+ if (con->flags & CON_EXTENDED) { -+ len = info_print_ext_header(ext_text, -+ CONSOLE_EXT_LOG_MAX, -+ r.info); -+ len += msg_print_ext_body(ext_text + len, -+ CONSOLE_EXT_LOG_MAX - len, -+ &r.text_buf[0], r.info->text_len, -+ &r.info->dev_info); -+ } else { -+ len = record_print_text(&r, -+ console_msg_format & MSG_FORMAT_SYSLOG, -+ printk_time); -+ } -+ -+ printk_seq = atomic64_read(&con->printk_seq); -+ -+ console_lock(); -+ console_may_schedule = 0; -+ -+ if (kernel_sync_mode() && con->write_atomic) { -+ console_unlock(); -+ break; -+ } -+ -+ if (!(con->flags & CON_EXTENDED) && dropped) { -+ dropped_len = snprintf(dropped_text, 64, -+ "** %lu printk messages dropped **\n", -+ dropped); -+ dropped = 0; -+ -+ con->write(con, dropped_text, dropped_len); -+ printk_delay(r.info->level); -+ } -+ -+ con->write(con, write_text, len); -+ if (len) -+ printk_delay(r.info->level); -+ -+ atomic64_cmpxchg_relaxed(&con->printk_seq, printk_seq, seq); -+ -+ console_unlock(); -+ } -+out: -+ kfree(dropped_text); -+ kfree(text); -+ kfree(ext_text); -+ pr_info("%sconsole [%s%d]: printing thread stopped\n", -+ (con->flags & CON_BOOT) ? "boot" : "" , -+ con->name, con->index); -+ return ret; -+} -+ -+static void start_printk_kthread(struct console *con) -+{ -+ con->thread = kthread_run(printk_kthread_func, con, -+ "pr/%s%d", con->name, con->index); -+ if (IS_ERR(con->thread)) { -+ pr_err("%sconsole [%s%d]: unable to start printing thread\n", -+ (con->flags & CON_BOOT) ? "boot" : "" , -+ con->name, con->index); -+ return; -+ } -+ pr_info("%sconsole [%s%d]: printing thread started\n", -+ (con->flags & CON_BOOT) ? "boot" : "" , -+ con->name, con->index); -+} -+ -+static bool kthreads_started; -+ -+static void console_try_thread(struct console *con) -+{ -+ unsigned long irqflags; -+ int sprint_id; -+ char *buf; -+ -+ if (kthreads_started) { -+ start_printk_kthread(con); -+ return; -+ } -+ -+ buf = get_sprint_buf(&sprint_id, &irqflags); -+ if (!buf) -+ return; -+ -+ print_sync_until(prb_next_seq(prb), con, buf, PREFIX_MAX + LOG_LINE_MAX); -+ -+ put_sprint_buf(sprint_id, irqflags); -+} -+ - /* - * Some boot consoles access data that is in the init section and which will - * be discarded after the initcalls have been run. 
To make sure that no code -@@ -2996,6 +2957,13 @@ static int __init printk_late_init(void) - unregister_console(con); - } - } -+ -+ console_lock(); -+ for_each_console(con) -+ start_printk_kthread(con); -+ kthreads_started = true; -+ console_unlock(); ++#ifdef CONFIG_PRINTK ++ console_lock(); ++ for_each_console(con) ++ start_printk_kthread(con); ++ kthreads_started = true; ++ console_unlock(); ++#endif + ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); -@@ -3011,7 +2979,6 @@ late_initcall(printk_late_init); +@@ -3078,7 +3046,6 @@ late_initcall(printk_late_init); * Delayed printk version, for scheduler-internal messages: */ #define PRINTK_PENDING_WAKEUP 0x01 @@ -14698,7 +13140,7 @@ index 9b75f6bfc333..78a277ea5c35 100644 static DEFINE_PER_CPU(int, printk_pending); -@@ -3019,12 +2986,6 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) +@@ -3086,14 +3053,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = __this_cpu_xchg(printk_pending, 0); @@ -14709,16 +13151,17 @@ index 9b75f6bfc333..78a277ea5c35 100644 - } - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); +- wake_up_interruptible(&log_wait); ++ wake_up_interruptible_all(&log_wait); } -@@ -3047,25 +3008,10 @@ void wake_up_klogd(void) + + static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = +@@ -3112,25 +3073,10 @@ void wake_up_klogd(void) preempt_enable(); } -void defer_console_output(void) -+__printf(1, 0) -+static int vprintk_deferred(const char *fmt, va_list args) - { +-{ - if (!printk_percpu_data_ready()) - return; - @@ -14729,10 +13172,12 @@ index 9b75f6bfc333..78a277ea5c35 100644 -} - -int vprintk_deferred(const char *fmt, va_list args) --{ ++__printf(1, 0) ++static int vprintk_deferred(const char *fmt, va_list args) + { - int r; - -- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); +- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); - defer_console_output(); - - return r; @@ -14740,76 +13185,11 @@ index 9b75f6bfc333..78a277ea5c35 100644 } int printk_deferred(const char *fmt, ...) -@@ -3194,6 +3140,66 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) - } - EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); - -+/** -+ * pr_flush() - Wait for printing threads to catch up. -+ * -+ * @timeout_ms: The maximum time (in ms) to wait. -+ * @reset_on_progress: Reset the timeout if forward progress is seen. -+ * -+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 -+ * represents infinite waiting. -+ * -+ * If @reset_on_progress is true, the timeout will be reset whenever any -+ * printer has been seen to make some forward progress. -+ * -+ * Context: Any context if @timeout_ms is 0. Otherwise process context and -+ * may sleep if a printer is not caught up. -+ * Return: true if all enabled printers are caught up. 
-+ */ -+static bool pr_flush(int timeout_ms, bool reset_on_progress) -+{ -+ int remaining = timeout_ms; -+ struct console *con; -+ u64 last_diff = 0; -+ u64 printk_seq; -+ u64 diff; -+ u64 seq; -+ -+ seq = prb_next_seq(prb); -+ -+ for (;;) { -+ diff = 0; -+ -+ for_each_console(con) { -+ if (!(con->flags & CON_ENABLED)) -+ continue; -+ printk_seq = atomic64_read(&con->printk_seq); -+ if (printk_seq < seq) -+ diff += seq - printk_seq; -+ } -+ -+ if (diff != last_diff && reset_on_progress) -+ remaining = timeout_ms; -+ -+ if (!diff || remaining == 0) -+ break; -+ -+ if (remaining < 0) { -+ msleep(100); -+ } else if (remaining < 100) { -+ msleep(remaining); -+ remaining = 0; -+ } else { -+ msleep(100); -+ remaining -= 100; -+ } -+ -+ last_diff = diff; -+ } -+ -+ return (diff == 0); -+} -+ - /** - * kmsg_dump - dump kernel log to kernel message dumpers. - * @reason: the reason (oops, panic etc) for dumping -@@ -3205,7 +3211,24 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); +@@ -3269,8 +3215,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); + */ void kmsg_dump(enum kmsg_dump_reason reason) { ++ struct kmsg_dumper_iter iter; struct kmsg_dumper *dumper; - unsigned long flags; + @@ -14825,80 +13205,112 @@ index 9b75f6bfc333..78a277ea5c35 100644 + } + + /* -+ * Give the printing threads time to flush, allowing up to 1 -+ * second of no printing forward progress before giving up. ++ * Give the printing threads time to flush, allowing up to ++ * 1s of no printing forward progress before giving up. + */ + pr_flush(1000, true); + } rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { -@@ -3225,12 +3248,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) - /* initialize iterator with data about the stored records */ - dumper->active = true; +@@ -3288,25 +3252,18 @@ void kmsg_dump(enum kmsg_dump_reason reason) + continue; + /* initialize iterator with data about the stored records */ +- dumper->active = true; +- - logbuf_lock_irqsave(flags); - dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; +- dumper->next_seq = prb_next_seq(prb); - logbuf_unlock_irqrestore(flags); -+ kmsg_dump_rewind_nolock(dumper); ++ iter.active = true; ++ kmsg_dump_rewind(&iter); /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); -@@ -3263,28 +3281,33 @@ void kmsg_dump(enum kmsg_dump_reason reason) - bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) +- dumper->dump(dumper, reason); +- +- /* reset iterator */ +- dumper->active = false; ++ dumper->dump(dumper, reason, &iter); + } + rcu_read_unlock(); + } + + /** +- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) +- * @dumper: registered kmsg dumper ++ * kmsg_dump_get_line - retrieve one kmsg log line ++ * @iter: kmsg dumper iterator + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer +@@ -3320,11 +3277,9 @@ void kmsg_dump(enum kmsg_dump_reason reason) + * + * A return value of FALSE indicates that there are no more records to + * read. +- * +- * The function is similar to kmsg_dump_get_line(), but grabs no locks. 
+ */ +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, +- char *line, size_t size, size_t *len) ++bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, ++ char *line, size_t size, size_t *len) { -- struct printk_log *msg; -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; - size_t l = 0; - bool ret = false; + struct printk_info info; + unsigned int line_count; +@@ -3334,16 +3289,16 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, -+ prb_rec_init_rd(&r, &info, line, size); -+ - if (!dumper->active) + prb_rec_init_rd(&r, &info, line, size); + +- if (!dumper->active) ++ if (!iter->active) goto out; -- if (dumper->cur_seq < log_first_seq) { -- /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -- } -- -- /* last entry */ -- if (dumper->cur_seq >= log_next_seq) -- goto out; -+ /* Read text or count text lines? */ -+ if (line) { -+ if (!prb_read_valid(prb, dumper->cur_seq, &r)) -+ goto out; -+ l = record_print_text(&r, syslog, printk_time); -+ } else { -+ if (!prb_read_valid_info(prb, dumper->cur_seq, -+ &info, &line_count)) { -+ goto out; -+ } -+ l = get_record_print_text_size(&info, line_count, syslog, -+ printk_time); + /* Read text or count text lines? */ + if (line) { +- if (!prb_read_valid(prb, dumper->cur_seq, &r)) ++ if (!prb_read_valid(prb, iter->cur_seq, &r)) + goto out; + l = record_print_text(&r, syslog, printk_time); + } else { +- if (!prb_read_valid_info(prb, dumper->cur_seq, ++ if (!prb_read_valid_info(prb, iter->cur_seq, + &info, &line_count)) { + goto out; + } +@@ -3352,48 +3307,18 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, -- msg = log_from_idx(dumper->cur_idx); -- l = msg_print_text(msg, syslog, printk_time, line, size); -+ } + } -- dumper->cur_idx = log_next(dumper->cur_idx); -- dumper->cur_seq++; -+ dumper->cur_seq = r.info->seq + 1; +- dumper->cur_seq = r.info->seq + 1; ++ iter->cur_seq = r.info->seq + 1; ret = true; out: if (len) -@@ -3312,14 +3335,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, - bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) - { + *len = l; + return ret; + } +- +-/** +- * kmsg_dump_get_line - retrieve one kmsg log line +- * @dumper: registered kmsg dumper +- * @syslog: include the "<4>" prefixes +- * @line: buffer to copy the line to +- * @size: maximum size of the buffer +- * @len: length of line placed into buffer +- * +- * Start at the beginning of the kmsg buffer, with the oldest kmsg +- * record, and copy one record into the provided buffer. +- * +- * Consecutive calls will return the next available record moving +- * towards the end of the buffer with the youngest messages. +- * +- * A return value of FALSE indicates that there are no more records to +- * read. 
+- */ +-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, +- char *line, size_t size, size_t *len) +-{ - unsigned long flags; - bool ret; - @@ -14907,125 +13319,118 @@ index 9b75f6bfc333..78a277ea5c35 100644 - logbuf_unlock_irqrestore(flags); - - return ret; -+ return kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); - } +-} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); -@@ -3329,7 +3345,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + /** + * kmsg_dump_get_buffer - copy kmsg log lines +- * @dumper: registered kmsg dumper ++ * @iter: kmsg dumper iterator * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer -- * @len: length of line placed into buffer -+ * @len_out: length of line placed into buffer - * - * Start at the end of the kmsg buffer and fill the provided buffer - * with as many of the the *youngest* kmsg records that fit into it. -@@ -3343,75 +3359,73 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); +@@ -3410,114 +3335,254 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + * A return value of FALSE indicates that there are no more records to * read. */ - bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) ++bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len_out) { + struct printk_info info; +- unsigned int line_count; + struct printk_record r; - unsigned long flags; -+ struct printk_info info; -+ unsigned int line_count; -+ struct printk_record r; u64 seq; -- u32 idx; u64 next_seq; -- u32 next_idx; - size_t l = 0; + size_t len = 0; bool ret = false; bool time = printk_time; -- if (!dumper->active) -+ if (!dumper->active || !buf || !size) +- prb_rec_init_rd(&r, &info, buf, size); +- +- if (!dumper->active || !buf || !size) ++ if (!iter->active || !buf || !size) goto out; - logbuf_lock_irqsave(flags); -- if (dumper->cur_seq < log_first_seq) { -+ if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -+ dumper->cur_seq = prb_first_valid_seq(prb); +- if (dumper->cur_seq < prb_first_valid_seq(prb)) { +- /* messages are gone, move to first available one */ +- dumper->cur_seq = prb_first_valid_seq(prb); ++ if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { ++ if (info.seq != iter->cur_seq) { ++ /* messages are gone, move to first available one */ ++ iter->cur_seq = info.seq; ++ } } /* last entry */ - if (dumper->cur_seq >= dumper->next_seq) { - logbuf_unlock_irqrestore(flags); -+ if (dumper->cur_seq >= dumper->next_seq) ++ if (iter->cur_seq >= iter->next_seq) goto out; - } - /* calculate length of entire buffer */ - seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ /* -+ * Find first record that fits, including all following records, -+ * into the user-provided buffer for this dump. 
-+ */ - -- l += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -+ if (info.seq >= dumper->next_seq) -+ break; -+ len += get_record_print_text_size(&info, line_count, true, time); - } - +- while (prb_read_valid_info(prb, seq, &info, &line_count)) { +- if (r.info->seq >= dumper->next_seq) +- break; +- l += get_record_print_text_size(&info, line_count, syslog, time); +- seq = r.info->seq + 1; +- } +- - /* move first record forward until length fits into the buffer */ - seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (l >= size && seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- l -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; +- while (l >= size && prb_read_valid_info(prb, seq, +- &info, &line_count)) { +- if (r.info->seq >= dumper->next_seq) +- break; +- l -= get_record_print_text_size(&info, line_count, syslog, time); +- seq = r.info->seq + 1; +- } + /* -+ * Move first record forward until length fits into the buffer. This -+ * is a best effort attempt. If @dumper->next_seq is reached because -+ * the ringbuffer is wrapping too fast, just start filling the buffer -+ * from there. ++ * Find first record that fits, including all following records, ++ * into the user-provided buffer for this dump. Pass in size-1 ++ * because this function (by way of record_print_text()) will ++ * not write more than size-1 bytes of text into @buf. + */ -+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { -+ if (len <= size || info.seq >= dumper->next_seq) -+ break; -+ len -= get_record_print_text_size(&info, line_count, true, time); - } ++ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, ++ size - 1, syslog, time); - /* last message in next interation */ -+ /* Keep track of the last message for the next interation. */ ++ /* ++ * Next kmsg_dump_get_buffer() invocation will dump block of ++ * older records stored right before this one. ++ */ next_seq = seq; -- next_idx = idx; +- /* actually read text into the buffer now */ - l = 0; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); +- while (prb_read_valid(prb, seq, &r)) { +- if (r.info->seq >= dumper->next_seq) +- break; + prb_rec_init_rd(&r, &info, buf, size); -- l += msg_print_text(msg, syslog, time, buf + l, size - l); -- idx = log_next(idx); -- seq++; +- l += record_print_text(&r, syslog, time); + len = 0; + prb_for_each_record(seq, prb, seq, &r) { -+ if (r.info->seq >= dumper->next_seq) ++ if (r.info->seq >= iter->next_seq) + break; -+ + +- /* adjust record to store to remaining buffer space */ +- prb_rec_init_rd(&r, &info, buf + l, size - l); + len += record_print_text(&r, syslog, time); -+ + +- seq = r.info->seq + 1; + /* Adjust record to store to remaining buffer space. 
*/ + prb_rec_init_rd(&r, &info, buf + len, size - len); } - dumper->next_seq = next_seq; -- dumper->next_idx = next_idx; +- dumper->next_seq = next_seq; ++ iter->next_seq = next_seq; ret = true; - logbuf_unlock_irqrestore(flags); out: @@ -15036,37 +13441,25 @@ index 9b75f6bfc333..78a277ea5c35 100644 return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); -@@ -3423,15 +3437,11 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + + /** +- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) +- * @dumper: registered kmsg dumper ++ * kmsg_dump_rewind - reset the iterator ++ * @iter: kmsg dumper iterator + * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple * times within the same dumper.dump() callback. -- * -- * The function is similar to kmsg_dump_rewind(), but grabs no locks. - */ - void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) - { -- dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; -+ dumper->cur_seq = atomic64_read(&clear_seq); -+ dumper->next_seq = prb_next_seq(prb); - } - - /** -@@ -3444,12 +3454,108 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) - */ - void kmsg_dump_rewind(struct kmsg_dumper *dumper) - { -- unsigned long flags; -- -- logbuf_lock_irqsave(flags); - kmsg_dump_rewind_nolock(dumper); -- logbuf_unlock_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - - #endif ++ */ ++void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) ++{ ++ iter->cur_seq = latched_seq_read_nolock(&clear_seq); ++ iter->next_seq = prb_next_seq(prb); ++} ++EXPORT_SYMBOL_GPL(kmsg_dump_rewind); ++ ++#endif + +struct prb_cpulock { + atomic_t owner; @@ -15111,7 +13504,8 @@ index 9b75f6bfc333..78a277ea5c35 100644 + * prb_lock: Perform a processor-reentrant spin lock. + * @cpu_lock: A pointer to the lock object. + * @cpu_store: A "flags" pointer to store lock status information. -+ * + * +- * The function is similar to kmsg_dump_rewind(), but grabs no locks. + * If no processor has the lock, the calling processor takes the lock and + * becomes the owner. If the calling processor is already the owner of the + * lock, this function succeeds immediately. If lock is locked by another @@ -15119,30 +13513,44 @@ index 9b75f6bfc333..78a277ea5c35 100644 + * owner. + * + * It is safe to call this function from any context and state. -+ */ + */ +-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +static void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store) -+{ + { +- dumper->cur_seq = clear_seq; +- dumper->next_seq = prb_next_seq(prb); + for (;;) { + if (__prb_trylock(cpu_lock, cpu_store)) + break; + cpu_relax(); + } -+} -+ + } + +-/** +- * kmsg_dump_rewind - reset the iterator +- * @dumper: registered kmsg dumper +/* + * prb_unlock: Perform a processor-reentrant spin unlock. + * @cpu_lock: A pointer to the lock object. + * @cpu_store: A "flags" object storing lock status information. -+ * + * +- * Reset the dumper's iterator so that kmsg_dump_get_line() and +- * kmsg_dump_get_buffer() can be called again and used multiple +- * times within the same dumper.dump() callback. + * Release the lock. The calling processor must be the owner of the lock. + * + * It is safe to call this function from any context and state. 
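
A hedged sketch of the buffer-oriented path, again assuming the iterator-based ->dump() prototype shown earlier; example_oops_buf and example_commit_to_storage() are illustrative names only. kmsg_dump_get_buffer() copies as many of the youngest records as fit into the buffer, and kmsg_dump_rewind() lets the same callback walk the records again within one invocation.

    #include <linux/kmsg_dump.h>

    #define EXAMPLE_OOPS_BUF_SZ 4096
    static char example_oops_buf[EXAMPLE_OOPS_BUF_SZ];

    /* Hypothetical: write the snapshot to an oops partition. */
    static void example_commit_to_storage(const char *buf, size_t len)
    {
    }

    static void example_oops_dump(struct kmsg_dumper *dumper,
                                  enum kmsg_dump_reason reason,
                                  struct kmsg_dumper_iter *iter)
    {
            size_t len;

            /* Youngest records that fit; ordered oldest-to-newest inside the buffer. */
            if (kmsg_dump_get_buffer(iter, true, example_oops_buf,
                                     EXAMPLE_OOPS_BUF_SZ, &len))
                    example_commit_to_storage(example_oops_buf, len);

            /* The iterator may be rewound for a second pass in the same call. */
            kmsg_dump_rewind(iter);
    }
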
-+ */ + */ +-void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store) -+{ + { +- unsigned long flags; + unsigned long *flags; + unsigned int cpu; -+ + +- logbuf_lock_irqsave(flags); +- kmsg_dump_rewind_nolock(dumper); +- logbuf_unlock_irqrestore(flags); + cpu = atomic_read(&cpu_lock->owner); + atomic_set_release(&cpu_lock->owner, cpu_store); + @@ -15152,8 +13560,10 @@ index 9b75f6bfc333..78a277ea5c35 100644 + } + + put_cpu(); -+} -+ + } +-EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +-#endif +DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); + +void console_atomic_lock(unsigned int *flags) @@ -15167,2489 +13577,82 @@ index 9b75f6bfc333..78a277ea5c35 100644 + prb_unlock(&printk_cpulock, flags); +} +EXPORT_SYMBOL(console_atomic_unlock); -diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c -new file mode 100644 -index 000000000000..24a960a89aa8 ---- /dev/null -+++ b/kernel/printk/printk_ringbuffer.c -@@ -0,0 +1,2086 @@ -+// SPDX-License-Identifier: GPL-2.0 -+ -+#include <linux/kernel.h> -+#include <linux/irqflags.h> -+#include <linux/string.h> -+#include <linux/errno.h> -+#include <linux/bug.h> -+#include "printk_ringbuffer.h" ++ ++static void pr_msleep(bool may_sleep, int ms) ++{ ++ if (may_sleep) { ++ msleep(ms); ++ } else { ++ while (ms--) ++ udelay(1000); ++ } ++} + +/** -+ * DOC: printk_ringbuffer overview -+ * -+ * Data Structure -+ * -------------- -+ * The printk_ringbuffer is made up of 3 internal ringbuffers: -+ * -+ * desc_ring -+ * A ring of descriptors and their meta data (such as sequence number, -+ * timestamp, loglevel, etc.) as well as internal state information about -+ * the record and logical positions specifying where in the other -+ * ringbuffer the text strings are located. -+ * -+ * text_data_ring -+ * A ring of data blocks. A data block consists of an unsigned long -+ * integer (ID) that maps to a desc_ring index followed by the text -+ * string of the record. -+ * -+ * The internal state information of a descriptor is the key element to allow -+ * readers and writers to locklessly synchronize access to the data. -+ * -+ * Implementation -+ * -------------- -+ * -+ * Descriptor Ring -+ * ~~~~~~~~~~~~~~~ -+ * The descriptor ring is an array of descriptors. A descriptor contains -+ * essential meta data to track the data of a printk record using -+ * blk_lpos structs pointing to associated text data blocks (see -+ * "Data Rings" below). Each descriptor is assigned an ID that maps -+ * directly to index values of the descriptor array and has a state. The ID -+ * and the state are bitwise combined into a single descriptor field named -+ * @state_var, allowing ID and state to be synchronously and atomically -+ * updated. -+ * -+ * Descriptors have four states: -+ * -+ * reserved -+ * A writer is modifying the record. -+ * -+ * committed -+ * The record and all its data are written. A writer can reopen the -+ * descriptor (transitioning it back to reserved), but in the committed -+ * state the data is consistent. -+ * -+ * finalized -+ * The record and all its data are complete and available for reading. A -+ * writer cannot reopen the descriptor. -+ * -+ * reusable -+ * The record exists, but its text and/or meta data may no longer be -+ * available. -+ * -+ * Querying the @state_var of a record requires providing the ID of the -+ * descriptor to query. This can yield a possible fifth (pseudo) state: -+ * -+ * miss -+ * The descriptor being queried has an unexpected ID. 
-+ * -+ * The descriptor ring has a @tail_id that contains the ID of the oldest -+ * descriptor and @head_id that contains the ID of the newest descriptor. -+ * -+ * When a new descriptor should be created (and the ring is full), the tail -+ * descriptor is invalidated by first transitioning to the reusable state and -+ * then invalidating all tail data blocks up to and including the data blocks -+ * associated with the tail descriptor (for the text ring). Then -+ * @tail_id is advanced, followed by advancing @head_id. And finally the -+ * @state_var of the new descriptor is initialized to the new ID and reserved -+ * state. -+ * -+ * The @tail_id can only be advanced if the new @tail_id would be in the -+ * committed or reusable queried state. This makes it possible that a valid -+ * sequence number of the tail is always available. -+ * -+ * Descriptor Finalization -+ * ~~~~~~~~~~~~~~~~~~~~~~~ -+ * When a writer calls the commit function prb_commit(), record data is -+ * fully stored and is consistent within the ringbuffer. However, a writer can -+ * reopen that record, claiming exclusive access (as with prb_reserve()), and -+ * modify that record. When finished, the writer must again commit the record. -+ * -+ * In order for a record to be made available to readers (and also become -+ * recyclable for writers), it must be finalized. A finalized record cannot be -+ * reopened and can never become "unfinalized". Record finalization can occur -+ * in three different scenarios: -+ * -+ * 1) A writer can simultaneously commit and finalize its record by calling -+ * prb_final_commit() instead of prb_commit(). -+ * -+ * 2) When a new record is reserved and the previous record has been -+ * committed via prb_commit(), that previous record is automatically -+ * finalized. -+ * -+ * 3) When a record is committed via prb_commit() and a newer record -+ * already exists, the record being committed is automatically finalized. -+ * -+ * Data Ring -+ * ~~~~~~~~~ -+ * The text data ring is a byte array composed of data blocks. Data blocks are -+ * referenced by blk_lpos structs that point to the logical position of the -+ * beginning of a data block and the beginning of the next adjacent data -+ * block. Logical positions are mapped directly to index values of the byte -+ * array ringbuffer. -+ * -+ * Each data block consists of an ID followed by the writer data. The ID is -+ * the identifier of a descriptor that is associated with the data block. A -+ * given data block is considered valid if all of the following conditions -+ * are met: -+ * -+ * 1) The descriptor associated with the data block is in the committed -+ * or finalized queried state. -+ * -+ * 2) The blk_lpos struct within the descriptor associated with the data -+ * block references back to the same data block. -+ * -+ * 3) The data block is within the head/tail logical position range. -+ * -+ * If the writer data of a data block would extend beyond the end of the -+ * byte array, only the ID of the data block is stored at the logical -+ * position and the full data block (ID and writer data) is stored at the -+ * beginning of the byte array. The referencing blk_lpos will point to the -+ * ID before the wrap and the next data block will be at the logical -+ * position adjacent the full data block after the wrap. -+ * -+ * Data rings have a @tail_lpos that points to the beginning of the oldest -+ * data block and a @head_lpos that points to the logical position of the -+ * next (not yet existing) data block. 
-+ * -+ * When a new data block should be created (and the ring is full), tail data -+ * blocks will first be invalidated by putting their associated descriptors -+ * into the reusable state and then pushing the @tail_lpos forward beyond -+ * them. Then the @head_lpos is pushed forward and is associated with a new -+ * descriptor. If a data block is not valid, the @tail_lpos cannot be -+ * advanced beyond it. -+ * -+ * Info Array -+ * ~~~~~~~~~~ -+ * The general meta data of printk records are stored in printk_info structs, -+ * stored in an array with the same number of elements as the descriptor ring. -+ * Each info corresponds to the descriptor of the same index in the -+ * descriptor ring. Info validity is confirmed by evaluating the corresponding -+ * descriptor before and after loading the info. -+ * -+ * Usage -+ * ----- -+ * Here are some simple examples demonstrating writers and readers. For the -+ * examples a global ringbuffer (test_rb) is available (which is not the -+ * actual ringbuffer used by printk):: -+ * -+ * DEFINE_PRINTKRB(test_rb, 15, 5); -+ * -+ * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of -+ * 1 MiB (2 ^ (15 + 5)) for text data. -+ * -+ * Sample writer code:: -+ * -+ * const char *textstr = "message text"; -+ * struct prb_reserved_entry e; -+ * struct printk_record r; -+ * -+ * // specify how much to allocate -+ * prb_rec_init_wr(&r, strlen(textstr) + 1); -+ * -+ * if (prb_reserve(&e, &test_rb, &r)) { -+ * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); -+ * -+ * r.info->text_len = strlen(textstr); -+ * r.info->ts_nsec = local_clock(); -+ * r.info->caller_id = printk_caller_id(); -+ * -+ * // commit and finalize the record -+ * prb_final_commit(&e); -+ * } -+ * -+ * Note that additional writer functions are available to extend a record -+ * after it has been committed but not yet finalized. This can be done as -+ * long as no new records have been reserved and the caller is the same. -+ * -+ * Sample writer code (record extending):: -+ * -+ * // alternate rest of previous example -+ * -+ * r.info->text_len = strlen(textstr); -+ * r.info->ts_nsec = local_clock(); -+ * r.info->caller_id = printk_caller_id(); -+ * -+ * // commit the record (but do not finalize yet) -+ * prb_commit(&e); -+ * } -+ * -+ * ... -+ * -+ * // specify additional 5 bytes text space to extend -+ * prb_rec_init_wr(&r, 5); -+ * -+ * // try to extend, but only if it does not exceed 32 bytes -+ * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id()), 32) { -+ * snprintf(&r.text_buf[r.info->text_len], -+ * r.text_buf_size - r.info->text_len, "hello"); -+ * -+ * r.info->text_len += 5; -+ * -+ * // commit and finalize the record -+ * prb_final_commit(&e); -+ * } -+ * -+ * Sample reader code:: -+ * -+ * struct printk_info info; -+ * struct printk_record r; -+ * char text_buf[32]; -+ * u64 seq; -+ * -+ * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); -+ * -+ * prb_for_each_record(0, &test_rb, &seq, &r) { -+ * if (info.seq != seq) -+ * pr_warn("lost %llu records\n", info.seq - seq); -+ * -+ * if (info.text_len > r.text_buf_size) { -+ * pr_warn("record %llu text truncated\n", info.seq); -+ * text_buf[r.text_buf_size - 1] = 0; -+ * } -+ * -+ * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, -+ * &text_buf[0]); -+ * } -+ * -+ * Note that additional less convenient reader functions are available to -+ * allow complex record access. 
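
To complement the convenience samples above, here is a sketch of the lower-level reader calls used elsewhere in this patch (prb_read_valid_info() to inspect meta data, prb_read_valid() to copy text), run against the overview's test_rb; the sequence number is an arbitrary example value and example_read_one() is a hypothetical helper.

    static void example_read_one(void)
    {
            struct printk_info info;
            struct printk_record r;
            unsigned int line_count;
            char buf[128];
            u64 seq = 42;   /* arbitrary sequence number to look up */

            /* Meta data only: no text is copied for this query. */
            if (prb_read_valid_info(&test_rb, seq, &info, &line_count))
                    pr_info("record %llu: %u line(s), %hu text bytes\n",
                            info.seq, line_count, info.text_len);

            /* Copy the text; terminate it if truncated, as in the sample above. */
            prb_rec_init_rd(&r, &info, &buf[0], sizeof(buf));
            if (prb_read_valid(&test_rb, seq, &r)) {
                    if (info.text_len > r.text_buf_size)
                            buf[r.text_buf_size - 1] = 0;
                    pr_info("%llu: %s\n", info.seq, buf);
            }
    }
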
-+ * -+ * ABA Issues -+ * ~~~~~~~~~~ -+ * To help avoid ABA issues, descriptors are referenced by IDs (array index -+ * values combined with tagged bits counting array wraps) and data blocks are -+ * referenced by logical positions (array index values combined with tagged -+ * bits counting array wraps). However, on 32-bit systems the number of -+ * tagged bits is relatively small such that an ABA incident is (at least -+ * theoretically) possible. For example, if 4 million maximally sized (1KiB) -+ * printk messages were to occur in NMI context on a 32-bit system, the -+ * interrupted context would not be able to recognize that the 32-bit integer -+ * completely wrapped and thus represents a different data block than the one -+ * the interrupted context expects. -+ * -+ * To help combat this possibility, additional state checking is performed -+ * (such as using cmpxchg() even though set() would suffice). These extra -+ * checks are commented as such and will hopefully catch any ABA issue that -+ * a 32-bit system might experience. -+ * -+ * Memory Barriers -+ * ~~~~~~~~~~~~~~~ -+ * Multiple memory barriers are used. To simplify proving correctness and -+ * generating litmus tests, lines of code related to memory barriers -+ * (loads, stores, and the associated memory barriers) are labeled:: -+ * -+ * LMM(function:letter) -+ * -+ * Comments reference the labels using only the "function:letter" part. -+ * -+ * The memory barrier pairs and their ordering are: -+ * -+ * desc_reserve:D / desc_reserve:B -+ * push descriptor tail (id), then push descriptor head (id) -+ * -+ * desc_reserve:D / data_push_tail:B -+ * push data tail (lpos), then set new descriptor reserved (state) -+ * -+ * desc_reserve:D / desc_push_tail:C -+ * push descriptor tail (id), then set new descriptor reserved (state) -+ * -+ * desc_reserve:D / prb_first_seq:C -+ * push descriptor tail (id), then set new descriptor reserved (state) -+ * -+ * desc_reserve:F / desc_read:D -+ * set new descriptor id and reserved (state), then allow writer changes -+ * -+ * data_alloc:A (or data_realloc:A) / desc_read:D -+ * set old descriptor reusable (state), then modify new data block area -+ * -+ * data_alloc:A (or data_realloc:A) / data_push_tail:B -+ * push data tail (lpos), then modify new data block area -+ * -+ * _prb_commit:B / desc_read:B -+ * store writer changes, then set new descriptor committed (state) -+ * -+ * desc_reopen_last:A / _prb_commit:B -+ * set descriptor reserved (state), then read descriptor data -+ * -+ * _prb_commit:B / desc_reserve:D -+ * set new descriptor committed (state), then check descriptor head (id) ++ * pr_flush() - Wait for printing threads to catch up. + * -+ * data_push_tail:D / data_push_tail:A -+ * set descriptor reusable (state), then push data tail (lpos) ++ * @timeout_ms: The maximum time (in ms) to wait. ++ * @reset_on_progress: Reset the timeout if forward progress is seen. + * -+ * desc_push_tail:B / desc_reserve:D -+ * set descriptor reusable (state), then push descriptor tail (id) -+ */ -+ -+#define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) -+#define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) -+ -+#define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) -+#define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) -+ -+/* Determine the data array index from a logical position. */ -+#define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) -+ -+/* Determine the desc array index from an ID or sequence number. 
*/ -+#define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) -+ -+/* Determine how many times the data array has wrapped. */ -+#define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) -+ -+/* Determine if a logical position refers to a data-less block. */ -+#define LPOS_DATALESS(lpos) ((lpos) & 1UL) -+#define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ -+ LPOS_DATALESS((blk)->next)) -+ -+/* Get the logical position at index 0 of the current wrap. */ -+#define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ -+((lpos) & ~DATA_SIZE_MASK(data_ring)) -+ -+/* Get the ID for the same index of the previous wrap as the given ID. */ -+#define DESC_ID_PREV_WRAP(desc_ring, id) \ -+DESC_ID((id) - DESCS_COUNT(desc_ring)) -+ -+/* -+ * A data block: mapped directly to the beginning of the data block area -+ * specified as a logical position within the data ring. ++ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 ++ * represents infinite waiting. + * -+ * @id: the ID of the associated descriptor -+ * @data: the writer data ++ * If @reset_on_progress is true, the timeout will be reset whenever any ++ * printer has been seen to make some forward progress. + * -+ * Note that the size of a data block is only known by its associated -+ * descriptor. -+ */ -+struct prb_data_block { -+ unsigned long id; -+ char data[0]; -+}; -+ -+/* -+ * Return the descriptor associated with @n. @n can be either a -+ * descriptor ID or a sequence number. -+ */ -+static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) -+{ -+ return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; -+} -+ -+/* -+ * Return the printk_info associated with @n. @n can be either a -+ * descriptor ID or a sequence number. -+ */ -+static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) -+{ -+ return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; -+} -+ -+static struct prb_data_block *to_block(struct prb_data_ring *data_ring, -+ unsigned long begin_lpos) -+{ -+ return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; -+} -+ -+/* -+ * Increase the data size to account for data block meta data plus any -+ * padding so that the adjacent data block is aligned on the ID size. ++ * Context: Any context. ++ * Return: true if all enabled printers are caught up. + */ -+static unsigned int to_blk_size(unsigned int size) ++bool pr_flush(int timeout_ms, bool reset_on_progress) +{ -+ struct prb_data_block *db = NULL; ++ int remaining = timeout_ms; ++ struct console *con; ++ u64 last_diff = 0; ++ bool may_sleep; ++ u64 printk_seq; ++ u64 diff; ++ u64 seq; + -+ size += sizeof(*db); -+ size = ALIGN(size, sizeof(db->id)); -+ return size; -+} ++ may_sleep = (preemptible() && !in_softirq()); + -+/* -+ * Sanity checker for reserve size. The ringbuffer code assumes that a data -+ * block does not exceed the maximum possible size that could fit within the -+ * ringbuffer. This function provides that basic size check so that the -+ * assumption is safe. -+ */ -+static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) -+{ -+ struct prb_data_block *db = NULL; ++ seq = prb_next_seq(prb); + -+ if (size == 0) -+ return true; ++ for (;;) { ++ diff = 0; + -+ /* -+ * Ensure the alignment padded size could possibly fit in the data -+ * array. The largest possible data block must still leave room for -+ * at least the ID of the next block. 
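
A worked example of this index arithmetic may help; it uses the test_rb geometry from the overview (15 descriptor-count bits plus 5 extra data bits) and arbitrary example values for the logical position and descriptor ID.

    /*
     * With DEFINE_PRINTKRB(test_rb, 15, 5):
     *
     *   DESCS_COUNT = 1 << 15        = 32768 descriptors
     *   DATA_SIZE   = 1 << (15 + 5)  = 1 MiB of text data
     *
     * For an example logical position lpos = 0x00300123:
     *
     *   DATA_INDEX(data_ring, lpos) = lpos & (DATA_SIZE - 1) = 0x00123
     *   DATA_WRAPS(data_ring, lpos) = lpos >> 20             = 3
     *
     * For an example descriptor id = 0x12345:
     *
     *   DESC_INDEX(desc_ring, id)   = id & (DESCS_COUNT - 1) = 0x2345
     */
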
-+ */ -+ size = to_blk_size(size); -+ if (size > DATA_SIZE(data_ring) - sizeof(db->id)) -+ return false; ++ for_each_console(con) { ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ printk_seq = atomic64_read(&con->printk_seq); ++ if (printk_seq < seq) ++ diff += seq - printk_seq; ++ } + -+ return true; -+} ++ if (diff != last_diff && reset_on_progress) ++ remaining = timeout_ms; + -+/* Query the state of a descriptor. */ -+static enum desc_state get_desc_state(unsigned long id, -+ unsigned long state_val) -+{ -+ if (id != DESC_ID(state_val)) -+ return desc_miss; ++ if (!diff || remaining == 0) ++ break; + -+ return DESC_STATE(state_val); -+} ++ if (remaining < 0) { ++ pr_msleep(may_sleep, 100); ++ } else if (remaining < 100) { ++ pr_msleep(may_sleep, remaining); ++ remaining = 0; ++ } else { ++ pr_msleep(may_sleep, 100); ++ remaining -= 100; ++ } + -+/* -+ * Get a copy of a specified descriptor and return its queried state. If the -+ * descriptor is in an inconsistent state (miss or reserved), the caller can -+ * only expect the descriptor's @state_var field to be valid. -+ * -+ * The sequence number and caller_id can be optionally retrieved. Like all -+ * non-state_var data, they are only valid if the descriptor is in a -+ * consistent state. -+ */ -+static enum desc_state desc_read(struct prb_desc_ring *desc_ring, -+ unsigned long id, struct prb_desc *desc_out, -+ u64 *seq_out, u32 *caller_id_out) -+{ -+ struct printk_info *info = to_info(desc_ring, id); -+ struct prb_desc *desc = to_desc(desc_ring, id); -+ atomic_long_t *state_var = &desc->state_var; -+ enum desc_state d_state; -+ unsigned long state_val; -+ -+ /* Check the descriptor state. */ -+ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ -+ d_state = get_desc_state(id, state_val); -+ if (d_state == desc_miss || d_state == desc_reserved) { -+ /* -+ * The descriptor is in an inconsistent state. Set at least -+ * @state_var so that the caller can see the details of -+ * the inconsistent state. -+ */ -+ goto out; ++ last_diff = diff; + } + -+ /* -+ * Guarantee the state is loaded before copying the descriptor -+ * content. This avoids copying obsolete descriptor content that might -+ * not apply to the descriptor state. This pairs with _prb_commit:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_read:A reads from _prb_commit:B, then desc_read:C reads -+ * from _prb_commit:A. -+ * -+ * Relies on: -+ * -+ * WMB from _prb_commit:A to _prb_commit:B -+ * matching -+ * RMB from desc_read:A to desc_read:C -+ */ -+ smp_rmb(); /* LMM(desc_read:B) */ -+ -+ /* -+ * Copy the descriptor data. The data is not valid until the -+ * state has been re-checked. A memcpy() for all of @desc -+ * cannot be used because of the atomic_t @state_var field. -+ */ -+ memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, -+ sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ -+ if (seq_out) -+ *seq_out = info->seq; /* also part of desc_read:C */ -+ if (caller_id_out) -+ *caller_id_out = info->caller_id; /* also part of desc_read:C */ -+ -+ /* -+ * 1. Guarantee the descriptor content is loaded before re-checking -+ * the state. This avoids reading an obsolete descriptor state -+ * that may not apply to the copied content. This pairs with -+ * desc_reserve:F. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_read:C reads from desc_reserve:G, then desc_read:E -+ * reads from desc_reserve:F. 
-+ * -+ * Relies on: -+ * -+ * WMB from desc_reserve:F to desc_reserve:G -+ * matching -+ * RMB from desc_read:C to desc_read:E -+ * -+ * 2. Guarantee the record data is loaded before re-checking the -+ * state. This avoids reading an obsolete descriptor state that may -+ * not apply to the copied data. This pairs with data_alloc:A and -+ * data_realloc:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If copy_data:A reads from data_alloc:B, then desc_read:E -+ * reads from desc_make_reusable:A. -+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to data_alloc:B -+ * matching -+ * RMB from desc_read:C to desc_read:E -+ * -+ * Note: desc_make_reusable:A and data_alloc:B can be different -+ * CPUs. However, the data_alloc:B CPU (which performs the -+ * full memory barrier) must have previously seen -+ * desc_make_reusable:A. -+ */ -+ smp_rmb(); /* LMM(desc_read:D) */ -+ -+ /* -+ * The data has been copied. Return the current descriptor state, -+ * which may have changed since the load above. -+ */ -+ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ -+ d_state = get_desc_state(id, state_val); -+out: -+ atomic_long_set(&desc_out->state_var, state_val); -+ return d_state; -+} -+ -+/* -+ * Take a specified descriptor out of the finalized state by attempting -+ * the transition from finalized to reusable. Either this context or some -+ * other context will have been successful. -+ */ -+static void desc_make_reusable(struct prb_desc_ring *desc_ring, -+ unsigned long id) -+{ -+ unsigned long val_finalized = DESC_SV(id, desc_finalized); -+ unsigned long val_reusable = DESC_SV(id, desc_reusable); -+ struct prb_desc *desc = to_desc(desc_ring, id); -+ atomic_long_t *state_var = &desc->state_var; -+ -+ atomic_long_cmpxchg_relaxed(state_var, val_finalized, -+ val_reusable); /* LMM(desc_make_reusable:A) */ ++ return (diff == 0); +} -+ -+/* -+ * Given the text data ring, put the associated descriptor of each -+ * data block from @lpos_begin until @lpos_end into the reusable state. -+ * -+ * If there is any problem making the associated descriptor reusable, either -+ * the descriptor has not yet been finalized or another writer context has -+ * already pushed the tail lpos past the problematic data block. Regardless, -+ * on error the caller can re-load the tail lpos to determine the situation. -+ */ -+static bool data_make_reusable(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, -+ unsigned long lpos_begin, -+ unsigned long lpos_end, -+ unsigned long *lpos_out) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct prb_data_block *blk; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; -+ unsigned long id; -+ -+ /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */ -+ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { -+ blk = to_block(data_ring, lpos_begin); -+ -+ /* -+ * Load the block ID from the data block. This is a data race -+ * against a writer that may have newly reserved this data -+ * area. If the loaded value matches a valid descriptor ID, -+ * the blk_lpos of that descriptor will be checked to make -+ * sure it points back to this data block. If the check fails, -+ * the data area has been recycled by another writer. 
-+ */ -+ id = blk->id; /* LMM(data_make_reusable:A) */ -+ -+ d_state = desc_read(desc_ring, id, &desc, -+ NULL, NULL); /* LMM(data_make_reusable:B) */ -+ -+ switch (d_state) { -+ case desc_miss: -+ case desc_reserved: -+ case desc_committed: -+ return false; -+ case desc_finalized: -+ /* -+ * This data block is invalid if the descriptor -+ * does not point back to it. -+ */ -+ if (blk_lpos->begin != lpos_begin) -+ return false; -+ desc_make_reusable(desc_ring, id); -+ break; -+ case desc_reusable: -+ /* -+ * This data block is invalid if the descriptor -+ * does not point back to it. -+ */ -+ if (blk_lpos->begin != lpos_begin) -+ return false; -+ break; -+ } -+ -+ /* Advance @lpos_begin to the next data block. */ -+ lpos_begin = blk_lpos->next; -+ } -+ -+ *lpos_out = lpos_begin; -+ return true; -+} -+ -+/* -+ * Advance the data ring tail to at least @lpos. This function puts -+ * descriptors into the reusable state if the tail is pushed beyond -+ * their associated data block. -+ */ -+static bool data_push_tail(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, -+ unsigned long lpos) -+{ -+ unsigned long tail_lpos_new; -+ unsigned long tail_lpos; -+ unsigned long next_lpos; -+ -+ /* If @lpos is from a data-less block, there is nothing to do. */ -+ if (LPOS_DATALESS(lpos)) -+ return true; -+ -+ /* -+ * Any descriptor states that have transitioned to reusable due to the -+ * data tail being pushed to this loaded value will be visible to this -+ * CPU. This pairs with data_push_tail:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_push_tail:A reads from data_push_tail:D, then this CPU can -+ * see desc_make_reusable:A. -+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to data_push_tail:D -+ * matches -+ * READFROM from data_push_tail:D to data_push_tail:A -+ * thus -+ * READFROM from desc_make_reusable:A to this CPU -+ */ -+ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ -+ -+ /* -+ * Loop until the tail lpos is at or beyond @lpos. This condition -+ * may already be satisfied, resulting in no full memory barrier -+ * from data_push_tail:D being performed. However, since this CPU -+ * sees the new tail lpos, any descriptor states that transitioned to -+ * the reusable state must already be visible. -+ */ -+ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { -+ /* -+ * Make all descriptors reusable that are associated with -+ * data blocks before @lpos. -+ */ -+ if (!data_make_reusable(rb, data_ring, tail_lpos, lpos, -+ &next_lpos)) { -+ /* -+ * 1. Guarantee the block ID loaded in -+ * data_make_reusable() is performed before -+ * reloading the tail lpos. The failed -+ * data_make_reusable() may be due to a newly -+ * recycled data area causing the tail lpos to -+ * have been previously pushed. This pairs with -+ * data_alloc:A and data_realloc:A. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_make_reusable:A reads from data_alloc:B, -+ * then data_push_tail:C reads from -+ * data_push_tail:D. -+ * -+ * Relies on: -+ * -+ * MB from data_push_tail:D to data_alloc:B -+ * matching -+ * RMB from data_make_reusable:A to -+ * data_push_tail:C -+ * -+ * Note: data_push_tail:D and data_alloc:B can be -+ * different CPUs. However, the data_alloc:B -+ * CPU (which performs the full memory -+ * barrier) must have previously seen -+ * data_push_tail:D. -+ * -+ * 2. Guarantee the descriptor state loaded in -+ * data_make_reusable() is performed before -+ * reloading the tail lpos. 
The failed -+ * data_make_reusable() may be due to a newly -+ * recycled descriptor causing the tail lpos to -+ * have been previously pushed. This pairs with -+ * desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If data_make_reusable:B reads from -+ * desc_reserve:F, then data_push_tail:C reads -+ * from data_push_tail:D. -+ * -+ * Relies on: -+ * -+ * MB from data_push_tail:D to desc_reserve:F -+ * matching -+ * RMB from data_make_reusable:B to -+ * data_push_tail:C -+ * -+ * Note: data_push_tail:D and desc_reserve:F can -+ * be different CPUs. However, the -+ * desc_reserve:F CPU (which performs the -+ * full memory barrier) must have previously -+ * seen data_push_tail:D. -+ */ -+ smp_rmb(); /* LMM(data_push_tail:B) */ -+ -+ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos -+ ); /* LMM(data_push_tail:C) */ -+ if (tail_lpos_new == tail_lpos) -+ return false; -+ -+ /* Another CPU pushed the tail. Try again. */ -+ tail_lpos = tail_lpos_new; -+ continue; -+ } -+ -+ /* -+ * Guarantee any descriptor states that have transitioned to -+ * reusable are stored before pushing the tail lpos. A full -+ * memory barrier is needed since other CPUs may have made -+ * the descriptor states reusable. This pairs with -+ * data_push_tail:A. -+ */ -+ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, -+ next_lpos)) { /* LMM(data_push_tail:D) */ -+ break; -+ } -+ } -+ -+ return true; -+} -+ -+/* -+ * Advance the desc ring tail. This function advances the tail by one -+ * descriptor, thus invalidating the oldest descriptor. Before advancing -+ * the tail, the tail descriptor is made reusable and all data blocks up to -+ * and including the descriptor's data block are invalidated (i.e. the data -+ * ring tail is pushed past the data block of the descriptor being made -+ * reusable). -+ */ -+static bool desc_push_tail(struct printk_ringbuffer *rb, -+ unsigned long tail_id) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ -+ d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); -+ -+ switch (d_state) { -+ case desc_miss: -+ /* -+ * If the ID is exactly 1 wrap behind the expected, it is -+ * in the process of being reserved by another writer and -+ * must be considered reserved. -+ */ -+ if (DESC_ID(atomic_long_read(&desc.state_var)) == -+ DESC_ID_PREV_WRAP(desc_ring, tail_id)) { -+ return false; -+ } -+ -+ /* -+ * The ID has changed. Another writer must have pushed the -+ * tail and recycled the descriptor already. Success is -+ * returned because the caller is only interested in the -+ * specified tail being pushed, which it was. -+ */ -+ return true; -+ case desc_reserved: -+ case desc_committed: -+ return false; -+ case desc_finalized: -+ desc_make_reusable(desc_ring, tail_id); -+ break; -+ case desc_reusable: -+ break; -+ } -+ -+ /* -+ * Data blocks must be invalidated before their associated -+ * descriptor can be made available for recycling. Invalidating -+ * them later is not possible because there is no way to trust -+ * data blocks once their associated descriptor is gone. -+ */ -+ -+ if (!data_push_tail(rb, &rb->text_data_ring, desc.text_blk_lpos.next)) -+ return false; -+ -+ /* -+ * Check the next descriptor after @tail_id before pushing the tail -+ * to it because the tail must always be in a finalized or reusable -+ * state. The implementation of prb_first_seq() relies on this. 
-+ * -+ * A successful read implies that the next descriptor is less than or -+ * equal to @head_id so there is no risk of pushing the tail past the -+ * head. -+ */ -+ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, -+ NULL, NULL); /* LMM(desc_push_tail:A) */ -+ -+ if (d_state == desc_finalized || d_state == desc_reusable) { -+ /* -+ * Guarantee any descriptor states that have transitioned to -+ * reusable are stored before pushing the tail ID. This allows -+ * verifying the recycled descriptor state. A full memory -+ * barrier is needed since other CPUs may have made the -+ * descriptor states reusable. This pairs with desc_reserve:D. -+ */ -+ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, -+ DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ -+ } else { -+ /* -+ * Guarantee the last state load from desc_read() is before -+ * reloading @tail_id in order to see a new tail ID in the -+ * case that the descriptor has been recycled. This pairs -+ * with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_push_tail:A reads from desc_reserve:F, then -+ * desc_push_tail:D reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:F -+ * matching -+ * RMB from desc_push_tail:A to desc_push_tail:D -+ * -+ * Note: desc_push_tail:B and desc_reserve:F can be different -+ * CPUs. However, the desc_reserve:F CPU (which performs -+ * the full memory barrier) must have previously seen -+ * desc_push_tail:B. -+ */ -+ smp_rmb(); /* LMM(desc_push_tail:C) */ -+ -+ /* -+ * Re-check the tail ID. The descriptor following @tail_id is -+ * not in an allowed tail state. But if the tail has since -+ * been moved by another CPU, then it does not matter. -+ */ -+ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Reserve a new descriptor, invalidating the oldest if necessary. */ -+static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ unsigned long prev_state_val; -+ unsigned long id_prev_wrap; -+ struct prb_desc *desc; -+ unsigned long head_id; -+ unsigned long id; -+ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ -+ -+ do { -+ desc = to_desc(desc_ring, head_id); -+ -+ id = DESC_ID(head_id + 1); -+ id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); -+ -+ /* -+ * Guarantee the head ID is read before reading the tail ID. -+ * Since the tail ID is updated before the head ID, this -+ * guarantees that @id_prev_wrap is never ahead of the tail -+ * ID. This pairs with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reserve:A reads from desc_reserve:D, then -+ * desc_reserve:C reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:D -+ * matching -+ * RMB from desc_reserve:A to desc_reserve:C -+ * -+ * Note: desc_push_tail:B and desc_reserve:D can be different -+ * CPUs. However, the desc_reserve:D CPU (which performs -+ * the full memory barrier) must have previously seen -+ * desc_push_tail:B. -+ */ -+ smp_rmb(); /* LMM(desc_reserve:B) */ -+ -+ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id -+ )) { /* LMM(desc_reserve:C) */ -+ /* -+ * Make space for the new descriptor by -+ * advancing the tail. -+ */ -+ if (!desc_push_tail(rb, id_prev_wrap)) -+ return false; -+ } -+ -+ /* -+ * 1. Guarantee the tail ID is read before validating the -+ * recycled descriptor state. 
A read memory barrier is -+ * sufficient for this. This pairs with desc_push_tail:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reserve:C reads from desc_push_tail:B, then -+ * desc_reserve:E reads from desc_make_reusable:A. -+ * -+ * Relies on: -+ * -+ * MB from desc_make_reusable:A to desc_push_tail:B -+ * matching -+ * RMB from desc_reserve:C to desc_reserve:E -+ * -+ * Note: desc_make_reusable:A and desc_push_tail:B can be -+ * different CPUs. However, the desc_push_tail:B CPU -+ * (which performs the full memory barrier) must have -+ * previously seen desc_make_reusable:A. -+ * -+ * 2. Guarantee the tail ID is stored before storing the head -+ * ID. This pairs with desc_reserve:B. -+ * -+ * 3. Guarantee any data ring tail changes are stored before -+ * recycling the descriptor. Data ring tail changes can -+ * happen via desc_push_tail()->data_push_tail(). A full -+ * memory barrier is needed since another CPU may have -+ * pushed the data ring tails. This pairs with -+ * data_push_tail:B. -+ * -+ * 4. Guarantee a new tail ID is stored before recycling the -+ * descriptor. A full memory barrier is needed since -+ * another CPU may have pushed the tail ID. This pairs -+ * with desc_push_tail:C and this also pairs with -+ * prb_first_seq:C. -+ * -+ * 5. Guarantee the head ID is stored before trying to -+ * finalize the previous descriptor. This pairs with -+ * _prb_commit:B. -+ */ -+ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, -+ id)); /* LMM(desc_reserve:D) */ -+ -+ desc = to_desc(desc_ring, id); -+ -+ /* -+ * If the descriptor has been recycled, verify the old state val. -+ * See "ABA Issues" about why this verification is performed. -+ */ -+ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ -+ if (prev_state_val && -+ get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { -+ WARN_ON_ONCE(1); -+ return false; -+ } -+ -+ /* -+ * Assign the descriptor a new ID and set its state to reserved. -+ * See "ABA Issues" about why cmpxchg() instead of set() is used. -+ * -+ * Guarantee the new descriptor ID and state is stored before making -+ * any other changes. A write memory barrier is sufficient for this. -+ * This pairs with desc_read:D. -+ */ -+ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, -+ DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ -+ WARN_ON_ONCE(1); -+ return false; -+ } -+ -+ /* Now data in @desc can be modified: LMM(desc_reserve:G) */ -+ -+ *id_out = id; -+ return true; -+} -+ -+/* Determine the end of a data block. */ -+static unsigned long get_next_lpos(struct prb_data_ring *data_ring, -+ unsigned long lpos, unsigned int size) -+{ -+ unsigned long begin_lpos; -+ unsigned long next_lpos; -+ -+ begin_lpos = lpos; -+ next_lpos = lpos + size; -+ -+ /* First check if the data block does not wrap. */ -+ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) -+ return next_lpos; -+ -+ /* Wrapping data blocks store their data at the beginning. */ -+ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); -+} -+ -+/* -+ * Allocate a new data block, invalidating the oldest data block(s) -+ * if necessary. This function also associates the data block with -+ * a specified descriptor. 
-+ */ -+static char *data_alloc(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, unsigned int size, -+ struct prb_data_blk_lpos *blk_lpos, unsigned long id) -+{ -+ struct prb_data_block *blk; -+ unsigned long begin_lpos; -+ unsigned long next_lpos; -+ -+ if (size == 0) { -+ /* Specify a data-less block. */ -+ blk_lpos->begin = NO_LPOS; -+ blk_lpos->next = NO_LPOS; -+ return NULL; -+ } -+ -+ size = to_blk_size(size); -+ -+ begin_lpos = atomic_long_read(&data_ring->head_lpos); -+ -+ do { -+ next_lpos = get_next_lpos(data_ring, begin_lpos, size); -+ -+ if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) { -+ /* Failed to allocate, specify a data-less block. */ -+ blk_lpos->begin = FAILED_LPOS; -+ blk_lpos->next = FAILED_LPOS; -+ return NULL; -+ } -+ -+ /* -+ * 1. Guarantee any descriptor states that have transitioned -+ * to reusable are stored before modifying the newly -+ * allocated data area. A full memory barrier is needed -+ * since other CPUs may have made the descriptor states -+ * reusable. See data_push_tail:A about why the reusable -+ * states are visible. This pairs with desc_read:D. -+ * -+ * 2. Guarantee any updated tail lpos is stored before -+ * modifying the newly allocated data area. Another CPU may -+ * be in data_make_reusable() and is reading a block ID -+ * from this area. data_make_reusable() can handle reading -+ * a garbage block ID value, but then it must be able to -+ * load a new tail lpos. A full memory barrier is needed -+ * since other CPUs may have updated the tail lpos. This -+ * pairs with data_push_tail:B. -+ */ -+ } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, -+ next_lpos)); /* LMM(data_alloc:A) */ -+ -+ blk = to_block(data_ring, begin_lpos); -+ blk->id = id; /* LMM(data_alloc:B) */ -+ -+ if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { -+ /* Wrapping data blocks store their data at the beginning. */ -+ blk = to_block(data_ring, 0); -+ -+ /* -+ * Store the ID on the wrapped block for consistency. -+ * The printk_ringbuffer does not actually use it. -+ */ -+ blk->id = id; -+ } -+ -+ blk_lpos->begin = begin_lpos; -+ blk_lpos->next = next_lpos; -+ -+ return &blk->data[0]; -+} -+ -+/* -+ * Try to resize an existing data block associated with the descriptor -+ * specified by @id. If the resized data block should become wrapped, it -+ * copies the old data to the new data block. If @size yields a data block -+ * with the same or less size, the data block is left as is. -+ * -+ * Fail if this is not the last allocated data block or if there is not -+ * enough space or it is not possible make enough space. -+ * -+ * Return a pointer to the beginning of the entire data buffer or NULL on -+ * failure. -+ */ -+static char *data_realloc(struct printk_ringbuffer *rb, -+ struct prb_data_ring *data_ring, unsigned int size, -+ struct prb_data_blk_lpos *blk_lpos, unsigned long id) -+{ -+ struct prb_data_block *blk; -+ unsigned long head_lpos; -+ unsigned long next_lpos; -+ bool wrapped; -+ -+ /* Reallocation only works if @blk_lpos is the newest data block. */ -+ head_lpos = atomic_long_read(&data_ring->head_lpos); -+ if (head_lpos != blk_lpos->next) -+ return NULL; -+ -+ /* Keep track if @blk_lpos was a wrapping data block. */ -+ wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); -+ -+ size = to_blk_size(size); -+ -+ next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); -+ -+ /* If the data block does not increase, there is nothing to do. 
*/ -+ if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { -+ if (wrapped) -+ blk = to_block(data_ring, 0); -+ else -+ blk = to_block(data_ring, blk_lpos->begin); -+ return &blk->data[0]; -+ } -+ -+ if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) -+ return NULL; -+ -+ /* The memory barrier involvement is the same as data_alloc:A. */ -+ if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, -+ next_lpos)) { /* LMM(data_realloc:A) */ -+ return NULL; -+ } -+ -+ blk = to_block(data_ring, blk_lpos->begin); -+ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { -+ struct prb_data_block *old_blk = blk; -+ -+ /* Wrapping data blocks store their data at the beginning. */ -+ blk = to_block(data_ring, 0); -+ -+ /* -+ * Store the ID on the wrapped block for consistency. -+ * The printk_ringbuffer does not actually use it. -+ */ -+ blk->id = id; -+ -+ if (!wrapped) { -+ /* -+ * Since the allocated space is now in the newly -+ * created wrapping data block, copy the content -+ * from the old data block. -+ */ -+ memcpy(&blk->data[0], &old_blk->data[0], -+ (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); -+ } -+ } -+ -+ blk_lpos->next = next_lpos; -+ -+ return &blk->data[0]; -+} -+ -+/* Return the number of bytes used by a data block. */ -+static unsigned int space_used(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos) -+{ -+ /* Data-less blocks take no space. */ -+ if (BLK_DATALESS(blk_lpos)) -+ return 0; -+ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { -+ /* Data block does not wrap. */ -+ return (DATA_INDEX(data_ring, blk_lpos->next) - -+ DATA_INDEX(data_ring, blk_lpos->begin)); -+ } -+ -+ /* -+ * For wrapping data blocks, the trailing (wasted) space is -+ * also counted. -+ */ -+ return (DATA_INDEX(data_ring, blk_lpos->next) + -+ DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); -+} -+ -+/* -+ * Given @blk_lpos, return a pointer to the writer data from the data block -+ * and calculate the size of the data part. A NULL pointer is returned if -+ * @blk_lpos specifies values that could never be legal. -+ * -+ * This function (used by readers) performs strict validation on the lpos -+ * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is -+ * triggered if an internal error is detected. -+ */ -+static const char *get_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, -+ unsigned int *data_size) -+{ -+ struct prb_data_block *db; -+ -+ /* Data-less data block description. */ -+ if (BLK_DATALESS(blk_lpos)) { -+ if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { -+ *data_size = 0; -+ return ""; -+ } -+ return NULL; -+ } -+ -+ /* Regular data block: @begin less than @next and in same wrap. */ -+ if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && -+ blk_lpos->begin < blk_lpos->next) { -+ db = to_block(data_ring, blk_lpos->begin); -+ *data_size = blk_lpos->next - blk_lpos->begin; -+ -+ /* Wrapping data block: @begin is one wrap behind @next. */ -+ } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) == -+ DATA_WRAPS(data_ring, blk_lpos->next)) { -+ db = to_block(data_ring, 0); -+ *data_size = DATA_INDEX(data_ring, blk_lpos->next); -+ -+ /* Illegal block description. */ -+ } else { -+ WARN_ON_ONCE(1); -+ return NULL; -+ } -+ -+ /* A valid data block will always be aligned to the ID size. 
*/ -+ if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) || -+ WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) { -+ return NULL; -+ } -+ -+ /* A valid data block will always have at least an ID. */ -+ if (WARN_ON_ONCE(*data_size < sizeof(db->id))) -+ return NULL; -+ -+ /* Subtract block ID space from size to reflect data size. */ -+ *data_size -= sizeof(db->id); -+ -+ return &db->data[0]; -+} -+ -+/* -+ * Attempt to transition the newest descriptor from committed back to reserved -+ * so that the record can be modified by a writer again. This is only possible -+ * if the descriptor is not yet finalized and the provided @caller_id matches. -+ */ -+static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring, -+ u32 caller_id, unsigned long *id_out) -+{ -+ unsigned long prev_state_val; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ struct prb_desc *d; -+ unsigned long id; -+ u32 cid; -+ -+ id = atomic_long_read(&desc_ring->head_id); -+ -+ /* -+ * To reduce unnecessarily reopening, first check if the descriptor -+ * state and caller ID are correct. -+ */ -+ d_state = desc_read(desc_ring, id, &desc, NULL, &cid); -+ if (d_state != desc_committed || cid != caller_id) -+ return NULL; -+ -+ d = to_desc(desc_ring, id); -+ -+ prev_state_val = DESC_SV(id, desc_committed); -+ -+ /* -+ * Guarantee the reserved state is stored before reading any -+ * record data. A full memory barrier is needed because @state_var -+ * modification is followed by reading. This pairs with _prb_commit:B. -+ * -+ * Memory barrier involvement: -+ * -+ * If desc_reopen_last:A reads from _prb_commit:B, then -+ * prb_reserve_in_last:A reads from _prb_commit:A. -+ * -+ * Relies on: -+ * -+ * WMB from _prb_commit:A to _prb_commit:B -+ * matching -+ * MB If desc_reopen_last:A to prb_reserve_in_last:A -+ */ -+ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -+ DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */ -+ return NULL; -+ } -+ -+ *id_out = id; -+ return d; -+} -+ -+/** -+ * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer -+ * used by the newest record. -+ * -+ * @e: The entry structure to setup. -+ * @rb: The ringbuffer to re-reserve and extend data in. -+ * @r: The record structure to allocate buffers for. -+ * @caller_id: The caller ID of the caller (reserving writer). -+ * @max_size: Fail if the extended size would be greater than this. -+ * -+ * This is the public function available to writers to re-reserve and extend -+ * data. -+ * -+ * The writer specifies the text size to extend (not the new total size) by -+ * setting the @text_buf_size field of @r. To ensure proper initialization -+ * of @r, prb_rec_init_wr() should be used. -+ * -+ * This function will fail if @caller_id does not match the caller ID of the -+ * newest record. In that case the caller must reserve new data using -+ * prb_reserve(). -+ * -+ * Context: Any context. Disables local interrupts on success. -+ * Return: true if text data could be extended, otherwise false. -+ * -+ * On success: -+ * -+ * - @r->text_buf points to the beginning of the entire text buffer. -+ * -+ * - @r->text_buf_size is set to the new total size of the buffer. -+ * -+ * - @r->info is not touched so that @r->info->text_len could be used -+ * to append the text. -+ * -+ * - prb_record_text_space() can be used on @e to query the new -+ * actually used space. -+ * -+ * Important: All @r->info fields will already be set with the current values -+ * for the record. 
I.e. @r->info->text_len will be less than -+ * @text_buf_size. Writers can use @r->info->text_len to know -+ * where concatenation begins and writers should update -+ * @r->info->text_len after concatenating. -+ */ -+bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r, u32 caller_id, unsigned int max_size) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info; -+ unsigned int data_size; -+ struct prb_desc *d; -+ unsigned long id; -+ -+ local_irq_save(e->irqflags); -+ -+ /* Transition the newest descriptor back to the reserved state. */ -+ d = desc_reopen_last(desc_ring, caller_id, &id); -+ if (!d) { -+ local_irq_restore(e->irqflags); -+ goto fail_reopen; -+ } -+ -+ /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */ -+ -+ info = to_info(desc_ring, id); -+ -+ /* -+ * Set the @e fields here so that prb_commit() can be used if -+ * anything fails from now on. -+ */ -+ e->rb = rb; -+ e->id = id; -+ -+ /* -+ * desc_reopen_last() checked the caller_id, but there was no -+ * exclusive access at that point. The descriptor may have -+ * changed since then. -+ */ -+ if (caller_id != info->caller_id) -+ goto fail; -+ -+ if (BLK_DATALESS(&d->text_blk_lpos)) { -+ if (WARN_ON_ONCE(info->text_len != 0)) { -+ pr_warn_once("wrong text_len value (%hu, expecting 0)\n", -+ info->text_len); -+ info->text_len = 0; -+ } -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ if (r->text_buf_size > max_size) -+ goto fail; -+ -+ r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ } else { -+ if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size)) -+ goto fail; -+ -+ /* -+ * Increase the buffer size to include the original size. If -+ * the meta data (@text_len) is not sane, use the full data -+ * block size. -+ */ -+ if (WARN_ON_ONCE(info->text_len > data_size)) { -+ pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n", -+ info->text_len, data_size); -+ info->text_len = data_size; -+ } -+ r->text_buf_size += info->text_len; -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ if (r->text_buf_size > max_size) -+ goto fail; -+ -+ r->text_buf = data_realloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ } -+ if (r->text_buf_size && !r->text_buf) -+ goto fail; -+ -+ r->info = info; -+ -+ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -+ -+ return true; -+fail: -+ prb_commit(e); -+ /* prb_commit() re-enabled interrupts. */ -+fail_reopen: -+ /* Make it clear to the caller that the re-reserve failed. */ -+ memset(r, 0, sizeof(*r)); -+ return false; -+} -+ -+/* -+ * Attempt to finalize a specified descriptor. If this fails, the descriptor -+ * is either already final or it will finalize itself when the writer commits. -+ */ -+static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) -+{ -+ unsigned long prev_state_val = DESC_SV(id, desc_committed); -+ struct prb_desc *d = to_desc(desc_ring, id); -+ -+ atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, -+ DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ -+} -+ -+/** -+ * prb_reserve() - Reserve space in the ringbuffer. -+ * -+ * @e: The entry structure to setup. -+ * @rb: The ringbuffer to reserve data in. -+ * @r: The record structure to allocate buffers for. -+ * -+ * This is the public function available to writers to reserve data. 
-+ * -+ * The writer specifies the text size to reserve by setting the -+ * @text_buf_size field of @r. To ensure proper initialization of @r, -+ * prb_rec_init_wr() should be used. -+ * -+ * Context: Any context. Disables local interrupts on success. -+ * Return: true if at least text data could be allocated, otherwise false. -+ * -+ * On success, the fields @info and @text_buf of @r will be set by this -+ * function and should be filled in by the writer before committing. Also -+ * on success, prb_record_text_space() can be used on @e to query the actual -+ * space used for the text data block. -+ * -+ * Important: @info->text_len needs to be set correctly by the writer in -+ * order for data to be readable and/or extended. Its value -+ * is initialized to 0. -+ */ -+bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info; -+ struct prb_desc *d; -+ unsigned long id; -+ u64 seq; -+ -+ if (!data_check_size(&rb->text_data_ring, r->text_buf_size)) -+ goto fail; -+ -+ /* -+ * Descriptors in the reserved state act as blockers to all further -+ * reservations once the desc_ring has fully wrapped. Disable -+ * interrupts during the reserve/commit window in order to minimize -+ * the likelihood of this happening. -+ */ -+ local_irq_save(e->irqflags); -+ -+ if (!desc_reserve(rb, &id)) { -+ /* Descriptor reservation failures are tracked. */ -+ atomic_long_inc(&rb->fail); -+ local_irq_restore(e->irqflags); -+ goto fail; -+ } -+ -+ d = to_desc(desc_ring, id); -+ info = to_info(desc_ring, id); -+ -+ /* -+ * All @info fields (except @seq) are cleared and must be filled in -+ * by the writer. Save @seq before clearing because it is used to -+ * determine the new sequence number. -+ */ -+ seq = info->seq; -+ memset(info, 0, sizeof(*info)); -+ -+ /* -+ * Set the @e fields here so that prb_commit() can be used if -+ * text data allocation fails. -+ */ -+ e->rb = rb; -+ e->id = id; -+ -+ /* -+ * Initialize the sequence number if it has "never been set". -+ * Otherwise just increment it by a full wrap. -+ * -+ * @seq is considered "never been set" if it has a value of 0, -+ * _except_ for @infos[0], which was specially setup by the ringbuffer -+ * initializer and therefore is always considered as set. -+ * -+ * See the "Bootstrap" comment block in printk_ringbuffer.h for -+ * details about how the initializer bootstraps the descriptors. -+ */ -+ if (seq == 0 && DESC_INDEX(desc_ring, id) != 0) -+ info->seq = DESC_INDEX(desc_ring, id); -+ else -+ info->seq = seq + DESCS_COUNT(desc_ring); -+ -+ /* -+ * New data is about to be reserved. Once that happens, previous -+ * descriptors are no longer able to be extended. Finalize the -+ * previous descriptor now so that it can be made available to -+ * readers. (For seq==0 there is no previous descriptor.) -+ */ -+ if (info->seq > 0) -+ desc_make_final(desc_ring, DESC_ID(id - 1)); -+ -+ r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size, -+ &d->text_blk_lpos, id); -+ /* If text data allocation fails, a data-less record is committed. */ -+ if (r->text_buf_size && !r->text_buf) { -+ prb_commit(e); -+ /* prb_commit() re-enabled interrupts. */ -+ goto fail; -+ } -+ -+ r->info = info; -+ -+ /* Record full text space used by record. */ -+ e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos); -+ -+ return true; -+fail: -+ /* Make it clear to the caller that the reserve failed. 
*/ -+ memset(r, 0, sizeof(*r)); -+ return false; -+} -+ -+/* Commit the data (possibly finalizing it) and restore interrupts. */ -+static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val) -+{ -+ struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -+ struct prb_desc *d = to_desc(desc_ring, e->id); -+ unsigned long prev_state_val = DESC_SV(e->id, desc_reserved); -+ -+ /* Now the writer has finished all writing: LMM(_prb_commit:A) */ -+ -+ /* -+ * Set the descriptor as committed. See "ABA Issues" about why -+ * cmpxchg() instead of set() is used. -+ * -+ * 1 Guarantee all record data is stored before the descriptor state -+ * is stored as committed. A write memory barrier is sufficient -+ * for this. This pairs with desc_read:B and desc_reopen_last:A. -+ * -+ * 2. Guarantee the descriptor state is stored as committed before -+ * re-checking the head ID in order to possibly finalize this -+ * descriptor. This pairs with desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If prb_commit:A reads from desc_reserve:D, then -+ * desc_make_final:A reads from _prb_commit:B. -+ * -+ * Relies on: -+ * -+ * MB _prb_commit:B to prb_commit:A -+ * matching -+ * MB desc_reserve:D to desc_make_final:A -+ */ -+ if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val, -+ DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */ -+ WARN_ON_ONCE(1); -+ } -+ -+ /* Restore interrupts, the reserve/commit window is finished. */ -+ local_irq_restore(e->irqflags); -+} -+ -+/** -+ * prb_commit() - Commit (previously reserved) data to the ringbuffer. -+ * -+ * @e: The entry containing the reserved data information. -+ * -+ * This is the public function available to writers to commit data. -+ * -+ * Note that the data is not yet available to readers until it is finalized. -+ * Finalizing happens automatically when space for the next record is -+ * reserved. -+ * -+ * See prb_final_commit() for a version of this function that finalizes -+ * immediately. -+ * -+ * Context: Any context. Enables local interrupts. -+ */ -+void prb_commit(struct prb_reserved_entry *e) -+{ -+ struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -+ unsigned long head_id; -+ -+ _prb_commit(e, desc_committed); -+ -+ /* -+ * If this descriptor is no longer the head (i.e. a new record has -+ * been allocated), extending the data for this record is no longer -+ * allowed and therefore it must be finalized. -+ */ -+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ -+ if (head_id != e->id) -+ desc_make_final(desc_ring, e->id); -+} -+ -+/** -+ * prb_final_commit() - Commit and finalize (previously reserved) data to -+ * the ringbuffer. -+ * -+ * @e: The entry containing the reserved data information. -+ * -+ * This is the public function available to writers to commit+finalize data. -+ * -+ * By finalizing, the data is made immediately available to readers. -+ * -+ * This function should only be used if there are no intentions of extending -+ * this data using prb_reserve_in_last(). -+ * -+ * Context: Any context. Enables local interrupts. -+ */ -+void prb_final_commit(struct prb_reserved_entry *e) -+{ -+ _prb_commit(e, desc_finalized); -+} -+ -+/* -+ * Count the number of lines in provided text. All text has at least 1 line -+ * (even if @text_size is 0). Each '\n' processed is counted as an additional -+ * line. 
-+ */ -+static unsigned int count_lines(const char *text, unsigned int text_size) -+{ -+ unsigned int next_size = text_size; -+ unsigned int line_count = 1; -+ const char *next = text; -+ -+ while (next_size) { -+ next = memchr(next, '\n', next_size); -+ if (!next) -+ break; -+ line_count++; -+ next++; -+ next_size = text_size - (next - text); -+ } -+ -+ return line_count; -+} -+ -+/* -+ * Given @blk_lpos, copy an expected @len of data into the provided buffer. -+ * If @line_count is provided, count the number of lines in the data. -+ * -+ * This function (used by readers) performs strict validation on the data -+ * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is -+ * triggered if an internal error is detected. -+ */ -+static bool copy_data(struct prb_data_ring *data_ring, -+ struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf, -+ unsigned int buf_size, unsigned int *line_count) -+{ -+ unsigned int data_size; -+ const char *data; -+ -+ /* Caller might not want any data. */ -+ if ((!buf || !buf_size) && !line_count) -+ return true; -+ -+ data = get_data(data_ring, blk_lpos, &data_size); -+ if (!data) -+ return false; -+ -+ /* -+ * Actual cannot be less than expected. It can be more than expected -+ * because of the trailing alignment padding. -+ * -+ * Note that invalid @len values can occur because the caller loads -+ * the value during an allowed data race. -+ */ -+ if (data_size < (unsigned int)len) -+ return false; -+ -+ /* Caller interested in the line count? */ -+ if (line_count) -+ *line_count = count_lines(data, data_size); -+ -+ /* Caller interested in the data content? */ -+ if (!buf || !buf_size) -+ return true; -+ -+ data_size = min_t(u16, buf_size, len); -+ -+ memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */ -+ return true; -+} -+ -+/* -+ * This is an extended version of desc_read(). It gets a copy of a specified -+ * descriptor. However, it also verifies that the record is finalized and has -+ * the sequence number @seq. On success, 0 is returned. -+ * -+ * Error return values: -+ * -EINVAL: A finalized record with sequence number @seq does not exist. -+ * -ENOENT: A finalized record with sequence number @seq exists, but its data -+ * is not available. This is a valid record, so readers should -+ * continue with the next record. -+ */ -+static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, -+ unsigned long id, u64 seq, -+ struct prb_desc *desc_out) -+{ -+ struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos; -+ enum desc_state d_state; -+ u64 s; -+ -+ d_state = desc_read(desc_ring, id, desc_out, &s, NULL); -+ -+ /* -+ * An unexpected @id (desc_miss) or @seq mismatch means the record -+ * does not exist. A descriptor in the reserved or committed state -+ * means the record does not yet exist for the reader. -+ */ -+ if (d_state == desc_miss || -+ d_state == desc_reserved || -+ d_state == desc_committed || -+ s != seq) { -+ return -EINVAL; -+ } -+ -+ /* -+ * A descriptor in the reusable state may no longer have its data -+ * available; report it as existing but with lost data. Or the record -+ * may actually be a record with lost data. -+ */ -+ if (d_state == desc_reusable || -+ (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) { -+ return -ENOENT; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Copy the ringbuffer data from the record with @seq to the provided -+ * @r buffer. On success, 0 is returned. -+ * -+ * See desc_read_finalized_seq() for error return values. 
-+ */ -+static int prb_read(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r, unsigned int *line_count) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ struct printk_info *info = to_info(desc_ring, seq); -+ struct prb_desc *rdesc = to_desc(desc_ring, seq); -+ atomic_long_t *state_var = &rdesc->state_var; -+ struct prb_desc desc; -+ unsigned long id; -+ int err; -+ -+ /* Extract the ID, used to specify the descriptor to read. */ -+ id = DESC_ID(atomic_long_read(state_var)); -+ -+ /* Get a local copy of the correct descriptor (if available). */ -+ err = desc_read_finalized_seq(desc_ring, id, seq, &desc); -+ -+ /* -+ * If @r is NULL, the caller is only interested in the availability -+ * of the record. -+ */ -+ if (err || !r) -+ return err; -+ -+ /* If requested, copy meta data. */ -+ if (r->info) -+ memcpy(r->info, info, sizeof(*(r->info))); -+ -+ /* Copy text data. If it fails, this is a data-less record. */ -+ if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len, -+ r->text_buf, r->text_buf_size, line_count)) { -+ return -ENOENT; -+ } -+ -+ /* Ensure the record is still finalized and has the same @seq. */ -+ return desc_read_finalized_seq(desc_ring, id, seq, &desc); -+} -+ -+/* Get the sequence number of the tail descriptor. */ -+static u64 prb_first_seq(struct printk_ringbuffer *rb) -+{ -+ struct prb_desc_ring *desc_ring = &rb->desc_ring; -+ enum desc_state d_state; -+ struct prb_desc desc; -+ unsigned long id; -+ u64 seq; -+ -+ for (;;) { -+ id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */ -+ -+ d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */ -+ -+ /* -+ * This loop will not be infinite because the tail is -+ * _always_ in the finalized or reusable state. -+ */ -+ if (d_state == desc_finalized || d_state == desc_reusable) -+ break; -+ -+ /* -+ * Guarantee the last state load from desc_read() is before -+ * reloading @tail_id in order to see a new tail in the case -+ * that the descriptor has been recycled. This pairs with -+ * desc_reserve:D. -+ * -+ * Memory barrier involvement: -+ * -+ * If prb_first_seq:B reads from desc_reserve:F, then -+ * prb_first_seq:A reads from desc_push_tail:B. -+ * -+ * Relies on: -+ * -+ * MB from desc_push_tail:B to desc_reserve:F -+ * matching -+ * RMB prb_first_seq:B to prb_first_seq:A -+ */ -+ smp_rmb(); /* LMM(prb_first_seq:C) */ -+ } -+ -+ return seq; -+} -+ -+/* -+ * Non-blocking read of a record. Updates @seq to the last finalized record -+ * (which may have no data available). -+ * -+ * See the description of prb_read_valid() and prb_read_valid_info() -+ * for details. -+ */ -+static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, -+ struct printk_record *r, unsigned int *line_count) -+{ -+ u64 tail_seq; -+ int err; -+ -+ while ((err = prb_read(rb, *seq, r, line_count))) { -+ tail_seq = prb_first_seq(rb); -+ -+ if (*seq < tail_seq) { -+ /* -+ * Behind the tail. Catch up and try again. This -+ * can happen for -ENOENT and -EINVAL cases. -+ */ -+ *seq = tail_seq; -+ -+ } else if (err == -ENOENT) { -+ /* Record exists, but no data available. Skip. */ -+ (*seq)++; -+ -+ } else { -+ /* Non-existent/non-finalized record. Must stop. */ -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+/** -+ * prb_read_valid() - Non-blocking read of a requested record or (if gone) -+ * the next available record. -+ * -+ * @rb: The ringbuffer to read from. -+ * @seq: The sequence number of the record to read. 
-+ * @r: A record data buffer to store the read record to. -+ * -+ * This is the public function available to readers to read a record. -+ * -+ * The reader provides the @info and @text_buf buffers of @r to be -+ * filled in. Any of the buffer pointers can be set to NULL if the reader -+ * is not interested in that data. To ensure proper initialization of @r, -+ * prb_rec_init_rd() should be used. -+ * -+ * Context: Any context. -+ * Return: true if a record was read, otherwise false. -+ * -+ * On success, the reader must check r->info.seq to see which record was -+ * actually read. This allows the reader to detect dropped records. -+ * -+ * Failure means @seq refers to a not yet written record. -+ */ -+bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r) -+{ -+ return _prb_read_valid(rb, &seq, r, NULL); -+} -+ -+/** -+ * prb_read_valid_info() - Non-blocking read of meta data for a requested -+ * record or (if gone) the next available record. -+ * -+ * @rb: The ringbuffer to read from. -+ * @seq: The sequence number of the record to read. -+ * @info: A buffer to store the read record meta data to. -+ * @line_count: A buffer to store the number of lines in the record text. -+ * -+ * This is the public function available to readers to read only the -+ * meta data of a record. -+ * -+ * The reader provides the @info, @line_count buffers to be filled in. -+ * Either of the buffer pointers can be set to NULL if the reader is not -+ * interested in that data. -+ * -+ * Context: Any context. -+ * Return: true if a record's meta data was read, otherwise false. -+ * -+ * On success, the reader must check info->seq to see which record meta data -+ * was actually read. This allows the reader to detect dropped records. -+ * -+ * Failure means @seq refers to a not yet written record. -+ */ -+bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_info *info, unsigned int *line_count) -+{ -+ struct printk_record r; -+ -+ prb_rec_init_rd(&r, info, NULL, 0); -+ -+ return _prb_read_valid(rb, &seq, &r, line_count); -+} -+ -+/** -+ * prb_first_valid_seq() - Get the sequence number of the oldest available -+ * record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what the -+ * first/oldest valid sequence number is. -+ * -+ * This provides readers a starting point to begin iterating the ringbuffer. -+ * -+ * Context: Any context. -+ * Return: The sequence number of the first/oldest record or, if the -+ * ringbuffer is empty, 0 is returned. -+ */ -+u64 prb_first_valid_seq(struct printk_ringbuffer *rb) -+{ -+ u64 seq = 0; -+ -+ if (!_prb_read_valid(rb, &seq, NULL, NULL)) -+ return 0; -+ -+ return seq; -+} -+ -+/** -+ * prb_next_seq() - Get the sequence number after the last available record. -+ * -+ * @rb: The ringbuffer to get the sequence number from. -+ * -+ * This is the public function available to readers to see what the next -+ * newest sequence number available to readers will be. -+ * -+ * This provides readers a sequence number to jump to if all currently -+ * available records should be skipped. -+ * -+ * Context: Any context. -+ * Return: The sequence number of the next newest (not yet available) record -+ * for readers. -+ */ -+u64 prb_next_seq(struct printk_ringbuffer *rb) -+{ -+ u64 seq = 0; -+ -+ /* Search forward from the oldest descriptor. 
*/ -+ while (_prb_read_valid(rb, &seq, NULL, NULL)) -+ seq++; -+ -+ return seq; -+} -+ -+/** -+ * prb_init() - Initialize a ringbuffer to use provided external buffers. -+ * -+ * @rb: The ringbuffer to initialize. -+ * @text_buf: The data buffer for text data. -+ * @textbits: The size of @text_buf as a power-of-2 value. -+ * @descs: The descriptor buffer for ringbuffer records. -+ * @descbits: The count of @descs items as a power-of-2 value. -+ * @infos: The printk_info buffer for ringbuffer records. -+ * -+ * This is the public function available to writers to setup a ringbuffer -+ * during runtime using provided buffers. -+ * -+ * This must match the initialization of DEFINE_PRINTKRB(). -+ * -+ * Context: Any context. -+ */ -+void prb_init(struct printk_ringbuffer *rb, -+ char *text_buf, unsigned int textbits, -+ struct prb_desc *descs, unsigned int descbits, -+ struct printk_info *infos) -+{ -+ memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0])); -+ memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0])); -+ -+ rb->desc_ring.count_bits = descbits; -+ rb->desc_ring.descs = descs; -+ rb->desc_ring.infos = infos; -+ atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); -+ atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); -+ -+ rb->text_data_ring.size_bits = textbits; -+ rb->text_data_ring.data = text_buf; -+ atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits)); -+ atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits)); -+ -+ atomic_long_set(&rb->fail, 0); -+ -+ atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits)); -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS; -+ descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS; -+ -+ infos[0].seq = -(u64)_DESCS_COUNT(descbits); -+ infos[_DESCS_COUNT(descbits) - 1].seq = 0; -+} -+ -+/** -+ * prb_record_text_space() - Query the full actual used ringbuffer space for -+ * the text data of a reserved entry. -+ * -+ * @e: The successfully reserved entry to query. -+ * -+ * This is the public function available to writers to see how much actual -+ * space is used in the ringbuffer to store the text data of the specified -+ * entry. -+ * -+ * This function is only valid if @e has been successfully reserved using -+ * prb_reserve(). -+ * -+ * Context: Any context. -+ * Return: The size in bytes used by the text data of the associated record. -+ */ -+unsigned int prb_record_text_space(struct prb_reserved_entry *e) -+{ -+ return e->text_space; -+} -diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h -new file mode 100644 -index 000000000000..5dc9d022db07 ---- /dev/null -+++ b/kernel/printk/printk_ringbuffer.h -@@ -0,0 +1,382 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+ -+#ifndef _KERNEL_PRINTK_RINGBUFFER_H -+#define _KERNEL_PRINTK_RINGBUFFER_H -+ -+#include <linux/atomic.h> -+#include <linux/dev_printk.h> -+ -+/* -+ * Meta information about each stored message. -+ * -+ * All fields are set by the printk code except for @seq, which is -+ * set by the ringbuffer code. -+ */ -+struct printk_info { -+ u64 seq; /* sequence number */ -+ u64 ts_nsec; /* timestamp in nanoseconds */ -+ u16 text_len; /* length of text message */ -+ u8 facility; /* syslog facility */ -+ u8 flags:5; /* internal record flags */ -+ u8 level:3; /* syslog level */ -+ u32 caller_id; /* thread id or processor id */ -+ -+ struct dev_printk_info dev_info; -+}; -+ -+/* -+ * A structure providing the buffers, used by writers and readers. 
-+ * -+ * Writers: -+ * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling -+ * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to -+ * buffers reserved for that writer. -+ * -+ * Readers: -+ * Using prb_rec_init_rd(), a reader sets all fields before calling -+ * prb_read_valid(). Note that the reader provides the @info and @text_buf, -+ * buffers. On success, the struct pointed to by @info will be filled and -+ * the char array pointed to by @text_buf will be filled with text data. -+ */ -+struct printk_record { -+ struct printk_info *info; -+ char *text_buf; -+ unsigned int text_buf_size; -+}; -+ -+/* Specifies the logical position and span of a data block. */ -+struct prb_data_blk_lpos { -+ unsigned long begin; -+ unsigned long next; -+}; -+ -+/* -+ * A descriptor: the complete meta-data for a record. -+ * -+ * @state_var: A bitwise combination of descriptor ID and descriptor state. -+ */ -+struct prb_desc { -+ atomic_long_t state_var; -+ struct prb_data_blk_lpos text_blk_lpos; -+}; -+ -+/* A ringbuffer of "ID + data" elements. */ -+struct prb_data_ring { -+ unsigned int size_bits; -+ char *data; -+ atomic_long_t head_lpos; -+ atomic_long_t tail_lpos; -+}; -+ -+/* A ringbuffer of "struct prb_desc" elements. */ -+struct prb_desc_ring { -+ unsigned int count_bits; -+ struct prb_desc *descs; -+ struct printk_info *infos; -+ atomic_long_t head_id; -+ atomic_long_t tail_id; -+}; -+ -+/* -+ * The high level structure representing the printk ringbuffer. -+ * -+ * @fail: Count of failed prb_reserve() calls where not even a data-less -+ * record was created. -+ */ -+struct printk_ringbuffer { -+ struct prb_desc_ring desc_ring; -+ struct prb_data_ring text_data_ring; -+ atomic_long_t fail; -+}; -+ -+/* -+ * Used by writers as a reserve/commit handle. -+ * -+ * @rb: Ringbuffer where the entry is reserved. -+ * @irqflags: Saved irq flags to restore on entry commit. -+ * @id: ID of the reserved descriptor. -+ * @text_space: Total occupied buffer space in the text data ring, including -+ * ID, alignment padding, and wrapping data blocks. -+ * -+ * This structure is an opaque handle for writers. Its contents are only -+ * to be used by the ringbuffer implementation. -+ */ -+struct prb_reserved_entry { -+ struct printk_ringbuffer *rb; -+ unsigned long irqflags; -+ unsigned long id; -+ unsigned int text_space; -+}; -+ -+/* The possible responses of a descriptor state-query. */ -+enum desc_state { -+ desc_miss = -1, /* ID mismatch (pseudo state) */ -+ desc_reserved = 0x0, /* reserved, in use by writer */ -+ desc_committed = 0x1, /* committed by writer, could get reopened */ -+ desc_finalized = 0x2, /* committed, no further modification allowed */ -+ desc_reusable = 0x3, /* free, not yet used by any writer */ -+}; -+ -+#define _DATA_SIZE(sz_bits) (1UL << (sz_bits)) -+#define _DESCS_COUNT(ct_bits) (1U << (ct_bits)) -+#define DESC_SV_BITS (sizeof(unsigned long) * 8) -+#define DESC_FLAGS_SHIFT (DESC_SV_BITS - 2) -+#define DESC_FLAGS_MASK (3UL << DESC_FLAGS_SHIFT) -+#define DESC_STATE(sv) (3UL & (sv >> DESC_FLAGS_SHIFT)) -+#define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) -+#define DESC_ID_MASK (~DESC_FLAGS_MASK) -+#define DESC_ID(sv) ((sv) & DESC_ID_MASK) -+#define FAILED_LPOS 0x1 -+#define NO_LPOS 0x3 -+ -+#define FAILED_BLK_LPOS \ -+{ \ -+ .begin = FAILED_LPOS, \ -+ .next = FAILED_LPOS, \ -+} -+ -+/* -+ * Descriptor Bootstrap -+ * -+ * The descriptor array is minimally initialized to allow immediate usage -+ * by readers and writers. 
The requirements that the descriptor array -+ * initialization must satisfy: -+ * -+ * Req1 -+ * The tail must point to an existing (committed or reusable) descriptor. -+ * This is required by the implementation of prb_first_seq(). -+ * -+ * Req2 -+ * Readers must see that the ringbuffer is initially empty. -+ * -+ * Req3 -+ * The first record reserved by a writer is assigned sequence number 0. -+ * -+ * To satisfy Req1, the tail initially points to a descriptor that is -+ * minimally initialized (having no data block, i.e. data-less with the -+ * data block's lpos @begin and @next values set to FAILED_LPOS). -+ * -+ * To satisfy Req2, the initial tail descriptor is initialized to the -+ * reusable state. Readers recognize reusable descriptors as existing -+ * records, but skip over them. -+ * -+ * To satisfy Req3, the last descriptor in the array is used as the initial -+ * head (and tail) descriptor. This allows the first record reserved by a -+ * writer (head + 1) to be the first descriptor in the array. (Only the first -+ * descriptor in the array could have a valid sequence number of 0.) -+ * -+ * The first time a descriptor is reserved, it is assigned a sequence number -+ * with the value of the array index. A "first time reserved" descriptor can -+ * be recognized because it has a sequence number of 0 but does not have an -+ * index of 0. (Only the first descriptor in the array could have a valid -+ * sequence number of 0.) After the first reservation, all future reservations -+ * (recycling) simply involve incrementing the sequence number by the array -+ * count. -+ * -+ * Hack #1 -+ * Only the first descriptor in the array is allowed to have the sequence -+ * number 0. In this case it is not possible to recognize if it is being -+ * reserved the first time (set to index value) or has been reserved -+ * previously (increment by the array count). This is handled by _always_ -+ * incrementing the sequence number by the array count when reserving the -+ * first descriptor in the array. In order to satisfy Req3, the sequence -+ * number of the first descriptor in the array is initialized to minus -+ * the array count. Then, upon the first reservation, it is incremented -+ * to 0, thus satisfying Req3. -+ * -+ * Hack #2 -+ * prb_first_seq() can be called at any time by readers to retrieve the -+ * sequence number of the tail descriptor. However, due to Req2 and Req3, -+ * initially there are no records to report the sequence number of -+ * (sequence numbers are u64 and there is nothing less than 0). To handle -+ * this, the sequence number of the initial tail descriptor is initialized -+ * to 0. Technically this is incorrect, because there is no record with -+ * sequence number 0 (yet) and the tail descriptor is not the first -+ * descriptor in the array. But it allows prb_read_valid() to correctly -+ * report the existence of a record for _any_ given sequence number at all -+ * times. Bootstrapping is complete when the tail is pushed the first -+ * time, thus finally pointing to the first descriptor reserved by a -+ * writer, which has the assigned sequence number 0. -+ */ -+ -+/* -+ * Initiating Logical Value Overflows -+ * -+ * Both logical position (lpos) and ID values can be mapped to array indexes -+ * but may experience overflows during the lifetime of the system. To ensure -+ * that printk_ringbuffer can handle the overflows for these types, initial -+ * values are chosen that map to the correct initial array indexes, but will -+ * result in overflows soon. 
-+ * -+ * BLK0_LPOS -+ * The initial @head_lpos and @tail_lpos for data rings. It is at index -+ * 0 and the lpos value is such that it will overflow on the first wrap. -+ * -+ * DESC0_ID -+ * The initial @head_id and @tail_id for the desc ring. It is at the last -+ * index of the descriptor array (see Req3 above) and the ID value is such -+ * that it will overflow on the second wrap. -+ */ -+#define BLK0_LPOS(sz_bits) (-(_DATA_SIZE(sz_bits))) -+#define DESC0_ID(ct_bits) DESC_ID(-(_DESCS_COUNT(ct_bits) + 1)) -+#define DESC0_SV(ct_bits) DESC_SV(DESC0_ID(ct_bits), desc_reusable) -+ -+/* -+ * Define a ringbuffer with an external text data buffer. The same as -+ * DEFINE_PRINTKRB() but requires specifying an external buffer for the -+ * text data. -+ * -+ * Note: The specified external buffer must be of the size: -+ * 2 ^ (descbits + avgtextbits) -+ */ -+#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \ -+static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ -+ /* the initial head and tail */ \ -+ [_DESCS_COUNT(descbits) - 1] = { \ -+ /* reusable */ \ -+ .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ -+ /* no associated data block */ \ -+ .text_blk_lpos = FAILED_BLK_LPOS, \ -+ }, \ -+}; \ -+static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ -+ /* this will be the first record reserved by a writer */ \ -+ [0] = { \ -+ /* will be incremented to 0 on the first reservation */ \ -+ .seq = -(u64)_DESCS_COUNT(descbits), \ -+ }, \ -+ /* the initial head and tail */ \ -+ [_DESCS_COUNT(descbits) - 1] = { \ -+ /* reports the first seq value during the bootstrap phase */ \ -+ .seq = 0, \ -+ }, \ -+}; \ -+static struct printk_ringbuffer name = { \ -+ .desc_ring = { \ -+ .count_bits = descbits, \ -+ .descs = &_##name##_descs[0], \ -+ .infos = &_##name##_infos[0], \ -+ .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ -+ }, \ -+ .text_data_ring = { \ -+ .size_bits = (avgtextbits) + (descbits), \ -+ .data = text_buf, \ -+ .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ -+ }, \ -+ .fail = ATOMIC_LONG_INIT(0), \ -+} -+ -+/** -+ * DEFINE_PRINTKRB() - Define a ringbuffer. -+ * -+ * @name: The name of the ringbuffer variable. -+ * @descbits: The number of descriptors as a power-of-2 value. -+ * @avgtextbits: The average text data size per record as a power-of-2 value. -+ * -+ * This is a macro for defining a ringbuffer and all internal structures -+ * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a -+ * variant where the text data buffer can be specified externally. -+ */ -+#define DEFINE_PRINTKRB(name, descbits, avgtextbits) \ -+static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ -+ __aligned(__alignof__(unsigned long)); \ -+_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) -+ -+/* Writer Interface */ -+ -+/** -+ * prb_rec_init_wd() - Initialize a buffer for writing records. -+ * -+ * @r: The record to initialize. -+ * @text_buf_size: The needed text buffer size. 
-+ */ -+static inline void prb_rec_init_wr(struct printk_record *r, -+ unsigned int text_buf_size) -+{ -+ r->info = NULL; -+ r->text_buf = NULL; -+ r->text_buf_size = text_buf_size; -+} -+ -+bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r); -+bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, -+ struct printk_record *r, u32 caller_id, unsigned int max_size); -+void prb_commit(struct prb_reserved_entry *e); -+void prb_final_commit(struct prb_reserved_entry *e); -+ -+void prb_init(struct printk_ringbuffer *rb, -+ char *text_buf, unsigned int text_buf_size, -+ struct prb_desc *descs, unsigned int descs_count_bits, -+ struct printk_info *infos); -+unsigned int prb_record_text_space(struct prb_reserved_entry *e); -+ -+/* Reader Interface */ -+ -+/** -+ * prb_rec_init_rd() - Initialize a buffer for reading records. -+ * -+ * @r: The record to initialize. -+ * @info: A buffer to store record meta-data. -+ * @text_buf: A buffer to store text data. -+ * @text_buf_size: The size of @text_buf. -+ * -+ * Initialize all the fields that a reader is interested in. All arguments -+ * (except @r) are optional. Only record data for arguments that are -+ * non-NULL or non-zero will be read. -+ */ -+static inline void prb_rec_init_rd(struct printk_record *r, -+ struct printk_info *info, -+ char *text_buf, unsigned int text_buf_size) -+{ -+ r->info = info; -+ r->text_buf = text_buf; -+ r->text_buf_size = text_buf_size; -+} -+ -+/** -+ * prb_for_each_record() - Iterate over the records of a ringbuffer. -+ * -+ * @from: The sequence number to begin with. -+ * @rb: The ringbuffer to iterate over. -+ * @s: A u64 to store the sequence number on each iteration. -+ * @r: A printk_record to store the record on each iteration. -+ * -+ * This is a macro for conveniently iterating over a ringbuffer. -+ * Note that @s may not be the sequence number of the record on each -+ * iteration. For the sequence number, @r->info->seq should be checked. -+ * -+ * Context: Any context. -+ */ -+#define prb_for_each_record(from, rb, s, r) \ -+for ((s) = from; prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1) -+ -+/** -+ * prb_for_each_info() - Iterate over the meta data of a ringbuffer. -+ * -+ * @from: The sequence number to begin with. -+ * @rb: The ringbuffer to iterate over. -+ * @s: A u64 to store the sequence number on each iteration. -+ * @i: A printk_info to store the record meta data on each iteration. -+ * @lc: An unsigned int to store the text line count of each record. -+ * -+ * This is a macro for conveniently iterating over a ringbuffer. -+ * Note that @s may not be the sequence number of the record on each -+ * iteration. For the sequence number, @r->info->seq should be checked. -+ * -+ * Context: Any context. 
-+ */ -+#define prb_for_each_info(from, rb, s, i, lc) \ -+for ((s) = from; prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1) -+ -+bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_record *r); -+bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, -+ struct printk_info *info, unsigned int *line_count); -+ -+u64 prb_first_valid_seq(struct printk_ringbuffer *rb); -+u64 prb_next_seq(struct printk_ringbuffer *rb); -+ -+#endif /* _KERNEL_PRINTK_RINGBUFFER_H */ ++EXPORT_SYMBOL(pr_flush); diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c deleted file mode 100644 -index 50aeae770434..000000000000 +index a0e6f746de6c..000000000000 --- a/kernel/printk/printk_safe.c +++ /dev/null @@ -1,414 +0,0 @@ @@ -17677,7 +13680,7 @@ index 50aeae770434..000000000000 - * is later flushed into the main ring buffer via IRQ work. - * - * The alternative implementation is chosen transparently -- * by examinig current printk() context mask stored in @printk_context +- * by examining current printk() context mask stored in @printk_context - * per-CPU variable. - * - * The implementation allows to flush the strings also from another CPU. @@ -18030,7 +14033,7 @@ index 50aeae770434..000000000000 - raw_spin_trylock(&logbuf_lock)) { - int len; - -- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); +- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); - raw_spin_unlock(&logbuf_lock); - defer_console_output(); - return len; @@ -18068,7 +14071,7 @@ index 50aeae770434..000000000000 - printk_safe_flush(); -} diff --git a/kernel/ptrace.c b/kernel/ptrace.c -index 43d6179508d6..3075006d720e 100644 +index 61db50f7ca86..db33b5240e34 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -180,7 +180,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) @@ -18087,11 +14090,48 @@ index 43d6179508d6..3075006d720e 100644 ret = true; } spin_unlock_irq(&task->sighand->siglock); +@@ -190,8 +197,8 @@ static bool ptrace_freeze_traced(struct task_struct *task) + + static void ptrace_unfreeze_traced(struct task_struct *task) + { +- if (task->state != __TASK_TRACED) +- return; ++ unsigned long flags; ++ bool frozen = true; + + WARN_ON(!task->ptrace || task->parent != current); + +@@ -200,12 +207,19 @@ static void ptrace_unfreeze_traced(struct task_struct *task) + * Recheck state under the lock to close this race. + */ + spin_lock_irq(&task->sighand->siglock); +- if (task->state == __TASK_TRACED) { +- if (__fatal_signal_pending(task)) +- wake_up_state(task, __TASK_TRACED); +- else +- task->state = TASK_TRACED; +- } ++ ++ raw_spin_lock_irqsave(&task->pi_lock, flags); ++ if (task->state == __TASK_TRACED) ++ task->state = TASK_TRACED; ++ else if (task->saved_state == __TASK_TRACED) ++ task->saved_state = TASK_TRACED; ++ else ++ frozen = false; ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ ++ if (frozen && __fatal_signal_pending(task)) ++ wake_up_state(task, __TASK_TRACED); ++ + spin_unlock_irq(&task->sighand->siglock); + } + diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig -index 0ebe15a84985..046974b22b8f 100644 +index cdc57b4f6d48..aa8cc8c977e7 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig -@@ -186,8 +186,8 @@ config RCU_FAST_NO_HZ +@@ -188,8 +188,8 @@ config RCU_FAST_NO_HZ config RCU_BOOST bool "Enable RCU priority boosting" @@ -18103,10 +14143,10 @@ index 0ebe15a84985..046974b22b8f 100644 This option boosts the priority of preempted RCU readers that block the current preemptible RCU grace period for too long. 
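For orientation, a minimal usage sketch of the prb_* writer/reader API whose kernel-doc appears in the printk_ringbuffer hunks above (carried here only because the RT patch no longer needs to add that code itself). The sketch is illustrative and is not code contained in this patch: the ringbuffer name, the sizes and the example_* helpers are invented, and error handling is trimmed to the essentials.

/* 2^5 = 32 descriptors, 2^(5+6) = 2 KiB of text data. */
DEFINE_PRINTKRB(example_rb, 5, 6);

static void example_write(const char *text, u16 len)
{
        struct prb_reserved_entry e;
        struct printk_record r;

        prb_rec_init_wr(&r, len);
        if (!prb_reserve(&e, &example_rb, &r))
                return;                 /* no space (a data-less record may have been committed) */

        memcpy(&r.text_buf[0], text, len);
        r.info->text_len = len;         /* readers rely on this value being set */
        prb_final_commit(&e);           /* finalize: immediately visible to readers */
}

static void example_dump(void)
{
        struct printk_info info;
        struct printk_record r;
        char text[64];
        u64 seq;

        prb_rec_init_rd(&r, &info, &text[0], sizeof(text));

        prb_for_each_record(0, &example_rb, seq, &r)
                pr_info("seq=%llu, %hu bytes of text\n", info.seq, info.text_len);
}

Using prb_commit() instead of prb_final_commit() would leave the record open so that a later prb_reserve_in_last() call with the same caller ID can append to it, which is the mechanism printk's continuation-line handling builds on.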
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c -index f453bf8d2f1e..a046e0c84db9 100644 +index 528ed10b78fd..e035508dd0f6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -74,10 +74,13 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@ +@@ -61,10 +61,13 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@ #define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */ #define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */ #define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */ @@ -18122,7 +14162,7 @@ index f453bf8d2f1e..a046e0c84db9 100644 #define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */ /* Must be power of two minus one. */ #define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3) -@@ -1246,31 +1249,53 @@ static void rcutorture_one_extend(int *readstate, int newstate, +@@ -1250,31 +1253,53 @@ static void rcutorture_one_extend(int *readstate, int newstate, WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); rtrsp->rt_readstate = newstate; @@ -18183,7 +14223,7 @@ index f453bf8d2f1e..a046e0c84db9 100644 if (statesold & RCUTORTURE_RDR_RCU) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); -@@ -1313,6 +1338,12 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) +@@ -1317,6 +1342,12 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; @@ -18196,7 +14236,7 @@ index f453bf8d2f1e..a046e0c84db9 100644 WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ -@@ -1320,11 +1351,49 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) +@@ -1324,11 +1355,49 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); @@ -18252,10 +14292,10 @@ index f453bf8d2f1e..a046e0c84db9 100644 } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c -index f78ee759af9c..367165074e5f 100644 +index 40e5e3dd253e..d60903581300 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -113,8 +113,10 @@ static struct rcu_state rcu_state = { +@@ -100,8 +100,10 @@ static struct rcu_state rcu_state = { static bool dump_tree; module_param(dump_tree, bool, 0444); /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */ @@ -18268,10 +14308,10 @@ index f78ee759af9c..367165074e5f 100644 static bool rcu_fanout_exact; module_param(rcu_fanout_exact, bool, 0444); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c -index 2de49b5d8dd2..294f7021a459 100644 +index 39334d2d2b37..b95ae86c40a7 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -69,8 +69,10 @@ +@@ -56,8 +56,10 @@ #ifndef CONFIG_TINY_RCU module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); @@ -18284,10 +14324,10 @@ index 2de49b5d8dd2..294f7021a459 100644 #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 2d95dc3f4644..2d54f1e7ef86 100644 +index ff74fca39ed2..2c678a0c77ad 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -63,7 +63,11 @@ const_debug unsigned int sysctl_sched_features = +@@ -64,7 +64,11 @@ const_debug unsigned int sysctl_sched_features = * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. 
*/ @@ -18299,7 +14339,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /* * period over which we measure -rt task CPU usage in us. -@@ -511,9 +515,15 @@ static bool set_nr_if_polling(struct task_struct *p) +@@ -504,9 +508,15 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif @@ -18317,7 +14357,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /* * Atomically grab the task, if ->wake_q is !nil already it means -@@ -549,7 +559,13 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) +@@ -542,7 +552,13 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) */ void wake_q_add(struct wake_q_head *head, struct task_struct *task) { @@ -18332,7 +14372,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 get_task_struct(task); } -@@ -572,28 +588,39 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) +@@ -565,28 +581,39 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) */ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) { @@ -18377,606 +14417,72 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 put_task_struct(task); } } -@@ -629,6 +656,48 @@ void resched_curr(struct rq *rq) - trace_sched_wake_idle_without_ipi(cpu); - } - -+#ifdef CONFIG_PREEMPT_LAZY -+ -+static int tsk_is_polling(struct task_struct *p) -+{ -+#ifdef TIF_POLLING_NRFLAG -+ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -+#else -+ return 0; -+#endif -+} -+ -+void resched_curr_lazy(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ if (!sched_feat(PREEMPT_LAZY)) { -+ resched_curr(rq); -+ return; -+ } -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ if (test_tsk_need_resched_lazy(curr)) -+ return; -+ -+ set_tsk_need_resched_lazy(curr); -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) -+ return; -+ -+ /* NEED_RESCHED_LAZY must be visible before we test polling */ -+ smp_mb(); -+ if (!tsk_is_polling(curr)) -+ smp_send_reschedule(cpu); -+} -+#endif -+ - void resched_cpu(int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -1700,6 +1769,86 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) - - #ifdef CONFIG_SMP - -+#ifdef CONFIG_PREEMPT_RT -+ -+static void -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags); -+ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, -+ u32 flags); -+ -+static void migrate_disable_switch(struct rq *rq, struct task_struct *p) -+{ -+ if (likely(!p->migration_disabled)) -+ return; -+ -+ if (p->cpus_ptr != &p->cpus_mask) -+ return; -+ -+ /* -+ * Violates locking rules! see comment in __do_set_cpus_allowed(). -+ */ -+ __do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE); -+} -+ -+void migrate_disable(void) -+{ -+ struct task_struct *p = current; -+ -+ if (p->migration_disabled) { -+ p->migration_disabled++; -+ return; -+ } -+ -+ trace_sched_migrate_disable_tp(p); -+ -+ preempt_disable(); -+ this_rq()->nr_pinned++; -+ p->migration_disabled = 1; -+ preempt_lazy_disable(); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(migrate_disable); -+ -+void migrate_enable(void) -+{ -+ struct task_struct *p = current; -+ -+ if (p->migration_disabled > 1) { -+ p->migration_disabled--; -+ return; -+ } -+ -+ /* -+ * Ensure stop_task runs either before or after this, and that -+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). 
-+ */ -+ preempt_disable(); -+ if (p->cpus_ptr != &p->cpus_mask) -+ __set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE); -+ /* -+ * Mustn't clear migration_disabled() until cpus_ptr points back at the -+ * regular cpus_mask, otherwise things that race (eg. -+ * select_fallback_rq) get confused. -+ */ -+ barrier(); -+ p->migration_disabled = 0; -+ this_rq()->nr_pinned--; -+ preempt_lazy_enable(); -+ preempt_enable(); -+ -+ trace_sched_migrate_enable_tp(p); -+} -+EXPORT_SYMBOL_GPL(migrate_enable); -+ -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return rq->nr_pinned; -+} -+ -+#endif -+ - /* - * Per-CPU kthreads are allowed to run on !active && online CPUs, see - * __set_cpus_allowed_ptr() and select_fallback_rq(). -@@ -1709,7 +1858,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) - if (!cpumask_test_cpu(cpu, p->cpus_ptr)) - return false; - -- if (is_per_cpu_kthread(p)) -+ if (is_per_cpu_kthread(p) || is_migration_disabled(p)) - return cpu_online(cpu); - - return cpu_active(cpu); -@@ -1756,6 +1905,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, - struct migration_arg { - struct task_struct *task; - int dest_cpu; -+ struct completion *done; - }; - - /* -@@ -1790,6 +1940,7 @@ static int migration_cpu_stop(void *data) - struct migration_arg *arg = data; - struct task_struct *p = arg->task; - struct rq *rq = this_rq(); -+ bool complete = false; - struct rq_flags rf; - - /* -@@ -1812,15 +1963,70 @@ static int migration_cpu_stop(void *data) - * we're holding p->pi_lock. - */ - if (task_rq(p) == rq) { -+ if (is_migration_disabled(p)) -+ goto out; -+ - if (task_on_rq_queued(p)) - rq = __migrate_task(rq, &rf, p, arg->dest_cpu); - else - p->wake_cpu = arg->dest_cpu; -+ -+ if (arg->done) { -+ p->migration_pending = NULL; -+ complete = true; -+ } - } -+out: - rq_unlock(rq, &rf); - raw_spin_unlock(&p->pi_lock); -- - local_irq_enable(); -+ -+ if (complete) -+ complete_all(arg->done); -+ -+ return 0; -+} -+ -+int push_cpu_stop(void *arg) -+{ -+ struct rq *lowest_rq = NULL, *rq = this_rq(); -+ struct task_struct *p = arg; -+ -+ raw_spin_lock_irq(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ if (task_rq(p) != rq) -+ goto out_unlock; -+ -+ if (is_migration_disabled(p)) { -+ p->migration_flags |= MDF_PUSH; -+ goto out_unlock; -+ } -+ -+ p->migration_flags &= ~MDF_PUSH; -+ -+ if (p->sched_class->find_lock_rq) -+ lowest_rq = p->sched_class->find_lock_rq(p, rq); -+ -+ if (!lowest_rq) -+ goto out_unlock; -+ -+ // XXX validate p is still the highest prio task -+ if (task_rq(p) == rq) { -+ deactivate_task(rq, p, 0); -+ set_task_cpu(p, lowest_rq->cpu); -+ activate_task(lowest_rq, p, 0); -+ resched_curr(lowest_rq); -+ } -+ -+ double_unlock_balance(rq, lowest_rq); -+ -+out_unlock: -+ rq->push_busy = false; -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ put_task_struct(p); - return 0; - } - -@@ -1828,18 +2034,39 @@ static int migration_cpu_stop(void *data) - * sched_class::set_cpus_allowed must do the below, but is not required to - * actually call this function. 
- */ --void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags) - { -+ if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) { -+ p->cpus_ptr = new_mask; -+ return; -+ } -+ - cpumask_copy(&p->cpus_mask, new_mask); - p->nr_cpus_allowed = cpumask_weight(new_mask); - } - --void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+static void -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags) - { - struct rq *rq = task_rq(p); - bool queued, running; - -- lockdep_assert_held(&p->pi_lock); -+ /* -+ * This here violates the locking rules for affinity, since we're only -+ * supposed to change these variables while holding both rq->lock and -+ * p->pi_lock. -+ * -+ * HOWEVER, it magically works, because ttwu() is the only code that -+ * accesses these variables under p->pi_lock and only does so after -+ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() -+ * before finish_task(). -+ * -+ * XXX do further audits, this smells like something putrid. -+ */ -+ if (flags & SCA_MIGRATE_DISABLE) -+ SCHED_WARN_ON(!p->on_cpu); -+ else -+ lockdep_assert_held(&p->pi_lock); - - queued = task_on_rq_queued(p); - running = task_current(rq, p); -@@ -1855,7 +2082,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - if (running) - put_prev_task(rq, p); - -- p->sched_class->set_cpus_allowed(p, new_mask); -+ p->sched_class->set_cpus_allowed(p, new_mask, flags); - - if (queued) - enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); -@@ -1863,6 +2090,208 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - set_next_task(rq, p); - } +@@ -622,6 +649,48 @@ void resched_curr(struct rq *rq) + trace_sched_wake_idle_without_ipi(cpu); + } -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) ++#ifdef CONFIG_PREEMPT_LAZY ++ ++static int tsk_is_polling(struct task_struct *p) +{ -+ __do_set_cpus_allowed(p, new_mask, 0); ++#ifdef TIF_POLLING_NRFLAG ++ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); ++#else ++ return 0; ++#endif +} + -+struct set_affinity_pending { -+ refcount_t refs; -+ struct completion done; -+ struct cpu_stop_work stop_work; -+ struct migration_arg arg; -+}; -+ -+/* -+ * This function is wildly self concurrent; here be dragons. -+ * -+ * -+ * When given a valid mask, __set_cpus_allowed_ptr() must block until the -+ * designated task is enqueued on an allowed CPU. If that task is currently -+ * running, we have to kick it out using the CPU stopper. -+ * -+ * Migrate-Disable comes along and tramples all over our nice sandcastle. -+ * Consider: -+ * -+ * Initial conditions: P0->cpus_mask = [0, 1] -+ * -+ * P0@CPU0 P1 -+ * -+ * migrate_disable(); -+ * <preempted> -+ * set_cpus_allowed_ptr(P0, [1]); -+ * -+ * P1 *cannot* return from this set_cpus_allowed_ptr() call until P0 executes -+ * its outermost migrate_enable() (i.e. it exits its Migrate-Disable region). -+ * This means we need the following scheme: -+ * -+ * P0@CPU0 P1 -+ * -+ * migrate_disable(); -+ * <preempted> -+ * set_cpus_allowed_ptr(P0, [1]); -+ * <blocks> -+ * <resumes> -+ * migrate_enable(); -+ * __set_cpus_allowed_ptr(); -+ * <wakes local stopper> -+ * `--> <woken on migration completion> -+ * -+ * Now the fun stuff: there may be several P1-like tasks, i.e. multiple -+ * concurrent set_cpus_allowed_ptr(P0, [*]) calls. 
CPU affinity changes of any -+ * task p are serialized by p->pi_lock, which we can leverage: the one that -+ * should come into effect at the end of the Migrate-Disable region is the last -+ * one. This means we only need to track a single cpumask (i.e. p->cpus_mask), -+ * but we still need to properly signal those waiting tasks at the appropriate -+ * moment. -+ * -+ * This is implemented using struct set_affinity_pending. The first -+ * __set_cpus_allowed_ptr() caller within a given Migrate-Disable region will -+ * setup an instance of that struct and install it on the targeted task_struct. -+ * Any and all further callers will reuse that instance. Those then wait for -+ * a completion signaled at the tail of the CPU stopper callback (1), triggered -+ * on the end of the Migrate-Disable region (i.e. outermost migrate_enable()). -+ * -+ * -+ * (1) In the cases covered above. There is one more where the completion is -+ * signaled within affine_move_task() itself: when a subsequent affinity request -+ * cancels the need for an active migration. Consider: -+ * -+ * Initial conditions: P0->cpus_mask = [0, 1] -+ * -+ * P0@CPU0 P1 P2 -+ * -+ * migrate_disable(); -+ * <preempted> -+ * set_cpus_allowed_ptr(P0, [1]); -+ * <blocks> -+ * set_cpus_allowed_ptr(P0, [0, 1]); -+ * <signal completion> -+ * <awakes> -+ * -+ * Note that the above is safe vs a concurrent migrate_enable(), as any -+ * pending affinity completion is preceded an uninstallion of -+ * p->migration_pending done with p->pi_lock held. -+ */ -+static int affine_move_task(struct rq *rq, struct rq_flags *rf, -+ struct task_struct *p, int dest_cpu, unsigned int flags) ++void resched_curr_lazy(struct rq *rq) +{ -+ struct set_affinity_pending my_pending = { }, *pending = NULL; -+ struct migration_arg arg = { -+ .task = p, -+ .dest_cpu = dest_cpu, -+ }; -+ bool complete = false; -+ -+ /* Can the task run on the task's current CPU? If so, we're done */ -+ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { -+ struct task_struct *push_task = NULL; -+ -+ if ((flags & SCA_MIGRATE_ENABLE) && -+ (p->migration_flags & MDF_PUSH) && !rq->push_busy) { -+ rq->push_busy = true; -+ push_task = get_task_struct(p); -+ } -+ -+ pending = p->migration_pending; -+ if (pending) { -+ refcount_inc(&pending->refs); -+ p->migration_pending = NULL; -+ complete = true; -+ } -+ task_rq_unlock(rq, p, rf); -+ -+ if (push_task) { -+ stop_one_cpu_nowait(rq->cpu, push_cpu_stop, -+ p, &rq->push_work); -+ } -+ -+ if (complete) -+ goto do_complete; -+ -+ return 0; -+ } -+ -+ if (!(flags & SCA_MIGRATE_ENABLE)) { -+ /* serialized by p->pi_lock */ -+ if (!p->migration_pending) { -+ /* Install the request */ -+ refcount_set(&my_pending.refs, 1); -+ init_completion(&my_pending.done); -+ p->migration_pending = &my_pending; -+ } else { -+ pending = p->migration_pending; -+ refcount_inc(&pending->refs); -+ } -+ } -+ pending = p->migration_pending; -+ /* -+ * - !MIGRATE_ENABLE: -+ * we'll have installed a pending if there wasn't one already. -+ * -+ * - MIGRATE_ENABLE: -+ * we're here because the current CPU isn't matching anymore, -+ * the only way that can happen is because of a concurrent -+ * set_cpus_allowed_ptr() call, which should then still be -+ * pending completion. -+ * -+ * Either way, we really should have a @pending here. 
-+ */ -+ if (WARN_ON_ONCE(!pending)) -+ return -EINVAL; -+ -+ arg.done = &pending->done; -+ -+ if (flags & SCA_MIGRATE_ENABLE) { -+ -+ p->migration_flags &= ~MDF_PUSH; -+ task_rq_unlock(rq, p, rf); -+ pending->arg = arg; -+ stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, -+ &pending->arg, &pending->stop_work); ++ struct task_struct *curr = rq->curr; ++ int cpu; + -+ return 0; ++ if (!sched_feat(PREEMPT_LAZY)) { ++ resched_curr(rq); ++ return; + } + -+ if (task_running(rq, p) || p->state == TASK_WAKING) { -+ /* -+ * Lessen races (and headaches) by delegating -+ * is_migration_disabled(p) checks to the stopper, which will -+ * run on the same CPU as said p. -+ */ -+ task_rq_unlock(rq, p, rf); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ -+ } else { -+ -+ if (!is_migration_disabled(p)) { -+ if (task_on_rq_queued(p)) -+ rq = move_queued_task(rq, rf, p, dest_cpu); -+ -+ p->migration_pending = NULL; -+ complete = true; -+ } -+ task_rq_unlock(rq, p, rf); ++ lockdep_assert_held(&rq->lock); + -+do_complete: -+ if (complete) -+ complete_all(&pending->done); -+ } ++ if (test_tsk_need_resched(curr)) ++ return; + -+ wait_for_completion(&pending->done); ++ if (test_tsk_need_resched_lazy(curr)) ++ return; + -+ if (refcount_dec_and_test(&pending->refs)) -+ wake_up_var(&pending->refs); ++ set_tsk_need_resched_lazy(curr); + -+ /* -+ * Block the original owner of &pending until all subsequent callers -+ * have seen the completion and decremented the refcount -+ */ -+ wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); ++ cpu = cpu_of(rq); ++ if (cpu == smp_processor_id()) ++ return; + -+ return 0; ++ /* NEED_RESCHED_LAZY must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(curr)) ++ smp_send_reschedule(cpu); +} ++#endif + - /* - * Change a given task's CPU affinity. Migrate the thread to a - * proper CPU and schedule it away if the CPU it's executing on -@@ -1873,7 +2302,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - * call is not atomic; no spinlocks may be held. - */ - static int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, bool check) -+ const struct cpumask *new_mask, -+ u32 flags) - { - const struct cpumask *cpu_valid_mask = cpu_active_mask; - unsigned int dest_cpu; -@@ -1884,9 +2314,14 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - rq = task_rq_lock(p, &rf); - update_rq_clock(rq); - -- if (p->flags & PF_KTHREAD) { -+ if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { - /* -- * Kernel threads are allowed on online && !active CPUs -+ * Kernel threads are allowed on online && !active CPUs. -+ * -+ * Specifically, migration_disabled() tasks must not fail the -+ * cpumask_any_and_distribute() pick below, esp. so on -+ * SCA_MIGRATE_ENABLE, otherwise we'll not call -+ * set_cpus_allowed_common() and actually reset p->cpus_ptr. - */ - cpu_valid_mask = cpu_online_mask; - } -@@ -1895,13 +2330,22 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - * Must re-check here, to close a race against __kthread_bind(), - * sched_setaffinity() is not guaranteed to observe the flag. 
- */ -- if (check && (p->flags & PF_NO_SETAFFINITY)) { -+ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { - ret = -EINVAL; - goto out; - } - -- if (cpumask_equal(&p->cpus_mask, new_mask)) -- goto out; -+ if (!(flags & SCA_MIGRATE_ENABLE)) { -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ if (WARN_ON_ONCE(p == current && -+ is_migration_disabled(p) && -+ !cpumask_test_cpu(task_cpu(p), new_mask))) { -+ ret = -EBUSY; -+ goto out; -+ } -+ } - - /* - * Picking a ~random cpu helps in cases where we are changing affinity -@@ -1914,7 +2358,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - goto out; - } - -- do_set_cpus_allowed(p, new_mask); -+ __do_set_cpus_allowed(p, new_mask, flags); - - if (p->flags & PF_KTHREAD) { - /* -@@ -1926,23 +2370,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - p->nr_cpus_allowed != 1); - } - -- /* Can the task run on the task's current CPU? If so, we're done */ -- if (cpumask_test_cpu(task_cpu(p), new_mask)) -- goto out; -+ return affine_move_task(rq, &rf, p, dest_cpu, flags); - -- if (task_running(rq, p) || p->state == TASK_WAKING) { -- struct migration_arg arg = { p, dest_cpu }; -- /* Need help from migration thread: drop lock and wait. */ -- task_rq_unlock(rq, p, &rf); -- stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -- return 0; -- } else if (task_on_rq_queued(p)) { -- /* -- * OK, since we're going to drop the lock immediately -- * afterwards anyway. -- */ -- rq = move_queued_task(rq, &rf, p, dest_cpu); -- } - out: - task_rq_unlock(rq, p, &rf); - -@@ -1951,7 +2380,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - - int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) + void resched_cpu(int cpu) { -- return __set_cpus_allowed_ptr(p, new_mask, false); -+ return __set_cpus_allowed_ptr(p, new_mask, 0); + struct rq *rq = cpu_rq(cpu); +@@ -1753,6 +1822,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ preempt_lazy_disable(); + preempt_enable(); } - EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - -@@ -1992,6 +2421,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - * Clearly, migrating tasks to offline CPUs is a fairly daft thing. - */ - WARN_ON_ONCE(!cpu_online(new_cpu)); -+ -+ WARN_ON_ONCE(is_migration_disabled(p)); - #endif - - trace_sched_migrate_task(p, new_cpu); -@@ -2124,6 +2555,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -1781,6 +1851,7 @@ void migrate_enable(void) + barrier(); + p->migration_disabled = 0; + this_rq()->nr_pinned--; ++ preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); +@@ -2573,6 +2644,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, } #endif /* CONFIG_NUMA_BALANCING */ @@ -18995,7 +14501,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -2168,7 +2611,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) +@@ -2617,7 +2700,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * is actually now running somewhere else! 
*/ while (task_running(rq, p)) { @@ -19004,7 +14510,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 return 0; cpu_relax(); } -@@ -2183,7 +2626,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) +@@ -2632,7 +2715,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; @@ -19014,85 +14520,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); -@@ -2322,6 +2766,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) - } - fallthrough; - case possible: -+ /* -+ * XXX When called from select_task_rq() we only -+ * hold p->pi_lock and again violate locking order. -+ * -+ * More yuck to audit. -+ */ - do_set_cpus_allowed(p, cpu_possible_mask); - state = fail; - break; -@@ -2356,7 +2806,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) - { - lockdep_assert_held(&p->pi_lock); - -- if (p->nr_cpus_allowed > 1) -+ if (p->nr_cpus_allowed > 1 && !is_migration_disabled(p)) - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); - else - cpu = cpumask_any(p->cpus_ptr); -@@ -2379,6 +2829,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) - - void sched_set_stop_task(int cpu, struct task_struct *stop) - { -+ static struct lock_class_key stop_pi_lock; - struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; - struct task_struct *old_stop = cpu_rq(cpu)->stop; - -@@ -2394,6 +2845,20 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) - sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); - - stop->sched_class = &stop_sched_class; -+ -+ /* -+ * The PI code calls rt_mutex_setprio() with ->pi_lock held to -+ * adjust the effective priority of a task. As a result, -+ * rt_mutex_setprio() can trigger (RT) balancing operations, -+ * which can then trigger wakeups of the stop thread to push -+ * around the current task. -+ * -+ * The stop task itself will never be part of the PI-chain, it -+ * never blocks, therefore that ->pi_lock recursion is safe. -+ * Tell lockdep about this by placing the stop->pi_lock in its -+ * own class. -+ */ -+ lockdep_set_class(&stop->pi_lock, &stop_pi_lock); - } - - cpu_rq(cpu)->stop = stop; -@@ -2410,13 +2875,25 @@ void sched_set_stop_task(int cpu, struct task_struct *stop) - #else - - static inline int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, bool check) -+ const struct cpumask *new_mask, -+ u32 flags) - { - return set_cpus_allowed_ptr(p, new_mask); - } - - #endif /* CONFIG_SMP */ - -+#if !defined(CONFIG_SMP) || !defined(CONFIG_PREEMPT_RT) -+ -+static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { } -+ -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return false; -+} -+ -+#endif -+ - static void - ttwu_stat(struct task_struct *p, int cpu, int wake_flags) - { -@@ -2828,7 +3305,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -3318,7 +3402,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) int cpu, success = 0; preempt_disable(); @@ -19101,7 +14529,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /* * We're waking current, this means 'p->on_rq' and 'task_cpu(p) * == smp_processor_id()'. 
Together this means we can special -@@ -2858,8 +3335,26 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -3348,8 +3432,26 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); @@ -19118,157 +14546,47 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 + success = 1; + } + } - goto unlock; -+ } -+ /* -+ * If this is a regular wakeup, then we can unconditionally -+ * clear the saved state of a "lock sleeper". -+ */ -+ if (!(wake_flags & WF_LOCK_SLEEPER)) -+ p->saved_state = TASK_RUNNING; - - trace_sched_waking(p); - -@@ -3041,13 +3536,25 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t - * - * Return: 1 if the process was woken up, 0 if it was already running. - * -- * This function executes a full memory barrier before accessing the task state. -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+/** -+ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" -+ * @p: The process to be woken up. -+ * -+ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate -+ * the nature of the wakeup. - */ --int wake_up_process(struct task_struct *p) -+int wake_up_lock_sleeper(struct task_struct *p) - { -- return try_to_wake_up(p, TASK_NORMAL, 0); -+ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); - } --EXPORT_SYMBOL(wake_up_process); - - int wake_up_state(struct task_struct *p, unsigned int state) - { -@@ -3295,6 +3802,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) - p->on_cpu = 0; - #endif - init_task_preempt_count(p); -+#ifdef CONFIG_HAVE_PREEMPT_LAZY -+ task_thread_info(p)->preempt_lazy_count = 0; -+#endif - #ifdef CONFIG_SMP - plist_node_init(&p->pushable_tasks, MAX_PRIO); - RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -3489,6 +3999,90 @@ static inline void finish_task(struct task_struct *prev) - #endif - } - -+#ifdef CONFIG_SMP -+ -+static void do_balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ void (*func)(struct rq *rq); -+ struct callback_head *next; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ while (head) { -+ func = (void (*)(struct rq *))head->func; -+ next = head->next; -+ head->next = NULL; -+ head = next; -+ -+ func(rq); -+ } -+} -+ -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ struct callback_head *head = rq->balance_callback; -+ -+ lockdep_assert_held(&rq->lock); -+ if (head) { -+ rq->balance_callback = NULL; -+ rq->balance_flags &= ~BALANCE_WORK; -+ } -+ -+ return head; -+} -+ -+static void __balance_callbacks(struct rq *rq) -+{ -+ do_balance_callbacks(rq, splice_balance_callbacks(rq)); -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ unsigned long flags; -+ -+ if (unlikely(head)) { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ do_balance_callbacks(rq, head); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+} -+ -+static void balance_push(struct rq *rq); -+ -+static inline void balance_switch(struct rq *rq) -+{ -+ if (likely(!rq->balance_flags)) -+ return; -+ -+ if (rq->balance_flags & BALANCE_PUSH) { -+ balance_push(rq); -+ return; -+ } -+ -+ __balance_callbacks(rq); -+} -+ -+#else -+ -+static inline void __balance_callbacks(struct rq *rq) -+{ -+} -+ -+static inline 
struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ return NULL; -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+} -+ -+static inline void balance_switch(struct rq *rq) + goto unlock; ++ } ++ /* ++ * If this is a regular wakeup, then we can unconditionally ++ * clear the saved state of a "lock sleeper". ++ */ ++ if (!(wake_flags & WF_LOCK_SLEEPER)) ++ p->saved_state = TASK_RUNNING; + + trace_sched_waking(p); + +@@ -3539,6 +3641,18 @@ int wake_up_process(struct task_struct *p) + } + EXPORT_SYMBOL(wake_up_process); + ++/** ++ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" ++ * @p: The process to be woken up. ++ * ++ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate ++ * the nature of the wakeup. ++ */ ++int wake_up_lock_sleeper(struct task_struct *p) +{ ++ return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); +} + -+#endif -+ - static inline void - prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf) + int wake_up_state(struct task_struct *p, unsigned int state) { -@@ -3514,6 +4108,7 @@ static inline void finish_lock_switch(struct rq *rq) - * prev into current: - */ - spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ balance_switch(rq); - raw_spin_unlock_irq(&rq->lock); - } - -@@ -3631,23 +4226,18 @@ static struct rq *finish_task_switch(struct task_struct *prev) + return try_to_wake_up(p, state, 0); +@@ -3786,6 +3900,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->on_cpu = 0; + #endif + init_task_preempt_count(p); ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(p)->preempt_lazy_count = 0; ++#endif + #ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + RB_CLEAR_NODE(&p->pushable_dl_tasks); +@@ -4218,23 +4335,18 @@ static struct rq *finish_task_switch(struct task_struct *prev) * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ @@ -19297,59 +14615,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 put_task_struct_rcu_user(prev); } -@@ -3655,43 +4245,6 @@ static struct rq *finish_task_switch(struct task_struct *prev) - return rq; - } - --#ifdef CONFIG_SMP -- --/* rq->lock is NOT held, but preemption is disabled */ --static void __balance_callback(struct rq *rq) --{ -- struct callback_head *head, *next; -- void (*func)(struct rq *rq); -- unsigned long flags; -- -- raw_spin_lock_irqsave(&rq->lock, flags); -- head = rq->balance_callback; -- rq->balance_callback = NULL; -- while (head) { -- func = (void (*)(struct rq *))head->func; -- next = head->next; -- head->next = NULL; -- head = next; -- -- func(rq); -- } -- raw_spin_unlock_irqrestore(&rq->lock, flags); --} -- --static inline void balance_callback(struct rq *rq) --{ -- if (unlikely(rq->balance_callback)) -- __balance_callback(rq); --} -- --#else -- --static inline void balance_callback(struct rq *rq) --{ --} -- --#endif -- - /** - * schedule_tail - first thing a freshly forked thread must call. - * @prev: the thread we just switched away from. -@@ -3711,7 +4264,6 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) - */ - - rq = finish_task_switch(prev); -- balance_callback(rq); - preempt_enable(); - - if (current->set_child_tid) -@@ -4406,7 +4958,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +@@ -4956,7 +5068,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * * WARNING: must be called with preemption disabled! 
*/ @@ -19358,7 +14624,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 { struct task_struct *prev, *next; unsigned long *switch_count; -@@ -4459,7 +5011,7 @@ static void __sched notrace __schedule(bool preempt) +@@ -5009,7 +5121,7 @@ static void __sched notrace __schedule(bool preempt) * - ptrace_{,un}freeze_traced() can change ->state underneath us. */ prev_state = prev->state; @@ -19367,7 +14633,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { -@@ -4494,6 +5046,7 @@ static void __sched notrace __schedule(bool preempt) +@@ -5044,6 +5156,7 @@ static void __sched notrace __schedule(bool preempt) next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -19375,30 +14641,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -4519,6 +5072,7 @@ static void __sched notrace __schedule(bool preempt) - */ - ++*switch_count; - -+ migrate_disable_switch(rq, prev); - psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - - trace_sched_switch(preempt, prev, next); -@@ -4527,10 +5081,11 @@ static void __sched notrace __schedule(bool preempt) - rq = context_switch(rq, prev, next, &rf); - } else { - rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); -- rq_unlock_irq(rq, &rf); -- } - -- balance_callback(rq); -+ rq_unpin_lock(rq, &rf); -+ __balance_callbacks(rq); -+ raw_spin_unlock_irq(&rq->lock); -+ } - } - - void __noreturn do_task_dead(void) -@@ -4541,7 +5096,7 @@ void __noreturn do_task_dead(void) +@@ -5093,7 +5206,7 @@ void __noreturn do_task_dead(void) /* Tell freezer to ignore us: */ current->flags |= PF_NOFREEZE; @@ -19407,7 +14650,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 BUG(); /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -@@ -4571,9 +5126,6 @@ static inline void sched_submit_work(struct task_struct *tsk) +@@ -5126,9 +5239,6 @@ static inline void sched_submit_work(struct task_struct *tsk) preempt_enable_no_resched(); } @@ -19417,7 +14660,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
-@@ -4599,7 +5151,7 @@ asmlinkage __visible void __sched schedule(void) +@@ -5154,7 +5264,7 @@ asmlinkage __visible void __sched schedule(void) sched_submit_work(tsk); do { preempt_disable(); @@ -19426,7 +14669,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 sched_preempt_enable_no_resched(); } while (need_resched()); sched_update_worker(tsk); -@@ -4627,7 +5179,7 @@ void __sched schedule_idle(void) +@@ -5182,7 +5292,7 @@ void __sched schedule_idle(void) */ WARN_ON_ONCE(current->state); do { @@ -19435,7 +14678,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 } while (need_resched()); } -@@ -4680,7 +5232,7 @@ static void __sched notrace preempt_schedule_common(void) +@@ -5235,7 +5345,7 @@ static void __sched notrace preempt_schedule_common(void) */ preempt_disable_notrace(); preempt_latency_start(1); @@ -19444,7 +14687,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 preempt_latency_stop(1); preempt_enable_no_resched_notrace(); -@@ -4691,6 +5243,30 @@ static void __sched notrace preempt_schedule_common(void) +@@ -5246,6 +5356,30 @@ static void __sched notrace preempt_schedule_common(void) } while (need_resched()); } @@ -19475,7 +14718,7 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -4704,12 +5280,26 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) +@@ -5259,12 +5393,26 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; @@ -19503,437 +14746,54 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 /** * preempt_schedule_notrace - preempt_schedule called by tracing * -@@ -4731,6 +5321,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - if (likely(!preemptible())) - return; - -+ if (!preemptible_lazy()) -+ return; -+ - do { - /* - * Because the function tracer can trace preempt_count_sub() -@@ -4753,7 +5346,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) - * an infinite recursion. 
- */ - prev_ctx = exception_enter(); -- __schedule(true); -+ __schedule(true, false); - exception_exit(prev_ctx); - - preempt_latency_stop(1); -@@ -4782,7 +5375,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) - do { - preempt_disable(); - local_irq_enable(); -- __schedule(true); -+ __schedule(true, false); - local_irq_disable(); - sched_preempt_enable_no_resched(); - } while (need_resched()); -@@ -4938,9 +5531,11 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) - out_unlock: - /* Avoid rq from going away on us: */ - preempt_disable(); -- __task_rq_unlock(rq, &rf); - -- balance_callback(rq); -+ rq_unpin_lock(rq, &rf); -+ __balance_callbacks(rq); -+ raw_spin_unlock(&rq->lock); -+ - preempt_enable(); - } - #else -@@ -5214,6 +5809,7 @@ static int __sched_setscheduler(struct task_struct *p, - int retval, oldprio, oldpolicy = -1, queued, running; - int new_effective_prio, policy = attr->sched_policy; - const struct sched_class *prev_class; -+ struct callback_head *head; - struct rq_flags rf; - int reset_on_fork; - int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; -@@ -5452,6 +6048,7 @@ static int __sched_setscheduler(struct task_struct *p, - - /* Avoid rq from going away on us: */ - preempt_disable(); -+ head = splice_balance_callbacks(rq); - task_rq_unlock(rq, p, &rf); - - if (pi) { -@@ -5460,7 +6057,7 @@ static int __sched_setscheduler(struct task_struct *p, - } - - /* Run balance callbacks after we've adjusted the PI chain: */ -- balance_callback(rq); -+ balance_callbacks(rq, head); - preempt_enable(); - - return 0; -@@ -5955,7 +6552,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - } - #endif - again: -- retval = __set_cpus_allowed_ptr(p, new_mask, true); -+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK); - - if (!retval) { - cpuset_cpus_allowed(p, cpus_allowed); -@@ -6538,7 +7135,7 @@ void init_idle(struct task_struct *idle, int cpu) - * - * And since this is boot we can forgo the serialization. - */ -- set_cpus_allowed_common(idle, cpumask_of(cpu)); -+ set_cpus_allowed_common(idle, cpumask_of(cpu), 0); - #endif - /* - * We're having a chicken and egg problem, even though we are -@@ -6565,7 +7162,9 @@ void init_idle(struct task_struct *idle, int cpu) - - /* Set the preempt count _outside_ the spinlocks! */ - init_idle_preempt_count(idle, cpu); -- -+#ifdef CONFIG_HAVE_PREEMPT_LAZY -+ task_thread_info(idle)->preempt_lazy_count = 0; -+#endif - /* - * The idle tasks have their own, simple scheduling class: - */ -@@ -6670,6 +7269,7 @@ void sched_setnuma(struct task_struct *p, int nid) - #endif /* CONFIG_NUMA_BALANCING */ - - #ifdef CONFIG_HOTPLUG_CPU -+ - /* - * Ensure that the idle task is using init_mm right before its CPU goes - * offline. -@@ -6689,119 +7289,126 @@ void idle_task_exit(void) - /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ - } - --/* -- * Since this CPU is going 'away' for a while, fold any nr_active delta -- * we might have. Assumes we're called after migrate_tasks() so that the -- * nr_active count is stable. We need to take the teardown thread which -- * is calling this into account, so we hand in adjust = 1 to the load -- * calculation. -- * -- * Also see the comment "Global load-average calculations". 
-- */ --static void calc_load_migrate(struct rq *rq) -+static int __balance_push_cpu_stop(void *arg) - { -- long delta = calc_load_fold_active(rq, 1); -- if (delta) -- atomic_long_add(delta, &calc_load_tasks); --} -+ struct task_struct *p = arg; -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ int cpu; - --static struct task_struct *__pick_migrate_task(struct rq *rq) --{ -- const struct sched_class *class; -- struct task_struct *next; -+ raw_spin_lock_irq(&p->pi_lock); -+ rq_lock(rq, &rf); - -- for_each_class(class) { -- next = class->pick_next_task(rq); -- if (next) { -- next->sched_class->put_prev_task(rq, next); -- return next; -- } -+ update_rq_clock(rq); -+ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) { -+ cpu = select_fallback_rq(rq->cpu, p); -+ rq = __migrate_task(rq, &rf, p, cpu); - } - -- /* The idle class should always have a runnable task */ -- BUG(); -+ rq_unlock(rq, &rf); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ put_task_struct(p); -+ -+ return 0; - } - -+static DEFINE_PER_CPU(struct cpu_stop_work, push_work); -+ - /* -- * Migrate all tasks from the rq, sleeping tasks will be migrated by -- * try_to_wake_up()->select_task_rq(). -- * -- * Called with rq->lock held even though we'er in stop_machine() and -- * there's no concurrency possible, we hold the required locks anyway -- * because of lock validation efforts. -+ * Ensure we only run per-cpu kthreads once the CPU goes !active. - */ --static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) -+static void balance_push(struct rq *rq) - { -- struct rq *rq = dead_rq; -- struct task_struct *next, *stop = rq->stop; -- struct rq_flags orf = *rf; -- int dest_cpu; -+ struct task_struct *push_task = rq->curr; -+ -+ lockdep_assert_held(&rq->lock); -+ SCHED_WARN_ON(rq->cpu != smp_processor_id()); - - /* -- * Fudge the rq selection such that the below task selection loop -- * doesn't get stuck on the currently eligible stop task. -- * -- * We're currently inside stop_machine() and the rq is either stuck -- * in the stop_machine_cpu_stop() loop, or we're executing this code, -- * either way we should never end up calling schedule() until we're -- * done here. -+ * Both the cpu-hotplug and stop task are in this case and are -+ * required to complete the hotplug process. - */ -- rq->stop = NULL; -+ if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) { -+ /* -+ * If this is the idle task on the outgoing CPU try to wake -+ * up the hotplug control thread which might wait for the -+ * last task to vanish. The rcuwait_active() check is -+ * accurate here because the waiter is pinned on this CPU -+ * and can't obviously be running in parallel. -+ * -+ * On RT kernels this also has to check whether there are -+ * pinned and scheduled out tasks on the runqueue. They -+ * need to leave the migrate disabled section first. -+ */ -+ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) { -+ raw_spin_unlock(&rq->lock); -+ rcuwait_wake_up(&rq->hotplug_wait); -+ raw_spin_lock(&rq->lock); -+ } -+ return; -+ } - -+ get_task_struct(push_task); - /* -- * put_prev_task() and pick_next_task() sched -- * class method both need to have an up-to-date -- * value of rq->clock[_task] -+ * Temporarily drop rq->lock such that we can wake-up the stop task. -+ * Both preemption and IRQs are still disabled. 
- */ -- update_rq_clock(rq); -+ raw_spin_unlock(&rq->lock); -+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, -+ this_cpu_ptr(&push_work)); -+ /* -+ * At this point need_resched() is true and we'll take the loop in -+ * schedule(). The next pick is obviously going to be the stop task -+ * which is_per_cpu_kthread() and will push this task away. -+ */ -+ raw_spin_lock(&rq->lock); -+} - -- for (;;) { -- /* -- * There's this thread running, bail when that's the only -- * remaining thread: -- */ -- if (rq->nr_running == 1) -- break; -+static void balance_push_set(int cpu, bool on) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (on) -+ rq->balance_flags |= BALANCE_PUSH; -+ else -+ rq->balance_flags &= ~BALANCE_PUSH; -+ rq_unlock_irqrestore(rq, &rf); -+} - -- next = __pick_migrate_task(rq); -+/* -+ * Invoked from a CPUs hotplug control thread after the CPU has been marked -+ * inactive. All tasks which are not per CPU kernel threads are either -+ * pushed off this CPU now via balance_push() or placed on a different CPU -+ * during wakeup. Wait until the CPU is quiescent. -+ */ -+static void balance_hotplug_wait(void) -+{ -+ struct rq *rq = this_rq(); - -- /* -- * Rules for changing task_struct::cpus_mask are holding -- * both pi_lock and rq->lock, such that holding either -- * stabilizes the mask. -- * -- * Drop rq->lock is not quite as disastrous as it usually is -- * because !cpu_active at this point, which means load-balance -- * will not interfere. Also, stop-machine. -- */ -- rq_unlock(rq, rf); -- raw_spin_lock(&next->pi_lock); -- rq_relock(rq, rf); -+ rcuwait_wait_event(&rq->hotplug_wait, -+ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), -+ TASK_UNINTERRUPTIBLE); -+} - -- /* -- * Since we're inside stop-machine, _nothing_ should have -- * changed the task, WARN if weird stuff happened, because in -- * that case the above rq->lock drop is a fail too. -- */ -- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { -- raw_spin_unlock(&next->pi_lock); -- continue; -- } -+#else - -- /* Find suitable destination for @next, with force if needed. */ -- dest_cpu = select_fallback_rq(dead_rq->cpu, next); -- rq = __migrate_task(rq, rf, next, dest_cpu); -- if (rq != dead_rq) { -- rq_unlock(rq, rf); -- rq = dead_rq; -- *rf = orf; -- rq_relock(rq, rf); -- } -- raw_spin_unlock(&next->pi_lock); -- } -+static inline void balance_push(struct rq *rq) -+{ -+} -+ -+static inline void balance_push_set(int cpu, bool on) -+{ -+} - -- rq->stop = stop; -+static inline void balance_hotplug_wait(void) -+{ - } -+ - #endif /* CONFIG_HOTPLUG_CPU */ - - void set_rq_online(struct rq *rq) -@@ -6887,6 +7494,8 @@ int sched_cpu_activate(unsigned int cpu) - struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; - -+ balance_push_set(cpu, false); -+ - #ifdef CONFIG_SCHED_SMT - /* - * When going up, increment the number of cores with SMT present. 
-@@ -6922,6 +7531,8 @@ int sched_cpu_activate(unsigned int cpu) - - int sched_cpu_deactivate(unsigned int cpu) - { -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; - int ret; - - set_cpu_active(cpu, false); -@@ -6934,6 +7545,16 @@ int sched_cpu_deactivate(unsigned int cpu) - */ - synchronize_rcu(); - -+ balance_push_set(cpu, true); -+ -+ rq_lock_irqsave(rq, &rf); -+ if (rq->rd) { -+ update_rq_clock(rq); -+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -+ set_rq_offline(rq); -+ } -+ rq_unlock_irqrestore(rq, &rf); -+ - #ifdef CONFIG_SCHED_SMT - /* - * When going down, decrement the number of cores with SMT present. -@@ -6947,6 +7568,7 @@ int sched_cpu_deactivate(unsigned int cpu) - - ret = cpuset_cpu_inactive(cpu); - if (ret) { -+ balance_push_set(cpu, false); - set_cpu_active(cpu, true); - return ret; - } -@@ -6970,6 +7592,41 @@ int sched_cpu_starting(unsigned int cpu) - } - - #ifdef CONFIG_HOTPLUG_CPU -+ -+/* -+ * Invoked immediately before the stopper thread is invoked to bring the -+ * CPU down completely. At this point all per CPU kthreads except the -+ * hotplug thread (current) and the stopper thread (inactive) have been -+ * either parked or have been unbound from the outgoing CPU. Ensure that -+ * any of those which might be on the way out are gone. -+ * -+ * If after this point a bound task is being woken on this CPU then the -+ * responsible hotplug callback has failed to do it's job. -+ * sched_cpu_dying() will catch it with the appropriate fireworks. -+ */ -+int sched_cpu_wait_empty(unsigned int cpu) -+{ -+ balance_hotplug_wait(); -+ return 0; -+} -+ -+/* -+ * Since this CPU is going 'away' for a while, fold any nr_active delta we -+ * might have. Called from the CPU stopper task after ensuring that the -+ * stopper is the last running task on the CPU, so nr_active count is -+ * stable. We need to take the teardown thread which is calling this into -+ * account, so we hand in adjust = 1 to the load calculation. -+ * -+ * Also see the comment "Global load-average calculations". -+ */ -+static void calc_load_migrate(struct rq *rq) -+{ -+ long delta = calc_load_fold_active(rq, 1); -+ -+ if (delta) -+ atomic_long_add(delta, &calc_load_tasks); -+} -+ - int sched_cpu_dying(unsigned int cpu) - { - struct rq *rq = cpu_rq(cpu); -@@ -6979,12 +7636,7 @@ int sched_cpu_dying(unsigned int cpu) - sched_tick_stop(cpu); +@@ -5286,6 +5434,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + if (likely(!preemptible())) + return; - rq_lock_irqsave(rq, &rf); -- if (rq->rd) { -- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); -- set_rq_offline(rq); -- } -- migrate_tasks(rq, &rf); -- BUG_ON(rq->nr_running != 1); -+ BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); - rq_unlock_irqrestore(rq, &rf); ++ if (!preemptible_lazy()) ++ return; ++ + do { + /* + * Because the function tracer can trace preempt_count_sub() +@@ -5308,7 +5459,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + * an infinite recursion. 
+ */ + prev_ctx = exception_enter(); +- __schedule(true); ++ __schedule(true, false); + exception_exit(prev_ctx); - calc_load_migrate(rq); -@@ -7191,6 +7843,9 @@ void __init sched_init(void) + preempt_latency_stop(1); +@@ -5337,7 +5488,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) + do { + preempt_disable(); + local_irq_enable(); +- __schedule(true); ++ __schedule(true, false); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); +@@ -7122,7 +7273,9 @@ void init_idle(struct task_struct *idle, int cpu) - rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func); - #endif -+#ifdef CONFIG_HOTPLUG_CPU -+ rcuwait_init(&rq->hotplug_wait); + /* Set the preempt count _outside_ the spinlocks! */ + init_idle_preempt_count(idle, cpu); +- ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(idle)->preempt_lazy_count = 0; +#endif - #endif /* CONFIG_SMP */ - hrtick_rq_init(rq); - atomic_set(&rq->nr_iowait, 0); -@@ -7231,7 +7886,7 @@ void __init sched_init(void) + /* + * The idle tasks have their own, simple scheduling class: + */ +@@ -7227,6 +7380,7 @@ void sched_setnuma(struct task_struct *p, int nid) + #endif /* CONFIG_NUMA_BALANCING */ + + #ifdef CONFIG_HOTPLUG_CPU ++ + /* + * Ensure that the idle task is using init_mm right before its CPU goes + * offline. +@@ -7898,7 +8052,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { @@ -19942,191 +14802,33 @@ index 2d95dc3f4644..2d54f1e7ef86 100644 return (nested == preempt_offset); } -diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c -index 8cb06c8c7eb1..ceb03d76c0cc 100644 ---- a/kernel/sched/cpudeadline.c -+++ b/kernel/sched/cpudeadline.c -@@ -120,7 +120,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, - const struct sched_dl_entity *dl_se = &p->dl; - - if (later_mask && -- cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { -+ cpumask_and(later_mask, cp->free_cpus, &p->cpus_mask)) { - unsigned long cap, max_cap = 0; - int cpu, max_cpu = -1; - -@@ -151,7 +151,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, - - WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); - -- if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && -+ if (cpumask_test_cpu(best_cpu, &p->cpus_mask) && - dl_time_before(dl_se->deadline, cp->elements[0].dl)) { - if (later_mask) - cpumask_set_cpu(best_cpu, later_mask); -diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c -index 0033731a0797..11c4df2010de 100644 ---- a/kernel/sched/cpupri.c -+++ b/kernel/sched/cpupri.c -@@ -73,11 +73,11 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, - if (skip) - return 0; - -- if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) -+ if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) - return 0; - - if (lowest_mask) { -- cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); -+ cpumask_and(lowest_mask, &p->cpus_mask, vec->mask); - - /* - * We have to ensure that we have at least one bit -diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c -index 3862a28cd05d..6df71d487ed0 100644 ---- a/kernel/sched/deadline.c -+++ b/kernel/sched/deadline.c -@@ -543,7 +543,7 @@ static int push_dl_task(struct rq *rq); - - static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) - { -- return dl_task(prev); -+ return rq->online && dl_task(prev); - } - - static DEFINE_PER_CPU(struct callback_head, dl_push_head); -@@ -1888,7 +1888,7 @@ static void task_fork_dl(struct task_struct *p) - static int 
pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) - { - if (!task_running(rq, p) && -- cpumask_test_cpu(cpu, p->cpus_ptr)) -+ cpumask_test_cpu(cpu, &p->cpus_mask)) - return 1; - return 0; - } -@@ -1978,8 +1978,8 @@ static int find_later_rq(struct task_struct *task) - return this_cpu; - } - -- best_cpu = cpumask_first_and(later_mask, -- sched_domain_span(sd)); -+ best_cpu = cpumask_any_and_distribute(later_mask, -+ sched_domain_span(sd)); - /* - * Last chance: if a CPU being in both later_mask - * and current sd span is valid, that becomes our -@@ -2001,7 +2001,7 @@ static int find_later_rq(struct task_struct *task) - if (this_cpu != -1) - return this_cpu; - -- cpu = cpumask_any(later_mask); -+ cpu = cpumask_any_distribute(later_mask); - if (cpu < nr_cpu_ids) - return cpu; - -@@ -2038,7 +2038,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) - /* Retry if something changed. */ - if (double_lock_balance(rq, later_rq)) { - if (unlikely(task_rq(task) != rq || -- !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) || -+ !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || - task_running(rq, task) || - !dl_task(task) || - !task_on_rq_queued(task))) { -@@ -2105,6 +2105,9 @@ static int push_dl_task(struct rq *rq) - return 0; - - retry: -+ if (is_migration_disabled(next_task)) -+ return 0; -+ - if (WARN_ON(next_task == rq->curr)) - return 0; +diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c +index 5f611658eeab..2c36a5fad589 100644 +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset) + cpu = smp_processor_id(); + delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; + irqtime->irq_start_time += delta; +- pc = preempt_count() - offset; ++ pc = irq_count() - offset; -@@ -2182,7 +2185,7 @@ static void push_dl_tasks(struct rq *rq) - static void pull_dl_task(struct rq *this_rq) - { - int this_cpu = this_rq->cpu, cpu; -- struct task_struct *p; -+ struct task_struct *p, *push_task; - bool resched = false; - struct rq *src_rq; - u64 dmin = LONG_MAX; -@@ -2212,6 +2215,7 @@ static void pull_dl_task(struct rq *this_rq) - continue; - - /* Might drop this_rq->lock */ -+ push_task = NULL; - double_lock_balance(this_rq, src_rq); - - /* -@@ -2243,17 +2247,28 @@ static void pull_dl_task(struct rq *this_rq) - src_rq->curr->dl.deadline)) - goto skip; - -- resched = true; -- -- deactivate_task(src_rq, p, 0); -- set_task_cpu(p, this_cpu); -- activate_task(this_rq, p, 0); -- dmin = p->dl.deadline; -+ if (is_migration_disabled(p)) { -+ trace_sched_migrate_pull_tp(p); -+ push_task = get_push_task(src_rq); -+ } else { -+ deactivate_task(src_rq, p, 0); -+ set_task_cpu(p, this_cpu); -+ activate_task(this_rq, p, 0); -+ dmin = p->dl.deadline; -+ resched = true; -+ } - - /* Is there any other task even earlier? */ - } - skip: - double_unlock_balance(this_rq, src_rq); -+ -+ if (push_task) { -+ raw_spin_unlock(&this_rq->lock); -+ stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, -+ push_task, &src_rq->push_work); -+ raw_spin_lock(&this_rq->lock); -+ } - } - - if (resched) -@@ -2277,7 +2292,8 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) - } + /* + * We do not account for softirq time from ksoftirqd here. 
+@@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struct *prev) - static void set_cpus_allowed_dl(struct task_struct *p, -- const struct cpumask *new_mask) -+ const struct cpumask *new_mask, -+ u32 flags) + void vtime_account_irq(struct task_struct *tsk, unsigned int offset) { - struct root_domain *src_rd; - struct rq *rq; -@@ -2306,7 +2322,7 @@ static void set_cpus_allowed_dl(struct task_struct *p, - raw_spin_unlock(&src_dl_b->lock); - } +- unsigned int pc = preempt_count() - offset; ++ unsigned int pc = irq_count() - offset; -- set_cpus_allowed_common(p, new_mask); -+ set_cpus_allowed_common(p, new_mask, flags); - } - - /* Assumes rq->lock is held */ -@@ -2499,6 +2515,7 @@ const struct sched_class dl_sched_class - .rq_online = rq_online_dl, - .rq_offline = rq_offline_dl, - .task_woken = task_woken_dl, -+ .find_lock_rq = find_lock_later_rq, - #endif - - .task_tick = task_tick_dl, + if (pc & HARDIRQ_OFFSET) { + vtime_account_hardirq(tsk); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1a68a0536add..d31aab136644 100644 +index 04a3ce20da67..2efe2b441a7d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4357,7 +4357,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -4372,7 +4372,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -20135,7 +14837,7 @@ index 1a68a0536add..d31aab136644 100644 /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4381,7 +4381,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -4396,7 +4396,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) @@ -20144,7 +14846,7 @@ index 1a68a0536add..d31aab136644 100644 } static void -@@ -4524,7 +4524,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) +@@ -4539,7 +4539,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { @@ -20153,7 +14855,7 @@ index 1a68a0536add..d31aab136644 100644 return; } /* -@@ -4661,7 +4661,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) +@@ -4676,7 +4676,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -20162,7 +14864,7 @@ index 1a68a0536add..d31aab136644 100644 } static __always_inline -@@ -5396,7 +5396,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) +@@ -5420,7 +5420,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (rq->curr == p) @@ -20171,7 +14873,7 @@ index 1a68a0536add..d31aab136644 100644 return; } hrtick_start(rq, delta); -@@ -6953,7 +6953,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -7004,7 +7004,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: @@ -20180,7 +14882,7 @@ index 1a68a0536add..d31aab136644 100644 /* * Only set the backward buddy when the current task is still * on the rq. 
This can happen when a wakeup gets interleaved -@@ -10694,7 +10694,7 @@ static void task_fork_fair(struct task_struct *p) +@@ -10781,7 +10781,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -20189,7 +14891,7 @@ index 1a68a0536add..d31aab136644 100644 } se->vruntime -= cfs_rq->min_vruntime; -@@ -10721,7 +10721,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) +@@ -10808,7 +10808,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (rq->curr == p) { if (p->prio > oldprio) @@ -20199,7 +14901,7 @@ index 1a68a0536add..d31aab136644 100644 check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/features.h b/kernel/sched/features.h -index 7481cd96f391..862e8b3c7d28 100644 +index 68d369cba9e4..5a2e27297126 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -45,11 +45,19 @@ SCHED_FEAT(DOUBLE_TICK, false) @@ -20222,348 +14924,19 @@ index 7481cd96f391..862e8b3c7d28 100644 /* * When doing wakeups, attempt to limit superfluous scans of the LLC domain. -diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c -index f215eea6a966..03f7b397716d 100644 ---- a/kernel/sched/rt.c -+++ b/kernel/sched/rt.c -@@ -265,7 +265,7 @@ static void pull_rt_task(struct rq *this_rq); - static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) - { - /* Try to pull RT tasks here if we lower this rq's prio */ -- return rq->rt.highest_prio.curr > prev->prio; -+ return rq->online && rq->rt.highest_prio.curr > prev->prio; - } - - static inline int rt_overloaded(struct rq *rq) -@@ -1658,7 +1658,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) - static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) - { - if (!task_running(rq, p) && -- cpumask_test_cpu(cpu, p->cpus_ptr)) -+ cpumask_test_cpu(cpu, &p->cpus_mask)) - return 1; - - return 0; -@@ -1752,8 +1752,8 @@ static int find_lowest_rq(struct task_struct *task) - return this_cpu; - } - -- best_cpu = cpumask_first_and(lowest_mask, -- sched_domain_span(sd)); -+ best_cpu = cpumask_any_and_distribute(lowest_mask, -+ sched_domain_span(sd)); - if (best_cpu < nr_cpu_ids) { - rcu_read_unlock(); - return best_cpu; -@@ -1770,7 +1770,7 @@ static int find_lowest_rq(struct task_struct *task) - if (this_cpu != -1) - return this_cpu; - -- cpu = cpumask_any(lowest_mask); -+ cpu = cpumask_any_distribute(lowest_mask); - if (cpu < nr_cpu_ids) - return cpu; - -@@ -1811,7 +1811,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) - * Also make sure that it wasn't scheduled on its rq. - */ - if (unlikely(task_rq(task) != rq || -- !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || -+ !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || - task_running(rq, task) || - !rt_task(task) || - !task_on_rq_queued(task))) { -@@ -1859,7 +1859,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) - * running task can migrate over to a CPU that is running a task - * of lesser priority. 
- */ --static int push_rt_task(struct rq *rq) -+static int push_rt_task(struct rq *rq, bool pull) - { - struct task_struct *next_task; - struct rq *lowest_rq; -@@ -1873,6 +1873,39 @@ static int push_rt_task(struct rq *rq) - return 0; - - retry: -+ if (is_migration_disabled(next_task)) { -+ struct task_struct *push_task = NULL; -+ int cpu; -+ -+ if (!pull) -+ return 0; -+ -+ trace_sched_migrate_pull_tp(next_task); -+ -+ if (rq->push_busy) -+ return 0; -+ -+ cpu = find_lowest_rq(rq->curr); -+ if (cpu == -1 || cpu == rq->cpu) -+ return 0; -+ -+ /* -+ * Given we found a CPU with lower priority than @next_task, -+ * therefore it should be running. However we cannot migrate it -+ * to this other CPU, instead attempt to push the current -+ * running task on this CPU away. -+ */ -+ push_task = get_push_task(rq); -+ if (push_task) { -+ raw_spin_unlock(&rq->lock); -+ stop_one_cpu_nowait(rq->cpu, push_cpu_stop, -+ push_task, &rq->push_work); -+ raw_spin_lock(&rq->lock); -+ } -+ -+ return 0; -+ } -+ - if (WARN_ON(next_task == rq->curr)) - return 0; - -@@ -1927,12 +1960,10 @@ static int push_rt_task(struct rq *rq) - deactivate_task(rq, next_task, 0); - set_task_cpu(next_task, lowest_rq->cpu); - activate_task(lowest_rq, next_task, 0); -- ret = 1; -- - resched_curr(lowest_rq); -+ ret = 1; - - double_unlock_balance(rq, lowest_rq); -- - out: - put_task_struct(next_task); - -@@ -1942,7 +1973,7 @@ static int push_rt_task(struct rq *rq) - static void push_rt_tasks(struct rq *rq) - { - /* push_rt_task will return true if it moved an RT */ -- while (push_rt_task(rq)) -+ while (push_rt_task(rq, false)) - ; - } - -@@ -2095,7 +2126,8 @@ void rto_push_irq_work_func(struct irq_work *work) - */ - if (has_pushable_tasks(rq)) { - raw_spin_lock(&rq->lock); -- push_rt_tasks(rq); -+ while (push_rt_task(rq, true)) -+ ; - raw_spin_unlock(&rq->lock); - } - -@@ -2120,7 +2152,7 @@ static void pull_rt_task(struct rq *this_rq) - { - int this_cpu = this_rq->cpu, cpu; - bool resched = false; -- struct task_struct *p; -+ struct task_struct *p, *push_task; - struct rq *src_rq; - int rt_overload_count = rt_overloaded(this_rq); - -@@ -2167,6 +2199,7 @@ static void pull_rt_task(struct rq *this_rq) - * double_lock_balance, and another CPU could - * alter this_rq - */ -+ push_task = NULL; - double_lock_balance(this_rq, src_rq); - - /* -@@ -2194,11 +2227,15 @@ static void pull_rt_task(struct rq *this_rq) - if (p->prio < src_rq->curr->prio) - goto skip; - -- resched = true; -- -- deactivate_task(src_rq, p, 0); -- set_task_cpu(p, this_cpu); -- activate_task(this_rq, p, 0); -+ if (is_migration_disabled(p)) { -+ trace_sched_migrate_pull_tp(p); -+ push_task = get_push_task(src_rq); -+ } else { -+ deactivate_task(src_rq, p, 0); -+ set_task_cpu(p, this_cpu); -+ activate_task(this_rq, p, 0); -+ resched = true; -+ } - /* - * We continue with the search, just in - * case there's an even higher prio task -@@ -2208,6 +2245,13 @@ static void pull_rt_task(struct rq *this_rq) - } - skip: - double_unlock_balance(this_rq, src_rq); -+ -+ if (push_task) { -+ raw_spin_unlock(&this_rq->lock); -+ stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, -+ push_task, &src_rq->push_work); -+ raw_spin_lock(&this_rq->lock); -+ } - } - - if (resched) -@@ -2449,6 +2493,7 @@ const struct sched_class rt_sched_class - .rq_offline = rq_offline_rt, - .task_woken = task_woken_rt, - .switched_from = switched_from_rt, -+ .find_lock_rq = find_lock_lowest_rq, - #endif - - .task_tick = task_tick_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 
28709f6b0975..19847e4ae132 100644 +index bb09988451a0..b783aaf10cba 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -973,6 +973,7 @@ struct rq { - unsigned long cpu_capacity_orig; - - struct callback_head *balance_callback; -+ unsigned char balance_flags; - - unsigned char nohz_idle_balance; - unsigned char idle_balance; -@@ -1003,6 +1004,10 @@ struct rq { - - /* This is used to determine avg_idle's max value */ - u64 max_idle_balance_cost; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+ struct rcuwait hotplug_wait; -+#endif - #endif /* CONFIG_SMP */ - - #ifdef CONFIG_IRQ_TIME_ACCOUNTING -@@ -1048,6 +1053,12 @@ struct rq { - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; - #endif -+ -+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP) -+ unsigned int nr_pinned; -+#endif -+ unsigned int push_busy; -+ struct cpu_stop_work push_work; - }; - - #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -1075,6 +1086,16 @@ static inline int cpu_of(struct rq *rq) - #endif - } - -+#define MDF_PUSH 0x01 -+ -+static inline bool is_migration_disabled(struct task_struct *p) -+{ -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ return p->migration_disabled; -+#else -+ return false; -+#endif -+} - - #ifdef CONFIG_SCHED_SMT - extern void __update_idle_core(struct rq *rq); -@@ -1221,6 +1242,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) - rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); - rf->clock_update_flags = 0; - #endif -+#ifdef CONFIG_SMP -+ SCHED_WARN_ON(rq->balance_callback); -+#endif - } - - static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) -@@ -1382,6 +1406,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p) - - #ifdef CONFIG_SMP - -+#define BALANCE_WORK 0x01 -+#define BALANCE_PUSH 0x02 -+ - static inline void - queue_balance_callback(struct rq *rq, - struct callback_head *head, -@@ -1389,12 +1416,13 @@ queue_balance_callback(struct rq *rq, - { - lockdep_assert_held(&rq->lock); +@@ -1750,6 +1750,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) + #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ + #define WF_MIGRATED 0x20 /* Internal use, task got migrated */ + #define WF_ON_CPU 0x40 /* Wakee is on_cpu */ ++#define WF_LOCK_SLEEPER 0x80 /* Wakeup spinlock "sleeper" */ -- if (unlikely(head->next)) -+ if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH))) - return; - - head->func = (void (*)(struct callback_head *))func; - head->next = rq->balance_callback; - rq->balance_callback = head; -+ rq->balance_flags |= BALANCE_WORK; - } - - #define rcu_dereference_check_sched_domain(p) \ -@@ -1714,6 +1742,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) - #define WF_FORK 0x02 /* Child wakeup after fork */ - #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ - #define WF_ON_CPU 0x08 /* Wakee is on_cpu */ -+#define WF_LOCK_SLEEPER 0x10 /* Wakeup spinlock "sleeper" */ - - /* - * To aid in avoiding the subversion of "niceness" due to uneven distribution -@@ -1795,10 +1824,13 @@ struct sched_class { - void (*task_woken)(struct rq *this_rq, struct task_struct *task); - - void (*set_cpus_allowed)(struct task_struct *p, -- const struct cpumask *newmask); -+ const struct cpumask *newmask, -+ u32 flags); - - void (*rq_online)(struct rq *rq); - void (*rq_offline)(struct rq *rq); -+ -+ struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq); - #endif - - void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); -@@ -1882,13 
+1914,35 @@ static inline bool sched_fair_runnable(struct rq *rq) - extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); - extern struct task_struct *pick_next_task_idle(struct rq *rq); - -+#define SCA_CHECK 0x01 -+#define SCA_MIGRATE_DISABLE 0x02 -+#define SCA_MIGRATE_ENABLE 0x04 -+ #ifdef CONFIG_SMP - - extern void update_group_capacity(struct sched_domain *sd, int cpu); - - extern void trigger_load_balance(struct rq *rq); - --extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); -+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags); -+ -+static inline struct task_struct *get_push_task(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (rq->push_busy) -+ return NULL; -+ -+ if (p->nr_cpus_allowed == 1) -+ return NULL; -+ -+ rq->push_busy = true; -+ return get_task_struct(p); -+} -+ -+extern int push_cpu_stop(void *arg); - - #endif - -@@ -1932,6 +1986,15 @@ extern void reweight_task(struct task_struct *p, int prio); + static_assert(WF_EXEC == SD_BALANCE_EXEC); +@@ -2013,6 +2014,15 @@ extern void reweight_task(struct task_struct *p, int prio); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -20592,19 +14965,21 @@ index e1c655f928c7..f230b1ac7f91 100644 list_splice_init(&q->task_list, &tmp); while (!list_empty(&tmp)) { diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 1bd7e3af904f..04ead8eeb07c 100644 +index 5d3675c7a76b..8c663da4ceb9 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c -@@ -500,6 +500,7 @@ static int init_rootdomain(struct root_domain *rd) +@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_domain *rd) + #ifdef HAVE_RT_PUSH_IPI rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); - init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); -+ atomic_or(IRQ_WORK_HARD_IRQ, &rd->rto_push_work.flags); +- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); ++// init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); ++ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); #endif - init_dl_bw(&rd->dl_bw); + rd->visit_gen = 0; diff --git a/kernel/signal.c b/kernel/signal.c -index a38b3edc6851..c3d7abd1f6ae 100644 +index 5ad8566534e7..e40ed99a62a1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -20,6 +20,7 @@ @@ -20615,24 +14990,24 @@ index a38b3edc6851..c3d7abd1f6ae 100644 #include <linux/file.h> #include <linux/fs.h> #include <linux/proc_fs.h> -@@ -403,13 +404,30 @@ void task_join_group_stop(struct task_struct *task) - } +@@ -404,13 +405,30 @@ void task_join_group_stop(struct task_struct *task) + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } -+static inline struct sigqueue *get_task_cache(struct task_struct *t) ++static struct sigqueue *sigqueue_from_cache(struct task_struct *t) +{ + struct sigqueue *q = t->sigqueue_cache; + -+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) -+ return NULL; -+ return q; ++ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q) ++ return q; ++ return NULL; +} + -+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) ++static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q) +{ -+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) -+ return 0; -+ return 1; ++ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) ++ return true; ++ return false; +} + /* @@ -20643,23 +15018,23 @@ index 
a38b3edc6851..c3d7abd1f6ae 100644 static struct sigqueue * -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, -+ int override_rlimit, int fromslab) ++ int override_rlimit, bool fromslab) { struct sigqueue *q = NULL; struct user_struct *user; -@@ -431,7 +449,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi +@@ -432,7 +450,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi rcu_read_unlock(); if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { - q = kmem_cache_alloc(sigqueue_cachep, flags); + if (!fromslab) -+ q = get_task_cache(t); ++ q = sigqueue_from_cache(t); + if (!q) + q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } -@@ -448,6 +469,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi +@@ -449,6 +470,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi return q; } @@ -20667,17 +15042,17 @@ index a38b3edc6851..c3d7abd1f6ae 100644 +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, + int override_rlimit) +{ -+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); ++ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false); +} + static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) -@@ -457,6 +485,21 @@ static void __sigqueue_free(struct sigqueue *q) +@@ -458,6 +486,20 @@ static void __sigqueue_free(struct sigqueue *q) kmem_cache_free(sigqueue_cachep, q); } -+static void sigqueue_free_current(struct sigqueue *q) ++static void __sigqueue_cache_or_free(struct sigqueue *q) +{ + struct user_struct *up; + @@ -20685,17 +15060,16 @@ index a38b3edc6851..c3d7abd1f6ae 100644 + return; + + up = q->user; -+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { -+ if (atomic_dec_and_test(&up->sigpending)) -+ free_uid(up); -+ } else -+ __sigqueue_free(q); ++ if (atomic_dec_and_test(&up->sigpending)) ++ free_uid(up); ++ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q)) ++ kmem_cache_free(sigqueue_cachep, q); +} + void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; -@@ -469,6 +512,21 @@ void flush_sigqueue(struct sigpending *queue) +@@ -470,6 +512,21 @@ void flush_sigqueue(struct sigpending *queue) } } @@ -20709,7 +15083,7 @@ index a38b3edc6851..c3d7abd1f6ae 100644 + + flush_sigqueue(&tsk->pending); + -+ q = get_task_cache(tsk); ++ q = sigqueue_from_cache(tsk); + if (q) + kmem_cache_free(sigqueue_cachep, q); +} @@ -20717,16 +15091,16 @@ index a38b3edc6851..c3d7abd1f6ae 100644 /* * Flush all pending signals for this kthread. */ -@@ -593,7 +651,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i +@@ -594,7 +651,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i (info->si_code == SI_TIMER) && (info->si_sys_private); - __sigqueue_free(first); -+ sigqueue_free_current(first); ++ __sigqueue_cache_or_free(first); } else { /* * Ok, it wasn't in the queue. 
This must be -@@ -630,6 +688,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in +@@ -631,6 +688,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in bool resched_timer = false; int signr; @@ -20735,7 +15109,7 @@ index a38b3edc6851..c3d7abd1f6ae 100644 /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ -@@ -1313,6 +1373,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t) +@@ -1314,6 +1373,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t) struct k_sigaction *action; int sig = info->si_signo; @@ -20770,17 +15144,16 @@ index a38b3edc6851..c3d7abd1f6ae 100644 spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; -@@ -1806,7 +1894,8 @@ EXPORT_SYMBOL(kill_pid); +@@ -1807,7 +1894,7 @@ EXPORT_SYMBOL(kill_pid); */ struct sigqueue *sigqueue_alloc(void) { - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); -+ /* Preallocated sigqueue objects always from the slabcache ! */ -+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); ++ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true); if (q) q->flags |= SIGQUEUE_PREALLOC; -@@ -2202,16 +2291,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t +@@ -2203,16 +2290,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); @@ -20797,8 +15170,40 @@ index a38b3edc6851..c3d7abd1f6ae 100644 freezable_schedule(); cgroup_leave_frozen(true); } else { +diff --git a/kernel/smp.c b/kernel/smp.c +index 1b6070bf97bb..01e9d01d1866 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -14,6 +14,7 @@ + #include <linux/export.h> + #include <linux/percpu.h> + #include <linux/init.h> ++#include <linux/interrupt.h> + #include <linux/gfp.h> + #include <linux/smp.h> + #include <linux/cpu.h> +@@ -449,6 +450,19 @@ void flush_smp_call_function_from_idle(void) + + local_irq_save(flags); + flush_smp_call_function_queue(true); ++ ++ if (local_softirq_pending()) { ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ do_softirq(); ++ } else { ++ struct task_struct *ksoftirqd = this_cpu_ksoftirqd(); ++ ++ if (ksoftirqd && ksoftirqd->state != TASK_RUNNING) ++ wake_up_process(ksoftirqd); ++ } ++ } ++ + local_irq_restore(flags); + } + diff --git a/kernel/softirq.c b/kernel/softirq.c -index bf88d7f62433..102f5553884c 100644 +index 9d71046ea247..a9b66aa08636 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -13,6 +13,7 @@ @@ -20809,17 +15214,17 @@ index bf88d7f62433..102f5553884c 100644 #include <linux/mm.h> #include <linux/notifier.h> #include <linux/percpu.h> -@@ -92,15 +93,225 @@ static bool ksoftirqd_running(unsigned long pending) - !__kthread_should_park(tsk); - } +@@ -25,6 +26,7 @@ + #include <linux/smpboot.h> + #include <linux/tick.h> + #include <linux/irq.h> ++#include <linux/wait_bit.h> + + #define CREATE_TRACE_POINTS + #include <trace/events/irq.h> +@@ -100,20 +102,204 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); + #endif -+#ifdef CONFIG_TRACE_IRQFLAGS -+DEFINE_PER_CPU(int, hardirqs_enabled); -+DEFINE_PER_CPU(int, hardirq_context); -+EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); -+EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); -+#endif -+ /* - * preempt_count and SOFTIRQ_OFFSET usage: - * - preempt_count is changed by SOFTIRQ_OFFSET on 
entering or leaving @@ -20874,7 +15279,7 @@ index bf88d7f62433..102f5553884c 100644 + */ +bool local_bh_blocked(void) +{ -+ return this_cpu_read(softirq_ctrl.cnt) != 0; ++ return __this_cpu_read(softirq_ctrl.cnt) != 0; +} + +void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) @@ -20882,24 +15287,24 @@ index bf88d7f62433..102f5553884c 100644 + unsigned long flags; + int newcnt; + -+ WARN_ON_ONCE(in_irq()); ++ WARN_ON_ONCE(in_hardirq()); + + /* First entry of a task into a BH disabled section? */ + if (!current->softirq_disable_cnt) { + if (preemptible()) { + local_lock(&softirq_ctrl.lock); ++ /* Required to meet the RCU bottomhalf requirements. */ + rcu_read_lock(); + } else { + DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); + } + } + -+ preempt_disable(); + /* + * Track the per CPU softirq disabled state. On RT this is per CPU + * state to allow preemption of bottom half disabled sections. + */ -+ newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt); ++ newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); + /* + * Reflect the result in the task state to prevent recursion on the + * local lock and to make softirq_count() & al work. @@ -20911,7 +15316,6 @@ index bf88d7f62433..102f5553884c 100644 + lockdep_softirqs_off(ip); + raw_local_irq_restore(flags); + } -+ preempt_enable(); +} +EXPORT_SYMBOL(__local_bh_disable_ip); + @@ -20923,16 +15327,14 @@ index bf88d7f62433..102f5553884c 100644 + DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != + this_cpu_read(softirq_ctrl.cnt)); + -+ preempt_disable(); + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { + raw_local_irq_save(flags); + lockdep_softirqs_on(_RET_IP_); + raw_local_irq_restore(flags); + } + -+ newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt); ++ newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); + current->softirq_disable_cnt = newcnt; -+ preempt_enable(); + + if (!newcnt && unlock) { + rcu_read_unlock(); @@ -20951,7 +15353,7 @@ index bf88d7f62433..102f5553884c 100644 + lockdep_assert_irqs_enabled(); + + local_irq_save(flags); -+ curcnt = this_cpu_read(softirq_ctrl.cnt); ++ curcnt = __this_cpu_read(softirq_ctrl.cnt); + + /* + * If this is not reenabling soft interrupts, no point in trying to @@ -20981,29 +15383,15 @@ index bf88d7f62433..102f5553884c 100644 + __local_bh_enable(cnt, false); + __do_softirq(); + -+out: -+ __local_bh_enable(cnt, preempt_on); -+ local_irq_restore(flags); -+} -+EXPORT_SYMBOL(__local_bh_enable_ip); -+ -+/* -+ * Invoked from irq_enter_rcu() to prevent that tick_irq_enter() -+ * pointlessly wakes the softirq daemon. That's handled in __irq_exit_rcu(). -+ * None of the above logic in the regular bh_disable/enable functions is -+ * required here. -+ */ -+static inline void local_bh_disable_irq_enter(void) -+{ -+ this_cpu_add(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET); -+} -+ -+static inline void local_bh_enable_irq_enter(void) -+{ -+ this_cpu_sub(softirq_ctrl.cnt, SOFTIRQ_DISABLE_OFFSET); -+} -+ -+/* ++out: ++ __local_bh_enable(cnt, preempt_on); ++ local_irq_restore(flags); ++} ++EXPORT_SYMBOL(__local_bh_enable_ip); + +-#ifdef CONFIG_TRACE_IRQFLAGS + /* +- * This is for softirq.c-internal use, where hardirqs are disabled + * Invoked from ksoftirqd_run() outside of the interrupt disabled section + * to acquire the per CPU local lock for reentrancy protection. 
+ */ @@ -21024,47 +15412,31 @@ index bf88d7f62433..102f5553884c 100644 +static inline void softirq_handle_begin(void) { } +static inline void softirq_handle_end(void) { } + -+static inline void invoke_softirq(void) ++static inline bool should_wake_ksoftirqd(void) +{ -+ if (!this_cpu_read(softirq_ctrl.cnt)) -+ wakeup_softirqd(); ++ return !this_cpu_read(softirq_ctrl.cnt); +} + -+static inline bool should_wake_ksoftirqd(void) ++static inline void invoke_softirq(void) +{ -+ return !this_cpu_read(softirq_ctrl.cnt); ++ if (should_wake_ksoftirqd()) ++ wakeup_softirqd(); +} + +#else /* CONFIG_PREEMPT_RT */ - - /* - * This one is for softirq.c-internal use, -@@ -108,11 +319,6 @@ static bool ksoftirqd_running(unsigned long pending) ++ ++/* ++ * This one is for softirq.c-internal use, where hardirqs are disabled + * legitimately: */ - #ifdef CONFIG_TRACE_IRQFLAGS - --DEFINE_PER_CPU(int, hardirqs_enabled); --DEFINE_PER_CPU(int, hardirq_context); --EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); --EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); -- ++#ifdef CONFIG_TRACE_IRQFLAGS void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; -@@ -203,6 +409,67 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) +@@ -204,6 +390,32 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) } EXPORT_SYMBOL(__local_bh_enable_ip); -+static inline void local_bh_disable_irq_enter(void) -+{ -+ local_bh_disable(); -+} -+ -+static inline void local_bh_enable_irq_enter(void) -+{ -+ _local_bh_enable(); -+} -+ +static inline void softirq_handle_begin(void) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); @@ -21086,49 +15458,33 @@ index bf88d7f62433..102f5553884c 100644 + local_irq_enable(); +} + -+static inline void invoke_softirq(void) ++static inline bool should_wake_ksoftirqd(void) +{ -+ if (ksoftirqd_running(local_softirq_pending())) -+ return; -+ -+ if (!force_irqthreads) { -+#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK -+ /* -+ * We can safely execute softirq on the current stack if -+ * it is the irq stack, because it should be near empty -+ * at this stage. -+ */ -+ __do_softirq(); -+#else -+ /* -+ * Otherwise, irq_exit() is called on the task stack that can -+ * be potentially deep already. So call softirq in its own stack -+ * to prevent from any overrun. -+ */ -+ do_softirq_own_stack(); -+#endif -+ } else { -+ wakeup_softirqd(); -+ } ++ return true; +} + -+static inline bool should_wake_ksoftirqd(void) { return true; } -+ + static inline void invoke_softirq(void) + { + if (ksoftirqd_running(local_softirq_pending())) +@@ -248,6 +460,8 @@ asmlinkage __visible void do_softirq(void) + local_irq_restore(flags); + } + +#endif /* !CONFIG_PREEMPT_RT */ + /* * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, * but break the loop if need_resched() is set or after 2 ms. 
-@@ -272,7 +539,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) +@@ -316,7 +530,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) + pending = local_softirq_pending(); - account_irq_enter_time(current); - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); + softirq_handle_begin(); in_hardirq = lockdep_softirq_start(); + account_softirq_enter(current); - restart: -@@ -307,8 +574,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) +@@ -352,8 +566,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending >>= softirq_bit; } @@ -21140,74 +15496,17 @@ index bf88d7f62433..102f5553884c 100644 local_irq_disable(); pending = local_softirq_pending(); -@@ -322,11 +591,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) +@@ -367,8 +583,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) + account_softirq_exit(current); lockdep_softirq_end(in_hardirq); - account_irq_exit_time(current); - __local_bh_enable(SOFTIRQ_OFFSET); - WARN_ON_ONCE(in_interrupt()); + softirq_handle_end(); current_restore_flags(old_flags, PF_MEMALLOC); } -+#ifndef CONFIG_PREEMPT_RT - asmlinkage __visible void do_softirq(void) - { - __u32 pending; -@@ -344,6 +613,7 @@ asmlinkage __visible void do_softirq(void) - - local_irq_restore(flags); - } -+#endif - - /** - * irq_enter_rcu - Enter an interrupt context with RCU watching -@@ -355,9 +625,9 @@ void irq_enter_rcu(void) - * Prevent raise_softirq from needlessly waking up ksoftirqd - * here, as softirq will be serviced on return from interrupt. - */ -- local_bh_disable(); -+ local_bh_disable_irq_enter(); - tick_irq_enter(); -- _local_bh_enable(); -+ local_bh_enable_irq_enter(); - } - __irq_enter(); - } -@@ -371,32 +641,6 @@ void irq_enter(void) - irq_enter_rcu(); - } - --static inline void invoke_softirq(void) --{ -- if (ksoftirqd_running(local_softirq_pending())) -- return; -- -- if (!force_irqthreads) { --#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK -- /* -- * We can safely execute softirq on the current stack if -- * it is the irq stack, because it should be near empty -- * at this stage. -- */ -- __do_softirq(); --#else -- /* -- * Otherwise, irq_exit() is called on the task stack that can -- * be potentially deep already. So call softirq in its own stack -- * to prevent from any overrun. -- */ -- do_softirq_own_stack(); --#endif -- } else { -- wakeup_softirqd(); -- } --} -- - static inline void tick_irq_exit(void) - { - #ifdef CONFIG_NO_HZ_COMMON -@@ -466,7 +710,7 @@ inline void raise_softirq_irqoff(unsigned int nr) +@@ -463,7 +678,7 @@ inline void raise_softirq_irqoff(unsigned int nr) * Otherwise we wake up ksoftirqd to make sure we * schedule the softirq soon. 
*/ @@ -21216,13 +15515,43 @@ index bf88d7f62433..102f5553884c 100644 wakeup_softirqd(); } -@@ -606,6 +850,29 @@ void tasklet_init(struct tasklet_struct *t, +@@ -529,6 +744,16 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) + } + EXPORT_SYMBOL(__tasklet_hi_schedule); + ++static inline bool tasklet_clear_sched(struct tasklet_struct *t) ++{ ++ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { ++ wake_up_var(&t->state); ++ return true; ++ } ++ ++ return false; ++} ++ + static void tasklet_action_common(struct softirq_action *a, + struct tasklet_head *tl_head, + unsigned int softirq_nr) +@@ -548,8 +773,7 @@ static void tasklet_action_common(struct softirq_action *a, + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { +- if (!test_and_clear_bit(TASKLET_STATE_SCHED, +- &t->state)) ++ if (!tasklet_clear_sched(t)) + BUG(); + if (t->use_callback) + t->callback(t); +@@ -604,21 +828,62 @@ void tasklet_init(struct tasklet_struct *t, } EXPORT_SYMBOL(tasklet_init); +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) -+ -+void tasklet_unlock_wait(struct tasklet_struct *t) ++/* ++ * Do not use in new code. There is no real reason to invoke this from ++ * atomic contexts. ++ */ ++void tasklet_unlock_spin_wait(struct tasklet_struct *t) +{ + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { @@ -21240,35 +15569,49 @@ index bf88d7f62433..102f5553884c 100644 + } + } +} -+EXPORT_SYMBOL(tasklet_unlock_wait); ++EXPORT_SYMBOL(tasklet_unlock_spin_wait); +#endif + void tasklet_kill(struct tasklet_struct *t) { if (in_interrupt()) -@@ -613,7 +880,20 @@ void tasklet_kill(struct tasklet_struct *t) + pr_notice("Attempt to kill tasklet from interrupt\n"); - while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { - do { +- while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { +- do { - yield(); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -+ /* -+ * Prevent a live lock when current -+ * preempted soft interrupt processing or -+ * prevents ksoftirqd from running. If the -+ * tasklet runs on a different CPU then -+ * this has no effect other than doing the -+ * BH disable/enable dance for nothing. 
-+ */ -+ local_bh_disable(); -+ local_bh_enable(); -+ } else { -+ yield(); -+ } - } while (test_bit(TASKLET_STATE_SCHED, &t->state)); - } +- } while (test_bit(TASKLET_STATE_SCHED, &t->state)); +- } ++ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); ++ tasklet_unlock_wait(t); -@@ -643,18 +923,18 @@ static int ksoftirqd_should_run(unsigned int cpu) +- clear_bit(TASKLET_STATE_SCHED, &t->state); ++ tasklet_clear_sched(t); + } + EXPORT_SYMBOL(tasklet_kill); + ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) ++void tasklet_unlock(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ clear_bit(TASKLET_STATE_RUN, &t->state); ++ smp_mb__after_atomic(); ++ wake_up_var(&t->state); ++} ++EXPORT_SYMBOL_GPL(tasklet_unlock); ++ ++void tasklet_unlock_wait(struct tasklet_struct *t) ++{ ++ wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); ++} ++EXPORT_SYMBOL_GPL(tasklet_unlock_wait); ++#endif ++ + void __init softirq_init(void) + { + int cpu; +@@ -641,18 +906,18 @@ static int ksoftirqd_should_run(unsigned int cpu) static void run_ksoftirqd(unsigned int cpu) { @@ -21290,94 +15633,11 @@ index bf88d7f62433..102f5553884c 100644 } #ifdef CONFIG_HOTPLUG_CPU -diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c -index 865bb0228ab6..fb05a9293d00 100644 ---- a/kernel/stop_machine.c -+++ b/kernel/stop_machine.c -@@ -42,11 +42,23 @@ struct cpu_stopper { - struct list_head works; /* list of pending works */ - - struct cpu_stop_work stop_work; /* for stop_cpus */ -+ unsigned long caller; -+ cpu_stop_fn_t fn; - }; - - static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); - static bool stop_machine_initialized = false; - -+void print_stop_info(const char *log_lvl, struct task_struct *task) -+{ -+ struct cpu_stopper *stopper = this_cpu_ptr(&cpu_stopper); -+ -+ if (task != stopper->thread) -+ return; -+ -+ printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller); -+} -+ - /* static data for stop_cpus */ - static DEFINE_MUTEX(stop_cpus_mutex); - static bool stop_cpus_in_progress; -@@ -123,7 +135,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) - int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) - { - struct cpu_stop_done done; -- struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; -+ struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ }; - - cpu_stop_init_done(&done, 1); - if (!cpu_stop_queue_work(cpu, &work)) -@@ -331,7 +343,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * - work1 = work2 = (struct cpu_stop_work){ - .fn = multi_cpu_stop, - .arg = &msdata, -- .done = &done -+ .done = &done, -+ .caller = _RET_IP_, - }; - - cpu_stop_init_done(&done, 2); -@@ -367,7 +380,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * - bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, - struct cpu_stop_work *work_buf) - { -- *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; -+ *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, }; - return cpu_stop_queue_work(cpu, work_buf); - } - -@@ -487,6 +500,8 @@ static void cpu_stopper_thread(unsigned int cpu) - int ret; - - /* cpu stop callbacks must not sleep, make in_atomic() == T */ -+ stopper->caller = work->caller; -+ stopper->fn = fn; - preempt_count_inc(); - ret = fn(arg); - if (done) { -@@ -495,6 +510,8 @@ static void 
cpu_stopper_thread(unsigned int cpu) - cpu_stop_signal_done(done); - } - preempt_count_dec(); -+ stopper->fn = NULL; -+ stopper->caller = 0; - WARN_ONCE(preempt_count(), - "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg); - goto repeat; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 95b6a708b040..822fa0c7f5db 100644 +index 743c852e10f2..3c2e8b1dfbe3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1828,7 +1828,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, - * expiry. - */ - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { -- if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) -+ if ((task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT)) || system_state != SYSTEM_RUNNING) - mode |= HRTIMER_MODE_HARD; - } - -@@ -1993,6 +1993,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, +@@ -1988,6 +1988,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, } #endif @@ -21414,152 +15674,633 @@ index 95b6a708b040..822fa0c7f5db 100644 /* * Functions related to boot-time initialization: */ -diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c -index 1c03eec6ca9b..0642013dace4 100644 ---- a/kernel/time/sched_clock.c -+++ b/kernel/time/sched_clock.c -@@ -35,7 +35,7 @@ - * into a single 64-byte cache line. - */ - struct clock_data { -- seqcount_t seq; -+ seqcount_latch_t seq; - struct clock_read_data read_data[2]; - ktime_t wrap_kt; - unsigned long rate; -@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq) +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index e10a4af88737..0cc55791b2b6 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) + if (unlikely(local_softirq_pending())) { + static int ratelimit; + +- if (ratelimit < 10 && ++ if (ratelimit < 10 && !local_bh_blocked() && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { + pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", + (unsigned int) local_softirq_pending()); +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 8dbc008f8942..4d9ffd39f93e 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1263,8 +1263,10 @@ static inline void timer_base_unlock_expiry(struct timer_base *base) + static void timer_sync_wait_running(struct timer_base *base) + { + if (atomic_read(&base->timer_waiters)) { ++ raw_spin_unlock_irq(&base->lock); + spin_unlock(&base->expiry_lock); + spin_lock(&base->expiry_lock); ++ raw_spin_lock_irq(&base->lock); + } + } + +@@ -1455,14 +1457,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) + if (timer->flags & TIMER_IRQSAFE) { + raw_spin_unlock(&base->lock); + call_timer_fn(timer, fn, baseclk); +- base->running_timer = NULL; + raw_spin_lock(&base->lock); ++ base->running_timer = NULL; + } else { + raw_spin_unlock_irq(&base->lock); + call_timer_fn(timer, fn, baseclk); ++ raw_spin_lock_irq(&base->lock); + base->running_timer = NULL; + timer_sync_wait_running(base); +- raw_spin_lock_irq(&base->lock); + } + } + } +@@ -1741,6 +1743,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) + { + struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + ++ irq_work_tick_soft(); ++ + __run_timers(base); + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) + __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); +diff --git a/kernel/trace/blktrace.c 
b/kernel/trace/blktrace.c +index fb0fe4c66b84..c54eae2ab208 100644 +--- a/kernel/trace/blktrace.c ++++ b/kernel/trace/blktrace.c +@@ -72,17 +72,17 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, + struct blk_io_trace *t; + struct ring_buffer_event *event = NULL; + struct trace_buffer *buffer = NULL; +- int pc = 0; ++ unsigned int trace_ctx = 0; + int cpu = smp_processor_id(); + bool blk_tracer = blk_tracer_enabled; + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + + if (blk_tracer) { + buffer = blk_tr->array_buffer.buffer; +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, + sizeof(*t) + len + cgid_len, +- 0, pc); ++ trace_ctx); + if (!event) + return; + t = ring_buffer_event_data(event); +@@ -107,7 +107,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action, + memcpy((void *) t + sizeof(*t) + cgid_len, data, len); + + if (blk_tracer) +- trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); + } + } + +@@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + struct blk_io_trace *t; + unsigned long flags = 0; + unsigned long *sequence; ++ unsigned int trace_ctx = 0; + pid_t pid; +- int cpu, pc = 0; ++ int cpu; + bool blk_tracer = blk_tracer_enabled; + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + +@@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + tracing_record_cmdline(current); + + buffer = blk_tr->array_buffer.buffer; +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, + sizeof(*t) + pdu_len + cgid_len, +- 0, pc); ++ trace_ctx); + if (!event) + return; + t = ring_buffer_event_data(event); +@@ -301,7 +302,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); + + if (blk_tracer) { +- trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); + return; + } + } +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index b5815a022ecc..e33e7bad3fac 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_eval_maps; + int tracing_set_tracer(struct trace_array *tr, const char *buf); + static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #define MAX_TRACER_SIZE 100 + static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; +@@ -905,23 +905,23 @@ static inline void trace_access_lock_init(void) + + #ifdef CONFIG_STACKTRACE + static void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs); ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs); + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs); ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs); + + #else + static inline void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + } + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer 
*buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned long trace_ctx, ++ int skip, struct pt_regs *regs) + { + } + +@@ -929,24 +929,24 @@ static inline void ftrace_trace_stack(struct trace_array *tr, + + static __always_inline void + trace_event_setup(struct ring_buffer_event *event, +- int type, unsigned long flags, int pc) ++ int type, unsigned int trace_ctx) + { + struct trace_entry *ent = ring_buffer_event_data(event); + +- tracing_generic_entry_update(ent, type, flags, pc); ++ tracing_generic_entry_update(ent, type, trace_ctx); + } + + static __always_inline struct ring_buffer_event * + __trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(buffer, len); + if (event != NULL) +- trace_event_setup(event, type, flags, pc); ++ trace_event_setup(event, type, trace_ctx); + + return event; + } +@@ -1007,25 +1007,22 @@ int __trace_puts(unsigned long ip, const char *str, int size) + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct print_entry *entry; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + int alloc; +- int pc; - int sched_clock_read_retry(unsigned int seq) + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + return 0; + +- pc = preempt_count(); +- + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + alloc = sizeof(*entry) + size + 2; /* possible \n added */ + +- local_save_flags(irq_flags); ++ trace_ctx = tracing_gen_ctx(); + buffer = global_trace.array_buffer.buffer; + ring_buffer_nest_start(buffer); +- event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, +- irq_flags, pc); ++ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, ++ trace_ctx); + if (!event) { + size = 0; + goto out; +@@ -1044,7 +1041,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) + entry->buf[size] = '\0'; + + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); + out: + ring_buffer_nest_end(buffer); + return size; +@@ -1061,25 +1058,22 @@ int __trace_bputs(unsigned long ip, const char *str) + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct bputs_entry *entry; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + int size = sizeof(struct bputs_entry); + int ret = 0; +- int pc; + + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + return 0; + +- pc = preempt_count(); +- + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + +- local_save_flags(irq_flags); ++ trace_ctx = tracing_gen_ctx(); + buffer = global_trace.array_buffer.buffer; + + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, +- irq_flags, pc); ++ trace_ctx); + if (!event) + goto out; + +@@ -1088,7 +1082,7 @@ int __trace_bputs(unsigned long ip, const char *str) + entry->str = str; + + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); + + ret = 1; + out: +@@ -2584,36 +2578,52 @@ enum print_line_t trace_handle_return(struct trace_seq *s) + } + EXPORT_SYMBOL_GPL(trace_handle_return); + +-void +-tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, +- unsigned long flags, int pc) ++static 
unsigned short migration_disable_value(void) + { +- struct task_struct *tsk = current; +- +- entry->preempt_count = pc & 0xff; +- entry->pid = (tsk) ? tsk->pid : 0; +- entry->type = type; +- entry->flags = +-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +- (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ++#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) ++ return current->migration_disabled; + #else +- TRACE_FLAG_IRQS_NOSUPPORT | ++ return 0; ++#endif ++} ++ ++unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) ++{ ++ unsigned int trace_flags = irqs_status; ++ unsigned int pc; ++ ++ pc = preempt_count(); ++ ++ if (pc & NMI_MASK) ++ trace_flags |= TRACE_FLAG_NMI; ++ if (pc & HARDIRQ_MASK) ++ trace_flags |= TRACE_FLAG_HARDIRQ; ++ if (in_serving_softirq()) ++ trace_flags |= TRACE_FLAG_SOFTIRQ; ++ ++ if (tif_need_resched()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED; ++ if (test_preempt_need_resched()) ++ trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; ++ ++#ifdef CONFIG_PREEMPT_LAZY ++ if (need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; + #endif +- ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | +- ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | +- ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | +- (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | +- (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); ++ ++ return (pc & 0xff) | ++ (migration_disable_value() & 0xff) << 8 | ++ (preempt_lazy_count() & 0xff) << 16 | ++ (trace_flags << 24); + } +-EXPORT_SYMBOL_GPL(tracing_generic_entry_update); + + struct ring_buffer_event * + trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); ++ return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); + } + + DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +@@ -2733,7 +2743,7 @@ struct ring_buffer_event * + trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, + struct trace_event_file *trace_file, + int type, unsigned long len, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct ring_buffer_event *entry; + int val; +@@ -2746,7 +2756,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, + /* Try to use the per cpu buffer first */ + val = this_cpu_inc_return(trace_buffered_event_cnt); + if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { +- trace_event_setup(entry, type, flags, pc); ++ trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + return entry; + } +@@ -2754,7 +2764,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, + } + + entry = __trace_buffer_lock_reserve(*current_rb, +- type, len, flags, pc); ++ type, len, trace_ctx); + /* + * If tracing is off, but we have triggers enabled + * we still need to look at the event data. 
Use the temp_buffer +@@ -2763,8 +2773,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, + */ + if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { + *current_rb = temp_buffer; +- entry = __trace_buffer_lock_reserve(*current_rb, +- type, len, flags, pc); ++ entry = __trace_buffer_lock_reserve(*current_rb, type, len, ++ trace_ctx); + } + return entry; + } +@@ -2850,7 +2860,7 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) + ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); + event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, + fbuffer->event, fbuffer->entry, +- fbuffer->flags, fbuffer->pc, fbuffer->regs); ++ fbuffer->trace_ctx, fbuffer->regs); + } + EXPORT_SYMBOL_GPL(trace_event_buffer_commit); + +@@ -2866,7 +2876,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_commit); + void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc, ++ unsigned int trace_ctx, + struct pt_regs *regs) + { + __buffer_unlock_commit(buffer, event); +@@ -2877,8 +2887,8 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, + * and mmiotrace, but that's ok if they lose a function or + * two. They are not that meaningful. + */ +- ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); +- ftrace_trace_userstack(tr, buffer, flags, pc); ++ ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); ++ ftrace_trace_userstack(tr, buffer, trace_ctx); + } + + /* +@@ -2892,9 +2902,8 @@ trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, + } + + void +-trace_function(struct trace_array *tr, +- unsigned long ip, unsigned long parent_ip, unsigned long flags, +- int pc) ++trace_function(struct trace_array *tr, unsigned long ip, unsigned long ++ parent_ip, unsigned int trace_ctx) + { + struct trace_event_call *call = &event_function; + struct trace_buffer *buffer = tr->array_buffer.buffer; +@@ -2902,7 +2911,7 @@ trace_function(struct trace_array *tr, + struct ftrace_entry *entry; + + event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), +- flags, pc); ++ trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -2936,8 +2945,8 @@ static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); + static DEFINE_PER_CPU(int, ftrace_stack_reserve); + + static void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + struct trace_event_call *call = &event_kernel_stack; + struct ring_buffer_event *event; +@@ -2984,7 +2993,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, + + size = nr_entries * sizeof(unsigned long); + event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, +- sizeof(*entry) + size, flags, pc); ++ sizeof(*entry) + size, trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3005,22 +3014,22 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer, + + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) + return; + +- __ftrace_trace_stack(buffer, flags, skip, pc, regs); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, regs); + } + +-void __trace_stack(struct trace_array 
*tr, unsigned long flags, int skip, +- int pc) ++void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, ++ int skip) { -- return read_seqcount_retry(&cd.seq, seq); -+ return read_seqcount_latch_retry(&cd.seq, seq); + struct trace_buffer *buffer = tr->array_buffer.buffer; + + if (rcu_is_watching()) { +- __ftrace_trace_stack(buffer, flags, skip, pc, NULL); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); + return; + } + +@@ -3034,7 +3043,7 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, + return; + + rcu_irq_enter_irqson(); +- __ftrace_trace_stack(buffer, flags, skip, pc, NULL); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); + rcu_irq_exit_irqson(); } - unsigned long long notrace sched_clock(void) -@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void) +@@ -3044,19 +3053,15 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, */ - static u64 notrace suspended_sched_clock_read(void) + void trace_dump_stack(int skip) { -- unsigned int seq = raw_read_seqcount(&cd.seq); -+ unsigned int seq = raw_read_seqcount_latch(&cd.seq); +- unsigned long flags; +- + if (tracing_disabled || tracing_selftest_running) + return; - return cd.read_data[seq & 1].epoch_cyc; +- local_save_flags(flags); +- + #ifndef CONFIG_UNWINDER_ORC + /* Skip 1 to skip this function. */ + skip++; + #endif + __ftrace_trace_stack(global_trace.array_buffer.buffer, +- flags, skip, preempt_count(), NULL); ++ tracing_gen_ctx(), skip, NULL); } -diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c -index f0199a4ba1ad..1de4665dc52f 100644 ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -925,7 +925,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) - if (unlikely(local_softirq_pending())) { - static int ratelimit; + EXPORT_SYMBOL_GPL(trace_dump_stack); -- if (ratelimit < 10 && -+ if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ: local_softirq_pending %02x\n", - (unsigned int) local_softirq_pending()); -diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c -index 4c47f388a83f..999c981ae766 100644 ---- a/kernel/time/timekeeping.c -+++ b/kernel/time/timekeeping.c -@@ -64,7 +64,7 @@ static struct timekeeper shadow_timekeeper; - * See @update_fast_timekeeper() below. 
- */ - struct tk_fast { -- seqcount_raw_spinlock_t seq; -+ seqcount_latch_t seq; - struct tk_read_base base[2]; - }; +@@ -3065,7 +3070,7 @@ static DEFINE_PER_CPU(int, user_stack_count); -@@ -81,13 +81,13 @@ static struct clocksource dummy_clock = { - }; + static void + ftrace_trace_userstack(struct trace_array *tr, +- struct trace_buffer *buffer, unsigned long flags, int pc) ++ struct trace_buffer *buffer, unsigned int trace_ctx) + { + struct trace_event_call *call = &event_user_stack; + struct ring_buffer_event *event; +@@ -3092,7 +3097,7 @@ ftrace_trace_userstack(struct trace_array *tr, + __this_cpu_inc(user_stack_count); + + event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + goto out_drop_count; + entry = ring_buffer_event_data(event); +@@ -3112,7 +3117,7 @@ ftrace_trace_userstack(struct trace_array *tr, + #else /* CONFIG_USER_STACKTRACE_SUPPORT */ + static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + } + #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ +@@ -3242,9 +3247,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) + struct trace_buffer *buffer; + struct trace_array *tr = &global_trace; + struct bprint_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + char *tbuffer; +- int len = 0, size, pc; ++ int len = 0, size; - static struct tk_fast tk_fast_mono ____cacheline_aligned = { -- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock), -+ .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, - }; + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; +@@ -3252,7 +3257,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); - static struct tk_fast tk_fast_raw ____cacheline_aligned = { -- .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock), -+ .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, - }; -@@ -467,7 +467,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) - tk_clock_read(tkr), - tkr->cycle_last, - tkr->mask)); -- } while (read_seqcount_retry(&tkf->seq, seq)); -+ } while (read_seqcount_latch_retry(&tkf->seq, seq)); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); - return now; - } -@@ -533,7 +533,7 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) - tk_clock_read(tkr), - tkr->cycle_last, - tkr->mask)); -- } while (read_seqcount_retry(&tkf->seq, seq)); -+ } while (read_seqcount_latch_retry(&tkf->seq, seq)); + tbuffer = get_trace_buf(); +@@ -3266,12 +3271,11 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out_put; + +- local_save_flags(flags); + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3281,7 +3285,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + if (!call_filter_check_discard(call, entry, 
buffer, event)) { + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); ++ ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } - return now; - } -diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index a50364df1054..e25cb9d7f09a 100644 ---- a/kernel/time/timer.c -+++ b/kernel/time/timer.c -@@ -1765,6 +1765,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) + out: +@@ -3304,9 +3308,9 @@ __trace_array_vprintk(struct trace_buffer *buffer, { - struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + struct trace_event_call *call = &event_print; + struct ring_buffer_event *event; +- int len = 0, size, pc; ++ int len = 0, size; + struct print_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + char *tbuffer; -+ irq_work_tick_soft(); -+ - __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) - __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index d3e5de717df2..21706f318a00 100644 ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -2437,6 +2437,15 @@ enum print_line_t trace_handle_return(struct trace_seq *s) - } - EXPORT_SYMBOL_GPL(trace_handle_return); + if (tracing_disabled || tracing_selftest_running) +@@ -3315,7 +3319,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); -+static unsigned short migration_disable_value(struct task_struct *tsk) -+{ -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ return tsk ? tsk->migration_disabled : 0; -+#else -+ return 0; -+#endif -+} -+ - void - tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - unsigned long flags, int pc) -@@ -2444,6 +2453,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - struct task_struct *tsk = current; - - entry->preempt_count = pc & 0xff; -+ entry->preempt_lazy_count = preempt_lazy_count(); - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -@@ -2455,8 +2465,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | -- (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | -+ (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | -+ (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | - (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); -+ -+ entry->migrate_disable = migration_disable_value(tsk); - } - EXPORT_SYMBOL_GPL(tracing_generic_entry_update); -@@ -3784,14 +3797,17 @@ unsigned long trace_total_entries(struct trace_array *tr) +@@ -3327,11 +3331,10 @@ __trace_array_vprintk(struct trace_buffer *buffer, + + len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); + +- local_save_flags(flags); + size = sizeof(*entry) + len + 1; + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3340,7 +3343,7 @@ __trace_array_vprintk(struct trace_buffer *buffer, + memcpy(&entry->buf, tbuffer, len + 1); + if (!call_filter_check_discard(call, entry, buffer, event)) { + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); + } + + out: +@@ -3812,14 +3815,17 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { @@ -21585,7 +16326,7 @@ index d3e5de717df2..21706f318a00 100644 } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -3825,13 +3841,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file +@@ -3853,13 +3859,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); @@ -21608,58 +16349,904 @@ index d3e5de717df2..21706f318a00 100644 + seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); } - void -@@ -9249,7 +9268,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) - tracing_off(); + void +@@ -6653,7 +6662,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + enum event_trigger_type tt = ETT_NONE; + struct trace_buffer *buffer; + struct print_entry *entry; +- unsigned long irq_flags; + ssize_t written; + int size; + int len; +@@ -6673,7 +6681,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + +- local_save_flags(irq_flags); + size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ + + /* If less than "<faulted>", then make sure we can still add that */ +@@ -6682,7 +6689,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + + buffer = tr->array_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, +- irq_flags, preempt_count()); ++ tracing_gen_ctx()); + if (unlikely(!event)) + /* Ring buffer disabled, return as if not open for write */ + return -EBADF; +@@ -6734,7 +6741,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct raw_data_entry *entry; +- unsigned long irq_flags; + ssize_t written; + int size; + int len; +@@ -6756,14 +6762,13 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + +- local_save_flags(irq_flags); + size = sizeof(*entry) + cnt; + if (cnt < FAULT_SIZE_ID) + size += FAULT_SIZE_ID - cnt; + + buffer = tr->array_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, +- irq_flags, preempt_count()); ++ tracing_gen_ctx()); + if (!event) + /* Ring buffer disabled, return as if not open for write */ + return -EBADF; +@@ -9344,7 +9349,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) + tracing_off(); + + 
local_irq_save(flags); +- printk_nmi_direct_enter(); + + /* Simulate the iterator */ + trace_init_global_iter(&iter); +@@ -9424,7 +9428,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); + } + atomic_dec(&dump_running); +- printk_nmi_direct_exit(); + local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(ftrace_dump); +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index e448d2da0b99..93fb08ab8bb6 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { + unsigned long ret_ip; + }; + +-/* +- * trace_flag_type is an enumeration that holds different +- * states when a trace occurs. These are: +- * IRQS_OFF - interrupts were disabled +- * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags +- * NEED_RESCHED - reschedule is requested +- * HARDIRQ - inside an interrupt handler +- * SOFTIRQ - inside a softirq handler +- */ +-enum trace_flag_type { +- TRACE_FLAG_IRQS_OFF = 0x01, +- TRACE_FLAG_IRQS_NOSUPPORT = 0x02, +- TRACE_FLAG_NEED_RESCHED = 0x04, +- TRACE_FLAG_HARDIRQ = 0x08, +- TRACE_FLAG_SOFTIRQ = 0x10, +- TRACE_FLAG_PREEMPT_RESCHED = 0x20, +- TRACE_FLAG_NMI = 0x40, +-}; +- + #define TRACE_BUF_SIZE 1024 + + struct trace_array; +@@ -589,8 +570,7 @@ struct ring_buffer_event * + trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, +- int pc); ++ unsigned int trace_ctx); + + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data); +@@ -615,11 +595,11 @@ unsigned long trace_total_entries(struct trace_array *tr); + void trace_function(struct trace_array *tr, + unsigned long ip, + unsigned long parent_ip, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + void trace_graph_function(struct trace_array *tr, + unsigned long ip, + unsigned long parent_ip, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + void trace_latency_header(struct seq_file *m); + void trace_default_header(struct seq_file *m); + void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +@@ -687,11 +667,10 @@ static inline void latency_fsnotify(struct trace_array *tr) { } + #endif + + #ifdef CONFIG_STACKTRACE +-void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, +- int pc); ++void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); + #else +-static inline void __trace_stack(struct trace_array *tr, unsigned long flags, +- int skip, int pc) ++static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, ++ int skip) + { + } + #endif /* CONFIG_STACKTRACE */ +@@ -831,10 +810,10 @@ extern void graph_trace_open(struct trace_iterator *iter); + extern void graph_trace_close(struct trace_iterator *iter); + extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + extern void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #ifdef CONFIG_DYNAMIC_FTRACE + extern struct ftrace_hash __rcu *ftrace_graph_hash; +@@ -1297,15 +1276,15 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, + void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc, ++ unsigned int trcace_ctx, + struct 
pt_regs *regs); + + static inline void trace_buffer_unlock_commit(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); ++ trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); + } + + DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +@@ -1366,8 +1345,7 @@ __event_trigger_test_discard(struct trace_event_file *file, + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself +- * @irq_flags: The state of the interrupts at the start of the event +- * @pc: The state of the preempt count at the start of the event. ++ * @trace_ctx: The tracing context flags. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and +@@ -1377,12 +1355,12 @@ static inline void + event_trigger_unlock_commit(struct trace_event_file *file, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- void *entry, unsigned long irq_flags, int pc) ++ void *entry, unsigned int trace_ctx) + { + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) +- trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); ++ trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); + + if (tt) + event_triggers_post_call(file, tt); +@@ -1394,8 +1372,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself +- * @irq_flags: The state of the interrupts at the start of the event +- * @pc: The state of the preempt count at the start of the event. ++ * @trace_ctx: The tracing context flags. + * + * This is a helper function to handle triggers that require data + * from the event itself. 
It also tests the event against filters and +@@ -1408,14 +1385,14 @@ static inline void + event_trigger_unlock_commit_regs(struct trace_event_file *file, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- void *entry, unsigned long irq_flags, int pc, ++ void *entry, unsigned int trace_ctx, + struct pt_regs *regs) + { + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit_regs(file->tr, buffer, event, +- irq_flags, pc, regs); ++ trace_ctx, regs); + + if (tt) + event_triggers_post_call(file, tt); +diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c +index eff099123aa2..e47fdb4c92fb 100644 +--- a/kernel/trace/trace_branch.c ++++ b/kernel/trace/trace_branch.c +@@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) + struct ring_buffer_event *event; + struct trace_branch *entry; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + const char *p; + + if (current->trace_recursion & TRACE_BRANCH_BIT) +@@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect) + if (atomic_read(&data->disabled)) + goto out; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(flags); + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + goto out; + +diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c +index a71181655958..288ad2c274fb 100644 +--- a/kernel/trace/trace_event_perf.c ++++ b/kernel/trace/trace_event_perf.c +@@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); + void perf_trace_buf_update(void *record, u16 type) + { + struct trace_entry *entry = record; +- int pc = preempt_count(); +- unsigned long flags; + +- local_save_flags(flags); +- tracing_generic_entry_update(entry, type, flags, pc); ++ tracing_generic_entry_update(entry, type, tracing_gen_ctx()); + } + NOKPROBE_SYMBOL(perf_trace_buf_update); + +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index d387b774ceeb..a7c176251c63 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -183,6 +183,8 @@ static int trace_define_common_fields(void) + __common_field(unsigned char, flags); + __common_field(unsigned char, preempt_count); + __common_field(int, pid); ++ __common_field(unsigned char, migrate_disable); ++ __common_field(unsigned char, preempt_lazy_count); + + return ret; + } +@@ -258,22 +260,19 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, + trace_event_ignore_this_pid(trace_file)) + return NULL; + +- local_save_flags(fbuffer->flags); +- fbuffer->pc = preempt_count(); + /* + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables + * preemption (adding one to the preempt_count). Since we are + * interested in the preempt_count at the time the tracepoint was + * hit, we need to subtract one to offset the increment. 
+ */ +- if (IS_ENABLED(CONFIG_PREEMPTION)) +- fbuffer->pc--; ++ fbuffer->trace_ctx = tracing_gen_ctx_dec(); + fbuffer->trace_file = trace_file; + + fbuffer->event = + trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, + event_call->event.type, len, +- fbuffer->flags, fbuffer->pc); ++ fbuffer->trace_ctx); + if (!fbuffer->event) + return NULL; + +@@ -3679,12 +3678,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, + struct trace_buffer *buffer; + struct ring_buffer_event *event; + struct ftrace_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int cpu; +- int pc; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); +@@ -3692,11 +3690,9 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, + if (disabled != 1) + goto out; + +- local_save_flags(flags); +- + event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, + TRACE_FN, sizeof(*entry), +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3704,7 +3700,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip, + entry->parent_ip = parent_ip; + + event_trigger_unlock_commit(&event_trace_file, buffer, event, +- entry, flags, pc); ++ entry, trace_ctx); + out: + atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); + preempt_enable_notrace(); +diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c +index 22bcf7c51d1e..c188045c5f97 100644 +--- a/kernel/trace/trace_events_inject.c ++++ b/kernel/trace/trace_events_inject.c +@@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size) + static int parse_entry(char *str, struct trace_event_call *call, void **pentry) + { + struct ftrace_event_field *field; +- unsigned long irq_flags; + void *entry = NULL; + int entry_size; + u64 val = 0; +@@ -203,9 +202,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry) + if (!entry) + return -ENOMEM; + +- local_save_flags(irq_flags); +- tracing_generic_entry_update(entry, call->event.type, irq_flags, +- preempt_count()); ++ tracing_generic_entry_update(entry, call->event.type, ++ tracing_gen_ctx()); + + while ((len = parse_field(str, call, &field, &val)) > 0) { + if (is_function_field(field)) +diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c +index c5095dd28e20..9bb03ab44fff 100644 +--- a/kernel/trace/trace_functions.c ++++ b/kernel/trace/trace_functions.c +@@ -132,10 +132,9 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, + { + struct trace_array *tr = op->private; + struct trace_array_cpu *data; +- unsigned long flags; ++ unsigned int trace_ctx; + int bit; + int cpu; +- int pc; + + if (unlikely(!tr->function_enabled)) + return; +@@ -144,15 +143,14 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, + if (bit < 0) + return; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + cpu = smp_processor_id(); + data = per_cpu_ptr(tr->array_buffer.data, cpu); +- if (!atomic_read(&data->disabled)) { +- local_save_flags(flags); +- trace_function(tr, ip, parent_ip, flags, pc); +- } ++ if (!atomic_read(&data->disabled)) ++ trace_function(tr, ip, parent_ip, trace_ctx); ++ + ftrace_test_recursion_unlock(bit); + preempt_enable_notrace(); + } +@@ -184,7 +182,7 @@ 
function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + unsigned long flags; + long disabled; + int cpu; +- int pc; ++ unsigned int trace_ctx; + + if (unlikely(!tr->function_enabled)) + return; +@@ -199,9 +197,9 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { +- pc = preempt_count(); +- trace_function(tr, ip, parent_ip, flags, pc); +- __trace_stack(tr, flags, STACK_SKIP, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ trace_function(tr, ip, parent_ip, trace_ctx); ++ __trace_stack(tr, trace_ctx, STACK_SKIP); + } + + atomic_dec(&data->disabled); +@@ -404,13 +402,11 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, + + static __always_inline void trace_stack(struct trace_array *tr) + { +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + +- local_save_flags(flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + +- __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); ++ __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); + } + + static void +diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c +index d874dec87131..0aa6e6faa943 100644 +--- a/kernel/trace/trace_functions_graph.c ++++ b/kernel/trace/trace_functions_graph.c +@@ -96,8 +96,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, + + int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, +- unsigned long flags, +- int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_funcgraph_entry; + struct ring_buffer_event *event; +@@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return 0; + entry = ring_buffer_event_data(event); +@@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int ret; + int cpu; +- int pc; + + if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) + return 0; +@@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) + data = per_cpu_ptr(tr->array_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { +- pc = preempt_count(); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + } else { + ret = 0; + } +@@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) + + static void + __trace_graph_function(struct trace_array *tr, +- unsigned long ip, unsigned long flags, int pc) ++ unsigned long ip, unsigned int trace_ctx) + { + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { +@@ -202,22 +201,21 @@ __trace_graph_function(struct trace_array *tr, + .rettime = time, + }; + +- __trace_graph_entry(tr, &ent, flags, pc); +- __trace_graph_return(tr, &ret, flags, pc); ++ __trace_graph_entry(tr, &ent, trace_ctx); ++ __trace_graph_return(tr, &ret, trace_ctx); + } + + void + trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- __trace_graph_function(tr, ip, flags, pc); ++ __trace_graph_function(tr, ip, trace_ctx); + } + + void 
__trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, +- unsigned long flags, +- int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_funcgraph_exit; + struct ring_buffer_event *event; +@@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -239,9 +237,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace) + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int cpu; +- int pc; + + ftrace_graph_addr_finish(trace); + +@@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) + data = per_cpu_ptr(tr->array_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { +- pc = preempt_count(); +- __trace_graph_return(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ __trace_graph_return(tr, trace, trace_ctx); + } + atomic_dec(&data->disabled); + local_irq_restore(flags); +diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c +index c0df9b97f147..34dc1a712dcb 100644 +--- a/kernel/trace/trace_hwlat.c ++++ b/kernel/trace/trace_hwlat.c +@@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct hwlat_entry *entry; +- unsigned long flags; +- int pc; +- +- pc = preempt_count(); +- local_save_flags(flags); + + event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), +- flags, pc); ++ tracing_gen_ctx()); + if (!event) + return; + entry = ring_buffer_event_data(event); +diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c +index 6756379b661f..590b3d51afae 100644 +--- a/kernel/trace/trace_irqsoff.c ++++ b/kernel/trace/trace_irqsoff.c +@@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + + if (!func_prolog_dec(tr, &data, &flags)) + return; + +- trace_function(tr, ip, parent_ip, flags, preempt_count()); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ++ trace_function(tr, ip, parent_ip, trace_ctx); + + atomic_dec(&data->disabled); + } +@@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + int ret; +- int pc; + + if (ftrace_graph_ignore_func(trace)) + return 0; +@@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) + if (!func_prolog_dec(tr, &data, &flags)) + return 0; + +- pc = preempt_count(); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + + return ret; +@@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace) + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + ftrace_graph_addr_finish(trace); + + if (!func_prolog_dec(tr, &data, &flags)) + return; + +- pc = preempt_count(); +- 
__trace_graph_return(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ __trace_graph_return(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + } + +@@ -267,12 +270,12 @@ static void irqsoff_print_header(struct seq_file *s) + static void + __trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + if (is_graph(tr)) +- trace_graph_function(tr, ip, parent_ip, flags, pc); ++ trace_graph_function(tr, ip, parent_ip, trace_ctx); + else +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + } + + #else +@@ -322,15 +325,13 @@ check_critical_timing(struct trace_array *tr, + { + u64 T0, T1, delta; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + T0 = data->preempt_timestamp; + T1 = ftrace_now(cpu); + delta = T1-T0; + +- local_save_flags(flags); +- +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + if (!report_latency(tr, delta)) + goto out; +@@ -341,9 +342,9 @@ check_critical_timing(struct trace_array *tr, + if (!report_latency(tr, delta)) + goto out_unlock; + +- __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); ++ __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); + /* Skip 5 functions to get to the irq/preempt enable function */ +- __trace_stack(tr, flags, 5, pc); ++ __trace_stack(tr, trace_ctx, 5); + + if (data->critical_sequence != max_sequence) + goto out_unlock; +@@ -363,16 +364,15 @@ check_critical_timing(struct trace_array *tr, + out: + data->critical_sequence = max_sequence; + data->preempt_timestamp = ftrace_now(cpu); +- __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); ++ __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); + } + + static nokprobe_inline void +-start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) ++start_critical_timing(unsigned long ip, unsigned long parent_ip) + { + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; +- unsigned long flags; + + if (!tracer_enabled || !tracing_is_enabled()) + return; +@@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) + data->preempt_timestamp = ftrace_now(cpu); + data->critical_start = parent_ip ? : ip; + +- local_save_flags(flags); +- +- __trace_function(tr, ip, parent_ip, flags, pc); ++ __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); + + per_cpu(tracing_cpu, cpu) = 1; + +@@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) + } + + static nokprobe_inline void +-stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) ++stop_critical_timing(unsigned long ip, unsigned long parent_ip) + { + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; +- unsigned long flags; ++ unsigned int trace_ctx; + + cpu = raw_smp_processor_id(); + /* Always clear the tracing cpu on stopping the trace */ +@@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) + + atomic_inc(&data->disabled); + +- local_save_flags(flags); +- __trace_function(tr, ip, parent_ip, flags, pc); ++ trace_ctx = tracing_gen_ctx(); ++ __trace_function(tr, ip, parent_ip, trace_ctx); + check_critical_timing(tr, data, parent_ip ? 
: ip, cpu); + data->critical_start = 0; + atomic_dec(&data->disabled); +@@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) + /* start and stop critical timings used to for stoppage (in idle) */ + void start_critical_timings(void) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) || irq_trace()) +- start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); ++ if (preempt_trace(preempt_count()) || irq_trace()) ++ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } + EXPORT_SYMBOL_GPL(start_critical_timings); + NOKPROBE_SYMBOL(start_critical_timings); + + void stop_critical_timings(void) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) || irq_trace()) +- stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); ++ if (preempt_trace(preempt_count()) || irq_trace()) ++ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } + EXPORT_SYMBOL_GPL(stop_critical_timings); + NOKPROBE_SYMBOL(stop_critical_timings); +@@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct trace_array *tr) + */ + void tracer_hardirqs_on(unsigned long a0, unsigned long a1) + { +- unsigned int pc = preempt_count(); +- +- if (!preempt_trace(pc) && irq_trace()) +- stop_critical_timing(a0, a1, pc); ++ if (!preempt_trace(preempt_count()) && irq_trace()) ++ stop_critical_timing(a0, a1); + } + NOKPROBE_SYMBOL(tracer_hardirqs_on); + + void tracer_hardirqs_off(unsigned long a0, unsigned long a1) + { +- unsigned int pc = preempt_count(); +- +- if (!preempt_trace(pc) && irq_trace()) +- start_critical_timing(a0, a1, pc); ++ if (!preempt_trace(preempt_count()) && irq_trace()) ++ start_critical_timing(a0, a1); + } + NOKPROBE_SYMBOL(tracer_hardirqs_off); + +@@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __read_mostly = + #ifdef CONFIG_PREEMPT_TRACER + void tracer_preempt_on(unsigned long a0, unsigned long a1) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) && !irq_trace()) +- stop_critical_timing(a0, a1, pc); ++ if (preempt_trace(preempt_count()) && !irq_trace()) ++ stop_critical_timing(a0, a1); + } + + void tracer_preempt_off(unsigned long a0, unsigned long a1) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) && !irq_trace()) +- start_critical_timing(a0, a1, pc); ++ if (preempt_trace(preempt_count()) && !irq_trace()) ++ start_critical_timing(a0, a1); + } + + static int preemptoff_tracer_init(struct trace_array *tr) +diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c +index 56c7fbff7bd7..f6c459aba8a6 100644 +--- a/kernel/trace/trace_kprobe.c ++++ b/kernel/trace/trace_kprobe.c +@@ -1386,8 +1386,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, + if (trace_trigger_soft_disabled(trace_file)) + return; - local_irq_save(flags); -- printk_nmi_direct_enter(); +- local_save_flags(fbuffer.flags); +- fbuffer.pc = preempt_count(); ++ fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = trace_file; + + dsize = __get_data_size(&tk->tp, regs); +@@ -1396,7 +1395,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, +- fbuffer.flags, fbuffer.pc); ++ fbuffer.trace_ctx); + if (!fbuffer.event) + return; - /* Simulate the iterator */ - trace_init_global_iter(&iter); -@@ -9329,7 +9347,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) - atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); - } - atomic_dec(&dump_running); -- 
printk_nmi_direct_exit(); - local_irq_restore(flags); - } - EXPORT_SYMBOL_GPL(ftrace_dump); -diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h -index 610d21355526..580f2355a835 100644 ---- a/kernel/trace/trace.h -+++ b/kernel/trace/trace.h -@@ -143,6 +143,7 @@ struct kretprobe_trace_entry_head { - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler -+ * NEED_RESCHED_LAZY - lazy reschedule is requested - */ - enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, -@@ -152,6 +153,7 @@ enum trace_flag_type { - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, - }; +@@ -1434,8 +1433,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, + if (trace_trigger_soft_disabled(trace_file)) + return; - #define TRACE_BUF_SIZE 1024 -diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c -index a85effb2373b..a88a78c240bb 100644 ---- a/kernel/trace/trace_events.c -+++ b/kernel/trace/trace_events.c -@@ -182,6 +182,8 @@ static int trace_define_common_fields(void) - __common_field(unsigned char, flags); - __common_field(unsigned char, preempt_count); - __common_field(int, pid); -+ __common_field(unsigned char, migrate_disable); -+ __common_field(unsigned char, preempt_lazy_count); +- local_save_flags(fbuffer.flags); +- fbuffer.pc = preempt_count(); ++ fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = trace_file; + + dsize = __get_data_size(&tk->tp, regs); +@@ -1443,7 +1441,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, +- fbuffer.flags, fbuffer.pc); ++ fbuffer.trace_ctx); + if (!fbuffer.event) + return; - return ret; +diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c +index 84582bf1ed5f..7221ae0b4c47 100644 +--- a/kernel/trace/trace_mmiotrace.c ++++ b/kernel/trace/trace_mmiotrace.c +@@ -300,10 +300,11 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct trace_mmiotrace_rw *entry; +- int pc = preempt_count(); ++ unsigned int trace_ctx; + ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, +- sizeof(*entry), 0, pc); ++ sizeof(*entry), trace_ctx); + if (!event) { + atomic_inc(&dropped_count); + return; +@@ -312,7 +313,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, + entry->rw = *rw; + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + void mmio_trace_rw(struct mmiotrace_rw *rw) +@@ -330,10 +331,11 @@ static void __trace_mmiotrace_map(struct trace_array *tr, + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct trace_mmiotrace_map *entry; +- int pc = preempt_count(); ++ unsigned int trace_ctx; + ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, +- sizeof(*entry), 0, pc); ++ sizeof(*entry), trace_ctx); + if (!event) { + atomic_inc(&dropped_count); + return; +@@ -342,7 +344,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, + entry->map = *map; + + if (!call_filter_check_discard(call, entry, buffer, event)) +- 
trace_buffer_unlock_commit(tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); } + + void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c -index 000e9dc224c6..bc24ae8e3613 100644 +index 92b1575ae0ca..f80fa69b6943 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -441,6 +441,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) @@ -21708,26 +17295,348 @@ index 000e9dc224c6..bc24ae8e3613 100644 return !trace_seq_has_overflowed(s); } -diff --git a/kernel/workqueue.c b/kernel/workqueue.c -index c41c3c17b86a..bb8a84dbabb9 100644 ---- a/kernel/workqueue.c -+++ b/kernel/workqueue.c -@@ -4905,6 +4905,10 @@ static void unbind_workers(int cpu) - pool->flags |= POOL_DISASSOCIATED; +diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c +index c0181066dbe9..e5778d1d7a5b 100644 +--- a/kernel/trace/trace_sched_wakeup.c ++++ b/kernel/trace/trace_sched_wakeup.c +@@ -67,7 +67,7 @@ static bool function_enabled; + static int + func_prolog_preempt_disable(struct trace_array *tr, + struct trace_array_cpu **data, +- int *pc) ++ unsigned int *trace_ctx) + { + long disabled; + int cpu; +@@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace_array *tr, + if (likely(!wakeup_task)) + return 0; + +- *pc = preempt_count(); ++ *trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + cpu = raw_smp_processor_id(); +@@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) + { + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; +- unsigned long flags; +- int pc, ret = 0; ++ unsigned int trace_ctx; ++ int ret = 0; + + if (ftrace_graph_ignore_func(trace)) + return 0; +@@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) + if (ftrace_graph_notrace_addr(trace->func)) + return 1; + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return 0; + +- local_save_flags(flags); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + preempt_enable_notrace(); + +@@ -146,16 +145,14 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace) + { + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + ftrace_graph_addr_finish(trace); + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return; + +- local_save_flags(flags); +- __trace_graph_return(tr, trace, flags, pc); ++ __trace_graph_return(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + + preempt_enable_notrace(); +@@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return; + + local_irq_save(flags); +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + local_irq_restore(flags); + + atomic_dec(&data->disabled); +@@ -303,12 +300,12 @@ static void wakeup_print_header(struct seq_file *s) + static void + __trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- 
unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + if (is_graph(tr)) +- trace_graph_function(tr, ip, parent_ip, flags, pc); ++ trace_graph_function(tr, ip, parent_ip, trace_ctx); + else +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + } + + static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) +@@ -375,7 +372,7 @@ static void + tracing_sched_switch_trace(struct trace_array *tr, + struct task_struct *prev, + struct task_struct *next, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_context_switch; + struct trace_buffer *buffer = tr->array_buffer.buffer; +@@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_array *tr, + struct ctx_switch_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_CTX, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_array *tr, + entry->next_cpu = task_cpu(next); + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, flags, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + static void + tracing_sched_wakeup_trace(struct trace_array *tr, + struct task_struct *wakee, + struct task_struct *curr, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_wakeup; + struct ring_buffer_event *event; +@@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, + struct trace_buffer *buffer = tr->array_buffer.buffer; + + event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, + entry->next_cpu = task_cpu(wakee); + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, flags, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + static void notrace +@@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, + unsigned long flags; + long disabled; + int cpu; +- int pc; ++ unsigned int trace_ctx; + + tracing_record_cmdline(prev); + +@@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, + if (next != wakeup_task) + return; + +- pc = preempt_count(); +- + /* disable local data, not wakeup_cpu data */ + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); +@@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, + goto out; - raw_spin_unlock_irq(&pool->lock); + local_irq_save(flags); ++ trace_ctx = tracing_gen_ctx_flags(flags); + -+ for_each_pool_worker(worker, pool) -+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_active_mask) < 0); + arch_spin_lock(&wakeup_lock); + + /* We could race with grabbing wakeup_lock */ +@@ -473,9 +470,9 @@ probe_wakeup_sched_switch(void *ignore, bool preempt, + /* The task we are waiting for is waking up */ + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); + +- __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); +- tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); +- __trace_stack(wakeup_trace, flags, 0, pc); ++ __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, 
trace_ctx); ++ tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); ++ __trace_stack(wakeup_trace, trace_ctx, 0); + + T0 = data->preempt_timestamp; + T1 = ftrace_now(cpu); +@@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_struct *p) + { + struct trace_array_cpu *data; + int cpu = smp_processor_id(); +- unsigned long flags; + long disabled; +- int pc; ++ unsigned int trace_ctx; + + if (likely(!tracer_enabled)) + return; +@@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_struct *p) + (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) + return; + +- pc = preempt_count(); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); + if (unlikely(disabled != 1)) + goto out; + ++ trace_ctx = tracing_gen_ctx(); + - mutex_unlock(&wq_pool_attach_mutex); + /* interrupts should be off from try_to_wake_up */ + arch_spin_lock(&wakeup_lock); - /* +@@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_struct *p) + + wakeup_task = get_task_struct(p); + +- local_save_flags(flags); +- + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); + data->preempt_timestamp = ftrace_now(cpu); +- tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); +- __trace_stack(wakeup_trace, flags, 0, pc); ++ tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); ++ __trace_stack(wakeup_trace, trace_ctx, 0); + + /* + * We must be careful in using CALLER_ADDR2. But since wake_up + * is not called by an assembly function (where as schedule is) + * it should be safe to use it here. + */ +- __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); ++ __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); + + out_locked: + arch_spin_unlock(&wakeup_lock); +diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c +index d85a2f0f316b..8bfcd3b09422 100644 +--- a/kernel/trace/trace_syscalls.c ++++ b/kernel/trace/trace_syscalls.c +@@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + struct trace_buffer *buffer; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + unsigned long args[6]; +- int pc; + int syscall_nr; + int size; + +@@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) + + size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + +- local_save_flags(irq_flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, +- sys_data->enter_event->event.type, size, irq_flags, pc); ++ sys_data->enter_event->event.type, size, trace_ctx); + if (!event) + return; + +@@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) + memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); + + event_trigger_unlock_commit(trace_file, buffer, event, entry, +- irq_flags, pc); ++ trace_ctx); + } + + static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) +@@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + struct trace_buffer *buffer; +- unsigned long irq_flags; +- int pc; ++ unsigned int trace_ctx; + int syscall_nr; + + syscall_nr = trace_get_syscall_nr(current, regs); +@@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *data, 
struct pt_regs *regs, long ret) + if (!sys_data) + return; + +- local_save_flags(irq_flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, + sys_data->exit_event->event.type, sizeof(*entry), +- irq_flags, pc); ++ trace_ctx); + if (!event) + return; + +@@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) + entry->ret = syscall_get_return_value(current, regs); + + event_trigger_unlock_commit(trace_file, buffer, event, entry, +- irq_flags, pc); ++ trace_ctx); + } + + static int reg_event_syscall_enter(struct trace_event_file *file, +diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c +index 3cf7128e1ad3..a1ed96a7a462 100644 +--- a/kernel/trace/trace_uprobe.c ++++ b/kernel/trace/trace_uprobe.c +@@ -961,7 +961,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + size = esize + tu->tp.size + dsize; + event = trace_event_buffer_lock_reserve(&buffer, trace_file, +- call->event.type, size, 0, 0); ++ call->event.type, size, 0); + if (!event) + return; + +@@ -977,7 +977,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, + + memcpy(data, ucb->buf, tu->tp.size + dsize); + +- event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); ++ event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); + } + + /* uprobe handler */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index 0c781f912f9f..16fcda68c2b6 100644 +index 7937265ef879..74c7913df9dd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1332,7 +1332,7 @@ config DEBUG_ATOMIC_SLEEP +@@ -1353,7 +1353,7 @@ config DEBUG_ATOMIC_SLEEP config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" @@ -21736,37 +17645,23 @@ index 0c781f912f9f..16fcda68c2b6 100644 help Say Y here if you want the kernel to run a short self-test during bootup. The self-test checks whether common types of locking bugs -diff --git a/lib/cpumask.c b/lib/cpumask.c -index 85da6ab4fbb5..35924025097b 100644 ---- a/lib/cpumask.c -+++ b/lib/cpumask.c -@@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const struct cpumask *src1p, - return next; +diff --git a/lib/bug.c b/lib/bug.c +index 7103440c0ee1..baf61c307a6a 100644 +--- a/lib/bug.c ++++ b/lib/bug.c +@@ -205,6 +205,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) + else + pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", + (void *)bugaddr); ++ pr_flush(1000, true); + + return BUG_TRAP_TYPE_BUG; } - EXPORT_SYMBOL(cpumask_any_and_distribute); -+ -+int cpumask_any_distribute(const struct cpumask *srcp) -+{ -+ int next, prev; -+ -+ /* NOTE: our first selection will skip 0. 
*/ -+ prev = __this_cpu_read(distribute_cpu_mask_prev); -+ -+ next = cpumask_next(prev, srcp); -+ if (next >= nr_cpu_ids) -+ next = cpumask_first(srcp); -+ -+ if (next < nr_cpu_ids) -+ __this_cpu_write(distribute_cpu_mask_prev, next); -+ -+ return next; -+} -+EXPORT_SYMBOL(cpumask_any_distribute); diff --git a/lib/debugobjects.c b/lib/debugobjects.c -index fe4557955d97..f8bfd257d0bb 100644 +index 9e14ae02306b..083882a3cf2f 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c -@@ -537,7 +537,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) +@@ -557,7 +557,10 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack struct debug_obj *obj; unsigned long flags; @@ -21778,26 +17673,6 @@ index fe4557955d97..f8bfd257d0bb 100644 db = get_bucket((unsigned long) addr); -diff --git a/lib/dump_stack.c b/lib/dump_stack.c -index a00ee6eedc7c..f5a33b6f773f 100644 ---- a/lib/dump_stack.c -+++ b/lib/dump_stack.c -@@ -12,6 +12,7 @@ - #include <linux/atomic.h> - #include <linux/kexec.h> - #include <linux/utsname.h> -+#include <linux/stop_machine.h> - - static char dump_stack_arch_desc_str[128]; - -@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl) - log_lvl, dump_stack_arch_desc_str); - - print_worker_info(log_lvl, current); -+ print_stop_info(log_lvl, current); - } - - /** diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2f17b488d58e..7557bf7ecf1f 100644 --- a/lib/irq_poll.c @@ -21843,10 +17718,10 @@ index 2f17b488d58e..7557bf7ecf1f 100644 return 0; } diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c -index 14f44f59e733..b110d073d2d2 100644 +index 9959ea23529e..716a83aa79d8 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -742,6 +742,8 @@ GENERATE_TESTCASE(init_held_rtmutex); +@@ -787,6 +787,8 @@ GENERATE_TESTCASE(init_held_rtmutex); #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) @@ -21855,7 +17730,7 @@ index 14f44f59e733..b110d073d2d2 100644 #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) -@@ -757,9 +759,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) +@@ -802,9 +804,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) @@ -21868,7 +17743,7 @@ index 14f44f59e733..b110d073d2d2 100644 /* * Enabling hardirqs with a softirq-safe lock held: */ -@@ -792,6 +797,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) +@@ -837,6 +842,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #undef E1 #undef E2 @@ -21877,7 +17752,7 @@ index 14f44f59e733..b110d073d2d2 100644 /* * Enabling irqs with an irq-safe lock held: */ -@@ -815,6 +822,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) +@@ -860,6 +867,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) @@ -21886,7 +17761,7 @@ index 14f44f59e733..b110d073d2d2 100644 #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) -@@ -830,6 +839,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) +@@ -875,6 +884,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) @@ -21895,7 +17770,7 @@ index 14f44f59e733..b110d073d2d2 100644 #undef E1 #undef E2 -@@ -861,6 +872,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) +@@ 
-906,6 +917,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) @@ -21904,7 +17779,7 @@ index 14f44f59e733..b110d073d2d2 100644 #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) -@@ -876,6 +889,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) +@@ -921,6 +934,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) @@ -21913,7 +17788,7 @@ index 14f44f59e733..b110d073d2d2 100644 #undef E1 #undef E2 #undef E3 -@@ -909,6 +924,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) +@@ -954,6 +969,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) @@ -21922,7 +17797,7 @@ index 14f44f59e733..b110d073d2d2 100644 #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) -@@ -924,10 +941,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) +@@ -969,10 +986,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) @@ -21937,10 +17812,11 @@ index 14f44f59e733..b110d073d2d2 100644 /* * read-lock / write-lock irq inversion. * -@@ -990,6 +1011,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) +@@ -1162,6 +1183,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_R3W1) + #undef E1 #undef E2 #undef E3 - ++ +#endif + +#ifndef CONFIG_PREEMPT_RT @@ -21948,7 +17824,7 @@ index 14f44f59e733..b110d073d2d2 100644 /* * read-lock / write-lock recursion that is actually safe. */ -@@ -1028,6 +1053,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) +@@ -1208,6 +1234,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock) #undef E2 #undef E3 @@ -21957,7 +17833,7 @@ index 14f44f59e733..b110d073d2d2 100644 /* * read-lock / write-lock recursion that is unsafe. 
*/ -@@ -2058,6 +2085,7 @@ void locking_selftest(void) +@@ -2517,6 +2545,7 @@ void locking_selftest(void) printk(" --------------------------------------------------------------------------\n"); @@ -21965,10 +17841,10 @@ index 14f44f59e733..b110d073d2d2 100644 /* * irq-context testcases: */ -@@ -2070,6 +2098,28 @@ void locking_selftest(void) +@@ -2531,6 +2560,28 @@ void locking_selftest(void) + DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2); + DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3); - DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); - // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); +#else + /* On -rt, we only do hardirq context test for raw spinlock */ + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); @@ -21991,11 +17867,11 @@ index 14f44f59e733..b110d073d2d2 100644 + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); +#endif - ww_tests(); + force_read_lock_recursive = 0; diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c -index 15ca78e1c7d4..77bf84987cda 100644 +index 8abe1870dba4..b09a490f5f70 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, @@ -22012,10 +17888,10 @@ index 15ca78e1c7d4..77bf84987cda 100644 put_cpu(); } diff --git a/lib/scatterlist.c b/lib/scatterlist.c -index 5d63a8857f36..5569dac27afe 100644 +index a59778946404..907f59045998 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -811,7 +811,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) +@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { @@ -22024,24 +17900,8 @@ index 5d63a8857f36..5569dac27afe 100644 kunmap_atomic(miter->addr); } else kunmap(miter->page); -diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c -index 525222e4f409..faaa927ac2c8 100644 ---- a/lib/smp_processor_id.c -+++ b/lib/smp_processor_id.c -@@ -26,6 +26,11 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) - if (current->nr_cpus_allowed == 1) - goto out; - -+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) -+ if (current->migration_disabled) -+ goto out; -+#endif -+ - /* - * It is valid to assume CPU-locality during early bootup: - */ diff --git a/mm/Kconfig b/mm/Kconfig -index 6c974888f86f..056460878a2b 100644 +index f730605b8dcf..97bce365ae26 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -387,7 +387,7 @@ config NOMMU_INITIAL_TRIM_EXCESS @@ -22053,36 +17913,11 @@ index 6c974888f86f..056460878a2b 100644 select COMPACTION select XARRAY_MULTI help -diff --git a/mm/highmem.c b/mm/highmem.c -index 64d8dea47dd1..7d3065719ce8 100644 ---- a/mm/highmem.c -+++ b/mm/highmem.c -@@ -31,8 +31,11 @@ - #include <asm/tlbflush.h> - #include <linux/vmalloc.h> - -+#ifndef CONFIG_PREEMPT_RT - #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) - DEFINE_PER_CPU(int, __kmap_atomic_idx); -+EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); -+#endif - #endif - - /* -@@ -108,8 +111,6 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) - atomic_long_t _totalhigh_pages __read_mostly; - EXPORT_SYMBOL(_totalhigh_pages); - --EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); -- - unsigned int nr_free_highpages (void) - { - struct zone *zone; diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 6877c765b8d0..f35ffe52cc4f 100644 +index 
913c2b9e5c72..da25645e5c47 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -63,6 +63,7 @@ +@@ -66,6 +66,7 @@ #include <net/sock.h> #include <net/ip.h> #include "slab.h" @@ -22090,7 +17925,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 #include <linux/uaccess.h> -@@ -90,6 +91,13 @@ bool cgroup_memory_noswap __read_mostly; +@@ -96,6 +97,13 @@ bool cgroup_memory_noswap __read_mostly; static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); #endif @@ -22104,7 +17939,23 @@ index 6877c765b8d0..f35ffe52cc4f 100644 /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -2154,6 +2162,7 @@ void unlock_page_memcg(struct page *page) +@@ -805,6 +813,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = pn->memcg; + ++ preempt_disable_rt(); + /* Update memcg */ + __mod_memcg_state(memcg, idx, val); + +@@ -824,6 +833,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + x = 0; + } + __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); ++ preempt_enable_rt(); + } + + /** +@@ -2236,6 +2246,7 @@ void unlock_page_memcg(struct page *page) EXPORT_SYMBOL(unlock_page_memcg); struct memcg_stock_pcp { @@ -22112,7 +17963,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; -@@ -2205,7 +2214,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -2287,7 +2298,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (nr_pages > MEMCG_CHARGE_BATCH) return ret; @@ -22121,7 +17972,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -2213,7 +2222,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -2295,7 +2306,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) ret = true; } @@ -22130,7 +17981,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 return ret; } -@@ -2248,14 +2257,14 @@ static void drain_local_stock(struct work_struct *dummy) +@@ -2330,14 +2341,14 @@ static void drain_local_stock(struct work_struct *dummy) * The only protection from memory hotplug vs. drain_stock races is * that we always operate on local CPU stock here with IRQ disabled */ @@ -22147,7 +17998,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 } /* -@@ -2267,7 +2276,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -2349,7 +2360,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) struct memcg_stock_pcp *stock; unsigned long flags; @@ -22156,7 +18007,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -2280,7 +2289,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -2362,7 +2373,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (stock->nr_pages > MEMCG_CHARGE_BATCH) drain_stock(stock); @@ -22165,7 +18016,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 } /* -@@ -2300,7 +2309,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) +@@ -2382,7 +2393,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) * as well as workers from this path always operate on the local * per-cpu data. CPU up doesn't touch memcg_stock at all. 
*/ @@ -22174,7 +18025,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -2323,7 +2332,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) +@@ -2405,7 +2416,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) schedule_work_on(cpu, &stock->work); } } @@ -22183,7 +18034,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 mutex_unlock(&percpu_charge_mutex); } -@@ -3084,7 +3093,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) +@@ -3169,7 +3180,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; @@ -22192,7 +18043,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 stock = this_cpu_ptr(&memcg_stock); if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { -@@ -3092,7 +3101,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) +@@ -3177,7 +3188,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } @@ -22201,7 +18052,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 return ret; } -@@ -3151,7 +3160,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) +@@ -3236,7 +3247,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) struct memcg_stock_pcp *stock; unsigned long flags; @@ -22210,7 +18061,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 stock = this_cpu_ptr(&memcg_stock); if (stock->cached_objcg != objcg) { /* reset if necessary */ -@@ -3165,7 +3174,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) +@@ -3250,7 +3261,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) if (stock->nr_bytes > PAGE_SIZE) drain_obj_stock(stock); @@ -22219,7 +18070,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 } int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) -@@ -5681,12 +5690,12 @@ static int mem_cgroup_move_account(struct page *page, +@@ -5699,12 +5710,12 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; @@ -22234,7 +18085,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 out_unlock: unlock_page(page); out: -@@ -6722,10 +6731,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) +@@ -6755,10 +6766,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) css_get(&memcg->css); commit_charge(page, memcg); @@ -22247,7 +18098,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 if (PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -6769,11 +6778,11 @@ static void uncharge_batch(const struct uncharge_gather *ug) +@@ -6802,11 +6813,11 @@ static void uncharge_batch(const struct uncharge_gather *ug) memcg_oom_recover(ug->memcg); } @@ -22261,7 +18112,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 /* drop reference from uncharge_page */ css_put(&ug->memcg->css); -@@ -6927,10 +6936,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) +@@ -6958,10 +6969,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) css_get(&memcg->css); commit_charge(newpage, memcg); @@ -22274,7 +18125,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -7050,9 +7059,13 @@ static int __init mem_cgroup_init(void) +@@ -7081,9 +7092,13 @@ static int __init mem_cgroup_init(void) cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, 
"mm/memctrl:dead", NULL, memcg_hotplug_cpu_dead); @@ -22291,7 +18142,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 for_each_node(node) { struct mem_cgroup_tree_per_node *rtpn; -@@ -7101,6 +7114,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +@@ -7132,6 +7147,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) struct mem_cgroup *memcg, *swap_memcg; unsigned int nr_entries; unsigned short oldid; @@ -22299,7 +18150,7 @@ index 6877c765b8d0..f35ffe52cc4f 100644 VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -7146,9 +7160,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +@@ -7180,9 +7196,13 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * important here to have the interrupts disabled because it is the * only synchronisation we have for updating the per-CPU variables. */ @@ -22314,10 +18165,10 @@ index 6877c765b8d0..f35ffe52cc4f 100644 css_put(&memcg->css); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 780c8f023b28..f581204e3f35 100644 +index 519a60d5b6f7..36f314ae0e56 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -61,6 +61,7 @@ +@@ -62,6 +62,7 @@ #include <linux/hugetlb.h> #include <linux/sched/rt.h> #include <linux/sched/mm.h> @@ -22325,9 +18176,9 @@ index 780c8f023b28..f581204e3f35 100644 #include <linux/page_owner.h> #include <linux/kthread.h> #include <linux/memcontrol.h> -@@ -357,6 +358,13 @@ EXPORT_SYMBOL(nr_node_ids); - EXPORT_SYMBOL(nr_online_nodes); - #endif +@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes); + + int page_group_by_mobility_disabled __read_mostly; +struct pa_lock { + local_lock_t l; @@ -22336,178 +18187,55 @@ index 780c8f023b28..f581204e3f35 100644 + .l = INIT_LOCAL_LOCK(l), +}; + - int page_group_by_mobility_disabled __read_mostly; - #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1283,7 +1291,7 @@ static inline void prefetch_buddy(struct page *page) - } - /* -- * Frees a number of pages from the PCP lists -+ * Frees a number of pages which have been collected from the pcp lists. - * Assumes all pages on list are in same zone, and of same order. - * count is the number of pages to free. - * -@@ -1293,15 +1301,56 @@ static inline void prefetch_buddy(struct page *page) - * And clear the zone's pages_scanned counter, to hold off the "all pages are - * pinned" detection logic. - */ --static void free_pcppages_bulk(struct zone *zone, int count, -- struct per_cpu_pages *pcp) -+static void free_pcppages_bulk(struct zone *zone, struct list_head *head, -+ bool zone_retry) -+{ -+ bool isolated_pageblocks; -+ struct page *page, *tmp; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&zone->lock, flags); -+ isolated_pageblocks = has_isolate_pageblock(zone); -+ -+ /* -+ * Use safe version since after __free_one_page(), -+ * page->lru.next will not point to original list. -+ */ -+ list_for_each_entry_safe(page, tmp, head, lru) { -+ int mt = get_pcppage_migratetype(page); -+ -+ if (page_zone(page) != zone) { -+ /* -+ * free_unref_page_list() sorts pages by zone. If we end -+ * up with pages from a different NUMA nodes belonging -+ * to the same ZONE index then we need to redo with the -+ * correct ZONE pointer. Skip the page for now, redo it -+ * on the next iteration. 
-+ */ -+ WARN_ON_ONCE(zone_retry == false); -+ if (zone_retry) -+ continue; -+ } -+ -+ /* MIGRATE_ISOLATE page should not go to pcplists */ -+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); -+ /* Pageblock could have been isolated meanwhile */ -+ if (unlikely(isolated_pageblocks)) -+ mt = get_pageblock_migratetype(page); -+ -+ list_del(&page->lru); -+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); -+ trace_mm_page_pcpu_drain(page, 0, mt); -+ } -+ spin_unlock_irqrestore(&zone->lock, flags); -+} -+ -+static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp, -+ struct list_head *dst) - { - int migratetype = 0; - int batch_free = 0; - int prefetch_nr = 0; -- bool isolated_pageblocks; -- struct page *page, *tmp; -- LIST_HEAD(head); -+ struct page *page; - - /* - * Ensure proper count is passed which otherwise would stuck in the -@@ -1338,7 +1387,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, - if (bulkfree_pcp_prepare(page)) - continue; - -- list_add_tail(&page->lru, &head); -+ list_add_tail(&page->lru, dst); - - /* - * We are going to put the page back to the global -@@ -1353,26 +1402,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, - prefetch_buddy(page); - } while (--count && --batch_free && !list_empty(list)); - } -- -- spin_lock(&zone->lock); -- isolated_pageblocks = has_isolate_pageblock(zone); -- -- /* -- * Use safe version since after __free_one_page(), -- * page->lru.next will not point to original list. -- */ -- list_for_each_entry_safe(page, tmp, &head, lru) { -- int mt = get_pcppage_migratetype(page); -- /* MIGRATE_ISOLATE page should not go to pcplists */ -- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); -- /* Pageblock could have been isolated meanwhile */ -- if (unlikely(isolated_pageblocks)) -- mt = get_pageblock_migratetype(page); -- -- __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); -- trace_mm_page_pcpu_drain(page, 0, mt); -- } -- spin_unlock(&zone->lock); - } - - static void free_one_page(struct zone *zone, -@@ -1473,10 +1502,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) + * During boot we initialize deferred pages on-demand, as needed, but once +@@ -1537,11 +1545,11 @@ static void __free_pages_ok(struct page *page, unsigned int order, return; migratetype = get_pfnblock_migratetype(page, pfn); - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); __count_vm_events(PGFREE, 1 << order); - free_one_page(page_zone(page), page, pfn, order, migratetype); + free_one_page(page_zone(page), page, pfn, order, migratetype, + fpi_flags); - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); } void __free_pages_core(struct page *page, unsigned int order) -@@ -2877,13 +2906,18 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) - { +@@ -2957,12 +2965,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) unsigned long flags; int to_drain, batch; -+ LIST_HEAD(dst); - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) -- free_pcppages_bulk(zone, to_drain, pcp); + free_pcppages_bulk(zone, to_drain, pcp); - local_irq_restore(flags); -+ isolate_pcp_pages(to_drain, pcp, &dst); -+ + local_unlock_irqrestore(&pa_lock.l, flags); -+ -+ if (to_drain > 0) -+ free_pcppages_bulk(zone, &dst, false); } #endif -@@ -2899,14 +2933,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) - unsigned long flags; +@@ -2979,13 +2987,13 @@ 
static void drain_pages_zone(unsigned int cpu, struct zone *zone) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; -+ LIST_HEAD(dst); -+ int count; - local_irq_save(flags); + local_lock_irqsave(&pa_lock.l, flags); pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; -- if (pcp->count) -- free_pcppages_bulk(zone, pcp->count, pcp); + if (pcp->count) + free_pcppages_bulk(zone, pcp->count, pcp); - local_irq_restore(flags); -+ count = pcp->count; -+ if (count) -+ isolate_pcp_pages(count, pcp, &dst); -+ + local_unlock_irqrestore(&pa_lock.l, flags); -+ -+ if (count) -+ free_pcppages_bulk(zone, &dst, false); } /* -@@ -2954,9 +2995,9 @@ static void drain_local_pages_wq(struct work_struct *work) +@@ -3033,9 +3041,9 @@ static void drain_local_pages_wq(struct work_struct *work) * cpu which is allright but we also have to make sure to not move to * a different one. */ @@ -22519,60 +18247,19 @@ index 780c8f023b28..f581204e3f35 100644 } /* -@@ -3105,7 +3146,8 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn) - return true; - } - --static void free_unref_page_commit(struct page *page, unsigned long pfn) -+static void free_unref_page_commit(struct page *page, unsigned long pfn, -+ struct list_head *dst) - { - struct zone *zone = page_zone(page); - struct per_cpu_pages *pcp; -@@ -3134,7 +3176,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) - pcp->count++; - if (pcp->count >= pcp->high) { - unsigned long batch = READ_ONCE(pcp->batch); -- free_pcppages_bulk(zone, batch, pcp); -+ -+ isolate_pcp_pages(batch, pcp, dst); - } - } - -@@ -3145,13 +3188,17 @@ void free_unref_page(struct page *page) - { - unsigned long flags; - unsigned long pfn = page_to_pfn(page); -+ struct zone *zone = page_zone(page); -+ LIST_HEAD(dst); - +@@ -3248,9 +3256,9 @@ void free_unref_page(struct page *page) if (!free_unref_page_prepare(page, pfn)) return; - local_irq_save(flags); -- free_unref_page_commit(page, pfn); -- local_irq_restore(flags); + local_lock_irqsave(&pa_lock.l, flags); -+ free_unref_page_commit(page, pfn, &dst); + free_unref_page_commit(page, pfn); +- local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); -+ if (!list_empty(&dst)) -+ free_pcppages_bulk(zone, &dst, false); } /* -@@ -3162,6 +3209,11 @@ void free_unref_page_list(struct list_head *list) - struct page *page, *next; - unsigned long flags, pfn; - int batch_count = 0; -+ struct list_head dsts[__MAX_NR_ZONES]; -+ int i; -+ -+ for (i = 0; i < __MAX_NR_ZONES; i++) -+ INIT_LIST_HEAD(&dsts[i]); - - /* Prepare pages for freeing */ - list_for_each_entry_safe(page, next, list, lru) { -@@ -3171,25 +3223,42 @@ void free_unref_page_list(struct list_head *list) +@@ -3270,7 +3278,7 @@ void free_unref_page_list(struct list_head *list) set_page_private(page, pfn); } @@ -22580,16 +18267,8 @@ index 780c8f023b28..f581204e3f35 100644 + local_lock_irqsave(&pa_lock.l, flags); list_for_each_entry_safe(page, next, list, lru) { unsigned long pfn = page_private(page); -+ enum zone_type type; - set_page_private(page, 0); - trace_mm_page_free_batched(page); -- free_unref_page_commit(page, pfn); -+ type = page_zonenum(page); -+ free_unref_page_commit(page, pfn, &dsts[type]); - - /* - * Guard against excessive IRQ disabled times when we get +@@ -3283,12 +3291,12 @@ void free_unref_page_list(struct list_head *list) * a large list of pages to free. 
*/ if (++batch_count == SWAP_CLUSTER_MAX) { @@ -22602,25 +18281,10 @@ index 780c8f023b28..f581204e3f35 100644 } - local_irq_restore(flags); + local_unlock_irqrestore(&pa_lock.l, flags); -+ -+ for (i = 0; i < __MAX_NR_ZONES; ) { -+ struct page *page; -+ struct zone *zone; -+ -+ if (list_empty(&dsts[i])) { -+ i++; -+ continue; -+ } -+ -+ page = list_first_entry(&dsts[i], struct page, lru); -+ zone = page_zone(page); -+ -+ free_pcppages_bulk(zone, &dsts[i], true); -+ } } /* -@@ -3343,7 +3412,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, +@@ -3443,7 +3451,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct page *page; unsigned long flags; @@ -22629,7 +18293,7 @@ index 780c8f023b28..f581204e3f35 100644 pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); -@@ -3351,7 +3420,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, +@@ -3451,7 +3459,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); zone_statistics(preferred_zone, zone); } @@ -22638,17 +18302,24 @@ index 780c8f023b28..f581204e3f35 100644 return page; } -@@ -3385,7 +3454,8 @@ struct page *rmqueue(struct zone *preferred_zone, +@@ -3485,7 +3493,9 @@ struct page *rmqueue(struct zone *preferred_zone, * allocate greater than order-1 page units with __GFP_NOFAIL. */ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); - spin_lock_irqsave(&zone->lock, flags); ++ + local_lock_irqsave(&pa_lock.l, flags); + spin_lock(&zone->lock); do { page = NULL; -@@ -3411,7 +3481,7 @@ struct page *rmqueue(struct zone *preferred_zone, +@@ -3506,12 +3516,13 @@ struct page *rmqueue(struct zone *preferred_zone, + spin_unlock(&zone->lock); + if (!page) + goto failed; ++ + __mod_zone_freepage_state(zone, -(1 << order), + get_pcppage_migratetype(page)); __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); zone_statistics(preferred_zone, zone); @@ -22657,7 +18328,7 @@ index 780c8f023b28..f581204e3f35 100644 out: /* Separate test+clear to avoid unnecessary atomics */ -@@ -3424,7 +3494,7 @@ struct page *rmqueue(struct zone *preferred_zone, +@@ -3524,7 +3535,7 @@ struct page *rmqueue(struct zone *preferred_zone, return page; failed: @@ -22666,7 +18337,7 @@ index 780c8f023b28..f581204e3f35 100644 return NULL; } -@@ -8697,7 +8767,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8828,7 +8839,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -22675,7 +18346,7 @@ index 780c8f023b28..f581204e3f35 100644 if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -8706,7 +8776,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -8837,7 +8848,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } @@ -22685,7 +18356,7 @@ index 780c8f023b28..f581204e3f35 100644 #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/mm/shmem.c b/mm/shmem.c -index 8e2b35ba93ad..e029b943ebed 100644 +index 7c6b6d8f6c39..6a64c3bfecad 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop) @@ -22752,7 +18423,7 @@ index 8e2b35ba93ad..e029b943ebed 100644 } return mpol; } -@@ -3582,9 +3583,10 @@ static int shmem_reconfigure(struct fs_context *fc) +@@ -3587,9 +3588,10 @@ static int shmem_reconfigure(struct fs_context *fc) struct shmem_options *ctx 
= fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); unsigned long inodes; @@ -22764,7 +18435,7 @@ index 8e2b35ba93ad..e029b943ebed 100644 inodes = sbinfo->max_inodes - sbinfo->free_inodes; if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { -@@ -3629,14 +3631,15 @@ static int shmem_reconfigure(struct fs_context *fc) +@@ -3634,14 +3636,15 @@ static int shmem_reconfigure(struct fs_context *fc) * Preserve previous mempolicy unless mpol remount option was specified. */ if (ctx->mpol) { @@ -22783,7 +18454,7 @@ index 8e2b35ba93ad..e029b943ebed 100644 return invalfc(fc, "%s", err); } -@@ -3753,7 +3756,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) +@@ -3758,7 +3761,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) sbinfo->mpol = ctx->mpol; ctx->mpol = NULL; @@ -22793,7 +18464,7 @@ index 8e2b35ba93ad..e029b943ebed 100644 goto failed; spin_lock_init(&sbinfo->shrinklist_lock); diff --git a/mm/slab.c b/mm/slab.c -index f658e86ec8ce..3dbddaad8a32 100644 +index d7c8da9319c7..1fa2155b9a80 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -233,7 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) @@ -22928,7 +18599,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", node, total_slabs - free_slabs, total_slabs, -@@ -2106,7 +2106,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) +@@ -2107,7 +2107,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep) { #ifdef CONFIG_SMP check_irq_off(); @@ -22937,7 +18608,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 #endif } -@@ -2114,7 +2114,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) +@@ -2115,7 +2115,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) { #ifdef CONFIG_SMP check_irq_off(); @@ -22946,7 +18617,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 #endif } -@@ -2154,9 +2154,9 @@ static void do_drain(void *arg) +@@ -2155,9 +2155,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); n = get_node(cachep, node); @@ -22958,7 +18629,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 ac->avail = 0; slabs_destroy(cachep, &list); } -@@ -2174,9 +2174,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep) +@@ -2175,9 +2175,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep) drain_alien_cache(cachep, n->alien); for_each_kmem_cache_node(cachep, node, n) { @@ -22970,7 +18641,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 slabs_destroy(cachep, &list); } -@@ -2198,10 +2198,10 @@ static int drain_freelist(struct kmem_cache *cache, +@@ -2199,10 +2199,10 @@ static int drain_freelist(struct kmem_cache *cache, nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -22983,7 +18654,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 goto out; } -@@ -2214,7 +2214,7 @@ static int drain_freelist(struct kmem_cache *cache, +@@ -2215,7 +2215,7 @@ static int drain_freelist(struct kmem_cache *cache, * to the cache. 
*/ n->free_objects -= cache->num; @@ -22992,7 +18663,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 slab_destroy(cache, page); nr_freed++; } -@@ -2652,7 +2652,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) +@@ -2651,7 +2651,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) INIT_LIST_HEAD(&page->slab_list); n = get_node(cachep, page_to_nid(page)); @@ -23001,7 +18672,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 n->total_slabs++; if (!page->active) { list_add_tail(&page->slab_list, &n->slabs_free); -@@ -2662,7 +2662,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) +@@ -2661,7 +2661,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; @@ -23010,7 +18681,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 fixup_objfreelist_debug(cachep, &list); } -@@ -2828,7 +2828,7 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) +@@ -2827,7 +2827,7 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) { struct page *page; @@ -23019,7 +18690,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list); if (!page) { -@@ -2855,10 +2855,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, +@@ -2854,10 +2854,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, if (!gfp_pfmemalloc_allowed(flags)) return NULL; @@ -23032,7 +18703,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 return NULL; } -@@ -2867,7 +2867,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, +@@ -2866,7 +2866,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep, fixup_slab_list(cachep, n, page, &list); @@ -23041,7 +18712,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 fixup_objfreelist_debug(cachep, &list); return obj; -@@ -2926,7 +2926,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +@@ -2925,7 +2925,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) if (!n->free_objects && (!shared || !shared->avail)) goto direct_grow; @@ -23050,7 +18721,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 shared = READ_ONCE(n->shared); /* See if we can refill from the shared array */ -@@ -2950,7 +2950,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +@@ -2949,7 +2949,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) must_grow: n->free_objects -= ac->avail; alloc_done: @@ -23059,7 +18730,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 fixup_objfreelist_debug(cachep, &list); direct_grow: -@@ -3175,7 +3175,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, +@@ -3174,7 +3174,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, BUG_ON(!n); check_irq_off(); @@ -23068,7 +18739,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 page = get_first_slab(n, false); if (!page) goto must_grow; -@@ -3193,12 +3193,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, +@@ -3192,12 +3192,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, fixup_slab_list(cachep, n, page, &list); @@ -23083,7 +18754,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); if (page) { /* This slab isn't counted yet so don't update free_objects */ -@@ -3376,7 +3376,7 @@ 
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) +@@ -3375,7 +3375,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) check_irq_off(); n = get_node(cachep, node); @@ -23092,7 +18763,7 @@ index f658e86ec8ce..3dbddaad8a32 100644 if (n->shared) { struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; -@@ -3405,7 +3405,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) +@@ -3404,7 +3404,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) STATS_SET_FREEABLE(cachep, i); } #endif @@ -23144,10 +18815,10 @@ index f658e86ec8ce..3dbddaad8a32 100644 num_objs = total_slabs * cachep->num; active_slabs = total_slabs - free_slabs; diff --git a/mm/slab.h b/mm/slab.h -index 6cc323f1313a..089bcef627e6 100644 +index 1a756a359fa8..7caf7dcc5fba 100644 --- a/mm/slab.h +++ b/mm/slab.h -@@ -530,7 +530,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, +@@ -523,7 +523,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, * The slab lists for all objects. */ struct kmem_cache_node { @@ -23157,10 +18828,10 @@ index 6cc323f1313a..089bcef627e6 100644 #ifdef CONFIG_SLAB struct list_head slabs_partial; /* partial list first, better asm code */ diff --git a/mm/slub.c b/mm/slub.c -index 6d3574013b2f..795b9a3488df 100644 +index b22a4b101c84..0e317cbb8c25 100644 --- a/mm/slub.c +++ b/mm/slub.c -@@ -434,7 +434,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, +@@ -436,7 +436,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, #ifdef CONFIG_SLUB_DEBUG static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; @@ -23169,7 +18840,7 @@ index 6d3574013b2f..795b9a3488df 100644 /* * Determine a map of object in use on a page. 
-@@ -450,7 +450,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) +@@ -452,7 +452,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) VM_BUG_ON(!irqs_disabled()); @@ -23178,7 +18849,7 @@ index 6d3574013b2f..795b9a3488df 100644 bitmap_zero(object_map, page->objects); -@@ -463,7 +463,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) +@@ -465,7 +465,7 @@ static unsigned long *get_map(struct kmem_cache *s, struct page *page) static void put_map(unsigned long *map) __releases(&object_map_lock) { VM_BUG_ON(map != object_map); @@ -23187,7 +18858,7 @@ index 6d3574013b2f..795b9a3488df 100644 } static inline unsigned int size_from_object(struct kmem_cache *s) -@@ -1213,7 +1213,7 @@ static noinline int free_debug_processing( +@@ -1216,7 +1216,7 @@ static noinline int free_debug_processing( unsigned long flags; int ret = 0; @@ -23196,7 +18867,7 @@ index 6d3574013b2f..795b9a3488df 100644 slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1248,7 +1248,7 @@ static noinline int free_debug_processing( +@@ -1251,7 +1251,7 @@ static noinline int free_debug_processing( bulk_cnt, cnt); slab_unlock(page); @@ -23205,20 +18876,7 @@ index 6d3574013b2f..795b9a3488df 100644 if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1496,6 +1496,12 @@ static bool freelist_corrupted(struct kmem_cache *s, struct page *page, - } - #endif /* CONFIG_SLUB_DEBUG */ - -+struct slub_free_list { -+ raw_spinlock_t lock; -+ struct list_head list; -+}; -+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); -+ - /* - * Hooks for other subsystems that check memory allocations. In a typical - * production configuration these hooks all should produce no code at all. -@@ -1739,10 +1745,18 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +@@ -1739,10 +1739,18 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) void *start, *p, *next; int idx; bool shuffle; @@ -23237,7 +18895,7 @@ index 6d3574013b2f..795b9a3488df 100644 local_irq_enable(); flags |= s->allocflags; -@@ -1801,7 +1815,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +@@ -1803,7 +1811,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->frozen = 1; out: @@ -23246,37 +18904,57 @@ index 6d3574013b2f..795b9a3488df 100644 local_irq_disable(); if (!page) return NULL; -@@ -1844,6 +1858,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) - __free_pages(page, order); +@@ -1861,12 +1869,29 @@ static void free_slab(struct kmem_cache *s, struct page *page) + __free_slab(s, page); + } + ++static void discard_slab_delayed(struct kmem_cache *s, struct page *page, ++ struct list_head *delayed_free) ++{ ++ dec_slabs_node(s, page_to_nid(page), page->objects); ++ list_add(&page->lru, delayed_free); ++} ++ + static void discard_slab(struct kmem_cache *s, struct page *page) + { + dec_slabs_node(s, page_to_nid(page), page->objects); + free_slab(s, page); } -+static void free_delayed(struct list_head *h) ++static void discard_delayed(struct list_head *l) +{ -+ while (!list_empty(h)) { -+ struct page *page = list_first_entry(h, struct page, lru); ++ while (!list_empty(l)) { ++ struct page *page = list_first_entry(l, struct page, lru); + + list_del(&page->lru); + __free_slab(page->slab_cache, page); + } +} + - static void rcu_free_slab(struct rcu_head *h) - { - struct page *page = container_of(h, struct page, rcu_head); -@@ -1855,6 
+1879,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) - { - if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { - call_rcu(&page->rcu_head, rcu_free_slab); -+ } else if (irqs_disabled()) { -+ struct slub_free_list *f = this_cpu_ptr(&slub_free_list); -+ -+ raw_spin_lock(&f->lock); -+ list_add(&page->lru, &f->list); -+ raw_spin_unlock(&f->lock); - } else - __free_slab(s, page); + /* + * Management of partially allocated slabs. + */ +@@ -1940,15 +1965,16 @@ static inline void *acquire_slab(struct kmem_cache *s, + WARN_ON(!freelist); + return freelist; } -@@ -1962,7 +1992,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free); + static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); + + /* + * Try to allocate a partial slab from a specific node. + */ + static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- struct kmem_cache_cpu *c, gfp_t flags) ++ struct kmem_cache_cpu *c, gfp_t flags, ++ struct list_head *delayed_free) + { + struct page *page, *page2; + void *object = NULL; +@@ -1964,7 +1990,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!n || !n->nr_partial) return NULL; @@ -23285,7 +18963,16 @@ index 6d3574013b2f..795b9a3488df 100644 list_for_each_entry_safe(page, page2, &n->partial, slab_list) { void *t; -@@ -1987,7 +2017,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +@@ -1981,7 +2007,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + stat(s, ALLOC_FROM_PARTIAL); + object = t; + } else { +- put_cpu_partial(s, page, 0); ++ put_cpu_partial(s, page, 0, delayed_free); + stat(s, CPU_PARTIAL_NODE); + } + if (!kmem_cache_has_cpu_partial(s) +@@ -1989,7 +2015,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, break; } @@ -23294,7 +18981,61 @@ index 6d3574013b2f..795b9a3488df 100644 return object; } -@@ -2241,7 +2271,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -1997,7 +2023,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, + * Get a page from somewhere. Search in increasing NUMA distances. + */ + static void *get_any_partial(struct kmem_cache *s, gfp_t flags, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_NUMA + struct zonelist *zonelist; +@@ -2039,7 +2066,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, + + if (n && cpuset_zone_allowed(zone, flags) && + n->nr_partial > s->min_partial) { +- object = get_partial_node(s, n, c, flags); ++ object = get_partial_node(s, n, c, flags, delayed_free); + if (object) { + /* + * Don't check read_mems_allowed_retry() +@@ -2061,7 +2088,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, + * Get a partial page, lock it and return it. 
+ */ + static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + void *object; + int searchnode = node; +@@ -2069,11 +2097,12 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, + if (node == NUMA_NO_NODE) + searchnode = numa_mem_id(); + +- object = get_partial_node(s, get_node(s, searchnode), c, flags); ++ object = get_partial_node(s, get_node(s, searchnode), c, flags, ++ delayed_free); + if (object || node != NUMA_NO_NODE) + return object; + +- return get_any_partial(s, flags, c); ++ return get_any_partial(s, flags, c, delayed_free); + } + + #ifdef CONFIG_PREEMPTION +@@ -2149,7 +2178,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) + * Remove the cpu slab + */ + static void deactivate_slab(struct kmem_cache *s, struct page *page, +- void *freelist, struct kmem_cache_cpu *c) ++ void *freelist, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); +@@ -2243,7 +2273,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * that acquire_slab() will see a slab page that * is frozen */ @@ -23303,7 +19044,7 @@ index 6d3574013b2f..795b9a3488df 100644 } } else { m = M_FULL; -@@ -2252,7 +2282,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -2254,7 +2284,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * slabs from diagnostic functions will not see * any frozen slabs. */ @@ -23312,7 +19053,7 @@ index 6d3574013b2f..795b9a3488df 100644 } } -@@ -2276,7 +2306,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -2278,7 +2308,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, goto redo; if (lock) @@ -23321,7 +19062,27 @@ index 6d3574013b2f..795b9a3488df 100644 if (m == M_PARTIAL) stat(s, tail); -@@ -2315,10 +2345,10 @@ static void unfreeze_partials(struct kmem_cache *s, +@@ -2286,7 +2316,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + stat(s, DEACTIVATE_FULL); + else if (m == M_FREE) { + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + +@@ -2301,8 +2331,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, + * for the cpu using c (or some other guarantee must be there + * to guarantee no concurrent accesses). 
+ */ +-static void unfreeze_partials(struct kmem_cache *s, +- struct kmem_cache_cpu *c) ++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct kmem_cache_node *n = NULL, *n2 = NULL; +@@ -2317,10 +2347,10 @@ static void unfreeze_partials(struct kmem_cache *s, n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -23334,7 +19095,7 @@ index 6d3574013b2f..795b9a3488df 100644 } do { -@@ -2347,7 +2377,7 @@ static void unfreeze_partials(struct kmem_cache *s, +@@ -2349,14 +2379,14 @@ static void unfreeze_partials(struct kmem_cache *s, } if (n) @@ -23343,49 +19104,165 @@ index 6d3574013b2f..795b9a3488df 100644 while (discard_page) { page = discard_page; -@@ -2384,14 +2414,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) - pobjects = oldpage->pobjects; - pages = oldpage->pages; - if (drain && pobjects > slub_cpu_partial(s)) { -+ struct slub_free_list *f; - unsigned long flags; -+ LIST_HEAD(tofree); - /* - * partial array is full. Move the existing + discard_page = discard_page->next; + + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + #endif /* CONFIG_SLUB_CPU_PARTIAL */ +@@ -2369,7 +2399,8 @@ static void unfreeze_partials(struct kmem_cache *s, + * If we did not find a slot then simply move all the partials to the + * per node partial list. + */ +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct page *oldpage; +@@ -2392,7 +2423,8 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) * set to the per node partial list. */ local_irq_save(flags); - unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); -+ f = this_cpu_ptr(&slub_free_list); -+ raw_spin_lock(&f->lock); -+ list_splice_init(&f->list, &tofree); -+ raw_spin_unlock(&f->lock); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), ++ delayed_free); local_irq_restore(flags); -+ free_delayed(&tofree); oldpage = NULL; pobjects = 0; - pages = 0; -@@ -2459,7 +2496,19 @@ static bool has_cpu_slab(int cpu, void *info) +@@ -2414,17 +2446,18 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) + unsigned long flags; - static void flush_all(struct kmem_cache *s) + local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free); + local_irq_restore(flags); + } + preempt_enable(); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) ++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) { -+ LIST_HEAD(tofree); -+ int cpu; + stat(s, CPUSLAB_FLUSH); +- deactivate_slab(s, c->page, c->freelist, c); ++ deactivate_slab(s, c->page, c->freelist, c, delayed_free); + + c->tid = next_tid(c->tid); + } +@@ -2434,34 +2467,81 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) + * + * Called from IPI handler with interrupts disabled. 
+ */ +-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) ++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu, ++ struct list_head *delayed_free) + { + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + +- unfreeze_partials(s, c); ++ unfreeze_partials(s, c, delayed_free); + } + +-static void flush_cpu_slab(void *d) ++struct slub_flush_work { ++ struct work_struct work; ++ struct kmem_cache *s; ++ bool skip; ++}; ++ ++static void flush_cpu_slab(struct work_struct *w) + { +- struct kmem_cache *s = d; ++ struct slub_flush_work *sfw; ++ LIST_HEAD(delayed_free); + +- __flush_cpu_slab(s, smp_processor_id()); ++ sfw = container_of(w, struct slub_flush_work, work); ++ ++ local_irq_disable(); ++ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free); ++ local_irq_enable(); ++ ++ discard_delayed(&delayed_free); + } + +-static bool has_cpu_slab(int cpu, void *info) ++static bool has_cpu_slab(int cpu, struct kmem_cache *s) + { +- struct kmem_cache *s = info; + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + return c->page || slub_percpu_partial(c); + } + ++static DEFINE_MUTEX(flush_lock); ++static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); ++ ++static void flush_all_locked(struct kmem_cache *s) ++{ ++ struct slub_flush_work *sfw; ++ unsigned int cpu; ++ ++ mutex_lock(&flush_lock); + - on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); + for_each_online_cpu(cpu) { -+ struct slub_free_list *f; ++ sfw = &per_cpu(slub_flush, cpu); ++ if (!has_cpu_slab(cpu, s)) { ++ sfw->skip = true; ++ continue; ++ } ++ INIT_WORK(&sfw->work, flush_cpu_slab); ++ sfw->skip = false; ++ sfw->s = s; ++ schedule_work_on(cpu, &sfw->work); ++ } + -+ f = &per_cpu(slub_free_list, cpu); -+ raw_spin_lock_irq(&f->lock); -+ list_splice_init(&f->list, &tofree); -+ raw_spin_unlock_irq(&f->lock); -+ free_delayed(&tofree); ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ if (sfw->skip) ++ continue; ++ flush_work(&sfw->work); + } ++ ++ mutex_unlock(&flush_lock); ++} ++ + static void flush_all(struct kmem_cache *s) + { +- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); ++ cpus_read_lock(); ++ flush_all_locked(s); ++ cpus_read_unlock(); } /* -@@ -2514,10 +2563,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, +@@ -2472,13 +2552,15 @@ static int slub_cpu_dead(unsigned int cpu) + { + struct kmem_cache *s; + unsigned long flags; ++ LIST_HEAD(delayed_free); + + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + local_irq_save(flags); +- __flush_cpu_slab(s, cpu); ++ __flush_cpu_slab(s, cpu, &delayed_free); + local_irq_restore(flags); + } ++ discard_delayed(&delayed_free); + mutex_unlock(&slab_mutex); + return 0; + } +@@ -2516,10 +2598,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, unsigned long x = 0; struct page *page; @@ -23398,70 +19275,100 @@ index 6d3574013b2f..795b9a3488df 100644 return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2656,8 +2705,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) +@@ -2562,7 +2644,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) + } + + static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, +- int node, struct kmem_cache_cpu **pc) ++ int node, struct kmem_cache_cpu **pc, ++ struct list_head *delayed_free) + { + void *freelist; + struct kmem_cache_cpu *c = *pc; +@@ -2570,7 +2653,7 @@ static inline void 
*new_slab_objects(struct kmem_cache *s, gfp_t flags, + + WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); + +- freelist = get_partial(s, flags, node, c); ++ freelist = get_partial(s, flags, node, c, delayed_free); + + if (freelist) + return freelist; +@@ -2579,7 +2662,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, + if (page) { + c = raw_cpu_ptr(s->cpu_slab); + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + + /* + * No other reference to the page yet so we can +@@ -2658,7 +2741,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * already disabled (which is the case for bulk allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, - unsigned long addr, struct kmem_cache_cpu *c) + unsigned long addr, struct kmem_cache_cpu *c, -+ struct list_head *to_free) ++ struct list_head *delayed_free) { -+ struct slub_free_list *f; void *freelist; struct page *page; +@@ -2688,7 +2772,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } else { + stat(s, ALLOC_NODE_MISMATCH); +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } + } +@@ -2699,7 +2783,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + * information when the page leaves the per-cpu allocator + */ + if (unlikely(!pfmemalloc_match(page, gfpflags))) { +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } -@@ -2723,6 +2774,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, - VM_BUG_ON(!c->page->frozen); - c->freelist = get_freepointer(s, freelist); - c->tid = next_tid(c->tid); -+ -+out: -+ f = this_cpu_ptr(&slub_free_list); -+ raw_spin_lock(&f->lock); -+ list_splice_init(&f->list, to_free); -+ raw_spin_unlock(&f->lock); -+ - return freelist; +@@ -2738,7 +2822,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + goto redo; + } - new_slab: -@@ -2738,7 +2796,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +- freelist = new_slab_objects(s, gfpflags, node, &c); ++ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free); if (unlikely(!freelist)) { slab_out_of_memory(s, gfpflags, node); -- return NULL; -+ goto out; - } - - page = c->page; -@@ -2751,7 +2809,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2754,7 +2838,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + !alloc_debug_processing(s, page, freelist, addr)) goto new_slab; /* Slab failed checks. 
Next slab needed */ - deactivate_slab(s, page, get_freepointer(s, freelist), c); -- return freelist; -+ goto out; +- deactivate_slab(s, page, get_freepointer(s, freelist), c); ++ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free); + return freelist; } - /* -@@ -2763,6 +2821,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2767,6 +2851,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void *p; unsigned long flags; -+ LIST_HEAD(tofree); ++ LIST_HEAD(delayed_free); local_irq_save(flags); #ifdef CONFIG_PREEMPTION -@@ -2774,8 +2833,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2778,8 +2863,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c = this_cpu_ptr(s->cpu_slab); #endif - p = ___slab_alloc(s, gfpflags, node, addr, c); -+ p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); ++ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free); local_irq_restore(flags); -+ free_delayed(&tofree); ++ discard_delayed(&delayed_free); return p; } -@@ -2809,6 +2869,10 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, +@@ -2814,6 +2900,10 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, unsigned long tid; struct obj_cgroup *objcg = NULL; @@ -23472,7 +19379,7 @@ index 6d3574013b2f..795b9a3488df 100644 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); if (!s) return NULL; -@@ -2975,7 +3039,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -2979,7 +3069,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, do { if (unlikely(n)) { @@ -23481,7 +19388,7 @@ index 6d3574013b2f..795b9a3488df 100644 n = NULL; } prior = page->freelist; -@@ -3007,7 +3071,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -3011,7 +3101,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. */ @@ -23490,7 +19397,22 @@ index 6d3574013b2f..795b9a3488df 100644 } } -@@ -3048,7 +3112,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -3030,11 +3120,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page, + */ + stat(s, FREE_FROZEN); + } else if (new.frozen) { ++ LIST_HEAD(delayed_free); + /* + * If we just froze the page then put it onto the + * per cpu partial list. 
+ */ +- put_cpu_partial(s, page, 1); ++ put_cpu_partial(s, page, 1, &delayed_free); ++ discard_delayed(&delayed_free); + stat(s, CPU_PARTIAL_FREE); + } + +@@ -3053,7 +3145,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -23499,7 +19421,7 @@ index 6d3574013b2f..795b9a3488df 100644 return; slab_empty: -@@ -3063,7 +3127,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -3068,7 +3160,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, remove_full(s, n, page); } @@ -23508,47 +19430,45 @@ index 6d3574013b2f..795b9a3488df 100644 stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3270,9 +3334,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - void **p) - { +@@ -3278,6 +3370,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, struct kmem_cache_cpu *c; -+ LIST_HEAD(to_free); int i; struct obj_cgroup *objcg = NULL; - ++ LIST_HEAD(delayed_free); ++ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) + WARN_ON_ONCE(!preemptible() && + (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)); -+ + /* memcg and kmem_cache debug support */ s = slab_pre_alloc_hook(s, &objcg, size, flags); - if (unlikely(!s)) -@@ -3303,7 +3372,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +@@ -3309,7 +3406,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, - _RET_IP_, c); -+ _RET_IP_, c, &to_free); ++ _RET_IP_, c, &delayed_free); if (unlikely(!p[i])) goto error; -@@ -3318,6 +3387,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, - } +@@ -3325,6 +3422,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, c->tid = next_tid(c->tid); local_irq_enable(); -+ free_delayed(&to_free); ++ discard_delayed(&delayed_free); ++ /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(slab_want_init_on_alloc(flags, s))) { -@@ -3332,6 +3402,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + int j; +@@ -3338,6 +3437,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, return i; error: local_irq_enable(); -+ free_delayed(&to_free); ++ discard_delayed(&delayed_free); slab_post_alloc_hook(s, objcg, flags, i, p); __kmem_cache_free_bulk(s, i, p); return 0; -@@ -3467,7 +3538,7 @@ static void +@@ -3487,7 +3587,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -23557,7 +19477,7 @@ index 6d3574013b2f..795b9a3488df 100644 INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3868,7 +3939,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) +@@ -3888,7 +3988,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) struct page *page, *h; BUG_ON(irqs_disabled()); @@ -23566,7 +19486,7 @@ index 6d3574013b2f..795b9a3488df 100644 list_for_each_entry_safe(page, h, &n->partial, slab_list) { if (!page->inuse) { remove_partial(n, page); -@@ -3878,7 +3949,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) +@@ -3898,7 +3998,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -23575,7 +19495,23 @@ index 6d3574013b2f..795b9a3488df 100644 
list_for_each_entry_safe(page, h, &discard, slab_list) discard_slab(s, page); -@@ -4149,7 +4220,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) +@@ -3923,7 +4023,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) + int node; + struct kmem_cache_node *n; + +- flush_all(s); ++ flush_all_locked(s); + /* Attempt to free all objects */ + for_each_kmem_cache_node(s, node, n) { + free_partial(s, n); +@@ -4163,13 +4263,13 @@ int __kmem_cache_shrink(struct kmem_cache *s) + unsigned long flags; + int ret = 0; + +- flush_all(s); ++ flush_all_locked(s); + for_each_kmem_cache_node(s, node, n) { + INIT_LIST_HEAD(&discard); for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -23584,7 +19520,7 @@ index 6d3574013b2f..795b9a3488df 100644 /* * Build lists of slabs to discard or promote. -@@ -4180,7 +4251,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) +@@ -4200,7 +4300,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -23593,20 +19529,25 @@ index 6d3574013b2f..795b9a3488df 100644 /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, slab_list) -@@ -4355,6 +4426,12 @@ void __init kmem_cache_init(void) - { - static __initdata struct kmem_cache boot_kmem_cache, - boot_kmem_cache_node; -+ int cpu; -+ -+ for_each_possible_cpu(cpu) { -+ raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); -+ INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); -+ } +@@ -4347,6 +4447,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) + int node; + struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + struct kmem_cache_node *n; ++ LIST_HEAD(delayed_free); - if (debug_guardpage_minorder()) - slub_max_order = 0; -@@ -4542,7 +4619,7 @@ static int validate_slab_node(struct kmem_cache *s, + memcpy(s, static_cache, kmem_cache->object_size); + +@@ -4355,7 +4456,8 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) + * up. Even if it weren't true, IRQs are not up so we couldn't fire + * IPIs around. 
+ */ +- __flush_cpu_slab(s, smp_processor_id()); ++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ discard_delayed(&delayed_free); + for_each_kmem_cache_node(s, node, n) { + struct page *p; + +@@ -4562,7 +4664,7 @@ static int validate_slab_node(struct kmem_cache *s, struct page *page; unsigned long flags; @@ -23615,7 +19556,7 @@ index 6d3574013b2f..795b9a3488df 100644 list_for_each_entry(page, &n->partial, slab_list) { validate_slab(s, page); -@@ -4564,7 +4641,7 @@ static int validate_slab_node(struct kmem_cache *s, +@@ -4584,7 +4686,7 @@ static int validate_slab_node(struct kmem_cache *s, s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -23624,7 +19565,17 @@ index 6d3574013b2f..795b9a3488df 100644 return count; } -@@ -4743,12 +4820,12 @@ static int list_locations(struct kmem_cache *s, char *buf, +@@ -4635,6 +4737,9 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) + struct location *l; + int order; + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC) ++ return 0; ++ + order = get_order(sizeof(struct location) * max); + + l = (void *)__get_free_pages(flags, order); +@@ -4763,12 +4868,12 @@ static int list_locations(struct kmem_cache *s, char *buf, if (!atomic_long_read(&n->nr_slabs)) continue; @@ -23639,118 +19590,11 @@ index 6d3574013b2f..795b9a3488df 100644 } for (i = 0; i < t.count; i++) { -diff --git a/mm/swap.c b/mm/swap.c -index e7bdf094f76a..65ef7e3525bf 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) - */ - void lru_add_drain_all(void) - { -- static seqcount_t seqcount = SEQCNT_ZERO(seqcount); -- static DEFINE_MUTEX(lock); -+ /* -+ * lru_drain_gen - Global pages generation number -+ * -+ * (A) Definition: global lru_drain_gen = x implies that all generations -+ * 0 < n <= x are already *scheduled* for draining. -+ * -+ * This is an optimization for the highly-contended use case where a -+ * user space workload keeps constantly generating a flow of pages for -+ * each CPU. -+ */ -+ static unsigned int lru_drain_gen; - static struct cpumask has_work; -- int cpu, seq; -+ static DEFINE_MUTEX(lock); -+ unsigned cpu, this_gen; - - /* - * Make sure nobody triggers this path before mm_percpu_wq is fully -@@ -775,21 +785,54 @@ void lru_add_drain_all(void) - if (WARN_ON(!mm_percpu_wq)) - return; - -- seq = raw_read_seqcount_latch(&seqcount); -+ /* -+ * Guarantee pagevec counter stores visible by this CPU are visible to -+ * other CPUs before loading the current drain generation. -+ */ -+ smp_mb(); -+ -+ /* -+ * (B) Locally cache global LRU draining generation number -+ * -+ * The read barrier ensures that the counter is loaded before the mutex -+ * is taken. It pairs with smp_mb() inside the mutex critical section -+ * at (D). -+ */ -+ this_gen = smp_load_acquire(&lru_drain_gen); - - mutex_lock(&lock); - - /* -- * Piggyback on drain started and finished while we waited for lock: -- * all pages pended at the time of our enter were drained from vectors. -+ * (C) Exit the draining operation if a newer generation, from another -+ * lru_add_drain_all(), was already scheduled for draining. Check (A). - */ -- if (__read_seqcount_retry(&seqcount, seq)) -+ if (unlikely(this_gen != lru_drain_gen)) - goto done; - -- raw_write_seqcount_latch(&seqcount); -+ /* -+ * (D) Increment global generation number -+ * -+ * Pairs with smp_load_acquire() at (B), outside of the critical -+ * section. 
Use a full memory barrier to guarantee that the new global -+ * drain generation number is stored before loading pagevec counters. -+ * -+ * This pairing must be done here, before the for_each_online_cpu loop -+ * below which drains the page vectors. -+ * -+ * Let x, y, and z represent some system CPU numbers, where x < y < z. -+ * Assume CPU #z is is in the middle of the for_each_online_cpu loop -+ * below and has already reached CPU #y's per-cpu data. CPU #x comes -+ * along, adds some pages to its per-cpu vectors, then calls -+ * lru_add_drain_all(). -+ * -+ * If the paired barrier is done at any later step, e.g. after the -+ * loop, CPU #x will just exit at (C) and miss flushing out all of its -+ * added pages. -+ */ -+ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1); -+ smp_mb(); - - cpumask_clear(&has_work); -- - for_each_online_cpu(cpu) { - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); - -@@ -801,7 +844,7 @@ void lru_add_drain_all(void) - need_activate_page_drain(cpu)) { - INIT_WORK(work, lru_add_drain_per_cpu); - queue_work_on(cpu, mm_percpu_wq, work); -- cpumask_set_cpu(cpu, &has_work); -+ __cpumask_set_cpu(cpu, &has_work); - } - } - -@@ -816,7 +859,7 @@ void lru_add_drain_all(void) - { - lru_add_drain(); - } --#endif -+#endif /* CONFIG_SMP */ - - /** - * release_pages - batched put_page() diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index be4724b916b3..994acb959d00 100644 +index e6f352bf0498..f5d554e0b083 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -1544,7 +1544,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) +@@ -1558,7 +1558,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -23759,7 +19603,7 @@ index be4724b916b3..994acb959d00 100644 void *vaddr; node = numa_node_id(); -@@ -1581,11 +1581,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) +@@ -1595,11 +1595,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) return ERR_PTR(err); } @@ -23774,7 +19618,7 @@ index be4724b916b3..994acb959d00 100644 return vaddr; } -@@ -1650,6 +1651,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) +@@ -1664,6 +1665,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -23782,7 +19626,7 @@ index be4724b916b3..994acb959d00 100644 BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -1664,7 +1666,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) +@@ -1678,7 +1680,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) order = get_order(size); rcu_read_lock(); @@ -23792,7 +19636,7 @@ index be4724b916b3..994acb959d00 100644 list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1687,7 +1690,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) +@@ -1701,7 +1704,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) break; } @@ -23802,7 +19646,7 @@ index be4724b916b3..994acb959d00 100644 /* Allocate new block if nothing was found */ diff --git a/mm/vmstat.c b/mm/vmstat.c -index 4f7b4ee6aa12..0fc677378d3d 100644 +index f8942160fc95..920d88bf504a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -321,6 +321,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, @@ -23902,10 +19746,10 @@ index 4f7b4ee6aa12..0fc677378d3d 100644 void __dec_zone_page_state(struct page *page, enum zone_stat_item item) diff --git a/mm/workingset.c b/mm/workingset.c -index 
92e66113a577..29609352552f 100644 +index 10e96de945b3..289c14d10351 100644 --- a/mm/workingset.c +++ b/mm/workingset.c -@@ -432,6 +432,8 @@ static struct list_lru shadow_nodes; +@@ -430,6 +430,8 @@ static struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { @@ -23914,7 +19758,7 @@ index 92e66113a577..29609352552f 100644 /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. -@@ -440,7 +442,8 @@ void workingset_update_node(struct xa_node *node) +@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_node *node) * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ @@ -23924,8 +19768,70 @@ index 92e66113a577..29609352552f 100644 if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) { +diff --git a/mm/z3fold.c b/mm/z3fold.c +index dacb0d70fa61..234b46f01e83 100644 +--- a/mm/z3fold.c ++++ b/mm/z3fold.c +@@ -1778,6 +1778,7 @@ static u64 z3fold_zpool_total_size(void *pool) + + static struct zpool_driver z3fold_zpool_driver = { + .type = "z3fold", ++ .sleep_mapped = true, + .owner = THIS_MODULE, + .create = z3fold_zpool_create, + .destroy = z3fold_zpool_destroy, +diff --git a/mm/zbud.c b/mm/zbud.c +index c49966ece674..7ec5f27a68b0 100644 +--- a/mm/zbud.c ++++ b/mm/zbud.c +@@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *pool) + + static struct zpool_driver zbud_zpool_driver = { + .type = "zbud", ++ .sleep_mapped = true, + .owner = THIS_MODULE, + .create = zbud_zpool_create, + .destroy = zbud_zpool_destroy, +diff --git a/mm/zpool.c b/mm/zpool.c +index 3744a2d1a624..5ed71207ced7 100644 +--- a/mm/zpool.c ++++ b/mm/zpool.c +@@ -23,6 +23,7 @@ struct zpool { + void *pool; + const struct zpool_ops *ops; + bool evictable; ++ bool can_sleep_mapped; + + struct list_head list; + }; +@@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, + zpool->pool = driver->create(name, gfp, ops, zpool); + zpool->ops = ops; + zpool->evictable = driver->shrink && ops && ops->evict; ++ zpool->can_sleep_mapped = driver->sleep_mapped; + + if (!zpool->pool) { + pr_err("couldn't create %s pool\n", type); +@@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool) + return zpool->evictable; + } + ++/** ++ * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped. ++ * @zpool: The zpool to test ++ * ++ * Returns: true if zpool can sleep; false otherwise. 
++ */ ++bool zpool_can_sleep_mapped(struct zpool *zpool) ++{ ++ return zpool->can_sleep_mapped; ++} ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); + MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c -index c36fdff9a371..2cc22ee7b894 100644 +index 7289f502ffac..67b459609553 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -57,6 +57,7 @@ @@ -23961,11 +19867,11 @@ index c36fdff9a371..2cc22ee7b894 100644 }; struct mapping_area { -+ local_lock_t lock; - #ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ - #else -@@ -326,7 +342,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} ++ local_lock_t lock; + char *vm_buf; /* copy buffer for objects that span pages */ + char *vm_addr; /* address of kmap_atomic()'ed pages */ + enum zs_mapmode vm_mm; /* mapping mode */ +@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} static int create_cache(struct zs_pool *pool) { @@ -23974,7 +19880,7 @@ index c36fdff9a371..2cc22ee7b894 100644 0, 0, NULL); if (!pool->handle_cachep) return 1; -@@ -350,9 +366,26 @@ static void destroy_cache(struct zs_pool *pool) +@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool *pool) static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { @@ -24003,7 +19909,7 @@ index c36fdff9a371..2cc22ee7b894 100644 static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { -@@ -372,12 +405,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) +@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) static void record_obj(unsigned long handle, unsigned long obj) { @@ -24022,7 +19928,7 @@ index c36fdff9a371..2cc22ee7b894 100644 } /* zpool driver */ -@@ -459,7 +498,10 @@ MODULE_ALIAS("zpool-zsmalloc"); +@@ -455,7 +494,10 @@ MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ @@ -24034,7 +19940,7 @@ index c36fdff9a371..2cc22ee7b894 100644 static bool is_zspage_isolated(struct zspage *zspage) { -@@ -869,7 +911,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) +@@ -862,7 +904,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) static unsigned long handle_to_obj(unsigned long handle) { @@ -24048,7 +19954,7 @@ index c36fdff9a371..2cc22ee7b894 100644 } static unsigned long obj_to_head(struct page *page, void *obj) -@@ -883,22 +931,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) +@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) static inline int testpin_tag(unsigned long handle) { @@ -24095,7 +20001,7 @@ index c36fdff9a371..2cc22ee7b894 100644 } static void reset_page(struct page *page) -@@ -1326,7 +1398,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, +@@ -1275,7 +1347,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; @@ -24105,7 +20011,7 @@ index c36fdff9a371..2cc22ee7b894 100644 area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ -@@ -1380,7 +1453,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) +@@ -1329,7 +1402,7 @@ void zs_unmap_object(struct zs_pool *pool, 
unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } @@ -24115,122 +20021,133 @@ index c36fdff9a371..2cc22ee7b894 100644 migrate_read_unlock(zspage); unpin_tag(handle); diff --git a/mm/zswap.c b/mm/zswap.c -index fbb782924ccc..78a20f7b00f2 100644 +index 182f6ad5aa69..1566cc3ab7f4 100644 --- a/mm/zswap.c +++ b/mm/zswap.c -@@ -18,6 +18,7 @@ - #include <linux/highmem.h> - #include <linux/slab.h> - #include <linux/spinlock.h> -+#include <linux/local_lock.h> - #include <linux/types.h> - #include <linux/atomic.h> - #include <linux/frontswap.h> -@@ -387,27 +388,35 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, - /********************************* - * per-cpu code - **********************************/ --static DEFINE_PER_CPU(u8 *, zswap_dstmem); -+struct zswap_comp { -+ /* Used for per-CPU dstmem and tfm */ -+ local_lock_t lock; -+ u8 *dstmem; -+}; -+ -+static DEFINE_PER_CPU(struct zswap_comp, zswap_comp); +@@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) + struct scatterlist input, output; + struct crypto_acomp_ctx *acomp_ctx; - static int zswap_dstmem_prepare(unsigned int cpu) - { -+ struct zswap_comp *zcomp; - u8 *dst; +- u8 *src; ++ u8 *src, *tmp = NULL; + unsigned int dlen; + int ret; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + }; - dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); - if (!dst) - return -ENOMEM; ++ if (!zpool_can_sleep_mapped(pool)) { ++ tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC); ++ if (!tmp) ++ return -ENOMEM; ++ } ++ + /* extract swpentry from data */ + zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); + swpentry = zhdr->swpentry; /* here */ +@@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) + /* entry was invalidated */ + spin_unlock(&tree->lock); + zpool_unmap_handle(pool, handle); ++ kfree(tmp); + return 0; + } + spin_unlock(&tree->lock); +@@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) + dlen = PAGE_SIZE; + src = (u8 *)zhdr + sizeof(struct zswap_header); -- per_cpu(zswap_dstmem, cpu) = dst; -+ zcomp = per_cpu_ptr(&zswap_comp, cpu); -+ zcomp->dstmem = dst; - return 0; - } ++ if (!zpool_can_sleep_mapped(pool)) { ++ ++ memcpy(tmp, src, entry->length); ++ src = tmp; ++ ++ zpool_unmap_handle(pool, handle); ++ } ++ + mutex_lock(acomp_ctx->mutex); + sg_init_one(&input, src, entry->length); + sg_init_table(&output, 1); +@@ -1033,7 +1048,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) + spin_unlock(&tree->lock); - static int zswap_dstmem_dead(unsigned int cpu) - { -- u8 *dst; -+ struct zswap_comp *zcomp; + end: +- zpool_unmap_handle(pool, handle); ++ if (zpool_can_sleep_mapped(pool)) ++ zpool_unmap_handle(pool, handle); ++ else ++ kfree(tmp); ++ + return ret; + } -- dst = per_cpu(zswap_dstmem, cpu); -- kfree(dst); -- per_cpu(zswap_dstmem, cpu) = NULL; -+ zcomp = per_cpu_ptr(&zswap_comp, cpu); -+ kfree(zcomp->dstmem); -+ zcomp->dstmem = NULL; +@@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, + struct zswap_entry *entry; + struct scatterlist input, output; + struct crypto_acomp_ctx *acomp_ctx; +- u8 *src, *dst; ++ u8 *src, *dst, *tmp; + unsigned int dlen; + int ret; - return 0; - } -@@ -919,10 +928,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) - dlen = PAGE_SIZE; - src = (u8 *)zhdr + sizeof(struct zswap_header); +@@ -1253,15 +1272,33 @@ static int 
zswap_frontswap_load(unsigned type, pgoff_t offset, dst = kmap_atomic(page); -- tfm = *get_cpu_ptr(entry->pool->tfm); -+ local_lock(&zswap_comp.lock); -+ tfm = *this_cpu_ptr(entry->pool->tfm); - ret = crypto_comp_decompress(tfm, src, entry->length, - dst, &dlen); -- put_cpu_ptr(entry->pool->tfm); -+ local_unlock(&zswap_comp.lock); + zswap_fill_page(dst, entry->value); kunmap_atomic(dst); - BUG_ON(ret); - BUG_ON(dlen != PAGE_SIZE); -@@ -1074,12 +1084,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, ++ ret = 0; + goto freeentry; } - /* compress */ -- dst = get_cpu_var(zswap_dstmem); -- tfm = *get_cpu_ptr(entry->pool->tfm); -+ local_lock(&zswap_comp.lock); -+ dst = *this_cpu_ptr(&zswap_comp.dstmem); -+ tfm = *this_cpu_ptr(entry->pool->tfm); - src = kmap_atomic(page); - ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); - kunmap_atomic(src); -- put_cpu_ptr(entry->pool->tfm); - if (ret) { - ret = -EINVAL; - goto put_dstmem; -@@ -1103,7 +1113,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, - memcpy(buf, &zhdr, hlen); - memcpy(buf + hlen, dst, dlen); - zpool_unmap_handle(entry->pool->zpool, handle); -- put_cpu_var(zswap_dstmem); -+ local_unlock(&zswap_comp.lock); - - /* populate entry */ - entry->offset = offset; -@@ -1131,7 +1141,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, - return 0; - - put_dstmem: -- put_cpu_var(zswap_dstmem); -+ local_unlock(&zswap_comp.lock); - zswap_pool_put(entry->pool); - freepage: - zswap_entry_cache_free(entry); -@@ -1176,9 +1186,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ++ if (!zpool_can_sleep_mapped(entry->pool->zpool)) { ++ ++ tmp = kmalloc(entry->length, GFP_ATOMIC); ++ if (!tmp) { ++ ret = -ENOMEM; ++ goto freeentry; ++ } ++ } ++ + /* decompress */ + dlen = PAGE_SIZE; + src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); if (zpool_evictable(entry->pool->zpool)) src += sizeof(struct zswap_header); - dst = kmap_atomic(page); -- tfm = *get_cpu_ptr(entry->pool->tfm); -+ local_lock(&zswap_comp.lock); -+ tfm = *this_cpu_ptr(entry->pool->tfm); - ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen); -- put_cpu_ptr(entry->pool->tfm); -+ local_unlock(&zswap_comp.lock); - kunmap_atomic(dst); - zpool_unmap_handle(entry->pool->zpool, entry->handle); + ++ if (!zpool_can_sleep_mapped(entry->pool->zpool)) { ++ ++ memcpy(tmp, src, entry->length); ++ src = tmp; ++ ++ zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ } ++ + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); + mutex_lock(acomp_ctx->mutex); + sg_init_one(&input, src, entry->length); +@@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, + ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); + mutex_unlock(acomp_ctx->mutex); + +- zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ if (zpool_can_sleep_mapped(entry->pool->zpool)) ++ zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ else ++ kfree(tmp); ++ BUG_ON(ret); + + freeentry: +@@ -1279,7 +1320,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, + zswap_entry_put(tree, entry); + spin_unlock(&tree->lock); + +- return 0; ++ return ret; + } + + /* frees an entry in zswap */ diff --git a/net/Kconfig b/net/Kconfig -index 3831206977a1..81ae878ae553 100644 +index f4c32d982af6..a4b435f393b3 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -282,7 +282,7 @@ config CGROUP_NET_CLASSID @@ -24243,10 +20160,10 @@ index 3831206977a1..81ae878ae553 100644 
config BQL bool diff --git a/net/core/dev.c b/net/core/dev.c -index 4906b44af850..3063e57529c6 100644 +index 449b45b843d4..d6456c255316 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -219,14 +219,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) +@@ -221,14 +221,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS @@ -24263,7 +20180,7 @@ index 4906b44af850..3063e57529c6 100644 #endif } -@@ -3034,6 +3034,7 @@ static void __netif_reschedule(struct Qdisc *q) +@@ -3041,6 +3041,7 @@ static void __netif_reschedule(struct Qdisc *q) sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -24271,7 +20188,7 @@ index 4906b44af850..3063e57529c6 100644 } void __netif_schedule(struct Qdisc *q) -@@ -3096,6 +3097,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +@@ -3103,6 +3104,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -24279,7 +20196,7 @@ index 4906b44af850..3063e57529c6 100644 } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -3762,7 +3764,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, +@@ -3775,7 +3777,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ @@ -24291,7 +20208,7 @@ index 4906b44af850..3063e57529c6 100644 if (unlikely(contended)) spin_lock(&q->busylock); -@@ -4558,6 +4564,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, +@@ -4570,6 +4576,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, rps_unlock(sd); local_irq_restore(flags); @@ -24299,7 +20216,7 @@ index 4906b44af850..3063e57529c6 100644 atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -4773,7 +4780,7 @@ static int netif_rx_internal(struct sk_buff *skb) +@@ -4785,7 +4792,7 @@ static int netif_rx_internal(struct sk_buff *skb) struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -24308,7 +20225,7 @@ index 4906b44af850..3063e57529c6 100644 rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4783,14 +4790,14 @@ static int netif_rx_internal(struct sk_buff *skb) +@@ -4795,14 +4802,14 @@ static int netif_rx_internal(struct sk_buff *skb) ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); @@ -24326,7 +20243,7 @@ index 4906b44af850..3063e57529c6 100644 } return ret; } -@@ -4829,11 +4836,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4841,11 +4848,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); @@ -24340,7 +20257,7 @@ index 4906b44af850..3063e57529c6 100644 trace_netif_rx_ni_exit(err); return err; -@@ -6202,12 +6207,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) +@@ -6288,12 +6293,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) sd->rps_ipi_list = NULL; local_irq_enable(); @@ -24355,7 +20272,7 @@ index 4906b44af850..3063e57529c6 100644 } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -6285,6 +6292,7 @@ void __napi_schedule(struct napi_struct *n) +@@ -6371,6 +6378,7 @@ void __napi_schedule(struct napi_struct *n) local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -24363,7 +20280,7 @@ index 
4906b44af850..3063e57529c6 100644 } EXPORT_SYMBOL(__napi_schedule); -@@ -10711,6 +10719,7 @@ static int dev_cpu_dead(unsigned int oldcpu) +@@ -10938,6 +10946,7 @@ static int dev_cpu_dead(unsigned int oldcpu) raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -24371,7 +20288,7 @@ index 4906b44af850..3063e57529c6 100644 #ifdef CONFIG_RPS remsd = oldsd->rps_ipi_list; -@@ -10724,7 +10733,7 @@ static int dev_cpu_dead(unsigned int oldcpu) +@@ -10951,7 +10960,7 @@ static int dev_cpu_dead(unsigned int oldcpu) netif_rx_ni(skb); input_queue_head_incr(oldsd); } @@ -24380,7 +20297,7 @@ index 4906b44af850..3063e57529c6 100644 netif_rx_ni(skb); input_queue_head_incr(oldsd); } -@@ -11040,7 +11049,7 @@ static int __init net_dev_init(void) +@@ -11267,7 +11276,7 @@ static int __init net_dev_init(void) INIT_WORK(flush, flush_backlog); @@ -24390,7 +20307,7 @@ index 4906b44af850..3063e57529c6 100644 #ifdef CONFIG_XFRM_OFFLOAD skb_queue_head_init(&sd->xfrm_backlog); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c -index 80dbf2f4016e..698b02dfeaaf 100644 +index 8e582e29a41e..e51f4854d8b2 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -42,7 +42,7 @@ @@ -24411,7 +20328,7 @@ index 80dbf2f4016e..698b02dfeaaf 100644 struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); -@@ -223,7 +223,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, +@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -24470,11 +20387,23 @@ index e491b083b348..ef432cea2e10 100644 struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 785daff48030..e64d0a2e21c3 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -60,6 +60,7 @@ + #include <linux/prefetch.h> + #include <linux/if_vlan.h> + #include <linux/mpls.h> ++#include <linux/kcov.h> + + #include <net/protocol.h> + #include <net/dst.h> diff --git a/net/core/sock.c b/net/core/sock.c -index 6c5c6b18eff4..dc252f9aaf7e 100644 +index bbcd4b97eddd..dd69aa593639 100644 --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -3049,12 +3049,11 @@ void lock_sock_nested(struct sock *sk, int subclass) +@@ -3050,12 +3050,11 @@ void lock_sock_nested(struct sock *sk, int subclass) if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -24488,7 +20417,7 @@ index 6c5c6b18eff4..dc252f9aaf7e 100644 } EXPORT_SYMBOL(lock_sock_nested); -@@ -3103,12 +3102,11 @@ bool lock_sock_fast(struct sock *sk) +@@ -3104,13 +3103,12 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -24498,25 +20427,26 @@ index 6c5c6b18eff4..dc252f9aaf7e 100644 * The sk_lock has mutex_lock() semantics here: */ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + __acquire(&sk->sk_lock.slock); - local_bh_enable(); return true; } EXPORT_SYMBOL(lock_sock_fast); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c -index 239e54474b65..fcb105cbb546 100644 +index 45fb450b4522..5fb95030e7c0 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c -@@ -585,7 +585,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) +@@ -635,7 +635,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { + local_bh_disable(); - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, 
NULL); + local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); -@@ -617,11 +619,8 @@ int inet_hash(struct sock *sk) +@@ -667,11 +669,8 @@ int inet_hash(struct sock *sk) { int err = 0; @@ -24529,7 +20459,7 @@ index 239e54474b65..fcb105cbb546 100644 return err; } -@@ -632,17 +631,20 @@ void inet_unhash(struct sock *sk) +@@ -682,17 +681,20 @@ void inet_unhash(struct sock *sk) struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb = NULL; spinlock_t *lock; @@ -24552,7 +20482,7 @@ index 239e54474b65..fcb105cbb546 100644 if (sk_unhashed(sk)) goto unlock; -@@ -655,7 +657,10 @@ void inet_unhash(struct sock *sk) +@@ -705,7 +707,10 @@ void inet_unhash(struct sock *sk) __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); unlock: @@ -24565,10 +20495,10 @@ index 239e54474b65..fcb105cbb546 100644 EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c -index 2d3add9e6116..50fd17cbf3ec 100644 +index 55c290d55605..9bad345cba9a 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c -@@ -335,11 +335,8 @@ int inet6_hash(struct sock *sk) +@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) { int err = 0; @@ -24581,11 +20511,35 @@ index 2d3add9e6116..50fd17cbf3ec 100644 return err; } +diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c +index b31417f40bd5..39943c33abbf 100644 +--- a/net/mac80211/iface.c ++++ b/net/mac80211/iface.c +@@ -15,6 +15,7 @@ + #include <linux/if_arp.h> + #include <linux/netdevice.h> + #include <linux/rtnetlink.h> ++#include <linux/kcov.h> + #include <net/mac80211.h> + #include <net/ieee80211_radiotap.h> + #include "ieee80211_i.h" +diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c +index 972895e9f22d..3527b17f235a 100644 +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -17,6 +17,7 @@ + #include <linux/etherdevice.h> + #include <linux/rcupdate.h> + #include <linux/export.h> ++#include <linux/kcov.h> + #include <linux/bitops.h> + #include <net/mac80211.h> + #include <net/ieee80211_radiotap.h> diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c -index 2a76a2f5ed88..1542f1a5a31c 100644 +index 6fe4e5cc807c..880d109a1b2d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -1257,7 +1257,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, +@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { @@ -24595,7 +20549,7 @@ index 2a76a2f5ed88..1542f1a5a31c 100644 err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c -index 54c417244642..7ce1abfd68a6 100644 +index 49eae93d1489..512a39d6edec 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -553,7 +553,11 @@ struct Qdisc noop_qdisc = { @@ -24610,7 +20564,7 @@ index 54c417244642..7ce1abfd68a6 100644 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, -@@ -858,9 +862,15 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, +@@ -845,9 +849,15 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); @@ -24627,7 +20581,7 @@ index 54c417244642..7ce1abfd68a6 100644 sch->ops = ops; sch->flags = ops->static_flags; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c -index 43cf8dbde898..d5516102491b 100644 +index dcc50ae54550..e4a0dc8f8e40 100644 --- 
a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -422,7 +422,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) @@ -24649,7 +20603,7 @@ index 43cf8dbde898..d5516102491b 100644 } EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c -index efc89a92961d..4e1216d04441 100644 +index d01ca1a18418..14059a9051b8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,7 +44,7 @@ static void xfrm_state_gc_task(struct work_struct *work); @@ -24673,7 +20627,7 @@ index efc89a92961d..4e1216d04441 100644 spin_lock_bh(&net->xfrm.xfrm_state_lock); write_seqcount_begin(&xfrm_state_hash_generation); -@@ -2589,6 +2594,8 @@ int __net_init xfrm_state_init(struct net *net) +@@ -2666,6 +2671,8 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); spin_lock_init(&net->xfrm.xfrm_state_lock); @@ -24682,200 +20636,6 @@ index efc89a92961d..4e1216d04441 100644 return 0; out_byspi: -diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py -index 2fa7bb83885f..a92c55bd8de5 100644 ---- a/scripts/gdb/linux/dmesg.py -+++ b/scripts/gdb/linux/dmesg.py -@@ -16,8 +16,13 @@ import sys - - from linux import utils - --printk_log_type = utils.CachedType("struct printk_log") -- -+printk_info_type = utils.CachedType("struct printk_info") -+prb_data_blk_lpos_type = utils.CachedType("struct prb_data_blk_lpos") -+prb_desc_type = utils.CachedType("struct prb_desc") -+prb_desc_ring_type = utils.CachedType("struct prb_desc_ring") -+prb_data_ring_type = utils.CachedType("struct prb_data_ring") -+printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer") -+atomic_long_type = utils.CachedType("atomic_long_t") - - class LxDmesg(gdb.Command): - """Print Linux kernel log buffer.""" -@@ -26,44 +31,110 @@ class LxDmesg(gdb.Command): - super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) - - def invoke(self, arg, from_tty): -- log_buf_addr = int(str(gdb.parse_and_eval( -- "(void *)'printk.c'::log_buf")).split()[0], 16) -- log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) -- log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) -- log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) -- - inf = gdb.inferiors()[0] -- start = log_buf_addr + log_first_idx -- if log_first_idx < log_next_idx: -- log_buf_2nd_half = -1 -- length = log_next_idx - log_first_idx -- log_buf = utils.read_memoryview(inf, start, length).tobytes() -- else: -- log_buf_2nd_half = log_buf_len - log_first_idx -- a = utils.read_memoryview(inf, start, log_buf_2nd_half) -- b = utils.read_memoryview(inf, log_buf_addr, log_next_idx) -- log_buf = a.tobytes() + b.tobytes() -- -- length_offset = printk_log_type.get_type()['len'].bitpos // 8 -- text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 -- time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 -- text_offset = printk_log_type.get_type().sizeof -- -- pos = 0 -- while pos < log_buf.__len__(): -- length = utils.read_u16(log_buf, pos + length_offset) -- if length == 0: -- if log_buf_2nd_half == -1: -- gdb.write("Corrupted log buffer!\n") -+ -+ # read in prb structure -+ prb_addr = int(str(gdb.parse_and_eval("(void *)'printk.c'::prb")).split()[0], 16) -+ sz = printk_ringbuffer_type.get_type().sizeof -+ prb = utils.read_memoryview(inf, prb_addr, sz).tobytes() -+ -+ # read in descriptor ring structure -+ off = printk_ringbuffer_type.get_type()['desc_ring'].bitpos // 8 -+ addr = prb_addr + off 
-+ sz = prb_desc_ring_type.get_type().sizeof -+ desc_ring = utils.read_memoryview(inf, addr, sz).tobytes() -+ -+ # read in descriptor array -+ off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8 -+ desc_ring_count = 1 << utils.read_u32(desc_ring, off) -+ desc_sz = prb_desc_type.get_type().sizeof -+ off = prb_desc_ring_type.get_type()['descs'].bitpos // 8 -+ addr = utils.read_ulong(desc_ring, off) -+ descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() -+ -+ # read in info array -+ info_sz = printk_info_type.get_type().sizeof -+ off = prb_desc_ring_type.get_type()['infos'].bitpos // 8 -+ addr = utils.read_ulong(desc_ring, off) -+ infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes() -+ -+ # read in text data ring structure -+ off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 -+ addr = prb_addr + off -+ sz = prb_data_ring_type.get_type().sizeof -+ text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes() -+ -+ # read in text data -+ off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8 -+ text_data_sz = 1 << utils.read_u32(text_data_ring, off) -+ off = prb_data_ring_type.get_type()['data'].bitpos // 8 -+ addr = utils.read_ulong(text_data_ring, off) -+ text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes() -+ -+ counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 -+ -+ sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8 -+ -+ off = prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8 -+ begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) -+ next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) -+ -+ ts_off = printk_info_type.get_type()['ts_nsec'].bitpos // 8 -+ len_off = printk_info_type.get_type()['text_len'].bitpos // 8 -+ -+ # definitions from kernel/printk/printk_ringbuffer.h -+ desc_committed = 1 -+ desc_finalized = 2 -+ desc_sv_bits = utils.get_long_type().sizeof * 8 -+ desc_flags_shift = desc_sv_bits - 2 -+ desc_flags_mask = 3 << desc_flags_shift -+ desc_id_mask = ~desc_flags_mask -+ -+ # read in tail and head descriptor ids -+ off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8 -+ tail_id = utils.read_u64(desc_ring, off + counter_off) -+ off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8 -+ head_id = utils.read_u64(desc_ring, off + counter_off) -+ -+ did = tail_id -+ while True: -+ ind = did % desc_ring_count -+ desc_off = desc_sz * ind -+ info_off = info_sz * ind -+ -+ # skip non-committed record -+ state = 3 & (utils.read_u64(descs, desc_off + sv_off + -+ counter_off) >> desc_flags_shift) -+ if state != desc_committed and state != desc_finalized: -+ if did == head_id: - break -- pos = log_buf_2nd_half -+ did = (did + 1) & desc_id_mask - continue - -- text_len = utils.read_u16(log_buf, pos + text_len_offset) -- text_start = pos + text_offset -- text = log_buf[text_start:text_start + text_len].decode( -- encoding='utf8', errors='replace') -- time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) -+ begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz -+ end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz -+ -+ # handle data-less record -+ if begin & 1 == 1: -+ text = "" -+ else: -+ # handle wrapping data block -+ if begin > end: -+ begin = 0 -+ -+ # skip over descriptor id -+ text_start = begin + utils.get_long_type().sizeof -+ -+ text_len = utils.read_u16(infos, info_off + len_off) -+ -+ # handle truncated message -+ if end - text_start < text_len: -+ text_len = end 
- text_start -+ -+ text = text_data[text_start:text_start + text_len].decode( -+ encoding='utf8', errors='replace') -+ -+ time_stamp = utils.read_u64(infos, info_off + ts_off) - - for line in text.splitlines(): - msg = u"[{time:12.6f}] {line}\n".format( -@@ -75,7 +146,9 @@ class LxDmesg(gdb.Command): - msg = msg.encode(encoding='utf8', errors='replace') - gdb.write(msg) - -- pos += length -+ if did == head_id: -+ break -+ did = (did + 1) & desc_id_mask - - - LxDmesg() -diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py -index ea94221dbd39..ff7c1799d588 100644 ---- a/scripts/gdb/linux/utils.py -+++ b/scripts/gdb/linux/utils.py -@@ -123,6 +123,13 @@ def read_u64(buffer, offset): - return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32) - - -+def read_ulong(buffer, offset): -+ if get_long_type().sizeof == 8: -+ return read_u64(buffer, offset) -+ else: -+ return read_u32(buffer, offset) -+ -+ - target_arch = None - - -- -2.28.0 +2.30.1 diff --git a/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-DTBS-v5.12.x.patch b/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-DTBS-v5.12.x.patch index 3e77de024..a1b2d7446 100644 --- a/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-DTBS-v5.12.x.patch +++ b/patches/soc/ti/beagleboard_dtbs/0001-Add-BeagleBoard.org-DTBS-v5.12.x.patch @@ -1,6 +1,6 @@ -From 70f279224356c652d4024010a1c7312094fd393a Mon Sep 17 00:00:00 2001 +From 1b1649f9a498cfb80ef7cda8e0c08edd1f1b9224 Mon Sep 17 00:00:00 2001 From: Robert Nelson <robertcnelson@gmail.com> -Date: Thu, 4 Mar 2021 10:44:43 -0600 +Date: Mon, 8 Mar 2021 12:05:04 -0600 Subject: [PATCH] Add BeagleBoard.org DTBS: v5.12.x https://github.com/beagleboard/BeagleBoard-DeviceTrees/tree/v5.12.x diff --git a/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch b/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch index e954f2bb2..680f89877 100644 --- a/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch +++ b/patches/wpanusb/0001-merge-wpanusb-https-github.com-statropy-wpanusb.patch @@ -1,6 +1,6 @@ -From ff9110136d3bcdcff32055e24bd85679a97152c2 Mon Sep 17 00:00:00 2001 +From b1b70b85665c47c8402a7769556352e064c8ef19 Mon Sep 17 00:00:00 2001 From: Robert Nelson <robertcnelson@gmail.com> -Date: Thu, 4 Mar 2021 10:43:24 -0600 +Date: Mon, 8 Mar 2021 12:04:46 -0600 Subject: [PATCH] merge: wpanusb: https://github.com/statropy/wpanusb https://github.com/statropy/wpanusb/commit/7ba5f3d24d95f804e80b2d8d28e35b34c15219c2 diff --git a/version.sh b/version.sh index 9e54057a9..19f6b50f4 100644 --- a/version.sh +++ b/version.sh @@ -42,10 +42,10 @@ toolchain="gcc_arm_gnueabihf_10" #Kernel KERNEL_REL=5.12 -KERNEL_TAG=${KERNEL_REL}-rc1 +KERNEL_TAG=${KERNEL_REL}-rc2 kernel_rt=".x-rty" #Kernel Build -BUILD=${build_prefix}1 +BUILD=${build_prefix}1.1 #v5.X-rcX + upto SHA #prev_KERNEL_SHA="" -- GitLab