diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 339883729065a0f6fa73623dbec3865d6c0f8f70..8cc918acbabccaea452e37882d62e396bab09982 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,29 +18,26 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+copy_user_generic(void *to, const void *from, unsigned long len)
 {
-	unsigned ret;
-
 	stac();
 	/*
 	 * If CPU has FSRM feature, use 'rep movs'.
 	 * Otherwise, use copy_user_generic_unrolled.
 	 */
-	alternative_call(copy_user_generic_unrolled,
-			 copy_user_fast_string,
-			 X86_FEATURE_FSRM,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+	asm volatile(
+		"1:\n\t"
+		ALTERNATIVE("rep movsb",
+			    "call copy_user_generic_unrolled", ALT_NOT(X86_FEATURE_FSRM))
+		"2:\n"
+		_ASM_EXTABLE_UA(1b, 2b)
+		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+		: : "memory", "rax", "rdx", "r8", "r9", "r10", "r11");
 	clac();
-	return ret;
+	return len;
 }
 
 static __always_inline __must_check unsigned long
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 818f2f728294e6ec4dff72bea1524f7503447e67..16a743f11b11bb65efde41822411d8a09a919253 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -45,13 +45,29 @@
  * Input:
  * rdi destination
  * rsi source
- * rdx count
+ * rcx count (only the low 32 bits, %ecx, are used)
  *
  * Output:
- * eax uncopied bytes or 0 if successful.
+ * rcx uncopied bytes or 0 if successful.
+ *
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM.
+ *
+ * HOWEVER! This function ends up sharing a lot of its code with
+ * __copy_user_nocache(), which is a normal C function with a
+ * similar calling convention, except that it gets the 'count'
+ * in %rdx and returns the result in %rax.
+ *
+ * To share as much code as possible, we end up returning the
+ * result in *both* %rcx/%rax, and we also move the initial count
+ * into %rdx.
+ *
+ * We can clobber rdx/rsi/rdi and r8-r11 (and %rax, see above)
  */
 SYM_FUNC_START(copy_user_generic_unrolled)
-	cmpl $8,%edx
+	movl %ecx,%edx
+	cmpl $8,%ecx
 	jb .Lcopy_user_short_string_bytes
 	ALIGN_DESTINATION
 	movl %edx,%ecx
@@ -103,37 +119,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
-/*
- * Some CPUs support FSRM for Fast Short REP MOVS.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_fast_string)
-	movl %edx,%ecx
-1:	rep movsb
-	xorl %eax,%eax
-	RET
-
-12:	movl %ecx,%eax		/* ecx is zerorest also */
-	RET
-
-	_ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_fast_string)
-EXPORT_SYMBOL(copy_user_fast_string)
-
 /*
  * Try to copy last bytes and clear the rest if needed.
  * Since protection fault in copy_from/to_user is not a normal situation,
@@ -160,6 +145,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 
 3:
 	movl %edx,%eax
+	movl %edx,%ecx
 	RET
 
 	_ASM_EXTABLE_CPY(1b, 2b)
@@ -203,6 +189,7 @@ SYM_CODE_START_LOCAL(copy_user_short_string)
 	decl %ecx
 	jnz 21b
 23:	xor %eax,%eax
+	xor %ecx,%ecx
 	RET
 
 40:	leal (%rdx,%rcx,8),%edx
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ea62a4e03f1e0d5e4cc08aaf5d037ad237aaa9b1..44817bbe48fe057e2d17a68d0527017c94285cb2 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1286,7 +1286,6 @@ static const char *uaccess_safe_builtin[] = {
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	"clear_user_original",
 	"copy_user_generic_unrolled",
-	"copy_user_fast_string",
 	"__copy_user_nocache",
 	NULL
 };