From 7554a8bb6ddeeca87fa8abd1d9766111477a6643 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 10 Sep 2023 17:29:39 -0400
Subject: [PATCH] bcachefs: Ensure buffered writes write as much as they can

This adds a new helper, bch2_folio_reservation_get_partial(), which
reserves as many blocks as possible and may return partial success.

__bch2_buffered_write() is switched to the new helper - this fixes
fstests generic/275, the write until -ENOSPC test.

generic/230 now fails: this appears to be a test bug, where xfs_io isn't
looping after a partial write to get the error code.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs-io-buffered.c  | 32 ++++++++++++++++--------------
 fs/bcachefs/fs-io-pagecache.c | 37 +++++++++++++++++++++++++++--------
 fs/bcachefs/fs-io-pagecache.h |  7 ++++++-
 3 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 1355d618f9883..cc33d763f7221 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -863,24 +863,26 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
 	f_pos = pos;
 	f_offset = pos - folio_pos(darray_first(fs));
 	darray_for_each(fs, fi) {
+		ssize_t f_reserved;
+
 		f = *fi;
 		f_len = min(end, folio_end_pos(f)) - f_pos;
+		f_reserved = bch2_folio_reservation_get_partial(c, inode, f, &res, f_offset, f_len);
+
+		if (unlikely(f_reserved != f_len)) {
+			if (f_reserved < 0) {
+				if (f == darray_first(fs)) {
+					ret = f_reserved;
+					goto out;
+				}
+
+				folios_trunc(&fs, fi);
+				end = min(end, folio_end_pos(darray_last(fs)));
+			} else {
+				folios_trunc(&fs, fi + 1);
+				end = f_pos + f_reserved;
+			}
 
-		/*
-		 * XXX: per POSIX and fstests generic/275, on -ENOSPC we're
-		 * supposed to write as much as we have disk space for.
-		 *
-		 * On failure here we should still write out a partial page if
-		 * we aren't completely out of disk space - we don't do that
-		 * yet:
-		 */
-		ret = bch2_folio_reservation_get(c, inode, f, &res, f_offset, f_len);
-		if (unlikely(ret)) {
-			folios_trunc(&fs, fi);
-			if (!fs.nr)
-				goto out;
-
-			end = min(end, folio_end_pos(darray_last(fs)));
 			break;
 		}
 
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index 872283e5bd1e2..a9cc5cad9cc99 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -423,7 +423,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
 			struct bch_inode_info *inode,
 			struct folio *folio,
 			struct bch2_folio_reservation *res,
-			unsigned offset, unsigned len)
+			size_t offset, size_t len)
 {
 	struct bch_folio *s = bch2_folio_create(folio, 0);
 	unsigned i, disk_sectors = 0, quota_sectors = 0;
@@ -437,8 +437,7 @@ int bch2_folio_reservation_get(struct bch_fs *c,
 	for (i = round_down(offset, block_bytes(c)) >> 9;
 	     i < round_up(offset + len, block_bytes(c)) >> 9;
 	     i++) {
-		disk_sectors += sectors_to_reserve(&s->s[i],
-						res->disk.nr_replicas);
+		disk_sectors += sectors_to_reserve(&s->s[i], res->disk.nr_replicas);
 		quota_sectors += s->s[i].state == SECTOR_unallocated;
 	}
 
@@ -449,12 +448,9 @@ int bch2_folio_reservation_get(struct bch_fs *c,
 	}
 
 	if (quota_sectors) {
-		ret = bch2_quota_reservation_add(c, inode, &res->quota,
-						 quota_sectors, true);
+		ret = bch2_quota_reservation_add(c, inode, &res->quota, quota_sectors, true);
 		if (unlikely(ret)) {
-			struct disk_reservation tmp = {
-				.sectors = disk_sectors
-			};
+			struct disk_reservation tmp = { .sectors = disk_sectors };
 
 			bch2_disk_reservation_put(c, &tmp);
 			res->disk.sectors -= disk_sectors;
@@ -465,6 +461,31 @@ int bch2_folio_reservation_get(struct bch_fs *c,
 	return 0;
 }
 
+ssize_t bch2_folio_reservation_get_partial(struct bch_fs *c,
+			struct bch_inode_info *inode,
+			struct folio *folio,
+			struct bch2_folio_reservation *res,
+			size_t offset, size_t len)
+{
+	size_t l, reserved = 0;
+	int ret;
+
+	while ((l = len - reserved)) {
+		while ((ret = bch2_folio_reservation_get(c, inode, folio, res, offset, l))) {
+			if ((offset & (block_bytes(c) - 1)) + l <= block_bytes(c))
+				return reserved ?: ret;
+
+			len = reserved + l;
+			l /= 2;
+		}
+
+		offset += l;
+		reserved += l;
+	}
+
+	return reserved;
+}
+
 static void bch2_clear_folio_bits(struct folio *folio)
 {
 	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
index 828c3d7c8f199..fd7d692c087e1 100644
--- a/fs/bcachefs/fs-io-pagecache.h
+++ b/fs/bcachefs/fs-io-pagecache.h
@@ -153,7 +153,12 @@ int bch2_folio_reservation_get(struct bch_fs *,
 			struct bch_inode_info *,
 			struct folio *,
 			struct bch2_folio_reservation *,
-			unsigned, unsigned);
+			size_t, size_t);
+ssize_t bch2_folio_reservation_get_partial(struct bch_fs *,
+			struct bch_inode_info *,
+			struct folio *,
+			struct bch2_folio_reservation *,
+			size_t, size_t);
 
 void bch2_set_folio_dirty(struct bch_fs *,
 			  struct bch_inode_info *,
-- 
GitLab