From 2b09e513c5854532b994fc21abdb3b34d6d0d2fe Mon Sep 17 00:00:00 2001
From: Jingbo Xu
Date: Sun, 11 Jan 2026 15:37:01 +0800
Subject: [PATCH 1/2] fuse: invalidate the page cache after direct write

This fixes xfstests generic/451 (for both O_DIRECT and FOPEN_DIRECT_IO
direct write).

Commit b359af8275a9 ("fuse: Invalidate the page cache after
FOPEN_DIRECT_IO write") tries to fix the similar issue for
FOPEN_DIRECT_IO write, which can be reproduced by xfstests generic/209.
It only fixes the issue for synchronous direct write, while omitting the
case for asynchronous direct write (exactly targeted by generic/451).

While for O_DIRECT direct write, it's somewhat more complicated. For
synchronous direct write, generic_file_direct_write() will invalidate
the page cache after the write, and thus it can pass generic/209. While
for asynchronous direct write, the invalidation in
generic_file_direct_write() is bypassed since the invalidation shall be
done when the asynchronous IO completes. This is omitted in FUSE and
generic/451 fails whereby.

Fix this by conveying the invalidation for both synchronous and
asynchronous write.

- with FOPEN_DIRECT_IO
  - sync write, invalidate in fuse_send_write()
  - async write, invalidate in fuse_aio_complete() with FUSE_ASYNC_DIO,
    fuse_send_write() otherwise
- without FOPEN_DIRECT_IO
  - sync write, invalidate in generic_file_direct_write()
  - async write, invalidate in fuse_aio_complete() with FUSE_ASYNC_DIO,
    generic_file_direct_write() otherwise

Reviewed-by: Bernd Schubert
Signed-off-by: Jingbo Xu
(cherry picked from commit f6de786cb23689f0ee235a2dc75a991d30a90198)
---
 fs/fuse/file.c | 43 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0f1d735cdcea08..cc9a1bbecf4a18 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -818,6 +818,18 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 			struct inode *inode = file_inode(io->iocb->ki_filp);
 			struct fuse_conn *fc = get_fuse_conn(inode);
 			struct fuse_inode *fi = get_fuse_inode(inode);
+			struct address_space *mapping = io->iocb->ki_filp->f_mapping;
+
+			/*
+			 * As in generic_file_direct_write(), invalidate after the
+			 * write, to invalidate read-ahead cache that may have competed
+			 * with the write.
+			 */
+			if (io->write && res && mapping->nrpages) {
+				invalidate_inode_pages2_range(mapping,
+						io->offset >> PAGE_SHIFT,
+						(io->offset + res - 1) >> PAGE_SHIFT);
+			}
 
 			spin_lock(&fi->lock);
 			fi->attr_version = atomic64_inc_return(&fc->attr_version);
@@ -1199,9 +1211,11 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
 {
 	struct kiocb *iocb = ia->io->iocb;
 	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
 	struct fuse_file *ff = file->private_data;
 	struct fuse_mount *fm = ff->fm;
 	struct fuse_write_in *inarg = &ia->write.in;
+	ssize_t written;
 	ssize_t err;
 
 	fuse_write_args_fill(ia, ff, pos, count);
@@ -1215,10 +1229,26 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
 		return fuse_async_req_send(fm, ia, count);
 
 	err = fuse_simple_request(fm, &ia->ap.args);
-	if (!err && ia->write.out.size > count)
+	written = ia->write.out.size;
+	if (!err && written > count)
 		err = -EIO;
 
-	return err ?: ia->write.out.size;
+	/*
+	 * Without FOPEN_DIRECT_IO, generic_file_direct_write() does the
+	 * invalidation for us.
+	 */
+	if (!err && written && mapping->nrpages &&
+	    (ff->open_flags & FOPEN_DIRECT_IO)) {
+		/*
+		 * As in generic_file_direct_write(), invalidate after the
+		 * write, to invalidate read-ahead cache that may have competed
+		 * with the write.
+		 */
+		invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
+					      (pos + written - 1) >> PAGE_SHIFT);
+	}
+
+	return err ?: written;
 }
 
 bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written)
@@ -1766,15 +1796,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 	if (res > 0)
 		*ppos = pos;
 
-	if (res > 0 && write && fopen_direct_io) {
-		/*
-		 * As in generic_file_direct_write(), invalidate after the
-		 * write, to invalidate read-ahead cache that may have competed
-		 * with the write.
-		 */
-		invalidate_inode_pages2_range(mapping, idx_from, idx_to);
-	}
-
 	return res > 0 ? res : err;
 }
 EXPORT_SYMBOL_GPL(fuse_direct_io);

From 2654dac9f0f709d5909df2e3f179e2310ecfd74f Mon Sep 17 00:00:00 2001
From: Bernd Schubert
Date: Wed, 11 Feb 2026 16:38:21 +0100
Subject: [PATCH 2/2] fuse: {io-uring} Prefer the current core over mapping

Mapping might point to a totally different core due to random
assignment. For performance, using the current core might be
beneficial.

Example (with core binding)

unpatched
WRITE: bw=841MiB/s

patched
WRITE: bw=1363MiB/s

With
fio --name=test --ioengine=psync --direct=1 \
    --rw=write --bs=1M --iodepth=1 --numjobs=1 \
    --filename_format=/redfs/testfile.\$jobnum --size=100G \
    --thread --create_on_open=1 --runtime=30s --cpus_allowed=1

In order to get the good number `--cpus_allowed=1` is needed. This could
be improved by a future change that avoids cpu migration in
fuse_request_end() on wake_up() call.

(cherry picked from commit 32e0073d67cfc7bd602dc7675ae71fa825b04362)
---
 fs/fuse/dev_uring.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index e4959875658422..d49e1838b326de 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -23,8 +23,12 @@ MODULE_PARM_DESC(enable_uring,
 #define FUSE_RING_HEADER_PG 0
 #define FUSE_RING_PAYLOAD_PG 1
 
+/* Threshold that determines if a better queue should be searched for */
 #define FUSE_URING_Q_THRESHOLD 2
 
+/* Number of (re)tries to find a better queue */
+#define FUSE_URING_Q_TRIES 3
+
 /* redfs only to allow patch backports */
 #define IO_URING_F_TASK_DEAD (1 << 13)
 
@@ -1557,7 +1561,7 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring,
 						       bool background)
 {
 	unsigned int qid;
-	int node, retries = 0;
+	int node, tries = 0;
 	unsigned int nr_queues;
 	unsigned int cpu = task_cpu(current);
 	struct fuse_ring_queue *queue, *primary_queue = NULL;
@@ -1582,26 +1586,36 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring,
 	nr_queues = READ_ONCE(ring->numa_q_map[node].nr_queues);
 
 	if (nr_queues) {
+		/* prefer the queue that corresponds to the current cpu */
+		queue = READ_ONCE(ring->queues[cpu]);
+		if (queue) {
+			if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD)
+				return queue;
+			primary_queue = queue;
+		}
+
 		qid = ring->numa_q_map[node].cpu_to_qid[cpu];
 		if (WARN_ON_ONCE(qid >= ring->max_nr_queues))
 			return NULL;
 
-		queue = READ_ONCE(ring->queues[qid]);
+		if (qid != cpu) {
+			queue = READ_ONCE(ring->queues[qid]);
 
-		/* Might happen on teardown */
-		if (unlikely(!queue))
-			return NULL;
+			/* Might happen on teardown */
+			if (unlikely(!queue))
+				return NULL;
 
-		if (queue->nr_reqs < FUSE_URING_Q_THRESHOLD)
-			return queue;
+			if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD)
+				return queue;
+		}
 
 		/* Retries help for load balancing */
-		if (retries < FUSE_URING_Q_THRESHOLD) {
-			if (!retries)
+		if (tries < FUSE_URING_Q_TRIES && tries + 1 < nr_queues) {
+			if (!primary_queue)
 				primary_queue = queue;
-			/* Increase cpu, assuming it will map to a differet qid*/
+			/* Increase cpu, assuming it will map to a different qid*/
 			cpu++;
-			retries++;
+			tries++;
 			goto retry;
 		}
 	}
@@ -1612,9 +1626,10 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring,
 
 	/* global registered queue bitmap */
 	qid = ring->q_map.cpu_to_qid[cpu];
-	if (WARN_ON_ONCE(qid >= ring->max_nr_queues))
-		/* Might happen on teardown */
+	if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) {
+		/* Might happen on teardown */
 		return NULL;
+	}
 
 	return READ_ONCE(ring->queues[qid]);
 }