diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 1bd12570c583f0..ec91a33627d7f8 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -22,8 +22,12 @@ MODULE_PARM_DESC(enable_uring, #define FUSE_RING_HEADER_PG 0 #define FUSE_RING_PAYLOAD_PG 1 +/* Threshold that determines if a better queue should be searched for */ #define FUSE_URING_Q_THRESHOLD 2 +/* Number of (re)tries to find a better queue */ +#define FUSE_URING_Q_TRIES 3 + /* redfs only to allow patch backports */ #define IO_URING_F_TASK_DEAD (1 << 13) @@ -1462,7 +1466,7 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring, bool background) { unsigned int qid; - int node, retries = 0; + int node, tries = 0; unsigned int nr_queues; unsigned int cpu = task_cpu(current); struct fuse_ring_queue *queue, *primary_queue = NULL; @@ -1487,26 +1491,36 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring, nr_queues = READ_ONCE(ring->numa_q_map[node].nr_queues); if (nr_queues) { + /* prefer the queue that corresponds to the current cpu */ + queue = READ_ONCE(ring->queues[cpu]); + if (queue) { + if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD) + return queue; + primary_queue = queue; + } + qid = ring->numa_q_map[node].cpu_to_qid[cpu]; if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) return NULL; - queue = READ_ONCE(ring->queues[qid]); + if (qid != cpu) { + queue = READ_ONCE(ring->queues[qid]); - /* Might happen on teardown */ - if (unlikely(!queue)) - return NULL; + /* Might happen on teardown */ + if (unlikely(!queue)) + return NULL; - if (queue->nr_reqs < FUSE_URING_Q_THRESHOLD) - return queue; + if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD) + return queue; + } /* Retries help for load balancing */ - if (retries < FUSE_URING_Q_THRESHOLD) { - if (!retries) + if (tries < FUSE_URING_Q_TRIES && tries + 1 < nr_queues) { + if (!primary_queue) primary_queue = queue; - /* Increase cpu, assuming it will map to a differet qid*/ + /* Increase cpu, assuming it will map to a different qid*/ cpu++; - retries++; + tries++; goto retry; } } @@ -1517,9 +1531,10 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring, /* global registered queue bitmap */ qid = ring->q_map.cpu_to_qid[cpu]; - if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) - /* Might happen on teardown */ + if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) { + /* Might happen on teardown */ return NULL; + } return READ_ONCE(ring->queues[qid]); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2c698901210168..feb6c63cc06171 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -751,6 +751,18 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) struct inode *inode = file_inode(io->iocb->ki_filp); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + struct address_space *mapping = io->iocb->ki_filp->f_mapping; + + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. + */ + if (io->write && res && mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + io->offset >> PAGE_SHIFT, + (io->offset + res - 1) >> PAGE_SHIFT); + } spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); @@ -1125,9 +1137,11 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, { struct kiocb *iocb = ia->io->iocb; struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; struct fuse_write_in *inarg = &ia->write.in; + ssize_t written; ssize_t err; fuse_write_args_fill(ia, ff, pos, count); @@ -1141,10 +1155,26 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return fuse_async_req_send(fm, ia, count); err = fuse_simple_request(fm, &ia->ap.args); - if (!err && ia->write.out.size > count) + written = ia->write.out.size; + if (!err && written > count) err = -EIO; - return err ?: ia->write.out.size; + /* + * Without FOPEN_DIRECT_IO, generic_file_direct_write() does the + * invalidation for us. + */ + if (!err && written && mapping->nrpages && + (ff->open_flags & FOPEN_DIRECT_IO)) { + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. + */ + invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, + (pos + written - 1) >> PAGE_SHIFT); + } + + return err ?: written; } bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written) @@ -1668,15 +1698,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (res > 0) *ppos = pos; - if (res > 0 && write && fopen_direct_io) { - /* - * As in generic_file_direct_write(), invalidate after the - * write, to invalidate read-ahead cache that may have competed - * with the write. - */ - invalidate_inode_pages2_range(mapping, idx_from, idx_to); - } - return res > 0 ? res : err; } EXPORT_SYMBOL_GPL(fuse_direct_io); @@ -2028,21 +2049,6 @@ static int fuse_writepage_locked(struct page *page) return error; } -static int fuse_writepage(struct page *page, struct writeback_control *wbc) -{ - struct fuse_conn *fc = get_fuse_conn(page->mapping->host); - int err; - - if (wbc->sync_mode == WB_SYNC_NONE && - fc->num_background >= fc->congestion_threshold) - return AOP_WRITEPAGE_ACTIVATE; - - err = fuse_writepage_locked(page); - unlock_page(page); - - return err; -} - struct fuse_fill_wb_data { struct fuse_writepage_args *wpa; struct fuse_file *ff; @@ -2091,8 +2097,9 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, - struct fuse_args_pages *ap, - struct fuse_fill_wb_data *data) + struct fuse_args_pages *ap, + struct fuse_fill_wb_data *data, + struct writeback_control *wbc) { WARN_ON(!ap->num_pages); @@ -2112,6 +2119,17 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) return true; + /* Reached alignment */ + if (fc->alignment_pages && !(page->index % fc->alignment_pages)) { + /* we are at a point where we would write aligned + * check if we potentially could reach the next alignment */ + if (page->index + fc->alignment_pages > wbc->range_end) + return true; + + if (ap->num_pages + fc->alignment_pages > fc->max_pages) + return true; + } + return false; } @@ -2133,7 +2151,7 @@ static int fuse_writepages_fill(struct folio *folio, goto out_unlock; } - if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) { + if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data, wbc)) { fuse_writepages_send(data); data->wpa = NULL; } @@ -3176,6 +3194,16 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, return ret; } +#ifdef CONFIG_MIGRATION +int fuse_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { + + if (folio_test_writeback(src)) + return -EBUSY; + return filemap_migrate_folio(mapping, dst, src, mode); +} +#endif + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = fuse_file_read_iter, @@ -3200,10 +3228,10 @@ static const struct file_operations fuse_file_operations = { static const struct address_space_operations fuse_file_aops = { .read_folio = fuse_read_folio, .readahead = fuse_readahead, - .writepage = fuse_writepage, .writepages = fuse_writepages, .launder_folio = fuse_launder_folio, .dirty_folio = filemap_dirty_folio, + .migrate_folio = fuse_migrate_folio, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO, .write_begin = fuse_write_begin, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6f464de375bafd..167f0439d9ab96 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -931,6 +931,10 @@ struct fuse_conn { /** uring connection information*/ struct fuse_ring *ring; #endif + + /* The foffset alignment in PAGE */ + unsigned int alignment_pages; + }; /* @@ -1473,6 +1477,12 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, bool isdir); void fuse_file_release(struct inode *inode, struct fuse_file *ff, unsigned int open_flags, fl_owner_t id, bool isdir); +#ifdef CONFIG_MIGRATION +int fuse_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define fuse_migrate_folio NULL +#endif #ifdef CONFIG_MIGRATION int fuse_migrate_folio(struct address_space *mapping, struct folio *dst, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e23f6e56f80381..f33818a9a1720c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1423,6 +1423,14 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->direct_io_allow_mmap = 1; if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) fc->io_uring = 1; + + if (flags & FUSE_ALIGN_PG_ORDER) { + if (arg->align_page_order > 0) { + fc->alignment_pages = + (1UL << arg->align_page_order) + >> PAGE_SHIFT; + } + } if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; if (flags & FUSE_INVAL_INODE_ENTRY) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 8f6cb98b04d99a..7437c66396d2a2 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -423,6 +423,8 @@ struct fuse_file_lock { * FUSE_OVER_IO_URING: Indicate that client supports io-uring * FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation * FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation + * FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for + * optimal io-size alignment * FUSE_URING_REDUCED_Q: Client (kernel) supports less queues - Server is free * to register between 1 and nr-core io-uring queues */ @@ -897,6 +899,9 @@ struct fuse_init_in { #define FUSE_COMPAT_INIT_OUT_SIZE 8 #define FUSE_COMPAT_22_INIT_OUT_SIZE 24 +/* + * align_page_order: Number of pages for optimal IO, or a multiple of that + */ struct fuse_init_out { uint32_t major; uint32_t minor; @@ -909,7 +914,10 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint8_t align_page_order; + uint8_t padding[3]; + uint32_t unused[5]; }; #define CUSE_INIT_INFO_MAX 4096