diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index bf5e30fd7a0307..ec91a33627d7f8 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -22,6 +22,12 @@ MODULE_PARM_DESC(enable_uring, #define FUSE_RING_HEADER_PG 0 #define FUSE_RING_PAYLOAD_PG 1 +/* Threshold that determines if a better queue should be searched for */ +#define FUSE_URING_Q_THRESHOLD 2 + +/* Number of (re)tries to find a better queue */ +#define FUSE_URING_Q_TRIES 3 + /* redfs only to allow patch backports */ #define IO_URING_F_TASK_DEAD (1 << 13) @@ -1456,13 +1462,23 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd, fuse_uring_send(ent, cmd, err, issue_flags); } -static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring) +static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring, + bool background) { unsigned int qid; - int node; + int node, tries = 0; unsigned int nr_queues; unsigned int cpu = task_cpu(current); + struct fuse_ring_queue *queue, *primary_queue = NULL; + /* + * Background requests result in better performance on a different + * CPU, unless CPUs are already busy. 
+ */ + if (background) + cpu++; + +retry: cpu = cpu % ring->max_nr_queues; /* numa local registered queue bitmap */ @@ -1475,16 +1491,50 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring) nr_queues = READ_ONCE(ring->numa_q_map[node].nr_queues); if (nr_queues) { + /* prefer the queue that corresponds to the current cpu */ + queue = READ_ONCE(ring->queues[cpu]); + if (queue) { + if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD) + return queue; + primary_queue = queue; + } + qid = ring->numa_q_map[node].cpu_to_qid[cpu]; if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) return NULL; - return READ_ONCE(ring->queues[qid]); + if (qid != cpu) { + queue = READ_ONCE(ring->queues[qid]); + + /* Might happen on teardown */ + if (unlikely(!queue)) + return NULL; + + if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD) + return queue; + } + + /* Retries help for load balancing */ + if (tries < FUSE_URING_Q_TRIES && tries + 1 < nr_queues) { + if (!primary_queue) + primary_queue = queue; + + /* Increase cpu, assuming it will map to a different qid*/ + cpu++; + tries++; + goto retry; + } } + /* Retries exceeded, take the primary target queue */ + if (primary_queue) + return primary_queue; + /* global registered queue bitmap */ qid = ring->q_map.cpu_to_qid[cpu]; - if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) + if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) { + /* Might happen on teardown */ return NULL; + } return READ_ONCE(ring->queues[qid]); } @@ -1525,7 +1575,7 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) int err; err = -EINVAL; - queue = fuse_uring_select_queue(ring); + queue = fuse_uring_select_queue(ring, false); if (!queue) goto err; @@ -1567,7 +1617,7 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req) struct fuse_ring_queue *queue; struct fuse_ring_ent *ent = NULL; - queue = fuse_uring_select_queue(ring); + queue = fuse_uring_select_queue(ring, true); if (!queue) return false; diff --git a/fs/fuse/file.c b/fs/fuse/file.c 
index d3ddc5dd0d61fb..9ce6e8328ecb1c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -758,6 +758,18 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) struct inode *inode = file_inode(io->iocb->ki_filp); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + struct address_space *mapping = io->iocb->ki_filp->f_mapping; + + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. + */ + if (io->write && res && mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + io->offset >> PAGE_SHIFT, + (io->offset + res - 1) >> PAGE_SHIFT); + } spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); @@ -1132,9 +1144,11 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, { struct kiocb *iocb = ia->io->iocb; struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; struct fuse_write_in *inarg = &ia->write.in; + ssize_t written; ssize_t err; fuse_write_args_fill(ia, ff, pos, count); @@ -1148,10 +1162,26 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return fuse_async_req_send(fm, ia, count); err = fuse_simple_request(fm, &ia->ap.args); - if (!err && ia->write.out.size > count) + written = ia->write.out.size; + if (!err && written > count) err = -EIO; - return err ?: ia->write.out.size; + /* + * Without FOPEN_DIRECT_IO, generic_file_direct_write() does the + * invalidation for us. + */ + if (!err && written && mapping->nrpages && + (ff->open_flags & FOPEN_DIRECT_IO)) { + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. 
+ */ + invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, + (pos + written - 1) >> PAGE_SHIFT); + } + + return err ?: written; } bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written) @@ -1676,15 +1706,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (res > 0) *ppos = pos; - if (res > 0 && write && fopen_direct_io) { - /* - * As in generic_file_direct_write(), invalidate after the - * write, to invalidate read-ahead cache that may have competed - * with the write. - */ - invalidate_inode_pages2_range(mapping, idx_from, idx_to); - } - return res > 0 ? res : err; } EXPORT_SYMBOL_GPL(fuse_direct_io); @@ -2036,21 +2057,6 @@ static int fuse_writepage_locked(struct page *page) return error; } -static int fuse_writepage(struct page *page, struct writeback_control *wbc) -{ - struct fuse_conn *fc = get_fuse_conn(page->mapping->host); - int err; - - if (wbc->sync_mode == WB_SYNC_NONE && - fc->num_background >= fc->congestion_threshold) - return AOP_WRITEPAGE_ACTIVATE; - - err = fuse_writepage_locked(page); - unlock_page(page); - - return err; -} - struct fuse_fill_wb_data { struct fuse_writepage_args *wpa; struct fuse_file *ff; @@ -2099,8 +2105,9 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data) static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, - struct fuse_args_pages *ap, - struct fuse_fill_wb_data *data) + struct fuse_args_pages *ap, + struct fuse_fill_wb_data *data, + struct writeback_control *wbc) { WARN_ON(!ap->num_pages); @@ -2120,6 +2127,17 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) return true; + /* Reached alignment */ + if (fc->alignment_pages && !(page->index % fc->alignment_pages)) { + /* we are at a point where we would write aligned + * check if we potentially could reach the next alignment */ + if (page->index + fc->alignment_pages > wbc->range_end) + 
return true; + + if (ap->num_pages + fc->alignment_pages > fc->max_pages) + return true; + } + return false; } @@ -2141,7 +2159,7 @@ static int fuse_writepages_fill(struct folio *folio, goto out_unlock; } - if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) { + if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data, wbc)) { fuse_writepages_send(data); data->wpa = NULL; } @@ -3184,6 +3202,16 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, return ret; } +#ifdef CONFIG_MIGRATION +int fuse_migrate_folio(struct address_space *mapping, struct folio *dst, + struct folio *src, enum migrate_mode mode) { + + if (folio_test_writeback(src)) + return -EBUSY; + return filemap_migrate_folio(mapping, dst, src, mode); +} +#endif + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = fuse_file_read_iter, @@ -3208,10 +3236,10 @@ static const struct file_operations fuse_file_operations = { static const struct address_space_operations fuse_file_aops = { .read_folio = fuse_read_folio, .readahead = fuse_readahead, - .writepage = fuse_writepage, .writepages = fuse_writepages, .launder_folio = fuse_launder_folio, .dirty_folio = filemap_dirty_folio, + .migrate_folio = fuse_migrate_folio, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO, .write_begin = fuse_write_begin, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 436e2c05a7a9ad..6fcc7cc03dae4d 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -931,6 +931,10 @@ struct fuse_conn { /** uring connection information*/ struct fuse_ring *ring; #endif + + /* The foffset alignment in PAGE */ + unsigned int alignment_pages; + }; /* @@ -1473,6 +1477,12 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, bool isdir); void fuse_file_release(struct inode *inode, struct fuse_file *ff, unsigned int open_flags, fl_owner_t id, bool isdir); +#ifdef CONFIG_MIGRATION +int fuse_migrate_folio(struct address_space *mapping, struct 
folio *dst, + struct folio *src, enum migrate_mode mode); +#else +#define fuse_migrate_folio NULL +#endif #ifdef CONFIG_MIGRATION int fuse_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 0032cc94ed3aab..b59a5a325657cc 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1429,6 +1429,13 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->direct_io_allow_mmap = 1; if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) fc->io_uring = 1; + + if (flags & FUSE_ALIGN_PG_ORDER) { + if (arg->align_page_order > 0) { + fc->alignment_pages = + 1UL << arg->align_page_order; + } + } if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; if (flags & FUSE_INVAL_INODE_ENTRY) diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 96a9fc4c395518..52ac7a3d266d46 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -429,6 +429,8 @@ struct fuse_file_lock { * FUSE_OVER_IO_URING: Indicate that client supports io-uring * FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation * FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation + * FUSE_ALIGN_PG_ORDER: page order (power of 2 exponent for number of pages) for + * optimal io-size alignment * FUSE_URING_REDUCED_Q: Client (kernel) supports less queues - Server is free * to register between 1 and nr-core io-uring queues */ @@ -903,6 +905,9 @@ struct fuse_init_in { #define FUSE_COMPAT_INIT_OUT_SIZE 8 #define FUSE_COMPAT_22_INIT_OUT_SIZE 24 +/* + * align_page_order: Number of pages for optimal IO, or a multiple of that + */ struct fuse_init_out { uint32_t major; uint32_t minor; @@ -915,7 +920,10 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint8_t align_page_order; + uint8_t padding[3]; + uint32_t unused[5]; };
#define CUSE_INIT_INFO_MAX 4096