Blame - block/bio.c - codeaurora/cp-linux

blob: 68bbc835bacc068e3e254c5df0a78f3fe43e5d60 [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame^]	1	/*
				2	* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope that it will be useful,
				9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				11	* GNU General Public License for more details.
				12	*
				13	* You should have received a copy of the GNU General Public License
				14	* along with this program; if not, write to the Free Software
				15	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				16	*
				17	*/
				18	#include <linux/mm.h>
				19	#include <linux/swap.h>
				20	#include <linux/bio.h>
				21	#include <linux/blkdev.h>
				22	#include <linux/uio.h>
				23	#include <linux/iocontext.h>
				24	#include <linux/slab.h>
				25	#include <linux/init.h>
				26	#include <linux/kernel.h>
				27	#include <linux/export.h>
				28	#include <linux/mempool.h>
				29	#include <linux/workqueue.h>
				30	#include <linux/cgroup.h>
				31
				32	#include <trace/events/block.h>
				33
				/*
				 * Number of bio_vecs embedded directly in a bio_set-backed bio
				 * (bi_inline_vecs).  Requests needing no more than this many vectors
				 * are served by a single mempool allocation instead of two.
				 */
				#define BIO_INLINE_VECS		4
				39
				40	/*
				41	* if you change this list, also change bvec_alloc or things will
				42	* break badly! cannot be bigger than what you can fit into an
				43	* unsigned short
				44	*/
				45	#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
				46	static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
				47	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
				48	};
				49	#undef BV
				50
				/*
				 * fs_bio_set: the shared bio_set (bio and iovec memory pools) for I/O
				 * code that has no need for private memory pools of its own.
				 */
				struct bio_set *fs_bio_set;
				EXPORT_SYMBOL(fs_bio_set);
				57
				/*
				 * Slab pool management: one bio_slab per distinct bio object size.
				 * The table below is only touched with bio_slab_lock held.
				 */
				struct bio_slab {
					struct kmem_cache *slab;	/* backing cache; NULL marks a free slot */
					unsigned int slab_ref;		/* number of users sharing this cache */
					unsigned int slab_size;		/* object size the cache was created for */
					char name[8];			/* cache name, formatted as "bio-<slot>" */
				};
				static DEFINE_MUTEX(bio_slab_lock);
				static struct bio_slab *bio_slabs;	/* the slot table */
				static unsigned int bio_slab_nr;	/* slots handed out so far */
				static unsigned int bio_slab_max;	/* allocated capacity of bio_slabs */
				70
				71	static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
				72	{
				73	unsigned int sz = sizeof(struct bio) + extra_size;
				74	struct kmem_cache *slab = NULL;
				75	struct bio_slab bslab, new_bio_slabs;
				76	unsigned int new_bio_slab_max;
				77	unsigned int i, entry = -1;
				78
				79	mutex_lock(&bio_slab_lock);
				80
				81	i = 0;
				82	while (i < bio_slab_nr) {
				83	bslab = &bio_slabs[i];
				84
				85	if (!bslab->slab && entry == -1)
				86	entry = i;
				87	else if (bslab->slab_size == sz) {
				88	slab = bslab->slab;
				89	bslab->slab_ref++;
				90	break;
				91	}
				92	i++;
				93	}
				94
				95	if (slab)
				96	goto out_unlock;
				97
				98	if (bio_slab_nr == bio_slab_max && entry == -1) {
				99	new_bio_slab_max = bio_slab_max << 1;
				100	new_bio_slabs = krealloc(bio_slabs,
				101	new_bio_slab_max * sizeof(struct bio_slab),
				102	GFP_KERNEL);
				103	if (!new_bio_slabs)
				104	goto out_unlock;
				105	bio_slab_max = new_bio_slab_max;
				106	bio_slabs = new_bio_slabs;
				107	}
				108	if (entry == -1)
				109	entry = bio_slab_nr++;
				110
				111	bslab = &bio_slabs[entry];
				112
				113	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
				114	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				115	SLAB_HWCACHE_ALIGN, NULL);
				116	if (!slab)
				117	goto out_unlock;
				118
				119	bslab->slab = slab;
				120	bslab->slab_ref = 1;
				121	bslab->slab_size = sz;
				122	out_unlock:
				123	mutex_unlock(&bio_slab_lock);
				124	return slab;
				125	}
				126
				127	static void bio_put_slab(struct bio_set *bs)
				128	{
				129	struct bio_slab *bslab = NULL;
				130	unsigned int i;
				131
				132	mutex_lock(&bio_slab_lock);
				133
				134	for (i = 0; i < bio_slab_nr; i++) {
				135	if (bs->bio_slab == bio_slabs[i].slab) {
				136	bslab = &bio_slabs[i];
				137	break;
				138	}
				139	}
				140
				141	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
				142	goto out;
				143
				144	WARN_ON(!bslab->slab_ref);
				145
				146	if (--bslab->slab_ref)
				147	goto out;
				148
				149	kmem_cache_destroy(bslab->slab);
				150	bslab->slab = NULL;
				151
				152	out:
				153	mutex_unlock(&bio_slab_lock);
				154	}
				155
				156	unsigned int bvec_nr_vecs(unsigned short idx)
				157	{
				158	return bvec_slabs[idx].nr_vecs;
				159	}
				160
				161	void bvec_free(mempool_t pool, struct bio_vec bv, unsigned int idx)
				162	{
				163	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
				164
				165	if (idx == BIOVEC_MAX_IDX)
				166	mempool_free(bv, pool);
				167	else {
				168	struct biovec_slab *bvs = bvec_slabs + idx;
				169
				170	kmem_cache_free(bvs->slab, bv);
				171	}
				172	}
				173
				174	struct bio_vec bvec_alloc(gfp_t gfp_mask, int nr, unsigned long idx,
				175	mempool_t *pool)
				176	{
				177	struct bio_vec *bvl;
				178
				179	/*
				180	* see comment near bvec_array define!
				181	*/
				182	switch (nr) {
				183	case 1:
				184	*idx = 0;
				185	break;
				186	case 2 ... 4:
				187	*idx = 1;
				188	break;
				189	case 5 ... 16:
				190	*idx = 2;
				191	break;
				192	case 17 ... 64:
				193	*idx = 3;
				194	break;
				195	case 65 ... 128:
				196	*idx = 4;
				197	break;
				198	case 129 ... BIO_MAX_PAGES:
				199	*idx = 5;
				200	break;
				201	default:
				202	return NULL;
				203	}
				204
				205	/*
				206	* idx now points to the pool we want to allocate from. only the
				207	* 1-vec entry pool is mempool backed.
				208	*/
				209	if (*idx == BIOVEC_MAX_IDX) {
				210	fallback:
				211	bvl = mempool_alloc(pool, gfp_mask);
				212	} else {
				213	struct biovec_slab bvs = bvec_slabs + idx;
				214	gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM \| __GFP_IO);
				215
				216	/*
				217	* Make this allocation restricted and don't dump info on
				218	* allocation failures, since we'll fallback to the mempool
				219	* in case of failure.
				220	*/
				221	__gfp_mask \|= __GFP_NOMEMALLOC \| __GFP_NORETRY \| __GFP_NOWARN;
				222
				223	/*
				224	* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
				225	* is set, retry with the 1-entry mempool
				226	*/
				227	bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
				228	if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
				229	*idx = BIOVEC_MAX_IDX;
				230	goto fallback;
				231	}
				232	}
				233
				234	return bvl;
				235	}
				236
				/*
				 * __bio_free - release the resources attached to @bio, not @bio itself
				 *
				 * Drops the task association and, if present, the integrity payload.
				 */
				static void __bio_free(struct bio *bio)
				{
					bio_disassociate_task(bio);

					if (!bio_integrity(bio))
						return;

					bio_integrity_free(bio);
				}
				244
				245	static void bio_free(struct bio *bio)
				246	{
				247	struct bio_set *bs = bio->bi_pool;
				248	void *p;
				249
				250	__bio_free(bio);
				251
				252	if (bs) {
				253	if (bio_flagged(bio, BIO_OWNS_VEC))
				254	bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
				255
				256	/*
				257	* If we have front padding, adjust the bio pointer before freeing
				258	*/
				259	p = bio;
				260	p -= bs->front_pad;
				261
				262	mempool_free(p, bs->bio_pool);
				263	} else {
				264	/* Bio was allocated by bio_kmalloc() */
				265	kfree(bio);
				266	}
				267	}
				268
				269	void bio_init(struct bio *bio)
				270	{
				271	memset(bio, 0, sizeof(*bio));
				272	atomic_set(&bio->__bi_remaining, 1);
				273	atomic_set(&bio->__bi_cnt, 1);
				274	}
				275	EXPORT_SYMBOL(bio_init);
				276
				277	/**
				278	* bio_reset - reinitialize a bio
				279	* @bio: bio to reset
				280	*
				281	* Description:
				282	* After calling bio_reset(), @bio will be in the same state as a freshly
				283	* allocated bio returned bio bio_alloc_bioset() - the only fields that are
				284	* preserved are the ones that are initialized by bio_alloc_bioset(). See
				285	* comment in struct bio.
				286	*/
				287	void bio_reset(struct bio *bio)
				288	{
				289	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
				290
				291	__bio_free(bio);
				292
				293	memset(bio, 0, BIO_RESET_BYTES);
				294	bio->bi_flags = flags;
				295	atomic_set(&bio->__bi_remaining, 1);
				296	}
				297	EXPORT_SYMBOL(bio_reset);
				298
				299	static void bio_chain_endio(struct bio *bio)
				300	{
				301	struct bio *parent = bio->bi_private;
				302
				303	parent->bi_error = bio->bi_error;
				304	bio_endio(parent);
				305	bio_put(bio);
				306	}
				307
				308	/*
				309	* Increment chain count for the bio. Make sure the CHAIN flag update
				310	* is visible before the raised count.
				311	*/
				312	static inline void bio_inc_remaining(struct bio *bio)
				313	{
				314	bio_set_flag(bio, BIO_CHAIN);
				315	smp_mb__before_atomic();
				316	atomic_inc(&bio->__bi_remaining);
				317	}
				318
				319	/**
				320	* bio_chain - chain bio completions
				321	* @bio: the target bio
				322	* @parent: the @bio's parent bio
				323	*
				324	* The caller won't have a bi_end_io called when @bio completes - instead,
				325	* @parent's bi_end_io won't be called until both @parent and @bio have
				326	* completed; the chained bio will also be freed when it completes.
				327	*
				328	* The caller must not set bi_private or bi_end_io in @bio.
				329	*/
				330	void bio_chain(struct bio bio, struct bio parent)
				331	{
				332	BUG_ON(bio->bi_private \|\| bio->bi_end_io);
				333
				334	bio->bi_private = parent;
				335	bio->bi_end_io = bio_chain_endio;
				336	bio_inc_remaining(parent);
				337	}
				338	EXPORT_SYMBOL(bio_chain);
				339
				340	static void bio_alloc_rescue(struct work_struct *work)
				341	{
				342	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
				343	struct bio *bio;
				344
				345	while (1) {
				346	spin_lock(&bs->rescue_lock);
				347	bio = bio_list_pop(&bs->rescue_list);
				348	spin_unlock(&bs->rescue_lock);
				349
				350	if (!bio)
				351	break;
				352
				353	generic_make_request(bio);
				354	}
				355	}
				356
				357	static void punt_bios_to_rescuer(struct bio_set *bs)
				358	{
				359	struct bio_list punt, nopunt;
				360	struct bio *bio;
				361
				362	/*
				363	* In order to guarantee forward progress we must punt only bios that
				364	* were allocated from this bio_set; otherwise, if there was a bio on
				365	* there for a stacking driver higher up in the stack, processing it
				366	* could require allocating bios from this bio_set, and doing that from
				367	* our own rescuer would be bad.
				368	*
				369	* Since bio lists are singly linked, pop them all instead of trying to
				370	* remove from the middle of the list:
				371	*/
				372
				373	bio_list_init(&punt);
				374	bio_list_init(&nopunt);
				375
				376	while ((bio = bio_list_pop(&current->bio_list[0])))
				377	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				378	current->bio_list[0] = nopunt;
				379
				380	bio_list_init(&nopunt);
				381	while ((bio = bio_list_pop(&current->bio_list[1])))
				382	bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
				383	current->bio_list[1] = nopunt;
				384
				385	spin_lock(&bs->rescue_lock);
				386	bio_list_merge(&bs->rescue_list, &punt);
				387	spin_unlock(&bs->rescue_lock);
				388
				389	queue_work(bs->rescue_workqueue, &bs->rescue_work);
				390	}
				391
				392	/**
				393	* bio_alloc_bioset - allocate a bio for I/O
				394	* @gfp_mask: the GFP_ mask given to the slab allocator
				395	* @nr_iovecs: number of iovecs to pre-allocate
				396	* @bs: the bio_set to allocate from.
				397	*
				398	* Description:
				399	* If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
				400	* backed by the @bs's mempool.
				401	*
				402	* When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
				403	* always be able to allocate a bio. This is due to the mempool guarantees.
				404	* To make this work, callers must never allocate more than 1 bio at a time
				405	* from this pool. Callers that need to allocate more than 1 bio must always
				406	* submit the previously allocated bio for IO before attempting to allocate
				407	* a new one. Failure to do so can cause deadlocks under memory pressure.
				408	*
				409	* Note that when running under generic_make_request() (i.e. any block
				410	* driver), bios are not submitted until after you return - see the code in
				411	* generic_make_request() that converts recursion into iteration, to prevent
				412	* stack overflows.
				413	*
				414	* This would normally mean allocating multiple bios under
				415	* generic_make_request() would be susceptible to deadlocks, but we have
				416	* deadlock avoidance code that resubmits any blocked bios from a rescuer
				417	* thread.
				418	*
				419	* However, we do not guarantee forward progress for allocations from other
				420	* mempools. Doing multiple allocations from the same mempool under
				421	* generic_make_request() should be avoided - instead, use bio_set's front_pad
				422	* for per bio allocations.
				423	*
				424	* RETURNS:
				425	* Pointer to new bio on success, NULL on failure.
				426	*/
				427	struct bio bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set bs)
				428	{
				429	gfp_t saved_gfp = gfp_mask;
				430	unsigned front_pad;
				431	unsigned inline_vecs;
				432	unsigned long idx = BIO_POOL_NONE;
				433	struct bio_vec *bvl = NULL;
				434	struct bio *bio;
				435	void *p;
				436
				437	if (!bs) {
				438	if (nr_iovecs > UIO_MAXIOV)
				439	return NULL;
				440
				441	p = kmalloc(sizeof(struct bio) +
				442	nr_iovecs * sizeof(struct bio_vec),
				443	gfp_mask);
				444	front_pad = 0;
				445	inline_vecs = nr_iovecs;
				446	} else {
				447	/* should not use nobvec bioset for nr_iovecs > 0 */
				448	if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
				449	return NULL;
				450	/*
				451	* generic_make_request() converts recursion to iteration; this
				452	* means if we're running beneath it, any bios we allocate and
				453	* submit will not be submitted (and thus freed) until after we
				454	* return.
				455	*
				456	* This exposes us to a potential deadlock if we allocate
				457	* multiple bios from the same bio_set() while running
				458	* underneath generic_make_request(). If we were to allocate
				459	* multiple bios (say a stacking block driver that was splitting
				460	* bios), we would deadlock if we exhausted the mempool's
				461	* reserve.
				462	*
				463	* We solve this, and guarantee forward progress, with a rescuer
				464	* workqueue per bio_set. If we go to allocate and there are
				465	* bios on current->bio_list, we first try the allocation
				466	* without __GFP_DIRECT_RECLAIM; if that fails, we punt those
				467	* bios we would be blocking to the rescuer workqueue before
				468	* we retry with the original gfp_flags.
				469	*/
				470
				471	if (current->bio_list &&
				472	(!bio_list_empty(&current->bio_list[0]) \|\|
				473	!bio_list_empty(&current->bio_list[1])))
				474	gfp_mask &= ~__GFP_DIRECT_RECLAIM;
				475
				476	p = mempool_alloc(bs->bio_pool, gfp_mask);
				477	if (!p && gfp_mask != saved_gfp) {
				478	punt_bios_to_rescuer(bs);
				479	gfp_mask = saved_gfp;
				480	p = mempool_alloc(bs->bio_pool, gfp_mask);
				481	}
				482
				483	front_pad = bs->front_pad;
				484	inline_vecs = BIO_INLINE_VECS;
				485	}
				486
				487	if (unlikely(!p))
				488	return NULL;
				489
				490	bio = p + front_pad;
				491	bio_init(bio);
				492
				493	if (nr_iovecs > inline_vecs) {
				494	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
				495	if (!bvl && gfp_mask != saved_gfp) {
				496	punt_bios_to_rescuer(bs);
				497	gfp_mask = saved_gfp;
				498	bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
				499	}
				500
				501	if (unlikely(!bvl))
				502	goto err_free;
				503
				504	bio_set_flag(bio, BIO_OWNS_VEC);
				505	} else if (nr_iovecs) {
				506	bvl = bio->bi_inline_vecs;
				507	}
				508
				509	bio->bi_pool = bs;
				510	bio->bi_flags \|= idx << BIO_POOL_OFFSET;
				511	bio->bi_max_vecs = nr_iovecs;
				512	bio->bi_io_vec = bvl;
				513	return bio;
				514
				515	err_free:
				516	mempool_free(p, bs->bio_pool);
				517	return NULL;
				518	}
				519	EXPORT_SYMBOL(bio_alloc_bioset);
				520
				521	void zero_fill_bio(struct bio *bio)
				522	{
				523	unsigned long flags;
				524	struct bio_vec bv;
				525	struct bvec_iter iter;
				526
				527	bio_for_each_segment(bv, bio, iter) {
				528	char *data = bvec_kmap_irq(&bv, &flags);
				529	memset(data, 0, bv.bv_len);
				530	flush_dcache_page(bv.bv_page);
				531	bvec_kunmap_irq(data, &flags);
				532	}
				533	}
				534	EXPORT_SYMBOL(zero_fill_bio);
				535
				536	/**
				537	* bio_put - release a reference to a bio
				538	* @bio: bio to release reference to
				539	*
				540	* Description:
				541	* Put a reference to a &struct bio, either one you have gotten with
				542	* bio_alloc, bio_get or bio_clone. The last put of a bio will free it.
				543	**/
				544	void bio_put(struct bio *bio)
				545	{
				546	if (!bio_flagged(bio, BIO_REFFED))
				547	bio_free(bio);
				548	else {
				549	BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
				550
				551	/*
				552	* last put frees it
				553	*/
				554	if (atomic_dec_and_test(&bio->__bi_cnt))
				555	bio_free(bio);
				556	}
				557	}
				558	EXPORT_SYMBOL(bio_put);
				559
				560	inline int bio_phys_segments(struct request_queue q, struct bio bio)
				561	{
				562	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
				563	blk_recount_segments(q, bio);
				564
				565	return bio->bi_phys_segments;
				566	}
				567	EXPORT_SYMBOL(bio_phys_segments);
				568
				569	/**
				570	* __bio_clone_fast - clone a bio that shares the original bio's biovec
				571	* @bio: destination bio
				572	* @bio_src: bio to clone
				573	*
				574	* Clone a &bio. Caller will own the returned bio, but not
				575	* the actual data it points to. Reference count of returned
				576	* bio will be one.
				577	*
				578	* Caller must ensure that @bio_src is not freed before @bio.
				579	*/
				580	void __bio_clone_fast(struct bio bio, struct bio bio_src)
				581	{
				582	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
				583
				584	/*
				585	* most users will be overriding ->bi_bdev with a new target,
				586	* so we don't set nor calculate new physical/hw segment counts here
				587	*/
				588	bio->bi_bdev = bio_src->bi_bdev;
				589	bio_set_flag(bio, BIO_CLONED);
				590	bio->bi_rw = bio_src->bi_rw;
				591	bio->bi_iter = bio_src->bi_iter;
				592	bio->bi_io_vec = bio_src->bi_io_vec;
				593
				594	bio_clone_blkcg_association(bio, bio_src);
				595	}
				596	EXPORT_SYMBOL(__bio_clone_fast);
				597
				598	/**
				599	* bio_clone_fast - clone a bio that shares the original bio's biovec
				600	* @bio: bio to clone
				601	* @gfp_mask: allocation priority
				602	* @bs: bio_set to allocate from
				603	*
				604	* Like __bio_clone_fast, only also allocates the returned bio
				605	*/
				606	struct bio bio_clone_fast(struct bio bio, gfp_t gfp_mask, struct bio_set *bs)
				607	{
				608	struct bio *b;
				609
				610	b = bio_alloc_bioset(gfp_mask, 0, bs);
				611	if (!b)
				612	return NULL;
				613
				614	__bio_clone_fast(b, bio);
				615
				616	if (bio_integrity(bio)) {
				617	int ret;
				618
				619	ret = bio_integrity_clone(b, bio, gfp_mask);
				620
				621	if (ret < 0) {
				622	bio_put(b);
				623	return NULL;
				624	}
				625	}
				626
				627	return b;
				628	}
				629	EXPORT_SYMBOL(bio_clone_fast);
				630
				631	/**
				632	* bio_clone_bioset - clone a bio
				633	* @bio_src: bio to clone
				634	* @gfp_mask: allocation priority
				635	* @bs: bio_set to allocate from
				636	*
				637	* Clone bio. Caller will own the returned bio, but not the actual data it
				638	* points to. Reference count of returned bio will be one.
				639	*/
				640	struct bio bio_clone_bioset(struct bio bio_src, gfp_t gfp_mask,
				641	struct bio_set *bs)
				642	{
				643	struct bvec_iter iter;
				644	struct bio_vec bv;
				645	struct bio *bio;
				646
				647	/*
				648	* Pre immutable biovecs, __bio_clone() used to just do a memcpy from
				649	* bio_src->bi_io_vec to bio->bi_io_vec.
				650	*
				651	* We can't do that anymore, because:
				652	*
				653	* - The point of cloning the biovec is to produce a bio with a biovec
				654	* the caller can modify: bi_idx and bi_bvec_done should be 0.
				655	*
				656	* - The original bio could've had more than BIO_MAX_PAGES biovecs; if
				657	* we tried to clone the whole thing bio_alloc_bioset() would fail.
				658	* But the clone should succeed as long as the number of biovecs we
				659	* actually need to allocate is fewer than BIO_MAX_PAGES.
				660	*
				661	* - Lastly, bi_vcnt should not be looked at or relied upon by code
				662	* that does not own the bio - reason being drivers don't use it for
				663	* iterating over the biovec anymore, so expecting it to be kept up
				664	* to date (i.e. for clones that share the parent biovec) is just
				665	* asking for trouble and would force extra work on
				666	* __bio_clone_fast() anyways.
				667	*/
				668
				669	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
				670	if (!bio)
				671	return NULL;
				672
				673	bio->bi_bdev = bio_src->bi_bdev;
				674	bio->bi_rw = bio_src->bi_rw;
				675	bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
				676	bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
				677
				678	if (bio->bi_rw & REQ_DISCARD)
				679	goto integrity_clone;
				680
				681	if (bio->bi_rw & REQ_WRITE_SAME) {
				682	bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
				683	goto integrity_clone;
				684	}
				685
				686	bio_for_each_segment(bv, bio_src, iter)
				687	bio->bi_io_vec[bio->bi_vcnt++] = bv;
				688
				689	integrity_clone:
				690	if (bio_integrity(bio_src)) {
				691	int ret;
				692
				693	ret = bio_integrity_clone(bio, bio_src, gfp_mask);
				694	if (ret < 0) {
				695	bio_put(bio);
				696	return NULL;
				697	}
				698	}
				699
				700	bio_clone_blkcg_association(bio, bio_src);
				701
				702	return bio;
				703	}
				704	EXPORT_SYMBOL(bio_clone_bioset);
				705
				706	/**
				707	* bio_add_pc_page - attempt to add page to bio
				708	* @q: the target queue
				709	* @bio: destination bio
				710	* @page: page to add
				711	* @len: vec entry length
				712	* @offset: vec entry offset
				713	*
				714	* Attempt to add a page to the bio_vec maplist. This can fail for a
				715	* number of reasons, such as the bio being full or target block device
				716	* limitations. The target block device must allow bio's up to PAGE_SIZE,
				717	* so it is always possible to add a single page to an empty bio.
				718	*
				719	* This should only be used by REQ_PC bios.
				720	*/
				721	int bio_add_pc_page(struct request_queue q, struct bio bio, struct page
				722	*page, unsigned int len, unsigned int offset)
				723	{
				724	int retried_segments = 0;
				725	struct bio_vec *bvec;
				726
				727	/*
				728	* cloned bio must not modify vec list
				729	*/
				730	if (unlikely(bio_flagged(bio, BIO_CLONED)))
				731	return 0;
				732
				733	if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
				734	return 0;
				735
				736	/*
				737	* For filesystems with a blocksize smaller than the pagesize
				738	* we will often be called with the same page as last time and
				739	* a consecutive offset. Optimize this special case.
				740	*/
				741	if (bio->bi_vcnt > 0) {
				742	struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
				743
				744	if (page == prev->bv_page &&
				745	offset == prev->bv_offset + prev->bv_len) {
				746	prev->bv_len += len;
				747	bio->bi_iter.bi_size += len;
				748	goto done;
				749	}
				750
				751	/*
				752	* If the queue doesn't support SG gaps and adding this
				753	* offset would create a gap, disallow it.
				754	*/
				755	if (bvec_gap_to_prev(q, prev, offset))
				756	return 0;
				757	}
				758
				759	if (bio->bi_vcnt >= bio->bi_max_vecs)
				760	return 0;
				761
				762	/*
				763	* setup the new entry, we might clear it again later if we
				764	* cannot add the page
				765	*/
				766	bvec = &bio->bi_io_vec[bio->bi_vcnt];
				767	bvec->bv_page = page;
				768	bvec->bv_len = len;
				769	bvec->bv_offset = offset;
				770	bio->bi_vcnt++;
				771	bio->bi_phys_segments++;
				772	bio->bi_iter.bi_size += len;
				773
				774	/*
				775	* Perform a recount if the number of segments is greater
				776	* than queue_max_segments(q).
				777	*/
				778
				779	while (bio->bi_phys_segments > queue_max_segments(q)) {
				780
				781	if (retried_segments)
				782	goto failed;
				783
				784	retried_segments = 1;
				785	blk_recount_segments(q, bio);
				786	}
				787
				788	/* If we may be able to merge these biovecs, force a recount */
				789	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
				790	bio_clear_flag(bio, BIO_SEG_VALID);
				791
				792	done:
				793	return len;
				794
				795	failed:
				796	bvec->bv_page = NULL;
				797	bvec->bv_len = 0;
				798	bvec->bv_offset = 0;
				799	bio->bi_vcnt--;
				800	bio->bi_iter.bi_size -= len;
				801	blk_recount_segments(q, bio);
				802	return 0;
				803	}
				804	EXPORT_SYMBOL(bio_add_pc_page);
				805
				806	/**
				807	* bio_add_page - attempt to add page to bio
				808	* @bio: destination bio
				809	* @page: page to add
				810	* @len: vec entry length
				811	* @offset: vec entry offset
				812	*
				813	* Attempt to add a page to the bio_vec maplist. This will only fail
				814	* if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
				815	*/
				816	int bio_add_page(struct bio bio, struct page page,
				817	unsigned int len, unsigned int offset)
				818	{
				819	struct bio_vec *bv;
				820
				821	/*
				822	* cloned bio must not modify vec list
				823	*/
				824	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
				825	return 0;
				826
				827	/*
				828	* For filesystems with a blocksize smaller than the pagesize
				829	* we will often be called with the same page as last time and
				830	* a consecutive offset. Optimize this special case.
				831	*/
				832	if (bio->bi_vcnt > 0) {
				833	bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
				834
				835	if (page == bv->bv_page &&
				836	offset == bv->bv_offset + bv->bv_len) {
				837	bv->bv_len += len;
				838	goto done;
				839	}
				840	}
				841
				842	if (bio->bi_vcnt >= bio->bi_max_vecs)
				843	return 0;
				844
				845	bv = &bio->bi_io_vec[bio->bi_vcnt];
				846	bv->bv_page = page;
				847	bv->bv_len = len;
				848	bv->bv_offset = offset;
				849
				850	bio->bi_vcnt++;
				851	done:
				852	bio->bi_iter.bi_size += len;
				853	return len;
				854	}
				855	EXPORT_SYMBOL(bio_add_page);
				856
				857	struct submit_bio_ret {
				858	struct completion event;
				859	int error;
				860	};
				861
				862	static void submit_bio_wait_endio(struct bio *bio)
				863	{
				864	struct submit_bio_ret *ret = bio->bi_private;
				865
				866	ret->error = bio->bi_error;
				867	complete(&ret->event);
				868	}
				869
				870	/**
				871	* submit_bio_wait - submit a bio, and wait until it completes
				872	* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
				873	* @bio: The &struct bio which describes the I/O
				874	*
				875	* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
				876	* bio_endio() on failure.
				877	*/
				878	int submit_bio_wait(int rw, struct bio *bio)
				879	{
				880	struct submit_bio_ret ret;
				881
				882	rw \|= REQ_SYNC;
				883	init_completion(&ret.event);
				884	bio->bi_private = &ret;
				885	bio->bi_end_io = submit_bio_wait_endio;
				886	submit_bio(rw, bio);
				887	wait_for_completion(&ret.event);
				888
				889	return ret.error;
				890	}
				891	EXPORT_SYMBOL(submit_bio_wait);
				892
				893	/**
				894	* bio_advance - increment/complete a bio by some number of bytes
				895	* @bio: bio to advance
				896	* @bytes: number of bytes to complete
				897	*
				898	* This updates bi_sector, bi_size and bi_idx; if the number of bytes to
				899	* complete doesn't align with a bvec boundary, then bv_len and bv_offset will
				900	* be updated on the last bvec as well.
				901	*
				902	* @bio will then represent the remaining, uncompleted portion of the io.
				903	*/
				904	void bio_advance(struct bio *bio, unsigned bytes)
				905	{
				906	if (bio_integrity(bio))
				907	bio_integrity_advance(bio, bytes);
				908
				909	bio_advance_iter(bio, &bio->bi_iter, bytes);
				910	}
				911	EXPORT_SYMBOL(bio_advance);
				912
				913	/**
				914	* bio_alloc_pages - allocates a single page for each bvec in a bio
				915	* @bio: bio to allocate pages for
				916	* @gfp_mask: flags for allocation
				917	*
				918	* Allocates pages up to @bio->bi_vcnt.
				919	*
				920	* Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are
				921	* freed.
				922	*/
				923	int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
				924	{
				925	int i;
				926	struct bio_vec *bv;
				927
				928	bio_for_each_segment_all(bv, bio, i) {
				929	bv->bv_page = alloc_page(gfp_mask);
				930	if (!bv->bv_page) {
				931	while (--bv >= bio->bi_io_vec)
				932	__free_page(bv->bv_page);
				933	return -ENOMEM;
				934	}
				935	}
				936
				937	return 0;
				938	}
				939	EXPORT_SYMBOL(bio_alloc_pages);
				940
				941	/**
				942	* bio_copy_data - copy contents of data buffers from one chain of bios to
				943	* another
				944	* @src: source bio list
				945	* @dst: destination bio list
				946	*
				947	* If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
				948	* @src and @dst as linked lists of bios.
				949	*
				950	* Stops when it reaches the end of either @src or @dst - that is, copies
				951	* min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of bios).
				952	*/
				953	void bio_copy_data(struct bio dst, struct bio src)
				954	{
				955	struct bvec_iter src_iter, dst_iter;
				956	struct bio_vec src_bv, dst_bv;
				957	void src_p, dst_p;
				958	unsigned bytes;
				959
				960	src_iter = src->bi_iter;
				961	dst_iter = dst->bi_iter;
				962
				963	while (1) {
				964	if (!src_iter.bi_size) {
				965	src = src->bi_next;
				966	if (!src)
				967	break;
				968
				969	src_iter = src->bi_iter;
				970	}
				971
				972	if (!dst_iter.bi_size) {
				973	dst = dst->bi_next;
				974	if (!dst)
				975	break;
				976
				977	dst_iter = dst->bi_iter;
				978	}
				979
				980	src_bv = bio_iter_iovec(src, src_iter);
				981	dst_bv = bio_iter_iovec(dst, dst_iter);
				982
				983	bytes = min(src_bv.bv_len, dst_bv.bv_len);
				984
				985	src_p = kmap_atomic(src_bv.bv_page);
				986	dst_p = kmap_atomic(dst_bv.bv_page);
				987
				988	memcpy(dst_p + dst_bv.bv_offset,
				989	src_p + src_bv.bv_offset,
				990	bytes);
				991
				992	kunmap_atomic(dst_p);
				993	kunmap_atomic(src_p);
				994
				995	bio_advance_iter(src, &src_iter, bytes);
				996	bio_advance_iter(dst, &dst_iter, bytes);
				997	}
				998	}
				999	EXPORT_SYMBOL(bio_copy_data);
				1000
				1001	struct bio_map_data {
				1002	int is_our_pages;
				1003	struct iov_iter iter;
				1004	struct iovec iov[];
				1005	};
				1006
				1007	static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
				1008	gfp_t gfp_mask)
				1009	{
				1010	if (iov_count > UIO_MAXIOV)
				1011	return NULL;
				1012
				1013	return kmalloc(sizeof(struct bio_map_data) +
				1014	sizeof(struct iovec) * iov_count, gfp_mask);
				1015	}
				1016
				1017	/**
				1018	* bio_copy_from_iter - copy all pages from iov_iter to bio
				1019	* @bio: The &struct bio which describes the I/O as destination
				1020	* @iter: iov_iter as source
				1021	*
				1022	* Copy all pages from iov_iter to bio.
				1023	* Returns 0 on success, or error on failure.
				1024	*/
				1025	static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter)
				1026	{
				1027	int i;
				1028	struct bio_vec *bvec;
				1029
				1030	bio_for_each_segment_all(bvec, bio, i) {
				1031	ssize_t ret;
				1032
				1033	ret = copy_page_from_iter(bvec->bv_page,
				1034	bvec->bv_offset,
				1035	bvec->bv_len,
				1036	&iter);
				1037
				1038	if (!iov_iter_count(&iter))
				1039	break;
				1040
				1041	if (ret < bvec->bv_len)
				1042	return -EFAULT;
				1043	}
				1044
				1045	return 0;
				1046	}
				1047
				1048	/**
				1049	* bio_copy_to_iter - copy all pages from bio to iov_iter
				1050	* @bio: The &struct bio which describes the I/O as source
				1051	* @iter: iov_iter as destination
				1052	*
				1053	* Copy all pages from bio to iov_iter.
				1054	* Returns 0 on success, or error on failure.
				1055	*/
				1056	static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
				1057	{
				1058	int i;
				1059	struct bio_vec *bvec;
				1060
				1061	bio_for_each_segment_all(bvec, bio, i) {
				1062	ssize_t ret;
				1063
				1064	ret = copy_page_to_iter(bvec->bv_page,
				1065	bvec->bv_offset,
				1066	bvec->bv_len,
				1067	&iter);
				1068
				1069	if (!iov_iter_count(&iter))
				1070	break;
				1071
				1072	if (ret < bvec->bv_len)
				1073	return -EFAULT;
				1074	}
				1075
				1076	return 0;
				1077	}
				1078
				1079	static void bio_free_pages(struct bio *bio)
				1080	{
				1081	struct bio_vec *bvec;
				1082	int i;
				1083
				1084	bio_for_each_segment_all(bvec, bio, i)
				1085	__free_page(bvec->bv_page);
				1086	}
				1087
				1088	/**
				1089	* bio_uncopy_user - finish previously mapped bio
				1090	* @bio: bio being terminated
				1091	*
				1092	* Free pages allocated from bio_copy_user_iov() and write back data
				1093	* to user space in case of a read.
				1094	*/
				1095	int bio_uncopy_user(struct bio *bio)
				1096	{
				1097	struct bio_map_data *bmd = bio->bi_private;
				1098	int ret = 0;
				1099
				1100	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
				1101	/*
				1102	* if we're in a workqueue, the request is orphaned, so
				1103	* don't copy into a random user address space, just free
				1104	* and return -EINTR so user space doesn't expect any data.
				1105	*/
				1106	if (!current->mm)
				1107	ret = -EINTR;
				1108	else if (bio_data_dir(bio) == READ)
				1109	ret = bio_copy_to_iter(bio, bmd->iter);
				1110	if (bmd->is_our_pages)
				1111	bio_free_pages(bio);
				1112	}
				1113	kfree(bmd);
				1114	bio_put(bio);
				1115	return ret;
				1116	}
				1117	EXPORT_SYMBOL(bio_uncopy_user);
				1118
				1119	/**
				1120	* bio_copy_user_iov - copy user data to bio
				1121	* @q: destination block queue
				1122	* @map_data: pointer to the rq_map_data holding pages (if necessary)
				1123	* @iter: iovec iterator
				1124	* @gfp_mask: memory allocation flags
				1125	*
				1126	* Prepares and returns a bio for indirect user io, bouncing data
				1127	* to/from kernel pages as necessary. Must be paired with
				1128	* call bio_uncopy_user() on io completion.
				1129	*/
				1130	struct bio bio_copy_user_iov(struct request_queue q,
				1131	struct rq_map_data *map_data,
				1132	const struct iov_iter *iter,
				1133	gfp_t gfp_mask)
				1134	{
				1135	struct bio_map_data *bmd;
				1136	struct page *page;
				1137	struct bio *bio;
				1138	int i, ret;
				1139	int nr_pages = 0;
				1140	unsigned int len = iter->count;
				1141	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
				1142
				1143	for (i = 0; i < iter->nr_segs; i++) {
				1144	unsigned long uaddr;
				1145	unsigned long end;
				1146	unsigned long start;
				1147
				1148	uaddr = (unsigned long) iter->iov[i].iov_base;
				1149	end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1)
				1150	>> PAGE_SHIFT;
				1151	start = uaddr >> PAGE_SHIFT;
				1152
				1153	/*
				1154	* Overflow, abort
				1155	*/
				1156	if (end < start)
				1157	return ERR_PTR(-EINVAL);
				1158
				1159	nr_pages += end - start;
				1160	}
				1161
				1162	if (offset)
				1163	nr_pages++;
				1164
				1165	bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask);
				1166	if (!bmd)
				1167	return ERR_PTR(-ENOMEM);
				1168
				1169	/*
				1170	* We need to do a deep copy of the iov_iter including the iovecs.
				1171	* The caller provided iov might point to an on-stack or otherwise
				1172	* shortlived one.
				1173	*/
				1174	bmd->is_our_pages = map_data ? 0 : 1;
				1175	memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs);
				1176	iov_iter_init(&bmd->iter, iter->type, bmd->iov,
				1177	iter->nr_segs, iter->count);
				1178
				1179	ret = -ENOMEM;
				1180	bio = bio_kmalloc(gfp_mask, nr_pages);
				1181	if (!bio)
				1182	goto out_bmd;
				1183
				1184	if (iter->type & WRITE)
				1185	bio->bi_rw \|= REQ_WRITE;
				1186
				1187	ret = 0;
				1188
				1189	if (map_data) {
				1190	nr_pages = 1 << map_data->page_order;
				1191	i = map_data->offset / PAGE_SIZE;
				1192	}
				1193	while (len) {
				1194	unsigned int bytes = PAGE_SIZE;
				1195
				1196	bytes -= offset;
				1197
				1198	if (bytes > len)
				1199	bytes = len;
				1200
				1201	if (map_data) {
				1202	if (i == map_data->nr_entries * nr_pages) {
				1203	ret = -ENOMEM;
				1204	break;
				1205	}
				1206
				1207	page = map_data->pages[i / nr_pages];
				1208	page += (i % nr_pages);
				1209
				1210	i++;
				1211	} else {
				1212	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1213	if (!page) {
				1214	ret = -ENOMEM;
				1215	break;
				1216	}
				1217	}
				1218
				1219	if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
				1220	break;
				1221
				1222	len -= bytes;
				1223	offset = 0;
				1224	}
				1225
				1226	if (ret)
				1227	goto cleanup;
				1228
				1229	/*
				1230	* success
				1231	*/
				1232	if (((iter->type & WRITE) && (!map_data \|\| !map_data->null_mapped)) \|\|
				1233	(map_data && map_data->from_user)) {
				1234	ret = bio_copy_from_iter(bio, *iter);
				1235	if (ret)
				1236	goto cleanup;
				1237	}
				1238
				1239	bio->bi_private = bmd;
				1240	return bio;
				1241	cleanup:
				1242	if (!map_data)
				1243	bio_free_pages(bio);
				1244	bio_put(bio);
				1245	out_bmd:
				1246	kfree(bmd);
				1247	return ERR_PTR(ret);
				1248	}
				1249
				1250	/**
				1251	* bio_map_user_iov - map user iovec into bio
				1252	* @q: the struct request_queue for the bio
				1253	* @iter: iovec iterator
				1254	* @gfp_mask: memory allocation flags
				1255	*
				1256	* Map the user space address into a bio suitable for io to a block
				1257	* device. Returns an error pointer in case of error.
				1258	*/
				1259	struct bio bio_map_user_iov(struct request_queue q,
				1260	const struct iov_iter *iter,
				1261	gfp_t gfp_mask)
				1262	{
				1263	int j;
				1264	int nr_pages = 0;
				1265	struct page **pages;
				1266	struct bio *bio;
				1267	int cur_page = 0;
				1268	int ret, offset;
				1269	struct iov_iter i;
				1270	struct iovec iov;
				1271
				1272	iov_for_each(iov, i, *iter) {
				1273	unsigned long uaddr = (unsigned long) iov.iov_base;
				1274	unsigned long len = iov.iov_len;
				1275	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1276	unsigned long start = uaddr >> PAGE_SHIFT;
				1277
				1278	/*
				1279	* Overflow, abort
				1280	*/
				1281	if (end < start)
				1282	return ERR_PTR(-EINVAL);
				1283
				1284	nr_pages += end - start;
				1285	/*
				1286	* buffer must be aligned to at least hardsector size for now
				1287	*/
				1288	if (uaddr & queue_dma_alignment(q))
				1289	return ERR_PTR(-EINVAL);
				1290	}
				1291
				1292	if (!nr_pages)
				1293	return ERR_PTR(-EINVAL);
				1294
				1295	bio = bio_kmalloc(gfp_mask, nr_pages);
				1296	if (!bio)
				1297	return ERR_PTR(-ENOMEM);
				1298
				1299	ret = -ENOMEM;
				1300	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
				1301	if (!pages)
				1302	goto out;
				1303
				1304	iov_for_each(iov, i, *iter) {
				1305	unsigned long uaddr = (unsigned long) iov.iov_base;
				1306	unsigned long len = iov.iov_len;
				1307	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1308	unsigned long start = uaddr >> PAGE_SHIFT;
				1309	const int local_nr_pages = end - start;
				1310	const int page_limit = cur_page + local_nr_pages;
				1311
				1312	ret = get_user_pages_fast(uaddr, local_nr_pages,
				1313	(iter->type & WRITE) != WRITE,
				1314	&pages[cur_page]);
				1315	if (ret < local_nr_pages) {
				1316	ret = -EFAULT;
				1317	goto out_unmap;
				1318	}
				1319
				1320	offset = uaddr & ~PAGE_MASK;
				1321	for (j = cur_page; j < page_limit; j++) {
				1322	unsigned int bytes = PAGE_SIZE - offset;
				1323	unsigned short prev_bi_vcnt = bio->bi_vcnt;
				1324
				1325	if (len <= 0)
				1326	break;
				1327
				1328	if (bytes > len)
				1329	bytes = len;
				1330
				1331	/*
				1332	* sorry...
				1333	*/
				1334	if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
				1335	bytes)
				1336	break;
				1337
				1338	/*
				1339	* check if vector was merged with previous
				1340	* drop page reference if needed
				1341	*/
				1342	if (bio->bi_vcnt == prev_bi_vcnt)
				1343	put_page(pages[j]);
				1344
				1345	len -= bytes;
				1346	offset = 0;
				1347	}
				1348
				1349	cur_page = j;
				1350	/*
				1351	* release the pages we didn't map into the bio, if any
				1352	*/
				1353	while (j < page_limit)
				1354	page_cache_release(pages[j++]);
				1355	}
				1356
				1357	kfree(pages);
				1358
				1359	/*
				1360	* set data direction, and check if mapped pages need bouncing
				1361	*/
				1362	if (iter->type & WRITE)
				1363	bio->bi_rw \|= REQ_WRITE;
				1364
				1365	bio_set_flag(bio, BIO_USER_MAPPED);
				1366
				1367	/*
				1368	* subtle -- if __bio_map_user() ended up bouncing a bio,
				1369	* it would normally disappear when its bi_end_io is run.
				1370	* however, we need it for the unmap, so grab an extra
				1371	* reference to it
				1372	*/
				1373	bio_get(bio);
				1374	return bio;
				1375
				1376	out_unmap:
				1377	for (j = 0; j < nr_pages; j++) {
				1378	if (!pages[j])
				1379	break;
				1380	page_cache_release(pages[j]);
				1381	}
				1382	out:
				1383	kfree(pages);
				1384	bio_put(bio);
				1385	return ERR_PTR(ret);
				1386	}
				1387
				1388	static void __bio_unmap_user(struct bio *bio)
				1389	{
				1390	struct bio_vec *bvec;
				1391	int i;
				1392
				1393	/*
				1394	* make sure we dirty pages we wrote to
				1395	*/
				1396	bio_for_each_segment_all(bvec, bio, i) {
				1397	if (bio_data_dir(bio) == READ)
				1398	set_page_dirty_lock(bvec->bv_page);
				1399
				1400	page_cache_release(bvec->bv_page);
				1401	}
				1402
				1403	bio_put(bio);
				1404	}
				1405
				/**
				 * bio_unmap_user - unmap a bio
				 * @bio: the bio being unmapped
				 *
				 * Unmap a bio previously mapped by bio_map_user(). Must be called with
				 * a process context.
				 *
				 * bio_unmap_user() may sleep.
				 */
				void bio_unmap_user(struct bio *bio)
				{
					/* releases the pages and drops one reference on the bio */
					__bio_unmap_user(bio);

					/* second put, on top of the one done by __bio_unmap_user() */
					bio_put(bio);
				}
				EXPORT_SYMBOL(bio_unmap_user);
				1421
				/* Completion handler installed by bio_map_kern(): just drop the bio. */
				static void bio_map_kern_endio(struct bio *bio)
				{
					bio_put(bio);
				}
				1426
				1427	/**
				1428	* bio_map_kern - map kernel address into bio
				1429	* @q: the struct request_queue for the bio
				1430	* @data: pointer to buffer to map
				1431	* @len: length in bytes
				1432	* @gfp_mask: allocation flags for bio allocation
				1433	*
				1434	* Map the kernel address into a bio suitable for io to a block
				1435	* device. Returns an error pointer in case of error.
				1436	*/
				1437	struct bio bio_map_kern(struct request_queue q, void *data, unsigned int len,
				1438	gfp_t gfp_mask)
				1439	{
				1440	unsigned long kaddr = (unsigned long)data;
				1441	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1442	unsigned long start = kaddr >> PAGE_SHIFT;
				1443	const int nr_pages = end - start;
				1444	int offset, i;
				1445	struct bio *bio;
				1446
				1447	bio = bio_kmalloc(gfp_mask, nr_pages);
				1448	if (!bio)
				1449	return ERR_PTR(-ENOMEM);
				1450
				1451	offset = offset_in_page(kaddr);
				1452	for (i = 0; i < nr_pages; i++) {
				1453	unsigned int bytes = PAGE_SIZE - offset;
				1454
				1455	if (len <= 0)
				1456	break;
				1457
				1458	if (bytes > len)
				1459	bytes = len;
				1460
				1461	if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				1462	offset) < bytes) {
				1463	/* we don't support partial mappings */
				1464	bio_put(bio);
				1465	return ERR_PTR(-EINVAL);
				1466	}
				1467
				1468	data += bytes;
				1469	len -= bytes;
				1470	offset = 0;
				1471	}
				1472
				1473	bio->bi_end_io = bio_map_kern_endio;
				1474	return bio;
				1475	}
				1476	EXPORT_SYMBOL(bio_map_kern);
				1477
				/*
				 * Completion handler for bio_copy_kern() bios: free the bounce pages,
				 * then drop the bio.
				 */
				static void bio_copy_kern_endio(struct bio *bio)
				{
					bio_free_pages(bio);
					bio_put(bio);
				}
				1483
				1484	static void bio_copy_kern_endio_read(struct bio *bio)
				1485	{
				1486	char *p = bio->bi_private;
				1487	struct bio_vec *bvec;
				1488	int i;
				1489
				1490	bio_for_each_segment_all(bvec, bio, i) {
				1491	memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
				1492	p += bvec->bv_len;
				1493	}
				1494
				1495	bio_copy_kern_endio(bio);
				1496	}
				1497
				1498	/**
				1499	* bio_copy_kern - copy kernel address into bio
				1500	* @q: the struct request_queue for the bio
				1501	* @data: pointer to buffer to copy
				1502	* @len: length in bytes
				1503	* @gfp_mask: allocation flags for bio and page allocation
				1504	* @reading: data direction is READ
				1505	*
				1506	* copy the kernel address into a bio suitable for io to a block
				1507	* device. Returns an error pointer in case of error.
				1508	*/
				1509	struct bio bio_copy_kern(struct request_queue q, void *data, unsigned int len,
				1510	gfp_t gfp_mask, int reading)
				1511	{
				1512	unsigned long kaddr = (unsigned long)data;
				1513	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
				1514	unsigned long start = kaddr >> PAGE_SHIFT;
				1515	struct bio *bio;
				1516	void *p = data;
				1517	int nr_pages = 0;
				1518
				1519	/*
				1520	* Overflow, abort
				1521	*/
				1522	if (end < start)
				1523	return ERR_PTR(-EINVAL);
				1524
				1525	nr_pages = end - start;
				1526	bio = bio_kmalloc(gfp_mask, nr_pages);
				1527	if (!bio)
				1528	return ERR_PTR(-ENOMEM);
				1529
				1530	while (len) {
				1531	struct page *page;
				1532	unsigned int bytes = PAGE_SIZE;
				1533
				1534	if (bytes > len)
				1535	bytes = len;
				1536
				1537	page = alloc_page(q->bounce_gfp \| gfp_mask);
				1538	if (!page)
				1539	goto cleanup;
				1540
				1541	if (!reading)
				1542	memcpy(page_address(page), p, bytes);
				1543
				1544	if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
				1545	break;
				1546
				1547	len -= bytes;
				1548	p += bytes;
				1549	}
				1550
				1551	if (reading) {
				1552	bio->bi_end_io = bio_copy_kern_endio_read;
				1553	bio->bi_private = data;
				1554	} else {
				1555	bio->bi_end_io = bio_copy_kern_endio;
				1556	bio->bi_rw \|= REQ_WRITE;
				1557	}
				1558
				1559	return bio;
				1560
				1561	cleanup:
				1562	bio_free_pages(bio);
				1563	bio_put(bio);
				1564	return ERR_PTR(-ENOMEM);
				1565	}
				1566	EXPORT_SYMBOL(bio_copy_kern);
				1567
				1568	/*
				1569	* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
				1570	* for performing direct-IO in BIOs.
				1571	*
				1572	* The problem is that we cannot run set_page_dirty() from interrupt context
				1573	* because the required locks are not interrupt-safe. So what we can do is to
				1574	* mark the pages dirty _before_ performing IO. And in interrupt context,
				1575	* check that the pages are still dirty. If so, fine. If not, redirty them
				1576	* in process context.
				1577	*
				1578	* We special-case compound pages here: normally this means reads into hugetlb
				1579	* pages. The logic in here doesn't really work right for compound pages
				1580	* because the VM does not uniformly chase down the head page in all cases.
				1581	* But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
				1582	* handle them at all. So we skip compound pages here at an early stage.
				1583	*
				1584	* Note that this code is very hard to test under normal circumstances because
				1585	* direct-io pins the pages with get_user_pages(). This makes
				1586	* is_page_cache_freeable return false, and the VM will not clean the pages.
				1587	* But other code (eg, flusher threads) could clean the pages if they are mapped
				1588	* pagecache.
				1589	*
				1590	* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
				1591	* deferred bio dirtying paths.
				1592	*/
				1593
				1594	/*
				1595	* bio_set_pages_dirty() will mark all the bio's pages as dirty.
				1596	*/
				1597	void bio_set_pages_dirty(struct bio *bio)
				1598	{
				1599	struct bio_vec *bvec;
				1600	int i;
				1601
				1602	bio_for_each_segment_all(bvec, bio, i) {
				1603	struct page *page = bvec->bv_page;
				1604
				1605	if (page && !PageCompound(page))
				1606	set_page_dirty_lock(page);
				1607	}
				1608	}
				1609
				1610	static void bio_release_pages(struct bio *bio)
				1611	{
				1612	struct bio_vec *bvec;
				1613	int i;
				1614
				1615	bio_for_each_segment_all(bvec, bio, i) {
				1616	struct page *page = bvec->bv_page;
				1617
				1618	if (page)
				1619	put_page(page);
				1620	}
				1621	}
				1622
				1623	/*
				1624	* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
				1625	* If they are, then fine. If, however, some pages are clean then they must
				1626	* have been written out during the direct-IO read. So we take another ref on
				1627	* the BIO and the offending pages and re-dirty the pages in process context.
				1628	*
				1629	* It is expected that bio_check_pages_dirty() will wholly own the BIO from
				1630	* here on. It will run one page_cache_release() against each page and will
				1631	* run one bio_put() against the BIO.
				1632	*/
				1633
				1634	static void bio_dirty_fn(struct work_struct *work);
				1635
				1636	static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
				1637	static DEFINE_SPINLOCK(bio_dirty_lock);
				1638	static struct bio *bio_dirty_list;
				1639
				1640	/*
				1641	* This runs in process context
				1642	*/
				1643	static void bio_dirty_fn(struct work_struct *work)
				1644	{
				1645	unsigned long flags;
				1646	struct bio *bio;
				1647
				1648	spin_lock_irqsave(&bio_dirty_lock, flags);
				1649	bio = bio_dirty_list;
				1650	bio_dirty_list = NULL;
				1651	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1652
				1653	while (bio) {
				1654	struct bio *next = bio->bi_private;
				1655
				1656	bio_set_pages_dirty(bio);
				1657	bio_release_pages(bio);
				1658	bio_put(bio);
				1659	bio = next;
				1660	}
				1661	}
				1662
				1663	void bio_check_pages_dirty(struct bio *bio)
				1664	{
				1665	struct bio_vec *bvec;
				1666	int nr_clean_pages = 0;
				1667	int i;
				1668
				1669	bio_for_each_segment_all(bvec, bio, i) {
				1670	struct page *page = bvec->bv_page;
				1671
				1672	if (PageDirty(page) \|\| PageCompound(page)) {
				1673	page_cache_release(page);
				1674	bvec->bv_page = NULL;
				1675	} else {
				1676	nr_clean_pages++;
				1677	}
				1678	}
				1679
				1680	if (nr_clean_pages) {
				1681	unsigned long flags;
				1682
				1683	spin_lock_irqsave(&bio_dirty_lock, flags);
				1684	bio->bi_private = bio_dirty_list;
				1685	bio_dirty_list = bio;
				1686	spin_unlock_irqrestore(&bio_dirty_lock, flags);
				1687	schedule_work(&bio_dirty_work);
				1688	} else {
				1689	bio_put(bio);
				1690	}
				1691	}
				1692
				1693	void generic_start_io_acct(int rw, unsigned long sectors,
				1694	struct hd_struct *part)
				1695	{
				1696	int cpu = part_stat_lock();
				1697
				1698	part_round_stats(cpu, part);
				1699	part_stat_inc(cpu, part, ios[rw]);
				1700	part_stat_add(cpu, part, sectors[rw], sectors);
				1701	part_inc_in_flight(part, rw);
				1702
				1703	part_stat_unlock();
				1704	}
				1705	EXPORT_SYMBOL(generic_start_io_acct);
				1706
				1707	void generic_end_io_acct(int rw, struct hd_struct *part,
				1708	unsigned long start_time)
				1709	{
				1710	unsigned long duration = jiffies - start_time;
				1711	int cpu = part_stat_lock();
				1712
				1713	part_stat_add(cpu, part, ticks[rw], duration);
				1714	part_round_stats(cpu, part);
				1715	part_dec_in_flight(part, rw);
				1716
				1717	part_stat_unlock();
				1718	}
				1719	EXPORT_SYMBOL(generic_end_io_acct);
				1720
				#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
				/* Call flush_dcache_page() on the page of every remaining segment of @bi. */
				void bio_flush_dcache_pages(struct bio *bi)
				{
					struct bvec_iter pos;
					struct bio_vec seg;

					bio_for_each_segment(seg, bi, pos) {
						flush_dcache_page(seg.bv_page);
					}
				}
				EXPORT_SYMBOL(bio_flush_dcache_pages);
				#endif
				1732
				1733	static inline bool bio_remaining_done(struct bio *bio)
				1734	{
				1735	/*
				1736	* If we're not chaining, then ->__bi_remaining is always 1 and
				1737	* we always end io on the first invocation.
				1738	*/
				1739	if (!bio_flagged(bio, BIO_CHAIN))
				1740	return true;
				1741
				1742	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
				1743
				1744	if (atomic_dec_and_test(&bio->__bi_remaining)) {
				1745	bio_clear_flag(bio, BIO_CHAIN);
				1746	return true;
				1747	}
				1748
				1749	return false;
				1750	}
				1751
				1752	/**
				1753	* bio_endio - end I/O on a bio
				1754	* @bio: bio
				1755	*
				1756	* Description:
				1757	* bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
				1758	* way to end I/O on a bio. No one should call bi_end_io() directly on a
				1759	* bio unless they own it and thus know that it has an end_io function.
				1760	**/
				1761	void bio_endio(struct bio *bio)
				1762	{
				1763	while (bio) {
				1764	if (unlikely(!bio_remaining_done(bio)))
				1765	break;
				1766
				1767	/*
				1768	* Need to have a real endio function for chained bios,
				1769	* otherwise various corner cases will break (like stacking
				1770	* block devices that save/restore bi_end_io) - however, we want
				1771	* to avoid unbounded recursion and blowing the stack. Tail call
				1772	* optimization would handle this, but compiling with frame
				1773	* pointers also disables gcc's sibling call optimization.
				1774	*/
				1775	if (bio->bi_end_io == bio_chain_endio) {
				1776	struct bio *parent = bio->bi_private;
				1777	parent->bi_error = bio->bi_error;
				1778	bio_put(bio);
				1779	bio = parent;
				1780	} else {
				1781	if (bio->bi_end_io)
				1782	bio->bi_end_io(bio);
				1783	bio = NULL;
				1784	}
				1785	}
				1786	}
				1787	EXPORT_SYMBOL(bio_endio);
				1788
				1789	/**
				1790	* bio_split - split a bio
				1791	* @bio: bio to split
				1792	* @sectors: number of sectors to split from the front of @bio
				1793	* @gfp: gfp mask
				1794	* @bs: bio set to allocate from
				1795	*
				1796	* Allocates and returns a new bio which represents @sectors from the start of
				1797	* @bio, and updates @bio to represent the remaining sectors.
				1798	*
				1799	* Unless this is a discard request the newly allocated bio will point
				1800	* to @bio's bi_io_vec; it is the caller's responsibility to ensure that
				1801	* @bio is not freed before the split.
				1802	*/
				1803	struct bio bio_split(struct bio bio, int sectors,
				1804	gfp_t gfp, struct bio_set *bs)
				1805	{
				1806	struct bio *split = NULL;
				1807
				1808	BUG_ON(sectors <= 0);
				1809	BUG_ON(sectors >= bio_sectors(bio));
				1810
				1811	/*
				1812	* Discards need a mutable bio_vec to accommodate the payload
				1813	* required by the DSM TRIM and UNMAP commands.
				1814	*/
				1815	if (bio->bi_rw & REQ_DISCARD)
				1816	split = bio_clone_bioset(bio, gfp, bs);
				1817	else
				1818	split = bio_clone_fast(bio, gfp, bs);
				1819
				1820	if (!split)
				1821	return NULL;
				1822
				1823	split->bi_iter.bi_size = sectors << 9;
				1824
				1825	if (bio_integrity(split))
				1826	bio_integrity_trim(split, 0, sectors);
				1827
				1828	bio_advance(bio, split->bi_iter.bi_size);
				1829
				1830	return split;
				1831	}
				1832	EXPORT_SYMBOL(bio_split);
				1833
				1834	/**
				1835	* bio_trim - trim a bio
				1836	* @bio: bio to trim
				1837	* @offset: number of sectors to trim from the front of @bio
				1838	* @size: size we want to trim @bio to, in sectors
				1839	*/
				1840	void bio_trim(struct bio *bio, int offset, int size)
				1841	{
				1842	/* 'bio' is a cloned bio which we need to trim to match
				1843	* the given offset and size.
				1844	*/
				1845
				1846	size <<= 9;
				1847	if (offset == 0 && size == bio->bi_iter.bi_size)
				1848	return;
				1849
				1850	bio_clear_flag(bio, BIO_SEG_VALID);
				1851
				1852	bio_advance(bio, offset << 9);
				1853
				1854	bio->bi_iter.bi_size = size;
				1855	}
				1856	EXPORT_SYMBOL_GPL(bio_trim);
				1857
				1858	/*
				1859	* create memory pools for biovec's in a bio_set.
				1860	* use the global biovec slabs created for general use.
				1861	*/
				1862	mempool_t *biovec_create_pool(int pool_entries)
				1863	{
				1864	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
				1865
				1866	return mempool_create_slab_pool(pool_entries, bp->slab);
				1867	}
				1868
				1869	void bioset_free(struct bio_set *bs)
				1870	{
				1871	if (bs->rescue_workqueue)
				1872	destroy_workqueue(bs->rescue_workqueue);
				1873
				1874	if (bs->bio_pool)
				1875	mempool_destroy(bs->bio_pool);
				1876
				1877	if (bs->bvec_pool)
				1878	mempool_destroy(bs->bvec_pool);
				1879
				1880	bioset_integrity_free(bs);
				1881	bio_put_slab(bs);
				1882
				1883	kfree(bs);
				1884	}
				1885	EXPORT_SYMBOL(bioset_free);
				1886
				1887	static struct bio_set *__bioset_create(unsigned int pool_size,
				1888	unsigned int front_pad,
				1889	bool create_bvec_pool)
				1890	{
				1891	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
				1892	struct bio_set *bs;
				1893
				1894	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
				1895	if (!bs)
				1896	return NULL;
				1897
				1898	bs->front_pad = front_pad;
				1899
				1900	spin_lock_init(&bs->rescue_lock);
				1901	bio_list_init(&bs->rescue_list);
				1902	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
				1903
				1904	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
				1905	if (!bs->bio_slab) {
				1906	kfree(bs);
				1907	return NULL;
				1908	}
				1909
				1910	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
				1911	if (!bs->bio_pool)
				1912	goto bad;
				1913
				1914	if (create_bvec_pool) {
				1915	bs->bvec_pool = biovec_create_pool(pool_size);
				1916	if (!bs->bvec_pool)
				1917	goto bad;
				1918	}
				1919
				1920	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
				1921	if (!bs->rescue_workqueue)
				1922	goto bad;
				1923
				1924	return bs;
				1925	bad:
				1926	bioset_free(bs);
				1927	return NULL;
				1928	}
				1929
				1930	/**
				1931	* bioset_create - Create a bio_set
				1932	* @pool_size: Number of bio and bio_vecs to cache in the mempool
				1933	* @front_pad: Number of bytes to allocate in front of the returned bio
				1934	*
				1935	* Description:
				1936	* Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
				1937	* to ask for a number of bytes to be allocated in front of the bio.
				1938	* Front pad allocation is useful for embedding the bio inside
				1939	* another structure, to avoid allocating extra data to go with the bio.
				1940	* Note that the bio must be embedded at the END of that structure always,
				1941	* or things will break badly.
				1942	*/
				1943	struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
				1944	{
				1945	return __bioset_create(pool_size, front_pad, true);
				1946	}
				1947	EXPORT_SYMBOL(bioset_create);
				1948
				1949	/**
				1950	* bioset_create_nobvec - Create a bio_set without bio_vec mempool
				1951	* @pool_size: Number of bio to cache in the mempool
				1952	* @front_pad: Number of bytes to allocate in front of the returned bio
				1953	*
				1954	* Description:
				1955	* Same functionality as bioset_create() except that mempool is not
				1956	* created for bio_vecs. Saving some memory for bio_clone_fast() users.
				1957	*/
				1958	struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
				1959	{
				1960	return __bioset_create(pool_size, front_pad, false);
				1961	}
				1962	EXPORT_SYMBOL(bioset_create_nobvec);
				1963
				1964	#ifdef CONFIG_BLK_CGROUP
				1965
				1966	/**
				1967	* bio_associate_blkcg - associate a bio with the specified blkcg
				1968	* @bio: target bio
				1969	* @blkcg_css: css of the blkcg to associate
				1970	*
				1971	* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
				1972	* treat @bio as if it were issued by a task which belongs to the blkcg.
				1973	*
				1974	* This function takes an extra reference of @blkcg_css which will be put
				1975	* when @bio is released. The caller must own @bio and is responsible for
				1976	* synchronizing calls to this function.
				1977	*/
				1978	int bio_associate_blkcg(struct bio bio, struct cgroup_subsys_state blkcg_css)
				1979	{
				1980	if (unlikely(bio->bi_css))
				1981	return -EBUSY;
				1982	css_get(blkcg_css);
				1983	bio->bi_css = blkcg_css;
				1984	return 0;
				1985	}
				1986	EXPORT_SYMBOL_GPL(bio_associate_blkcg);
				1987
				1988	/**
				1989	* bio_associate_current - associate a bio with %current
				1990	* @bio: target bio
				1991	*
				1992	* Associate @bio with %current if it hasn't been associated yet. Block
				1993	* layer will treat @bio as if it were issued by %current no matter which
				1994	* task actually issues it.
				1995	*
				1996	* This function takes an extra reference of @task's io_context and blkcg
				1997	* which will be put when @bio is released. The caller must own @bio,
				1998	* ensure %current->io_context exists, and is responsible for synchronizing
				1999	* calls to this function.
				2000	*/
				2001	int bio_associate_current(struct bio *bio)
				2002	{
				2003	struct io_context *ioc;
				2004
				2005	if (bio->bi_css)
				2006	return -EBUSY;
				2007
				2008	ioc = current->io_context;
				2009	if (!ioc)
				2010	return -ENOENT;
				2011
				2012	get_io_context_active(ioc);
				2013	bio->bi_ioc = ioc;
				2014	bio->bi_css = task_get_css(current, io_cgrp_id);
				2015	return 0;
				2016	}
				2017	EXPORT_SYMBOL_GPL(bio_associate_current);
				2018
				2019	/**
				2020	* bio_disassociate_task - undo bio_associate_current()
				2021	* @bio: target bio
				2022	*/
				2023	void bio_disassociate_task(struct bio *bio)
				2024	{
				2025	if (bio->bi_ioc) {
				2026	put_io_context(bio->bi_ioc);
				2027	bio->bi_ioc = NULL;
				2028	}
				2029	if (bio->bi_css) {
				2030	css_put(bio->bi_css);
				2031	bio->bi_css = NULL;
				2032	}
				2033	}
				2034
				2035	/**
				2036	* bio_clone_blkcg_association - clone blkcg association from src to dst bio
				2037	* @dst: destination bio
				2038	* @src: source bio
				2039	*/
				2040	void bio_clone_blkcg_association(struct bio dst, struct bio src)
				2041	{
				2042	if (src->bi_css)
				2043	WARN_ON(bio_associate_blkcg(dst, src->bi_css));
				2044	}
				2045
				2046	#endif /* CONFIG_BLK_CGROUP */
				2047
				2048	static void __init biovec_init_slabs(void)
				2049	{
				2050	int i;
				2051
				2052	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
				2053	int size;
				2054	struct biovec_slab *bvs = bvec_slabs + i;
				2055
				2056	if (bvs->nr_vecs <= BIO_INLINE_VECS) {
				2057	bvs->slab = NULL;
				2058	continue;
				2059	}
				2060
				2061	size = bvs->nr_vecs * sizeof(struct bio_vec);
				2062	bvs->slab = kmem_cache_create(bvs->name, size, 0,
				2063	SLAB_HWCACHE_ALIGN\|SLAB_PANIC, NULL);
				2064	}
				2065	}
				2066
				2067	static int __init init_bio(void)
				2068	{
				2069	bio_slab_max = 2;
				2070	bio_slab_nr = 0;
				2071	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
				2072	if (!bio_slabs)
				2073	panic("bio: can't allocate bios\n");
				2074
				2075	bio_integrity_init();
				2076	biovec_init_slabs();
				2077
				2078	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
				2079	if (!fs_bio_set)
				2080	panic("bio: can't allocate bios\n");
				2081
				2082	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
				2083	panic("bio: can't create integrity pool\n");
				2084
				2085	return 0;
				2086	}
				2087	subsys_initcall(init_bio);