blob: 474f0734026a747f3cc038de89bb06f3ff401e3c [file] [log] [blame]
Denis Vlasenkoc4f623e2006-12-26 01:30:59 +00001/* vi: set sw=4 ts=4: */
2/*
3 * unix_io.c --- This is the Unix (well, really POSIX) implementation
4 * of the I/O manager.
5 *
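 * Implements a small block cache of CACHE_SIZE entries, which operates in
 * write-through mode when the channel has CHANNEL_FLAGS_WRITETHROUGH set.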
7 *
 * Includes support for Windows NT under Cygwin.
9 *
10 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
11 * 2002 by Theodore Ts'o.
12 *
13 * %Begin-Header%
14 * This file may be redistributed under the terms of the GNU Public
15 * License.
16 * %End-Header%
17 */
18
19#include <stdio.h>
20#include <string.h>
21#if HAVE_UNISTD_H
22#include <unistd.h>
23#endif
24#if HAVE_ERRNO_H
25#include <errno.h>
26#endif
27#include <fcntl.h>
28#include <time.h>
29#ifdef __linux__
30#include <sys/utsname.h>
31#endif
32#if HAVE_SYS_STAT_H
33#include <sys/stat.h>
34#endif
35#if HAVE_SYS_TYPES_H
36#include <sys/types.h>
37#endif
38#include <sys/resource.h>
39
40#include "ext2_fs.h"
41#include "ext2fs.h"
42
/*
 * For checking structure magic numbers...
 *
 * EXT2_CHECK_MAGIC(struct, code) expands to a bare "if" statement that
 * makes the *calling* function return `code` when struct->magic differs
 * from `code`.  Because the expansion is not wrapped in do { } while (0),
 * it must be used as a full statement of its own and never as the
 * unbraced body of an if/else.
 */

#define EXT2_CHECK_MAGIC(struct, code) \
	if ((struct)->magic != (code)) return (code)
49
/*
 * One slot of the per-channel block cache.
 */
struct unix_cache {
	char *buf;                /* buffer holding the cached block's contents */
	unsigned long block;      /* number of the block stored in this slot */
	int access_time;          /* owner's access counter value at last use */
	unsigned int dirty : 1;   /* buf has been modified and not written out */
	unsigned int in_use : 1;  /* slot currently holds a block */
};
57
/*
 * Cache geometry: the number of cache slots and the block-count
 * thresholds used by the block read/write paths.
 */
#define CACHE_SIZE 8
#define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
#define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */

/* Enforce the documented hard requirement at compile time. */
#if WRITE_DIRECT_SIZE >= CACHE_SIZE
#error "WRITE_DIRECT_SIZE must be smaller than CACHE_SIZE"
#endif
61
/*
 * Per-channel state of the Unix I/O manager, stored in the channel's
 * private_data pointer.
 */
struct unix_private_data {
	int magic;		/* EXT2_ET_MAGIC_UNIX_IO_CHANNEL once initialised */
	int dev;		/* file descriptor returned by open() */
	int flags;		/* NOTE(review): not referenced in this file's visible code */
	int access_time;	/* counter bumped on every cache hit or slot reuse */
	ext2_loff_t offset;	/* byte offset added to every device position */
	struct unix_cache cache[CACHE_SIZE];	/* the block cache slots */
};
70
/*
 * Forward declarations of the I/O manager entry points defined later in
 * this file; they are needed by the struct_unix_manager table below.
 */
static errcode_t unix_open(const char *name, int flags, io_channel *channel);
static errcode_t unix_close(io_channel channel);
static errcode_t unix_set_blksize(io_channel channel, int blksize);
static errcode_t unix_read_blk(io_channel channel, unsigned long block,
			       int count, void *data);
static errcode_t unix_write_blk(io_channel channel, unsigned long block,
				int count, const void *data);
static errcode_t unix_flush(io_channel channel);
static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
				int size, const void *data);
static errcode_t unix_set_option(io_channel channel, const char *option,
				 const char *arg);

/* Cache helper declared ahead of its definition further down. */
static void reuse_cache(io_channel channel, struct unix_private_data *data,
		 struct unix_cache *cache, unsigned long block);
86
/*
 * __FreeBSD_kernel__ is defined by GNU/kFreeBSD.  The FreeBSD kernel has
 * no buffered block devices -- everything is raw -- so on FreeBSD,
 * GNU/kFreeBSD and Cygwin NEED_BOUNCE_BUFFER is defined.  It selects the
 * sector-aligned variant of raw_read_blk() and leaves the manager's
 * byte-write slot empty.
 */
#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#define NEED_BOUNCE_BUFFER
#else
#undef NEED_BOUNCE_BUFFER
#endif
94
/*
 * Dispatch table for the Unix I/O manager.  The initialisers are
 * positional, so their order must match the member order of
 * struct struct_io_manager.
 */
static struct struct_io_manager struct_unix_manager = {
	EXT2_ET_MAGIC_IO_MANAGER,	/* magic number identifying an I/O manager */
	"Unix I/O Manager",		/* manager name */
	unix_open,
	unix_close,
	unix_set_blksize,
	unix_read_blk,
	unix_write_blk,
	unix_flush,
#ifdef NEED_BOUNCE_BUFFER
	0,				/* no byte-granular write on bounce-buffer platforms */
#else
	unix_write_byte,
#endif
	unix_set_option
};

/* Handle through which the rest of the program selects this manager. */
io_manager unix_io_manager = &struct_unix_manager;
113
114/*
115 * Here are the raw I/O functions
116 */
117#ifndef NEED_BOUNCE_BUFFER
118static errcode_t raw_read_blk(io_channel channel,
119 struct unix_private_data *data,
120 unsigned long block,
121 int count, void *buf)
122{
123 errcode_t retval;
124 ssize_t size;
125 ext2_loff_t location;
126 int actual = 0;
127
128 size = (count < 0) ? -count : count * channel->block_size;
129 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
130 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
131 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
132 goto error_out;
133 }
134 actual = read(data->dev, buf, size);
135 if (actual != size) {
136 if (actual < 0)
137 actual = 0;
138 retval = EXT2_ET_SHORT_READ;
139 goto error_out;
140 }
141 return 0;
142
143error_out:
144 memset((char *) buf+actual, 0, size-actual);
145 if (channel->read_error)
146 retval = (channel->read_error)(channel, block, count, buf,
147 size, actual, retval);
148 return retval;
149}
150#else /* NEED_BOUNCE_BUFFER */
151/*
152 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size
153 */
154static errcode_t raw_read_blk(io_channel channel,
155 struct unix_private_data *data,
156 unsigned long block,
157 int count, void *buf)
158{
159 errcode_t retval;
160 size_t size, alignsize, fragment;
161 ext2_loff_t location;
162 int total = 0, actual;
163#define BLOCKALIGN 512
164 char sector[BLOCKALIGN];
165
166 size = (count < 0) ? -count : count * channel->block_size;
167 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
168#ifdef DEBUG
169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
170 count, size, block, channel->block_size, location);
171#endif
172 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
173 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
174 goto error_out;
175 }
176 fragment = size % BLOCKALIGN;
177 alignsize = size - fragment;
178 if (alignsize) {
179 actual = read(data->dev, buf, alignsize);
180 if (actual != alignsize)
181 goto short_read;
182 }
183 if (fragment) {
184 actual = read(data->dev, sector, BLOCKALIGN);
185 if (actual != BLOCKALIGN)
186 goto short_read;
187 memcpy(buf+alignsize, sector, fragment);
188 }
189 return 0;
190
191short_read:
192 if (actual>0)
193 total += actual;
194 retval = EXT2_ET_SHORT_READ;
195
196error_out:
197 memset((char *) buf+total, 0, size-actual);
198 if (channel->read_error)
199 retval = (channel->read_error)(channel, block, count, buf,
200 size, actual, retval);
201 return retval;
202}
203#endif
204
205static errcode_t raw_write_blk(io_channel channel,
206 struct unix_private_data *data,
207 unsigned long block,
208 int count, const void *buf)
209{
210 ssize_t size;
211 ext2_loff_t location;
212 int actual = 0;
213 errcode_t retval;
214
215 if (count == 1)
216 size = channel->block_size;
217 else {
218 if (count < 0)
219 size = -count;
220 else
221 size = count * channel->block_size;
222 }
223
224 location = ((ext2_loff_t) block * channel->block_size) + data->offset;
225 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
226 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
227 goto error_out;
228 }
229
230 actual = write(data->dev, buf, size);
231 if (actual != size) {
232 retval = EXT2_ET_SHORT_WRITE;
233 goto error_out;
234 }
235 return 0;
236
237error_out:
238 if (channel->write_error)
239 retval = (channel->write_error)(channel, block, count, buf,
240 size, actual, retval);
241 return retval;
242}
243
244
245/*
246 * Here we implement the cache functions
247 */
248
249/* Allocate the cache buffers */
250static errcode_t alloc_cache(io_channel channel,
251 struct unix_private_data *data)
252{
253 errcode_t retval;
254 struct unix_cache *cache;
255 int i;
256
257 data->access_time = 0;
258 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
259 cache->block = 0;
260 cache->access_time = 0;
261 cache->dirty = 0;
262 cache->in_use = 0;
263 if ((retval = ext2fs_get_mem(channel->block_size,
264 &cache->buf)))
265 return retval;
266 }
267 return 0;
268}
269
270/* Free the cache buffers */
271static void free_cache(struct unix_private_data *data)
272{
273 struct unix_cache *cache;
274 int i;
275
276 data->access_time = 0;
277 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
278 cache->block = 0;
279 cache->access_time = 0;
280 cache->dirty = 0;
281 cache->in_use = 0;
282 ext2fs_free_mem(&cache->buf);
283 cache->buf = 0;
284 }
285}
286
287#ifndef NO_IO_CACHE
288/*
289 * Try to find a block in the cache. If the block is not found, and
290 * eldest is a non-zero pointer, then fill in eldest with the cache
291 * entry to that should be reused.
292 */
293static struct unix_cache *find_cached_block(struct unix_private_data *data,
294 unsigned long block,
295 struct unix_cache **eldest)
296{
297 struct unix_cache *cache, *unused_cache, *oldest_cache;
298 int i;
299
300 unused_cache = oldest_cache = 0;
301 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
302 if (!cache->in_use) {
303 if (!unused_cache)
304 unused_cache = cache;
305 continue;
306 }
307 if (cache->block == block) {
308 cache->access_time = ++data->access_time;
309 return cache;
310 }
311 if (!oldest_cache ||
312 (cache->access_time < oldest_cache->access_time))
313 oldest_cache = cache;
314 }
315 if (eldest)
316 *eldest = (unused_cache) ? unused_cache : oldest_cache;
317 return 0;
318}
319
320/*
321 * Reuse a particular cache entry for another block.
322 */
323static void reuse_cache(io_channel channel, struct unix_private_data *data,
324 struct unix_cache *cache, unsigned long block)
325{
326 if (cache->dirty && cache->in_use)
327 raw_write_blk(channel, data, cache->block, 1, cache->buf);
328
329 cache->in_use = 1;
330 cache->dirty = 0;
331 cache->block = block;
332 cache->access_time = ++data->access_time;
333}
334
335/*
336 * Flush all of the blocks in the cache
337 */
338static errcode_t flush_cached_blocks(io_channel channel,
339 struct unix_private_data *data,
340 int invalidate)
341
342{
343 struct unix_cache *cache;
344 errcode_t retval, retval2;
345 int i;
346
347 retval2 = 0;
348 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
349 if (!cache->in_use)
350 continue;
351
352 if (invalidate)
353 cache->in_use = 0;
354
355 if (!cache->dirty)
356 continue;
357
358 retval = raw_write_blk(channel, data,
359 cache->block, 1, cache->buf);
360 if (retval)
361 retval2 = retval;
362 else
363 cache->dirty = 0;
364 }
365 return retval2;
366}
367#endif /* NO_IO_CACHE */
368
369static errcode_t unix_open(const char *name, int flags, io_channel *channel)
370{
371 io_channel io = NULL;
372 struct unix_private_data *data = NULL;
373 errcode_t retval;
374 int open_flags;
375 struct stat st;
376#ifdef __linux__
377 struct utsname ut;
378#endif
379
380 if (name == 0)
381 return EXT2_ET_BAD_DEVICE_NAME;
382 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
383 if (retval)
384 return retval;
385 memset(io, 0, sizeof(struct struct_io_channel));
386 io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
387 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
388 if (retval)
389 goto cleanup;
390
391 io->manager = unix_io_manager;
392 retval = ext2fs_get_mem(strlen(name)+1, &io->name);
393 if (retval)
394 goto cleanup;
395
396 strcpy(io->name, name);
397 io->private_data = data;
398 io->block_size = 1024;
399 io->read_error = 0;
400 io->write_error = 0;
401 io->refcount = 1;
402
403 memset(data, 0, sizeof(struct unix_private_data));
404 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
405
406 if ((retval = alloc_cache(io, data)))
407 goto cleanup;
408
409 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
410#ifdef CONFIG_LFS
411 data->dev = open64(io->name, open_flags);
412#else
413 data->dev = open(io->name, open_flags);
414#endif
415 if (data->dev < 0) {
416 retval = errno;
417 goto cleanup;
418 }
419
420#ifdef __linux__
421#undef RLIM_INFINITY
422#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
423#define RLIM_INFINITY ((unsigned long)(~0UL>>1))
424#else
425#define RLIM_INFINITY (~0UL)
426#endif
427 /*
428 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
429 * block devices are wrongly getting hit by the filesize
430 * limit. This workaround isn't perfect, since it won't work
431 * if glibc wasn't built against 2.2 header files. (Sigh.)
432 *
433 */
434 if ((flags & IO_FLAG_RW) &&
435 (uname(&ut) == 0) &&
436 ((ut.release[0] == '2') && (ut.release[1] == '.') &&
437 (ut.release[2] == '4') && (ut.release[3] == '.') &&
438 (ut.release[4] == '1') && (ut.release[5] >= '0') &&
439 (ut.release[5] < '8')) &&
440 (fstat(data->dev, &st) == 0) &&
441 (S_ISBLK(st.st_mode))) {
442 struct rlimit rlim;
443
444 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
445 setrlimit(RLIMIT_FSIZE, &rlim);
446 getrlimit(RLIMIT_FSIZE, &rlim);
447 if (((unsigned long) rlim.rlim_cur) <
448 ((unsigned long) rlim.rlim_max)) {
449 rlim.rlim_cur = rlim.rlim_max;
450 setrlimit(RLIMIT_FSIZE, &rlim);
451 }
452 }
453#endif
454 *channel = io;
455 return 0;
456
457cleanup:
458 if (data) {
459 free_cache(data);
460 ext2fs_free_mem(&data);
461 }
462 ext2fs_free_mem(&io);
463 return retval;
464}
465
466static errcode_t unix_close(io_channel channel)
467{
468 struct unix_private_data *data;
469 errcode_t retval = 0;
470
471 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
472 data = (struct unix_private_data *) channel->private_data;
473 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
474
475 if (--channel->refcount > 0)
476 return 0;
477
478#ifndef NO_IO_CACHE
479 retval = flush_cached_blocks(channel, data, 0);
480#endif
481
482 if (close(data->dev) < 0)
483 retval = errno;
484 free_cache(data);
485
486 ext2fs_free_mem(&channel->private_data);
487 ext2fs_free_mem(&channel->name);
488 ext2fs_free_mem(&channel);
489 return retval;
490}
491
492static errcode_t unix_set_blksize(io_channel channel, int blksize)
493{
494 struct unix_private_data *data;
495 errcode_t retval;
496
497 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
498 data = (struct unix_private_data *) channel->private_data;
499 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
500
501 if (channel->block_size != blksize) {
502#ifndef NO_IO_CACHE
503 if ((retval = flush_cached_blocks(channel, data, 0)))
504 return retval;
505#endif
506
507 channel->block_size = blksize;
508 free_cache(data);
509 if ((retval = alloc_cache(channel, data)))
510 return retval;
511 }
512 return 0;
513}
514
515
516static errcode_t unix_read_blk(io_channel channel, unsigned long block,
517 int count, void *buf)
518{
519 struct unix_private_data *data;
520 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
521 errcode_t retval;
522 char *cp;
523 int i, j;
524
525 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
526 data = (struct unix_private_data *) channel->private_data;
527 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
528
529#ifdef NO_IO_CACHE
530 return raw_read_blk(channel, data, block, count, buf);
531#else
532 /*
533 * If we're doing an odd-sized read or a very large read,
534 * flush out the cache and then do a direct read.
535 */
536 if (count < 0 || count > WRITE_DIRECT_SIZE) {
537 if ((retval = flush_cached_blocks(channel, data, 0)))
538 return retval;
539 return raw_read_blk(channel, data, block, count, buf);
540 }
541
542 cp = buf;
543 while (count > 0) {
544 /* If it's in the cache, use it! */
545 if ((cache = find_cached_block(data, block, &reuse[0]))) {
546#ifdef DEBUG
547 printf("Using cached block %d\n", block);
548#endif
549 memcpy(cp, cache->buf, channel->block_size);
550 count--;
551 block++;
552 cp += channel->block_size;
553 continue;
554 }
555 /*
556 * Find the number of uncached blocks so we can do a
557 * single read request
558 */
559 for (i=1; i < count; i++)
560 if (find_cached_block(data, block+i, &reuse[i]))
561 break;
562#ifdef DEBUG
563 printf("Reading %d blocks starting at %d\n", i, block);
564#endif
565 if ((retval = raw_read_blk(channel, data, block, i, cp)))
566 return retval;
567
568 /* Save the results in the cache */
569 for (j=0; j < i; j++) {
570 count--;
571 cache = reuse[j];
572 reuse_cache(channel, data, cache, block++);
573 memcpy(cache->buf, cp, channel->block_size);
574 cp += channel->block_size;
575 }
576 }
577 return 0;
578#endif /* NO_IO_CACHE */
579}
580
581static errcode_t unix_write_blk(io_channel channel, unsigned long block,
582 int count, const void *buf)
583{
584 struct unix_private_data *data;
585 struct unix_cache *cache, *reuse;
586 errcode_t retval = 0;
587 const char *cp;
588 int writethrough;
589
590 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
591 data = (struct unix_private_data *) channel->private_data;
592 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
593
594#ifdef NO_IO_CACHE
595 return raw_write_blk(channel, data, block, count, buf);
596#else
597 /*
598 * If we're doing an odd-sized write or a very large write,
599 * flush out the cache completely and then do a direct write.
600 */
601 if (count < 0 || count > WRITE_DIRECT_SIZE) {
602 if ((retval = flush_cached_blocks(channel, data, 1)))
603 return retval;
604 return raw_write_blk(channel, data, block, count, buf);
605 }
606
607 /*
608 * For a moderate-sized multi-block write, first force a write
609 * if we're in write-through cache mode, and then fill the
610 * cache with the blocks.
611 */
612 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
613 if (writethrough)
614 retval = raw_write_blk(channel, data, block, count, buf);
615
616 cp = buf;
617 while (count > 0) {
618 cache = find_cached_block(data, block, &reuse);
619 if (!cache) {
620 cache = reuse;
621 reuse_cache(channel, data, cache, block);
622 }
623 memcpy(cache->buf, cp, channel->block_size);
624 cache->dirty = !writethrough;
625 count--;
626 block++;
627 cp += channel->block_size;
628 }
629 return retval;
630#endif /* NO_IO_CACHE */
631}
632
633static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
634 int size, const void *buf)
635{
636 struct unix_private_data *data;
637 errcode_t retval = 0;
638 ssize_t actual;
639
640 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
641 data = (struct unix_private_data *) channel->private_data;
642 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
643
644#ifndef NO_IO_CACHE
645 /*
646 * Flush out the cache completely
647 */
648 if ((retval = flush_cached_blocks(channel, data, 1)))
649 return retval;
650#endif
651
652 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
653 return errno;
654
655 actual = write(data->dev, buf, size);
656 if (actual != size)
657 return EXT2_ET_SHORT_WRITE;
658
659 return 0;
660}
661
662/*
663 * Flush data buffers to disk.
664 */
665static errcode_t unix_flush(io_channel channel)
666{
667 struct unix_private_data *data;
668 errcode_t retval = 0;
669
670 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
671 data = (struct unix_private_data *) channel->private_data;
672 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
673
674#ifndef NO_IO_CACHE
675 retval = flush_cached_blocks(channel, data, 0);
676#endif
677 fsync(data->dev);
678 return retval;
679}
680
681static errcode_t unix_set_option(io_channel channel, const char *option,
682 const char *arg)
683{
684 struct unix_private_data *data;
685 unsigned long tmp;
686 char *end;
687
688 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
689 data = (struct unix_private_data *) channel->private_data;
690 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
691
692 if (!strcmp(option, "offset")) {
693 if (!arg)
694 return EXT2_ET_INVALID_ARGUMENT;
695
696 tmp = strtoul(arg, &end, 0);
697 if (*end)
698 return EXT2_ET_INVALID_ARGUMENT;
699 data->offset = tmp;
700 return 0;
701 }
702 return EXT2_ET_INVALID_ARGUMENT;
703}