"Robert P. J. Day" | 63fc1a9 | 2006-07-02 19:47:05 +0000 | [diff] [blame] | 1 | /* vi: set sw=4 ts=4: */ |
Rob Landley | 3ea05d3 | 2006-03-21 18:20:40 +0000 | [diff] [blame] | 2 | /* Copyright 2005 Rob Landley <rob@landley.net> |
| 3 | * |
| 4 | * Switch from rootfs to another filesystem as the root of the mount tree. |
| 5 | * |
Rob Landley | e9a7a62 | 2006-09-22 02:52:41 +0000 | [diff] [blame] | 6 | * Licensed under GPL version 2, see file LICENSE in this tarball for details. |
Rob Landley | 3ea05d3 | 2006-03-21 18:20:40 +0000 | [diff] [blame] | 7 | */ |
Denis Vlasenko | b6adbf1 | 2007-05-26 19:00:18 +0000 | [diff] [blame] | 8 | #include "libbb.h" |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 9 | #include <sys/vfs.h> |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 10 | |
// Fallbacks for constants that the system headers may not provide.
// Each one is only defined when the headers did not already define it.

// Filesystem type values compared against statfs f_type in this file
#ifndef RAMFS_MAGIC
# define RAMFS_MAGIC (0x858458f6u)
#endif

#ifndef TMPFS_MAGIC
# define TMPFS_MAGIC (0x01021994u)
#endif

// mount() flag used in this file to move an existing mount onto "/"
#ifndef MS_MOVE
# define MS_MOVE 8192
#endif
| 23 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 24 | // Recursively delete contents of rootfs |
// Remove 'directory' (a file or a directory tree) as long as it lives on
// the device 'rootdev'.
//
// directory: path of the entry to remove
// rootdev:   st_dev of the filesystem being emptied; entries whose st_dev
//            differs are left untouched, so other mounted filesystems
//            are never descended into
//
// All failures (lstat, opendir, unlink, rmdir) are silently ignored.
static void delete_contents(const char *directory, dev_t rootdev)
{
	struct stat sb;
	struct dirent *entry;
	DIR *dp;

	// lstat, not stat: a symlink is examined itself, never its target
	if (lstat(directory, &sb) != 0)
		return;
	// Entry belongs to some other filesystem: hands off
	if (sb.st_dev != rootdev)
		return;

	// Anything that is not a directory is simply unlinked
	if (!S_ISDIR(sb.st_mode)) {
		unlink(directory);
		return;
	}

	dp = opendir(directory);
	if (dp == NULL) {
		// Could not open it: nothing is removed, not even the directory
		return;
	}

	for (entry = readdir(dp); entry != NULL; entry = readdir(dp)) {
		char *child;

		// "." and ".." must not be recursed into
		if (DOT_OR_DOTDOT(entry->d_name))
			continue;

		// Build "directory/name", remove it recursively, release the path
		child = concat_path_file(directory, entry->d_name);
		delete_contents(child, rootdev);
		free(child);
	}
	closedir(dp);

	// The directory is expected to be empty by now; remove it too
	rmdir(directory);
}
| 61 | |
Denis Vlasenko | 9b49a5e | 2007-10-11 10:05:36 +0000 | [diff] [blame] | 62 | int switch_root_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
Denis Vlasenko | a60f84e | 2008-07-05 09:18:54 +0000 | [diff] [blame] | 63 | int switch_root_main(int argc UNUSED_PARAM, char **argv) |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 64 | { |
Denis Vlasenko | 51742f4 | 2007-04-12 00:32:05 +0000 | [diff] [blame] | 65 | char *newroot, *console = NULL; |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 66 | struct stat st; |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 67 | struct statfs stfs; |
Denis Vlasenko | 39acf45 | 2008-07-11 23:44:50 +0000 | [diff] [blame] | 68 | dev_t rootdev; |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 69 | |
| 70 | // Parse args (-c console) |
Denis Vlasenko | 6dd03f0 | 2008-02-13 17:25:31 +0000 | [diff] [blame] | 71 | opt_complementary = "-2"; // minimum 2 params |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 72 | getopt32(argv, "+c:", &console); // '+': stop at first non-option |
Denis Vlasenko | 3ace9fa | 2007-04-18 21:40:30 +0000 | [diff] [blame] | 73 | argv += optind; |
Denis Vlasenko | 3ace9fa | 2007-04-18 21:40:30 +0000 | [diff] [blame] | 74 | newroot = *argv++; |
Tim Riker | c1ef7bd | 2006-01-25 00:08:53 +0000 | [diff] [blame] | 75 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 76 | // Change to new root directory and verify it's a different fs |
Denis Vlasenko | 3ace9fa | 2007-04-18 21:40:30 +0000 | [diff] [blame] | 77 | xchdir(newroot); |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 78 | xstat("/", &st); |
| 79 | rootdev = st.st_dev; |
| 80 | xstat(".", &st); |
| 81 | if (st.st_dev == rootdev || getpid() != 1) { |
| 82 | // Show usage, it says new root must be a mountpoint |
| 83 | // and we must be PID 1 |
| 84 | bb_show_usage(); |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 85 | } |
Tim Riker | c1ef7bd | 2006-01-25 00:08:53 +0000 | [diff] [blame] | 86 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 87 | // Additional sanity checks: we're about to rm -rf /, so be REALLY SURE |
| 88 | // we mean it. I could make this a CONFIG option, but I would get email |
| 89 | // from all the people who WILL destroy their filesystems. |
| 90 | statfs("/", &stfs); // this never fails |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame^] | 91 | if (stat("/init", &st) != 0 || !S_ISREG(st.st_mode) |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 92 | || ((unsigned)stfs.f_type != RAMFS_MAGIC |
| 93 | && (unsigned)stfs.f_type != TMPFS_MAGIC) |
Denis Vlasenko | 77ad97f | 2008-05-13 02:27:31 +0000 | [diff] [blame] | 94 | ) { |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 95 | bb_error_msg_and_die("not rootfs"); |
| 96 | } |
| 97 | |
| 98 | // Zap everything out of rootdev |
Denis Vlasenko | 39acf45 | 2008-07-11 23:44:50 +0000 | [diff] [blame] | 99 | delete_contents("/", rootdev); |
Tim Riker | c1ef7bd | 2006-01-25 00:08:53 +0000 | [diff] [blame] | 100 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 101 | // Overmount / with newdir and chroot into it |
| 102 | if (mount(".", "/", NULL, MS_MOVE, NULL)) { |
| 103 | // For example, fails when newroot is not a mountpoint |
Denis Vlasenko | 0ad8234 | 2009-04-21 00:29:17 +0000 | [diff] [blame] | 104 | bb_perror_msg_and_die("error moving root"); |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 105 | } |
Denis Vlasenko | 394eebe | 2008-02-25 20:30:24 +0000 | [diff] [blame] | 106 | xchroot("."); |
Denis Vlasenko | e6b10ef | 2009-04-21 20:52:58 +0000 | [diff] [blame] | 107 | // The chdir is needed to recalculate "." and ".." links |
Denis Vlasenko | 3ace9fa | 2007-04-18 21:40:30 +0000 | [diff] [blame] | 108 | xchdir("/"); |
Tim Riker | c1ef7bd | 2006-01-25 00:08:53 +0000 | [diff] [blame] | 109 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 110 | // If a new console specified, redirect stdin/stdout/stderr to it |
Rob Landley | 5d84c23 | 2005-12-20 17:25:51 +0000 | [diff] [blame] | 111 | if (console) { |
| 112 | close(0); |
Denis Vlasenko | 51742f4 | 2007-04-12 00:32:05 +0000 | [diff] [blame] | 113 | xopen(console, O_RDWR); |
Denis Vlasenko | 39acf45 | 2008-07-11 23:44:50 +0000 | [diff] [blame] | 114 | xdup2(0, 1); |
| 115 | xdup2(0, 2); |
Rob Landley | 5d84c23 | 2005-12-20 17:25:51 +0000 | [diff] [blame] | 116 | } |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 117 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 118 | // Exec real init |
Denis Vlasenko | 3ace9fa | 2007-04-18 21:40:30 +0000 | [diff] [blame] | 119 | execv(argv[0], argv); |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 120 | bb_perror_msg_and_die("can't execute '%s'", argv[0]); |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 121 | } |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame^] | 122 | |
| 123 | /* |
| 124 | From: Rob Landley <rob@landley.net> |
| 125 | Date: Tue, Jun 16, 2009 at 7:47 PM |
| 126 | Subject: Re: switch_root... |
| 127 | |
| 128 | ... |
| 129 | ... |
| 130 | ... |
| 131 | |
| 132 | If you're _not_ running out of init_ramfs (if for example you're using initrd |
| 133 | instead), you probably shouldn't use switch_root because it's the wrong tool. |
| 134 | |
| 135 | Basically what the sucker does is something like the following shell script: |
| 136 | |
| 137 | find / -xdev | xargs rm -rf |
| 138 | cd "$1" |
| 139 | shift |
| 140 | mount --move . / |
| 141 | exec chroot . "$@" |
| 142 | |
| 143 | There are a couple reasons that won't work as a shell script: |
| 144 | |
| 145 | 1) If you delete the commands out of your $PATH, your shell scripts can't run |
| 146 | more commands, but you can't start using dynamically linked _new_ commands |
| 147 | until after you do the chroot because the path to the dynamic linker is wrong. |
| 148 | So there's a step that needs to be sort of atomic but can't be as a shell |
| 149 | script. (You can work around this with static linking or very carefully laid |
| 150 | out paths and sequencing, but it's brittle, ugly, and non-obvious.) |
| 151 | |
2) The "find | rm" bit will actually delete everything because the mount points
still show up (even if their contents don't), and rm -rf will then happily zap
that.  So the first line is an oversimplification of what you need to do _not_
to descend into other filesystems and delete their contents.
| 156 | |
| 157 | The reason we do this is to free up memory, by the way. Since initramfs is a |
| 158 | ramfs, deleting its contents frees up the memory it uses. (We leave it with |
| 159 | one remaining dentry for the new mount point, but that's ok.) |
| 160 | |
| 161 | Note that you cannot ever umount rootfs, for approximately the same reason you |
| 162 | can't kill PID 1. The kernel tracks mount points as a doubly linked list, and |
| 163 | the pointer to the start/end of that list always points to an entry that's |
| 164 | known to be there (rootfs), so it never has to worry about moving that pointer |
| 165 | and it never has to worry about the list being empty. (Back around 2.6.13 |
| 166 | there _was_ a bug that let you umount rootfs, and the system locked hard the |
| 167 | instant you did so endlessly looping to find the end of the mount list and |
| 168 | never stopping. They fixed it.) |
| 169 | |
| 170 | Oh, and the reason we mount --move _and_ do the chroot is due to the way "/" |
| 171 | works. Each process has two special symlinks, ".", and "/". Each of them |
| 172 | points to the dentry of a directory, and give you a location paths can start |
| 173 | from. (Historically ".." was also special, because you could enter a |
| 174 | directory via a symlink so backing out to the directory you came from doesn't |
| 175 | necessarily mean the one physically above where "." points to. These days I |
| 176 | think it's just handed off to the filesystem.) |
| 177 | |
| 178 | Anyway, path resolution starts with "." or "/" (although the "./" at the start |
| 179 | of the path may be implicit), meaning it's relative to one of those two |
| 180 | directories. Your current directory, and your current root directory. The |
| 181 | chdir() syscall changes where "." points to, and the chroot() syscall changes |
| 182 | where "/" points to. (Again, both are per-process which is why chroot only |
| 183 | affects your current process and its child processes.) |
| 184 | |
| 185 | Note that chroot() does _not_ change where "." points to, and back before they |
| 186 | put crazy security checks into the kernel your current directory could be |
| 187 | somewhere you could no longer access after the chroot. (The command line |
| 188 | chroot does a cd as well, the chroot _syscall_ is what I'm talking about.) |
| 189 | |
| 190 | The reason mounting something new over / has no obvious effect is the same |
| 191 | reason mounting something over your current directory has no obvious effect: |
| 192 | the . and / links aren't recalculated after a mount, so they still point to |
| 193 | the same dentry they did before, even if that dentry is no longer accessible |
| 194 | by other means. Note that "cd ." is a NOP, and "chroot /" is a nop; both look |
| 195 | up the cached dentry and set it right back. They don't re-parse any paths, |
| 196 | because they're what all paths your process uses would be relative to. |
| 197 | |
That's why the careful sequencing above: we cd into the new mount point before
we do the mount --move.  Moving the mount point would otherwise make it
totally inaccessible to us because cd-ing to the old path wouldn't give it to
us anymore, and cd "/" just gives us the cached dentry from when the process
was created (in this case the old initramfs one).  But the "." symlink gives
us the dentry of the filesystem we just moved, so we can then "chroot ." to
copy that dentry to "/" and get the new filesystem.  If we _didn't_ save that
dentry in "." we couldn't get it back after the mount --move.
| 206 | |
| 207 | (Yes, this is all screwy and I had to email questions to Linus Torvalds to get |
| 208 | it straight myself. I keep meaning to write up a "how mount actually works" |
| 209 | document someday...) |
| 210 | */ |