"Robert P. J. Day" | 63fc1a9 | 2006-07-02 19:47:05 +0000 | [diff] [blame] | 1 | /* vi: set sw=4 ts=4: */ |
Denys Vlasenko | 2ab9403 | 2017-10-05 15:33:28 +0200 | [diff] [blame] | 2 | /* |
| 3 | * Copyright 2005 Rob Landley <rob@landley.net> |
Rob Landley | 3ea05d3 | 2006-03-21 18:20:40 +0000 | [diff] [blame] | 4 | * |
| 5 | * Switch from rootfs to another filesystem as the root of the mount tree. |
| 6 | * |
Denys Vlasenko | 0ef64bd | 2010-08-16 20:14:46 +0200 | [diff] [blame] | 7 | * Licensed under GPLv2, see file LICENSE in this source tree. |
Rob Landley | 3ea05d3 | 2006-03-21 18:20:40 +0000 | [diff] [blame] | 8 | */ |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 9 | //config:config SWITCH_ROOT |
Denys Vlasenko | b097a84 | 2018-12-28 03:20:17 +0100 | [diff] [blame] | 10 | //config: bool "switch_root (5.5 kb)" |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 11 | //config: default y |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 12 | //config: help |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 13 | //config: The switch_root utility is used from initramfs to select a new |
| 14 | //config: root device. Under initramfs, you have to use this instead of |
| 15 | //config: pivot_root. (Stop reading here if you don't care why.) |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 16 | //config: |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 17 | //config: Booting with initramfs extracts a gzipped cpio archive into rootfs |
| 18 | //config: (which is a variant of ramfs/tmpfs). Because rootfs can't be moved |
| 19 | //config: or unmounted*, pivot_root will not work from initramfs. Instead, |
| 20 | //config: switch_root deletes everything out of rootfs (including itself), |
| 21 | //config: does a mount --move that overmounts rootfs with the new root, and |
| 22 | //config: then execs the specified init program. |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 23 | //config: |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 24 | //config: * Because the Linux kernel uses rootfs internally as the starting |
| 25 | //config: and ending point for searching through the kernel's doubly linked |
| 26 | //config: list of active mount points. That's why. |
Denys Vlasenko | bbc26c6 | 2017-08-22 10:37:30 +0200 | [diff] [blame] | 27 | //config: |
| 28 | // RUN_INIT config item is in klibc-utils |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 29 | |
| 30 | //applet:IF_SWITCH_ROOT(APPLET(switch_root, BB_DIR_SBIN, BB_SUID_DROP)) |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 31 | // APPLET_ODDNAME:name main location suid_type help |
| 32 | //applet:IF_RUN_INIT( APPLET_ODDNAME(run-init, switch_root, BB_DIR_SBIN, BB_SUID_DROP, run_init)) |
Denys Vlasenko | dd898c9 | 2016-11-23 11:46:32 +0100 | [diff] [blame] | 33 | |
| 34 | //kbuild:lib-$(CONFIG_SWITCH_ROOT) += switch_root.o |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 35 | //kbuild:lib-$(CONFIG_RUN_INIT) += switch_root.o |
Pere Orga | 5bc8c00 | 2011-04-11 03:29:49 +0200 | [diff] [blame] | 36 | |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 37 | #include <sys/vfs.h> |
Denys Vlasenko | da49f58 | 2009-07-08 02:58:38 +0200 | [diff] [blame] | 38 | #include <sys/mount.h> |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 39 | #if ENABLE_RUN_INIT |
| 40 | # include <sys/prctl.h> |
Denys Vlasenko | 2af5e3f | 2018-02-21 20:13:39 +0100 | [diff] [blame] | 41 | # ifndef PR_CAPBSET_READ |
| 42 | # define PR_CAPBSET_READ 23 |
| 43 | # endif |
| 44 | # ifndef PR_CAPBSET_DROP |
| 45 | # define PR_CAPBSET_DROP 24 |
| 46 | # endif |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 47 | # include <linux/capability.h> |
| 48 | // #include <sys/capability.h> |
| 49 | // This header is in libcap, but the functions are in libc. |
| 50 | // Comment in the header says this above capset/capget: |
| 51 | /* system calls - look to libc for function to system call mapping */ |
| 52 | extern int capset(cap_user_header_t header, cap_user_data_t data); |
| 53 | extern int capget(cap_user_header_t header, const cap_user_data_t data); |
| 54 | // so for bbox, let's just repeat the declarations. |
| 55 | // This way, libcap needs not be installed in build environment. |
| 56 | #endif |
| 57 | |
Denys Vlasenko | da49f58 | 2009-07-08 02:58:38 +0200 | [diff] [blame] | 58 | #include "libbb.h" |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 59 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 60 | // Make up for header deficiencies |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 61 | #ifndef RAMFS_MAGIC |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 62 | # define RAMFS_MAGIC ((unsigned)0x858458f6) |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 63 | #endif |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 64 | #ifndef TMPFS_MAGIC |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 65 | # define TMPFS_MAGIC ((unsigned)0x01021994) |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 66 | #endif |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 67 | #ifndef MS_MOVE |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 68 | # define MS_MOVE 8192 |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 69 | #endif |
| 70 | |
Denys Vlasenko | 3b267e9 | 2021-06-24 17:54:11 +0200 | [diff] [blame] | 71 | static void delete_contents(const char *directory, dev_t rootdev); |
| 72 | |
| 73 | static int FAST_FUNC rmrf(const char *directory, struct dirent *d, void *rootdevp) |
| 74 | { |
| 75 | char *newdir = concat_subpath_file(directory, d->d_name); |
| 76 | if (newdir) { // not . or .. |
| 77 | // Recurse to delete contents |
| 78 | delete_contents(newdir, *(dev_t*)rootdevp); |
| 79 | free(newdir); |
| 80 | } |
| 81 | return 0; |
| 82 | } |
| 83 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 84 | // Recursively delete contents of rootfs |
Denis Vlasenko | 39acf45 | 2008-07-11 23:44:50 +0000 | [diff] [blame] | 85 | static void delete_contents(const char *directory, dev_t rootdev) |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 86 | { |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 87 | struct stat st; |
| 88 | |
| 89 | // Don't descend into other filesystems |
Denis Vlasenko | 39acf45 | 2008-07-11 23:44:50 +0000 | [diff] [blame] | 90 | if (lstat(directory, &st) || st.st_dev != rootdev) |
| 91 | return; |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 92 | |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 93 | // Recursively delete the contents of directories |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 94 | if (S_ISDIR(st.st_mode)) { |
Denys Vlasenko | 3b267e9 | 2021-06-24 17:54:11 +0200 | [diff] [blame] | 95 | iterate_on_dir(directory, rmrf, &rootdev); |
Denis Vlasenko | f9d4fc3 | 2009-04-21 20:40:51 +0000 | [diff] [blame] | 96 | } else { |
| 97 | // It wasn't a directory, zap it |
| 98 | unlink(directory); |
| 99 | } |
Rob Landley | 0f34a82 | 2005-10-27 22:55:50 +0000 | [diff] [blame] | 100 | } |
| 101 | |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 102 | #if ENABLE_RUN_INIT |
| 103 | DEFINE_STRUCT_CAPS; |
| 104 | |
| 105 | static void drop_capset(int cap_idx) |
| 106 | { |
| 107 | struct caps caps; |
| 108 | |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 109 | getcaps(&caps); |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 110 | caps.data[CAP_TO_INDEX(cap_idx)].inheritable &= ~CAP_TO_MASK(cap_idx); |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 111 | if (capset(&caps.header, caps.data) != 0) |
James Byrne | 6937487 | 2019-07-02 11:35:03 +0200 | [diff] [blame] | 112 | bb_simple_perror_msg_and_die("capset"); |
Denys Vlasenko | 200bcc8 | 2017-08-21 19:30:01 +0200 | [diff] [blame] | 113 | } |
| 114 | |
// Drop capability cap_idx from the bounding set, if it is currently there.
//
// PR_CAPBSET_READ is queried first; PR_CAPBSET_DROP is issued only when
// that query returns 1. Either prctl() failing is fatal.
static void drop_bounding_set(int cap_idx)
{
	int present = prctl(PR_CAPBSET_READ, cap_idx, 0, 0, 0);

	if (present < 0)
		bb_perror_msg_and_die("prctl: %s", "PR_CAPBSET_READ");
	if (present != 1)
		return; // nothing to drop

	if (prctl(PR_CAPBSET_DROP, cap_idx, 0, 0, 0) != 0)
		bb_perror_msg_and_die("prctl: %s", "PR_CAPBSET_DROP");
}
| 129 | |
// Clear capability cap_idx in a usermodehelper capability mask file.
//
// filename: file holding two unsigned decimal words, "LO HI"
//           (bits 0..31 in LO, bits 32..63 in HI)
// cap_idx:  capability number whose bit is to be cleared
//
// If the file cannot be read, it is assumed not to exist and nothing is
// done. A file that does not contain two numbers is fatal, as is a
// failure to reopen it for writing.
static void drop_usermodehelper(const char *filename, int cap_idx)
{
	unsigned lo, hi;
	char buf[sizeof(int)*3 * 2 + 8];
	int fd;
	int ret;

	ret = open_read_close(filename, buf, sizeof(buf) - 1);
	if (ret < 0)
		return; /* assuming files do not exist */

	buf[ret] = '\0';
	ret = sscanf(buf, "%u %u", &lo, &hi);
	if (ret != 2)
		bb_perror_msg_and_die("can't parse file '%s'", filename);

	// Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior
	if (cap_idx < 32)
		lo &= ~(1U << cap_idx);
	else
		hi &= ~(1U << (cap_idx - 32));

	fd = xopen(filename, O_WRONLY);
	fdprintf(fd, "%u %u", lo, hi);
	close(fd);
}
| 155 | |
// Drop every capability named in a comma-separated list.
//
// string: "CAP,CAP..." list; it is modified in place by strtok_r().
//
// For each name, the capability is removed from both usermodehelper mask
// files, from the bounding set and from the inheritable set, and a
// "dropped capability" message is printed.
static void drop_capabilities(char *string)
{
	char *name;

	for (name = strtok_r(string, ",", &string);
	     name != NULL;
	     name = strtok_r(NULL, ",", &string)
	) {
		unsigned idx = cap_name_to_number(name);

		drop_usermodehelper("/proc/sys/kernel/usermodehelper/bset", idx);
		drop_usermodehelper("/proc/sys/kernel/usermodehelper/inheritable", idx);
		drop_bounding_set(idx);
		drop_capset(idx);
		bb_error_msg("dropped capability: %s", name);
	}
}
| 173 | #endif |
| 174 | |
// Entry point shared by the switch_root and run-init applets.
//
// Sequence: parse options, chdir into NEW_ROOT, run sanity checks, then
// (unless this is a run-init dry run) delete everything on the old root
// device and MS_MOVE NEW_ROOT over "/". After that: chroot("."), optionally
// reopen stdio on the -c console, and exec NEW_INIT with the remaining argv.
//
// Returns 0 only for a dry run in which NEW_INIT passes access(X_OK).
// Otherwise it either execs NEW_INIT or ends in one of the *_and_die /
// bb_show_usage calls below.
int switch_root_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int switch_root_main(int argc UNUSED_PARAM, char **argv)
{
	char *newroot, *console = NULL;
	struct stat st;
	struct statfs stfs;
	unsigned dry_run = 0; // can become nonzero only in the run-init branch (-n)
	dev_t rootdev;

	// Parse args. '+': stop at first non-option
	// The first letter of applet_name selects switch_root vs run-init
	// parsing when both applets are enabled.
	if (ENABLE_SWITCH_ROOT && (!ENABLE_RUN_INIT || applet_name[0] == 's')) {
//usage:#define switch_root_trivial_usage
//usage: "[-c CONSOLE_DEV] NEW_ROOT NEW_INIT [ARGS]"
//usage:#define switch_root_full_usage "\n\n"
//usage: "Free initramfs and switch to another root fs:\n"
//usage: "chroot to NEW_ROOT, delete all in /, move NEW_ROOT to /,\n"
//usage: "execute NEW_INIT. PID must be 1. NEW_ROOT must be a mountpoint.\n"
//usage: "\n -c DEV Reopen stdio to DEV after switch"
		getopt32(argv, "^+"
			"c:"
			"\0" "-2" /* minimum 2 args */,
			&console
		);
	} else {
#if ENABLE_RUN_INIT
//usage:#define run_init_trivial_usage
//usage: "[-d CAP,CAP...] [-n] [-c CONSOLE_DEV] NEW_ROOT NEW_INIT [ARGS]"
//usage:#define run_init_full_usage "\n\n"
//usage: "Free initramfs and switch to another root fs:\n"
//usage: "chroot to NEW_ROOT, delete all in /, move NEW_ROOT to /,\n"
//usage: "execute NEW_INIT. PID must be 1. NEW_ROOT must be a mountpoint.\n"
//usage: "\n -c DEV Reopen stdio to DEV after switch"
//usage: "\n -d CAPS Drop capabilities"
//usage: "\n -n Dry run"
		char *cap_list = NULL;
		dry_run = getopt32(argv, "^+"
			"c:d:n"
			"\0" "-2" /* minimum 2 args */,
			&console,
			&cap_list
		);
		// Discard the two lower flag bits (presumably -c and -d, in
		// option-string order) so that only the -n flag remains.
		dry_run >>= 2; // -n
		// NOTE(review): capabilities are dropped here, before any of the
		// sanity checks below, and also when -n (dry run) was given.
		if (cap_list)
			drop_capabilities(cap_list);
#endif
	}
	// Skip the parsed options: take NEW_ROOT, leaving argv[0] == NEW_INIT
	argv += optind;
	newroot = *argv++;

	// Change to new root directory and verify it's a different fs
	xchdir(newroot);
	xstat("/", &st);
	rootdev = st.st_dev;
	xstat(".", &st);
	if (st.st_dev == rootdev) {
		// Show usage, it says new root must be a mountpoint
		bb_show_usage();
	}
	if (!dry_run && getpid() != 1) {
		// Show usage, it says we must be PID 1
		bb_show_usage();
	}

	// Additional sanity checks: we're about to rm -rf /, so be REALLY SURE
	// we mean it. I could make this a CONFIG option, but I would get email
	// from all the people who WILL destroy their filesystems.
	if (stat("/init", &st) != 0 || !S_ISREG(st.st_mode)) {
		bb_error_msg_and_die("'%s' is not a regular file", "/init");
	}
	// NOTE(review): the return value is ignored; if statfs() ever did fail,
	// stfs.f_type below would be read uninitialized.
	statfs("/", &stfs); // this never fails
	if ((unsigned)stfs.f_type != RAMFS_MAGIC
	 && (unsigned)stfs.f_type != TMPFS_MAGIC
	) {
		bb_simple_error_msg_and_die("root filesystem is not ramfs/tmpfs");
	}

	if (!dry_run) {
		// Zap everything out of rootdev
		delete_contents("/", rootdev);

		// Overmount / with newdir and chroot into it
		// ("." is NEW_ROOT: we chdir'ed into it above)
		if (mount(".", "/", NULL, MS_MOVE, NULL)) {
			// For example, fails when newroot is not a mountpoint
			bb_simple_perror_msg_and_die("error moving root");
		}
	}
	// Reached in dry-run mode too: only the delete and the mount are skipped
	xchroot(".");
	// The chdir is needed to recalculate "." and ".." links
	/*xchdir("/"); - done in xchroot */

	// If a new console specified, redirect stdin/stdout/stderr to it
	if (console) {
		// Failure to open the console only warns; stdio is then left as is
		int fd = open_or_warn(console, O_RDWR);
		if (fd >= 0) {
			xmove_fd(fd, 0);
			xdup2(0, 1);
			xdup2(0, 2);
		}
	}

	if (dry_run) {
		// Does NEW_INIT look like it can be executed?
		//xstat(argv[0], &st);
		//if (!S_ISREG(st.st_mode))
		//	bb_perror_msg_and_die("'%s' is not a regular file", argv[0]);
		if (access(argv[0], X_OK) == 0)
			return 0;
	} else {
		// Exec NEW_INIT
		execv(argv[0], argv);
	}
	// Reached when execv() returned (i.e. failed), or in a dry run when
	// NEW_INIT did not pass the access() check.
	bb_perror_msg_and_die("can't execute '%s'", argv[0]);
}
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 288 | |
| 289 | /* |
| 290 | From: Rob Landley <rob@landley.net> |
| 291 | Date: Tue, Jun 16, 2009 at 7:47 PM |
| 292 | Subject: Re: switch_root... |
| 293 | |
| 294 | ... |
| 295 | ... |
| 296 | ... |
| 297 | |
| 298 | If you're _not_ running out of init_ramfs (if for example you're using initrd |
| 299 | instead), you probably shouldn't use switch_root because it's the wrong tool. |
| 300 | |
| 301 | Basically what the sucker does is something like the following shell script: |
| 302 | |
| 303 | find / -xdev | xargs rm -rf |
| 304 | cd "$1" |
| 305 | shift |
| 306 | mount --move . / |
| 307 | exec chroot . "$@" |
| 308 | |
| 309 | There are a couple reasons that won't work as a shell script: |
| 310 | |
| 311 | 1) If you delete the commands out of your $PATH, your shell scripts can't run |
| 312 | more commands, but you can't start using dynamically linked _new_ commands |
| 313 | until after you do the chroot because the path to the dynamic linker is wrong. |
| 314 | So there's a step that needs to be sort of atomic but can't be as a shell |
| 315 | script. (You can work around this with static linking or very carefully laid |
| 316 | out paths and sequencing, but it's brittle, ugly, and non-obvious.) |
| 317 | |
Denys Vlasenko | 10ad622 | 2017-04-17 16:13:32 +0200 | [diff] [blame] | 318 | 2) The "find | rm" bit will actually delete everything because the mount points |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 319 | still show up (even if their contents don't), and rm -rf will then happily zap |
| 320 | that. So the first line is an oversimplification of what you need to do _not_ |
| 321 | to descend into other filesystems and delete their contents. |
| 322 | |
| 323 | The reason we do this is to free up memory, by the way. Since initramfs is a |
| 324 | ramfs, deleting its contents frees up the memory it uses. (We leave it with |
| 325 | one remaining dentry for the new mount point, but that's ok.) |
| 326 | |
| 327 | Note that you cannot ever umount rootfs, for approximately the same reason you |
| 328 | can't kill PID 1. The kernel tracks mount points as a doubly linked list, and |
| 329 | the pointer to the start/end of that list always points to an entry that's |
| 330 | known to be there (rootfs), so it never has to worry about moving that pointer |
| 331 | and it never has to worry about the list being empty. (Back around 2.6.13 |
| 332 | there _was_ a bug that let you umount rootfs, and the system locked hard the |
| 333 | instant you did so endlessly looping to find the end of the mount list and |
| 334 | never stopping. They fixed it.) |
| 335 | |
| 336 | Oh, and the reason we mount --move _and_ do the chroot is due to the way "/" |
| 337 | works. Each process has two special symlinks, ".", and "/". Each of them |
| 338 | points to the dentry of a directory, and give you a location paths can start |
| 339 | from. (Historically ".." was also special, because you could enter a |
| 340 | directory via a symlink so backing out to the directory you came from doesn't |
| 341 | necessarily mean the one physically above where "." points to. These days I |
| 342 | think it's just handed off to the filesystem.) |
| 343 | |
| 344 | Anyway, path resolution starts with "." or "/" (although the "./" at the start |
| 345 | of the path may be implicit), meaning it's relative to one of those two |
| 346 | directories. Your current directory, and your current root directory. The |
| 347 | chdir() syscall changes where "." points to, and the chroot() syscall changes |
| 348 | where "/" points to. (Again, both are per-process which is why chroot only |
| 349 | affects your current process and its child processes.) |
| 350 | |
| 351 | Note that chroot() does _not_ change where "." points to, and back before they |
| 352 | put crazy security checks into the kernel your current directory could be |
| 353 | somewhere you could no longer access after the chroot. (The command line |
| 354 | chroot does a cd as well, the chroot _syscall_ is what I'm talking about.) |
| 355 | |
| 356 | The reason mounting something new over / has no obvious effect is the same |
| 357 | reason mounting something over your current directory has no obvious effect: |
| 358 | the . and / links aren't recalculated after a mount, so they still point to |
| 359 | the same dentry they did before, even if that dentry is no longer accessible |
| 360 | by other means. Note that "cd ." is a NOP, and "chroot /" is a nop; both look |
| 361 | up the cached dentry and set it right back. They don't re-parse any paths, |
| 362 | because they're what all paths your process uses would be relative to. |
| 363 | |
| 364 | That's why the careful sequencing above: we cd into the new mount point before |
| 365 | we do the mount --move. Moving the mount point would otherwise make it |
Denys Vlasenko | bf74fb4 | 2015-10-13 12:34:35 +0200 | [diff] [blame] | 366 | totally inaccessible to us because cd-ing to the old path wouldn't give it to |
Denys Vlasenko | a5bdbe1 | 2009-06-17 14:03:24 +0200 | [diff] [blame] | 367 | us anymore, and cd "/" just gives us the cached dentry from when the process |
| 368 | was created (in this case the old initramfs one). But the "." symlink gives |
| 369 | us the dentry of the filesystem we just moved, so we can then "chroot ." to |
| 370 | copy that dentry to "/" and get the new filesystem. If we _didn't_ save that |
| 371 | dentry in "." we couldn't get it back after the mount --move. |
| 372 | |
| 373 | (Yes, this is all screwy and I had to email questions to Linus Torvalds to get |
| 374 | it straight myself. I keep meaning to write up a "how mount actually works" |
| 375 | document someday...) |
| 376 | */ |