Kyle Swenson | 8d8f654 | 2021-03-15 11:02:55 -0600 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2006 Andi Kleen, SUSE Labs. |
| 3 | * Subject to the GNU Public License, v.2 |
| 4 | * |
| 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
| 6 | * |
| 7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> |
| 8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany |
| 9 | * |
| 10 | * The code should have no internal unresolved relocations. |
| 11 | * Check with readelf after changing. |
| 12 | */ |
| 13 | |
| 14 | #include <uapi/linux/time.h> |
| 15 | #include <asm/vgtod.h> |
| 16 | #include <asm/hpet.h> |
| 17 | #include <asm/vvar.h> |
| 18 | #include <asm/unistd.h> |
| 19 | #include <asm/msr.h> |
| 20 | #include <linux/math64.h> |
| 21 | #include <linux/time.h> |
| 22 | |
| 23 | #define gtod (&VVAR(vsyscall_gtod_data)) |
| 24 | |
| 25 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
| 26 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); |
| 27 | extern time_t __vdso_time(time_t *t); |
| 28 | |
| 29 | #ifdef CONFIG_HPET_TIMER |
| 30 | extern u8 hpet_page |
| 31 | __attribute__((visibility("hidden"))); |
| 32 | |
| 33 | static notrace cycle_t vread_hpet(void) |
| 34 | { |
| 35 | return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); |
| 36 | } |
| 37 | #endif |
| 38 | |
| 39 | #ifndef BUILD_VDSO32 |
| 40 | |
| 41 | #include <linux/kernel.h> |
| 42 | #include <asm/vsyscall.h> |
| 43 | #include <asm/fixmap.h> |
| 44 | #include <asm/pvclock.h> |
| 45 | |
| 46 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
| 47 | { |
| 48 | long ret; |
| 49 | asm("syscall" : "=a" (ret) : |
| 50 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); |
| 51 | return ret; |
| 52 | } |
| 53 | |
| 54 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) |
| 55 | { |
| 56 | long ret; |
| 57 | |
| 58 | asm("syscall" : "=a" (ret) : |
| 59 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); |
| 60 | return ret; |
| 61 | } |
| 62 | |
| 63 | #ifdef CONFIG_PARAVIRT_CLOCK |
| 64 | |
| 65 | static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) |
| 66 | { |
| 67 | const struct pvclock_vsyscall_time_info *pvti_base; |
| 68 | int idx = cpu / (PAGE_SIZE/PVTI_SIZE); |
| 69 | int offset = cpu % (PAGE_SIZE/PVTI_SIZE); |
| 70 | |
| 71 | BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); |
| 72 | |
| 73 | pvti_base = (struct pvclock_vsyscall_time_info *) |
| 74 | __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); |
| 75 | |
| 76 | return &pvti_base[offset]; |
| 77 | } |
| 78 | |
| 79 | static notrace cycle_t vread_pvclock(int *mode) |
| 80 | { |
| 81 | const struct pvclock_vsyscall_time_info *pvti; |
| 82 | cycle_t ret; |
| 83 | u64 last; |
| 84 | u32 version; |
| 85 | u8 flags; |
| 86 | unsigned cpu, cpu1; |
| 87 | |
| 88 | |
| 89 | /* |
| 90 | * Note: hypervisor must guarantee that: |
| 91 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. |
| 92 | * 2. that per-CPU pvclock time info is updated if the |
| 93 | * underlying CPU changes. |
| 94 | * 3. that version is increased whenever underlying CPU |
| 95 | * changes. |
| 96 | * |
| 97 | */ |
| 98 | do { |
| 99 | cpu = __getcpu() & VGETCPU_CPU_MASK; |
| 100 | /* TODO: We can put vcpu id into higher bits of pvti.version. |
| 101 | * This will save a couple of cycles by getting rid of |
| 102 | * __getcpu() calls (Gleb). |
| 103 | */ |
| 104 | |
| 105 | pvti = get_pvti(cpu); |
| 106 | |
| 107 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
| 108 | |
| 109 | /* |
| 110 | * Test we're still on the cpu as well as the version. |
| 111 | * We could have been migrated just after the first |
| 112 | * vgetcpu but before fetching the version, so we |
| 113 | * wouldn't notice a version change. |
| 114 | */ |
| 115 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; |
| 116 | } while (unlikely(cpu != cpu1 || |
| 117 | (pvti->pvti.version & 1) || |
| 118 | pvti->pvti.version != version)); |
| 119 | |
| 120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) |
| 121 | *mode = VCLOCK_NONE; |
| 122 | |
| 123 | /* refer to tsc.c read_tsc() comment for rationale */ |
| 124 | last = gtod->cycle_last; |
| 125 | |
| 126 | if (likely(ret >= last)) |
| 127 | return ret; |
| 128 | |
| 129 | return last; |
| 130 | } |
| 131 | #endif |
| 132 | |
| 133 | #else |
| 134 | |
| 135 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
| 136 | { |
| 137 | long ret; |
| 138 | |
| 139 | asm( |
| 140 | "mov %%ebx, %%edx \n" |
| 141 | "mov %2, %%ebx \n" |
| 142 | "call __kernel_vsyscall \n" |
| 143 | "mov %%edx, %%ebx \n" |
| 144 | : "=a" (ret) |
| 145 | : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) |
| 146 | : "memory", "edx"); |
| 147 | return ret; |
| 148 | } |
| 149 | |
| 150 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) |
| 151 | { |
| 152 | long ret; |
| 153 | |
| 154 | asm( |
| 155 | "mov %%ebx, %%edx \n" |
| 156 | "mov %2, %%ebx \n" |
| 157 | "call __kernel_vsyscall \n" |
| 158 | "mov %%edx, %%ebx \n" |
| 159 | : "=a" (ret) |
| 160 | : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) |
| 161 | : "memory", "edx"); |
| 162 | return ret; |
| 163 | } |
| 164 | |
| 165 | #ifdef CONFIG_PARAVIRT_CLOCK |
| 166 | |
| 167 | static notrace cycle_t vread_pvclock(int *mode) |
| 168 | { |
| 169 | *mode = VCLOCK_NONE; |
| 170 | return 0; |
| 171 | } |
| 172 | #endif |
| 173 | |
| 174 | #endif |
| 175 | |
| 176 | notrace static cycle_t vread_tsc(void) |
| 177 | { |
| 178 | cycle_t ret = (cycle_t)rdtsc_ordered(); |
| 179 | u64 last = gtod->cycle_last; |
| 180 | |
| 181 | if (likely(ret >= last)) |
| 182 | return ret; |
| 183 | |
| 184 | /* |
| 185 | * GCC likes to generate cmov here, but this branch is extremely |
| 186 | * predictable (it's just a funciton of time and the likely is |
| 187 | * very likely) and there's a data dependence, so force GCC |
| 188 | * to generate a branch instead. I don't barrier() because |
| 189 | * we don't actually need a barrier, and if this function |
| 190 | * ever gets inlined it will generate worse code. |
| 191 | */ |
| 192 | asm volatile (""); |
| 193 | return last; |
| 194 | } |
| 195 | |
| 196 | notrace static inline u64 vgetsns(int *mode) |
| 197 | { |
| 198 | u64 v; |
| 199 | cycles_t cycles; |
| 200 | |
| 201 | if (gtod->vclock_mode == VCLOCK_TSC) |
| 202 | cycles = vread_tsc(); |
| 203 | #ifdef CONFIG_HPET_TIMER |
| 204 | else if (gtod->vclock_mode == VCLOCK_HPET) |
| 205 | cycles = vread_hpet(); |
| 206 | #endif |
| 207 | #ifdef CONFIG_PARAVIRT_CLOCK |
| 208 | else if (gtod->vclock_mode == VCLOCK_PVCLOCK) |
| 209 | cycles = vread_pvclock(mode); |
| 210 | #endif |
| 211 | else |
| 212 | return 0; |
| 213 | v = (cycles - gtod->cycle_last) & gtod->mask; |
| 214 | return v * gtod->mult; |
| 215 | } |
| 216 | |
| 217 | /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ |
| 218 | notrace static int __always_inline do_realtime(struct timespec *ts) |
| 219 | { |
| 220 | unsigned long seq; |
| 221 | u64 ns; |
| 222 | int mode; |
| 223 | |
| 224 | do { |
| 225 | seq = gtod_read_begin(gtod); |
| 226 | mode = gtod->vclock_mode; |
| 227 | ts->tv_sec = gtod->wall_time_sec; |
| 228 | ns = gtod->wall_time_snsec; |
| 229 | ns += vgetsns(&mode); |
| 230 | ns >>= gtod->shift; |
| 231 | } while (unlikely(gtod_read_retry(gtod, seq))); |
| 232 | |
| 233 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); |
| 234 | ts->tv_nsec = ns; |
| 235 | |
| 236 | return mode; |
| 237 | } |
| 238 | |
| 239 | notrace static int __always_inline do_monotonic(struct timespec *ts) |
| 240 | { |
| 241 | unsigned long seq; |
| 242 | u64 ns; |
| 243 | int mode; |
| 244 | |
| 245 | do { |
| 246 | seq = gtod_read_begin(gtod); |
| 247 | mode = gtod->vclock_mode; |
| 248 | ts->tv_sec = gtod->monotonic_time_sec; |
| 249 | ns = gtod->monotonic_time_snsec; |
| 250 | ns += vgetsns(&mode); |
| 251 | ns >>= gtod->shift; |
| 252 | } while (unlikely(gtod_read_retry(gtod, seq))); |
| 253 | |
| 254 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); |
| 255 | ts->tv_nsec = ns; |
| 256 | |
| 257 | return mode; |
| 258 | } |
| 259 | |
| 260 | notrace static void do_realtime_coarse(struct timespec *ts) |
| 261 | { |
| 262 | unsigned long seq; |
| 263 | do { |
| 264 | seq = gtod_read_begin(gtod); |
| 265 | ts->tv_sec = gtod->wall_time_coarse_sec; |
| 266 | ts->tv_nsec = gtod->wall_time_coarse_nsec; |
| 267 | } while (unlikely(gtod_read_retry(gtod, seq))); |
| 268 | } |
| 269 | |
| 270 | notrace static void do_monotonic_coarse(struct timespec *ts) |
| 271 | { |
| 272 | unsigned long seq; |
| 273 | do { |
| 274 | seq = gtod_read_begin(gtod); |
| 275 | ts->tv_sec = gtod->monotonic_time_coarse_sec; |
| 276 | ts->tv_nsec = gtod->monotonic_time_coarse_nsec; |
| 277 | } while (unlikely(gtod_read_retry(gtod, seq))); |
| 278 | } |
| 279 | |
| 280 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
| 281 | { |
| 282 | switch (clock) { |
| 283 | case CLOCK_REALTIME: |
| 284 | if (do_realtime(ts) == VCLOCK_NONE) |
| 285 | goto fallback; |
| 286 | break; |
| 287 | case CLOCK_MONOTONIC: |
| 288 | if (do_monotonic(ts) == VCLOCK_NONE) |
| 289 | goto fallback; |
| 290 | break; |
| 291 | case CLOCK_REALTIME_COARSE: |
| 292 | do_realtime_coarse(ts); |
| 293 | break; |
| 294 | case CLOCK_MONOTONIC_COARSE: |
| 295 | do_monotonic_coarse(ts); |
| 296 | break; |
| 297 | default: |
| 298 | goto fallback; |
| 299 | } |
| 300 | |
| 301 | return 0; |
| 302 | fallback: |
| 303 | return vdso_fallback_gettime(clock, ts); |
| 304 | } |
| 305 | int clock_gettime(clockid_t, struct timespec *) |
| 306 | __attribute__((weak, alias("__vdso_clock_gettime"))); |
| 307 | |
| 308 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
| 309 | { |
| 310 | if (likely(tv != NULL)) { |
| 311 | if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) |
| 312 | return vdso_fallback_gtod(tv, tz); |
| 313 | tv->tv_usec /= 1000; |
| 314 | } |
| 315 | if (unlikely(tz != NULL)) { |
| 316 | tz->tz_minuteswest = gtod->tz_minuteswest; |
| 317 | tz->tz_dsttime = gtod->tz_dsttime; |
| 318 | } |
| 319 | |
| 320 | return 0; |
| 321 | } |
| 322 | int gettimeofday(struct timeval *, struct timezone *) |
| 323 | __attribute__((weak, alias("__vdso_gettimeofday"))); |
| 324 | |
| 325 | /* |
| 326 | * This will break when the xtime seconds get inaccurate, but that is |
| 327 | * unlikely |
| 328 | */ |
| 329 | notrace time_t __vdso_time(time_t *t) |
| 330 | { |
| 331 | /* This is atomic on x86 so we don't need any locks. */ |
| 332 | time_t result = ACCESS_ONCE(gtod->wall_time_sec); |
| 333 | |
| 334 | if (t) |
| 335 | *t = result; |
| 336 | return result; |
| 337 | } |
| 338 | int time(time_t *t) |
| 339 | __attribute__((weak, alias("__vdso_time"))); |