/*
 * Check decoding of KVM_* commands of ioctl syscall using /dev/kvm API.
 * Based on kvmtest.c from https://lwn.net/Articles/658512/
 *
 * kvmtest.c author: Josh Triplett <josh@joshtriplett.org>
 * Copyright (c) 2015 Intel Corporation
 * Copyright (c) 2017-2021 The strace developers.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "tests.h"

#if defined HAVE_LINUX_KVM_H \
 && defined HAVE_STRUCT_KVM_CPUID2 \
 && defined HAVE_STRUCT_KVM_REGS \
 && defined HAVE_STRUCT_KVM_SREGS \
 && defined HAVE_STRUCT_KVM_USERSPACE_MEMORY_REGION \
 && (defined __x86_64__ || defined __i386__)

# include <fcntl.h>
# include <stdint.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include <sys/ioctl.h>
# include <sys/mman.h>
# include <unistd.h>
# include <linux/kvm.h>

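/* Fallback for kernel headers that do not define KVM_MAX_CPUID_ENTRIES. */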
# ifndef KVM_MAX_CPUID_ENTRIES
#  define KVM_MAX_CPUID_ENTRIES 80
# endif

# include "xmalloc.h"
# include "xlat.h"
# include "xlat/kvm_cpuid_flags.h"

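/*
 * Wrapper for ioctl: if the command fails, skip the whole test instead of
 * failing it, as the host may simply lack the required KVM support.
 */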
static int
kvm_ioctl(int fd, unsigned long cmd, const char *cmd_str, void *arg)
{
	int rc = ioctl(fd, cmd, arg);
	if (rc < 0)
		perror_msg_and_skip("%s", cmd_str);
	return rc;
}

# define KVM_IOCTL(fd_, cmd_, arg_)	\
	kvm_ioctl((fd_), (cmd_), #cmd_, (arg_))

static const char dev[] = "/dev/kvm";
static const char vm_dev[] = "anon_inode:kvm-vm";
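/* vcpu_dev is not const: the ":0" suffix is trimmed at run time on older kernels. */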
static char vcpu_dev[] = "anon_inode:kvm-vcpu:0";
static size_t page_size;

extern const char code[];
extern const unsigned short code_size;

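/*
 * Guest code: load the serial port number 0x3f8 into %dx, write a single
 * '\n' byte to that port (causing a KVM_EXIT_IO exit), and halt (causing
 * a KVM_EXIT_HLT exit).  code_size holds the length of the code object so
 * it can be copied into guest memory.
 */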
__asm__(
	".type code, @object		\n"
	"code:				\n"
	"	mov $0xd80003f8, %edx	\n"
	"	mov $'\\n', %al		\n"
	"	out %al, (%dx)		\n"
	"	hlt			\n"
	".size code, . - code		\n"
	".type code_size, @object	\n"
	"code_size:			\n"
	"	.short . - code		\n"
	".size code_size, . - code_size	\n"
	);

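/* Print a struct kvm_segment the way strace is expected to decode it. */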
static void
print_kvm_segment(const struct kvm_segment *seg)
{
	printf("{base=%#jx, limit=%u, selector=%u, type=%u, present=%u, "
	       "dpl=%u, db=%u, s=%u, l=%u, g=%u, avl=%u}",
	       (uintmax_t) seg->base, seg->limit, seg->selector, seg->type,
	       seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g,
	       seg->avl);
}

static void
print_kvm_sregs(const struct kvm_sregs *sregs)
{
	printf("{cs=");
	print_kvm_segment(&sregs->cs);
# if VERBOSE
	printf(", ds=");
	print_kvm_segment(&sregs->ds);
	printf(", es=");
	print_kvm_segment(&sregs->es);
	printf(", fs=");
	print_kvm_segment(&sregs->fs);
	printf(", gs=");
	print_kvm_segment(&sregs->gs);
	printf(", ss=");
	print_kvm_segment(&sregs->ss);
	printf(", tr=");
	print_kvm_segment(&sregs->tr);
	printf(", ldt=");
	print_kvm_segment(&sregs->ldt);
	printf(", gdt={base=%#jx, limit=%u}, idt={base=%#jx, limit=%u}, "
	       "cr0=%llu, cr2=%llu, cr3=%llu, cr4=%llu, cr8=%llu, efer=%llu, "
	       "apic_base=%#jx", (uintmax_t) sregs->gdt.base, sregs->gdt.limit,
	       (uintmax_t) sregs->idt.base, sregs->idt.limit, sregs->cr0,
	       sregs->cr2, sregs->cr3, sregs->cr4, sregs->cr8, sregs->efer,
	       (uintmax_t) sregs->apic_base);
	printf(", interrupt_bitmap=[");
	for (size_t i = 0; i < ARRAY_SIZE(sregs->interrupt_bitmap); i++) {
		if (i)
			printf(", ");
		printf("%#jx", (uintmax_t) sregs->interrupt_bitmap[i]);
	}
	printf("]");
# else
	printf(", ...");
# endif
	printf("}");
}

static void
print_kvm_regs(const struct kvm_regs *regs)
{
	printf("{rax=%#jx", (uintmax_t) regs->rax);
# if VERBOSE
	printf(", rbx=%#jx, rcx=%#jx, rdx=%#jx, rsi=%#jx, rdi=%#jx",
	       (uintmax_t) regs->rbx, (uintmax_t) regs->rcx,
	       (uintmax_t) regs->rdx, (uintmax_t) regs->rsi,
	       (uintmax_t) regs->rdi);
# else
	printf(", ...");
# endif
	printf(", rsp=%#jx, rbp=%#jx", (uintmax_t) regs->rsp,
	       (uintmax_t) regs->rbp);
# if VERBOSE
	printf(", r8=%#jx, r9=%#jx, r10=%#jx, r11=%#jx, r12=%#jx, r13=%#jx"
	       ", r14=%#jx, r15=%#jx",
	       (uintmax_t) regs->r8, (uintmax_t) regs->r9,
	       (uintmax_t) regs->r10, (uintmax_t) regs->r11,
	       (uintmax_t) regs->r12, (uintmax_t) regs->r13,
	       (uintmax_t) regs->r14, (uintmax_t) regs->r15);
# else
	printf(", ...");
# endif
	printf(", rip=%#jx, rflags=%#jx}", (uintmax_t) regs->rip,
	       (uintmax_t) regs->rflags);
}

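/*
 * print_KVM_RUN is only declared here; it is presumably provided by the
 * file that includes this one, with need_print_KVM_RUN signalling whether
 * that definition is actually needed.
 */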
# define need_print_KVM_RUN 1

static void
print_KVM_RUN(const int fd, const char *const dev, const unsigned int reason);

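/*
 * Set up the vcpu segment and general-purpose registers, copy the guest
 * code into the mapped page, and run the vcpu until it halts, checking
 * every KVM_RUN exit along the way.
 */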
static void
run_kvm(const int vcpu_fd, struct kvm_run *const run, const size_t mmap_size,
	void *const mem)
{
	/* Initialize CS to point at 0, via a read-modify-write of sregs. */
	struct kvm_sregs sregs;
	KVM_IOCTL(vcpu_fd, KVM_GET_SREGS, &sregs);
	printf("ioctl(%d<%s>, KVM_GET_SREGS, ", vcpu_fd, vcpu_dev);
	print_kvm_sregs(&sregs);
	printf(") = 0\n");

	sregs.cs.base = 0;
	sregs.cs.selector = 0;
	KVM_IOCTL(vcpu_fd, KVM_SET_SREGS, &sregs);
	printf("ioctl(%d<%s>, KVM_SET_SREGS, ", vcpu_fd, vcpu_dev);
	print_kvm_sregs(&sregs);
	printf(") = 0\n");

	/*
	 * Initialize registers: instruction pointer for our code, addends,
	 * and initial flags required by x86 architecture.
	 */
	struct kvm_regs regs = {
		.rip = page_size,
		.rax = 2,
		.rbx = 2,
		.rflags = 0x2,
	};
	KVM_IOCTL(vcpu_fd, KVM_SET_REGS, &regs);
	printf("ioctl(%d<%s>, KVM_SET_REGS, ", vcpu_fd, vcpu_dev);
	print_kvm_regs(&regs);
	printf(") = 0\n");

	/* Copy the code */
	memcpy(mem, code, code_size);

	const char *p = "\n";

	/* Repeatedly run code and handle VM exits. */
	for (;;) {
		KVM_IOCTL(vcpu_fd, KVM_RUN, NULL);
		print_KVM_RUN(vcpu_fd, vcpu_dev, run->exit_reason);

		switch (run->exit_reason) {
		case KVM_EXIT_HLT:
			if (p)
				error_msg_and_fail("premature KVM_EXIT_HLT");
			return;
		case KVM_EXIT_IO:
			if (run->io.direction == KVM_EXIT_IO_OUT
			    && run->io.size == 1
			    && run->io.port == 0x03f8
			    && run->io.count == 1
			    && run->io.data_offset < mmap_size
			    && p && *p == ((char *) run)[run->io.data_offset])
				p = NULL;
			else
				error_msg_and_fail("unhandled KVM_EXIT_IO");
			break;
		case KVM_EXIT_MMIO:
			error_msg_and_fail("Got an unexpected MMIO exit:"
					   " phys_addr %#llx,"
					   " data %02x %02x %02x %02x"
					   " %02x %02x %02x %02x,"
					   " len %u, is_write %hhu",
					   (unsigned long long) run->mmio.phys_addr,
					   run->mmio.data[0], run->mmio.data[1],
					   run->mmio.data[2], run->mmio.data[3],
					   run->mmio.data[4], run->mmio.data[5],
					   run->mmio.data[6], run->mmio.data[7],
					   run->mmio.len, run->mmio.is_write);
		case KVM_EXIT_FAIL_ENTRY:
			error_msg_and_fail("Got an unexpected FAIL_ENTRY exit:"
					   " hardware_entry_failure_reason %" PRI__x64,
					   run->fail_entry.hardware_entry_failure_reason);

		default:
			error_msg_and_fail("exit_reason = %#x",
					   run->exit_reason);
		}
	}
}

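/*
 * Newer kernels append ":<cpu id>" to the kvm-vcpu anon inode name,
 * older ones do not.  Tell the two apart by reading the /proc/self/fd
 * symlink for the vcpu file descriptor.
 */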
static int
vcpu_dev_should_have_cpuid(int fd)
{
	int r = 0;
	char *proc = xasprintf("/proc/self/fd/%u", fd);
	char buf[sizeof(vcpu_dev)];

	if (readlink(proc, buf, sizeof(buf)) == sizeof(buf) - 1
	    && (memcmp(buf, vcpu_dev, sizeof(buf) - 1) == 0))
		r = 1;
	free(proc);
	return r;
}

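/*
 * Print the expected decoding of a KVM_GET_SUPPORTED_CPUID or
 * KVM_SET_CPUID2 ioctl for the given struct kvm_cpuid2.
 */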
static void
print_cpuid_ioctl(int fd, const char *fd_dev,
		  const char *ioctl_name, const struct kvm_cpuid2 *cpuid)
{
	printf("ioctl(%d<%s>, %s, {nent=%u, entries=[",
	       fd, fd_dev, ioctl_name, cpuid->nent);
# if VERBOSE
	for (size_t i = 0; i < cpuid->nent; i++) {
		if (i)
			printf(", ");
		printf("{function=%#x, index=%#x, flags=",
		       cpuid->entries[i].function, cpuid->entries[i].index);
		printflags(kvm_cpuid_flags, cpuid->entries[i].flags,
			   "KVM_CPUID_FLAG_???");
		printf(", eax=%#x, ebx=%#x, ecx=%#x, edx=%#x}",
		       cpuid->entries[i].eax, cpuid->entries[i].ebx,
		       cpuid->entries[i].ecx, cpuid->entries[i].edx);
	}
# else
	if (cpuid->nent)
		printf("...");
# endif
	printf("]}) = 0\n");
}

int
main(void)
{
	skip_if_unavailable("/proc/self/fd/");

	int kvm = open(dev, O_RDWR);
	if (kvm < 0)
		perror_msg_and_skip("open: %s", dev);

	/* Make sure we have the stable version of the API */
	int ret = KVM_IOCTL(kvm, KVM_GET_API_VERSION, 0);
	if (ret != KVM_API_VERSION)
		error_msg_and_skip("KVM_GET_API_VERSION returned %d"
				   ", KVM_API_VERSION is %d",
				   ret, KVM_API_VERSION);
	printf("ioctl(%d<%s>, KVM_GET_API_VERSION, 0) = %d\n",
	       kvm, dev, ret);

	ret = KVM_IOCTL(kvm, KVM_CHECK_EXTENSION,
			(void *) (uintptr_t) KVM_CAP_USER_MEMORY);
	printf("ioctl(%d<%s>, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY) = %d\n",
	       kvm, dev, ret);

	int vm_fd = KVM_IOCTL(kvm, KVM_CREATE_VM, 0);
	printf("ioctl(%d<%s>, KVM_CREATE_VM, 0) = %d<%s>\n",
	       kvm, dev, vm_fd, vm_dev);

	/* Allocate one aligned page of guest memory to hold the code. */
	page_size = get_page_size();
	void *const mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
			       MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED)
		perror_msg_and_fail("mmap page");

	/* Map it to the second page frame (to avoid the real-mode IDT at 0). */
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.guest_phys_addr = page_size,
		.memory_size = page_size,
		.userspace_addr = (uintptr_t) mem,
	};
	KVM_IOCTL(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	printf("ioctl(%d<%s>, KVM_SET_USER_MEMORY_REGION"
	       ", {slot=0, flags=0, guest_phys_addr=%#lx, memory_size=%lu"
	       ", userspace_addr=%p}) = 0\n", vm_fd, vm_dev,
	       (unsigned long) page_size, (unsigned long) page_size, mem);

	int vcpu_fd = KVM_IOCTL(vm_fd, KVM_CREATE_VCPU, NULL);
	if (!vcpu_dev_should_have_cpuid(vcpu_fd)) {
		/*
		 * This is an older kernel that doesn't place a cpuid
		 * at the end of the dentry associated with vcpu_fd.
		 * Trim the cpuid part of vcpu_dev like:
		 * "anon_inode:kvm-vcpu:0" -> "anon_inode:kvm-vcpu"
		 */
		vcpu_dev[strlen(vcpu_dev) - 2] = '\0';
# ifdef KVM_NO_CPUID_CALLBACK
		KVM_NO_CPUID_CALLBACK;
# endif
	}

	printf("ioctl(%d<%s>, KVM_CREATE_VCPU, 0) = %d<%s>\n",
	       vm_fd, vm_dev, vcpu_fd, vcpu_dev);

	/* Map the shared kvm_run structure and following data. */
	ret = KVM_IOCTL(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
	struct kvm_run *run;
	if (ret < (int) sizeof(*run))
		error_msg_and_fail("KVM_GET_VCPU_MMAP_SIZE returned %d < %d",
				   ret, (int) sizeof(*run));
	printf("ioctl(%d<%s>, KVM_GET_VCPU_MMAP_SIZE, 0) = %d\n",
	       kvm, dev, ret);

	const size_t mmap_size = (ret + page_size - 1) & -page_size;
	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
		   MAP_SHARED, vcpu_fd, 0);
	if (run == MAP_FAILED)
		perror_msg_and_fail("mmap vcpu");

	size_t cpuid_nent = KVM_MAX_CPUID_ENTRIES;
	struct kvm_cpuid2 *cpuid = tail_alloc(sizeof(*cpuid) +
					      cpuid_nent *
					      sizeof(*cpuid->entries));

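	/*
	 * With nent set to 0 the buffer cannot hold a single entry, so
	 * KVM_GET_SUPPORTED_CPUID is expected to fail with E2BIG.
	 */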
	cpuid->nent = 0;
	ioctl(kvm, KVM_GET_SUPPORTED_CPUID, cpuid);
	printf("ioctl(%d<%s>, KVM_GET_SUPPORTED_CPUID, %p) = -1 E2BIG (%m)\n",
	       kvm, dev, cpuid);

	cpuid->nent = cpuid_nent;

	KVM_IOCTL(kvm, KVM_GET_SUPPORTED_CPUID, cpuid);
	print_cpuid_ioctl(kvm, dev, "KVM_GET_SUPPORTED_CPUID", cpuid);

	struct kvm_cpuid2 cpuid_tmp = { .nent = 0 };
	KVM_IOCTL(vcpu_fd, KVM_SET_CPUID2, &cpuid_tmp);
	printf("ioctl(%d<%s>, KVM_SET_CPUID2, {nent=%u, entries=[]}) = 0\n",
	       vcpu_fd, vcpu_dev, cpuid_tmp.nent);

	KVM_IOCTL(vcpu_fd, KVM_SET_CPUID2, cpuid);
	print_cpuid_ioctl(vcpu_fd, vcpu_dev, "KVM_SET_CPUID2", cpuid);

	ioctl(vcpu_fd, KVM_SET_CPUID2, NULL);
	printf("ioctl(%d<%s>, KVM_SET_CPUID2, NULL) = -1 EFAULT (%m)\n",
	       vcpu_fd, vcpu_dev);

	run_kvm(vcpu_fd, run, mmap_size, mem);

	puts("+++ exited with 0 +++");
	return 0;
}

#else /* !HAVE_LINUX_KVM_H */

SKIP_MAIN_UNDEFINED("HAVE_LINUX_KVM_H && HAVE_STRUCT_KVM_CPUID2 && "
		    "HAVE_STRUCT_KVM_REGS && HAVE_STRUCT_KVM_SREGS && "
		    "HAVE_STRUCT_KVM_USERSPACE_MEMORY_REGION && "
		    "(__x86_64__ || __i386__)")

# define need_print_KVM_RUN 0

#endif