add idl4k kernel firmware version 1.13.0.105

This commit is contained in:
Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
#
# KVM configuration
#
source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || X86
default y
---help---
Say Y here to get to see options for using your Linux host to run other
operating systems inside virtual machines (guests).
This option alone does not add any kernel code.
If you say N, all options in this submenu will be skipped and disabled.
if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
# for device assignment:
depends on PCI
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
processor equipped with virtualization extensions. You will also
need to select one or more of the processor modules below.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
To compile this as a module, choose M here: the module
will be called kvm.
If unsure, say N.
config KVM_INTEL
tristate "KVM for Intel processors support"
depends on KVM
---help---
Provides support for KVM on Intel processors equipped with the VT
extensions.
To compile this as a module, choose M here: the module
will be called kvm-intel.
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
---help---
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
To compile this as a module, choose M here: the module
will be called kvm-amd.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
source drivers/virtio/Kconfig
endif # VIRTUALIZATION

View File

@@ -0,0 +1,19 @@
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o eventfd.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o timer.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o

File diff suppressed because it is too large Load Diff

735
kernel/arch/x86/kvm/i8254.c Normal file
View File

@@ -0,0 +1,735 @@
/*
* 8253/8254 interval timer emulation
*
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2006 Intel Corporation
* Copyright (c) 2007 Keir Fraser, XenSource Inc
* Copyright (c) 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* Authors:
* Sheng Yang <sheng.yang@intel.com>
* Based on QEMU and Xen.
*/
#include <linux/kvm_host.h>
#include "irq.h"
#include "i8254.h"
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif
#define RW_STATE_LSB 1
#define RW_STATE_MSB 2
#define RW_STATE_WORD0 3
#define RW_STATE_WORD1 4
/* Compute with 96 bit intermediate result: (a*b)/c */
static u64 muldiv64(u64 a, u32 b, u32 c)
{
union {
u64 ll;
struct {
u32 low, high;
} l;
} u, res;
u64 rl, rh;
u.ll = a;
rl = (u64)u.l.low * (u64)b;
rh = (u64)u.l.high * (u64)b;
rh += (rl >> 32);
res.l.high = div64_u64(rh, c);
res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
return res.ll;
}
static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
{
struct kvm_kpit_channel_state *c =
&kvm->arch.vpit->pit_state.channels[channel];
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
switch (c->mode) {
default:
case 0:
case 4:
/* XXX: just disable/enable counting */
break;
case 1:
case 2:
case 3:
case 5:
/* Restart counting on rising edge. */
if (c->gate < val)
c->count_load_time = ktime_get();
break;
}
c->gate = val;
}
static int pit_get_gate(struct kvm *kvm, int channel)
{
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
return kvm->arch.vpit->pit_state.channels[channel].gate;
}
static s64 __kpit_elapsed(struct kvm *kvm)
{
s64 elapsed;
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
if (!ps->pit_timer.period)
return 0;
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
* the highest count, either FFFF hex for binary counting
* or 9999 for BCD counting, and continues counting.
* Modes 2 and 3 are periodic; the Counter reloads
* itself with the initial count and continues counting
* from there.
*/
remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps->pit_timer.period);
return elapsed;
}
static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
int channel)
{
if (channel == 0)
return __kpit_elapsed(kvm);
return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
}
static int pit_get_count(struct kvm *kvm, int channel)
{
struct kvm_kpit_channel_state *c =
&kvm->arch.vpit->pit_state.channels[channel];
s64 d, t;
int counter;
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
case 0:
case 1:
case 4:
case 5:
counter = (c->count - d) & 0xffff;
break;
case 3:
/* XXX: may be incorrect for odd counts */
counter = c->count - (mod_64((2 * d), c->count));
break;
default:
counter = c->count - mod_64(d, c->count);
break;
}
return counter;
}
static int pit_get_out(struct kvm *kvm, int channel)
{
struct kvm_kpit_channel_state *c =
&kvm->arch.vpit->pit_state.channels[channel];
s64 d, t;
int out;
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
default:
case 0:
out = (d >= c->count);
break;
case 1:
out = (d < c->count);
break;
case 2:
out = ((mod_64(d, c->count) == 0) && (d != 0));
break;
case 3:
out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
break;
case 4:
case 5:
out = (d == c->count);
break;
}
return out;
}
static void pit_latch_count(struct kvm *kvm, int channel)
{
struct kvm_kpit_channel_state *c =
&kvm->arch.vpit->pit_state.channels[channel];
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
if (!c->count_latched) {
c->latched_count = pit_get_count(kvm, channel);
c->count_latched = c->rw_mode;
}
}
static void pit_latch_status(struct kvm *kvm, int channel)
{
struct kvm_kpit_channel_state *c =
&kvm->arch.vpit->pit_state.channels[channel];
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
if (!c->status_latched) {
/* TODO: Return NULL COUNT (bit 6). */
c->status = ((pit_get_out(kvm, channel) << 7) |
(c->rw_mode << 4) |
(c->mode << 1) |
c->bcd);
c->status_latched = 1;
}
}
int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
if (pit && kvm_vcpu_is_bsp(vcpu) && pit->pit_state.irq_ack)
return atomic_read(&pit->pit_state.pit_timer.pending);
return 0;
}
static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
spin_lock(&ps->inject_lock);
if (atomic_dec_return(&ps->pit_timer.pending) < 0)
atomic_inc(&ps->pit_timer.pending);
ps->irq_ack = 1;
spin_unlock(&ps->inject_lock);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
struct hrtimer *timer;
if (!kvm_vcpu_is_bsp(vcpu) || !pit)
return;
timer = &pit->pit_state.pit_timer.timer;
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
static void destroy_pit_timer(struct kvm_timer *pt)
{
pr_debug("pit: execute del timer!\n");
hrtimer_cancel(&pt->timer);
}
static bool kpit_is_periodic(struct kvm_timer *ktimer)
{
struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
pit_timer);
return ps->is_periodic;
}
static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{
struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer);
pt->period = interval;
ps->is_periodic = is_period;
pt->timer.function = kvm_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
pt->vcpu = pt->kvm->bsp_vcpu;
atomic_set(&pt->pending, 0);
ps->irq_ack = 1;
hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
}
static void pit_load_count(struct kvm *kvm, int channel, u32 val)
{
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
WARN_ON(!mutex_is_locked(&ps->lock));
pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
/*
* The largest possible initial count is 0; this is equivalent
* to 216 for binary counting and 104 for BCD counting.
*/
if (val == 0)
val = 0x10000;
ps->channels[channel].count = val;
if (channel != 0) {
ps->channels[channel].count_load_time = ktime_get();
return;
}
/* Two types of timer
* mode 1 is one shot, mode 2 is period, otherwise del timer */
switch (ps->channels[0].mode) {
case 0:
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
create_pit_timer(ps, val, 0);
}
break;
case 2:
case 3:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
create_pit_timer(ps, val, 1);
}
break;
default:
destroy_pit_timer(&ps->pit_timer);
}
}
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start)
{
u8 saved_mode;
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
pit_load_count(kvm, channel, val);
kvm->arch.vpit->pit_state.channels[0].mode = saved_mode;
} else {
pit_load_count(kvm, channel, val);
}
}
static inline struct kvm_pit *dev_to_pit(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pit, dev);
}
static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pit, speaker_dev);
}
static inline int pit_in_range(gpa_t addr)
{
return ((addr >= KVM_PIT_BASE_ADDRESS) &&
(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}
static int pit_ioport_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
int channel, access;
struct kvm_kpit_channel_state *s;
u32 val = *(u32 *) data;
if (!pit_in_range(addr))
return -EOPNOTSUPP;
val &= 0xff;
addr &= KVM_PIT_CHANNEL_MASK;
mutex_lock(&pit_state->lock);
if (val != 0)
pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
(unsigned int)addr, len, val);
if (addr == 3) {
channel = val >> 6;
if (channel == 3) {
/* Read-Back Command. */
for (channel = 0; channel < 3; channel++) {
s = &pit_state->channels[channel];
if (val & (2 << channel)) {
if (!(val & 0x20))
pit_latch_count(kvm, channel);
if (!(val & 0x10))
pit_latch_status(kvm, channel);
}
}
} else {
/* Select Counter <channel>. */
s = &pit_state->channels[channel];
access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
if (access == 0) {
pit_latch_count(kvm, channel);
} else {
s->rw_mode = access;
s->read_state = access;
s->write_state = access;
s->mode = (val >> 1) & 7;
if (s->mode > 5)
s->mode -= 4;
s->bcd = val & 1;
}
}
} else {
/* Write Count. */
s = &pit_state->channels[addr];
switch (s->write_state) {
default:
case RW_STATE_LSB:
pit_load_count(kvm, addr, val);
break;
case RW_STATE_MSB:
pit_load_count(kvm, addr, val << 8);
break;
case RW_STATE_WORD0:
s->write_latch = val;
s->write_state = RW_STATE_WORD1;
break;
case RW_STATE_WORD1:
pit_load_count(kvm, addr, s->write_latch | (val << 8));
s->write_state = RW_STATE_WORD0;
break;
}
}
mutex_unlock(&pit_state->lock);
return 0;
}
static int pit_ioport_read(struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
int ret, count;
struct kvm_kpit_channel_state *s;
if (!pit_in_range(addr))
return -EOPNOTSUPP;
addr &= KVM_PIT_CHANNEL_MASK;
if (addr == 3)
return 0;
s = &pit_state->channels[addr];
mutex_lock(&pit_state->lock);
if (s->status_latched) {
s->status_latched = 0;
ret = s->status;
} else if (s->count_latched) {
switch (s->count_latched) {
default:
case RW_STATE_LSB:
ret = s->latched_count & 0xff;
s->count_latched = 0;
break;
case RW_STATE_MSB:
ret = s->latched_count >> 8;
s->count_latched = 0;
break;
case RW_STATE_WORD0:
ret = s->latched_count & 0xff;
s->count_latched = RW_STATE_MSB;
break;
}
} else {
switch (s->read_state) {
default:
case RW_STATE_LSB:
count = pit_get_count(kvm, addr);
ret = count & 0xff;
break;
case RW_STATE_MSB:
count = pit_get_count(kvm, addr);
ret = (count >> 8) & 0xff;
break;
case RW_STATE_WORD0:
count = pit_get_count(kvm, addr);
ret = count & 0xff;
s->read_state = RW_STATE_WORD1;
break;
case RW_STATE_WORD1:
count = pit_get_count(kvm, addr);
ret = (count >> 8) & 0xff;
s->read_state = RW_STATE_WORD0;
break;
}
}
if (len > sizeof(ret))
len = sizeof(ret);
memcpy(data, (char *)&ret, len);
mutex_unlock(&pit_state->lock);
return 0;
}
static int speaker_ioport_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
u32 val = *(u32 *) data;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
return -EOPNOTSUPP;
mutex_lock(&pit_state->lock);
pit_state->speaker_data_on = (val >> 1) & 1;
pit_set_gate(kvm, 2, val & 1);
mutex_unlock(&pit_state->lock);
return 0;
}
static int speaker_ioport_read(struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
struct kvm *kvm = pit->kvm;
unsigned int refresh_clock;
int ret;
if (addr != KVM_SPEAKER_BASE_ADDRESS)
return -EOPNOTSUPP;
/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
mutex_lock(&pit_state->lock);
ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
(pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
if (len > sizeof(ret))
len = sizeof(ret);
memcpy(data, (char *)&ret, len);
mutex_unlock(&pit_state->lock);
return 0;
}
void kvm_pit_reset(struct kvm_pit *pit)
{
int i;
struct kvm_kpit_channel_state *c;
mutex_lock(&pit->pit_state.lock);
pit->pit_state.flags = 0;
for (i = 0; i < 3; i++) {
c = &pit->pit_state.channels[i];
c->mode = 0xff;
c->gate = (i != 2);
pit_load_count(pit->kvm, i, 0);
}
mutex_unlock(&pit->pit_state.lock);
atomic_set(&pit->pit_state.pit_timer.pending, 0);
pit->pit_state.irq_ack = 1;
}
static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
{
struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
if (!mask) {
atomic_set(&pit->pit_state.pit_timer.pending, 0);
pit->pit_state.irq_ack = 1;
}
}
static const struct kvm_io_device_ops pit_dev_ops = {
.read = pit_ioport_read,
.write = pit_ioport_write,
};
static const struct kvm_io_device_ops speaker_dev_ops = {
.read = speaker_ioport_read,
.write = speaker_ioport_write,
};
/* Caller must have writers lock on slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
int ret;
pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
if (!pit)
return NULL;
pit->irq_source_id = kvm_request_irq_source_id(kvm);
if (pit->irq_source_id < 0) {
kfree(pit);
return NULL;
}
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
kvm->arch.vpit = pit;
pit->kvm = kvm;
pit_state = &pit->pit_state;
pit_state->pit = pit;
hrtimer_init(&pit_state->pit_timer.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
pit_state->pit_timer.reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
pit->mask_notifier.func = pit_mask_notifer;
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
&pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
}
return pit;
fail_unregister:
__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
fail:
if (pit->irq_source_id >= 0)
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
}
void kvm_free_pit(struct kvm *kvm)
{
struct hrtimer *timer;
if (kvm->arch.vpit) {
kvm_unregister_irq_mask_notifier(kvm, 0,
&kvm->arch.vpit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm,
&kvm->arch.vpit->pit_state.irq_ack_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
kfree(kvm->arch.vpit);
}
}
static void __inject_pit_timer_intr(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
* The route is: PIT -> PIC -> LVT0 in NMI mode.
*
* Note: Our Virtual Wire implementation is simplified, only
* propagating PIT interrupts to all VCPUs when they have set
* LVT0 to NMI delivery. Other PIC interrupts are just sent to
* VCPU0, and only if its LVT0 is in EXTINT mode.
*/
if (kvm->arch.vapics_in_nmi_mode > 0)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_apic_nmi_wd_deliver(vcpu);
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
struct kvm *kvm = vcpu->kvm;
struct kvm_kpit_state *ps;
if (pit) {
int inject = 0;
ps = &pit->pit_state;
/* Try to inject pending interrupts when
* last one has been acked.
*/
spin_lock(&ps->inject_lock);
if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
ps->irq_ack = 0;
inject = 1;
}
spin_unlock(&ps->inject_lock);
if (inject)
__inject_pit_timer_intr(kvm);
}
}

View File

@@ -0,0 +1,58 @@
#ifndef __I8254_H
#define __I8254_H
#include "iodev.h"
struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
u16 latched_count;
u8 count_latched;
u8 status_latched;
u8 status;
u8 read_state;
u8 write_state;
u8 write_latch;
u8 rw_mode;
u8 mode;
u8 bcd; /* not supported */
u8 gate; /* timer start */
ktime_t count_load_time;
};
struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3];
u32 flags;
struct kvm_timer pit_timer;
bool is_periodic;
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
spinlock_t inject_lock;
unsigned long irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};
struct kvm_pit {
unsigned long base_addresss;
struct kvm_io_device dev;
struct kvm_io_device speaker_dev;
struct kvm *kvm;
struct kvm_kpit_state pit_state;
int irq_source_id;
struct kvm_irq_mask_notifier mask_notifier;
};
#define KVM_PIT_BASE_ADDRESS 0x40
#define KVM_SPEAKER_BASE_ADDRESS 0x61
#define KVM_PIT_MEM_LENGTH 4
#define KVM_PIT_FREQ 1193181
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
void kvm_pit_reset(struct kvm_pit *pit);
#endif

537
kernel/arch/x86/kvm/i8259.c Normal file
View File

@@ -0,0 +1,537 @@
/*
* 8259 interrupt controller emulation
*
* Copyright (c) 2003-2004 Fabrice Bellard
* Copyright (c) 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
* Port from Qemu.
*/
#include <linux/mm.h>
#include <linux/bitops.h>
#include "irq.h"
#include <linux/kvm_host.h>
#include "trace.h"
static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
{
s->isr &= ~(1 << irq);
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
struct kvm_pic *s = pic_irqchip(kvm);
spin_lock(&s->lock);
s->pics[0].isr_ack = 0xff;
s->pics[1].isr_ack = 0xff;
spin_unlock(&s->lock);
}
/*
* set irq level. If an edge is detected, then the IRR is set to 1
*/
static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
{
int mask, ret = 1;
mask = 1 << irq;
if (s->elcr & mask) /* level triggered */
if (level) {
ret = !(s->irr & mask);
s->irr |= mask;
s->last_irr |= mask;
} else {
s->irr &= ~mask;
s->last_irr &= ~mask;
}
else /* edge triggered */
if (level) {
if ((s->last_irr & mask) == 0) {
ret = !(s->irr & mask);
s->irr |= mask;
}
s->last_irr |= mask;
} else
s->last_irr &= ~mask;
return (s->imr & mask) ? -1 : ret;
}
/*
* return the highest priority found in mask (highest = smallest
* number). Return 8 if no irq
*/
static inline int get_priority(struct kvm_kpic_state *s, int mask)
{
int priority;
if (mask == 0)
return 8;
priority = 0;
while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
priority++;
return priority;
}
/*
* return the pic wanted interrupt. return -1 if none
*/
static int pic_get_irq(struct kvm_kpic_state *s)
{
int mask, cur_priority, priority;
mask = s->irr & ~s->imr;
priority = get_priority(s, mask);
if (priority == 8)
return -1;
/*
* compute current priority. If special fully nested mode on the
* master, the IRQ coming from the slave is not taken into account
* for the priority computation.
*/
mask = s->isr;
if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
mask &= ~(1 << 2);
cur_priority = get_priority(s, mask);
if (priority < cur_priority)
/*
* higher priority found: an irq should be generated
*/
return (priority + s->priority_add) & 7;
else
return -1;
}
/*
* raise irq to CPU if necessary. must be called every time the active
* irq may change
*/
static void pic_update_irq(struct kvm_pic *s)
{
int irq2, irq;
irq2 = pic_get_irq(&s->pics[1]);
if (irq2 >= 0) {
/*
* if irq request by slave pic, signal master PIC
*/
pic_set_irq1(&s->pics[0], 2, 1);
pic_set_irq1(&s->pics[0], 2, 0);
}
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0)
s->irq_request(s->irq_request_opaque, 1);
else
s->irq_request(s->irq_request_opaque, 0);
}
void kvm_pic_update_irq(struct kvm_pic *s)
{
spin_lock(&s->lock);
pic_update_irq(s);
spin_unlock(&s->lock);
}
int kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
int ret = -1;
spin_lock(&s->lock);
if (irq >= 0 && irq < PIC_NUM_PINS) {
ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
s->pics[irq >> 3].imr, ret == 0);
}
spin_unlock(&s->lock);
return ret;
}
/*
* acknowledge interrupt 'irq'
*/
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
s->isr |= 1 << irq;
if (s->auto_eoi) {
if (s->rotate_on_auto_eoi)
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
}
/*
* We don't clear a level sensitive interrupt here
*/
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
}
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
spin_lock(&s->lock);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
if (irq == 2) {
irq2 = pic_get_irq(&s->pics[1]);
if (irq2 >= 0)
pic_intack(&s->pics[1], irq2);
else
/*
* spurious IRQ on slave controller
*/
irq2 = 7;
intno = s->pics[1].irq_base + irq2;
irq = irq2 + 8;
} else
intno = s->pics[0].irq_base + irq;
} else {
/*
* spurious IRQ on host controller
*/
irq = 7;
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
spin_unlock(&s->lock);
return intno;
}
void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, irqbase, n;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
if (s == &s->pics_state->pics[0])
irqbase = 0;
else
irqbase = 8;
for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
n = irq + irqbase;
kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
}
}
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
s->isr = 0;
s->isr_ack = 0xff;
s->priority_add = 0;
s->irq_base = 0;
s->read_reg_select = 0;
s->poll = 0;
s->special_mask = 0;
s->init_state = 0;
s->auto_eoi = 0;
s->rotate_on_auto_eoi = 0;
s->special_fully_nested_mode = 0;
s->init4 = 0;
}
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
{
struct kvm_kpic_state *s = opaque;
int priority, cmd, irq;
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
kvm_pic_reset(s); /* init */
/*
* deassert a pending interrupt
*/
s->pics_state->irq_request(s->pics_state->
irq_request_opaque, 0);
s->init_state = 1;
s->init4 = val & 1;
if (val & 0x02)
printk(KERN_ERR "single mode not supported");
if (val & 0x08)
printk(KERN_ERR
"level sensitive irq not supported");
} else if (val & 0x08) {
if (val & 0x04)
s->poll = 1;
if (val & 0x02)
s->read_reg_select = val & 1;
if (val & 0x40)
s->special_mask = (val >> 5) & 1;
} else {
cmd = val >> 5;
switch (cmd) {
case 0:
case 4:
s->rotate_on_auto_eoi = cmd >> 2;
break;
case 1: /* end of interrupt */
case 5:
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
pic_clear_isr(s, irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
pic_update_irq(s->pics_state);
}
break;
case 3:
irq = val & 7;
pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
break;
case 6:
s->priority_add = (val + 1) & 7;
pic_update_irq(s->pics_state);
break;
case 7:
irq = val & 7;
s->priority_add = (irq + 1) & 7;
pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
break;
default:
break; /* no operation */
}
}
} else
switch (s->init_state) {
case 0: /* normal mode */
s->imr = val;
pic_update_irq(s->pics_state);
break;
case 1:
s->irq_base = val & 0xf8;
s->init_state = 2;
break;
case 2:
if (s->init4)
s->init_state = 3;
else
s->init_state = 0;
break;
case 3:
s->special_fully_nested_mode = (val >> 4) & 1;
s->auto_eoi = (val >> 1) & 1;
s->init_state = 0;
break;
}
}
static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
{
int ret;
ret = pic_get_irq(s);
if (ret >= 0) {
if (addr1 >> 7) {
s->pics_state->pics[0].isr &= ~(1 << 2);
s->pics_state->pics[0].irr &= ~(1 << 2);
}
s->irr &= ~(1 << ret);
pic_clear_isr(s, ret);
if (addr1 >> 7 || ret != 2)
pic_update_irq(s->pics_state);
} else {
ret = 0x07;
pic_update_irq(s->pics_state);
}
return ret;
}
static u32 pic_ioport_read(void *opaque, u32 addr1)
{
struct kvm_kpic_state *s = opaque;
unsigned int addr;
int ret;
addr = addr1;
addr &= 1;
if (s->poll) {
ret = pic_poll_read(s, addr1);
s->poll = 0;
} else
if (addr == 0)
if (s->read_reg_select)
ret = s->isr;
else
ret = s->irr;
else
ret = s->imr;
return ret;
}
static void elcr_ioport_write(void *opaque, u32 addr, u32 val)
{
struct kvm_kpic_state *s = opaque;
s->elcr = val & s->elcr_mask;
}
static u32 elcr_ioport_read(void *opaque, u32 addr1)
{
struct kvm_kpic_state *s = opaque;
return s->elcr;
}
static int picdev_in_range(gpa_t addr)
{
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
case 0x4d0:
case 0x4d1:
return 1;
default:
return 0;
}
}
static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
{
return container_of(dev, struct kvm_pic, dev);
}
static int picdev_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *val)
{
struct kvm_pic *s = to_pic(this);
unsigned char data = *(unsigned char *)val;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte write\n");
return 0;
}
spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
pic_ioport_write(&s->pics[addr >> 7], addr, data);
break;
case 0x4d0:
case 0x4d1:
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
spin_unlock(&s->lock);
return 0;
}
static int picdev_read(struct kvm_io_device *this,
gpa_t addr, int len, void *val)
{
struct kvm_pic *s = to_pic(this);
unsigned char data = 0;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
if (len != 1) {
if (printk_ratelimit())
printk(KERN_ERR "PIC: non byte read\n");
return 0;
}
spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
data = pic_ioport_read(&s->pics[addr >> 7], addr);
break;
case 0x4d0:
case 0x4d1:
data = elcr_ioport_read(&s->pics[addr & 1], addr);
break;
}
*(unsigned char *)val = data;
spin_unlock(&s->lock);
return 0;
}
/*
* callback when PIC0 irq status changed
*/
static void pic_irq_request(void *opaque, int level)
{
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm->bsp_vcpu;
struct kvm_pic *s = pic_irqchip(kvm);
int irq = pic_get_irq(&s->pics[0]);
s->output = level;
if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
s->pics[0].isr_ack &= ~(1 << irq);
kvm_vcpu_kick(vcpu);
}
}
static const struct kvm_io_device_ops picdev_ops = {
.read = picdev_read,
.write = picdev_write,
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm)
{
struct kvm_pic *s;
int ret;
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
spin_lock_init(&s->lock);
s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
s->irq_request = pic_irq_request;
s->irq_request_opaque = kvm;
s->pics[0].pics_state = s;
s->pics[1].pics_state = s;
/*
* Initialize PIO device
*/
kvm_iodevice_init(&s->dev, &picdev_ops);
ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
if (ret < 0) {
kfree(s);
return NULL;
}
return s;
}

101
kernel/arch/x86/kvm/irq.c Normal file
View File

@@ -0,0 +1,101 @@
/*
* irq.c: API for in kernel interrupt controller
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#include <linux/module.h>
#include <linux/kvm_host.h>
#include "irq.h"
#include "i8254.h"
#include "x86.h"
/*
* check if there are pending timer events
* to be processed.
*/
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
int ret;
ret = pit_has_pending_timer(vcpu);
ret |= apic_has_pending_timer(vcpu);
return ret;
}
EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
/*
* check if there is pending interrupt without
* intack.
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
struct kvm_pic *s;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm); /* PIC */
return s->output;
} else
return 0;
}
return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
/*
* Read pending interrupt vector and intack.
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
struct kvm_pic *s;
int vector;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;
vector = kvm_get_apic_interrupt(v); /* APIC */
if (vector == -1) {
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
vector = kvm_pic_read_irq(v->kvm);
}
}
return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
}

103
kernel/arch/x86/kvm/irq.h Normal file
View File

@@ -0,0 +1,103 @@
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#ifndef __IRQ_H
#define __IRQ_H
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
#include <linux/spinlock.h>
#include "iodev.h"
#include "ioapic.h"
#include "lapic.h"
#define PIC_NUM_PINS 16
#define SELECT_PIC(irq) \
((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
struct kvm;
struct kvm_vcpu;
typedef void irq_request_func(void *opaque, int level);
struct kvm_kpic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
u8 isr_ack; /* interrupt ack detection */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
u8 poll;
u8 special_mask;
u8 init_state;
u8 auto_eoi;
u8 rotate_on_auto_eoi;
u8 special_fully_nested_mode;
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
struct kvm_pic *pics_state;
};
struct kvm_pic {
spinlock_t lock;
unsigned pending_acks;
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
irq_request_func *irq_request;
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
return kvm->arch.vpic;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return pic_irqchip(kvm) != NULL;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int pit_has_pending_timer(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
#endif

View File

@@ -0,0 +1,41 @@
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, reg);
return vcpu->arch.regs[reg];
}
static inline void kvm_register_write(struct kvm_vcpu *vcpu,
enum kvm_reg reg,
unsigned long val)
{
vcpu->arch.regs[reg] = val;
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}
static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
return kvm_register_read(vcpu, VCPU_REGS_RIP);
}
static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
{
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
if (!test_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_avail))
kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
return vcpu->arch.pdptrs[index];
}
#endif

View File

@@ -0,0 +1,18 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
atomic_t pending; /* accumulated triggered timers */
bool reinject;
struct kvm_timer_ops *t_ops;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
};
struct kvm_timer_ops {
bool (*is_periodic)(struct kvm_timer *);
};
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);

1254
kernel/arch/x86/kvm/lapic.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
#ifndef __KVM_X86_LAPIC_H
#define __KVM_X86_LAPIC_H
#include "iodev.h"
#include "kvm_timer.h"
#include <linux/kvm_host.h>
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct kvm_timer lapic_timer;
u32 divide_count;
struct kvm_vcpu *vcpu;
bool irr_pending;
struct page *regs_page;
void *regs;
gpa_t vapic_addr;
struct page *vapic_page;
};
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
void kvm_apic_set_version(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
bool kvm_apic_present(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
#endif

3449
kernel/arch/x86/kvm/mmu.c Normal file

File diff suppressed because it is too large Load Diff

91
kernel/arch/x86/kvm/mmu.h Normal file
View File

@@ -0,0 +1,91 @@
#ifndef __KVM_X86_MMU_H
#define __KVM_X86_MMU_H
#include <linux/kvm_host.h>
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_PT_BITS 10
#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
#define PT_WRITABLE_SHIFT 1
#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_SHIFT 5
#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_SHIFT 63
#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RSVD_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
__kvm_mmu_free_some_pages(vcpu);
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->arch.shadow_efer & EFER_LMA;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PAE;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PSE;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr0 & X86_CR0_PG;
}
static inline int is_present_gpte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
#endif

View File

@@ -0,0 +1,220 @@
#if !defined(_TRACE_KVMMMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVMMMU_H
#include <linux/tracepoint.h>
#include <linux/ftrace_event.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvmmmu
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE mmutrace
#define KVM_MMU_PAGE_FIELDS \
__field(__u64, gfn) \
__field(__u32, role) \
__field(__u32, root_count) \
__field(__u32, unsync)
#define KVM_MMU_PAGE_ASSIGN(sp) \
__entry->gfn = sp->gfn; \
__entry->role = sp->role.word; \
__entry->root_count = sp->root_count; \
__entry->unsync = sp->unsync;
#define KVM_MMU_PAGE_PRINTK() ({ \
const char *ret = p->buffer + p->len; \
static const char *access_str[] = { \
"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
}; \
union kvm_mmu_page_role role; \
\
role.word = __entry->role; \
\
trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \
" %snxe root %u %s%c", \
__entry->gfn, role.level, role.glevels, \
role.quadrant, \
role.direct ? " direct" : "", \
access_str[role.access], \
role.invalid ? " invalid" : "", \
role.cr4_pge ? "" : "!", \
role.nxe ? "" : "!", \
__entry->root_count, \
__entry->unsync ? "unsync" : "sync", 0); \
ret; \
})
#define kvm_mmu_trace_pferr_flags \
{ PFERR_PRESENT_MASK, "P" }, \
{ PFERR_WRITE_MASK, "W" }, \
{ PFERR_USER_MASK, "U" }, \
{ PFERR_RSVD_MASK, "RSVD" }, \
{ PFERR_FETCH_MASK, "F" }
/*
* A pagetable walk has started
*/
TRACE_EVENT(
kvm_mmu_pagetable_walk,
TP_PROTO(u64 addr, int write_fault, int user_fault, int fetch_fault),
TP_ARGS(addr, write_fault, user_fault, fetch_fault),
TP_STRUCT__entry(
__field(__u64, addr)
__field(__u32, pferr)
),
TP_fast_assign(
__entry->addr = addr;
__entry->pferr = (!!write_fault << 1) | (!!user_fault << 2)
| (!!fetch_fault << 4);
),
TP_printk("addr %llx pferr %x %s", __entry->addr, __entry->pferr,
__print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);
/* We just walked a paging element */
TRACE_EVENT(
kvm_mmu_paging_element,
TP_PROTO(u64 pte, int level),
TP_ARGS(pte, level),
TP_STRUCT__entry(
__field(__u64, pte)
__field(__u32, level)
),
TP_fast_assign(
__entry->pte = pte;
__entry->level = level;
),
TP_printk("pte %llx level %u", __entry->pte, __entry->level)
);
/* We set a pte accessed bit */
TRACE_EVENT(
kvm_mmu_set_accessed_bit,
TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
TP_ARGS(table_gfn, index, size),
TP_STRUCT__entry(
__field(__u64, gpa)
),
TP_fast_assign(
__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+ index * size;
),
TP_printk("gpa %llx", __entry->gpa)
);
/* We set a pte dirty bit */
TRACE_EVENT(
kvm_mmu_set_dirty_bit,
TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
TP_ARGS(table_gfn, index, size),
TP_STRUCT__entry(
__field(__u64, gpa)
),
TP_fast_assign(
__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
+ index * size;
),
TP_printk("gpa %llx", __entry->gpa)
);
TRACE_EVENT(
kvm_mmu_walker_error,
TP_PROTO(u32 pferr),
TP_ARGS(pferr),
TP_STRUCT__entry(
__field(__u32, pferr)
),
TP_fast_assign(
__entry->pferr = pferr;
),
TP_printk("pferr %x %s", __entry->pferr,
__print_flags(__entry->pferr, "|", kvm_mmu_trace_pferr_flags))
);
TRACE_EVENT(
kvm_mmu_get_page,
TP_PROTO(struct kvm_mmu_page *sp, bool created),
TP_ARGS(sp, created),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
__field(bool, created)
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
__entry->created = created;
),
TP_printk("%s %s", KVM_MMU_PAGE_PRINTK(),
__entry->created ? "new" : "existing")
);
TRACE_EVENT(
kvm_mmu_sync_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
TRACE_EVENT(
kvm_mmu_unsync_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
TRACE_EVENT(
kvm_mmu_zap_page,
TP_PROTO(struct kvm_mmu_page *sp),
TP_ARGS(sp),
TP_STRUCT__entry(
KVM_MMU_PAGE_FIELDS
),
TP_fast_assign(
KVM_MMU_PAGE_ASSIGN(sp)
),
TP_printk("%s", KVM_MMU_PAGE_PRINTK())
);
#endif /* _TRACE_KVMMMU_H */
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@@ -0,0 +1,644 @@
/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
/*
* We need the mmu code to access both 32-bit and 64-bit guest ptes,
* so the code in this file is compiled twice, once per pte size.
*/
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
#else
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 2
#endif
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
#define CMPXCHG cmpxchg
#else
#error Invalid PTTYPE value
#endif
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
/*
* The guest_walker structure emulates the behavior of the hardware page
* table walker.
*/
struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t ptes[PT_MAX_FULL_LEVELS];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
u32 error_code;
};
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
gfn_t table_gfn, unsigned index,
pt_element_t orig_pte, pt_element_t new_pte)
{
pt_element_t ret;
pt_element_t *table;
struct page *page;
page = gfn_to_page(kvm, table_gfn);
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
kunmap_atomic(table, KM_USER0);
kvm_release_page_dirty(page);
return (ret != orig_pte);
}
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
unsigned access;
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
if (is_nx(vcpu))
access &= ~(gpte >> PT64_NX_SHIFT);
#endif
return access;
}
/*
* Fetch a guest pte for a guest virtual address
*/
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr,
int write_fault, int user_fault, int fetch_fault)
{
pt_element_t pte;
gfn_t table_gfn;
unsigned index, pt_access, pte_access;
gpa_t pte_gpa;
int rsvd_fault = 0;
trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
fetch_fault);
walk:
walker->level = vcpu->arch.mmu.root_level;
pte = vcpu->arch.cr3;
#if PTTYPE == 64
if (!is_long_mode(vcpu)) {
pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte))
goto not_present;
--walker->level;
}
#endif
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
pt_access = ACC_ALL;
for (;;) {
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
pte_gpa = gfn_to_gpa(table_gfn);
pte_gpa += index * sizeof(pt_element_t);
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
goto not_present;
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte))
goto not_present;
rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
if (rsvd_fault)
goto access_error;
if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu))
goto access_error;
if (user_fault && !(pte & PT_USER_MASK))
goto access_error;
#if PTTYPE == 64
if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
goto access_error;
#endif
if (!(pte & PT_ACCESSED_MASK)) {
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
sizeof(pte));
mark_page_dirty(vcpu->kvm, table_gfn);
if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
index, pte, pte|PT_ACCESSED_MASK))
goto walk;
pte |= PT_ACCESSED_MASK;
}
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
walker->ptes[walker->level - 1] = pte;
if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
((walker->level == PT_DIRECTORY_LEVEL) &&
(pte & PT_PAGE_SIZE_MASK) &&
(PTTYPE == 64 || is_pse(vcpu))) ||
((walker->level == PT_PDPE_LEVEL) &&
(pte & PT_PAGE_SIZE_MASK) &&
is_long_mode(vcpu))) {
int lvl = walker->level;
walker->gfn = gpte_to_gfn_lvl(pte, lvl);
walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
>> PAGE_SHIFT;
if (PTTYPE == 32 &&
walker->level == PT_DIRECTORY_LEVEL &&
is_cpuid_PSE36())
walker->gfn += pse36_gfn_delta(pte);
break;
}
pt_access = pte_access;
--walker->level;
}
if (write_fault && !is_dirty_gpte(pte)) {
bool ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
mark_page_dirty(vcpu->kvm, table_gfn);
ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
pte|PT_DIRTY_MASK);
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
walker->ptes[walker->level - 1] = pte;
}
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
__func__, (u64)pte, pt_access, pte_access);
return 1;
not_present:
walker->error_code = 0;
goto err;
access_error:
walker->error_code = PFERR_PRESENT_MASK;
err:
if (write_fault)
walker->error_code |= PFERR_WRITE_MASK;
if (user_fault)
walker->error_code |= PFERR_USER_MASK;
if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
trace_kvm_mmu_walker_error(walker->error_code);
return 0;
}
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
pfn_t pfn;
gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
if (!is_present_gpte(gpte))
__set_spte(spte, shadow_notrap_nonpresent_pte);
return;
}
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
return;
pfn = vcpu->arch.update_pte.pfn;
if (is_error_pfn(pfn))
return;
if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
return;
kvm_get_pfn(pfn);
/*
* we call mmu_set_spte() with reset_host_protection = true beacuse that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true, true);
}
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int user_fault, int write_fault, int hlevel,
int *ptwrite, pfn_t pfn)
{
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
u64 spte, *sptep = NULL;
int direct;
gfn_t table_gfn;
int r;
int level;
pt_element_t curr_pte;
struct kvm_shadow_walk_iterator iterator;
if (!is_present_gpte(gw->ptes[gw->level - 1]))
return NULL;
for_each_shadow_entry(vcpu, addr, iterator) {
level = iterator.level;
sptep = iterator.sptep;
if (iterator.level == hlevel) {
mmu_set_spte(vcpu, sptep, access,
gw->pte_access & access,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, level,
gw->gfn, pfn, false, true);
break;
}
if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) {
struct kvm_mmu_page *child;
unsigned direct_access;
if (level != gw->level)
continue;
/*
* For the direct sp, if the guest pte's dirty bit
* changed form clean to dirty, it will corrupt the
* sp's access: allow writable in the read-only sp,
* so we should update the spte at this point to get
* a new sp with the correct access.
*/
direct_access = gw->pt_access & gw->pte_access;
if (!is_dirty_gpte(gw->ptes[gw->level - 1]))
direct_access &= ~ACC_WRITE_MASK;
child = page_header(*sptep & PT64_BASE_ADDR_MASK);
if (child->role.access == direct_access)
continue;
mmu_page_remove_parent_pte(child, sptep);
__set_spte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
}
if (is_large_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
__set_spte(sptep, shadow_trap_nonpresent_pte);
kvm_flush_remote_tlbs(vcpu->kvm);
}
if (level <= gw->level) {
int delta = level - gw->level + 1;
direct = 1;
if (!is_dirty_gpte(gw->ptes[level - delta]))
access &= ~ACC_WRITE_MASK;
table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
/* advance table_gfn when emulating 1gb pages with 4k */
if (delta == 0)
table_gfn += PT_INDEX(addr, level);
access &= gw->pte_access;
} else {
direct = 0;
table_gfn = gw->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
direct, access, sptep);
if (!direct) {
r = kvm_read_guest_atomic(vcpu->kvm,
gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
kvm_mmu_put_page(shadow_page, sptep);
kvm_release_pfn_clean(pfn);
sptep = NULL;
break;
}
}
spte = __pa(shadow_page->spt)
| PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*sptep = spte;
}
return sptep;
}
/*
* Page fault handler. There are several causes for a page fault:
* - there is no shadow pte for the guest pte
* - write access through a shadow pte marked read only so that we can set
* the dirty bit
* - write access to a shadow pte marked read only so we can update the page
* dirty bitmap, when userspace requests it
* - mmio access; in this case we will never install a present shadow pte
* - normal guest page fault due to the guest pte marked not present, not
* writable, or not executable
*
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u32 error_code)
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
int fetch_fault = error_code & PFERR_FETCH_MASK;
struct guest_walker walker;
u64 *sptep;
int write_pt = 0;
int r;
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
unsigned long mmu_seq;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
kvm_mmu_audit(vcpu, "pre page fault");
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
/*
* Look up the guest pte for the faulting address.
*/
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
pgprintk("%s: guest page fault\n", __func__);
inject_page_fault(vcpu, addr, walker.error_code);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
return 0;
}
if (walker.level >= PT_DIRECTORY_LEVEL) {
level = min(walker.level, mapping_level(vcpu, walker.gfn));
walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
/* mmio */
if (is_error_pfn(pfn)) {
pgprintk("gfn %lx is mmio\n", walker.gfn);
kvm_release_pfn_clean(pfn);
return 1;
}
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
level, &write_pt, pfn);
pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
sptep, *sptep, write_pt);
if (!write_pt)
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
spin_unlock(&vcpu->kvm->mmu_lock);
return write_pt;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return 0;
}
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_shadow_walk_iterator iterator;
int level;
u64 *sptep;
int need_flush = 0;
spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) {
level = iterator.level;
sptep = iterator.sptep;
/* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
if (is_shadow_present_pte(*sptep)) {
rmap_remove(vcpu->kvm, sptep);
if (is_large_pte(*sptep))
--vcpu->kvm->stat.lpages;
need_flush = 1;
}
__set_spte(sptep, shadow_trap_nonpresent_pte);
break;
}
if (!is_shadow_present_pte(*sptep))
break;
}
if (need_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
spin_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
u32 *error)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
r = FNAME(walk_addr)(&walker, vcpu, vaddr,
!!(access & PFERR_WRITE_MASK),
!!(access & PFERR_USER_MASK),
!!(access & PFERR_FETCH_MASK));
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
} else if (error)
*error = walker.error_code;
return gpa;
}
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp)
{
int i, j, offset, r;
pt_element_t pt[256 / sizeof(pt_element_t)];
gpa_t pte_gpa;
if (sp->role.direct
|| (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
nonpaging_prefetch_page(vcpu, sp);
return;
}
pte_gpa = gfn_to_gpa(sp->gfn);
if (PTTYPE == 32) {
offset = sp->role.quadrant << PT64_LEVEL_BITS;
pte_gpa += offset * sizeof(pt_element_t);
}
for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
for (j = 0; j < ARRAY_SIZE(pt); ++j)
if (r || is_present_gpte(pt[j]))
sp->spt[i+j] = shadow_trap_nonpresent_pte;
else
sp->spt[i+j] = shadow_notrap_nonpresent_pte;
}
}
/*
* Using the cached information from sp->gfns is safe because:
* - The spte has a reference to the struct page, so the pfn for a given gfn
* can't change unless all sptes pointing to it are nuked first.
* - Alias changes zap the entire shadow cache.
*/
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
bool reset_host_protection;
offset = nr_present = 0;
if (PTTYPE == 32)
offset = sp->role.quadrant << PT64_LEVEL_BITS;
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
unsigned pte_access;
pt_element_t gpte;
gpa_t pte_gpa;
gfn_t gfn = sp->gfns[i];
if (!is_shadow_present_pte(sp->spt[i]))
continue;
pte_gpa = gfn_to_gpa(sp->gfn);
pte_gpa += (i+offset) * sizeof(pt_element_t);
if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
sizeof(pt_element_t)))
return -EINVAL;
if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
!(gpte & PT_ACCESSED_MASK)) {
u64 nonpresent;
rmap_remove(vcpu->kvm, &sp->spt[i]);
if (is_present_gpte(gpte))
nonpresent = shadow_trap_nonpresent_pte;
else
nonpresent = shadow_notrap_nonpresent_pte;
__set_spte(&sp->spt[i], nonpresent);
continue;
}
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
pte_access &= ~ACC_WRITE_MASK;
reset_host_protection = 0;
} else {
reset_host_protection = 1;
}
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false,
reset_host_protection);
}
return !nr_present;
}
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG

3027
kernel/arch/x86/kvm/svm.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,50 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hrtimer.h>
#include <asm/atomic.h>
#include "kvm_timer.h"
static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
{
int restart_timer = 0;
wait_queue_head_t *q = &vcpu->wq;
/*
* There is a race window between reading and incrementing, but we do
* not care about potentially loosing timer events in the !reinject
* case anyway.
*/
if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
atomic_inc(&ktimer->pending);
/* FIXME: this code should not know anything about vcpus */
set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
}
if (waitqueue_active(q))
wake_up_interruptible(q);
if (ktimer->t_ops->is_periodic(ktimer)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
restart_timer = 1;
}
return restart_timer;
}
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
{
int restart_timer;
struct kvm_vcpu *vcpu;
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
vcpu = ktimer->vcpu;
if (!vcpu)
return HRTIMER_NORESTART;
restart_timer = __kvm_timer_fn(vcpu, ktimer);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}

355
kernel/arch/x86/kvm/trace.h Normal file
View File

@@ -0,0 +1,355 @@
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_FILE trace
/*
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_entry,
TP_PROTO(unsigned int vcpu_id),
TP_ARGS(vcpu_id),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
),
TP_fast_assign(
__entry->vcpu_id = vcpu_id;
),
TP_printk("vcpu %u", __entry->vcpu_id)
);
/*
* Tracepoint for hypercall.
*/
TRACE_EVENT(kvm_hypercall,
TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3),
TP_ARGS(nr, a0, a1, a2, a3),
TP_STRUCT__entry(
__field( unsigned long, nr )
__field( unsigned long, a0 )
__field( unsigned long, a1 )
__field( unsigned long, a2 )
__field( unsigned long, a3 )
),
TP_fast_assign(
__entry->nr = nr;
__entry->a0 = a0;
__entry->a1 = a1;
__entry->a2 = a2;
__entry->a3 = a3;
),
TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
__entry->nr, __entry->a0, __entry->a1, __entry->a2,
__entry->a3)
);
/*
* Tracepoint for PIO.
*/
TRACE_EVENT(kvm_pio,
TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
unsigned int count),
TP_ARGS(rw, port, size, count),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, port )
__field( unsigned int, size )
__field( unsigned int, count )
),
TP_fast_assign(
__entry->rw = rw;
__entry->port = port;
__entry->size = size;
__entry->count = count;
),
TP_printk("pio_%s at 0x%x size %d count %d",
__entry->rw ? "write" : "read",
__entry->port, __entry->size, __entry->count)
);
/*
* Tracepoint for cpuid.
*/
TRACE_EVENT(kvm_cpuid,
TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
unsigned long rcx, unsigned long rdx),
TP_ARGS(function, rax, rbx, rcx, rdx),
TP_STRUCT__entry(
__field( unsigned int, function )
__field( unsigned long, rax )
__field( unsigned long, rbx )
__field( unsigned long, rcx )
__field( unsigned long, rdx )
),
TP_fast_assign(
__entry->function = function;
__entry->rax = rax;
__entry->rbx = rbx;
__entry->rcx = rcx;
__entry->rdx = rdx;
),
TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
__entry->function, __entry->rax,
__entry->rbx, __entry->rcx, __entry->rdx)
);
#define AREG(x) { APIC_##x, "APIC_" #x }
#define kvm_trace_symbol_apic \
AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \
AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \
AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \
AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \
AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \
AREG(ECTRL)
/*
* Tracepoint for apic access.
*/
TRACE_EVENT(kvm_apic,
TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
TP_ARGS(rw, reg, val),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, reg )
__field( unsigned int, val )
),
TP_fast_assign(
__entry->rw = rw;
__entry->reg = reg;
__entry->val = val;
),
TP_printk("apic_%s %s = 0x%x",
__entry->rw ? "write" : "read",
__print_symbolic(__entry->reg, kvm_trace_symbol_apic),
__entry->val)
);
#define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val)
#define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val)
/*
* Tracepoint for kvm guest exit:
*/
TRACE_EVENT(kvm_exit,
TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
TP_ARGS(exit_reason, guest_rip),
TP_STRUCT__entry(
__field( unsigned int, exit_reason )
__field( unsigned long, guest_rip )
),
TP_fast_assign(
__entry->exit_reason = exit_reason;
__entry->guest_rip = guest_rip;
),
TP_printk("reason %s rip 0x%lx",
ftrace_print_symbols_seq(p, __entry->exit_reason,
kvm_x86_ops->exit_reasons_str),
__entry->guest_rip)
);
/*
* Tracepoint for kvm interrupt injection:
*/
TRACE_EVENT(kvm_inj_virq,
TP_PROTO(unsigned int irq),
TP_ARGS(irq),
TP_STRUCT__entry(
__field( unsigned int, irq )
),
TP_fast_assign(
__entry->irq = irq;
),
TP_printk("irq %u", __entry->irq)
);
/*
* Tracepoint for page fault.
*/
TRACE_EVENT(kvm_page_fault,
TP_PROTO(unsigned long fault_address, unsigned int error_code),
TP_ARGS(fault_address, error_code),
TP_STRUCT__entry(
__field( unsigned long, fault_address )
__field( unsigned int, error_code )
),
TP_fast_assign(
__entry->fault_address = fault_address;
__entry->error_code = error_code;
),
TP_printk("address %lx error_code %x",
__entry->fault_address, __entry->error_code)
);
/*
* Tracepoint for guest MSR access.
*/
TRACE_EVENT(kvm_msr,
TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
TP_ARGS(rw, ecx, data),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, ecx )
__field( unsigned long, data )
),
TP_fast_assign(
__entry->rw = rw;
__entry->ecx = ecx;
__entry->data = data;
),
TP_printk("msr_%s %x = 0x%lx",
__entry->rw ? "write" : "read",
__entry->ecx, __entry->data)
);
#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data)
#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data)
/*
* Tracepoint for guest CR access.
*/
TRACE_EVENT(kvm_cr,
TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
TP_ARGS(rw, cr, val),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, cr )
__field( unsigned long, val )
),
TP_fast_assign(
__entry->rw = rw;
__entry->cr = cr;
__entry->val = val;
),
TP_printk("cr_%s %x = 0x%lx",
__entry->rw ? "write" : "read",
__entry->cr, __entry->val)
);
#define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val)
#define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val)
TRACE_EVENT(kvm_pic_set_irq,
TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced),
TP_ARGS(chip, pin, elcr, imr, coalesced),
TP_STRUCT__entry(
__field( __u8, chip )
__field( __u8, pin )
__field( __u8, elcr )
__field( __u8, imr )
__field( bool, coalesced )
),
TP_fast_assign(
__entry->chip = chip;
__entry->pin = pin;
__entry->elcr = elcr;
__entry->imr = imr;
__entry->coalesced = coalesced;
),
TP_printk("chip %u pin %u (%s%s)%s",
__entry->chip, __entry->pin,
(__entry->elcr & (1 << __entry->pin)) ? "level":"edge",
(__entry->imr & (1 << __entry->pin)) ? "|masked":"",
__entry->coalesced ? " (coalesced)" : "")
);
#define kvm_apic_dst_shorthand \
{0x0, "dst"}, \
{0x1, "self"}, \
{0x2, "all"}, \
{0x3, "all-but-self"}
TRACE_EVENT(kvm_apic_ipi,
TP_PROTO(__u32 icr_low, __u32 dest_id),
TP_ARGS(icr_low, dest_id),
TP_STRUCT__entry(
__field( __u32, icr_low )
__field( __u32, dest_id )
),
TP_fast_assign(
__entry->icr_low = icr_low;
__entry->dest_id = dest_id;
),
TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)",
__entry->dest_id, (u8)__entry->icr_low,
__print_symbolic((__entry->icr_low >> 8 & 0x7),
kvm_deliver_mode),
(__entry->icr_low & (1<<11)) ? "logical" : "physical",
(__entry->icr_low & (1<<14)) ? "assert" : "de-assert",
(__entry->icr_low & (1<<15)) ? "level" : "edge",
__print_symbolic((__entry->icr_low >> 18 & 0x3),
kvm_apic_dst_shorthand))
);
TRACE_EVENT(kvm_apic_accept_irq,
TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
TP_ARGS(apicid, dm, tm, vec, coalesced),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
__field( __u8, tm )
__field( __u8, vec )
__field( bool, coalesced )
),
TP_fast_assign(
__entry->apicid = apicid;
__entry->dm = dm;
__entry->tm = tm;
__entry->vec = vec;
__entry->coalesced = coalesced;
),
TP_printk("apicid %x vec %u (%s|%s)%s",
__entry->apicid, __entry->vec,
__print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
__entry->tm ? "level" : "edge",
__entry->coalesced ? " (coalesced)" : "")
);
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
#include <trace/define_trace.h>

59
kernel/arch/x86/kvm/tss.h Normal file
View File

@@ -0,0 +1,59 @@
#ifndef __TSS_SEGMENT_H
#define __TSS_SEGMENT_H
struct tss_segment_32 {
u32 prev_task_link;
u32 esp0;
u32 ss0;
u32 esp1;
u32 ss1;
u32 esp2;
u32 ss2;
u32 cr3;
u32 eip;
u32 eflags;
u32 eax;
u32 ecx;
u32 edx;
u32 ebx;
u32 esp;
u32 ebp;
u32 esi;
u32 edi;
u32 es;
u32 cs;
u32 ss;
u32 ds;
u32 fs;
u32 gs;
u32 ldt_selector;
u16 t;
u16 io_map;
};
struct tss_segment_16 {
u16 prev_task_link;
u16 sp0;
u16 ss0;
u16 sp1;
u16 ss1;
u16 sp2;
u16 ss2;
u16 ip;
u16 flag;
u16 ax;
u16 cx;
u16 dx;
u16 bx;
u16 sp;
u16 bp;
u16 si;
u16 di;
u16 es;
u16 cs;
u16 ss;
u16 ds;
u16 ldt;
};
#endif

4113
kernel/arch/x86/kvm/vmx.c Normal file

File diff suppressed because it is too large Load Diff

5219
kernel/arch/x86/kvm/x86.c Normal file

File diff suppressed because it is too large Load Diff

38
kernel/arch/x86/kvm/x86.h Normal file
View File

@@ -0,0 +1,38 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.exception.pending = false;
}
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{
vcpu->arch.interrupt.pending = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector;
}
static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
{
vcpu->arch.interrupt.pending = false;
}
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}
static inline bool kvm_exception_is_soft(unsigned int nr)
{
return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
#endif