add idl4k kernel firmware version 1.13.0.105

This commit is contained in:
Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
config XEN_BALLOON
bool "Xen memory balloon driver"
depends on XEN
default y
help
The balloon driver allows the Xen domain to request more memory from
the system to expand the domain's memory allocation, or alternatively
return unneeded memory to the system.
config XEN_SCRUB_PAGES
bool "Scrub pages before returning them to system"
depends on XEN_BALLOON
default y
help
Scrub pages before returning them to the system for reuse by
other domains. This makes sure that any confidential data
is not accidentally visible to other domains. Is it more
secure, but slightly less efficient.
If in doubt, say yes.
config XEN_DEV_EVTCHN
tristate "Xen /dev/xen/evtchn device"
depends on XEN
default y
help
The evtchn driver allows a userspace process to triger event
channels and to receive notification of an event channel
firing.
If in doubt, say yes.
config XENFS
tristate "Xen filesystem"
depends on XEN
default y
help
The xen filesystem provides a way for domains to share
information with each other and with the hypervisor.
For example, by reading and writing the "xenbus" file, guests
may pass arbitrary information to the initial domain.
If in doubt, say yes.
config XEN_COMPAT_XENFS
bool "Create compatibility mount point /proc/xen"
depends on XENFS
default y
help
The old xenstore userspace tools expect to find "xenbus"
under /proc/xen, but "xenbus" is now found at the root of the
xenfs filesystem. Selecting this causes the kernel to create
the compatibility mount point /proc/xen if it is running on
a xen platform.
If in doubt, say yes.
config XEN_SYS_HYPERVISOR
bool "Create xen entries under /sys/hypervisor"
depends on XEN && SYSFS
select SYS_HYPERVISOR
default y
help
Create entries under /sys/hypervisor describing the Xen
hypervisor environment. When running native or in another
virtual environment, /sys/hypervisor will still be present,
but will have no xen contents.

View File

@@ -0,0 +1,12 @@
obj-y += grant-table.o features.o events.o manage.o
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += balloon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o

View File

@@ -0,0 +1,578 @@
/******************************************************************************
* balloon.c
*
* Xen balloon driver - enables returning/claiming memory to/from Xen.
*
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
* Copyright (c) 2005 Dan M. Smith, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/xenbus.h>
#include <xen/features.h>
#include <xen/page.h>
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
#define BALLOON_CLASS_NAME "xen_memory"
struct balloon_stats {
/* We aim for 'current allocation' == 'target allocation'. */
unsigned long current_pages;
unsigned long target_pages;
/*
* Drivers may alter the memory reservation independently, but they
* must inform the balloon driver so we avoid hitting the hard limit.
*/
unsigned long driver_pages;
/* Number of pages in high- and low-memory balloons. */
unsigned long balloon_low;
unsigned long balloon_high;
};
static DEFINE_MUTEX(balloon_mutex);
static struct sys_device balloon_sysdev;
static int register_balloon(struct sys_device *sysdev);
/*
* Protects atomic reservation decrease/increase against concurrent increases.
* Also protects non-atomic updates of current_pages and driver_pages, and
* balloon lists.
*/
static DEFINE_SPINLOCK(balloon_lock);
static struct balloon_stats balloon_stats;
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_HIGHMEM
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
#define inc_totalhigh_pages() do {} while(0)
#define dec_totalhigh_pages() do {} while(0)
#endif
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
/* Main work function, always executed in process context. */
static void balloon_process(struct work_struct *work);
static DECLARE_WORK(balloon_worker, balloon_process);
static struct timer_list balloon_timer;
/* When ballooning out (allocating memory to return to Xen) we don't really
want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
clear_highpage(page);
#endif
}
/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
list_add_tail(&page->lru, &ballooned_pages);
balloon_stats.balloon_high++;
dec_totalhigh_pages();
} else {
list_add(&page->lru, &ballooned_pages);
balloon_stats.balloon_low++;
}
totalram_pages--;
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
struct page *page;
if (list_empty(&ballooned_pages))
return NULL;
page = list_entry(ballooned_pages.next, struct page, lru);
list_del(&page->lru);
if (PageHighMem(page)) {
balloon_stats.balloon_high--;
inc_totalhigh_pages();
}
else
balloon_stats.balloon_low--;
totalram_pages++;
return page;
}
static struct page *balloon_first_page(void)
{
if (list_empty(&ballooned_pages))
return NULL;
return list_entry(ballooned_pages.next, struct page, lru);
}
static struct page *balloon_next_page(struct page *page)
{
struct list_head *next = page->lru.next;
if (next == &ballooned_pages)
return NULL;
return list_entry(next, struct page, lru);
}
static void balloon_alarm(unsigned long unused)
{
schedule_work(&balloon_worker);
}
static unsigned long current_target(void)
{
unsigned long target = balloon_stats.target_pages;
target = min(target,
balloon_stats.current_pages +
balloon_stats.balloon_low +
balloon_stats.balloon_high);
return target;
}
static int increase_reservation(unsigned long nr_pages)
{
unsigned long pfn, i, flags;
struct page *page;
long rc;
struct xen_memory_reservation reservation = {
.address_bits = 0,
.extent_order = 0,
.domid = DOMID_SELF
};
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
spin_lock_irqsave(&balloon_lock, flags);
page = balloon_first_page();
for (i = 0; i < nr_pages; i++) {
BUG_ON(page == NULL);
frame_list[i] = page_to_pfn(page);
page = balloon_next_page(page);
}
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
if (rc < 0)
goto out;
for (i = 0; i < rc; i++) {
page = balloon_retrieve();
BUG_ON(page == NULL);
pfn = page_to_pfn(page);
BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
phys_to_machine_mapping_valid(pfn));
set_phys_to_machine(pfn, frame_list[i]);
/* Link back into the page tables if not highmem. */
if (pfn < max_low_pfn) {
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
mfn_pte(frame_list[i], PAGE_KERNEL),
0);
BUG_ON(ret);
}
/* Relinquish the page back to the allocator. */
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
}
balloon_stats.current_pages += rc;
out:
spin_unlock_irqrestore(&balloon_lock, flags);
return rc < 0 ? rc : rc != nr_pages;
}
static int decrease_reservation(unsigned long nr_pages)
{
unsigned long pfn, i, flags;
struct page *page;
int need_sleep = 0;
int ret;
struct xen_memory_reservation reservation = {
.address_bits = 0,
.extent_order = 0,
.domid = DOMID_SELF
};
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
for (i = 0; i < nr_pages; i++) {
if ((page = alloc_page(GFP_BALLOON)) == NULL) {
nr_pages = i;
need_sleep = 1;
break;
}
pfn = page_to_pfn(page);
frame_list[i] = pfn_to_mfn(pfn);
scrub_page(page);
if (!PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
__pte_ma(0), 0);
BUG_ON(ret);
}
}
/* Ensure that ballooned highmem pages don't have kmaps. */
kmap_flush_unused();
flush_tlb_all();
spin_lock_irqsave(&balloon_lock, flags);
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(pfn_to_page(pfn));
}
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != nr_pages);
balloon_stats.current_pages -= nr_pages;
spin_unlock_irqrestore(&balloon_lock, flags);
return need_sleep;
}
/*
* We avoid multiple worker processes conflicting via the balloon mutex.
* We may of course race updates of the target counts (which are protected
* by the balloon lock), or with changes to the Xen hard limit, but we will
* recover from these in time.
*/
static void balloon_process(struct work_struct *work)
{
int need_sleep = 0;
long credit;
mutex_lock(&balloon_mutex);
do {
credit = current_target() - balloon_stats.current_pages;
if (credit > 0)
need_sleep = (increase_reservation(credit) != 0);
if (credit < 0)
need_sleep = (decrease_reservation(-credit) != 0);
#ifndef CONFIG_PREEMPT
if (need_resched())
schedule();
#endif
} while ((credit != 0) && !need_sleep);
/* Schedule more work if there is some still to be done. */
if (current_target() != balloon_stats.current_pages)
mod_timer(&balloon_timer, jiffies + HZ);
mutex_unlock(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void balloon_set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
balloon_stats.target_pages = target;
schedule_work(&balloon_worker);
}
static struct xenbus_watch target_watch =
{
.node = "memory/target"
};
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
unsigned long long new_target;
int err;
err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
if (err != 1) {
/* This is ok (for domain0 at least) - so just return */
return;
}
/* The given memory/target value is in KiB, so it needs converting to
* pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
*/
balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
static int balloon_init_watcher(struct notifier_block *notifier,
unsigned long event,
void *data)
{
int err;
err = register_xenbus_watch(&target_watch);
if (err)
printk(KERN_ERR "Failed to set balloon watcher\n");
return NOTIFY_DONE;
}
static struct notifier_block xenstore_notifier;
static int __init balloon_init(void)
{
unsigned long pfn;
struct page *page;
if (!xen_pv_domain())
return -ENODEV;
pr_info("xen_balloon: Initialising balloon driver.\n");
balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
balloon_stats.driver_pages = 0UL;
init_timer(&balloon_timer);
balloon_timer.data = 0;
balloon_timer.function = balloon_alarm;
register_balloon(&balloon_sysdev);
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
page = pfn_to_page(pfn);
if (!PageReserved(page))
balloon_append(page);
}
target_watch.callback = watch_target;
xenstore_notifier.notifier_call = balloon_init_watcher;
register_xenstore_notifier(&xenstore_notifier);
return 0;
}
subsys_initcall(balloon_init);
static void balloon_exit(void)
{
/* XXX - release balloon here */
return;
}
module_exit(balloon_exit);
#define BALLOON_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, format, ##args); \
} \
static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
char *buf)
{
return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
}
static ssize_t store_target_kb(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf,
size_t count)
{
char *endchar;
unsigned long long target_bytes;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
}
static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
show_target_kb, store_target_kb);
static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
char *buf)
{
return sprintf(buf, "%llu\n",
(unsigned long long)balloon_stats.target_pages
<< PAGE_SHIFT);
}
static ssize_t store_target(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf,
size_t count)
{
char *endchar;
unsigned long long target_bytes;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
target_bytes = memparse(buf, &endchar);
balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
}
static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
show_target, store_target);
static struct sysdev_attribute *balloon_attrs[] = {
&attr_target_kb,
&attr_target,
};
static struct attribute *balloon_info_attrs[] = {
&attr_current_kb.attr,
&attr_low_kb.attr,
&attr_high_kb.attr,
&attr_driver_kb.attr,
NULL
};
static struct attribute_group balloon_info_group = {
.name = "info",
.attrs = balloon_info_attrs,
};
static struct sysdev_class balloon_sysdev_class = {
.name = BALLOON_CLASS_NAME,
};
static int register_balloon(struct sys_device *sysdev)
{
int i, error;
error = sysdev_class_register(&balloon_sysdev_class);
if (error)
return error;
sysdev->id = 0;
sysdev->cls = &balloon_sysdev_class;
error = sysdev_register(sysdev);
if (error) {
sysdev_class_unregister(&balloon_sysdev_class);
return error;
}
for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
error = sysdev_create_file(sysdev, balloon_attrs[i]);
if (error)
goto fail;
}
error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
if (error)
goto fail;
return 0;
fail:
while (--i >= 0)
sysdev_remove_file(sysdev, balloon_attrs[i]);
sysdev_unregister(sysdev);
sysdev_class_unregister(&balloon_sysdev_class);
return error;
}
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,110 @@
#include <linux/notifier.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <asm/cpu.h>
static void enable_hotplug_cpu(int cpu)
{
if (!cpu_present(cpu))
arch_register_cpu(cpu);
set_cpu_present(cpu, true);
}
static void disable_hotplug_cpu(int cpu)
{
if (cpu_present(cpu))
arch_unregister_cpu(cpu);
set_cpu_present(cpu, false);
}
static int vcpu_online(unsigned int cpu)
{
int err;
char dir[32], state[32];
sprintf(dir, "cpu/%u", cpu);
err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
if (err != 1) {
printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
return err;
}
if (strcmp(state, "online") == 0)
return 1;
else if (strcmp(state, "offline") == 0)
return 0;
printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
return -EINVAL;
}
static void vcpu_hotplug(unsigned int cpu)
{
if (!cpu_possible(cpu))
return;
switch (vcpu_online(cpu)) {
case 1:
enable_hotplug_cpu(cpu);
break;
case 0:
(void)cpu_down(cpu);
disable_hotplug_cpu(cpu);
break;
default:
break;
}
}
static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
unsigned int cpu;
char *cpustr;
const char *node = vec[XS_WATCH_PATH];
cpustr = strstr(node, "cpu/");
if (cpustr != NULL) {
sscanf(cpustr, "cpu/%u", &cpu);
vcpu_hotplug(cpu);
}
}
static int setup_cpu_watcher(struct notifier_block *notifier,
unsigned long event, void *data)
{
int cpu;
static struct xenbus_watch cpu_watch = {
.node = "cpu",
.callback = handle_vcpu_hotplug_event};
(void)register_xenbus_watch(&cpu_watch);
for_each_possible_cpu(cpu) {
if (vcpu_online(cpu) == 0) {
(void)cpu_down(cpu);
cpu_clear(cpu, cpu_present_map);
}
}
return NOTIFY_DONE;
}
static int __init setup_vcpu_hotplug_event(void)
{
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };
if (!xen_pv_domain())
return -ENODEV;
register_xenstore_notifier(&xsn_cpu);
return 0;
}
arch_initcall(setup_vcpu_hotplug_event);

951
kernel/drivers/xen/events.c Normal file
View File

@@ -0,0 +1,951 @@
/*
* Xen event channels
*
* Xen models interrupts with abstract event channels. Because each
* domain gets 1024 event channels, but NR_IRQ is not that large, we
* must dynamically map irqs<->event channels. The event channels
* interface with the rest of the kernel by defining a xen interrupt
* chip. When an event is recieved, it is mapped to an irq and sent
* through the normal interrupt processing path.
*
* There are four kinds of events which can be mapped to an event
* channel:
*
* 1. Inter-domain notifications. This includes all the virtual
* device events, since they're driven by front-ends in another domain
* (typically dom0).
* 2. VIRQs, typically used for timers. These are per-cpu events.
* 3. IPIs.
* 4. Hardware interrupts. Not supported at present.
*
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
#include <linux/linkage.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <asm/ptrace.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
*/
static DEFINE_SPINLOCK(irq_mapping_update_lock);
/* IRQ <-> VIRQ mapping. */
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
/* Interrupt types. */
enum xen_irq_type {
IRQT_UNBOUND = 0,
IRQT_PIRQ,
IRQT_VIRQ,
IRQT_IPI,
IRQT_EVTCHN
};
/*
* Packed IRQ information:
* type - enum xen_irq_type
* event channel - irq->event channel mapping
* cpu - cpu this event channel is bound to
* index - type-specific information:
* PIRQ - vector, with MSB being "needs EIO"
* VIRQ - virq number
* IPI - IPI vector
* EVTCHN -
*/
struct irq_info
{
enum xen_irq_type type; /* type */
unsigned short evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
union {
unsigned short virq;
enum ipi_vector ipi;
struct {
unsigned short gsi;
unsigned short vector;
} pirq;
} u;
};
static struct irq_info irq_info[NR_IRQS];
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
[0 ... NR_EVENT_CHANNELS-1] = -1
};
struct cpu_evtchn_s {
unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG];
};
static struct cpu_evtchn_s *cpu_evtchn_mask_p;
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
return cpu_evtchn_mask_p[cpu].bits;
}
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_percpu_chip;
/* Constructor for packed IRQ information. */
static struct irq_info mk_unbound_info(void)
{
return (struct irq_info) { .type = IRQT_UNBOUND };
}
static struct irq_info mk_evtchn_info(unsigned short evtchn)
{
return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn,
.cpu = 0 };
}
static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi)
{
return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn,
.cpu = 0, .u.ipi = ipi };
}
static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq)
{
return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn,
.cpu = 0, .u.virq = virq };
}
static struct irq_info mk_pirq_info(unsigned short evtchn,
unsigned short gsi, unsigned short vector)
{
return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn,
.cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } };
}
/*
* Accessors for packed IRQ information.
*/
static struct irq_info *info_for_irq(unsigned irq)
{
return &irq_info[irq];
}
static unsigned int evtchn_from_irq(unsigned irq)
{
return info_for_irq(irq)->evtchn;
}
unsigned irq_from_evtchn(unsigned int evtchn)
{
return evtchn_to_irq[evtchn];
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
static enum ipi_vector ipi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_IPI);
return info->u.ipi;
}
static unsigned virq_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_VIRQ);
return info->u.virq;
}
static unsigned gsi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_PIRQ);
return info->u.pirq.gsi;
}
static unsigned vector_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_PIRQ);
return info->u.pirq.vector;
}
static enum xen_irq_type type_from_irq(unsigned irq)
{
return info_for_irq(irq)->type;
}
static unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
static unsigned int cpu_from_evtchn(unsigned int evtchn)
{
int irq = evtchn_to_irq[evtchn];
unsigned ret = 0;
if (irq != -1)
ret = cpu_from_irq(irq);
return ret;
}
static inline unsigned long active_evtchns(unsigned int cpu,
struct shared_info *sh,
unsigned int idx)
{
return (sh->evtchn_pending[idx] &
cpu_evtchn_mask(cpu)[idx] &
~sh->evtchn_mask[idx]);
}
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
int irq = evtchn_to_irq[chn];
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
#endif
__clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
__set_bit(chn, cpu_evtchn_mask(cpu));
irq_info[irq].cpu = cpu;
}
static void init_evtchn_cpu_bindings(void)
{
#ifdef CONFIG_SMP
struct irq_desc *desc;
int i;
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
cpumask_copy(desc->affinity, cpumask_of(0));
}
#endif
memset(cpu_evtchn_mask(0), ~0, sizeof(struct cpu_evtchn_s));
}
static inline void clear_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_clear_bit(port, &s->evtchn_pending[0]);
}
static inline void set_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, &s->evtchn_pending[0]);
}
static inline int test_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
return sync_test_bit(port, &s->evtchn_pending[0]);
}
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
* @irq: irq of event channel to send event to
*
* Unlike notify_remote_via_evtchn(), this is safe to use across
* save/restore. Notifications on a broken connection are silently
* dropped.
*/
void notify_remote_via_irq(int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
static void mask_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, &s->evtchn_mask[0]);
}
static void unmask_evtchn(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
unsigned int cpu = get_cpu();
BUG_ON(!irqs_disabled());
/* Slow path (hypercall) if this is a non-local port. */
if (unlikely(cpu != cpu_from_evtchn(port))) {
struct evtchn_unmask unmask = { .port = port };
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
} else {
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
sync_clear_bit(port, &s->evtchn_mask[0]);
/*
* The following is basically the equivalent of
* 'hw_resend_irq'. Just like a real IO-APIC we 'lose
* the interrupt edge' if the channel is masked.
*/
if (sync_test_bit(port, &s->evtchn_pending[0]) &&
!sync_test_and_set_bit(port / BITS_PER_LONG,
&vcpu_info->evtchn_pending_sel))
vcpu_info->evtchn_upcall_pending = 1;
}
put_cpu();
}
static int find_unbound_irq(void)
{
int irq;
struct irq_desc *desc;
for (irq = 0; irq < nr_irqs; irq++)
if (irq_info[irq].type == IRQT_UNBOUND)
break;
if (irq == nr_irqs)
panic("No available IRQ to bind to: increase nr_irqs!\n");
desc = irq_to_desc_alloc_node(irq, 0);
if (WARN_ON(desc == NULL))
return -1;
dynamic_irq_init(irq);
return irq;
}
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
spin_lock(&irq_mapping_update_lock);
irq = evtchn_to_irq[evtchn];
if (irq == -1) {
irq = find_unbound_irq();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_edge_irq, "event");
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_evtchn_info(evtchn);
}
spin_unlock(&irq_mapping_update_lock);
return irq;
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int evtchn, irq;
spin_lock(&irq_mapping_update_lock);
irq = per_cpu(ipi_to_irq, cpu)[ipi];
if (irq == -1) {
irq = find_unbound_irq();
if (irq < 0)
goto out;
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "ipi");
bind_ipi.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
evtchn = bind_ipi.port;
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_ipi_info(evtchn, ipi);
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
}
static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int evtchn, irq;
spin_lock(&irq_mapping_update_lock);
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0)
BUG();
evtchn = bind_virq.port;
irq = find_unbound_irq();
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_virq_info(evtchn, virq);
per_cpu(virq_to_irq, cpu)[virq] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
}
spin_unlock(&irq_mapping_update_lock);
return irq;
}
static void unbind_from_irq(unsigned int irq)
{
struct evtchn_close close;
int evtchn = evtchn_from_irq(irq);
spin_lock(&irq_mapping_update_lock);
if (VALID_EVTCHN(evtchn)) {
close.port = evtchn;
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
BUG();
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
[virq_from_irq(irq)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
[ipi_from_irq(irq)] = -1;
break;
default:
break;
}
/* Closed ports are implicitly re-bound to VCPU0. */
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
}
if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
dynamic_irq_cleanup(irq);
}
spin_unlock(&irq_mapping_update_lock);
}
int bind_evtchn_to_irqhandler(unsigned int evtchn,
irq_handler_t handler,
unsigned long irqflags,
const char *devname, void *dev_id)
{
unsigned int irq;
int retval;
irq = bind_evtchn_to_irq(evtchn);
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
return retval;
}
return irq;
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
{
unsigned int irq;
int retval;
irq = bind_virq_to_irq(virq, cpu);
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
return retval;
}
return irq;
}
EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
int bind_ipi_to_irqhandler(enum ipi_vector ipi,
unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id)
{
int irq, retval;
irq = bind_ipi_to_irq(ipi, cpu);
if (irq < 0)
return irq;
irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
return retval;
}
return irq;
}
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
free_irq(irq, dev_id);
unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
int irq = per_cpu(ipi_to_irq, cpu)[vector];
BUG_ON(irq < 0);
notify_remote_via_irq(irq);
}
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
struct shared_info *sh = HYPERVISOR_shared_info;
int cpu = smp_processor_id();
int i;
unsigned long flags;
static DEFINE_SPINLOCK(debug_lock);
spin_lock_irqsave(&debug_lock, flags);
printk("vcpu %d\n ", cpu);
for_each_online_cpu(i) {
struct vcpu_info *v = per_cpu(xen_vcpu, i);
printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
(get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
v->evtchn_upcall_pending,
v->evtchn_pending_sel);
}
printk("pending:\n ");
for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_pending[i],
i % 8 == 0 ? "\n " : " ");
printk("\nmasks:\n ");
for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_mask[i],
i % 8 == 0 ? "\n " : " ");
printk("\nunmasked:\n ");
for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
i % 8 == 0 ? "\n " : " ");
printk("\npending list:\n");
for(i = 0; i < NR_EVENT_CHANNELS; i++) {
if (sync_test_bit(i, sh->evtchn_pending)) {
printk(" %d: event %d -> irq %d\n",
cpu_from_evtchn(i), i,
evtchn_to_irq[i]);
}
}
spin_unlock_irqrestore(&debug_lock, flags);
return IRQ_HANDLED;
}
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
/*
* Search the CPUs pending events bitmasks. For each one found, map
* the event number to an irq, and feed it into do_IRQ() for
* handling.
*
* Xen uses a two-level bitmap to speed searching. The first level is
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
int cpu = get_cpu();
struct pt_regs *old_regs = set_irq_regs(regs);
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
unsigned count;
exit_idle();
irq_enter();
do {
unsigned long pending_words;
vcpu_info->evtchn_upcall_pending = 0;
if (__get_cpu_var(xed_nesting_count)++)
goto out;
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
/* Clear master flag /before/ clearing selector flag. */
wmb();
#endif
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
while (pending_words != 0) {
unsigned long pending_bits;
int word_idx = __ffs(pending_words);
pending_words &= ~(1UL << word_idx);
while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
int bit_idx = __ffs(pending_bits);
int port = (word_idx * BITS_PER_LONG) + bit_idx;
int irq = evtchn_to_irq[port];
if (irq != -1)
handle_irq(irq, regs);
}
}
BUG_ON(!irqs_disabled());
count = __get_cpu_var(xed_nesting_count);
__get_cpu_var(xed_nesting_count) = 0;
} while(count != 1);
out:
irq_exit();
set_irq_regs(old_regs);
put_cpu();
}
/* Rebind a new event channel to an existing irq. */
void rebind_evtchn_irq(int evtchn, int irq)
{
struct irq_info *info = info_for_irq(irq);
/* Make sure the irq is masked, since the new event channel
will also be masked. */
disable_irq(irq);
spin_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
BUG_ON(evtchn_to_irq[evtchn] != -1);
/* Expect irq to have been bound before,
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_evtchn_info(evtchn);
spin_unlock(&irq_mapping_update_lock);
/* new event channels are always bound to cpu 0 */
irq_set_affinity(irq, cpumask_of(0));
/* Unmask the event channel. */
enable_irq(irq);
}
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
if (!VALID_EVTCHN(evtchn))
return -1;
/* Send future instances of this interrupt to other vcpu. */
bind_vcpu.port = evtchn;
bind_vcpu.vcpu = tcpu;
/*
* If this fails, it usually just indicates that we're dealing with a
* virq or IPI channel, which don't actually need to be rebound. Ignore
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
return 0;
}
static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
{
unsigned tcpu = cpumask_first(dest);
return rebind_irq_to_cpu(irq, tcpu);
}
int resend_irq_on_evtchn(unsigned int irq)
{
int masked, evtchn = evtchn_from_irq(irq);
struct shared_info *s = HYPERVISOR_shared_info;
if (!VALID_EVTCHN(evtchn))
return 1;
masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
sync_set_bit(evtchn, s->evtchn_pending);
if (!masked)
unmask_evtchn(evtchn);
return 1;
}
static void enable_dynirq(unsigned int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
static void disable_dynirq(unsigned int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
static void ack_dynirq(unsigned int irq)
{
int evtchn = evtchn_from_irq(irq);
move_native_irq(irq);
if (VALID_EVTCHN(evtchn))
clear_evtchn(evtchn);
}
static int retrigger_dynirq(unsigned int irq)
{
int evtchn = evtchn_from_irq(irq);
struct shared_info *sh = HYPERVISOR_shared_info;
int ret = 0;
if (VALID_EVTCHN(evtchn)) {
int masked;
masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
sync_set_bit(evtchn, sh->evtchn_pending);
if (!masked)
unmask_evtchn(evtchn);
ret = 1;
}
return ret;
}
static void restore_cpu_virqs(unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int virq, irq, evtchn;
for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
continue;
BUG_ON(virq_from_irq(irq) != virq);
/* Get a new binding from Xen. */
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0)
BUG();
evtchn = bind_virq.port;
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_virq_info(evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
static void restore_cpu_ipis(unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int ipi, irq, evtchn;
for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
continue;
BUG_ON(ipi_from_irq(irq) != ipi);
/* Get a new binding from Xen. */
bind_ipi.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
evtchn = bind_ipi.port;
/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_ipi_info(evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
clear_evtchn(evtchn);
}
void xen_set_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn))
set_evtchn(evtchn);
}
bool xen_test_irq_pending(int irq)
{
int evtchn = evtchn_from_irq(irq);
bool ret = false;
if (VALID_EVTCHN(evtchn))
ret = test_evtchn(evtchn);
return ret;
}
/* Poll waiting for an irq to become pending. In the usual case, the
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq)
{
evtchn_port_t evtchn = evtchn_from_irq(irq);
if (VALID_EVTCHN(evtchn)) {
struct sched_poll poll;
poll.nr_ports = 1;
poll.timeout = 0;
set_xen_guest_handle(poll.ports, &evtchn);
if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
BUG();
}
}
void xen_irq_resume(void)
{
unsigned int cpu, irq, evtchn;
init_evtchn_cpu_bindings();
/* New event-channel space is not 'live' yet. */
for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
mask_evtchn(evtchn);
/* No IRQ <-> event-channel mappings. */
for (irq = 0; irq < nr_irqs; irq++)
irq_info[irq].evtchn = 0; /* zap event-channel binding */
for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
evtchn_to_irq[evtchn] = -1;
for_each_possible_cpu(cpu) {
restore_cpu_virqs(cpu);
restore_cpu_ipis(cpu);
}
}
static struct irq_chip xen_dynamic_chip __read_mostly = {
.name = "xen-dyn",
.disable = disable_dynirq,
.mask = disable_dynirq,
.unmask = enable_dynirq,
.ack = ack_dynirq,
.set_affinity = set_affinity_irq,
.retrigger = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
.name = "xen-percpu",
.disable = disable_dynirq,
.mask = disable_dynirq,
.unmask = enable_dynirq,
.ack = ack_dynirq,
};
void __init xen_init_IRQ(void)
{
int i;
cpu_evtchn_mask_p = kcalloc(nr_cpu_ids, sizeof(struct cpu_evtchn_s),
GFP_KERNEL);
BUG_ON(cpu_evtchn_mask_p == NULL);
init_evtchn_cpu_bindings();
/* No event channels are 'live' right now. */
for (i = 0; i < NR_EVENT_CHANNELS; i++)
mask_evtchn(i);
irq_ctx_init(smp_processor_id());
}

506
kernel/drivers/xen/evtchn.c Normal file
View File

@@ -0,0 +1,506 @@
/******************************************************************************
* evtchn.c
*
* Driver for receiving and demuxing event-channel signals.
*
* Copyright (c) 2004-2005, K A Fraser
* Multi-process extensions Copyright (c) 2004, Steven Smith
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <asm/xen/hypervisor.h>
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
/* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
/* Processes wait on this queue when ring is empty. */
wait_queue_head_t evtchn_wait;
struct fasync_struct *evtchn_async_queue;
const char *name;
};
/* Who's bound to each port? */
static struct per_user_data *port_user[NR_EVENT_CHANNELS];
static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
irqreturn_t evtchn_interrupt(int irq, void *data)
{
unsigned int port = (unsigned long)data;
struct per_user_data *u;
spin_lock(&port_user_lock);
u = port_user[port];
disable_irq_nosync(irq);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
}
} else {
u->ring_overflow = 1;
}
spin_unlock(&port_user_lock);
return IRQ_HANDLED;
}
static ssize_t evtchn_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
int rc;
unsigned int c, p, bytes1 = 0, bytes2 = 0;
struct per_user_data *u = file->private_data;
/* Whole number of ports. */
count &= ~(sizeof(evtchn_port_t)-1);
if (count == 0)
return 0;
if (count > PAGE_SIZE)
count = PAGE_SIZE;
for (;;) {
mutex_lock(&u->ring_cons_mutex);
rc = -EFBIG;
if (u->ring_overflow)
goto unlock_out;
c = u->ring_cons;
p = u->ring_prod;
if (c != p)
break;
mutex_unlock(&u->ring_cons_mutex);
if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
rc = wait_event_interruptible(u->evtchn_wait,
u->ring_cons != u->ring_prod);
if (rc)
return rc;
}
/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
sizeof(evtchn_port_t);
bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
} else {
bytes1 = (p - c) * sizeof(evtchn_port_t);
bytes2 = 0;
}
/* Truncate chunks according to caller's maximum byte count. */
if (bytes1 > count) {
bytes1 = count;
bytes2 = 0;
} else if ((bytes1 + bytes2) > count) {
bytes2 = count - bytes1;
}
rc = -EFAULT;
rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
rc = bytes1 + bytes2;
unlock_out:
mutex_unlock(&u->ring_cons_mutex);
return rc;
}
static ssize_t evtchn_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
int rc, i;
evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
struct per_user_data *u = file->private_data;
if (kbuf == NULL)
return -ENOMEM;
/* Whole number of ports. */
count &= ~(sizeof(evtchn_port_t)-1);
rc = 0;
if (count == 0)
goto out;
if (count > PAGE_SIZE)
count = PAGE_SIZE;
rc = -EFAULT;
if (copy_from_user(kbuf, buf, count) != 0)
goto out;
spin_lock_irq(&port_user_lock);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
enable_irq(irq_from_evtchn(kbuf[i]));
spin_unlock_irq(&port_user_lock);
rc = count;
out:
free_page((unsigned long)kbuf);
return rc;
}
static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
int rc = 0;
/*
* Ports are never reused, so every caller should pass in a
* unique port.
*
* (Locking not necessary because we haven't registered the
* interrupt handler yet, and our caller has already
* serialized bind operations.)
*/
BUG_ON(port_user[port] != NULL);
port_user[port] = u;
rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
u->name, (void *)(unsigned long)port);
if (rc >= 0)
rc = 0;
return rc;
}
static void evtchn_unbind_from_user(struct per_user_data *u, int port)
{
int irq = irq_from_evtchn(port);
unbind_from_irqhandler(irq, (void *)(unsigned long)port);
/* make sure we unbind the irq handler before clearing the port */
barrier();
port_user[port] = NULL;
}
static long evtchn_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
int rc;
struct per_user_data *u = file->private_data;
void __user *uarg = (void __user *) arg;
/* Prevent bind from racing with unbind */
mutex_lock(&u->bind_mutex);
switch (cmd) {
case IOCTL_EVTCHN_BIND_VIRQ: {
struct ioctl_evtchn_bind_virq bind;
struct evtchn_bind_virq bind_virq;
rc = -EFAULT;
if (copy_from_user(&bind, uarg, sizeof(bind)))
break;
bind_virq.virq = bind.virq;
bind_virq.vcpu = 0;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq);
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, bind_virq.port);
if (rc == 0)
rc = bind_virq.port;
break;
}
case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
struct ioctl_evtchn_bind_interdomain bind;
struct evtchn_bind_interdomain bind_interdomain;
rc = -EFAULT;
if (copy_from_user(&bind, uarg, sizeof(bind)))
break;
bind_interdomain.remote_dom = bind.remote_domain;
bind_interdomain.remote_port = bind.remote_port;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
if (rc == 0)
rc = bind_interdomain.local_port;
break;
}
case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
struct ioctl_evtchn_bind_unbound_port bind;
struct evtchn_alloc_unbound alloc_unbound;
rc = -EFAULT;
if (copy_from_user(&bind, uarg, sizeof(bind)))
break;
alloc_unbound.dom = DOMID_SELF;
alloc_unbound.remote_dom = bind.remote_domain;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
&alloc_unbound);
if (rc != 0)
break;
rc = evtchn_bind_to_user(u, alloc_unbound.port);
if (rc == 0)
rc = alloc_unbound.port;
break;
}
case IOCTL_EVTCHN_UNBIND: {
struct ioctl_evtchn_unbind unbind;
rc = -EFAULT;
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
break;
rc = -EINVAL;
if (unbind.port >= NR_EVENT_CHANNELS)
break;
spin_lock_irq(&port_user_lock);
rc = -ENOTCONN;
if (port_user[unbind.port] != u) {
spin_unlock_irq(&port_user_lock);
break;
}
evtchn_unbind_from_user(u, unbind.port);
spin_unlock_irq(&port_user_lock);
rc = 0;
break;
}
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
rc = -EFAULT;
if (copy_from_user(&notify, uarg, sizeof(notify)))
break;
if (notify.port >= NR_EVENT_CHANNELS) {
rc = -EINVAL;
} else if (port_user[notify.port] != u) {
rc = -ENOTCONN;
} else {
notify_remote_via_evtchn(notify.port);
rc = 0;
}
break;
}
case IOCTL_EVTCHN_RESET: {
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&port_user_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
spin_unlock_irq(&port_user_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
break;
}
default:
rc = -ENOSYS;
break;
}
mutex_unlock(&u->bind_mutex);
return rc;
}
static unsigned int evtchn_poll(struct file *file, poll_table *wait)
{
unsigned int mask = POLLOUT | POLLWRNORM;
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
if (u->ring_cons != u->ring_prod)
mask |= POLLIN | POLLRDNORM;
if (u->ring_overflow)
mask = POLLERR;
return mask;
}
static int evtchn_fasync(int fd, struct file *filp, int on)
{
struct per_user_data *u = filp->private_data;
return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}
static int evtchn_open(struct inode *inode, struct file *filp)
{
struct per_user_data *u;
u = kzalloc(sizeof(*u), GFP_KERNEL);
if (u == NULL)
return -ENOMEM;
u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
if (u->name == NULL) {
kfree(u);
return -ENOMEM;
}
init_waitqueue_head(&u->evtchn_wait);
u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
if (u->ring == NULL) {
kfree(u->name);
kfree(u);
return -ENOMEM;
}
mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
filp->private_data = u;
return 0;
}
static int evtchn_release(struct inode *inode, struct file *filp)
{
int i;
struct per_user_data *u = filp->private_data;
spin_lock_irq(&port_user_lock);
free_page((unsigned long)u->ring);
for (i = 0; i < NR_EVENT_CHANNELS; i++) {
if (port_user[i] != u)
continue;
evtchn_unbind_from_user(port_user[i], i);
}
spin_unlock_irq(&port_user_lock);
kfree(u->name);
kfree(u);
return 0;
}
static const struct file_operations evtchn_fops = {
.owner = THIS_MODULE,
.read = evtchn_read,
.write = evtchn_write,
.unlocked_ioctl = evtchn_ioctl,
.poll = evtchn_poll,
.fasync = evtchn_fasync,
.open = evtchn_open,
.release = evtchn_release,
};
static struct miscdevice evtchn_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "evtchn",
.fops = &evtchn_fops,
};
static int __init evtchn_init(void)
{
int err;
if (!xen_domain())
return -ENODEV;
spin_lock_init(&port_user_lock);
memset(port_user, 0, sizeof(port_user));
/* Create '/dev/misc/evtchn'. */
err = misc_register(&evtchn_miscdev);
if (err != 0) {
printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
return err;
}
printk(KERN_INFO "Event-channel device installed.\n");
return 0;
}
static void __exit evtchn_cleanup(void)
{
misc_deregister(&evtchn_miscdev);
}
module_init(evtchn_init);
module_exit(evtchn_cleanup);
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,33 @@
/******************************************************************************
* features.c
*
* Xen feature flags.
*
* Copyright (c) 2006, Ian Campbell, XenSource Inc.
*/
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <asm/xen/hypercall.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/features.h>
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
EXPORT_SYMBOL_GPL(xen_features);
void xen_setup_features(void)
{
struct xen_feature_info fi;
int i, j;
for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
fi.submap_idx = i;
if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
break;
for (j = 0; j < 32; j++)
xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
}
}

View File

@@ -0,0 +1,558 @@
/******************************************************************************
* grant_table.c
*
* Granting foreign access to our memory reservation.
*
* Copyright (c) 2005-2006, Christopher Clark
* Copyright (c) 2004-2005, K A Fraser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <asm/xen/hypercall.h>
#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
static struct grant_entry *shared;
static struct gnttab_free_callback *gnttab_free_callback_list;
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
return &gnttab_list[(entry) / RPP][(entry) % RPP];
}
/* This can be used as an l-value */
#define gnttab_entry(entry) (*__gnttab_entry(entry))
static int get_free_entries(unsigned count)
{
unsigned long flags;
int ref, rc;
grant_ref_t head;
spin_lock_irqsave(&gnttab_list_lock, flags);
if ((gnttab_free_count < count) &&
((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return rc;
}
ref = head = gnttab_free_head;
gnttab_free_count -= count;
while (count-- > 1)
head = gnttab_entry(head);
gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END;
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ref;
}
static void do_free_callbacks(void)
{
struct gnttab_free_callback *callback, *next;
callback = gnttab_free_callback_list;
gnttab_free_callback_list = NULL;
while (callback != NULL) {
next = callback->next;
if (gnttab_free_count >= callback->count) {
callback->next = NULL;
callback->fn(callback->arg);
} else {
callback->next = gnttab_free_callback_list;
gnttab_free_callback_list = callback;
}
callback = next;
}
}
static inline void check_free_callbacks(void)
{
if (unlikely(gnttab_free_callback_list))
do_free_callbacks();
}
static void put_free_entry(grant_ref_t ref)
{
unsigned long flags;
spin_lock_irqsave(&gnttab_list_lock, flags);
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = ref;
gnttab_free_count++;
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
static void update_grant_entry(grant_ref_t ref, domid_t domid,
unsigned long frame, unsigned flags)
{
/*
* Introducing a valid entry into the grant table:
* 1. Write ent->domid.
* 2. Write ent->frame:
* GTF_permit_access: Frame to which access is permitted.
* GTF_accept_transfer: Pseudo-phys frame slot being filled by new
* frame, or zero if none.
* 3. Write memory barrier (WMB).
* 4. Write ent->flags, inc. valid type.
*/
shared[ref].frame = frame;
shared[ref].domid = domid;
wmb();
shared[ref].flags = flags;
}
/*
* Public grant-issuing interface functions
*/
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly)
{
update_grant_entry(ref, domid, frame,
GTF_permit_access | (readonly ? GTF_readonly : 0));
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
int readonly)
{
int ref;
ref = get_free_entries(1);
if (unlikely(ref < 0))
return -ENOSPC;
gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
int gnttab_query_foreign_access(grant_ref_t ref)
{
u16 nflags;
nflags = shared[ref].flags;
return (nflags & (GTF_reading|GTF_writing));
}
EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
{
u16 flags, nflags;
nflags = shared[ref].flags;
do {
flags = nflags;
if (flags & (GTF_reading|GTF_writing)) {
printk(KERN_ALERT "WARNING: g.e. still in use!\n");
return 0;
}
} while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
return 1;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page)
{
if (gnttab_end_foreign_access_ref(ref, readonly)) {
put_free_entry(ref);
if (page != 0)
free_page(page);
} else {
/* XXX This needs to be fixed so that the ref and page are
placed on a list to be freed up later. */
printk(KERN_WARNING
"WARNING: leaking g.e. and page still in use!\n");
}
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
int ref;
ref = get_free_entries(1);
if (unlikely(ref < 0))
return -ENOSPC;
gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
unsigned long pfn)
{
update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
{
unsigned long frame;
u16 flags;
/*
* If a transfer is not even yet started, try to reclaim the grant
* reference and return failure (== 0).
*/
while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
return 0;
cpu_relax();
}
/* If a transfer is in progress then wait until it is completed. */
while (!(flags & GTF_transfer_completed)) {
flags = shared[ref].flags;
cpu_relax();
}
rmb(); /* Read the frame number /after/ reading completion status. */
frame = shared[ref].frame;
BUG_ON(frame == 0);
return frame;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
{
unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
put_free_entry(ref);
return frame;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
void gnttab_free_grant_reference(grant_ref_t ref)
{
put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
void gnttab_free_grant_references(grant_ref_t head)
{
grant_ref_t ref;
unsigned long flags;
int count = 1;
if (head == GNTTAB_LIST_END)
return;
spin_lock_irqsave(&gnttab_list_lock, flags);
ref = head;
while (gnttab_entry(ref) != GNTTAB_LIST_END) {
ref = gnttab_entry(ref);
count++;
}
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = head;
gnttab_free_count += count;
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
int h = get_free_entries(count);
if (h < 0)
return -ENOSPC;
*head = h;
return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
return (*private_head == GNTTAB_LIST_END);
}
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
int gnttab_claim_grant_reference(grant_ref_t *private_head)
{
grant_ref_t g = *private_head;
if (unlikely(g == GNTTAB_LIST_END))
return -ENOSPC;
*private_head = gnttab_entry(g);
return g;
}
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
void gnttab_release_grant_reference(grant_ref_t *private_head,
grant_ref_t release)
{
gnttab_entry(release) = *private_head;
*private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count)
{
unsigned long flags;
spin_lock_irqsave(&gnttab_list_lock, flags);
if (callback->next)
goto out;
callback->fn = fn;
callback->arg = arg;
callback->count = count;
callback->next = gnttab_free_callback_list;
gnttab_free_callback_list = callback;
check_free_callbacks();
out:
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
{
struct gnttab_free_callback **pcb;
unsigned long flags;
spin_lock_irqsave(&gnttab_list_lock, flags);
for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
if (*pcb == callback) {
*pcb = callback->next;
break;
}
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
static int grow_gnttab_list(unsigned int more_frames)
{
unsigned int new_nr_grant_frames, extra_entries, i;
unsigned int nr_glist_frames, new_nr_glist_frames;
new_nr_grant_frames = nr_grant_frames + more_frames;
extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
new_nr_glist_frames =
(new_nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
if (!gnttab_list[i])
goto grow_nomem;
}
for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(i) = gnttab_free_head;
gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
gnttab_free_count += extra_entries;
nr_grant_frames = new_nr_grant_frames;
check_free_callbacks();
return 0;
grow_nomem:
for ( ; i >= nr_glist_frames; i--)
free_page((unsigned long) gnttab_list[i]);
return -ENOMEM;
}
static unsigned int __max_nr_grant_frames(void)
{
struct gnttab_query_size query;
int rc;
query.dom = DOMID_SELF;
rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
if ((rc < 0) || (query.status != GNTST_okay))
return 4; /* Legacy max supported number of frames */
return query.max_nr_frames;
}
static inline unsigned int max_nr_grant_frames(void)
{
unsigned int xen_max = __max_nr_grant_frames();
if (xen_max > boot_max_nr_grant_frames)
return boot_max_nr_grant_frames;
return xen_max;
}
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
unsigned long *frames;
unsigned int nr_gframes = end_idx + 1;
int rc;
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
return -ENOMEM;
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
set_xen_guest_handle(setup.frame_list, frames);
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == -ENOSYS) {
kfree(frames);
return -ENOSYS;
}
BUG_ON(rc || setup.status);
rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
&shared);
BUG_ON(rc);
kfree(frames);
return 0;
}
int gnttab_resume(void)
{
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
}
int gnttab_suspend(void)
{
arch_gnttab_unmap_shared(shared, nr_grant_frames);
return 0;
}
static int gnttab_expand(unsigned int req_entries)
{
int rc;
unsigned int cur, extra;
cur = nr_grant_frames;
extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
GREFS_PER_GRANT_FRAME);
if (cur + extra > max_nr_grant_frames())
return -ENOSPC;
rc = gnttab_map(cur, cur + extra - 1);
if (rc == 0)
rc = grow_gnttab_list(extra);
return rc;
}
static int __devinit gnttab_init(void)
{
int i;
unsigned int max_nr_glist_frames, nr_glist_frames;
unsigned int nr_init_grefs;
if (!xen_domain())
return -ENODEV;
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
max_nr_glist_frames = (boot_max_nr_grant_frames *
GREFS_PER_GRANT_FRAME / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
GFP_KERNEL);
if (gnttab_list == NULL)
return -ENOMEM;
nr_glist_frames = (nr_grant_frames * GREFS_PER_GRANT_FRAME + RPP - 1) / RPP;
for (i = 0; i < nr_glist_frames; i++) {
gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
if (gnttab_list[i] == NULL)
goto ini_nomem;
}
if (gnttab_resume() < 0)
return -ENODEV;
nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
gnttab_entry(i) = i + 1;
gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
gnttab_free_head = NR_RESERVED_ENTRIES;
printk("Grant table initialized\n");
return 0;
ini_nomem:
for (i--; i >= 0; i--)
free_page((unsigned long)gnttab_list[i]);
kfree(gnttab_list);
return -ENOMEM;
}
core_initcall(gnttab_init);

273
kernel/drivers/xen/manage.c Normal file
View File

@@ -0,0 +1,273 @@
/*
* Handle extern requests for shutdown, reboot and sysrq
*/
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
#include <linux/stop_machine.h>
#include <linux/freezer.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/hvc-console.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
enum shutdown_state {
SHUTDOWN_INVALID = -1,
SHUTDOWN_POWEROFF = 0,
SHUTDOWN_SUSPEND = 2,
/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
report a crash, not be instructed to crash!
HALT is the same as POWEROFF, as far as we're concerned. The tools use
the distinction when we return the reason code to them. */
SHUTDOWN_HALT = 4,
};
/* Ignore multiple shutdown requests. */
static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
#ifdef CONFIG_PM_SLEEP
static int xen_suspend(void *data)
{
int *cancelled = data;
int err;
BUG_ON(!irqs_disabled());
err = sysdev_suspend(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
err);
return err;
}
xen_mm_pin_all();
gnttab_suspend();
xen_pre_suspend();
/*
* This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain.
*/
*cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
xen_post_suspend(*cancelled);
gnttab_resume();
xen_mm_unpin_all();
if (!*cancelled) {
xen_irq_resume();
xen_console_resume();
xen_timer_resume();
}
sysdev_resume();
return 0;
}
static void do_suspend(void)
{
int err;
int cancelled = 1;
shutting_down = SHUTDOWN_SUSPEND;
err = stop_machine_create();
if (err) {
printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
goto out;
}
#ifdef CONFIG_PREEMPT
/* If the kernel is preemptible, we need to freeze all the processes
to prevent them from being in the middle of a pagetable update
during suspend. */
err = freeze_processes();
if (err) {
printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
goto out_destroy_sm;
}
#endif
err = dpm_suspend_start(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
goto out_thaw;
}
printk(KERN_DEBUG "suspending xenstore...\n");
xs_suspend();
err = dpm_suspend_noirq(PMSG_SUSPEND);
if (err) {
printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err);
goto out_resume;
}
err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
dpm_resume_noirq(PMSG_RESUME);
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
cancelled = 1;
}
out_resume:
if (!cancelled) {
xen_arch_resume();
xs_resume();
} else
xs_suspend_cancel();
dpm_resume_end(PMSG_RESUME);
/* Make sure timer events get retriggered on all CPUs */
clock_was_set();
out_thaw:
#ifdef CONFIG_PREEMPT
thaw_processes();
out_destroy_sm:
#endif
stop_machine_destroy();
out:
shutting_down = SHUTDOWN_INVALID;
}
#endif /* CONFIG_PM_SLEEP */
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
char *str;
struct xenbus_transaction xbt;
int err;
if (shutting_down != SHUTDOWN_INVALID)
return;
again:
err = xenbus_transaction_start(&xbt);
if (err)
return;
str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
/* Ignore read errors and empty reads. */
if (XENBUS_IS_ERR_READ(str)) {
xenbus_transaction_end(xbt, 1);
return;
}
xenbus_write(xbt, "control", "shutdown", "");
err = xenbus_transaction_end(xbt, 0);
if (err == -EAGAIN) {
kfree(str);
goto again;
}
if (strcmp(str, "poweroff") == 0 ||
strcmp(str, "halt") == 0) {
shutting_down = SHUTDOWN_POWEROFF;
orderly_poweroff(false);
} else if (strcmp(str, "reboot") == 0) {
shutting_down = SHUTDOWN_POWEROFF; /* ? */
ctrl_alt_del();
#ifdef CONFIG_PM_SLEEP
} else if (strcmp(str, "suspend") == 0) {
do_suspend();
#endif
} else {
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
}
kfree(str);
}
static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
unsigned int len)
{
char sysrq_key = '\0';
struct xenbus_transaction xbt;
int err;
again:
err = xenbus_transaction_start(&xbt);
if (err)
return;
if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
printk(KERN_ERR "Unable to read sysrq code in "
"control/sysrq\n");
xenbus_transaction_end(xbt, 1);
return;
}
if (sysrq_key != '\0')
xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
err = xenbus_transaction_end(xbt, 0);
if (err == -EAGAIN)
goto again;
if (sysrq_key != '\0')
handle_sysrq(sysrq_key, NULL);
}
static struct xenbus_watch shutdown_watch = {
.node = "control/shutdown",
.callback = shutdown_handler
};
static struct xenbus_watch sysrq_watch = {
.node = "control/sysrq",
.callback = sysrq_handler
};
static int setup_shutdown_watcher(void)
{
int err;
err = register_xenbus_watch(&shutdown_watch);
if (err) {
printk(KERN_ERR "Failed to set shutdown watcher\n");
return err;
}
err = register_xenbus_watch(&sysrq_watch);
if (err) {
printk(KERN_ERR "Failed to set sysrq watcher\n");
return err;
}
return 0;
}
static int shutdown_event(struct notifier_block *notifier,
unsigned long event,
void *data)
{
setup_shutdown_watcher();
return NOTIFY_DONE;
}
static int __init setup_shutdown_event(void)
{
static struct notifier_block xenstore_notifier = {
.notifier_call = shutdown_event
};
register_xenstore_notifier(&xenstore_notifier);
return 0;
}
subsys_initcall(setup_shutdown_event);

View File

@@ -0,0 +1,445 @@
/*
* copyright (c) 2006 IBM Corporation
* Authored by: Mike D. Day <ncmike@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/xenbus.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#define HYPERVISOR_ATTR_RO(_name) \
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
#define HYPERVISOR_ATTR_RW(_name) \
static struct hyp_sysfs_attr _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
struct hyp_sysfs_attr {
struct attribute attr;
ssize_t (*show)(struct hyp_sysfs_attr *, char *);
ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
void *hyp_attr_data;
};
static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
{
return sprintf(buffer, "xen\n");
}
HYPERVISOR_ATTR_RO(type);
static int __init xen_sysfs_type_init(void)
{
return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
}
static void xen_sysfs_type_destroy(void)
{
sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
}
/* xen version attributes */
static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int version = HYPERVISOR_xen_version(XENVER_version, NULL);
if (version)
return sprintf(buffer, "%d\n", version >> 16);
return -ENODEV;
}
HYPERVISOR_ATTR_RO(major);
static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int version = HYPERVISOR_xen_version(XENVER_version, NULL);
if (version)
return sprintf(buffer, "%d\n", version & 0xff);
return -ENODEV;
}
HYPERVISOR_ATTR_RO(minor);
static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
char *extra;
extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
if (extra) {
ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
if (!ret)
ret = sprintf(buffer, "%s\n", extra);
kfree(extra);
}
return ret;
}
HYPERVISOR_ATTR_RO(extra);
static struct attribute *version_attrs[] = {
&major_attr.attr,
&minor_attr.attr,
&extra_attr.attr,
NULL
};
static struct attribute_group version_group = {
.name = "version",
.attrs = version_attrs,
};
static int __init xen_sysfs_version_init(void)
{
return sysfs_create_group(hypervisor_kobj, &version_group);
}
static void xen_sysfs_version_destroy(void)
{
sysfs_remove_group(hypervisor_kobj, &version_group);
}
/* UUID */
static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
{
char *vm, *val;
int ret;
extern int xenstored_ready;
if (!xenstored_ready)
return -EBUSY;
vm = xenbus_read(XBT_NIL, "vm", "", NULL);
if (IS_ERR(vm))
return PTR_ERR(vm);
val = xenbus_read(XBT_NIL, vm, "uuid", NULL);
kfree(vm);
if (IS_ERR(val))
return PTR_ERR(val);
ret = sprintf(buffer, "%s\n", val);
kfree(val);
return ret;
}
HYPERVISOR_ATTR_RO(uuid);
static int __init xen_sysfs_uuid_init(void)
{
return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
}
static void xen_sysfs_uuid_destroy(void)
{
sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
}
/* xen compilation attributes */
static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
struct xen_compile_info *info;
info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
if (info) {
ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
if (!ret)
ret = sprintf(buffer, "%s\n", info->compiler);
kfree(info);
}
return ret;
}
HYPERVISOR_ATTR_RO(compiler);
static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
struct xen_compile_info *info;
info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
if (info) {
ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
if (!ret)
ret = sprintf(buffer, "%s\n", info->compile_by);
kfree(info);
}
return ret;
}
HYPERVISOR_ATTR_RO(compiled_by);
static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
struct xen_compile_info *info;
info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
if (info) {
ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
if (!ret)
ret = sprintf(buffer, "%s\n", info->compile_date);
kfree(info);
}
return ret;
}
HYPERVISOR_ATTR_RO(compile_date);
static struct attribute *xen_compile_attrs[] = {
&compiler_attr.attr,
&compiled_by_attr.attr,
&compile_date_attr.attr,
NULL
};
static struct attribute_group xen_compilation_group = {
.name = "compilation",
.attrs = xen_compile_attrs,
};
int __init static xen_compilation_init(void)
{
return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
}
static void xen_compilation_destroy(void)
{
sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
}
/* xen properties info */
static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
char *caps;
caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
if (caps) {
ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
if (!ret)
ret = sprintf(buffer, "%s\n", caps);
kfree(caps);
}
return ret;
}
HYPERVISOR_ATTR_RO(capabilities);
static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
char *cset;
cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
if (cset) {
ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
if (!ret)
ret = sprintf(buffer, "%s\n", cset);
kfree(cset);
}
return ret;
}
HYPERVISOR_ATTR_RO(changeset);
static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret = -ENOMEM;
struct xen_platform_parameters *parms;
parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
if (parms) {
ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
parms);
if (!ret)
ret = sprintf(buffer, "%lx\n", parms->virt_start);
kfree(parms);
}
return ret;
}
HYPERVISOR_ATTR_RO(virtual_start);
static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
{
int ret;
ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
if (ret > 0)
ret = sprintf(buffer, "%x\n", ret);
return ret;
}
HYPERVISOR_ATTR_RO(pagesize);
static ssize_t xen_feature_show(int index, char *buffer)
{
ssize_t ret;
struct xen_feature_info info;
info.submap_idx = index;
ret = HYPERVISOR_xen_version(XENVER_get_features, &info);
if (!ret)
ret = sprintf(buffer, "%08x", info.submap);
return ret;
}
static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
{
ssize_t len;
int i;
len = 0;
for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) {
int ret = xen_feature_show(i, buffer + len);
if (ret < 0) {
if (len == 0)
len = ret;
break;
}
len += ret;
}
if (len > 0)
buffer[len++] = '\n';
return len;
}
HYPERVISOR_ATTR_RO(features);
static struct attribute *xen_properties_attrs[] = {
&capabilities_attr.attr,
&changeset_attr.attr,
&virtual_start_attr.attr,
&pagesize_attr.attr,
&features_attr.attr,
NULL
};
static struct attribute_group xen_properties_group = {
.name = "properties",
.attrs = xen_properties_attrs,
};
static int __init xen_properties_init(void)
{
return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
}
static void xen_properties_destroy(void)
{
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
}
static int __init hyper_sysfs_init(void)
{
int ret;
if (!xen_domain())
return -ENODEV;
ret = xen_sysfs_type_init();
if (ret)
goto out;
ret = xen_sysfs_version_init();
if (ret)
goto version_out;
ret = xen_compilation_init();
if (ret)
goto comp_out;
ret = xen_sysfs_uuid_init();
if (ret)
goto uuid_out;
ret = xen_properties_init();
if (ret)
goto prop_out;
goto out;
prop_out:
xen_sysfs_uuid_destroy();
uuid_out:
xen_compilation_destroy();
comp_out:
xen_sysfs_version_destroy();
version_out:
xen_sysfs_type_destroy();
out:
return ret;
}
static void __exit hyper_sysfs_exit(void)
{
xen_properties_destroy();
xen_compilation_destroy();
xen_sysfs_uuid_destroy();
xen_sysfs_version_destroy();
xen_sysfs_type_destroy();
}
module_init(hyper_sysfs_init);
module_exit(hyper_sysfs_exit);
static ssize_t hyp_sysfs_show(struct kobject *kobj,
struct attribute *attr,
char *buffer)
{
struct hyp_sysfs_attr *hyp_attr;
hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
if (hyp_attr->show)
return hyp_attr->show(hyp_attr, buffer);
return 0;
}
static ssize_t hyp_sysfs_store(struct kobject *kobj,
struct attribute *attr,
const char *buffer,
size_t len)
{
struct hyp_sysfs_attr *hyp_attr;
hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr);
if (hyp_attr->store)
return hyp_attr->store(hyp_attr, buffer, len);
return 0;
}
static struct sysfs_ops hyp_sysfs_ops = {
.show = hyp_sysfs_show,
.store = hyp_sysfs_store,
};
static struct kobj_type hyp_sysfs_kobj_type = {
.sysfs_ops = &hyp_sysfs_ops,
};
static int __init hypervisor_subsys_init(void)
{
if (!xen_domain())
return -ENODEV;
hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
return 0;
}
device_initcall(hypervisor_subsys_init);

View File

@@ -0,0 +1,7 @@
obj-y += xenbus.o
xenbus-objs =
xenbus-objs += xenbus_client.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o

View File

@@ -0,0 +1,568 @@
/******************************************************************************
* Client-facing interface for the Xenbus driver. In other words, the
* interface between the Xenbus and the device-specific code, be it the
* frontend or the backend of that driver.
*
* Copyright (C) 2005 XenSource Ltd
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <asm/xen/hypervisor.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
const char *xenbus_strstate(enum xenbus_state state)
{
static const char *const name[] = {
[ XenbusStateUnknown ] = "Unknown",
[ XenbusStateInitialising ] = "Initialising",
[ XenbusStateInitWait ] = "InitWait",
[ XenbusStateInitialised ] = "Initialised",
[ XenbusStateConnected ] = "Connected",
[ XenbusStateClosing ] = "Closing",
[ XenbusStateClosed ] = "Closed",
};
return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
}
EXPORT_SYMBOL_GPL(xenbus_strstate);
/**
* xenbus_watch_path - register a watch
* @dev: xenbus device
* @path: path to watch
* @watch: watch to register
* @callback: callback to register
*
* Register a @watch on the given path, using the given xenbus_watch structure
* for storage, and the given @callback function as the callback. Return 0 on
* success, or -errno on error. On success, the given @path will be saved as
* @watch->node, and remains the caller's to free. On error, @watch->node will
* be NULL, the device will switch to %XenbusStateClosing, and the error will
* be saved in the store.
*/
int xenbus_watch_path(struct xenbus_device *dev, const char *path,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
const char **, unsigned int))
{
int err;
watch->node = path;
watch->callback = callback;
err = register_xenbus_watch(watch);
if (err) {
watch->node = NULL;
watch->callback = NULL;
xenbus_dev_fatal(dev, err, "adding watch on %s", path);
}
return err;
}
EXPORT_SYMBOL_GPL(xenbus_watch_path);
/**
* xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
* @dev: xenbus device
* @watch: watch to register
* @callback: callback to register
* @pathfmt: format of path to watch
*
* Register a watch on the given @path, using the given xenbus_watch
* structure for storage, and the given @callback function as the callback.
* Return 0 on success, or -errno on error. On success, the watched path
* (@path/@path2) will be saved as @watch->node, and becomes the caller's to
* kfree(). On error, watch->node will be NULL, so the caller has nothing to
* free, the device will switch to %XenbusStateClosing, and the error will be
* saved in the store.
*/
int xenbus_watch_pathfmt(struct xenbus_device *dev,
struct xenbus_watch *watch,
void (*callback)(struct xenbus_watch *,
const char **, unsigned int),
const char *pathfmt, ...)
{
int err;
va_list ap;
char *path;
va_start(ap, pathfmt);
path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
va_end(ap);
if (!path) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
return -ENOMEM;
}
err = xenbus_watch_path(dev, path, watch, callback);
if (err)
kfree(path);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
/**
* xenbus_switch_state
* @dev: xenbus device
* @state: new state
*
* Advertise in the store a change of the given driver to the given new_state.
* Return 0 on success, or -errno on error. On error, the device will switch
* to XenbusStateClosing, and the error will be saved in the store.
*/
int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
{
/* We check whether the state is currently set to the given value, and
if not, then the state is set. We don't want to unconditionally
write the given state, because we don't want to fire watches
unnecessarily. Furthermore, if the node has gone, we don't write
to it, as the device will be tearing down, and we don't want to
resurrect that directory.
Note that, because of this cached value of our state, this function
will not work inside a Xenstore transaction (something it was
trying to in the past) because dev->state would not get reset if
the transaction was aborted.
*/
int current_state;
int err;
if (state == dev->state)
return 0;
err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
&current_state);
if (err != 1)
return 0;
err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
if (err) {
if (state != XenbusStateClosing) /* Avoid looping */
xenbus_dev_fatal(dev, err, "writing new state");
return err;
}
dev->state = state;
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
int xenbus_frontend_closed(struct xenbus_device *dev)
{
xenbus_switch_state(dev, XenbusStateClosed);
complete(&dev->down);
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
/**
* Return the path to the error node for the given device, or NULL on failure.
* If the value returned is non-NULL, then it is the caller's to kfree.
*/
static char *error_path(struct xenbus_device *dev)
{
return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
const char *fmt, va_list ap)
{
int ret;
unsigned int len;
char *printf_buffer = NULL;
char *path_buffer = NULL;
#define PRINTF_BUFFER_SIZE 4096
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
if (printf_buffer == NULL)
goto fail;
len = sprintf(printf_buffer, "%i ", -err);
ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
dev_err(&dev->dev, "%s\n", printf_buffer);
path_buffer = error_path(dev);
if (path_buffer == NULL) {
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
goto fail;
}
if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
goto fail;
}
fail:
kfree(printf_buffer);
kfree(path_buffer);
}
/**
* xenbus_dev_error
* @dev: xenbus device
* @err: error to report
* @fmt: error message format
*
* Report the given negative errno into the store, along with the given
* formatted message.
*/
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
xenbus_va_dev_error(dev, err, fmt, ap);
va_end(ap);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);
/**
* xenbus_dev_fatal
* @dev: xenbus device
* @err: error to report
* @fmt: error message format
*
* Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
* xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
* closedown of this driver and its peer.
*/
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
xenbus_va_dev_error(dev, err, fmt, ap);
va_end(ap);
xenbus_switch_state(dev, XenbusStateClosing);
}
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
/**
* xenbus_grant_ring
* @dev: xenbus device
* @ring_mfn: mfn of ring to grant
* Grant access to the given @ring_mfn to the peer of the given device. Return
* 0 on success, or -errno on error. On error, the device will switch to
* XenbusStateClosing, and the error will be saved in the store.
*/
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
{
int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
if (err < 0)
xenbus_dev_fatal(dev, err, "granting access to ring page");
return err;
}
EXPORT_SYMBOL_GPL(xenbus_grant_ring);
/**
* Allocate an event channel for the given xenbus_device, assigning the newly
* created local port to *port. Return 0 on success, or -errno on error. On
* error, the device will switch to XenbusStateClosing, and the error will be
* saved in the store.
*/
int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
{
struct evtchn_alloc_unbound alloc_unbound;
int err;
alloc_unbound.dom = DOMID_SELF;
alloc_unbound.remote_dom = dev->otherend_id;
err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
&alloc_unbound);
if (err)
xenbus_dev_fatal(dev, err, "allocating event channel");
else
*port = alloc_unbound.port;
return err;
}
EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
/**
* Bind to an existing interdomain event channel in another domain. Returns 0
* on success and stores the local port in *port. On error, returns -errno,
* switches the device to XenbusStateClosing, and saves the error in XenStore.
*/
int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
bind_interdomain.remote_dom = dev->otherend_id;
bind_interdomain.remote_port = remote_port;
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
if (err)
xenbus_dev_fatal(dev, err,
"binding to event channel %d from domain %d",
remote_port, dev->otherend_id);
else
*port = bind_interdomain.local_port;
return err;
}
EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
/**
* Free an existing event channel. Returns 0 on success or -errno on error.
*/
int xenbus_free_evtchn(struct xenbus_device *dev, int port)
{
struct evtchn_close close;
int err;
close.port = port;
err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
if (err)
xenbus_dev_error(dev, err, "freeing event channel %d", port);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
/**
* xenbus_map_ring_valloc
* @dev: xenbus device
* @gnt_ref: grant reference
* @vaddr: pointer to address to be filled out by mapping
*
* Based on Rusty Russell's skeleton driver's map_page.
* Map a page of memory into this domain from another domain's grant table.
* xenbus_map_ring_valloc allocates a page of virtual address space, maps the
* page to that address, and sets *vaddr to that address.
* Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
* or -ENOMEM on error. If an error is returned, device will switch to
* XenbusStateClosing and the error message will be saved in XenStore.
*/
int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
{
struct gnttab_map_grant_ref op = {
.flags = GNTMAP_host_map,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
struct vm_struct *area;
*vaddr = NULL;
area = xen_alloc_vm_area(PAGE_SIZE);
if (!area)
return -ENOMEM;
op.host_addr = (unsigned long)area->addr;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
if (op.status != GNTST_okay) {
xen_free_vm_area(area);
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
return op.status;
}
/* Stuff the handle in an unused field */
area->phys_addr = (unsigned long)op.handle;
*vaddr = area->addr;
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
/**
* xenbus_map_ring
* @dev: xenbus device
* @gnt_ref: grant reference
* @handle: pointer to grant handle to be filled
* @vaddr: address to be mapped to
*
* Map a page of memory into this domain from another domain's grant table.
* xenbus_map_ring does not allocate the virtual address space (you must do
* this yourself!). It only maps in the page to the specified address.
* Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
* or -ENOMEM on error. If an error is returned, device will switch to
* XenbusStateClosing and the error message will be saved in XenStore.
*/
int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
grant_handle_t *handle, void *vaddr)
{
struct gnttab_map_grant_ref op = {
.host_addr = (unsigned long)vaddr,
.flags = GNTMAP_host_map,
.ref = gnt_ref,
.dom = dev->otherend_id,
};
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
BUG();
if (op.status != GNTST_okay) {
xenbus_dev_fatal(dev, op.status,
"mapping in shared page %d from domain %d",
gnt_ref, dev->otherend_id);
} else
*handle = op.handle;
return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_map_ring);
/**
* xenbus_unmap_ring_vfree
* @dev: xenbus device
* @vaddr: addr to unmap
*
* Based on Rusty Russell's skeleton driver's unmap_page.
* Unmap a page of memory in this domain that was imported from another domain.
* Use xenbus_unmap_ring_vfree if you mapped in your memory with
* xenbus_map_ring_valloc (it will free the virtual address space).
* Returns 0 on success and returns GNTST_* on error
* (see xen/include/interface/grant_table.h).
*/
int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
{
struct vm_struct *area;
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
};
/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
* method so that we don't have to muck with vmalloc internals here.
* We could force the user to hang on to their struct vm_struct from
* xenbus_map_ring_valloc, but these 6 lines considerably simplify
* this API.
*/
read_lock(&vmlist_lock);
for (area = vmlist; area != NULL; area = area->next) {
if (area->addr == vaddr)
break;
}
read_unlock(&vmlist_lock);
if (!area) {
xenbus_dev_error(dev, -ENOENT,
"can't find mapped virtual address %p", vaddr);
return GNTST_bad_virt_addr;
}
op.handle = (grant_handle_t)area->phys_addr;
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
if (op.status == GNTST_okay)
xen_free_vm_area(area);
else
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
(int16_t)area->phys_addr, op.status);
return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
/**
* xenbus_unmap_ring
* @dev: xenbus device
* @handle: grant handle
* @vaddr: addr to unmap
*
* Unmap a page of memory in this domain that was imported from another domain.
* Returns 0 on success and returns GNTST_* on error
* (see xen/include/interface/grant_table.h).
*/
int xenbus_unmap_ring(struct xenbus_device *dev,
grant_handle_t handle, void *vaddr)
{
struct gnttab_unmap_grant_ref op = {
.host_addr = (unsigned long)vaddr,
.handle = handle,
};
if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
BUG();
if (op.status != GNTST_okay)
xenbus_dev_error(dev, op.status,
"unmapping page at handle %d error %d",
handle, op.status);
return op.status;
}
EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
/**
* xenbus_read_driver_state
* @path: path for driver
*
* Return the state of the driver rooted at the given store path, or
* XenbusStateUnknown if no state can be read.
*/
enum xenbus_state xenbus_read_driver_state(const char *path)
{
enum xenbus_state result;
int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
if (err)
result = XenbusStateUnknown;
return result;
}
EXPORT_SYMBOL_GPL(xenbus_read_driver_state);

View File

@@ -0,0 +1,234 @@
/******************************************************************************
* xenbus_comms.c
*
* Low level code to talks to Xen Store: ringbuffer and event channel.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus_comms.h"
static int xenbus_irq;
static DECLARE_WORK(probe_work, xenbus_probe);
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
static irqreturn_t wake_waiting(int irq, void *unused)
{
if (unlikely(xenstored_ready == 0)) {
xenstored_ready = 1;
schedule_work(&probe_work);
}
wake_up(&xb_waitq);
return IRQ_HANDLED;
}
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
return ((prod - cons) <= XENSTORE_RING_SIZE);
}
static void *get_output_chunk(XENSTORE_RING_IDX cons,
XENSTORE_RING_IDX prod,
char *buf, uint32_t *len)
{
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
*len = XENSTORE_RING_SIZE - (prod - cons);
return buf + MASK_XENSTORE_IDX(prod);
}
static const void *get_input_chunk(XENSTORE_RING_IDX cons,
XENSTORE_RING_IDX prod,
const char *buf, uint32_t *len)
{
*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
if ((prod - cons) < *len)
*len = prod - cons;
return buf + MASK_XENSTORE_IDX(cons);
}
/**
* xb_write - low level write
* @data: buffer to send
* @len: length of buffer
*
* Returns 0 on success, error otherwise.
*/
int xb_write(const void *data, unsigned len)
{
struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
int rc;
while (len != 0) {
void *dst;
unsigned int avail;
rc = wait_event_interruptible(
xb_waitq,
(intf->req_prod - intf->req_cons) !=
XENSTORE_RING_SIZE);
if (rc < 0)
return rc;
/* Read indexes, then verify. */
cons = intf->req_cons;
prod = intf->req_prod;
if (!check_indexes(cons, prod)) {
intf->req_cons = intf->req_prod = 0;
return -EIO;
}
dst = get_output_chunk(cons, prod, intf->req, &avail);
if (avail == 0)
continue;
if (avail > len)
avail = len;
/* Must write data /after/ reading the consumer index. */
mb();
memcpy(dst, data, avail);
data += avail;
len -= avail;
/* Other side must not see new producer until data is there. */
wmb();
intf->req_prod += avail;
/* Implies mb(): other side will see the updated producer. */
notify_remote_via_evtchn(xen_store_evtchn);
}
return 0;
}
int xb_data_to_read(void)
{
struct xenstore_domain_interface *intf = xen_store_interface;
return (intf->rsp_cons != intf->rsp_prod);
}
int xb_wait_for_data_to_read(void)
{
return wait_event_interruptible(xb_waitq, xb_data_to_read());
}
int xb_read(void *data, unsigned len)
{
struct xenstore_domain_interface *intf = xen_store_interface;
XENSTORE_RING_IDX cons, prod;
int rc;
while (len != 0) {
unsigned int avail;
const char *src;
rc = xb_wait_for_data_to_read();
if (rc < 0)
return rc;
/* Read indexes, then verify. */
cons = intf->rsp_cons;
prod = intf->rsp_prod;
if (!check_indexes(cons, prod)) {
intf->rsp_cons = intf->rsp_prod = 0;
return -EIO;
}
src = get_input_chunk(cons, prod, intf->rsp, &avail);
if (avail == 0)
continue;
if (avail > len)
avail = len;
/* Must read data /after/ reading the producer index. */
rmb();
memcpy(data, src, avail);
data += avail;
len -= avail;
/* Other side must not see free space until we've copied out */
mb();
intf->rsp_cons += avail;
pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
/* Implies mb(): other side will see the updated consumer. */
notify_remote_via_evtchn(xen_store_evtchn);
}
return 0;
}
/**
* xb_init_comms - Set up interrupt handler off store event channel.
*/
int xb_init_comms(void)
{
struct xenstore_domain_interface *intf = xen_store_interface;
if (intf->req_prod != intf->req_cons)
printk(KERN_ERR "XENBUS request ring is not quiescent "
"(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
if (intf->rsp_prod != intf->rsp_cons) {
printk(KERN_WARNING "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
intf->rsp_cons = intf->rsp_prod;
}
if (xenbus_irq) {
/* Already have an irq; assume we're resuming */
rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
} else {
int err;
err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
0, "xenbus", &xb_waitq);
if (err <= 0) {
printk(KERN_ERR "XENBUS request irq failed %i\n", err);
return err;
}
xenbus_irq = err;
}
return 0;
}

View File

@@ -0,0 +1,46 @@
/*
* Private include for xenbus communications.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
int xs_init(void);
int xb_init_comms(void);
/* Low level routines. */
int xb_write(const void *data, unsigned len);
int xb_read(void *data, unsigned len);
int xb_data_to_read(void);
int xb_wait_for_data_to_read(void);
int xs_input_avail(void);
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
#endif /* _XENBUS_COMMS_H */

View File

@@ -0,0 +1,956 @@
/******************************************************************************
* Talks to Xen Store to figure out what devices we have.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 Mike Wray, Hewlett-Packard
* Copyright (C) 2005, 2006 XenSource Ltd
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
__func__, __LINE__, ##args)
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/xen/hypervisor.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus_comms.h"
#include "xenbus_probe.h"
int xen_store_evtchn;
EXPORT_SYMBOL(xen_store_evtchn);
struct xenstore_domain_interface *xen_store_interface;
static unsigned long xen_store_mfn;
static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
static void wait_for_devices(struct xenbus_driver *xendrv);
static int xenbus_probe_frontend(const char *type, const char *name);
static void xenbus_dev_shutdown(struct device *_dev);
static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
static int xenbus_dev_resume(struct device *dev);
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
{
for (; *arr->devicetype != '\0'; arr++) {
if (!strcmp(arr->devicetype, dev->devicetype))
return arr;
}
return NULL;
}
int xenbus_match(struct device *_dev, struct device_driver *_drv)
{
struct xenbus_driver *drv = to_xenbus_driver(_drv);
if (!drv->ids)
return 0;
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
return -ENOMEM;
return 0;
}
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
{
nodename = strchr(nodename, '/');
if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
return -EINVAL;
}
strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
if (!strchr(bus_id, '/')) {
printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
return -EINVAL;
}
*strchr(bus_id, '/') = '-';
return 0;
}
static void free_otherend_details(struct xenbus_device *dev)
{
kfree(dev->otherend);
dev->otherend = NULL;
}
static void free_otherend_watch(struct xenbus_device *dev)
{
if (dev->otherend_watch.node) {
unregister_xenbus_watch(&dev->otherend_watch);
kfree(dev->otherend_watch.node);
dev->otherend_watch.node = NULL;
}
}
int read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node)
{
int err = xenbus_gather(XBT_NIL, xendev->nodename,
id_node, "%i", &xendev->otherend_id,
path_node, NULL, &xendev->otherend,
NULL);
if (err) {
xenbus_dev_fatal(xendev, err,
"reading other end details from %s",
xendev->nodename);
return err;
}
if (strlen(xendev->otherend) == 0 ||
!xenbus_exists(XBT_NIL, xendev->otherend, "")) {
xenbus_dev_fatal(xendev, -ENOENT,
"unable to read other end from %s. "
"missing or inaccessible.",
xendev->nodename);
free_otherend_details(xendev);
return -ENOENT;
}
return 0;
}
static int read_backend_details(struct xenbus_device *xendev)
{
return read_otherend_details(xendev, "backend-id", "backend");
}
static struct device_attribute xenbus_dev_attrs[] = {
__ATTR_NULL
};
/* Bus type for frontend drivers. */
static struct xen_bus_type xenbus_frontend = {
.root = "device",
.levels = 2, /* device/type/<id> */
.get_bus_id = frontend_bus_id,
.probe = xenbus_probe_frontend,
.bus = {
.name = "xen",
.match = xenbus_match,
.uevent = xenbus_uevent,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
.dev_attrs = xenbus_dev_attrs,
.suspend = xenbus_dev_suspend,
.resume = xenbus_dev_resume,
},
};
static void otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
struct xenbus_device *dev =
container_of(watch, struct xenbus_device, otherend_watch);
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
enum xenbus_state state;
/* Protect us against watches firing on old details when the otherend
details change, say immediately after a resume. */
if (!dev->otherend ||
strncmp(dev->otherend, vec[XS_WATCH_PATH],
strlen(dev->otherend))) {
dev_dbg(&dev->dev, "Ignoring watch at %s\n",
vec[XS_WATCH_PATH]);
return;
}
state = xenbus_read_driver_state(dev->otherend);
dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
state, xenbus_strstate(state), dev->otherend_watch.node,
vec[XS_WATCH_PATH]);
/*
* Ignore xenbus transitions during shutdown. This prevents us doing
* work that can fail e.g., when the rootfs is gone.
*/
if (system_state > SYSTEM_RUNNING) {
struct xen_bus_type *bus = bus;
bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
/* If we're frontend, drive the state machine to Closed. */
/* This should cause the backend to release our resources. */
if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
xenbus_frontend_closed(dev);
return;
}
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
static int talk_to_otherend(struct xenbus_device *dev)
{
struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
free_otherend_watch(dev);
free_otherend_details(dev);
return drv->read_otherend_details(dev);
}
static int watch_otherend(struct xenbus_device *dev)
{
return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
"%s/%s", dev->otherend, "state");
}
int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
const struct xenbus_device_id *id;
int err;
DPRINTK("%s", dev->nodename);
if (!drv->probe) {
err = -ENODEV;
goto fail;
}
id = match_device(drv->ids, dev);
if (!id) {
err = -ENODEV;
goto fail;
}
err = talk_to_otherend(dev);
if (err) {
dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
dev->nodename);
return err;
}
err = drv->probe(dev, id);
if (err)
goto fail;
err = watch_otherend(dev);
if (err) {
dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
dev->nodename);
return err;
}
return 0;
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
xenbus_switch_state(dev, XenbusStateClosed);
return -ENODEV;
}
int xenbus_dev_remove(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
DPRINTK("%s", dev->nodename);
free_otherend_watch(dev);
free_otherend_details(dev);
if (drv->remove)
drv->remove(dev);
xenbus_switch_state(dev, XenbusStateClosed);
return 0;
}
static void xenbus_dev_shutdown(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
unsigned long timeout = 5*HZ;
DPRINTK("%s", dev->nodename);
get_device(&dev->dev);
if (dev->state != XenbusStateConnected) {
printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
dev->nodename, xenbus_strstate(dev->state));
goto out;
}
xenbus_switch_state(dev, XenbusStateClosing);
timeout = wait_for_completion_timeout(&dev->down, timeout);
if (!timeout)
printk(KERN_INFO "%s: %s timeout closing device\n",
__func__, dev->nodename);
out:
put_device(&dev->dev);
}
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner,
const char *mod_name)
{
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
return driver_register(&drv->driver);
}
int __xenbus_register_frontend(struct xenbus_driver *drv,
struct module *owner, const char *mod_name)
{
int ret;
drv->read_otherend_details = read_backend_details;
ret = xenbus_register_driver_common(drv, &xenbus_frontend,
owner, mod_name);
if (ret)
return ret;
/* If this driver is loaded as a module wait for devices to attach. */
wait_for_devices(drv);
return 0;
}
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
driver_unregister(&drv->driver);
}
EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
struct xb_find_info
{
struct xenbus_device *dev;
const char *nodename;
};
static int cmp_dev(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct xb_find_info *info = data;
if (!strcmp(xendev->nodename, info->nodename)) {
info->dev = xendev;
get_device(dev);
return 1;
}
return 0;
}
struct xenbus_device *xenbus_device_find(const char *nodename,
struct bus_type *bus)
{
struct xb_find_info info = { .dev = NULL, .nodename = nodename };
bus_for_each_dev(bus, NULL, &info, cmp_dev);
return info.dev;
}
static int cleanup_dev(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct xb_find_info *info = data;
int len = strlen(info->nodename);
DPRINTK("%s", info->nodename);
/* Match the info->nodename path, or any subdirectory of that path. */
if (strncmp(xendev->nodename, info->nodename, len))
return 0;
/* If the node name is longer, ensure it really is a subdirectory. */
if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
return 0;
info->dev = xendev;
get_device(dev);
return 1;
}
static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
{
struct xb_find_info info = { .nodename = path };
do {
info.dev = NULL;
bus_for_each_dev(bus, NULL, &info, cleanup_dev);
if (info.dev) {
device_unregister(&info.dev->dev);
put_device(&info.dev->dev);
}
} while (info.dev);
}
static void xenbus_dev_release(struct device *dev)
{
if (dev)
kfree(to_xenbus_device(dev));
}
static ssize_t xendev_show_nodename(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
static ssize_t xendev_show_devtype(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
static ssize_t xendev_show_modalias(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
}
static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename)
{
char devname[XEN_BUS_ID_SIZE];
int err;
struct xenbus_device *xendev;
size_t stringlen;
char *tmpstring;
enum xenbus_state state = xenbus_read_driver_state(nodename);
if (state != XenbusStateInitialising) {
/* Device is not new, so ignore it. This can happen if a
device is going away after switching to Closed. */
return 0;
}
stringlen = strlen(nodename) + 1 + strlen(type) + 1;
xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
if (!xendev)
return -ENOMEM;
xendev->state = XenbusStateInitialising;
/* Copy the strings into the extra space. */
tmpstring = (char *)(xendev + 1);
strcpy(tmpstring, nodename);
xendev->nodename = tmpstring;
tmpstring += strlen(tmpstring) + 1;
strcpy(tmpstring, type);
xendev->devicetype = tmpstring;
init_completion(&xendev->down);
xendev->dev.bus = &bus->bus;
xendev->dev.release = xenbus_dev_release;
err = bus->get_bus_id(devname, xendev->nodename);
if (err)
goto fail;
dev_set_name(&xendev->dev, devname);
/* Register with generic device framework. */
err = device_register(&xendev->dev);
if (err)
goto fail;
err = device_create_file(&xendev->dev, &dev_attr_nodename);
if (err)
goto fail_unregister;
err = device_create_file(&xendev->dev, &dev_attr_devtype);
if (err)
goto fail_remove_nodename;
err = device_create_file(&xendev->dev, &dev_attr_modalias);
if (err)
goto fail_remove_devtype;
return 0;
fail_remove_devtype:
device_remove_file(&xendev->dev, &dev_attr_devtype);
fail_remove_nodename:
device_remove_file(&xendev->dev, &dev_attr_nodename);
fail_unregister:
device_unregister(&xendev->dev);
fail:
kfree(xendev);
return err;
}
/* device/<typename>/<name> */
static int xenbus_probe_frontend(const char *type, const char *name)
{
char *nodename;
int err;
nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
xenbus_frontend.root, type, name);
if (!nodename)
return -ENOMEM;
DPRINTK("%s", nodename);
err = xenbus_probe_node(&xenbus_frontend, type, nodename);
kfree(nodename);
return err;
}
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
int err = 0;
char **dir;
unsigned int dir_n = 0;
int i;
dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
err = bus->probe(type, dir[i]);
if (err)
break;
}
kfree(dir);
return err;
}
int xenbus_probe_devices(struct xen_bus_type *bus)
{
int err = 0;
char **dir;
unsigned int i, dir_n;
dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
if (IS_ERR(dir))
return PTR_ERR(dir);
for (i = 0; i < dir_n; i++) {
err = xenbus_probe_device_type(bus, dir[i]);
if (err)
break;
}
kfree(dir);
return err;
}
static unsigned int char_count(const char *str, char c)
{
unsigned int i, ret = 0;
for (i = 0; str[i]; i++)
if (str[i] == c)
ret++;
return ret;
}
static int strsep_len(const char *str, char c, unsigned int len)
{
unsigned int i;
for (i = 0; str[i]; i++)
if (str[i] == c) {
if (len == 0)
return i;
len--;
}
return (len == 0) ? i : -ERANGE;
}
void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
{
int exists, rootlen;
struct xenbus_device *dev;
char type[XEN_BUS_ID_SIZE];
const char *p, *root;
if (char_count(node, '/') < 2)
return;
exists = xenbus_exists(XBT_NIL, node, "");
if (!exists) {
xenbus_cleanup_devices(node, &bus->bus);
return;
}
/* backend/<type>/... or device/<type>/... */
p = strchr(node, '/') + 1;
snprintf(type, XEN_BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
type[XEN_BUS_ID_SIZE-1] = '\0';
rootlen = strsep_len(node, '/', bus->levels);
if (rootlen < 0)
return;
root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
if (!root)
return;
dev = xenbus_device_find(root, &bus->bus);
if (!dev)
xenbus_probe_node(bus, type, root);
else
put_device(&dev->dev);
kfree(root);
}
EXPORT_SYMBOL_GPL(xenbus_dev_changed);
static void frontend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
DPRINTK("");
xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
}
/* We watch for devices appearing and vanishing. */
static struct xenbus_watch fe_watch = {
.node = "device",
.callback = frontend_changed,
};
static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
{
int err = 0;
struct xenbus_driver *drv;
struct xenbus_device *xdev;
DPRINTK("");
if (dev->driver == NULL)
return 0;
drv = to_xenbus_driver(dev->driver);
xdev = container_of(dev, struct xenbus_device, dev);
if (drv->suspend)
err = drv->suspend(xdev, state);
if (err)
printk(KERN_WARNING
"xenbus: suspend %s failed: %i\n", dev_name(dev), err);
return 0;
}
static int xenbus_dev_resume(struct device *dev)
{
int err;
struct xenbus_driver *drv;
struct xenbus_device *xdev;
DPRINTK("");
if (dev->driver == NULL)
return 0;
drv = to_xenbus_driver(dev->driver);
xdev = container_of(dev, struct xenbus_device, dev);
err = talk_to_otherend(xdev);
if (err) {
printk(KERN_WARNING
"xenbus: resume (talk_to_otherend) %s failed: %i\n",
dev_name(dev), err);
return err;
}
xdev->state = XenbusStateInitialising;
if (drv->resume) {
err = drv->resume(xdev);
if (err) {
printk(KERN_WARNING
"xenbus: resume %s failed: %i\n",
dev_name(dev), err);
return err;
}
}
err = watch_otherend(xdev);
if (err) {
printk(KERN_WARNING
"xenbus_probe: resume (watch_otherend) %s failed: "
"%d.\n", dev_name(dev), err);
return err;
}
return 0;
}
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
int xenstored_ready = 0;
int register_xenstore_notifier(struct notifier_block *nb)
{
int ret = 0;
if (xenstored_ready > 0)
ret = nb->notifier_call(nb, 0, NULL);
else
blocking_notifier_chain_register(&xenstore_chain, nb);
return ret;
}
EXPORT_SYMBOL_GPL(register_xenstore_notifier);
void unregister_xenstore_notifier(struct notifier_block *nb)
{
blocking_notifier_chain_unregister(&xenstore_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
void xenbus_probe(struct work_struct *unused)
{
BUG_ON((xenstored_ready <= 0));
/* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
register_xenbus_watch(&fe_watch);
xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
}
static int __init xenbus_probe_init(void)
{
int err = 0;
DPRINTK("");
err = -ENODEV;
if (!xen_domain())
goto out_error;
/* Register ourselves with the kernel bus subsystem */
err = bus_register(&xenbus_frontend.bus);
if (err)
goto out_error;
err = xenbus_backend_bus_register();
if (err)
goto out_unreg_front;
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (xen_initial_domain()) {
/* dom0 not yet supported */
} else {
xenstored_ready = 1;
xen_store_evtchn = xen_start_info->store_evtchn;
xen_store_mfn = xen_start_info->store_mfn;
}
xen_store_interface = mfn_to_virt(xen_store_mfn);
/* Initialize the interface to xenstore. */
err = xs_init();
if (err) {
printk(KERN_WARNING
"XENBUS: Error initializing xenstore comms: %i\n", err);
goto out_unreg_back;
}
if (!xen_initial_domain())
xenbus_probe(NULL);
#ifdef CONFIG_XEN_COMPAT_XENFS
/*
* Create xenfs mountpoint in /proc for compatibility with
* utilities that expect to find "xenbus" under "/proc/xen".
*/
proc_mkdir("xen", NULL);
#endif
return 0;
out_unreg_back:
xenbus_backend_bus_unregister();
out_unreg_front:
bus_unregister(&xenbus_frontend.bus);
out_error:
return err;
}
postcore_initcall(xenbus_probe_init);
MODULE_LICENSE("GPL");
static int is_device_connecting(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct device_driver *drv = data;
struct xenbus_driver *xendrv;
/*
* A device with no driver will never connect. We care only about
* devices which should currently be in the process of connecting.
*/
if (!dev->driver)
return 0;
/* Is this search limited to a particular driver? */
if (drv && (dev->driver != drv))
return 0;
xendrv = to_xenbus_driver(dev->driver);
return (xendev->state < XenbusStateConnected ||
(xendev->state == XenbusStateConnected &&
xendrv->is_ready && !xendrv->is_ready(xendev)));
}
static int exists_connecting_device(struct device_driver *drv)
{
return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
is_device_connecting);
}
static int print_device_status(struct device *dev, void *data)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
struct device_driver *drv = data;
/* Is this operation limited to a particular driver? */
if (drv && (dev->driver != drv))
return 0;
if (!dev->driver) {
/* Information only: is this too noisy? */
printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
xendev->nodename);
} else if (xendev->state < XenbusStateConnected) {
enum xenbus_state rstate = XenbusStateUnknown;
if (xendev->otherend)
rstate = xenbus_read_driver_state(xendev->otherend);
printk(KERN_WARNING "XENBUS: Timeout connecting "
"to device: %s (local state %d, remote state %d)\n",
xendev->nodename, xendev->state, rstate);
}
return 0;
}
/* We only wait for device setup after most initcalls have run. */
static int ready_to_wait_for_devices;
/*
* On a 5-minute timeout, wait for all devices currently configured. We need
* to do this to guarantee that the filesystems and / or network devices
* needed for boot are available, before we can allow the boot to proceed.
*
* This needs to be on a late_initcall, to happen after the frontend device
* drivers have been initialised, but before the root fs is mounted.
*
* A possible improvement here would be to have the tools add a per-device
* flag to the store entry, indicating whether it is needed at boot time.
* This would allow people who knew what they were doing to accelerate their
* boot slightly, but of course needs tools or manual intervention to set up
* those flags correctly.
*/
static void wait_for_devices(struct xenbus_driver *xendrv)
{
unsigned long start = jiffies;
struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
unsigned int seconds_waited = 0;
if (!ready_to_wait_for_devices || !xen_domain())
return;
while (exists_connecting_device(drv)) {
if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
if (!seconds_waited)
printk(KERN_WARNING "XENBUS: Waiting for "
"devices to initialise: ");
seconds_waited += 5;
printk("%us...", 300 - seconds_waited);
if (seconds_waited == 300)
break;
}
schedule_timeout_interruptible(HZ/10);
}
if (seconds_waited)
printk("\n");
bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
print_device_status);
}
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
ready_to_wait_for_devices = 1;
wait_for_devices(NULL);
return 0;
}
late_initcall(boot_wait_for_devices);
#endif

View File

@@ -0,0 +1,76 @@
/******************************************************************************
* xenbus_probe.h
*
* Talks to Xen Store to figure out what devices we have.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
* Copyright (C) 2005 XenSource Ltd.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef _XENBUS_PROBE_H
#define _XENBUS_PROBE_H
#define XEN_BUS_ID_SIZE 20
#ifdef CONFIG_XEN_BACKEND
extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
extern void xenbus_backend_probe_and_watch(void);
extern int xenbus_backend_bus_register(void);
extern void xenbus_backend_bus_unregister(void);
#else
static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
static inline void xenbus_backend_probe_and_watch(void) {}
static inline int xenbus_backend_bus_register(void) { return 0; }
static inline void xenbus_backend_bus_unregister(void) {}
#endif
struct xen_bus_type
{
char *root;
unsigned int levels;
int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
int (*probe)(const char *type, const char *dir);
struct bus_type bus;
};
extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev);
extern int xenbus_dev_remove(struct device *_dev);
extern int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner,
const char *mod_name);
extern int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
const char *nodename);
extern int xenbus_probe_devices(struct xen_bus_type *bus);
extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
#endif

View File

@@ -0,0 +1,864 @@
/******************************************************************************
* xenbus_xs.c
*
* This is the kernel equivalent of the "xs" library. We don't need everything
* and we use xenbus_comms for communication.
*
* Copyright (C) 2005 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <linux/unistd.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/fcntl.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <xen/xenbus.h>
#include "xenbus_comms.h"
struct xs_stored_msg {
struct list_head list;
struct xsd_sockmsg hdr;
union {
/* Queued replies. */
struct {
char *body;
} reply;
/* Queued watch events. */
struct {
struct xenbus_watch *handle;
char **vec;
unsigned int vec_size;
} watch;
} u;
};
struct xs_handle {
/* A list of replies. Currently only one will ever be outstanding. */
struct list_head reply_list;
spinlock_t reply_lock;
wait_queue_head_t reply_waitq;
/*
* Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
* response_mutex is never taken simultaneously with the other three.
*/
/* One request at a time. */
struct mutex request_mutex;
/* Protect xenbus reader thread against save/restore. */
struct mutex response_mutex;
/* Protect transactions against save/restore. */
struct rw_semaphore transaction_mutex;
/* Protect watch (de)register against save/restore. */
struct rw_semaphore watch_mutex;
};
static struct xs_handle xs_state;
/* List of registered watches, and a lock to protect it. */
static LIST_HEAD(watches);
static DEFINE_SPINLOCK(watches_lock);
/* List of pending watch callback events, and a lock to protect it. */
static LIST_HEAD(watch_events);
static DEFINE_SPINLOCK(watch_events_lock);
/*
* Details of the xenwatch callback kernel thread. The thread waits on the
* watch_events_waitq for work to do (queued on watch_events list). When it
* wakes up it acquires the xenwatch_mutex before reading the list and
* carrying out work.
*/
static pid_t xenwatch_pid;
static DEFINE_MUTEX(xenwatch_mutex);
static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
static int get_error(const char *errorstring)
{
unsigned int i;
for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
if (i == ARRAY_SIZE(xsd_errors) - 1) {
printk(KERN_WARNING
"XENBUS xen store gave: unknown error %s",
errorstring);
return EINVAL;
}
}
return xsd_errors[i].errnum;
}
static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
{
struct xs_stored_msg *msg;
char *body;
spin_lock(&xs_state.reply_lock);
while (list_empty(&xs_state.reply_list)) {
spin_unlock(&xs_state.reply_lock);
/* XXX FIXME: Avoid synchronous wait for response here. */
wait_event(xs_state.reply_waitq,
!list_empty(&xs_state.reply_list));
spin_lock(&xs_state.reply_lock);
}
msg = list_entry(xs_state.reply_list.next,
struct xs_stored_msg, list);
list_del(&msg->list);
spin_unlock(&xs_state.reply_lock);
*type = msg->hdr.type;
if (len)
*len = msg->hdr.len;
body = msg->u.reply.body;
kfree(msg);
return body;
}
void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
{
void *ret;
struct xsd_sockmsg req_msg = *msg;
int err;
if (req_msg.type == XS_TRANSACTION_START)
down_read(&xs_state.transaction_mutex);
mutex_lock(&xs_state.request_mutex);
err = xb_write(msg, sizeof(*msg) + msg->len);
if (err) {
msg->type = XS_ERROR;
ret = ERR_PTR(err);
} else
ret = read_reply(&msg->type, &msg->len);
mutex_unlock(&xs_state.request_mutex);
if ((msg->type == XS_TRANSACTION_END) ||
((req_msg.type == XS_TRANSACTION_START) &&
(msg->type == XS_ERROR)))
up_read(&xs_state.transaction_mutex);
return ret;
}
EXPORT_SYMBOL(xenbus_dev_request_and_reply);
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
static void *xs_talkv(struct xenbus_transaction t,
enum xsd_sockmsg_type type,
const struct kvec *iovec,
unsigned int num_vecs,
unsigned int *len)
{
struct xsd_sockmsg msg;
void *ret = NULL;
unsigned int i;
int err;
msg.tx_id = t.id;
msg.req_id = 0;
msg.type = type;
msg.len = 0;
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len;
mutex_lock(&xs_state.request_mutex);
err = xb_write(&msg, sizeof(msg));
if (err) {
mutex_unlock(&xs_state.request_mutex);
return ERR_PTR(err);
}
for (i = 0; i < num_vecs; i++) {
err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
if (err) {
mutex_unlock(&xs_state.request_mutex);
return ERR_PTR(err);
}
}
ret = read_reply(&msg.type, len);
mutex_unlock(&xs_state.request_mutex);
if (IS_ERR(ret))
return ret;
if (msg.type == XS_ERROR) {
err = get_error(ret);
kfree(ret);
return ERR_PTR(-err);
}
if (msg.type != type) {
if (printk_ratelimit())
printk(KERN_WARNING
"XENBUS unexpected type [%d], expected [%d]\n",
msg.type, type);
kfree(ret);
return ERR_PTR(-EINVAL);
}
return ret;
}
/* Simplified version of xs_talkv: single message. */
static void *xs_single(struct xenbus_transaction t,
enum xsd_sockmsg_type type,
const char *string,
unsigned int *len)
{
struct kvec iovec;
iovec.iov_base = (void *)string;
iovec.iov_len = strlen(string) + 1;
return xs_talkv(t, type, &iovec, 1, len);
}
/* Many commands only need an ack, don't care what it says. */
static int xs_error(char *reply)
{
if (IS_ERR(reply))
return PTR_ERR(reply);
kfree(reply);
return 0;
}
static unsigned int count_strings(const char *strings, unsigned int len)
{
unsigned int num;
const char *p;
for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
num++;
return num;
}
/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
static char *join(const char *dir, const char *name)
{
char *buffer;
if (strlen(name) == 0)
buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
else
buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
}
static char **split(char *strings, unsigned int len, unsigned int *num)
{
char *p, **ret;
/* Count the strings. */
*num = count_strings(strings, len);
/* Transfer to one big alloc for easy freeing. */
ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
if (!ret) {
kfree(strings);
return ERR_PTR(-ENOMEM);
}
memcpy(&ret[*num], strings, len);
kfree(strings);
strings = (char *)&ret[*num];
for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
ret[(*num)++] = p;
return ret;
}
char **xenbus_directory(struct xenbus_transaction t,
const char *dir, const char *node, unsigned int *num)
{
char *strings, *path;
unsigned int len;
path = join(dir, node);
if (IS_ERR(path))
return (char **)path;
strings = xs_single(t, XS_DIRECTORY, path, &len);
kfree(path);
if (IS_ERR(strings))
return (char **)strings;
return split(strings, len, num);
}
EXPORT_SYMBOL_GPL(xenbus_directory);
/* Check if a path exists. Return 1 if it does. */
int xenbus_exists(struct xenbus_transaction t,
const char *dir, const char *node)
{
char **d;
int dir_n;
d = xenbus_directory(t, dir, node, &dir_n);
if (IS_ERR(d))
return 0;
kfree(d);
return 1;
}
EXPORT_SYMBOL_GPL(xenbus_exists);
/* Get the value of a single file.
* Returns a kmalloced value: call free() on it after use.
* len indicates length in bytes.
*/
void *xenbus_read(struct xenbus_transaction t,
const char *dir, const char *node, unsigned int *len)
{
char *path;
void *ret;
path = join(dir, node);
if (IS_ERR(path))
return (void *)path;
ret = xs_single(t, XS_READ, path, len);
kfree(path);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_read);
/* Write the value of a single file.
* Returns -err on failure.
*/
int xenbus_write(struct xenbus_transaction t,
const char *dir, const char *node, const char *string)
{
const char *path;
struct kvec iovec[2];
int ret;
path = join(dir, node);
if (IS_ERR(path))
return PTR_ERR(path);
iovec[0].iov_base = (void *)path;
iovec[0].iov_len = strlen(path) + 1;
iovec[1].iov_base = (void *)string;
iovec[1].iov_len = strlen(string);
ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
kfree(path);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_write);
/* Create a new directory. */
int xenbus_mkdir(struct xenbus_transaction t,
const char *dir, const char *node)
{
char *path;
int ret;
path = join(dir, node);
if (IS_ERR(path))
return PTR_ERR(path);
ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
kfree(path);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_mkdir);
/* Destroy a file or directory (directories must be empty). */
int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
{
char *path;
int ret;
path = join(dir, node);
if (IS_ERR(path))
return PTR_ERR(path);
ret = xs_error(xs_single(t, XS_RM, path, NULL));
kfree(path);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_rm);
/* Start a transaction: changes by others will not be seen during this
* transaction, and changes will not be visible to others until end.
*/
int xenbus_transaction_start(struct xenbus_transaction *t)
{
char *id_str;
down_read(&xs_state.transaction_mutex);
id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
if (IS_ERR(id_str)) {
up_read(&xs_state.transaction_mutex);
return PTR_ERR(id_str);
}
t->id = simple_strtoul(id_str, NULL, 0);
kfree(id_str);
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_transaction_start);
/* End a transaction.
* If abandon is true, transaction is discarded instead of committed.
*/
int xenbus_transaction_end(struct xenbus_transaction t, int abort)
{
char abortstr[2];
int err;
if (abort)
strcpy(abortstr, "F");
else
strcpy(abortstr, "T");
err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
up_read(&xs_state.transaction_mutex);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_transaction_end);
/* Single read and scanf: returns -errno or num scanned. */
int xenbus_scanf(struct xenbus_transaction t,
const char *dir, const char *node, const char *fmt, ...)
{
va_list ap;
int ret;
char *val;
val = xenbus_read(t, dir, node, NULL);
if (IS_ERR(val))
return PTR_ERR(val);
va_start(ap, fmt);
ret = vsscanf(val, fmt, ap);
va_end(ap);
kfree(val);
/* Distinctive errno. */
if (ret == 0)
return -ERANGE;
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_scanf);
/* Single printf and write: returns -errno or 0. */
int xenbus_printf(struct xenbus_transaction t,
const char *dir, const char *node, const char *fmt, ...)
{
va_list ap;
int ret;
#define PRINTF_BUFFER_SIZE 4096
char *printf_buffer;
printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
if (printf_buffer == NULL)
return -ENOMEM;
va_start(ap, fmt);
ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
va_end(ap);
BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
ret = xenbus_write(t, dir, node, printf_buffer);
kfree(printf_buffer);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_printf);
/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
{
va_list ap;
const char *name;
int ret = 0;
va_start(ap, dir);
while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
const char *fmt = va_arg(ap, char *);
void *result = va_arg(ap, void *);
char *p;
p = xenbus_read(t, dir, name, NULL);
if (IS_ERR(p)) {
ret = PTR_ERR(p);
break;
}
if (fmt) {
if (sscanf(p, fmt, result) == 0)
ret = -EINVAL;
kfree(p);
} else
*(char **)result = p;
}
va_end(ap);
return ret;
}
EXPORT_SYMBOL_GPL(xenbus_gather);
static int xs_watch(const char *path, const char *token)
{
struct kvec iov[2];
iov[0].iov_base = (void *)path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (void *)token;
iov[1].iov_len = strlen(token) + 1;
return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
ARRAY_SIZE(iov), NULL));
}
static int xs_unwatch(const char *path, const char *token)
{
struct kvec iov[2];
iov[0].iov_base = (char *)path;
iov[0].iov_len = strlen(path) + 1;
iov[1].iov_base = (char *)token;
iov[1].iov_len = strlen(token) + 1;
return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
ARRAY_SIZE(iov), NULL));
}
static struct xenbus_watch *find_watch(const char *token)
{
struct xenbus_watch *i, *cmp;
cmp = (void *)simple_strtoul(token, NULL, 16);
list_for_each_entry(i, &watches, list)
if (i == cmp)
return i;
return NULL;
}
/* Register callback to watch this node. */
int register_xenbus_watch(struct xenbus_watch *watch)
{
/* Pointer in ascii is the token. */
char token[sizeof(watch) * 2 + 1];
int err;
sprintf(token, "%lX", (long)watch);
down_read(&xs_state.watch_mutex);
spin_lock(&watches_lock);
BUG_ON(find_watch(token));
list_add(&watch->list, &watches);
spin_unlock(&watches_lock);
err = xs_watch(watch->node, token);
/* Ignore errors due to multiple registration. */
if ((err != 0) && (err != -EEXIST)) {
spin_lock(&watches_lock);
list_del(&watch->list);
spin_unlock(&watches_lock);
}
up_read(&xs_state.watch_mutex);
return err;
}
EXPORT_SYMBOL_GPL(register_xenbus_watch);
void unregister_xenbus_watch(struct xenbus_watch *watch)
{
struct xs_stored_msg *msg, *tmp;
char token[sizeof(watch) * 2 + 1];
int err;
sprintf(token, "%lX", (long)watch);
down_read(&xs_state.watch_mutex);
spin_lock(&watches_lock);
BUG_ON(!find_watch(token));
list_del(&watch->list);
spin_unlock(&watches_lock);
err = xs_unwatch(watch->node, token);
if (err)
printk(KERN_WARNING
"XENBUS Failed to release watch %s: %i\n",
watch->node, err);
up_read(&xs_state.watch_mutex);
/* Make sure there are no callbacks running currently (unless
its us) */
if (current->pid != xenwatch_pid)
mutex_lock(&xenwatch_mutex);
/* Cancel pending watch events. */
spin_lock(&watch_events_lock);
list_for_each_entry_safe(msg, tmp, &watch_events, list) {
if (msg->u.watch.handle != watch)
continue;
list_del(&msg->list);
kfree(msg->u.watch.vec);
kfree(msg);
}
spin_unlock(&watch_events_lock);
if (current->pid != xenwatch_pid)
mutex_unlock(&xenwatch_mutex);
}
EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
void xs_suspend(void)
{
down_write(&xs_state.transaction_mutex);
down_write(&xs_state.watch_mutex);
mutex_lock(&xs_state.request_mutex);
mutex_lock(&xs_state.response_mutex);
}
void xs_resume(void)
{
struct xenbus_watch *watch;
char token[sizeof(watch) * 2 + 1];
xb_init_comms();
mutex_unlock(&xs_state.response_mutex);
mutex_unlock(&xs_state.request_mutex);
up_write(&xs_state.transaction_mutex);
/* No need for watches_lock: the watch_mutex is sufficient. */
list_for_each_entry(watch, &watches, list) {
sprintf(token, "%lX", (long)watch);
xs_watch(watch->node, token);
}
up_write(&xs_state.watch_mutex);
}
void xs_suspend_cancel(void)
{
mutex_unlock(&xs_state.response_mutex);
mutex_unlock(&xs_state.request_mutex);
up_write(&xs_state.watch_mutex);
up_write(&xs_state.transaction_mutex);
}
static int xenwatch_thread(void *unused)
{
struct list_head *ent;
struct xs_stored_msg *msg;
for (;;) {
wait_event_interruptible(watch_events_waitq,
!list_empty(&watch_events));
if (kthread_should_stop())
break;
mutex_lock(&xenwatch_mutex);
spin_lock(&watch_events_lock);
ent = watch_events.next;
if (ent != &watch_events)
list_del(ent);
spin_unlock(&watch_events_lock);
if (ent != &watch_events) {
msg = list_entry(ent, struct xs_stored_msg, list);
msg->u.watch.handle->callback(
msg->u.watch.handle,
(const char **)msg->u.watch.vec,
msg->u.watch.vec_size);
kfree(msg->u.watch.vec);
kfree(msg);
}
mutex_unlock(&xenwatch_mutex);
}
return 0;
}
static int process_msg(void)
{
struct xs_stored_msg *msg;
char *body;
int err;
/*
* We must disallow save/restore while reading a xenstore message.
* A partial read across s/r leaves us out of sync with xenstored.
*/
for (;;) {
err = xb_wait_for_data_to_read();
if (err)
return err;
mutex_lock(&xs_state.response_mutex);
if (xb_data_to_read())
break;
/* We raced with save/restore: pending data 'disappeared'. */
mutex_unlock(&xs_state.response_mutex);
}
msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH);
if (msg == NULL) {
err = -ENOMEM;
goto out;
}
err = xb_read(&msg->hdr, sizeof(msg->hdr));
if (err) {
kfree(msg);
goto out;
}
body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
if (body == NULL) {
kfree(msg);
err = -ENOMEM;
goto out;
}
err = xb_read(body, msg->hdr.len);
if (err) {
kfree(body);
kfree(msg);
goto out;
}
body[msg->hdr.len] = '\0';
if (msg->hdr.type == XS_WATCH_EVENT) {
msg->u.watch.vec = split(body, msg->hdr.len,
&msg->u.watch.vec_size);
if (IS_ERR(msg->u.watch.vec)) {
err = PTR_ERR(msg->u.watch.vec);
kfree(msg);
goto out;
}
spin_lock(&watches_lock);
msg->u.watch.handle = find_watch(
msg->u.watch.vec[XS_WATCH_TOKEN]);
if (msg->u.watch.handle != NULL) {
spin_lock(&watch_events_lock);
list_add_tail(&msg->list, &watch_events);
wake_up(&watch_events_waitq);
spin_unlock(&watch_events_lock);
} else {
kfree(msg->u.watch.vec);
kfree(msg);
}
spin_unlock(&watches_lock);
} else {
msg->u.reply.body = body;
spin_lock(&xs_state.reply_lock);
list_add_tail(&msg->list, &xs_state.reply_list);
spin_unlock(&xs_state.reply_lock);
wake_up(&xs_state.reply_waitq);
}
out:
mutex_unlock(&xs_state.response_mutex);
return err;
}
static int xenbus_thread(void *unused)
{
int err;
for (;;) {
err = process_msg();
if (err)
printk(KERN_WARNING "XENBUS error %d while reading "
"message\n", err);
if (kthread_should_stop())
break;
}
return 0;
}
int xs_init(void)
{
int err;
struct task_struct *task;
INIT_LIST_HEAD(&xs_state.reply_list);
spin_lock_init(&xs_state.reply_lock);
init_waitqueue_head(&xs_state.reply_waitq);
mutex_init(&xs_state.request_mutex);
mutex_init(&xs_state.response_mutex);
init_rwsem(&xs_state.transaction_mutex);
init_rwsem(&xs_state.watch_mutex);
/* Initialize the shared memory rings to talk to xenstored */
err = xb_init_comms();
if (err)
return err;
task = kthread_run(xenwatch_thread, NULL, "xenwatch");
if (IS_ERR(task))
return PTR_ERR(task);
xenwatch_pid = task->pid;
task = kthread_run(xenbus_thread, NULL, "xenbus");
if (IS_ERR(task))
return PTR_ERR(task);
return 0;
}

View File

@@ -0,0 +1,217 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (C) IBM Corp. 2006
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#include <linux/gfp.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <xen/xencomm.h>
#include <xen/interface/xen.h>
#include <asm/xen/xencomm.h> /* for xencomm_is_phys_contiguous() */
static int xencomm_init(struct xencomm_desc *desc,
void *buffer, unsigned long bytes)
{
unsigned long recorded = 0;
int i = 0;
while ((recorded < bytes) && (i < desc->nr_addrs)) {
unsigned long vaddr = (unsigned long)buffer + recorded;
unsigned long paddr;
int offset;
int chunksz;
offset = vaddr % PAGE_SIZE; /* handle partial pages */
chunksz = min(PAGE_SIZE - offset, bytes - recorded);
paddr = xencomm_vtop(vaddr);
if (paddr == ~0UL) {
printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
__func__, vaddr);
return -EINVAL;
}
desc->address[i++] = paddr;
recorded += chunksz;
}
if (recorded < bytes) {
printk(KERN_DEBUG
"%s: could only translate %ld of %ld bytes\n",
__func__, recorded, bytes);
return -ENOSPC;
}
/* mark remaining addresses invalid (just for safety) */
while (i < desc->nr_addrs)
desc->address[i++] = XENCOMM_INVALID;
desc->magic = XENCOMM_MAGIC;
return 0;
}
static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
void *buffer, unsigned long bytes)
{
struct xencomm_desc *desc;
unsigned long buffer_ulong = (unsigned long)buffer;
unsigned long start = buffer_ulong & PAGE_MASK;
unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
unsigned long size = sizeof(*desc) +
sizeof(desc->address[0]) * nr_addrs;
/*
* slab allocator returns at least sizeof(void*) aligned pointer.
* When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
* cross page boundary.
*/
if (sizeof(*desc) > sizeof(void *)) {
unsigned long order = get_order(size);
desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
order);
if (desc == NULL)
return NULL;
desc->nr_addrs =
((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
sizeof(*desc->address);
} else {
desc = kmalloc(size, gfp_mask);
if (desc == NULL)
return NULL;
desc->nr_addrs = nr_addrs;
}
return desc;
}
void xencomm_free(struct xencomm_handle *desc)
{
if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
if (sizeof(*desc__) > sizeof(void *)) {
unsigned long size = sizeof(*desc__) +
sizeof(desc__->address[0]) * desc__->nr_addrs;
unsigned long order = get_order(size);
free_pages((unsigned long)__va(desc), order);
} else
kfree(__va(desc));
}
}
static int xencomm_create(void *buffer, unsigned long bytes,
struct xencomm_desc **ret, gfp_t gfp_mask)
{
struct xencomm_desc *desc;
int rc;
pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
if (bytes == 0) {
/* don't create a descriptor; Xen recognizes NULL. */
BUG_ON(buffer != NULL);
*ret = NULL;
return 0;
}
BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
desc = xencomm_alloc(gfp_mask, buffer, bytes);
if (!desc) {
printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
return -ENOMEM;
}
rc = xencomm_init(desc, buffer, bytes);
if (rc) {
printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
xencomm_free((struct xencomm_handle *)__pa(desc));
return rc;
}
*ret = desc;
return 0;
}
static struct xencomm_handle *xencomm_create_inline(void *ptr)
{
unsigned long paddr;
BUG_ON(!xencomm_is_phys_contiguous((unsigned long)ptr));
paddr = (unsigned long)xencomm_pa(ptr);
BUG_ON(paddr & XENCOMM_INLINE_FLAG);
return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
}
/* "mini" routine, for stack-based communications: */
static int xencomm_create_mini(void *buffer,
unsigned long bytes, struct xencomm_mini *xc_desc,
struct xencomm_desc **ret)
{
int rc = 0;
struct xencomm_desc *desc;
BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
desc = (void *)xc_desc;
desc->nr_addrs = XENCOMM_MINI_ADDRS;
rc = xencomm_init(desc, buffer, bytes);
if (!rc)
*ret = desc;
return rc;
}
struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
{
int rc;
struct xencomm_desc *desc;
if (xencomm_is_phys_contiguous((unsigned long)ptr))
return xencomm_create_inline(ptr);
rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
if (rc || desc == NULL)
return NULL;
return xencomm_pa(desc);
}
struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
struct xencomm_mini *xc_desc)
{
int rc;
struct xencomm_desc *desc = NULL;
if (xencomm_is_phys_contiguous((unsigned long)ptr))
return xencomm_create_inline(ptr);
rc = xencomm_create_mini(ptr, bytes, xc_desc,
&desc);
if (rc)
return NULL;
return xencomm_pa(desc);
}

View File

@@ -0,0 +1,3 @@
obj-$(CONFIG_XENFS) += xenfs.o
xenfs-objs = super.o xenbus.o

View File

@@ -0,0 +1,81 @@
/*
* xenfs.c - a filesystem for passing info between the a domain and
* the hypervisor.
*
* 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
* and /proc/xen compatibility mount point.
* Turned xenfs into a loadable module.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/magic.h>
#include "xenfs.h"
#include <asm/xen/hypervisor.h>
MODULE_DESCRIPTION("Xen filesystem");
MODULE_LICENSE("GPL");
static ssize_t capabilities_read(struct file *file, char __user *buf,
size_t size, loff_t *off)
{
char *tmp = "";
if (xen_initial_domain())
tmp = "control_d\n";
return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
}
static const struct file_operations capabilities_file_ops = {
.read = capabilities_read,
};
static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
{
static struct tree_descr xenfs_files[] = {
[1] = {},
{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
{""},
};
return simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
}
static int xenfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data, struct vfsmount *mnt)
{
return get_sb_single(fs_type, flags, data, xenfs_fill_super, mnt);
}
static struct file_system_type xenfs_type = {
.owner = THIS_MODULE,
.name = "xenfs",
.get_sb = xenfs_get_sb,
.kill_sb = kill_litter_super,
};
static int __init xenfs_init(void)
{
if (xen_pv_domain())
return register_filesystem(&xenfs_type);
printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
return 0;
}
static void __exit xenfs_exit(void)
{
if (xen_pv_domain())
unregister_filesystem(&xenfs_type);
}
module_init(xenfs_init);
module_exit(xenfs_exit);

View File

@@ -0,0 +1,593 @@
/*
* Driver giving user-space access to the kernel's xenbus connection
* to xenstore.
*
* Copyright (c) 2005, Christian Limpach
* Copyright (c) 2005, Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Changes:
* 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem
* and /proc/xen compatibility mount point.
* Turned xenfs into a loadable module.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/notifier.h>
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/string.h>
#include "xenfs.h"
#include "../xenbus/xenbus_comms.h"
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
/*
* An element of a list of outstanding transactions, for which we're
* still waiting a reply.
*/
struct xenbus_transaction_holder {
struct list_head list;
struct xenbus_transaction handle;
};
/*
* A buffer of data on the queue.
*/
struct read_buffer {
struct list_head list;
unsigned int cons;
unsigned int len;
char msg[];
};
struct xenbus_file_priv {
/*
* msgbuffer_mutex is held while partial requests are built up
* and complete requests are acted on. It therefore protects
* the "transactions" and "watches" lists, and the partial
* request length and buffer.
*
* reply_mutex protects the reply being built up to return to
* usermode. It nests inside msgbuffer_mutex but may be held
* alone during a watch callback.
*/
struct mutex msgbuffer_mutex;
/* In-progress transactions */
struct list_head transactions;
/* Active watches. */
struct list_head watches;
/* Partial request. */
unsigned int len;
union {
struct xsd_sockmsg msg;
char buffer[PAGE_SIZE];
} u;
/* Response queue. */
struct mutex reply_mutex;
struct list_head read_buffers;
wait_queue_head_t read_waitq;
};
/* Read out any raw xenbus messages queued up. */
static ssize_t xenbus_file_read(struct file *filp,
char __user *ubuf,
size_t len, loff_t *ppos)
{
struct xenbus_file_priv *u = filp->private_data;
struct read_buffer *rb;
unsigned i;
int ret;
mutex_lock(&u->reply_mutex);
while (list_empty(&u->read_buffers)) {
mutex_unlock(&u->reply_mutex);
ret = wait_event_interruptible(u->read_waitq,
!list_empty(&u->read_buffers));
if (ret)
return ret;
mutex_lock(&u->reply_mutex);
}
rb = list_entry(u->read_buffers.next, struct read_buffer, list);
i = 0;
while (i < len) {
unsigned sz = min((unsigned)len - i, rb->len - rb->cons);
ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz);
i += sz - ret;
rb->cons += sz - ret;
if (ret != sz) {
if (i == 0)
i = -EFAULT;
goto out;
}
/* Clear out buffer if it has been consumed */
if (rb->cons == rb->len) {
list_del(&rb->list);
kfree(rb);
if (list_empty(&u->read_buffers))
break;
rb = list_entry(u->read_buffers.next,
struct read_buffer, list);
}
}
out:
mutex_unlock(&u->reply_mutex);
return i;
}
/*
* Add a buffer to the queue. Caller must hold the appropriate lock
* if the queue is not local. (Commonly the caller will build up
* multiple queued buffers on a temporary local list, and then add it
* to the appropriate list under lock once all the buffers have een
* successfully allocated.)
*/
static int queue_reply(struct list_head *queue, const void *data, size_t len)
{
struct read_buffer *rb;
if (len == 0)
return 0;
rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
if (rb == NULL)
return -ENOMEM;
rb->cons = 0;
rb->len = len;
memcpy(rb->msg, data, len);
list_add_tail(&rb->list, queue);
return 0;
}
/*
* Free all the read_buffer s on a list.
* Caller must have sole reference to list.
*/
static void queue_cleanup(struct list_head *list)
{
struct read_buffer *rb;
while (!list_empty(list)) {
rb = list_entry(list->next, struct read_buffer, list);
list_del(list->next);
kfree(rb);
}
}
struct watch_adapter {
struct list_head list;
struct xenbus_watch watch;
struct xenbus_file_priv *dev_data;
char *token;
};
static void free_watch_adapter(struct watch_adapter *watch)
{
kfree(watch->watch.node);
kfree(watch->token);
kfree(watch);
}
static struct watch_adapter *alloc_watch_adapter(const char *path,
const char *token)
{
struct watch_adapter *watch;
watch = kzalloc(sizeof(*watch), GFP_KERNEL);
if (watch == NULL)
goto out_fail;
watch->watch.node = kstrdup(path, GFP_KERNEL);
if (watch->watch.node == NULL)
goto out_free;
watch->token = kstrdup(token, GFP_KERNEL);
if (watch->token == NULL)
goto out_free;
return watch;
out_free:
free_watch_adapter(watch);
out_fail:
return NULL;
}
static void watch_fired(struct xenbus_watch *watch,
const char **vec,
unsigned int len)
{
struct watch_adapter *adap;
struct xsd_sockmsg hdr;
const char *path, *token;
int path_len, tok_len, body_len, data_len = 0;
int ret;
LIST_HEAD(staging_q);
adap = container_of(watch, struct watch_adapter, watch);
path = vec[XS_WATCH_PATH];
token = adap->token;
path_len = strlen(path) + 1;
tok_len = strlen(token) + 1;
if (len > 2)
data_len = vec[len] - vec[2] + 1;
body_len = path_len + tok_len + data_len;
hdr.type = XS_WATCH_EVENT;
hdr.len = body_len;
mutex_lock(&adap->dev_data->reply_mutex);
ret = queue_reply(&staging_q, &hdr, sizeof(hdr));
if (!ret)
ret = queue_reply(&staging_q, path, path_len);
if (!ret)
ret = queue_reply(&staging_q, token, tok_len);
if (!ret && len > 2)
ret = queue_reply(&staging_q, vec[2], data_len);
if (!ret) {
/* success: pass reply list onto watcher */
list_splice_tail(&staging_q, &adap->dev_data->read_buffers);
wake_up(&adap->dev_data->read_waitq);
} else
queue_cleanup(&staging_q);
mutex_unlock(&adap->dev_data->reply_mutex);
}
static int xenbus_write_transaction(unsigned msg_type,
struct xenbus_file_priv *u)
{
int rc;
void *reply;
struct xenbus_transaction_holder *trans = NULL;
LIST_HEAD(staging_q);
if (msg_type == XS_TRANSACTION_START) {
trans = kmalloc(sizeof(*trans), GFP_KERNEL);
if (!trans) {
rc = -ENOMEM;
goto out;
}
}
reply = xenbus_dev_request_and_reply(&u->u.msg);
if (IS_ERR(reply)) {
kfree(trans);
rc = PTR_ERR(reply);
goto out;
}
if (msg_type == XS_TRANSACTION_START) {
trans->handle.id = simple_strtoul(reply, NULL, 0);
list_add(&trans->list, &u->transactions);
} else if (msg_type == XS_TRANSACTION_END) {
list_for_each_entry(trans, &u->transactions, list)
if (trans->handle.id == u->u.msg.tx_id)
break;
BUG_ON(&trans->list == &u->transactions);
list_del(&trans->list);
kfree(trans);
}
mutex_lock(&u->reply_mutex);
rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg));
if (!rc)
rc = queue_reply(&staging_q, reply, u->u.msg.len);
if (!rc) {
list_splice_tail(&staging_q, &u->read_buffers);
wake_up(&u->read_waitq);
} else {
queue_cleanup(&staging_q);
}
mutex_unlock(&u->reply_mutex);
kfree(reply);
out:
return rc;
}
static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
{
struct watch_adapter *watch, *tmp_watch;
char *path, *token;
int err, rc;
LIST_HEAD(staging_q);
path = u->u.buffer + sizeof(u->u.msg);
token = memchr(path, 0, u->u.msg.len);
if (token == NULL) {
rc = -EILSEQ;
goto out;
}
token++;
if (msg_type == XS_WATCH) {
watch = alloc_watch_adapter(path, token);
if (watch == NULL) {
rc = -ENOMEM;
goto out;
}
watch->watch.callback = watch_fired;
watch->dev_data = u;
err = register_xenbus_watch(&watch->watch);
if (err) {
free_watch_adapter(watch);
rc = err;
goto out;
}
list_add(&watch->list, &u->watches);
} else {
list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
if (!strcmp(watch->token, token) &&
!strcmp(watch->watch.node, path)) {
unregister_xenbus_watch(&watch->watch);
list_del(&watch->list);
free_watch_adapter(watch);
break;
}
}
}
/* Success. Synthesize a reply to say all is OK. */
{
struct {
struct xsd_sockmsg hdr;
char body[3];
} __packed reply = {
{
.type = msg_type,
.len = sizeof(reply.body)
},
"OK"
};
mutex_lock(&u->reply_mutex);
rc = queue_reply(&u->read_buffers, &reply, sizeof(reply));
mutex_unlock(&u->reply_mutex);
}
out:
return rc;
}
static ssize_t xenbus_file_write(struct file *filp,
const char __user *ubuf,
size_t len, loff_t *ppos)
{
struct xenbus_file_priv *u = filp->private_data;
uint32_t msg_type;
int rc = len;
int ret;
LIST_HEAD(staging_q);
/*
* We're expecting usermode to be writing properly formed
* xenbus messages. If they write an incomplete message we
* buffer it up. Once it is complete, we act on it.
*/
/*
* Make sure concurrent writers can't stomp all over each
* other's messages and make a mess of our partial message
* buffer. We don't make any attemppt to stop multiple
* writers from making a mess of each other's incomplete
* messages; we're just trying to guarantee our own internal
* consistency and make sure that single writes are handled
* atomically.
*/
mutex_lock(&u->msgbuffer_mutex);
/* Get this out of the way early to avoid confusion */
if (len == 0)
goto out;
/* Can't write a xenbus message larger we can buffer */
if ((len + u->len) > sizeof(u->u.buffer)) {
/* On error, dump existing buffer */
u->len = 0;
rc = -EINVAL;
goto out;
}
ret = copy_from_user(u->u.buffer + u->len, ubuf, len);
if (ret == len) {
rc = -EFAULT;
goto out;
}
/* Deal with a partial copy. */
len -= ret;
rc = len;
u->len += len;
/* Return if we haven't got a full message yet */
if (u->len < sizeof(u->u.msg))
goto out; /* not even the header yet */
/* If we're expecting a message that's larger than we can
possibly send, dump what we have and return an error. */
if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) {
rc = -E2BIG;
u->len = 0;
goto out;
}
if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
goto out; /* incomplete data portion */
/*
* OK, now we have a complete message. Do something with it.
*/
msg_type = u->u.msg.type;
switch (msg_type) {
case XS_TRANSACTION_START:
case XS_TRANSACTION_END:
case XS_DIRECTORY:
case XS_READ:
case XS_GET_PERMS:
case XS_RELEASE:
case XS_GET_DOMAIN_PATH:
case XS_WRITE:
case XS_MKDIR:
case XS_RM:
case XS_SET_PERMS:
/* Send out a transaction */
ret = xenbus_write_transaction(msg_type, u);
break;
case XS_WATCH:
case XS_UNWATCH:
/* (Un)Ask for some path to be watched for changes */
ret = xenbus_write_watch(msg_type, u);
break;
default:
ret = -EINVAL;
break;
}
if (ret != 0)
rc = ret;
/* Buffered message consumed */
u->len = 0;
out:
mutex_unlock(&u->msgbuffer_mutex);
return rc;
}
static int xenbus_file_open(struct inode *inode, struct file *filp)
{
struct xenbus_file_priv *u;
if (xen_store_evtchn == 0)
return -ENOENT;
nonseekable_open(inode, filp);
u = kzalloc(sizeof(*u), GFP_KERNEL);
if (u == NULL)
return -ENOMEM;
INIT_LIST_HEAD(&u->transactions);
INIT_LIST_HEAD(&u->watches);
INIT_LIST_HEAD(&u->read_buffers);
init_waitqueue_head(&u->read_waitq);
mutex_init(&u->reply_mutex);
mutex_init(&u->msgbuffer_mutex);
filp->private_data = u;
return 0;
}
static int xenbus_file_release(struct inode *inode, struct file *filp)
{
struct xenbus_file_priv *u = filp->private_data;
struct xenbus_transaction_holder *trans, *tmp;
struct watch_adapter *watch, *tmp_watch;
/*
* No need for locking here because there are no other users,
* by definition.
*/
list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
xenbus_transaction_end(trans->handle, 1);
list_del(&trans->list);
kfree(trans);
}
list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
unregister_xenbus_watch(&watch->watch);
list_del(&watch->list);
free_watch_adapter(watch);
}
kfree(u);
return 0;
}
static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
{
struct xenbus_file_priv *u = file->private_data;
poll_wait(file, &u->read_waitq, wait);
if (!list_empty(&u->read_buffers))
return POLLIN | POLLRDNORM;
return 0;
}
const struct file_operations xenbus_file_ops = {
.read = xenbus_file_read,
.write = xenbus_file_write,
.open = xenbus_file_open,
.release = xenbus_file_release,
.poll = xenbus_file_poll,
};

View File

@@ -0,0 +1,6 @@
#ifndef _XENFS_XENBUS_H
#define _XENFS_XENBUS_H
extern const struct file_operations xenbus_file_ops;
#endif /* _XENFS_XENBUS_H */