diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2011-05-12 08:06:06 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2011-05-12 08:06:06 -0500 |
commit | 89bb563f6912b3f538d6bdf5833881ed7cdbd923 (patch) | |
tree | 601d288a7cac548a018cd5f6bcb1c09af032ee9a | |
parent | 0225e254ae81c5638463cda8f5730f31619113b6 (diff) | |
parent | c962247883ffd957dd7f3bccb519803e2775ced2 (diff) |
Merge remote-tracking branch 'agraf/xen-next' into staging
-rw-r--r-- | Makefile.target | 14 | ||||
-rw-r--r-- | arch_init.c | 5 | ||||
-rw-r--r-- | arch_init.h | 1 | ||||
-rwxr-xr-x | configure | 71 | ||||
-rw-r--r-- | cpu-common.h | 1 | ||||
-rw-r--r-- | exec.c | 86 | ||||
-rw-r--r-- | hw/boards.h | 1 | ||||
-rw-r--r-- | hw/pc.c | 28 | ||||
-rw-r--r-- | hw/pc.h | 11 | ||||
-rw-r--r-- | hw/pc_piix.c | 71 | ||||
-rw-r--r-- | hw/pci.c | 2 | ||||
-rw-r--r-- | hw/piix_pci.c | 49 | ||||
-rw-r--r-- | hw/xen.h | 41 | ||||
-rw-r--r-- | hw/xen_backend.c | 421 | ||||
-rw-r--r-- | hw/xen_backend.h | 6 | ||||
-rw-r--r-- | hw/xen_common.h | 106 | ||||
-rw-r--r-- | hw/xen_disk.c | 496 | ||||
-rw-r--r-- | hw/xen_domainbuild.c | 3 | ||||
-rw-r--r-- | hw/xen_machine_pv.c | 1 | ||||
-rw-r--r-- | hw/xen_nic.c | 265 | ||||
-rw-r--r-- | qemu-config.c | 14 | ||||
-rw-r--r-- | qemu-options.hx | 10 | ||||
-rw-r--r-- | sysemu.h | 2 | ||||
-rw-r--r-- | trace-events | 13 | ||||
-rw-r--r-- | vl.c | 136 | ||||
-rw-r--r-- | xen-all.c | 605 | ||||
-rw-r--r-- | xen-mapcache-stub.c | 44 | ||||
-rw-r--r-- | xen-mapcache.c | 375 | ||||
-rw-r--r-- | xen-mapcache.h | 37 | ||||
-rw-r--r-- | xen-stub.c | 41 |
30 files changed, 2343 insertions, 613 deletions
diff --git a/Makefile.target b/Makefile.target index 21f864afd2..2e281a4588 100644 --- a/Makefile.target +++ b/Makefile.target @@ -206,7 +206,19 @@ QEMU_CFLAGS += $(VNC_JPEG_CFLAGS) QEMU_CFLAGS += $(VNC_PNG_CFLAGS) # xen backend driver support -obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o +obj-i386-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o + +ifeq ($(TARGET_BASE_ARCH), i386) + CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y) +else + CONFIG_NO_XEN = y +endif +# xen support +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y) +obj-i386-$(CONFIG_XEN) += xen-all.o +obj-$(CONFIG_NO_XEN) += xen-stub.o +obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o # Inter-VM PCI shared memory CONFIG_IVSHMEM = diff --git a/arch_init.c b/arch_init.c index 0c09f9118e..484b39d4dd 100644 --- a/arch_init.c +++ b/arch_init.c @@ -709,6 +709,11 @@ int audio_available(void) #endif } +int tcg_available(void) +{ + return 1; +} + int kvm_available(void) { #ifdef CONFIG_KVM diff --git a/arch_init.h b/arch_init.h index 86ebc149bc..2de9f0852d 100644 --- a/arch_init.h +++ b/arch_init.h @@ -27,6 +27,7 @@ void do_smbios_option(const char *optarg); void cpudef_init(void); int audio_available(void); void audio_init(qemu_irq *isa_pic, PCIBus *pci_bus); +int tcg_available(void); int kvm_available(void); int xen_available(void); @@ -127,6 +127,7 @@ vnc_jpeg="" vnc_png="" vnc_thread="no" xen="" +xen_ctrl_version="" linux_aio="" attr="" vhost_net="" @@ -1180,20 +1181,81 @@ fi if test "$xen" != "no" ; then xen_libs="-lxenstore -lxenctrl -lxenguest" + + # Xen unstable cat > $TMPC <<EOF #include <xenctrl.h> #include <xs.h> -int main(void) { xs_daemon_open(); xc_interface_open(); return 0; } +#include <stdint.h> +#include <xen/hvm/hvm_info_table.h> +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc; + xs_daemon_open(); + xc = xc_interface_open(0, 0, 0); + 
xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_gnttab_open(NULL, 0); + return 0; +} EOF if compile_prog "" "$xen_libs" ; then + xen_ctrl_version=410 xen=yes - libs_softmmu="$xen_libs $libs_softmmu" + + # Xen 4.0.0 + elif ( + cat > $TMPC <<EOF +#include <xenctrl.h> +#include <xs.h> +#include <stdint.h> +#include <xen/hvm/hvm_info_table.h> +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xs_daemon_open(); + xc_interface_open(); + xc_gnttab_open(); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + return 0; +} +EOF + compile_prog "" "$xen_libs" + ) ; then + xen_ctrl_version=400 + xen=yes + + # Xen 3.3.0, 3.4.0 + elif ( + cat > $TMPC <<EOF +#include <xenctrl.h> +#include <xs.h> +int main(void) { + xs_daemon_open(); + xc_interface_open(); + xc_gnttab_open(); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + return 0; +} +EOF + compile_prog "" "$xen_libs" + ) ; then + xen_ctrl_version=330 + xen=yes + + # Xen not found or unsupported else if test "$xen" = "yes" ; then feature_not_found "xen" fi xen=no fi + + if test "$xen" = yes; then + libs_softmmu="$xen_libs $libs_softmmu" + fi fi ########################################## @@ -2855,6 +2917,7 @@ if test "$bluez" = "yes" ; then fi if test "$xen" = "yes" ; then echo "CONFIG_XEN=y" >> $config_host_mak + echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak fi if test "$io_thread" = "yes" ; then echo "CONFIG_IOTHREAD=y" >> $config_host_mak @@ -3235,7 +3298,11 @@ echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak case "$target_arch2" in i386|x86_64) if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then + target_phys_bits=64 echo "CONFIG_XEN=y" >> $config_target_mak + if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then + echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak + fi fi esac case "$target_arch2" in diff --git a/cpu-common.h b/cpu-common.h index 6410cccda5..151c32c1f2 100644 --- a/cpu-common.h +++ b/cpu-common.h @@ -67,6 +67,7 @@ 
void *qemu_get_ram_ptr(ram_addr_t addr); /* Same but slower, to use for migration, where the order of * RAMBlocks must not change. */ void *qemu_safe_ram_ptr(ram_addr_t addr); +void qemu_put_ram_ptr(void *addr); /* This should not be used by devices. */ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); @@ -32,6 +32,7 @@ #include "hw/qdev.h" #include "osdep.h" #include "kvm.h" +#include "hw/xen.h" #include "qemu-timer.h" #if defined(CONFIG_USER_ONLY) #include <qemu.h> @@ -51,6 +52,8 @@ #include <libutil.h> #endif #endif +#else /* !CONFIG_USER_ONLY */ +#include "xen-mapcache.h" #endif //#define DEBUG_TB_INVALIDATE @@ -2889,6 +2892,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } } + new_block->offset = find_ram_offset(size); if (host) { new_block->host = host; new_block->flags |= RAM_PREALLOC_MASK; @@ -2911,13 +2915,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + if (xen_mapcache_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } #endif qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); } } - - new_block->offset = find_ram_offset(size); new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -2962,7 +2968,11 @@ void qemu_ram_free(ram_addr_t addr) #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_mapcache_enabled()) { + qemu_invalidate_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -3051,6 +3061,16 @@ void *qemu_get_ram_ptr(ram_addr_t addr) QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); } + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because 
we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } else if (block->host == NULL) { + block->host = xen_map_block(block->offset, block->length); + } + } return block->host + (addr - block->offset); } } @@ -3070,6 +3090,16 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr - block->offset < block->length) { + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } else if (block->host == NULL) { + block->host = xen_map_block(block->offset, block->length); + } + } return block->host + (addr - block->offset); } } @@ -3080,17 +3110,48 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) return NULL; } +void qemu_put_ram_ptr(void *addr) +{ + trace_qemu_put_ram_ptr(addr); + + if (xen_mapcache_enabled()) { + RAMBlock *block; + + QLIST_FOREACH(block, &ram_list.blocks, next) { + if (addr == block->host) { + break; + } + } + if (block && block->host) { + xen_unmap_block(block->host, block->length); + block->host = NULL; + } else { + qemu_map_cache_unlock(addr); + } + } +} + int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. 
*/ + if (block->host == NULL) { + continue; + } if (host - block->host < block->length) { *ram_addr = block->offset + (host - block->host); return 0; } } + + if (xen_mapcache_enabled()) { + *ram_addr = qemu_ram_addr_from_mapcache(ptr); + return 0; + } + return -1; } @@ -3785,6 +3846,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, cpu_physical_memory_set_dirty_flags( addr1, (0xff & ~CODE_DIRTY_FLAG)); } + qemu_put_ram_ptr(ptr); } } else { if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && @@ -3812,9 +3874,9 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, } } else { /* RAM case */ - ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) + - (addr & ~TARGET_PAGE_MASK); - memcpy(buf, ptr, l); + ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK); + memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l); + qemu_put_ram_ptr(ptr); } } len -= l; @@ -3855,6 +3917,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr, /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); + qemu_put_ram_ptr(ptr); } len -= l; buf += l; @@ -3996,6 +4059,15 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, access_len -= l; } } + if (xen_mapcache_enabled()) { + uint8_t *buffer1 = buffer; + uint8_t *end_buffer = buffer + len; + + while (buffer1 < end_buffer) { + qemu_put_ram_ptr(buffer1); + buffer1 += TARGET_PAGE_SIZE; + } + } return; } if (is_write) { diff --git a/hw/boards.h b/hw/boards.h index 6f0f0d7925..716fd7b1a6 100644 --- a/hw/boards.h +++ b/hw/boards.h @@ -27,6 +27,7 @@ typedef struct QEMUMachine { no_cdrom:1, no_sdcard:1; int is_default; + const char *default_machine_opts; GlobalProperty *compat_props; struct QEMUMachine *next; } QEMUMachine; @@ -957,29 +957,18 @@ void pc_cpus_init(const char *cpu_model) } } -void pc_memory_init(ram_addr_t ram_size, - const char *kernel_filename, +void pc_memory_init(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, - ram_addr_t *below_4g_mem_size_p, - 
ram_addr_t *above_4g_mem_size_p) + ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size) { char *filename; int ret, linux_boot, i; ram_addr_t ram_addr, bios_offset, option_rom_offset; - ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; int bios_size, isa_bios_size; void *fw_cfg; - if (ram_size >= 0xe0000000 ) { - above_4g_mem_size = ram_size - 0xe0000000; - below_4g_mem_size = 0xe0000000; - } else { - below_4g_mem_size = ram_size; - } - *above_4g_mem_size_p = above_4g_mem_size; - *below_4g_mem_size_p = below_4g_mem_size; - linux_boot = (kernel_filename != NULL); /* allocate RAM */ @@ -1093,7 +1082,8 @@ static void cpu_request_exit(void *opaque, int irq, int level) } void pc_basic_device_init(qemu_irq *isa_irq, - ISADevice **rtc_state) + ISADevice **rtc_state, + bool no_vmport) { int i; DriveInfo *fd[MAX_FD]; @@ -1138,8 +1128,12 @@ void pc_basic_device_init(qemu_irq *isa_irq, a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2); i8042 = isa_create_simple("i8042"); i8042_setup_a20_line(i8042, &a20_line[0]); - vmport_init(); - vmmouse = isa_try_create("vmmouse"); + if (!no_vmport) { + vmport_init(); + vmmouse = isa_try_create("vmmouse"); + } else { + vmmouse = NULL; + } if (vmmouse) { qdev_prop_set_ptr(&vmmouse->qdev, "ps2_mouse", i8042); qdev_init_nofail(&vmmouse->qdev); @@ -129,16 +129,16 @@ void pc_cmos_set_s3_resume(void *opaque, int irq, int level); void pc_acpi_smi_interrupt(void *opaque, int irq, int level); void pc_cpus_init(const char *cpu_model); -void pc_memory_init(ram_addr_t ram_size, - const char *kernel_filename, +void pc_memory_init(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, - ram_addr_t *below_4g_mem_size_p, - ram_addr_t *above_4g_mem_size_p); + ram_addr_t below_4g_mem_size, + ram_addr_t above_4g_mem_size); qemu_irq *pc_allocate_cpu_irq(void); void pc_vga_init(PCIBus *pci_bus); void pc_basic_device_init(qemu_irq *isa_irq, - ISADevice **rtc_state); + ISADevice **rtc_state, + bool 
no_vmport); void pc_init_ne2k_isa(NICInfo *nd); void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, const char *boot_device, @@ -176,6 +176,7 @@ struct PCII440FXState; typedef struct PCII440FXState PCII440FXState; PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn, qemu_irq *pic, ram_addr_t ram_size); +PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size); void i440fx_init_memory_mappings(PCII440FXState *d); /* piix4.c */ diff --git a/hw/pc_piix.c b/hw/pc_piix.c index a85214b7f1..9a22a8afc8 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -38,6 +38,10 @@ #include "arch_init.h" #include "blockdev.h" #include "smbus.h" +#include "xen.h" +#ifdef CONFIG_XEN +# include <xen/hvm/hvm_info_table.h> +#endif #define MAX_IDE_BUS 2 @@ -92,12 +96,26 @@ static void pc_init1(ram_addr_t ram_size, kvmclock_create(); } + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + above_4g_mem_size = 0; + below_4g_mem_size = ram_size; + } + /* allocate ram and load rom/bios */ - pc_memory_init(ram_size, kernel_filename, kernel_cmdline, initrd_filename, - &below_4g_mem_size, &above_4g_mem_size); + if (!xen_enabled()) { + pc_memory_init(kernel_filename, kernel_cmdline, initrd_filename, + below_4g_mem_size, above_4g_mem_size); + } - cpu_irq = pc_allocate_cpu_irq(); - i8259 = i8259_init(cpu_irq[0]); + if (!xen_enabled()) { + cpu_irq = pc_allocate_cpu_irq(); + i8259 = i8259_init(cpu_irq[0]); + } else { + i8259 = xen_interrupt_controller_init(); + } isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state)); isa_irq_state->i8259 = i8259; if (pci_enabled) { @@ -106,7 +124,11 @@ static void pc_init1(ram_addr_t ram_size, isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24); if (pci_enabled) { - pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); + if (!xen_enabled()) { + pci_bus = i440fx_init(&i440fx_state, 
&piix3_devfn, isa_irq, ram_size); + } else { + pci_bus = i440fx_xen_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); + } } else { pci_bus = NULL; i440fx_state = NULL; @@ -119,7 +141,7 @@ static void pc_init1(ram_addr_t ram_size, pc_vga_init(pci_enabled? pci_bus: NULL); /* init basic PC hardware */ - pc_basic_device_init(isa_irq, &rtc_state); + pc_basic_device_init(isa_irq, &rtc_state, xen_enabled()); for(i = 0; i < nb_nics; i++) { NICInfo *nd = &nd_table[i]; @@ -157,7 +179,11 @@ static void pc_init1(ram_addr_t ram_size, if (pci_enabled && acpi_enabled) { i2c_bus *smbus; - cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1); + if (!xen_enabled()) { + cmos_s3 = qemu_allocate_irqs(pc_cmos_set_s3_resume, rtc_state, 1); + } else { + cmos_s3 = qemu_allocate_irqs(xen_cmos_set_s3_resume, rtc_state, 1); + } smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1); /* TODO: Populate SPD eeprom data. */ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, @@ -213,6 +239,24 @@ static void pc_init_isa(ram_addr_t ram_size, initrd_filename, cpu_model, 0, 1); } +#ifdef CONFIG_XEN +static void pc_xen_hvm_init(ram_addr_t ram_size, + const char *boot_device, + const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + const char *cpu_model) +{ + if (xen_hvm_init() != 0) { + hw_error("xen hardware virtual machine initialisation failed"); + } + pc_init_pci_no_kvmclock(ram_size, boot_device, + kernel_filename, kernel_cmdline, + initrd_filename, cpu_model); + xen_vcpu_init(); +} +#endif + static QEMUMachine pc_machine = { .name = "pc-0.14", .alias = "pc", @@ -377,6 +421,16 @@ static QEMUMachine isapc_machine = { .max_cpus = 1, }; +#ifdef CONFIG_XEN +static QEMUMachine xenfv_machine = { + .name = "xenfv", + .desc = "Xen Fully-virtualized PC", + .init = pc_xen_hvm_init, + .max_cpus = HVM_MAX_VCPUS, + .default_machine_opts = "accel=xen", +}; +#endif + static void pc_machine_init(void) { qemu_register_machine(&pc_machine); @@ 
-385,6 +439,9 @@ static void pc_machine_init(void) qemu_register_machine(&pc_machine_v0_11); qemu_register_machine(&pc_machine_v0_10); qemu_register_machine(&isapc_machine); +#ifdef CONFIG_XEN + qemu_register_machine(&xenfv_machine); +#endif } machine_init(pc_machine_init); @@ -1922,6 +1922,8 @@ static int pci_add_option_rom(PCIDevice *pdev, bool is_default_rom) pci_patch_ids(pdev, ptr, size); } + qemu_put_ram_ptr(ptr); + pci_register_bar(pdev, PCI_ROM_SLOT, size, 0, pci_map_option_rom); diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 5f0d92f10d..7f1c4cca31 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -29,6 +29,7 @@ #include "isa.h" #include "sysbus.h" #include "range.h" +#include "xen.h" /* * I440FX chipset data sheet. @@ -172,6 +173,13 @@ static void i440fx_write_config(PCIDevice *dev, } } +static void i440fx_write_config_xen(PCIDevice *dev, + uint32_t address, uint32_t val, int len) +{ + xen_piix_pci_write_config_client(address, val, len); + i440fx_write_config(dev, address, val, len); +} + static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) { PCII440FXState *d = opaque; @@ -239,7 +247,10 @@ static int i440fx_initfn(PCIDevice *dev) return 0; } -PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size) +static PCIBus *i440fx_common_init(const char *device_name, + PCII440FXState **pi440fx_state, + int *piix3_devfn, + qemu_irq *pic, ram_addr_t ram_size) { DeviceState *dev; PCIBus *b; @@ -253,13 +264,13 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq * s->bus = b; qdev_init_nofail(dev); - d = pci_create_simple(b, 0, "i440FX"); + d = pci_create_simple(b, 0, device_name); *pi440fx_state = DO_UPCAST(PCII440FXState, dev, d); piix3 = DO_UPCAST(PIIX3State, dev, pci_create_simple_multifunction(b, -1, true, "PIIX3")); piix3->pic = pic; - pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, piix3, PIIX_NUM_PIRQS); + (*pi440fx_state)->piix3 = piix3; *piix3_devfn = 
piix3->dev.devfn; @@ -272,6 +283,30 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq * return b; } +PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, + qemu_irq *pic, ram_addr_t ram_size) +{ + PCIBus *b; + + b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, pic, ram_size); + pci_bus_irqs(b, piix3_set_irq, pci_slot_get_pirq, (*pi440fx_state)->piix3, + PIIX_NUM_PIRQS); + + return b; +} + +PCIBus *i440fx_xen_init(PCII440FXState **pi440fx_state, int *piix3_devfn, + qemu_irq *pic, ram_addr_t ram_size) +{ + PCIBus *b; + + b = i440fx_common_init("i440FX-xen", pi440fx_state, piix3_devfn, pic, ram_size); + pci_bus_irqs(b, xen_piix3_set_irq, xen_pci_slot_get_pirq, + (*pi440fx_state)->piix3, PIIX_NUM_PIRQS); + + return b; +} + /* PIIX3 PCI to ISA bridge */ static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) { @@ -430,6 +465,14 @@ static PCIDeviceInfo i440fx_info[] = { .init = i440fx_initfn, .config_write = i440fx_write_config, },{ + .qdev.name = "i440FX-xen", + .qdev.desc = "Host bridge", + .qdev.size = sizeof(PCII440FXState), + .qdev.vmsd = &vmstate_i440fx, + .qdev.no_user = 1, + .init = i440fx_initfn, + .config_write = i440fx_write_config_xen, + },{ .qdev.name = "PIIX3", .qdev.desc = "ISA bridge", .qdev.size = sizeof(PIIX3State), @@ -8,6 +8,8 @@ */ #include <inttypes.h> +#include "qemu-common.h" + /* xen-machine.c */ enum xen_mode { XEN_EMULATE = 0, // xen emulation, using xenner (default) @@ -18,4 +20,43 @@ enum xen_mode { extern uint32_t xen_domid; extern enum xen_mode xen_mode; +extern int xen_allowed; + +static inline int xen_enabled(void) +{ +#ifdef CONFIG_XEN + return xen_allowed; +#else + return 0; +#endif +} + +static inline int xen_mapcache_enabled(void) +{ +#ifdef CONFIG_XEN_MAPCACHE + return xen_enabled(); +#else + return 0; +#endif +} + +int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); +void xen_piix3_set_irq(void *opaque, int irq_num, int level); +void 
xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); +void xen_cmos_set_s3_resume(void *opaque, int irq, int level); + +qemu_irq *xen_interrupt_controller_init(void); + +int xen_init(void); +int xen_hvm_init(void); +void xen_vcpu_init(void); + +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY) +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); +#endif + +#if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400 +# define HVM_MAX_VCPUS 32 +#endif + #endif /* QEMU_HW_XEN_H */ diff --git a/hw/xen_backend.c b/hw/xen_backend.c index a2e408fa0e..d881fa2f70 100644 --- a/hw/xen_backend.c +++ b/hw/xen_backend.c @@ -43,7 +43,8 @@ /* ------------------------------------------------------------- */ /* public */ -int xen_xc; +XenXC xen_xc = XC_HANDLER_INITIAL_VALUE; +XenGnttab xen_xcg = XC_HANDLER_INITIAL_VALUE; struct xs_handle *xenstore = NULL; const char *xen_protocol; @@ -58,8 +59,9 @@ int xenstore_write_str(const char *base, const char *node, const char *val) char abspath[XEN_BUFSIZE]; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - if (!xs_write(xenstore, 0, abspath, val, strlen(val))) - return -1; + if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + return -1; + } return 0; } @@ -94,8 +96,9 @@ int xenstore_read_int(const char *base, const char *node, int *ival) int rc = -1; val = xenstore_read_str(base, node); - if (val && 1 == sscanf(val, "%d", ival)) - rc = 0; + if (val && 1 == sscanf(val, "%d", ival)) { + rc = 0; + } qemu_free(val); return rc; } @@ -134,16 +137,16 @@ int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival) const char *xenbus_strstate(enum xenbus_state state) { - static const char *const name[] = { - [ XenbusStateUnknown ] = "Unknown", - [ XenbusStateInitialising ] = "Initialising", - [ XenbusStateInitWait ] = "InitWait", - [ XenbusStateInitialised ] = "Initialised", - [ XenbusStateConnected ] = "Connected", - [ XenbusStateClosing ] = "Closing", - [ XenbusStateClosed ] 
= "Closed", - }; - return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; } int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state) @@ -151,10 +154,11 @@ int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state) int rc; rc = xenstore_write_be_int(xendev, "state", state); - if (rc < 0) - return rc; + if (rc < 0) { + return rc; + } xen_be_printf(xendev, 1, "backend state: %s -> %s\n", - xenbus_strstate(xendev->be_state), xenbus_strstate(state)); + xenbus_strstate(xendev->be_state), xenbus_strstate(state)); xendev->be_state = state; return 0; } @@ -166,13 +170,16 @@ struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev) struct XenDevice *xendev; QTAILQ_FOREACH(xendev, &xendevs, next) { - if (xendev->dom != dom) - continue; - if (xendev->dev != dev) - continue; - if (strcmp(xendev->type, type) != 0) - continue; - return xendev; + if (xendev->dom != dom) { + continue; + } + if (xendev->dev != dev) { + continue; + } + if (strcmp(xendev->type, type) != 0) { + continue; + } + return xendev; } return NULL; } @@ -187,8 +194,9 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, char *dom0; xendev = xen_be_find_xendev(type, dom, dev); - if (xendev) - return xendev; + if (xendev) { + return xendev; + } /* init new xendev */ xendev = qemu_mallocz(ops->size); @@ -199,38 +207,39 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, dom0 = xs_get_domain_path(xenstore, 0); snprintf(xendev->be, sizeof(xendev->be), "%s/backend/%s/%d/%d", - dom0, xendev->type, xendev->dom, xendev->dev); + 
dom0, xendev->type, xendev->dom, xendev->dev); snprintf(xendev->name, sizeof(xendev->name), "%s-%d", - xendev->type, xendev->dev); + xendev->type, xendev->dev); free(dom0); xendev->debug = debug; xendev->local_port = -1; - xendev->evtchndev = xc_evtchn_open(); - if (xendev->evtchndev < 0) { - xen_be_printf(NULL, 0, "can't open evtchn device\n"); - qemu_free(xendev); - return NULL; + xendev->evtchndev = xen_xc_evtchn_open(NULL, 0); + if (xendev->evtchndev == XC_HANDLER_INITIAL_VALUE) { + xen_be_printf(NULL, 0, "can't open evtchn device\n"); + qemu_free(xendev); + return NULL; } fcntl(xc_evtchn_fd(xendev->evtchndev), F_SETFD, FD_CLOEXEC); if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { - xendev->gnttabdev = xc_gnttab_open(); - if (xendev->gnttabdev < 0) { - xen_be_printf(NULL, 0, "can't open gnttab device\n"); - xc_evtchn_close(xendev->evtchndev); - qemu_free(xendev); - return NULL; - } + xendev->gnttabdev = xen_xc_gnttab_open(NULL, 0); + if (xendev->gnttabdev == XC_HANDLER_INITIAL_VALUE) { + xen_be_printf(NULL, 0, "can't open gnttab device\n"); + xc_evtchn_close(xendev->evtchndev); + qemu_free(xendev); + return NULL; + } } else { - xendev->gnttabdev = -1; + xendev->gnttabdev = XC_HANDLER_INITIAL_VALUE; } QTAILQ_INSERT_TAIL(&xendevs, xendev, next); - if (xendev->ops->alloc) - xendev->ops->alloc(xendev); + if (xendev->ops->alloc) { + xendev->ops->alloc(xendev); + } return xendev; } @@ -251,28 +260,33 @@ static struct XenDevice *xen_be_del_xendev(int dom, int dev) xendev = xnext; xnext = xendev->next.tqe_next; - if (xendev->dom != dom) - continue; - if (xendev->dev != dev && dev != -1) - continue; - - if (xendev->ops->free) - xendev->ops->free(xendev); - - if (xendev->fe) { - char token[XEN_BUFSIZE]; - snprintf(token, sizeof(token), "fe:%p", xendev); - xs_unwatch(xenstore, xendev->fe, token); - qemu_free(xendev->fe); - } - - if (xendev->evtchndev >= 0) - xc_evtchn_close(xendev->evtchndev); - if (xendev->gnttabdev >= 0) - xc_gnttab_close(xendev->gnttabdev); - - 
QTAILQ_REMOVE(&xendevs, xendev, next); - qemu_free(xendev); + if (xendev->dom != dom) { + continue; + } + if (xendev->dev != dev && dev != -1) { + continue; + } + + if (xendev->ops->free) { + xendev->ops->free(xendev); + } + + if (xendev->fe) { + char token[XEN_BUFSIZE]; + snprintf(token, sizeof(token), "fe:%p", xendev); + xs_unwatch(xenstore, xendev->fe, token); + qemu_free(xendev->fe); + } + + if (xendev->evtchndev != XC_HANDLER_INITIAL_VALUE) { + xc_evtchn_close(xendev->evtchndev); + } + if (xendev->gnttabdev != XC_HANDLER_INITIAL_VALUE) { + xc_gnttab_close(xendev->gnttabdev); + } + + QTAILQ_REMOVE(&xendevs, xendev, next); + qemu_free(xendev); } return NULL; } @@ -285,14 +299,16 @@ static struct XenDevice *xen_be_del_xendev(int dom, int dev) static void xen_be_backend_changed(struct XenDevice *xendev, const char *node) { if (node == NULL || strcmp(node, "online") == 0) { - if (xenstore_read_be_int(xendev, "online", &xendev->online) == -1) - xendev->online = 0; + if (xenstore_read_be_int(xendev, "online", &xendev->online) == -1) { + xendev->online = 0; + } } if (node) { - xen_be_printf(xendev, 2, "backend update: %s\n", node); - if (xendev->ops->backend_changed) - xendev->ops->backend_changed(xendev, node); + xen_be_printf(xendev, 2, "backend update: %s\n", node); + if (xendev->ops->backend_changed) { + xendev->ops->backend_changed(xendev, node); + } } } @@ -301,25 +317,29 @@ static void xen_be_frontend_changed(struct XenDevice *xendev, const char *node) int fe_state; if (node == NULL || strcmp(node, "state") == 0) { - if (xenstore_read_fe_int(xendev, "state", &fe_state) == -1) - fe_state = XenbusStateUnknown; - if (xendev->fe_state != fe_state) - xen_be_printf(xendev, 1, "frontend state: %s -> %s\n", - xenbus_strstate(xendev->fe_state), - xenbus_strstate(fe_state)); - xendev->fe_state = fe_state; + if (xenstore_read_fe_int(xendev, "state", &fe_state) == -1) { + fe_state = XenbusStateUnknown; + } + if (xendev->fe_state != fe_state) { + xen_be_printf(xendev, 1, 
"frontend state: %s -> %s\n", + xenbus_strstate(xendev->fe_state), + xenbus_strstate(fe_state)); + } + xendev->fe_state = fe_state; } if (node == NULL || strcmp(node, "protocol") == 0) { - qemu_free(xendev->protocol); - xendev->protocol = xenstore_read_fe_str(xendev, "protocol"); - if (xendev->protocol) - xen_be_printf(xendev, 1, "frontend protocol: %s\n", xendev->protocol); + qemu_free(xendev->protocol); + xendev->protocol = xenstore_read_fe_str(xendev, "protocol"); + if (xendev->protocol) { + xen_be_printf(xendev, 1, "frontend protocol: %s\n", xendev->protocol); + } } if (node) { - xen_be_printf(xendev, 2, "frontend update: %s\n", node); - if (xendev->ops->frontend_changed) - xendev->ops->frontend_changed(xendev, node); + xen_be_printf(xendev, 2, "frontend update: %s\n", node); + if (xendev->ops->frontend_changed) { + xendev->ops->frontend_changed(xendev, node); + } } } @@ -340,28 +360,28 @@ static int xen_be_try_setup(struct XenDevice *xendev) int be_state; if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { - xen_be_printf(xendev, 0, "reading backend state failed\n"); - return -1; + xen_be_printf(xendev, 0, "reading backend state failed\n"); + return -1; } if (be_state != XenbusStateInitialising) { - xen_be_printf(xendev, 0, "initial backend state is wrong (%s)\n", - xenbus_strstate(be_state)); - return -1; + xen_be_printf(xendev, 0, "initial backend state is wrong (%s)\n", + xenbus_strstate(be_state)); + return -1; } xendev->fe = xenstore_read_be_str(xendev, "frontend"); if (xendev->fe == NULL) { - xen_be_printf(xendev, 0, "reading frontend path failed\n"); - return -1; + xen_be_printf(xendev, 0, "reading frontend path failed\n"); + return -1; } /* setup frontend watch */ snprintf(token, sizeof(token), "fe:%p", xendev); if (!xs_watch(xenstore, xendev->fe, token)) { - xen_be_printf(xendev, 0, "watching frontend path (%s) failed\n", - xendev->fe); - return -1; + xen_be_printf(xendev, 0, "watching frontend path (%s) failed\n", + xendev->fe); + return 
-1; } xen_be_set_state(xendev, XenbusStateInitialising); @@ -383,15 +403,16 @@ static int xen_be_try_init(struct XenDevice *xendev) int rc = 0; if (!xendev->online) { - xen_be_printf(xendev, 1, "not online\n"); - return -1; + xen_be_printf(xendev, 1, "not online\n"); + return -1; } - if (xendev->ops->init) - rc = xendev->ops->init(xendev); + if (xendev->ops->init) { + rc = xendev->ops->init(xendev); + } if (rc != 0) { - xen_be_printf(xendev, 1, "init() failed\n"); - return rc; + xen_be_printf(xendev, 1, "init() failed\n"); + return rc; } xenstore_write_be_str(xendev, "hotplug-status", "connected"); @@ -411,20 +432,21 @@ static int xen_be_try_connect(struct XenDevice *xendev) int rc = 0; if (xendev->fe_state != XenbusStateInitialised && - xendev->fe_state != XenbusStateConnected) { - if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { - xen_be_printf(xendev, 2, "frontend not ready, ignoring\n"); - } else { - xen_be_printf(xendev, 2, "frontend not ready (yet)\n"); - return -1; - } + xendev->fe_state != XenbusStateConnected) { + if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { + xen_be_printf(xendev, 2, "frontend not ready, ignoring\n"); + } else { + xen_be_printf(xendev, 2, "frontend not ready (yet)\n"); + return -1; + } } - if (xendev->ops->connect) - rc = xendev->ops->connect(xendev); + if (xendev->ops->connect) { + rc = xendev->ops->connect(xendev); + } if (rc != 0) { - xen_be_printf(xendev, 0, "connect() failed\n"); - return rc; + xen_be_printf(xendev, 0, "connect() failed\n"); + return rc; } xen_be_set_state(xendev, XenbusStateConnected); @@ -440,10 +462,12 @@ static void xen_be_disconnect(struct XenDevice *xendev, enum xenbus_state state) { if (xendev->be_state != XenbusStateClosing && xendev->be_state != XenbusStateClosed && - xendev->ops->disconnect) - xendev->ops->disconnect(xendev); - if (xendev->be_state != state) + xendev->ops->disconnect) { + xendev->ops->disconnect(xendev); + } + if (xendev->be_state != state) { xen_be_set_state(xendev, state); + } 
} /* @@ -451,8 +475,9 @@ static void xen_be_disconnect(struct XenDevice *xendev, enum xenbus_state state) */ static int xen_be_try_reset(struct XenDevice *xendev) { - if (xendev->fe_state != XenbusStateInitialising) + if (xendev->fe_state != XenbusStateInitialising) { return -1; + } xen_be_printf(xendev, 1, "device reset (for re-connect)\n"); xen_be_set_state(xendev, XenbusStateInitialising); @@ -468,31 +493,32 @@ void xen_be_check_state(struct XenDevice *xendev) /* frontend may request shutdown from almost anywhere */ if (xendev->fe_state == XenbusStateClosing || - xendev->fe_state == XenbusStateClosed) { - xen_be_disconnect(xendev, xendev->fe_state); - return; + xendev->fe_state == XenbusStateClosed) { + xen_be_disconnect(xendev, xendev->fe_state); + return; } /* check for possible backend state transitions */ for (;;) { - switch (xendev->be_state) { - case XenbusStateUnknown: - rc = xen_be_try_setup(xendev); - break; - case XenbusStateInitialising: - rc = xen_be_try_init(xendev); - break; - case XenbusStateInitWait: - rc = xen_be_try_connect(xendev); - break; + switch (xendev->be_state) { + case XenbusStateUnknown: + rc = xen_be_try_setup(xendev); + break; + case XenbusStateInitialising: + rc = xen_be_try_init(xendev); + break; + case XenbusStateInitWait: + rc = xen_be_try_connect(xendev); + break; case XenbusStateClosed: rc = xen_be_try_reset(xendev); break; - default: - rc = -1; - } - if (rc != 0) - break; + default: + rc = -1; + } + if (rc != 0) { + break; + } } } @@ -511,26 +537,28 @@ static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) snprintf(path, sizeof(path), "%s/backend/%s/%d", dom0, type, dom); free(dom0); if (!xs_watch(xenstore, path, token)) { - xen_be_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", path); - return -1; + xen_be_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", path); + return -1; } /* look for backends */ dev = xs_directory(xenstore, 0, path, &cdev); - if (!dev) - return 0; + if 
(!dev) { + return 0; + } for (j = 0; j < cdev; j++) { - xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); - if (xendev == NULL) - continue; - xen_be_check_state(xendev); + xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); + if (xendev == NULL) { + continue; + } + xen_be_check_state(xendev); } free(dev); return 0; } static void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops) + struct XenDevOps *ops) { struct XenDevice *xendev; char path[XEN_BUFSIZE], *dom0; @@ -539,25 +567,28 @@ static void xenstore_update_be(char *watch, char *type, int dom, dom0 = xs_get_domain_path(xenstore, 0); len = snprintf(path, sizeof(path), "%s/backend/%s/%d", dom0, type, dom); free(dom0); - if (strncmp(path, watch, len) != 0) - return; + if (strncmp(path, watch, len) != 0) { + return; + } if (sscanf(watch+len, "/%u/%255s", &dev, path) != 2) { - strcpy(path, ""); - if (sscanf(watch+len, "/%u", &dev) != 1) - dev = -1; + strcpy(path, ""); + if (sscanf(watch+len, "/%u", &dev) != 1) { + dev = -1; + } + } + if (dev == -1) { + return; } - if (dev == -1) - return; if (0) { - /* FIXME: detect devices being deleted from xenstore ... */ - xen_be_del_xendev(dom, dev); + /* FIXME: detect devices being deleted from xenstore ... 
*/ + xen_be_del_xendev(dom, dev); } xendev = xen_be_get_xendev(type, dom, dev, ops); if (xendev != NULL) { - xen_be_backend_changed(xendev, path); - xen_be_check_state(xendev); + xen_be_backend_changed(xendev, path); + xen_be_check_state(xendev); } } @@ -567,10 +598,12 @@ static void xenstore_update_fe(char *watch, struct XenDevice *xendev) unsigned int len; len = strlen(xendev->fe); - if (strncmp(xendev->fe, watch, len) != 0) - return; - if (watch[len] != '/') - return; + if (strncmp(xendev->fe, watch, len) != 0) { + return; + } + if (watch[len] != '/') { + return; + } node = watch + len + 1; xen_be_frontend_changed(xendev, node); @@ -584,14 +617,17 @@ static void xenstore_update(void *unused) unsigned int dom, count; vec = xs_read_watch(xenstore, &count); - if (vec == NULL) - goto cleanup; + if (vec == NULL) { + goto cleanup; + } if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, - &type, &dom, &ops) == 3) - xenstore_update_be(vec[XS_WATCH_PATH], (void*)type, dom, (void*)ops); - if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) - xenstore_update_fe(vec[XS_WATCH_PATH], (void*)ptr); + &type, &dom, &ops) == 3) { + xenstore_update_be(vec[XS_WATCH_PATH], (void*)type, dom, (void*)ops); + } + if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { + xenstore_update_fe(vec[XS_WATCH_PATH], (void*)ptr); + } cleanup: free(vec); @@ -604,14 +640,15 @@ static void xen_be_evtchn_event(void *opaque) port = xc_evtchn_pending(xendev->evtchndev); if (port != xendev->local_port) { - xen_be_printf(xendev, 0, "xc_evtchn_pending returned %d (expected %d)\n", - port, xendev->local_port); - return; + xen_be_printf(xendev, 0, "xc_evtchn_pending returned %d (expected %d)\n", + port, xendev->local_port); + return; } xc_evtchn_unmask(xendev->evtchndev, port); - if (xendev->ops->event) - xendev->ops->event(xendev); + if (xendev->ops->event) { + xendev->ops->event(xendev); + } } /* -------------------------------------------------------------------- */ @@ -620,17 
+657,17 @@ int xen_be_init(void) { xenstore = xs_daemon_open(); if (!xenstore) { - xen_be_printf(NULL, 0, "can't connect to xenstored\n"); - return -1; + xen_be_printf(NULL, 0, "can't connect to xenstored\n"); + return -1; } - if (qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL) < 0) - goto err; + if (qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL) < 0) { + goto err; + } - xen_xc = xc_interface_open(); - if (xen_xc == -1) { - xen_be_printf(NULL, 0, "can't open xen interface\n"); - goto err; + if (xen_xc == XC_HANDLER_INITIAL_VALUE) { + /* Check if xen_init() have been called */ + goto err; } return 0; @@ -649,24 +686,26 @@ int xen_be_register(const char *type, struct XenDevOps *ops) int xen_be_bind_evtchn(struct XenDevice *xendev) { - if (xendev->local_port != -1) - return 0; + if (xendev->local_port != -1) { + return 0; + } xendev->local_port = xc_evtchn_bind_interdomain - (xendev->evtchndev, xendev->dom, xendev->remote_port); + (xendev->evtchndev, xendev->dom, xendev->remote_port); if (xendev->local_port == -1) { - xen_be_printf(xendev, 0, "xc_evtchn_bind_interdomain failed\n"); - return -1; + xen_be_printf(xendev, 0, "xc_evtchn_bind_interdomain failed\n"); + return -1; } xen_be_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); qemu_set_fd_handler(xc_evtchn_fd(xendev->evtchndev), - xen_be_evtchn_event, NULL, xendev); + xen_be_evtchn_event, NULL, xendev); return 0; } void xen_be_unbind_evtchn(struct XenDevice *xendev) { - if (xendev->local_port == -1) - return; + if (xendev->local_port == -1) { + return; + } qemu_set_fd_handler(xc_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL); xc_evtchn_unbind(xendev->evtchndev, xendev->local_port); xen_be_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); @@ -690,17 +729,21 @@ void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ... 
va_list args; if (xendev) { - if (msg_level > xendev->debug) + if (msg_level > xendev->debug) { return; + } qemu_log("xen be: %s: ", xendev->name); - if (msg_level == 0) + if (msg_level == 0) { fprintf(stderr, "xen be: %s: ", xendev->name); + } } else { - if (msg_level > debug) + if (msg_level > debug) { return; + } qemu_log("xen be core: "); - if (msg_level == 0) + if (msg_level == 0) { fprintf(stderr, "xen be core: "); + } } va_start(args, fmt); qemu_log_vprintf(fmt, args); diff --git a/hw/xen_backend.h b/hw/xen_backend.h index 1b428e3bf4..6401c85a7e 100644 --- a/hw/xen_backend.h +++ b/hw/xen_backend.h @@ -45,8 +45,8 @@ struct XenDevice { int remote_port; int local_port; - int evtchndev; - int gnttabdev; + XenEvtchn evtchndev; + XenGnttab gnttabdev; struct XenDevOps *ops; QTAILQ_ENTRY(XenDevice) next; @@ -55,7 +55,7 @@ struct XenDevice { /* ------------------------------------------------------------- */ /* variables */ -extern int xen_xc; +extern XenXC xen_xc; extern struct xs_handle *xenstore; extern const char *xen_protocol; diff --git a/hw/xen_common.h b/hw/xen_common.h index 8a55b44f0b..a1958a0af1 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -1,6 +1,8 @@ #ifndef QEMU_HW_XEN_COMMON_H #define QEMU_HW_XEN_COMMON_H 1 +#include "config-host.h" + #include <stddef.h> #include <inttypes.h> @@ -13,22 +15,98 @@ #include "qemu-queue.h" /* - * tweaks needed to build with different xen versions - * 0x00030205 -> 3.1.0 - * 0x00030207 -> 3.2.0 - * 0x00030208 -> unstable + * We don't support Xen prior to 3.3.0. 
*/ -#include <xen/xen-compat.h> -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030205 -# define evtchn_port_or_error_t int -#endif -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030207 -# define xc_map_foreign_pages xc_map_foreign_batch + +/* Xen before 4.0 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 400 +static inline void *xc_map_foreign_bulk(int xc_handle, uint32_t dom, int prot, + xen_pfn_t *arr, int *err, + unsigned int num) +{ + return xc_map_foreign_batch(xc_handle, dom, prot, arr, num); +} #endif -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030208 -# define xen_mb() mb() -# define xen_rmb() rmb() -# define xen_wmb() wmb() + + +/* Xen before 4.1 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 410 + +typedef int XenXC; +typedef int XenEvtchn; +typedef int XenGnttab; + +# define XC_INTERFACE_FMT "%i" +# define XC_HANDLER_INITIAL_VALUE -1 + +static inline XenEvtchn xen_xc_evtchn_open(void *logger, + unsigned int open_flags) +{ + return xc_evtchn_open(); +} + +static inline XenGnttab xen_xc_gnttab_open(void *logger, + unsigned int open_flags) +{ + return xc_gnttab_open(); +} + +static inline XenXC xen_xc_interface_open(void *logger, void *dombuild_logger, + unsigned int open_flags) +{ + return xc_interface_open(); +} + +static inline int xc_fd(int xen_xc) +{ + return xen_xc; +} + + +static inline int xc_domain_populate_physmap_exact + (XenXC xc_handle, uint32_t domid, unsigned long nr_extents, + unsigned int extent_order, unsigned int mem_flags, xen_pfn_t *extent_start) +{ + return xc_domain_memory_populate_physmap + (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start); +} + + +/* Xen 4.1 */ +#else + +typedef xc_interface *XenXC; +typedef xc_evtchn *XenEvtchn; +typedef xc_gnttab *XenGnttab; + +# define XC_INTERFACE_FMT "%p" +# define XC_HANDLER_INITIAL_VALUE NULL + +static inline XenEvtchn xen_xc_evtchn_open(void *logger, + unsigned int open_flags) +{ + return xc_evtchn_open(logger, open_flags); +} + +static inline XenGnttab xen_xc_gnttab_open(void 
*logger, + unsigned int open_flags) +{ + return xc_gnttab_open(logger, open_flags); +} + +static inline XenXC xen_xc_interface_open(void *logger, void *dombuild_logger, + unsigned int open_flags) +{ + return xc_interface_open(logger, dombuild_logger, open_flags); +} + +/* FIXME There is now way to have the xen fd */ +static inline int xc_fd(xc_interface *xen_xc) +{ + return -1; +} #endif +void destroy_hvm_domain(void); + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/hw/xen_disk.c b/hw/xen_disk.c index 558bf8ae25..233c8c99c6 100644 --- a/hw/xen_disk.c +++ b/hw/xen_disk.c @@ -120,17 +120,18 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) struct ioreq *ioreq = NULL; if (QLIST_EMPTY(&blkdev->freelist)) { - if (blkdev->requests_total >= max_requests) - goto out; - /* allocate new struct */ - ioreq = qemu_mallocz(sizeof(*ioreq)); - ioreq->blkdev = blkdev; - blkdev->requests_total++; + if (blkdev->requests_total >= max_requests) { + goto out; + } + /* allocate new struct */ + ioreq = qemu_mallocz(sizeof(*ioreq)); + ioreq->blkdev = blkdev; + blkdev->requests_total++; qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); } else { - /* get one from freelist */ - ioreq = QLIST_FIRST(&blkdev->freelist); - QLIST_REMOVE(ioreq, list); + /* get one from freelist */ + ioreq = QLIST_FIRST(&blkdev->freelist); + QLIST_REMOVE(ioreq, list); qemu_iovec_reset(&ioreq->v); } QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); @@ -173,30 +174,32 @@ static int ioreq_parse(struct ioreq *ioreq) int i; xen_be_printf(&blkdev->xendev, 3, - "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", - ioreq->req.operation, ioreq->req.nr_segments, - ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); + "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", + ioreq->req.operation, ioreq->req.nr_segments, + ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); switch (ioreq->req.operation) { case BLKIF_OP_READ: - ioreq->prot = PROT_WRITE; /* to memory 
*/ - break; + ioreq->prot = PROT_WRITE; /* to memory */ + break; case BLKIF_OP_WRITE_BARRIER: if (!ioreq->req.nr_segments) { ioreq->presync = 1; return 0; } - if (!syncwrite) - ioreq->presync = ioreq->postsync = 1; - /* fall through */ + if (!syncwrite) { + ioreq->presync = ioreq->postsync = 1; + } + /* fall through */ case BLKIF_OP_WRITE: - ioreq->prot = PROT_READ; /* from memory */ - if (syncwrite) - ioreq->postsync = 1; - break; + ioreq->prot = PROT_READ; /* from memory */ + if (syncwrite) { + ioreq->postsync = 1; + } + break; default: - xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", - ioreq->req.operation); - goto err; + xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", + ioreq->req.operation); + goto err; }; if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { @@ -206,29 +209,29 @@ static int ioreq_parse(struct ioreq *ioreq) ioreq->start = ioreq->req.sector_number * blkdev->file_blk; for (i = 0; i < ioreq->req.nr_segments; i++) { - if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); - goto err; - } - if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { - xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); - goto err; - } - if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { - xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); - goto err; - } - - ioreq->domids[i] = blkdev->xendev.dom; - ioreq->refs[i] = ioreq->req.seg[i].gref; - - mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; - len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; + if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { + xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); + goto err; + } + if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { + xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); + goto err; + } + if (ioreq->req.seg[i].last_sect * 
BLOCK_SIZE >= XC_PAGE_SIZE) { + xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); + goto err; + } + + ioreq->domids[i] = blkdev->xendev.dom; + ioreq->refs[i] = ioreq->req.seg[i].gref; + + mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; + len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; qemu_iovec_add(&ioreq->v, (void*)mem, len); } if (ioreq->start + ioreq->v.size > blkdev->file_size) { - xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); - goto err; + xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); + goto err; } return 0; @@ -239,66 +242,73 @@ err: static void ioreq_unmap(struct ioreq *ioreq) { - int gnt = ioreq->blkdev->xendev.gnttabdev; + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; int i; - if (ioreq->v.niov == 0) + if (ioreq->v.niov == 0) { return; + } if (batch_maps) { - if (!ioreq->pages) - return; - if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) - xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", - strerror(errno)); - ioreq->blkdev->cnt_map -= ioreq->v.niov; - ioreq->pages = NULL; + if (!ioreq->pages) { + return; + } + if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0) { + xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + ioreq->blkdev->cnt_map -= ioreq->v.niov; + ioreq->pages = NULL; } else { - for (i = 0; i < ioreq->v.niov; i++) { - if (!ioreq->page[i]) - continue; - if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) - xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", - strerror(errno)); - ioreq->blkdev->cnt_map--; - ioreq->page[i] = NULL; - } + for (i = 0; i < ioreq->v.niov; i++) { + if (!ioreq->page[i]) { + continue; + } + if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) { + xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", + strerror(errno)); + } + ioreq->blkdev->cnt_map--; + ioreq->page[i] = NULL; 
+ } } } static int ioreq_map(struct ioreq *ioreq) { - int gnt = ioreq->blkdev->xendev.gnttabdev; + XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; int i; - if (ioreq->v.niov == 0) + if (ioreq->v.niov == 0) { return 0; + } if (batch_maps) { - ioreq->pages = xc_gnttab_map_grant_refs - (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot); - if (ioreq->pages == NULL) { - xen_be_printf(&ioreq->blkdev->xendev, 0, - "can't map %d grant refs (%s, %d maps)\n", - ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map); - return -1; - } - for (i = 0; i < ioreq->v.niov; i++) - ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE + - (uintptr_t)ioreq->v.iov[i].iov_base; - ioreq->blkdev->cnt_map += ioreq->v.niov; + ioreq->pages = xc_gnttab_map_grant_refs + (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot); + if (ioreq->pages == NULL) { + xen_be_printf(&ioreq->blkdev->xendev, 0, + "can't map %d grant refs (%s, %d maps)\n", + ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map); + return -1; + } + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE + + (uintptr_t)ioreq->v.iov[i].iov_base; + } + ioreq->blkdev->cnt_map += ioreq->v.niov; } else { - for (i = 0; i < ioreq->v.niov; i++) { - ioreq->page[i] = xc_gnttab_map_grant_ref - (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot); - if (ioreq->page[i] == NULL) { - xen_be_printf(&ioreq->blkdev->xendev, 0, - "can't map grant ref %d (%s, %d maps)\n", - ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map); - ioreq_unmap(ioreq); - return -1; - } - ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base; - ioreq->blkdev->cnt_map++; - } + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->page[i] = xc_gnttab_map_grant_ref + (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot); + if (ioreq->page[i] == NULL) { + xen_be_printf(&ioreq->blkdev->xendev, 0, + "can't map grant ref %d (%s, %d maps)\n", + ioreq->refs[i], strerror(errno), 
ioreq->blkdev->cnt_map); + ioreq_unmap(ioreq); + return -1; + } + ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base; + ioreq->blkdev->cnt_map++; + } } return 0; } @@ -309,54 +319,58 @@ static int ioreq_runio_qemu_sync(struct ioreq *ioreq) int i, rc, len = 0; off_t pos; - if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) - goto err_no_map; - if (ioreq->presync) - bdrv_flush(blkdev->bs); + if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) { + goto err_no_map; + } + if (ioreq->presync) { + bdrv_flush(blkdev->bs); + } switch (ioreq->req.operation) { case BLKIF_OP_READ: - pos = ioreq->start; - for (i = 0; i < ioreq->v.niov; i++) { - rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE, - ioreq->v.iov[i].iov_base, - ioreq->v.iov[i].iov_len / BLOCK_SIZE); - if (rc != 0) { - xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n", - ioreq->v.iov[i].iov_base, - ioreq->v.iov[i].iov_len); - goto err; - } - len += ioreq->v.iov[i].iov_len; - pos += ioreq->v.iov[i].iov_len; - } - break; + pos = ioreq->start; + for (i = 0; i < ioreq->v.niov; i++) { + rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE, + ioreq->v.iov[i].iov_base, + ioreq->v.iov[i].iov_len / BLOCK_SIZE); + if (rc != 0) { + xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n", + ioreq->v.iov[i].iov_base, + ioreq->v.iov[i].iov_len); + goto err; + } + len += ioreq->v.iov[i].iov_len; + pos += ioreq->v.iov[i].iov_len; + } + break; case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: - if (!ioreq->req.nr_segments) + if (!ioreq->req.nr_segments) { break; - pos = ioreq->start; - for (i = 0; i < ioreq->v.niov; i++) { - rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE, - ioreq->v.iov[i].iov_base, - ioreq->v.iov[i].iov_len / BLOCK_SIZE); - if (rc != 0) { - xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n", - ioreq->v.iov[i].iov_base, - ioreq->v.iov[i].iov_len); - goto err; - } - len += ioreq->v.iov[i].iov_len; - pos += ioreq->v.iov[i].iov_len; - } - break; + } + pos = 
ioreq->start; + for (i = 0; i < ioreq->v.niov; i++) { + rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE, + ioreq->v.iov[i].iov_base, + ioreq->v.iov[i].iov_len / BLOCK_SIZE); + if (rc != 0) { + xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n", + ioreq->v.iov[i].iov_base, + ioreq->v.iov[i].iov_len); + goto err; + } + len += ioreq->v.iov[i].iov_len; + pos += ioreq->v.iov[i].iov_len; + } + break; default: - /* unknown operation (shouldn't happen -- parse catches this) */ - goto err; + /* unknown operation (shouldn't happen -- parse catches this) */ + goto err; } - if (ioreq->postsync) - bdrv_flush(blkdev->bs); + if (ioreq->postsync) { + bdrv_flush(blkdev->bs); + } ioreq->status = BLKIF_RSP_OKAY; ioreq_unmap(ioreq); @@ -382,8 +396,9 @@ static void qemu_aio_complete(void *opaque, int ret) } ioreq->aio_inflight--; - if (ioreq->aio_inflight > 0) + if (ioreq->aio_inflight > 0) { return; + } ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; ioreq_unmap(ioreq); @@ -395,12 +410,14 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) { struct XenBlkDev *blkdev = ioreq->blkdev; - if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) - goto err_no_map; + if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) { + goto err_no_map; + } ioreq->aio_inflight++; - if (ioreq->presync) - bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */ + if (ioreq->presync) { + bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? 
*/ + } switch (ioreq->req.operation) { case BLKIF_OP_READ: @@ -408,23 +425,25 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE, &ioreq->v, ioreq->v.size / BLOCK_SIZE, qemu_aio_complete, ioreq); - break; + break; case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: - if (!ioreq->req.nr_segments) + if (!ioreq->req.nr_segments) { break; + } ioreq->aio_inflight++; bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE, &ioreq->v, ioreq->v.size / BLOCK_SIZE, qemu_aio_complete, ioreq); - break; + break; default: - /* unknown operation (shouldn't happen -- parse catches this) */ - goto err; + /* unknown operation (shouldn't happen -- parse catches this) */ + goto err; } - if (ioreq->postsync) - bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */ + if (ioreq->postsync) { + bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */ + } qemu_aio_complete(ioreq, 0); return 0; @@ -452,36 +471,37 @@ static int blk_send_response_one(struct ioreq *ioreq) /* Place on the response ring for the relevant domain. */ switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: - dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); - break; + dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt); + break; case BLKIF_PROTOCOL_X86_32: dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part, blkdev->rings.x86_32_part.rsp_prod_pvt); - break; + break; case BLKIF_PROTOCOL_X86_64: dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part, blkdev->rings.x86_64_part.rsp_prod_pvt); - break; + break; default: - dst = NULL; + dst = NULL; } memcpy(dst, &resp, sizeof(resp)); blkdev->rings.common.rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify); if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) { - /* - * Tail check for pending requests. 
Allows frontend to avoid - * notifications if requests are already in flight (lower - * overheads and promotes batching). - */ - RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests); + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). + */ + RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests); } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) { - have_requests = 1; + have_requests = 1; } - if (have_requests) - blkdev->more_work++; + if (have_requests) { + blkdev->more_work++; + } return send_notify; } @@ -493,28 +513,29 @@ static void blk_send_response_all(struct XenBlkDev *blkdev) while (!QLIST_EMPTY(&blkdev->finished)) { ioreq = QLIST_FIRST(&blkdev->finished); - send_notify += blk_send_response_one(ioreq); - ioreq_release(ioreq); + send_notify += blk_send_response_one(ioreq); + ioreq_release(ioreq); + } + if (send_notify) { + xen_be_send_notify(&blkdev->xendev); } - if (send_notify) - xen_be_send_notify(&blkdev->xendev); } static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc) { switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: - memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc), - sizeof(ioreq->req)); - break; + memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc), + sizeof(ioreq->req)); + break; case BLKIF_PROTOCOL_X86_32: blkif_get_x86_32_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc)); - break; + break; case BLKIF_PROTOCOL_X86_64: blkif_get_x86_64_req(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc)); - break; + break; } return 0; } @@ -530,12 +551,14 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) rp = blkdev->rings.common.sring->req_prod; xen_rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ - if (use_aio) + if (use_aio) { blk_send_response_all(blkdev); + } while (rc != rp) { /* pull request from ring */ - if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) + if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) { break; + } ioreq = ioreq_start(blkdev); if (ioreq == NULL) { blkdev->more_work++; @@ -546,8 +569,9 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) /* parse them */ if (ioreq_parse(ioreq) != 0) { - if (blk_send_response_one(ioreq)) + if (blk_send_response_one(ioreq)) { xen_be_send_notify(&blkdev->xendev); + } ioreq_release(ioreq); continue; } @@ -560,11 +584,13 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) ioreq_runio_qemu_sync(ioreq); } } - if (!use_aio) + if (!use_aio) { blk_send_response_all(blkdev); + } - if (blkdev->more_work && blkdev->requests_inflight < max_requests) + if (blkdev->more_work && blkdev->requests_inflight < max_requests) { qemu_bh_schedule(blkdev->bh); + } } /* ------------------------------------------------------------- */ @@ -583,8 +609,9 @@ static void blk_alloc(struct XenDevice *xendev) QLIST_INIT(&blkdev->finished); QLIST_INIT(&blkdev->freelist); blkdev->bh = qemu_bh_new(blk_bh, blkdev); - if (xen_mode != XEN_EMULATE) + if (xen_mode != XEN_EMULATE) { batch_maps = 1; + } } static int blk_init(struct XenDevice *xendev) @@ -595,44 +622,50 @@ static int blk_init(struct XenDevice *xendev) /* read xenstore entries */ if (blkdev->params == NULL) { - blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); + blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params"); h = strchr(blkdev->params, ':'); - if (h != NULL) { - blkdev->fileproto = blkdev->params; - blkdev->filename = h+1; - *h = 0; - } else { - blkdev->fileproto = "<unset>"; - blkdev->filename = blkdev->params; - } - } - if (blkdev->mode == NULL) - blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); - if (blkdev->type == NULL) - blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); - if 
(blkdev->dev == NULL) - blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); - if (blkdev->devtype == NULL) - blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); + if (h != NULL) { + blkdev->fileproto = blkdev->params; + blkdev->filename = h+1; + *h = 0; + } else { + blkdev->fileproto = "<unset>"; + blkdev->filename = blkdev->params; + } + } + if (blkdev->mode == NULL) { + blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode"); + } + if (blkdev->type == NULL) { + blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type"); + } + if (blkdev->dev == NULL) { + blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev"); + } + if (blkdev->devtype == NULL) { + blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type"); + } /* do we have all we need? */ if (blkdev->params == NULL || - blkdev->mode == NULL || - blkdev->type == NULL || - blkdev->dev == NULL) - return -1; + blkdev->mode == NULL || + blkdev->type == NULL || + blkdev->dev == NULL) { + return -1; + } /* read-only ? */ if (strcmp(blkdev->mode, "w") == 0) { - qflags = BDRV_O_RDWR; + qflags = BDRV_O_RDWR; } else { - qflags = 0; - info |= VDISK_READONLY; + qflags = 0; + info |= VDISK_READONLY; } /* cdrom ? 
*/ - if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) - info |= VDISK_CDROM; + if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) { + info |= VDISK_CDROM; + } /* init qemu block driver */ index = (blkdev->xendev.dev - 202 * 256) / 16; @@ -649,7 +682,7 @@ static int blk_init(struct XenDevice *xendev) } else { /* setup via qemu cmdline -> already setup for us */ xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); - blkdev->bs = blkdev->dinfo->bdrv; + blkdev->bs = blkdev->dinfo->bdrv; } blkdev->file_blk = BLOCK_SIZE; blkdev->file_size = bdrv_getlength(blkdev->bs); @@ -657,21 +690,21 @@ static int blk_init(struct XenDevice *xendev) xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n", (int)blkdev->file_size, strerror(-blkdev->file_size), blkdev->bs->drv ? blkdev->bs->drv->format_name : "-"); - blkdev->file_size = 0; + blkdev->file_size = 0; } have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0; xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," - " size %" PRId64 " (%" PRId64 " MB)\n", - blkdev->type, blkdev->fileproto, blkdev->filename, - blkdev->file_size, blkdev->file_size >> 20); + " size %" PRId64 " (%" PRId64 " MB)\n", + blkdev->type, blkdev->fileproto, blkdev->filename, + blkdev->file_size, blkdev->file_size >> 20); /* fill info */ xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers); xenstore_write_be_int(&blkdev->xendev, "info", info); xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk); xenstore_write_be_int(&blkdev->xendev, "sectors", - blkdev->file_size / blkdev->file_blk); + blkdev->file_size / blkdev->file_blk); return 0; } @@ -679,57 +712,62 @@ static int blk_connect(struct XenDevice *xendev) { struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); - if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1) - return -1; + if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", 
&blkdev->ring_ref) == -1) { + return -1; + } if (xenstore_read_fe_int(&blkdev->xendev, "event-channel", - &blkdev->xendev.remote_port) == -1) - return -1; + &blkdev->xendev.remote_port) == -1) { + return -1; + } blkdev->protocol = BLKIF_PROTOCOL_NATIVE; if (blkdev->xendev.protocol) { - if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) + if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { blkdev->protocol = BLKIF_PROTOCOL_X86_32; - if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) + } + if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { blkdev->protocol = BLKIF_PROTOCOL_X86_64; + } } blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev, - blkdev->xendev.dom, - blkdev->ring_ref, - PROT_READ | PROT_WRITE); - if (!blkdev->sring) - return -1; + blkdev->xendev.dom, + blkdev->ring_ref, + PROT_READ | PROT_WRITE); + if (!blkdev->sring) { + return -1; + } blkdev->cnt_map++; switch (blkdev->protocol) { case BLKIF_PROTOCOL_NATIVE: { - blkif_sring_t *sring_native = blkdev->sring; - BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); - break; + blkif_sring_t *sring_native = blkdev->sring; + BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE); + break; } case BLKIF_PROTOCOL_X86_32: { - blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; + blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring; BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE); - break; + break; } case BLKIF_PROTOCOL_X86_64: { - blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; + blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring; BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE); - break; + break; } } xen_be_bind_evtchn(&blkdev->xendev); xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " - "remote port %d, local port %d\n", - blkdev->xendev.protocol, blkdev->ring_ref, - blkdev->xendev.remote_port, blkdev->xendev.local_port); + "remote port %d, local 
port %d\n", + blkdev->xendev.protocol, blkdev->ring_ref, + blkdev->xendev.remote_port, blkdev->xendev.local_port); return 0; } @@ -743,14 +781,14 @@ static void blk_disconnect(struct XenDevice *xendev) bdrv_close(blkdev->bs); bdrv_delete(blkdev->bs); } - blkdev->bs = NULL; + blkdev->bs = NULL; } xen_be_unbind_evtchn(&blkdev->xendev); if (blkdev->sring) { - xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); - blkdev->cnt_map--; - blkdev->sring = NULL; + xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); + blkdev->cnt_map--; + blkdev->sring = NULL; } } @@ -760,10 +798,10 @@ static int blk_free(struct XenDevice *xendev) struct ioreq *ioreq; while (!QLIST_EMPTY(&blkdev->freelist)) { - ioreq = QLIST_FIRST(&blkdev->freelist); + ioreq = QLIST_FIRST(&blkdev->freelist); QLIST_REMOVE(ioreq, list); qemu_iovec_destroy(&ioreq->v); - qemu_free(ioreq); + qemu_free(ioreq); } qemu_free(blkdev->params); diff --git a/hw/xen_domainbuild.c b/hw/xen_domainbuild.c index 4093587df1..a6a12e5930 100644 --- a/hw/xen_domainbuild.c +++ b/hw/xen_domainbuild.c @@ -175,8 +175,9 @@ static int xen_domain_watcher(void) for (i = 3; i < n; i++) { if (i == fd[0]) continue; - if (i == xen_xc) + if (i == xc_fd(xen_xc)) { continue; + } close(i); } diff --git a/hw/xen_machine_pv.c b/hw/xen_machine_pv.c index 0d7f73ed82..7985d11d5a 100644 --- a/hw/xen_machine_pv.c +++ b/hw/xen_machine_pv.c @@ -113,6 +113,7 @@ static QEMUMachine xenpv_machine = { .desc = "Xen Para-virtualized PC", .init = xen_init_pv, .max_cpus = 1, + .default_machine_opts = "accel=xen", }; static void xenpv_machine_init(void) diff --git a/hw/xen_nic.c b/hw/xen_nic.c index 08055b83ff..ff86491cfa 100644 --- a/hw/xen_nic.c +++ b/hw/xen_nic.c @@ -74,20 +74,23 @@ static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, i resp->status = st; #if 0 - if (txp->flags & NETTXF_extra_info) - RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL; + if (txp->flags & NETTXF_extra_info) { + 
RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL; + } #endif netdev->tx_ring.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify); - if (notify) - xen_be_send_notify(&netdev->xendev); + if (notify) { + xen_be_send_notify(&netdev->xendev); + } if (i == netdev->tx_ring.req_cons) { - int more_to_do; - RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do); - if (more_to_do) - netdev->tx_work++; + int more_to_do; + RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do); + if (more_to_do) { + netdev->tx_work++; + } } } @@ -101,10 +104,11 @@ static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp, RING RING_IDX cons = netdev->tx_ring.req_cons; do { - make_tx_response(netif, txp, NETIF_RSP_ERROR); - if (cons >= end) - break; - txp = RING_GET_REQUEST(&netdev->tx_ring, cons++); + make_tx_response(netif, txp, NETIF_RSP_ERROR); + if (cons >= end) { + break; + } + txp = RING_GET_REQUEST(&netdev->tx_ring, cons++); } while (1); netdev->tx_ring.req_cons = cons; netif_schedule_work(netif); @@ -122,75 +126,78 @@ static void net_tx_packets(struct XenNetDev *netdev) void *tmpbuf = NULL; for (;;) { - rc = netdev->tx_ring.req_cons; - rp = netdev->tx_ring.sring->req_prod; - xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ + rc = netdev->tx_ring.req_cons; + rp = netdev->tx_ring.sring->req_prod; + xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ - while ((rc != rp)) { - if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) - break; - memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq)); - netdev->tx_ring.req_cons = ++rc; + while ((rc != rp)) { + if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) { + break; + } + memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc), sizeof(txreq)); + netdev->tx_ring.req_cons = ++rc; #if 1 - /* should not happen in theory, we don't announce the * - * feature-{sg,gso,whatelse} flags in xenstore (yet?) 
*/ - if (txreq.flags & NETTXF_extra_info) { - xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n"); - net_tx_error(netdev, &txreq, rc); - continue; - } - if (txreq.flags & NETTXF_more_data) { - xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n"); - net_tx_error(netdev, &txreq, rc); - continue; - } + /* should not happen in theory, we don't announce the * + * feature-{sg,gso,whatelse} flags in xenstore (yet?) */ + if (txreq.flags & NETTXF_extra_info) { + xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } + if (txreq.flags & NETTXF_more_data) { + xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } #endif - if (txreq.size < 14) { - xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size); - net_tx_error(netdev, &txreq, rc); - continue; - } - - if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { - xen_be_printf(&netdev->xendev, 0, "error: page crossing\n"); - net_tx_error(netdev, &txreq, rc); - continue; - } - - xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n", - txreq.gref, txreq.offset, txreq.size, txreq.flags, - (txreq.flags & NETTXF_csum_blank) ? " csum_blank" : "", - (txreq.flags & NETTXF_data_validated) ? " data_validated" : "", - (txreq.flags & NETTXF_more_data) ? " more_data" : "", - (txreq.flags & NETTXF_extra_info) ? 
" extra_info" : ""); - - page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, - netdev->xendev.dom, - txreq.gref, PROT_READ); - if (page == NULL) { - xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n", + if (txreq.size < 14) { + xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size); + net_tx_error(netdev, &txreq, rc); + continue; + } + + if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { + xen_be_printf(&netdev->xendev, 0, "error: page crossing\n"); + net_tx_error(netdev, &txreq, rc); + continue; + } + + xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n", + txreq.gref, txreq.offset, txreq.size, txreq.flags, + (txreq.flags & NETTXF_csum_blank) ? " csum_blank" : "", + (txreq.flags & NETTXF_data_validated) ? " data_validated" : "", + (txreq.flags & NETTXF_more_data) ? " more_data" : "", + (txreq.flags & NETTXF_extra_info) ? " extra_info" : ""); + + page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, + netdev->xendev.dom, + txreq.gref, PROT_READ); + if (page == NULL) { + xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n", txreq.gref); - net_tx_error(netdev, &txreq, rc); - continue; - } - if (txreq.flags & NETTXF_csum_blank) { + net_tx_error(netdev, &txreq, rc); + continue; + } + if (txreq.flags & NETTXF_csum_blank) { /* have read-only mapping -> can't fill checksum in-place */ - if (!tmpbuf) + if (!tmpbuf) { tmpbuf = qemu_malloc(XC_PAGE_SIZE); + } memcpy(tmpbuf, page + txreq.offset, txreq.size); - net_checksum_calculate(tmpbuf, txreq.size); + net_checksum_calculate(tmpbuf, txreq.size); qemu_send_packet(&netdev->nic->nc, tmpbuf, txreq.size); } else { qemu_send_packet(&netdev->nic->nc, page + txreq.offset, txreq.size); } - xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1); - net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); - } - if (!netdev->tx_work) - break; - netdev->tx_work = 0; + xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1); + 
net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); + } + if (!netdev->tx_work) { + break; + } + netdev->tx_work = 0; } qemu_free(tmpbuf); } @@ -198,9 +205,9 @@ static void net_tx_packets(struct XenNetDev *netdev) /* ------------------------------------------------------------- */ static void net_rx_response(struct XenNetDev *netdev, - netif_rx_request_t *req, int8_t st, - uint16_t offset, uint16_t size, - uint16_t flags) + netif_rx_request_t *req, int8_t st, + uint16_t offset, uint16_t size, + uint16_t flags) { RING_IDX i = netdev->rx_ring.rsp_prod_pvt; netif_rx_response_t *resp; @@ -211,16 +218,18 @@ static void net_rx_response(struct XenNetDev *netdev, resp->flags = flags; resp->id = req->id; resp->status = (int16_t)size; - if (st < 0) - resp->status = (int16_t)st; + if (st < 0) { + resp->status = (int16_t)st; + } xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n", - i, resp->status, resp->flags); + i, resp->status, resp->flags); netdev->rx_ring.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify); - if (notify) - xen_be_send_notify(&netdev->xendev); + if (notify) { + xen_be_send_notify(&netdev->xendev); + } } #define NET_IP_ALIGN 2 @@ -230,17 +239,18 @@ static int net_rx_ok(VLANClientState *nc) struct XenNetDev *netdev = DO_UPCAST(NICState, nc, nc)->opaque; RING_IDX rc, rp; - if (netdev->xendev.be_state != XenbusStateConnected) - return 0; + if (netdev->xendev.be_state != XenbusStateConnected) { + return 0; + } rc = netdev->rx_ring.req_cons; rp = netdev->rx_ring.sring->req_prod; xen_rmb(); if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { - xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n", - __FUNCTION__, rc, rp); - return 0; + xen_be_printf(&netdev->xendev, 2, "%s: no rx buffers (%d/%d)\n", + __FUNCTION__, rc, rp); + return 0; } return 1; } @@ -252,34 +262,35 @@ static ssize_t net_rx_packet(VLANClientState *nc, const uint8_t *buf, size_t siz RING_IDX rc, rp; void *page; - 
if (netdev->xendev.be_state != XenbusStateConnected) - return -1; + if (netdev->xendev.be_state != XenbusStateConnected) { + return -1; + } rc = netdev->rx_ring.req_cons; rp = netdev->rx_ring.sring->req_prod; xen_rmb(); /* Ensure we see queued requests up to 'rp'. */ if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { - xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n"); - return -1; + xen_be_printf(&netdev->xendev, 2, "no buffer, drop packet\n"); + return -1; } if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { - xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", - (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); - return -1; + xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", + (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); + return -1; } memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq)); netdev->rx_ring.req_cons = ++rc; page = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, - netdev->xendev.dom, - rxreq.gref, PROT_WRITE); + netdev->xendev.dom, + rxreq.gref, PROT_WRITE); if (page == NULL) { - xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n", + xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n", rxreq.gref); - net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0); - return -1; + net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0); + return -1; } memcpy(page + NET_IP_ALIGN, buf, size); xc_gnttab_munmap(netdev->xendev.gnttabdev, page, 1); @@ -302,15 +313,18 @@ static int net_init(struct XenDevice *xendev) struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); /* read xenstore entries */ - if (netdev->mac == NULL) - netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac"); + if (netdev->mac == NULL) { + netdev->mac = xenstore_read_be_str(&netdev->xendev, "mac"); + } /* do we have all we need? 
*/ - if (netdev->mac == NULL) - return -1; + if (netdev->mac == NULL) { + return -1; + } - if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) + if (net_parse_macaddr(netdev->conf.macaddr.a, netdev->mac) < 0) { return -1; + } netdev->conf.vlan = qemu_find_vlan(netdev->xendev.dev, 1); netdev->conf.peer = NULL; @@ -334,41 +348,46 @@ static int net_connect(struct XenDevice *xendev) int rx_copy; if (xenstore_read_fe_int(&netdev->xendev, "tx-ring-ref", - &netdev->tx_ring_ref) == -1) - return -1; + &netdev->tx_ring_ref) == -1) { + return -1; + } if (xenstore_read_fe_int(&netdev->xendev, "rx-ring-ref", - &netdev->rx_ring_ref) == -1) - return 1; + &netdev->rx_ring_ref) == -1) { + return 1; + } if (xenstore_read_fe_int(&netdev->xendev, "event-channel", - &netdev->xendev.remote_port) == -1) - return -1; + &netdev->xendev.remote_port) == -1) { + return -1; + } - if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) - rx_copy = 0; + if (xenstore_read_fe_int(&netdev->xendev, "request-rx-copy", &rx_copy) == -1) { + rx_copy = 0; + } if (rx_copy == 0) { - xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n"); - return -1; + xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n"); + return -1; } netdev->txs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, - netdev->xendev.dom, - netdev->tx_ring_ref, - PROT_READ | PROT_WRITE); + netdev->xendev.dom, + netdev->tx_ring_ref, + PROT_READ | PROT_WRITE); netdev->rxs = xc_gnttab_map_grant_ref(netdev->xendev.gnttabdev, - netdev->xendev.dom, - netdev->rx_ring_ref, - PROT_READ | PROT_WRITE); - if (!netdev->txs || !netdev->rxs) - return -1; + netdev->xendev.dom, + netdev->rx_ring_ref, + PROT_READ | PROT_WRITE); + if (!netdev->txs || !netdev->rxs) { + return -1; + } BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE); xen_be_bind_evtchn(&netdev->xendev); xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref 
%d, rx-ring-ref %d, " - "remote port %d, local port %d\n", - netdev->tx_ring_ref, netdev->rx_ring_ref, - netdev->xendev.remote_port, netdev->xendev.local_port); + "remote port %d, local port %d\n", + netdev->tx_ring_ref, netdev->rx_ring_ref, + netdev->xendev.remote_port, netdev->xendev.local_port); net_tx_packets(netdev); return 0; @@ -381,12 +400,12 @@ static void net_disconnect(struct XenDevice *xendev) xen_be_unbind_evtchn(&netdev->xendev); if (netdev->txs) { - xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1); - netdev->txs = NULL; + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->txs, 1); + netdev->txs = NULL; } if (netdev->rxs) { - xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1); - netdev->rxs = NULL; + xc_gnttab_munmap(netdev->xendev.gnttabdev, netdev->rxs, 1); + netdev->rxs = NULL; } if (netdev->nic) { qemu_del_vlan_client(&netdev->nic->nc); diff --git a/qemu-config.c b/qemu-config.c index 14d34194d0..5d7ffa2f23 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -450,6 +450,19 @@ QemuOptsList qemu_option_rom_opts = { }, }; +static QemuOptsList qemu_machine_opts = { + .name = "machine", + .head = QTAILQ_HEAD_INITIALIZER(qemu_machine_opts.head), + .desc = { + { + .name = "accel", + .type = QEMU_OPT_STRING, + .help = "accelerator list", + }, + { /* End of list */ } + }, +}; + static QemuOptsList *vm_config_groups[32] = { &qemu_drive_opts, &qemu_chardev_opts, @@ -464,6 +477,7 @@ static QemuOptsList *vm_config_groups[32] = { &qemu_trace_opts, #endif &qemu_option_rom_opts, + &qemu_machine_opts, NULL, }; diff --git a/qemu-options.hx b/qemu-options.hx index 489df10c46..9f121adcdb 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2030,6 +2030,16 @@ Enable KVM full virtualization support. This option is only available if KVM support is enabled when compiling. 
ETEXI +DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ + "-machine accel=accel1[:accel2] use an accelerator (kvm,xen,tcg), default is tcg\n", QEMU_ARCH_ALL) +STEXI +@item -machine accel=@var{accels} +@findex -machine +This is used to enable an accelerator; the choices are kvm, xen and tcg. +By default, only tcg is used. If more than one accelerator is +specified, the next one is tried if the previous one fails to initialize. +ETEXI + DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid, "-xen-domid id specify xen guest domain id\n", QEMU_ARCH_ALL) DEF("xen-create", 0, QEMU_OPTION_xen_create, @@ -42,6 +42,8 @@ void qemu_system_shutdown_request(void); void qemu_system_powerdown_request(void); void qemu_system_debug_request(void); void qemu_system_vmstop_request(int reason); +int qemu_shutdown_requested_get(void); +int qemu_reset_requested_get(void); int qemu_shutdown_requested(void); int qemu_reset_requested(void); int qemu_powerdown_requested(void); diff --git a/trace-events b/trace-events index 4f965e2ebd..a00b63cefd 100644 --- a/trace-events +++ b/trace-events @@ -361,3 +361,16 @@ disable milkymist_uart_pulse_irq_tx(void) "Pulse IRQ TX" # hw/milkymist-vgafb.c disable milkymist_vgafb_memory_read(uint32_t addr, uint32_t value) "addr %08x value %08x" disable milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr %08x value %08x" + +# xen-all.c +disable xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx" + +# xen-mapcache.c +disable qemu_map_cache(uint64_t phys_addr) "want %#"PRIx64"" +disable qemu_remap_bucket(uint64_t index) "index %#"PRIx64"" +disable qemu_map_cache_return(void* ptr) "%p" +disable xen_map_block(uint64_t phys_addr, uint64_t size) "%#"PRIx64", size %#"PRIx64"" +disable xen_unmap_block(void* addr, unsigned long size) "%p, size %#lx" + +# exec.c +disable qemu_put_ram_ptr(void* addr) "%p" @@ -257,7 +257,9 @@ static NotifierList exit_notifiers = static NotifierList machine_init_done_notifiers = 
NOTIFIER_LIST_INITIALIZER(machine_init_done_notifiers); +static int tcg_allowed = 1; int kvm_allowed = 0; +int xen_allowed = 0; uint32_t xen_domid; enum xen_mode xen_mode = XEN_EMULATE; @@ -1159,6 +1161,16 @@ static int powerdown_requested; static int debug_requested; static int vmstop_requested; +int qemu_shutdown_requested_get(void) +{ + return shutdown_requested; +} + +int qemu_reset_requested_get(void) +{ + return reset_requested; +} + int qemu_shutdown_requested(void) { int r = shutdown_requested; @@ -1876,6 +1888,83 @@ static int debugcon_parse(const char *devname) return 0; } +static int tcg_init(void) +{ + return 0; +} + +static struct { + const char *opt_name; + const char *name; + int (*available)(void); + int (*init)(void); + int *allowed; +} accel_list[] = { + { "tcg", "tcg", tcg_available, tcg_init, &tcg_allowed }, + { "xen", "Xen", xen_available, xen_init, &xen_allowed }, + { "kvm", "KVM", kvm_available, kvm_init, &kvm_allowed }, +}; + +static int configure_accelerator(void) +{ + const char *p = NULL; + char buf[10]; + int i, ret; + bool accel_initalised = 0; + bool init_failed = 0; + + QemuOptsList *list = qemu_find_opts("machine"); + if (!QTAILQ_EMPTY(&list->head)) { + p = qemu_opt_get(QTAILQ_FIRST(&list->head), "accel"); + } + + if (p == NULL) { + /* Use the default "accelerator", tcg */ + p = "tcg"; + } + + while (!accel_initalised && *p != '\0') { + if (*p == ':') { + p++; + } + p = get_opt_name(buf, sizeof (buf), p, ':'); + for (i = 0; i < ARRAY_SIZE(accel_list); i++) { + if (strcmp(accel_list[i].opt_name, buf) == 0) { + ret = accel_list[i].init(); + if (ret < 0) { + init_failed = 1; + if (!accel_list[i].available()) { + printf("%s not supported for this target\n", + accel_list[i].name); + } else { + fprintf(stderr, "failed to initialize %s: %s\n", + accel_list[i].name, + strerror(-ret)); + } + } else { + accel_initalised = 1; + *(accel_list[i].allowed) = 1; + } + break; + } + } + if (i == ARRAY_SIZE(accel_list)) { + fprintf(stderr, "\"%s\" 
accelerator does not exist.\n", buf); + } + } + + if (!accel_initalised) { + fprintf(stderr, "No accelerator found!\n"); + exit(1); + } + + if (init_failed) { + fprintf(stderr, "Back to %s accelerator.\n", accel_list[i].name); + } + + return !accel_initalised; +} + void qemu_add_exit_notifier(Notifier *notify) { notifier_list_add(&exit_notifiers, notify); @@ -2576,7 +2665,18 @@ int main(int argc, char **argv, char **envp) do_smbios_option(optarg); break; case QEMU_OPTION_enable_kvm: - kvm_allowed = 1; + olist = qemu_find_opts("machine"); + qemu_opts_reset(olist); + qemu_opts_parse(olist, "accel=kvm", 0); + break; + case QEMU_OPTION_machine: + olist = qemu_find_opts("machine"); + qemu_opts_reset(olist); + opts = qemu_opts_parse(olist, optarg, 0); + if (!opts) { + fprintf(stderr, "parse error: %s\n", optarg); + exit(1); + } break; case QEMU_OPTION_usb: usb_enabled = 1; @@ -2826,6 +2926,28 @@ int main(int argc, char **argv, char **envp) exit(1); } + /* + * Get the default machine options from the machine if it is not already + * specified either by the configuration file or by the command line. 
+ */ + if (machine->default_machine_opts) { + QemuOptsList *list = qemu_find_opts("machine"); + const char *p = NULL; + + if (!QTAILQ_EMPTY(&list->head)) { + p = qemu_opt_get(QTAILQ_FIRST(&list->head), "accel"); + } + if (p == NULL) { + opts = qemu_opts_parse(qemu_find_opts("machine"), + machine->default_machine_opts, 0); + if (!opts) { + fprintf(stderr, "parse error for machine %s: %s\n", + machine->name, machine->default_machine_opts); + exit(1); + } + } + } + qemu_opts_foreach(qemu_find_opts("device"), default_driver_check, NULL, 0); qemu_opts_foreach(qemu_find_opts("global"), default_driver_check, NULL, 0); @@ -2896,17 +3018,7 @@ int main(int argc, char **argv, char **envp) exit(1); } - if (kvm_allowed) { - int ret = kvm_init(); - if (ret < 0) { - if (!kvm_available()) { - printf("KVM not supported for this target\n"); - } else { - fprintf(stderr, "failed to initialize KVM: %s\n", strerror(-ret)); - } - exit(1); - } - } + configure_accelerator(); if (qemu_init_main_loop()) { fprintf(stderr, "qemu_init_main_loop failed\n"); diff --git a/xen-all.c b/xen-all.c new file mode 100644 index 0000000000..0eac202d4e --- /dev/null +++ b/xen-all.c @@ -0,0 +1,605 @@ +/* + * Copyright (C) 2010 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include <sys/mman.h> + +#include "hw/pci.h" +#include "hw/pc.h" +#include "hw/xen_common.h" +#include "hw/xen_backend.h" + +#include "xen-mapcache.h" +#include "trace.h" + +#include <xen/hvm/ioreq.h> +#include <xen/hvm/params.h> + +//#define DEBUG_XEN + +#ifdef DEBUG_XEN +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +/* Compatibility with older version */ +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x0003020a +static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) +{ + return shared_page->vcpu_iodata[i].vp_eport; +} +static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) +{ + return &shared_page->vcpu_iodata[vcpu].vp_ioreq; +} +# define FMT_ioreq_size PRIx64 +#else +static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) +{ + return shared_page->vcpu_ioreq[i].vp_eport; +} +static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) +{ + return &shared_page->vcpu_ioreq[vcpu]; +} +# define FMT_ioreq_size "u" +#endif + +#define BUFFER_IO_MAX_DELAY 100 + +typedef struct XenIOState { + shared_iopage_t *shared_page; + buffered_iopage_t *buffered_io_page; + QEMUTimer *buffered_io_timer; + /* the evtchn port for polling the notification, */ + evtchn_port_t *ioreq_local_port; + /* the evtchn fd for polling */ + XenEvtchn xce_handle; + /* which vcpu we are serving */ + int send_vcpu; + + struct xs_handle *xenstore; + + Notifier exit; +} XenIOState; + +/* Xen specific function for piix pci */ + +int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +{ + return irq_num + ((pci_dev->devfn >> 3) << 2); +} + +void xen_piix3_set_irq(void *opaque, int irq_num, int level) +{ + xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2, + irq_num & 3, level); +} + +void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) +{ + int i; + + /* Scan for updates to PCI link routes (0x60-0x63). 
*/ + for (i = 0; i < len; i++) { + uint8_t v = (val >> (8 * i)) & 0xff; + if (v & 0x80) { + v = 0; + } + v &= 0xf; + if (((address + i) >= 0x60) && ((address + i) <= 0x63)) { + xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v); + } + } +} + +void xen_cmos_set_s3_resume(void *opaque, int irq, int level) +{ + pc_cmos_set_s3_resume(opaque, irq, level); + if (level) { + xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3); + } +} + +/* Xen Interrupt Controller */ + +static void xen_set_irq(void *opaque, int irq, int level) +{ + xc_hvm_set_isa_irq_level(xen_xc, xen_domid, irq, level); +} + +qemu_irq *xen_interrupt_controller_init(void) +{ + return qemu_allocate_irqs(xen_set_irq, NULL, 16); +} + +/* Memory Ops */ + +static void xen_ram_init(ram_addr_t ram_size) +{ + RAMBlock *new_block; + ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; + + new_block = qemu_mallocz(sizeof (*new_block)); + pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); + new_block->host = NULL; + new_block->offset = 0; + new_block->length = ram_size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + new_block->length >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, new_block->length >> TARGET_PAGE_BITS); + + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + below_4g_mem_size = ram_size; + } + + cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); +#if TARGET_PHYS_ADDR_BITS > 32 + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + new_block->offset + below_4g_mem_size); + } +#endif +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + trace_xen_ram_alloc(ram_addr, size); + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = 
qemu_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) { + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + } + + if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + hw_error("xen: failed to populate ram at %lx", ram_addr); + } + + qemu_free(pfn_list); +} + + +/* VCPU Operations, MMIO, IO ring ... */ + +static void xen_reset_vcpu(void *opaque) +{ + CPUState *env = opaque; + + env->halted = 1; +} + +void xen_vcpu_init(void) +{ + CPUState *first_cpu; + + if ((first_cpu = qemu_get_cpu(0))) { + qemu_register_reset(xen_reset_vcpu, first_cpu); + xen_reset_vcpu(first_cpu); + } +} + +/* get the given vcpu's ioreq from the shared page; NULL unless it is READY */ +static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) +{ + ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu); + + if (req->state != STATE_IOREQ_READY) { + DPRINTF("I/O request not ready: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + return NULL; + } + + xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */ + + req->state = STATE_IOREQ_INPROCESS; + return req; +} + +/* check the event channel for a pending port notification */ +/* on a match, the notifying vcpu is recorded in state->send_vcpu */ +/* retval--that vcpu's ready ioreq, or NULL if nothing is pending */ +static ioreq_t *cpu_get_ioreq(XenIOState *state) +{ + int i; + evtchn_port_t port; + + port = xc_evtchn_pending(state->xce_handle); + if (port != -1) { + for (i = 0; i < smp_cpus; i++) { + if (state->ioreq_local_port[i] == port) { + break; + } + } + + if (i == smp_cpus) { + hw_error("Fatal error while trying to get io event!\n"); + } + + /* unmask the wanted port again */ + xc_evtchn_unmask(state->xce_handle, port); + + /* get the io packet from shared memory */ + state->send_vcpu = i; + return cpu_get_ioreq_from_shared_memory(state, i); + } + + /* read error or read nothing */ + return NULL; +} + +static uint32_t do_inp(pio_addr_t addr, 
unsigned long size) +{ + switch (size) { + case 1: + return cpu_inb(addr); + case 2: + return cpu_inw(addr); + case 4: + return cpu_inl(addr); + default: + hw_error("inp: bad size: %04"FMT_pioaddr" %lx", addr, size); /* NOTE(review): no value is returned on this path; presumably hw_error() does not return - confirm */ + } +} +/* Port write of 1, 2 or 4 bytes through cpu_outb/cpu_outw/cpu_outl; any other size is reported via hw_error(). */ +static void do_outp(pio_addr_t addr, + unsigned long size, uint32_t val) +{ + switch (size) { + case 1: + return cpu_outb(addr, val); + case 2: + return cpu_outw(addr, val); + case 4: + return cpu_outl(addr, val); + default: + hw_error("outp: bad size: %04"FMT_pioaddr" %lx", addr, size); + } +} +/* Service a port-I/O ioreq. With data_is_ptr clear, one value moves between the port and req->data. With it set, req->count values of req->size bytes move between the port and guest memory at req->data, stepping downwards when req->df is set and upwards otherwise. */ +static void cpu_ioreq_pio(ioreq_t *req) +{ + int i, sign; + + sign = req->df ? -1 : 1; + + if (req->dir == IOREQ_READ) { + if (!req->data_is_ptr) { + req->data = do_inp(req->addr, req->size); + } else { + uint32_t tmp; + + for (i = 0; i < req->count; i++) { + tmp = do_inp(req->addr, req->size); + cpu_physical_memory_write(req->data + (sign * i * req->size), + (uint8_t *) &tmp, req->size); + } + } + } else if (req->dir == IOREQ_WRITE) { + if (!req->data_is_ptr) { + do_outp(req->addr, req->size, req->data); + } else { + for (i = 0; i < req->count; i++) { + uint32_t tmp = 0; + + cpu_physical_memory_read(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + do_outp(req->addr, req->size, tmp); + } + } + } +} +/* Service a memory-copy ioreq: req->count items of req->size bytes at guest address req->addr are exchanged either with req->data itself (data_is_ptr clear) or with guest memory at req->data (data_is_ptr set). req->df selects descending addresses. */ +static void cpu_ioreq_move(ioreq_t *req) +{ + int i, sign; + + sign = req->df ?
-1 : 1; + + if (!req->data_is_ptr) { + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->addr + (sign * i * req->size), + (uint8_t *) &req->data, req->size); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_write(req->addr + (sign * i * req->size), + (uint8_t *) &req->data, req->size); + } + } + } else { + target_ulong tmp; /* NOTE(review): req->size bytes are copied through tmp; assumes req->size never exceeds sizeof (target_ulong) - confirm */ + + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->addr + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + cpu_physical_memory_write(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + cpu_physical_memory_write(req->addr + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + } + } + } +} +/* Dispatch one ioreq by type. For immediate-data writes narrower than target_ulong the data is first masked down to req->size bytes. PIO and COPY go to their handlers, TIMEOFFSET is ignored, INVALIDATE flushes the map cache, and any other type ends in hw_error(). */ +static void handle_ioreq(ioreq_t *req) +{ + if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && + (req->size < sizeof (target_ulong))) { + req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; + } + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(req); + break; + case IOREQ_TYPE_TIMEOFFSET: + break; + case IOREQ_TYPE_INVALIDATE: + qemu_invalidate_map_cache(); + break; + default: + hw_error("Invalid ioreq type 0x%x\n", req->type); + } +} +/* Drain the buffered I/O ring: every slot between read_pointer and write_pointer is expanded into a one-item, immediate-data ioreq_t and passed to handle_ioreq(). Does nothing when no buffered page is mapped. */ +static void handle_buffered_iopage(XenIOState *state) +{ + buf_ioreq_t *buf_req = NULL; + ioreq_t req; + int qw; + + if (!state->buffered_io_page) { + return; + } + + while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) { + buf_req = &state->buffered_io_page->buf_ioreq[ + state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM]; + req.size = 1UL << buf_req->size; + req.count = 1; + req.addr = buf_req->addr; + req.data = buf_req->data; + req.state =
STATE_IOREQ_READY; + req.dir = buf_req->dir; + req.df = 1; + req.type = buf_req->type; + req.data_is_ptr = 0; + qw = (req.size == 8); + if (qw) { /* an 8-byte request occupies two slots: the following slot supplies the upper 32 data bits */ + buf_req = &state->buffered_io_page->buf_ioreq[ + (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM]; + req.data |= ((uint64_t)buf_req->data) << 32; + } + + handle_ioreq(&req); + + xen_mb(); + state->buffered_io_page->read_pointer += qw ? 2 : 1; /* consume one slot, or two for an 8-byte request */ + } +} +/* Timer callback: drains the buffered I/O ring, then re-arms buffered_io_timer at BUFFER_IO_MAX_DELAY past the current rt_clock reading. */ +static void handle_buffered_io(void *opaque) +{ + XenIOState *state = opaque; + + handle_buffered_iopage(state); + qemu_mod_timer(state->buffered_io_timer, + BUFFER_IO_MAX_DELAY + qemu_get_clock_ms(rt_clock)); +} +/* Event-channel fd handler: picks up the pending ioreq (if any), drains buffered I/O, services the ioreq, then marks it STATE_IORESP_READY and notifies the port of the vcpu recorded in send_vcpu. If the request is no longer STATE_IOREQ_INPROCESS after handling, the domain is destroyed instead. */ +static void cpu_handle_ioreq(void *opaque) +{ + XenIOState *state = opaque; + ioreq_t *req = cpu_get_ioreq(state); + + handle_buffered_iopage(state); + if (req) { + handle_ioreq(req); + + if (req->state != STATE_IOREQ_INPROCESS) { + fprintf(stderr, "Badness in I/O request ... not in service?!: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + destroy_hvm_domain(); + return; + } + + xen_wmb(); /* Update ioreq contents /then/ update state. */ + + /* + * We do this before we send the response so that the tools + * have the opportunity to pick up on the reset before the + * guest resumes and does a hlt with interrupts disabled which + * causes Xen to powerdown the domain.
+ */ + if (vm_running) { + if (qemu_shutdown_requested_get()) { + destroy_hvm_domain(); + } + if (qemu_reset_requested_get()) { + qemu_system_reset(); + } + } + + req->state = STATE_IORESP_READY; + xc_evtchn_notify(state->xce_handle, state->ioreq_local_port[state->send_vcpu]); + } +} +/* Write the given state string to the xenstore node /local/domain/0/device-model/DOMID/state, where DOMID is xen_domid. Exits the process if the write fails. */ +static void xenstore_record_dm_state(XenIOState *s, const char *state) +{ + char path[50]; + + snprintf(path, sizeof (path), "/local/domain/0/device-model/%u/state", xen_domid); + if (!xs_write(s->xenstore, XBT_NULL, path, state, strlen(state))) { + fprintf(stderr, "error recording dm state\n"); + exit(1); + } +} +/* Create and arm the buffered-I/O timer, install cpu_handle_ioreq on the event-channel fd when one is available, and record the running state in xenstore. NOTE(review): xen_vm_change_state_handler calls this on every transition to running and each call creates a new timer without releasing the previous one - confirm that this is intended. */ +static void xen_main_loop_prepare(XenIOState *state) +{ + int evtchn_fd = -1; + + if (state->xce_handle != XC_HANDLER_INITIAL_VALUE) { + evtchn_fd = xc_evtchn_fd(state->xce_handle); + } + + state->buffered_io_timer = qemu_new_timer_ms(rt_clock, handle_buffered_io, + state); + qemu_mod_timer(state->buffered_io_timer, qemu_get_clock_ms(rt_clock)); + + if (evtchn_fd != -1) { + qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state); + } + + /* record state running */ + xenstore_record_dm_state(state, "running"); +} + + +/* Initialise Xen */ +/* VM state-change hook: runs xen_main_loop_prepare() whenever the VM enters the running state; the reason argument is not used. */ +static void xen_vm_change_state_handler(void *opaque, int running, int reason) +{ + XenIOState *state = opaque; + if (running) { + xen_main_loop_prepare(state); + } +} +/* Exit notifier: closes the event-channel handle and the xenstore connection of the XenIOState that embeds this Notifier. */ +static void xen_exit_notifier(Notifier *n) +{ + XenIOState *state = container_of(n, XenIOState, exit); + + xc_evtchn_close(state->xce_handle); + xs_daemon_close(state->xenstore); +} +/* Open the global xenctrl handle xen_xc. Returns 0 on success, or -1 after logging when the handle could not be opened. */ +int xen_init(void) +{ + xen_xc = xen_xc_interface_open(0, 0, 0); + if (xen_xc == XC_HANDLER_INITIAL_VALUE) { + xen_be_printf(NULL, 0, "can't open xen interface\n"); + return -1; + } + + return 0; +} +/* Set up the HVM device-model state: open the event channel and xenstore, map the shared and buffered ioreq pages, bind one event-channel port per vcpu, initialise the map cache and RAM, and register the VM state-change handler. Returns 0 on success and a negative value on failure. NOTE(review): the early error returns leave the freshly allocated state (and anything opened so far) unreleased - confirm. */ +int xen_hvm_init(void) +{ + int i, rc; + unsigned long ioreq_pfn; + XenIOState *state; + + state = qemu_mallocz(sizeof (XenIOState)); + + state->xce_handle = xen_xc_evtchn_open(NULL, 0); + if (state->xce_handle == XC_HANDLER_INITIAL_VALUE) { + perror("xen: event channel open"); +
return -errno; + } + + state->xenstore = xs_daemon_open(); + if (state->xenstore == NULL) { + perror("xen: xenstore open"); /* NOTE(review): perror() itself may alter errno before it is returned on the next line - confirm */ + return -errno; + } + + state->exit.notify = xen_exit_notifier; + qemu_add_exit_notifier(&state->exit); + + xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); /* return value is not checked */ + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); + state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, ioreq_pfn); + if (state->shared_page == NULL) { + hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } + + xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn); /* ioreq_pfn is reused for the buffered page frame number */ + DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn); + state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, ioreq_pfn); + if (state->buffered_io_page == NULL) { + hw_error("map buffered IO page returned error %d", errno); + } + + state->ioreq_local_port = qemu_mallocz(smp_cpus * sizeof (evtchn_port_t)); + + /* FIXME: how about if we overflow the page here? */ + for (i = 0; i < smp_cpus; i++) { + rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, i)); + if (rc == -1) { + fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + return -1; + } + state->ioreq_local_port[i] = rc; /* local port later matched against pending events in cpu_get_ioreq() */ + } + + /* Init RAM management */ + qemu_map_cache_init(); + xen_ram_init(ram_size); + + qemu_add_vm_change_state_handler(xen_vm_change_state_handler, state); + + return 0; +} +/* Open a separate xenctrl handle, request a poweroff shutdown of xen_domid, report the outcome on stderr and close the handle again. Failure to open the handle is only logged. */ +void destroy_hvm_domain(void) +{ + XenXC xc_handle; + int sts; + + xc_handle = xen_xc_interface_open(0, 0, 0); + if (xc_handle == XC_HANDLER_INITIAL_VALUE) { + fprintf(stderr, "Cannot acquire xenctrl handle\n"); + } else { + sts = xc_domain_shutdown(xc_handle, xen_domid, SHUTDOWN_poweroff); + if (sts != 0) { + fprintf(stderr, "?
xc_domain_shutdown failed to issue poweroff, " + "sts %d, %s\n", sts, strerror(errno)); + } else { + fprintf(stderr, "Issued domain %d poweroff\n", xen_domid); + } + xc_interface_close(xc_handle); + } +} diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c new file mode 100644 index 0000000000..7c14b3d141 --- /dev/null +++ b/xen-mapcache-stub.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include "exec-all.h" +#include "qemu-common.h" +#include "cpu-common.h" +#include "xen-mapcache.h" + +void qemu_map_cache_init(void) +{ +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + return qemu_get_ram_ptr(phys_addr); +} + +void qemu_map_cache_unlock(void *buffer) +{ +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + return -1; +} + +void qemu_invalidate_map_cache(void) +{ +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ +} +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) +{ + return NULL; +} diff --git a/xen-mapcache.c b/xen-mapcache.c new file mode 100644 index 0000000000..349cc6221d --- /dev/null +++ b/xen-mapcache.c @@ -0,0 +1,375 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include <sys/resource.h> + +#include "hw/xen_backend.h" +#include "blockdev.h" +#include "bitmap.h" + +#include <xen/hvm/params.h> +#include <sys/mman.h> + +#include "xen-mapcache.h" +#include "trace.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +# define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +# define DPRINTF(fmt, ...) 
do { } while (0) +#endif + +#if defined(__i386__) +# define MCACHE_BUCKET_SHIFT 16 +# define MCACHE_MAX_SIZE (1UL<<31) /* 2GB Cap */ +#elif defined(__x86_64__) +# define MCACHE_BUCKET_SHIFT 20 +# define MCACHE_MAX_SIZE (1UL<<35) /* 32GB Cap */ +#endif /* NOTE(review): hosts that are neither __i386__ nor __x86_64__ get no MCACHE_BUCKET_SHIFT or MCACHE_MAX_SIZE from this block */ +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) +/* One mapped bucket: paddr_index is the bucket number (guest address >> MCACHE_BUCKET_SHIFT), vaddr_base the start of its host mapping, valid_mapping holds one bit per page that mapped without error, lock counts outstanding locked users, and next chains further entries that share the same array slot. */ +typedef struct MapCacheEntry { + target_phys_addr_t paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + uint8_t lock; + struct MapCacheEntry *next; +} MapCacheEntry; +/* Reverse-lookup record for a locked mapping: the host address that was handed out and the bucket number it came from. */ +typedef struct MapCacheRev { + uint8_t *vaddr_req; + target_phys_addr_t paddr_index; + QTAILQ_ENTRY(MapCacheRev) next; +} MapCacheRev; +/* Global cache state: the bucket array and its length, the list of locked mappings, and a one-entry shortcut remembering the last bucket returned. NOTE(review): mcache_bucket_shift is not referenced by the code visible in this file. */ +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + target_phys_addr_t last_address_index; + uint8_t *last_address_vaddr; + unsigned long max_mcache_size; + unsigned int mcache_bucket_shift; +} MapCache; + +static MapCache *mapcache; +/* Allocate the global MapCache, set the RLIMIT_AS soft limit to the smaller of the hard limit and MCACHE_MAX_SIZE, record that value as max_mcache_size, and allocate a zeroed bucket array sized from it (rounded up to whole pages). NOTE(review): the results of getrlimit() and setrlimit() are not checked. */ +void qemu_map_cache_init(void) +{ + unsigned long size; + struct rlimit rlimit_as; + + mapcache = qemu_mallocz(sizeof (MapCache)); + + QTAILQ_INIT(&mapcache->locked_entries); + mapcache->last_address_index = -1; + + getrlimit(RLIMIT_AS, &rlimit_as); + if (rlimit_as.rlim_max < MCACHE_MAX_SIZE) { + rlimit_as.rlim_cur = rlimit_as.rlim_max; + } else { + rlimit_as.rlim_cur = MCACHE_MAX_SIZE; + } + + setrlimit(RLIMIT_AS, &rlimit_as); + mapcache->max_mcache_size = rlimit_as.rlim_cur; + + mapcache->nr_buckets = + (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); /* number of buckets needed to cover max_mcache_size, rounded up */ + + size = mapcache->nr_buckets * sizeof (MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size); + mapcache->entry = qemu_mallocz(size); +} +/* (Re)map one bucket: drop the current mapping of entry if it has one, map size bytes of guest frames for bucket address_index with xc_map_foreign_bulk, and record per-page success in valid_mapping. Exits the process when unmapping or mapping fails. */ +static void
qemu_remap_bucket(MapCacheEntry *entry, + target_phys_addr_t size, + target_phys_addr_t address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + trace_qemu_remap_bucket(address_index); + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + if (entry->vaddr_base != NULL) { + if (munmap(entry->vaddr_base, size) != 0) { /* NOTE(review): unmaps using the size of the new request, not the size the existing mapping was created with - confirm they always agree */ + perror("unmap fails"); + exit(-1); + } + } + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; /* consecutive guest frame numbers starting at the first frame of the bucket */ + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + bitmap_zero(entry->valid_mapping, nb_pfn); /* NOTE(review): the bitmap is declared for MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT bits; assumes size never exceeds one bucket - confirm */ + for (i = 0; i < nb_pfn; i++) { + if (!err[i]) { + bitmap_set(entry->valid_mapping, i, 1); + } + } + + qemu_free(pfns); + qemu_free(err); +} +/* Return a host pointer for guest physical address phys_addr, mapping or remapping its bucket when needed (a size of 0 selects MCACHE_BUCKET_SIZE). Unlocked lookups of the most recently used bucket are answered from the last_address shortcut. With lock set, the entry lock count is raised and the returned address is recorded in locked_entries. Returns NULL when the requested page is not validly mapped. */ +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + MapCacheEntry *entry, *pentry = NULL; + target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + + trace_qemu_map_cache(phys_addr); + + if (address_index == mapcache->last_address_index && !lock) { + trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset); + return mapcache->last_address_vaddr + address_offset; + } + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { /* skip chained entries that are locked, mapped and belong to another bucket */ + pentry = entry; + entry = entry->next; + } + if (!entry) { /* chain exhausted; pentry is non-NULL here because entry started out non-NULL, so the loop ran at least once */ + entry = qemu_mallocz(sizeof (MapCacheEntry)); + pentry->next = entry; + qemu_remap_bucket(entry, size ?
: MCACHE_BUCKET_SIZE, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || + !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); /* unlocked entry that is empty, holds another bucket, or lacks this page: remap it in place */ + } + } + + if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + mapcache->last_address_index = -1; + trace_qemu_map_cache_return(NULL); + return NULL; + } + + mapcache->last_address_index = address_index; + mapcache->last_address_vaddr = entry->vaddr_base; + if (lock) { + MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); + entry->lock++; /* NOTE(review): lock is a uint8_t, so the count wraps after 255 outstanding locks - confirm this cannot happen */ + reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; + reventry->paddr_index = mapcache->last_address_index; + QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); + } + + trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset); + return mapcache->last_address_vaddr + address_offset; +} +/* Release one lock taken through qemu_map_cache() with lock set: removes the locked_entries record whose address equals buffer and decrements the lock count of the owning entry if it is above zero. Addresses that are not recorded are silently ignored. */ +void qemu_map_cache_unlock(void *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; /* pentry is assigned here but never read in this function */ + entry = entry->next; + } + if (!entry) { + return; + } + if (entry->lock > 0) { + entry->lock--; + } +} +/* Translate a host pointer that was returned for a locked mapping back to a guest address. Aborts (after listing the known mappings in debug builds) when ptr is not in locked_entries. NOTE(review): the value returned is the bucket base, paddr_index << MCACHE_BUCKET_SHIFT; the offset of ptr inside the bucket is not added - confirm callers expect that. */ +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) {
+ fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, + reventry->vaddr_req); + } + abort(); + return 0; + } + + return paddr_index << MCACHE_BUCKET_SHIFT; +} +/* Drop a locked mapping identified by buffer: resets the last_address shortcut when its base equals buffer, removes the locked_entries record, decrements the lock count of the owning entry and, once that count is 0 and the entry is a chained (non-head) one, unlinks, unmaps and frees it. Unknown addresses are only reported in debug builds. */ +void qemu_invalidate_entry(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + if (mapcache->last_address_vaddr == buffer) { + mapcache->last_address_index = -1; + } + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; /* NOTE(review): unlike qemu_map_cache_unlock() there is no check for lock > 0 first; lock is a uint8_t, so a zero count would wrap to 255 - confirm */ + if (entry->lock > 0 || pentry == NULL) { /* still locked, or a head entry that lives inside the bucket array: keep it */ + return; + } + + pentry->next = entry->next; + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + qemu_free(entry); +} +/* Flush pending AIO, report any mapping that is still locked (debug builds only), then unmap every bucket-array entry that has a mapping and reset the last_address shortcut. NOTE(review): only the array (head) entries are walked; entries chained through next are not visited here - confirm. */ +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + /* Flush pending AIO before destroying the mapcache */ + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF("There should be no locked mappings at this time, " + "but "TARGET_FMT_plx" -> %p is present\n", + reventry->paddr_index, reventry->vaddr_req); + } + +
mapcache_lock(); + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) { + continue; + } + + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; /* marks the slot as unmapped so the next lookup remaps it */ + } + + mapcache->last_address_index = -1; + mapcache->last_address_vaddr = NULL; + + mapcache_unlock(); +} +/* Map size bytes of guest memory starting at phys_addr in a single xc_map_foreign_bulk call and return the host address, without going through the bucket cache. Both values are reduced to whole pages with >> XC_PAGE_SHIFT. Exits the process if the mapping call fails. NOTE(review): the per-page err[] results are freed without being inspected - confirm. */ +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + trace_xen_map_block(phys_addr, size); + phys_addr >>= XC_PAGE_SHIFT; /* from here on phys_addr holds a frame number, not a byte address */ + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = phys_addr + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + qemu_free(pfns); + qemu_free(err); + + return vaddr_base; +} diff --git a/xen-mapcache.h b/xen-mapcache.h new file mode 100644 index 0000000000..339444c94e --- /dev/null +++ b/xen-mapcache.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory.
+ * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#include <sys/mman.h> +#include "trace.h" +/* Map cache interface; the functions are implemented in xen-mapcache.c, with do-nothing or pass-through versions in xen-mapcache-stub.c. */ +void qemu_map_cache_init(void); +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); +void qemu_map_cache_unlock(void *phys_addr); /* NOTE(review): both definitions name this parameter buffer and treat it as a host pointer, not a physical address */ +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); +void qemu_invalidate_entry(uint8_t *buffer); +void qemu_invalidate_map_cache(void); + +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size); +/* Unmap size bytes at addr (traced); calls hw_error() with the strerror text when munmap() fails. */ +static inline void xen_unmap_block(void *addr, ram_addr_t size) +{ + trace_xen_unmap_block(addr, size); + + if (munmap(addr, size) != 0) { + hw_error("xen_unmap_block: %s", strerror(errno)); + } +} + +/* Locking hooks around map cache teardown; both currently expand to no-ops. */ +#define mapcache_lock() ((void)0) +#define mapcache_unlock() ((void)0) + +#endif /* !XEN_MAPCACHE_H */ diff --git a/xen-stub.c b/xen-stub.c new file mode 100644 index 0000000000..a4f35a19fb --- /dev/null +++ b/xen-stub.c @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2010 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "hw/xen.h" +/* Stub implementations: every function below has an empty body or returns a fixed failure value (-1, NULL or -ENOSYS). */ +int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) +{ + return -1; +} + +void xen_piix3_set_irq(void *opaque, int irq_num, int level) +{ +} + +void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) +{ +} + +void xen_cmos_set_s3_resume(void *opaque, int irq, int level) +{ +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ +} + +qemu_irq *xen_interrupt_controller_init(void) +{ + return NULL; +} +/* Stub: always fails with -ENOSYS. */ +int xen_init(void) +{ + return -ENOSYS; +} |