diff options
Diffstat (limited to 'hw')
69 files changed, 5579 insertions, 842 deletions
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build index 12443b6ad5..fd37b7a02d 100644 --- a/hw/9pfs/meson.build +++ b/hw/9pfs/meson.build @@ -15,7 +15,7 @@ fs_ss.add(files( )) fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c')) fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c')) -fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c')) +fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c')) softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss) specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c')) diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c index 65c4979c3c..74f3a05f88 100644 --- a/hw/9pfs/xen-9p-backend.c +++ b/hw/9pfs/xen-9p-backend.c @@ -22,6 +22,7 @@ #include "qemu/config-file.h" #include "qemu/main-loop.h" #include "qemu/option.h" +#include "qemu/iov.h" #include "fsdev/qemu-fsdev.h" #define VERSIONS "1" @@ -241,7 +242,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu) xen_wmb(); ring->inprogress = false; - xenevtchn_notify(ring->evtchndev, ring->local_port); + qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port); qemu_bh_schedule(ring->bh); } @@ -324,8 +325,8 @@ static void xen_9pfs_evtchn_event(void *opaque) Xen9pfsRing *ring = opaque; evtchn_port_t port; - port = xenevtchn_pending(ring->evtchndev); - xenevtchn_unmask(ring->evtchndev, port); + port = qemu_xen_evtchn_pending(ring->evtchndev); + qemu_xen_evtchn_unmask(ring->evtchndev, port); qemu_bh_schedule(ring->bh); } @@ -337,10 +338,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev) for (i = 0; i < xen_9pdev->num_rings; i++) { if (xen_9pdev->rings[i].evtchndev != NULL) { - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - NULL, NULL, NULL); - xenevtchn_unbind(xen_9pdev->rings[i].evtchndev, - xen_9pdev->rings[i].local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev, + xen_9pdev->rings[i].local_port); xen_9pdev->rings[i].evtchndev = NULL; } } @@ -359,12 +360,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev) if (xen_9pdev->rings[i].data != NULL) { xen_be_unmap_grant_refs(&xen_9pdev->xendev, xen_9pdev->rings[i].data, + xen_9pdev->rings[i].intf->ref, (1 << xen_9pdev->rings[i].ring_order)); } if (xen_9pdev->rings[i].intf != NULL) { - xen_be_unmap_grant_refs(&xen_9pdev->xendev, - xen_9pdev->rings[i].intf, - 1); + xen_be_unmap_grant_ref(&xen_9pdev->xendev, + xen_9pdev->rings[i].intf, + xen_9pdev->rings[i].ref); } if (xen_9pdev->rings[i].bh != NULL) { qemu_bh_delete(xen_9pdev->rings[i].bh); @@ -447,12 +449,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) xen_9pdev->rings[i].inprogress = false; - xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0); + xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open(); if (xen_9pdev->rings[i].evtchndev == NULL) { goto out; } - qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev)); - xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain + qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev)); + xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain (xen_9pdev->rings[i].evtchndev, xendev->dom, xen_9pdev->rings[i].evtchn); @@ -463,8 +465,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) goto out; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev), - xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev), + xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]); } xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model"); diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 86601cb1a5..c1f2b9cfca 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -524,6 +524,11 @@ static void yosemitev2_bmc_i2c_init(AspeedMachineState *bmc) at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x51, 128 * KiB); at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51, 128 * KiB, yosemitev2_bmc_fruid, yosemitev2_bmc_fruid_len); + /* TMP421 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "tmp421", 0x1f); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp421", 0x4e); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 9), "tmp421", 0x4f); + } static void romulus_bmc_i2c_init(AspeedMachineState *bmc) @@ -542,6 +547,10 @@ static void tiogapass_bmc_i2c_init(AspeedMachineState *bmc) at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x54, 128 * KiB); at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 6), 0x54, 128 * KiB, tiogapass_bmc_fruid, tiogapass_bmc_fruid_len); + /* TMP421 */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 8), "tmp421", 0x1f); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), "tmp421", 0x4f); + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 6), "tmp421", 0x4e); } static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr) diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c index 2fb2d5dbb7..dc33a88a54 100644 --- a/hw/arm/aspeed_eeprom.c +++ b/hw/arm/aspeed_eeprom.c @@ -101,17 +101,17 @@ const uint8_t fby35_bmc_fruid[] = { /* Yosemite V2 BMC FRU */ const uint8_t yosemitev2_bmc_fruid[] = { 0x01, 0x00, 0x00, 0x01, 0x0d, 0x00, 0x00, 0xf1, 0x01, 0x0c, 0x00, 0x36, - 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x4d, - 0x43, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x20, 0x4d, 0x6f, - 0x64, 0x75, 0x6c, 0x65, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x61, + 0x73, 0x65, 0x62, 0x6f, 0x61, 0x72, 0x64, 0x20, 0x4d, 0x50, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, 0x30, 0xc9, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc1, 0x39, 0x01, 0x0c, 0x00, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x59, 0x6f, 0x73, 0x65, 0x6d, - 0x69, 0x74, 0x65, 0x20, 0x56, 0x32, 0x2e, 0x30, 0x20, 0x45, 0x56, 0x54, - 0x32, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x69, 0x74, 0x65, 0x20, 0x56, 0x32, 0x20, 0x4d, 0x50, 0x00, 0x00, 0x00, + 0x00, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc4, 0x45, 0x56, 0x54, 0x32, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc7, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, 0x30, 0xc9, diff --git a/hw/audio/trace-events b/hw/audio/trace-events index e0e71cd9b1..4dec48a4fd 100644 --- a/hw/audio/trace-events +++ b/hw/audio/trace-events @@ -11,3 +11,9 @@ hda_audio_running(const char *stream, int nr, bool running) "st %s, nr %d, run % hda_audio_format(const char *stream, int chan, const char *fmt, int freq) "st %s, %d x %s @ %d Hz" hda_audio_adjust(const char *stream, int pos) "st %s, pos %d" hda_audio_overrun(const char *stream) "st %s" + +#via-ac97.c +via_ac97_codec_write(uint8_t addr, uint16_t val) "0x%x <- 0x%x" +via_ac97_sgd_fetch(uint32_t curr, uint32_t addr, char stop, char eol, char flag, uint32_t len) "curr=0x%x addr=0x%x %c%c%c len=%d" +via_ac97_sgd_read(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d -> 0x%"PRIx64 +via_ac97_sgd_write(uint64_t addr, unsigned size, uint64_t val) "0x%"PRIx64" %d <- 0x%"PRIx64 diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c index d1a856f63d..676254b7a4 100644 --- a/hw/audio/via-ac97.c +++ b/hw/audio/via-ac97.c @@ -1,39 +1,482 @@ /* * VIA south bridges sound support * + * Copyright (c) 2022-2023 BALATON Zoltan + * * This work is licensed under the GNU GPL license version 2 or later. */ /* - * TODO: This is entirely boiler plate just registering empty PCI devices - * with the right ID guests expect, functionality should be added here. + * TODO: This is only a basic implementation of one audio playback channel + * more functionality should be added here. */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/isa/vt82c686.h" -#include "hw/pci/pci_device.h" +#include "ac97.h" +#include "trace.h" + +#define CLEN_IS_EOL(x) ((x)->clen & BIT(31)) +#define CLEN_IS_FLAG(x) ((x)->clen & BIT(30)) +#define CLEN_IS_STOP(x) ((x)->clen & BIT(29)) +#define CLEN_LEN(x) ((x)->clen & 0xffffff) + +#define STAT_ACTIVE BIT(7) +#define STAT_PAUSED BIT(6) +#define STAT_TRIG BIT(3) +#define STAT_STOP BIT(2) +#define STAT_EOL BIT(1) +#define STAT_FLAG BIT(0) + +#define CNTL_START BIT(7) +#define CNTL_TERM BIT(6) +#define CNTL_PAUSE BIT(3) + +static void open_voice_out(ViaAC97State *s); + +static uint16_t codec_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100, + 48000 }; + +#define CODEC_REG(s, o) ((s)->codec_regs[(o) / 2]) +#define CODEC_VOL(vol, mask) ((255 * ((vol) & mask)) / mask) + +static void codec_volume_set_out(ViaAC97State *s) +{ + int lvol, rvol, mute; + + lvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute) >> 8, 0x1f); + lvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> 8, 0x1f); + lvol /= 255; + rvol = 255 - CODEC_VOL(CODEC_REG(s, AC97_Master_Volume_Mute), 0x1f); + rvol *= 255 - CODEC_VOL(CODEC_REG(s, AC97_PCM_Out_Volume_Mute), 0x1f); + rvol /= 255; + mute = CODEC_REG(s, AC97_Master_Volume_Mute) >> MUTE_SHIFT; + mute |= CODEC_REG(s, AC97_PCM_Out_Volume_Mute) >> MUTE_SHIFT; + AUD_set_volume_out(s->vo, mute, lvol, rvol); +} + +static void codec_reset(ViaAC97State *s) +{ + memset(s->codec_regs, 0, sizeof(s->codec_regs)); + CODEC_REG(s, AC97_Reset) = 0x6a90; + CODEC_REG(s, AC97_Master_Volume_Mute) = 0x8000; + CODEC_REG(s, AC97_Headphone_Volume_Mute) = 0x8000; + CODEC_REG(s, AC97_Master_Volume_Mono_Mute) = 0x8000; + CODEC_REG(s, AC97_Phone_Volume_Mute) = 0x8008; + CODEC_REG(s, AC97_Mic_Volume_Mute) = 0x8008; + CODEC_REG(s, AC97_Line_In_Volume_Mute) = 0x8808; + CODEC_REG(s, AC97_CD_Volume_Mute) = 0x8808; + CODEC_REG(s, AC97_Video_Volume_Mute) = 0x8808; + CODEC_REG(s, AC97_Aux_Volume_Mute) = 0x8808; + CODEC_REG(s, AC97_PCM_Out_Volume_Mute) = 0x8808; + CODEC_REG(s, AC97_Record_Gain_Mute) = 0x8000; + CODEC_REG(s, AC97_Powerdown_Ctrl_Stat) = 0x000f; + CODEC_REG(s, AC97_Extended_Audio_ID) = 0x0a05; + CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) = 0x0400; + CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000; + CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000; + /* Sigmatel 9766 (STAC9766) */ + CODEC_REG(s, AC97_Vendor_ID1) = 0x8384; + CODEC_REG(s, AC97_Vendor_ID2) = 0x7666; +} + +static uint16_t codec_read(ViaAC97State *s, uint8_t addr) +{ + return CODEC_REG(s, addr); +} + +static void codec_write(ViaAC97State *s, uint8_t addr, uint16_t val) +{ + trace_via_ac97_codec_write(addr, val); + switch (addr) { + case AC97_Reset: + codec_reset(s); + return; + case AC97_Master_Volume_Mute: + case AC97_PCM_Out_Volume_Mute: + if (addr == AC97_Master_Volume_Mute) { + if (val & BIT(13)) { + val |= 0x1f00; + } + if (val & BIT(5)) { + val |= 0x1f; + } + } + CODEC_REG(s, addr) = val & 0x9f1f; + codec_volume_set_out(s); + return; + case AC97_Extended_Audio_Ctrl_Stat: + CODEC_REG(s, addr) &= ~EACS_VRA; + CODEC_REG(s, addr) |= val & EACS_VRA; + if (!(val & EACS_VRA)) { + CODEC_REG(s, AC97_PCM_Front_DAC_Rate) = 48000; + CODEC_REG(s, AC97_PCM_LR_ADC_Rate) = 48000; + open_voice_out(s); + } + return; + case AC97_PCM_Front_DAC_Rate: + case AC97_PCM_LR_ADC_Rate: + if (CODEC_REG(s, AC97_Extended_Audio_Ctrl_Stat) & EACS_VRA) { + int i; + uint16_t rate = val; + + for (i = 0; i < ARRAY_SIZE(codec_rates) - 1; i++) { + if (rate < codec_rates[i] + + (codec_rates[i + 1] - codec_rates[i]) / 2) { + rate = codec_rates[i]; + break; + } + } + if (rate > 48000) { + rate = 48000; + } + CODEC_REG(s, addr) = rate; + open_voice_out(s); + } + return; + case AC97_Powerdown_Ctrl_Stat: + CODEC_REG(s, addr) = (val & 0xff00) | (CODEC_REG(s, addr) & 0xff); + return; + case AC97_Extended_Audio_ID: + case AC97_Vendor_ID1: + case AC97_Vendor_ID2: + /* Read only registers */ + return; + default: + qemu_log_mask(LOG_UNIMP, + "via-ac97: Unimplemented codec register 0x%x\n", addr); + CODEC_REG(s, addr) = val; + } +} + +static void fetch_sgd(ViaAC97SGDChannel *c, PCIDevice *d) +{ + uint32_t b[2]; + + if (c->curr < c->base) { + c->curr = c->base; + } + if (unlikely(pci_dma_read(d, c->curr, b, sizeof(b)) != MEMTX_OK)) { + qemu_log_mask(LOG_GUEST_ERROR, + "via-ac97: DMA error reading SGD table\n"); + return; + } + c->addr = le32_to_cpu(b[0]); + c->clen = le32_to_cpu(b[1]); + trace_via_ac97_sgd_fetch(c->curr, c->addr, CLEN_IS_STOP(c) ? 'S' : '-', + CLEN_IS_EOL(c) ? 'E' : '-', + CLEN_IS_FLAG(c) ? 'F' : '-', CLEN_LEN(c)); +} + +static void out_cb(void *opaque, int avail) +{ + ViaAC97State *s = opaque; + ViaAC97SGDChannel *c = &s->aur; + int temp, to_copy, copied; + bool stop = false; + uint8_t tmpbuf[4096]; + + if (c->stat & STAT_PAUSED) { + return; + } + c->stat |= STAT_ACTIVE; + while (avail && !stop) { + if (!c->clen) { + fetch_sgd(c, &s->dev); + } + temp = MIN(CLEN_LEN(c), avail); + while (temp) { + to_copy = MIN(temp, sizeof(tmpbuf)); + pci_dma_read(&s->dev, c->addr, tmpbuf, to_copy); + copied = AUD_write(s->vo, tmpbuf, to_copy); + if (!copied) { + stop = true; + break; + } + temp -= copied; + avail -= copied; + c->addr += copied; + c->clen -= copied; + } + if (CLEN_LEN(c) == 0) { + c->curr += 8; + if (CLEN_IS_EOL(c)) { + c->stat |= STAT_EOL; + if (c->type & CNTL_START) { + c->curr = c->base; + c->stat |= STAT_PAUSED; + } else { + c->stat &= ~STAT_ACTIVE; + AUD_set_active_out(s->vo, 0); + } + if (c->type & STAT_EOL) { + pci_set_irq(&s->dev, 1); + } + } + if (CLEN_IS_FLAG(c)) { + c->stat |= STAT_FLAG; + c->stat |= STAT_PAUSED; + if (c->type & STAT_FLAG) { + pci_set_irq(&s->dev, 1); + } + } + if (CLEN_IS_STOP(c)) { + c->stat |= STAT_STOP; + c->stat |= STAT_PAUSED; + } + c->clen = 0; + stop = true; + } + } +} + +static void open_voice_out(ViaAC97State *s) +{ + struct audsettings as = { + .freq = CODEC_REG(s, AC97_PCM_Front_DAC_Rate), + .nchannels = s->aur.type & BIT(4) ? 2 : 1, + .fmt = s->aur.type & BIT(5) ? AUDIO_FORMAT_S16 : AUDIO_FORMAT_S8, + .endianness = 0, + }; + s->vo = AUD_open_out(&s->card, s->vo, "via-ac97.out", s, out_cb, &as); +} + +static uint64_t sgd_read(void *opaque, hwaddr addr, unsigned size) +{ + ViaAC97State *s = opaque; + uint64_t val = 0; + + switch (addr) { + case 0: + val = s->aur.stat; + if (s->aur.type & CNTL_START) { + val |= STAT_TRIG; + } + break; + case 1: + val = s->aur.stat & STAT_PAUSED ? BIT(3) : 0; + break; + case 2: + val = s->aur.type; + break; + case 4: + val = s->aur.curr; + break; + case 0xc: + val = CLEN_LEN(&s->aur); + break; + case 0x10: + /* silence unimplemented log message that happens at every IRQ */ + break; + case 0x80: + val = s->ac97_cmd; + break; + case 0x84: + val = s->aur.stat & STAT_FLAG; + if (s->aur.stat & STAT_EOL) { + val |= BIT(4); + } + if (s->aur.stat & STAT_STOP) { + val |= BIT(8); + } + if (s->aur.stat & STAT_ACTIVE) { + val |= BIT(12); + } + break; + default: + qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register read 0x%" + HWADDR_PRIx"\n", addr); + } + trace_via_ac97_sgd_read(addr, size, val); + return val; +} + +static void sgd_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + ViaAC97State *s = opaque; + + trace_via_ac97_sgd_write(addr, size, val); + switch (addr) { + case 0: + if (val & STAT_STOP) { + s->aur.stat &= ~STAT_PAUSED; + } + if (val & STAT_EOL) { + s->aur.stat &= ~(STAT_EOL | STAT_PAUSED); + if (s->aur.type & STAT_EOL) { + pci_set_irq(&s->dev, 0); + } + } + if (val & STAT_FLAG) { + s->aur.stat &= ~(STAT_FLAG | STAT_PAUSED); + if (s->aur.type & STAT_FLAG) { + pci_set_irq(&s->dev, 0); + } + } + break; + case 1: + if (val & CNTL_START) { + AUD_set_active_out(s->vo, 1); + s->aur.stat = STAT_ACTIVE; + } + if (val & CNTL_TERM) { + AUD_set_active_out(s->vo, 0); + s->aur.stat &= ~(STAT_ACTIVE | STAT_PAUSED); + s->aur.clen = 0; + } + if (val & CNTL_PAUSE) { + AUD_set_active_out(s->vo, 0); + s->aur.stat &= ~STAT_ACTIVE; + s->aur.stat |= STAT_PAUSED; + } else if (!(val & CNTL_PAUSE) && (s->aur.stat & STAT_PAUSED)) { + AUD_set_active_out(s->vo, 1); + s->aur.stat |= STAT_ACTIVE; + s->aur.stat &= ~STAT_PAUSED; + } + break; + case 2: + { + uint32_t oldval = s->aur.type; + s->aur.type = val; + if ((oldval & 0x30) != (val & 0x30)) { + open_voice_out(s); + } + break; + } + case 4: + s->aur.base = val & ~1ULL; + s->aur.curr = s->aur.base; + break; + case 0x80: + if (val >> 30) { + /* we only have primary codec */ + break; + } + if (val & BIT(23)) { /* read reg */ + s->ac97_cmd = val & 0xc0ff0000ULL; + s->ac97_cmd |= codec_read(s, (val >> 16) & 0x7f); + s->ac97_cmd |= BIT(25); /* data valid */ + } else { + s->ac97_cmd = val & 0xc0ffffffULL; + codec_write(s, (val >> 16) & 0x7f, val); + } + break; + case 0xc: + case 0x84: + /* Read only */ + break; + default: + qemu_log_mask(LOG_UNIMP, "via-ac97: Unimplemented register write 0x%" + HWADDR_PRIx"\n", addr); + } +} + +static const MemoryRegionOps sgd_ops = { + .read = sgd_read, + .write = sgd_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t fm_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size); + return 0; +} + +static void fm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n", + __func__, addr, size, val); +} + +static const MemoryRegionOps fm_ops = { + .read = fm_read, + .write = fm_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t midi_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d\n", __func__, addr, size); + return 0; +} + +static void midi_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%"HWADDR_PRIx" %d <= 0x%"PRIX64"\n", + __func__, addr, size, val); +} + +static const MemoryRegionOps midi_ops = { + .read = midi_read, + .write = midi_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void via_ac97_reset(DeviceState *dev) +{ + ViaAC97State *s = VIA_AC97(dev); + + codec_reset(s); +} static void via_ac97_realize(PCIDevice *pci_dev, Error **errp) { - pci_set_word(pci_dev->config + PCI_COMMAND, - PCI_COMMAND_INVALIDATE | PCI_COMMAND_PARITY); + ViaAC97State *s = VIA_AC97(pci_dev); + Object *o = OBJECT(s); + + /* + * Command register Bus Master bit is documented to be fixed at 0 but it's + * needed for PCI DMA to work in QEMU. The pegasos2 firmware writes 0 here + * and the AmigaOS driver writes 1 only enabling IO bit which works on + * real hardware. So set it here and fix it to 1 to allow DMA. + */ + pci_set_word(pci_dev->config + PCI_COMMAND, PCI_COMMAND_MASTER); + pci_set_word(pci_dev->wmask + PCI_COMMAND, PCI_COMMAND_IO); pci_set_word(pci_dev->config + PCI_STATUS, PCI_STATUS_CAP_LIST | PCI_STATUS_DEVSEL_MEDIUM); pci_set_long(pci_dev->config + PCI_INTERRUPT_PIN, 0x03); + pci_set_byte(pci_dev->config + 0x40, 1); /* codec ready */ + + memory_region_init_io(&s->sgd, o, &sgd_ops, s, "via-ac97.sgd", 256); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->sgd); + memory_region_init_io(&s->fm, o, &fm_ops, s, "via-ac97.fm", 4); + pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->fm); + memory_region_init_io(&s->midi, o, &midi_ops, s, "via-ac97.midi", 4); + pci_register_bar(pci_dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->midi); + + AUD_register_card ("via-ac97", &s->card); } +static void via_ac97_exit(PCIDevice *dev) +{ + ViaAC97State *s = VIA_AC97(dev); + + AUD_close_out(&s->card, s->vo); + AUD_remove_card(&s->card); +} + +static Property via_ac97_properties[] = { + DEFINE_AUDIO_PROPERTIES(ViaAC97State, card), + DEFINE_PROP_END_OF_LIST(), +}; + static void via_ac97_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); k->realize = via_ac97_realize; + k->exit = via_ac97_exit; k->vendor_id = PCI_VENDOR_ID_VIA; k->device_id = PCI_DEVICE_ID_VIA_AC97; k->revision = 0x50; k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO; + device_class_set_props(dc, via_ac97_properties); set_bit(DEVICE_CATEGORY_SOUND, dc->categories); dc->desc = "VIA AC97"; + dc->reset = via_ac97_reset; /* Reason: Part of a south bridge chip */ dc->user_creatable = false; } @@ -41,7 +484,7 @@ static void via_ac97_class_init(ObjectClass *klass, void *data) static const TypeInfo via_ac97_info = { .name = TYPE_VIA_AC97, .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIDevice), + .instance_size = sizeof(ViaAC97State), .class_init = via_ac97_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, diff --git a/hw/block/block.c b/hw/block/block.c index af0710e477..9f52ee6e72 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -39,8 +39,7 @@ static int blk_pread_nonzeroes(BlockBackend *blk, hwaddr size, void *buf) return ret; } if (!(ret & BDRV_BLOCK_ZERO)) { - ret = bdrv_pread(bs->file, offset, bytes, - (uint8_t *) buf + offset, 0); + ret = blk_pread(blk, offset, bytes, (uint8_t *) buf + offset, 0); if (ret < 0) { return ret; } diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build index 12c6a264f1..78d7ac1a11 100644 --- a/hw/block/dataplane/meson.build +++ b/hw/block/dataplane/meson.build @@ -1,2 +1,2 @@ specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) -specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index 2785b9e849..734da42ea7 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -23,8 +23,9 @@ #include "qemu/main-loop.h" #include "qemu/memalign.h" #include "qapi/error.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen.h" #include "hw/block/xen_blkif.h" +#include "hw/xen/interface/io/ring.h" #include "sysemu/block-backend.h" #include "sysemu/iothread.h" #include "xen-block.h" @@ -101,9 +102,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane) * re-use requests, allocate the memory once here. It will be freed * xen_block_dataplane_destroy() when the request list is freed. */ - request->buf = qemu_memalign(XC_PAGE_SIZE, + request->buf = qemu_memalign(XEN_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * - XC_PAGE_SIZE); + XEN_PAGE_SIZE); dataplane->requests_total++; qemu_iovec_init(&request->v, 1); } else { @@ -185,7 +186,7 @@ static int xen_block_parse_request(XenBlockRequest *request) goto err; } if (request->req.seg[i].last_sect * dataplane->sector_size >= - XC_PAGE_SIZE) { + XEN_PAGE_SIZE) { error_report("error: page crossing"); goto err; } @@ -705,6 +706,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) Error *local_err = NULL; xen_device_unmap_grant_refs(xendev, dataplane->sring, + dataplane->ring_ref, dataplane->nr_ring_ref, &local_err); dataplane->sring = NULL; @@ -739,7 +741,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, dataplane->protocol = protocol; - ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref; + ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref; switch (dataplane->protocol) { case BLKIF_PROTOCOL_NATIVE: { diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 802d2eb021..dc5ffbc4ff 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "sysemu/block-backend.h" +#include "hw/block/block.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "hw/ssi/ssi.h" @@ -1615,8 +1616,7 @@ static void m25p80_realize(SSIPeripheral *ss, Error **errp) trace_m25p80_binding(s); s->storage = blk_blockalign(s->blk, s->size); - if (blk_pread(s->blk, 0, s->size, s->storage, 0) < 0) { - error_setg(errp, "failed to read the initial flash content"); + if (!blk_check_size_and_read_all(s->blk, s->storage, s->size, errp)) { return; } } else { diff --git a/hw/block/meson.build b/hw/block/meson.build index b434d5654c..cc2a75cc50 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c')) diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 345b284d70..f5a744589d 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -19,7 +19,6 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" #include "qom/object_interfaces.h" -#include "hw/xen/xen_common.h" #include "hw/block/xen_blkif.h" #include "hw/qdev-properties.h" #include "hw/xen/xen-block.h" @@ -84,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) g_free(ring_ref); return; } - } else if (order <= blockdev->props.max_ring_page_order) { + } else if (qemu_xen_gnttab_can_map_multi() && + order <= blockdev->props.max_ring_page_order) { unsigned int i; nr_ring_ref = 1 << order; @@ -256,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) } xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1); - xen_device_backend_printf(xendev, "max-ring-page-order", "%u", - blockdev->props.max_ring_page_order); + + if (qemu_xen_gnttab_can_map_multi()) { + xen_device_backend_printf(xendev, "max-ring-page-order", "%u", + blockdev->props.max_ring_page_order); + } + xen_device_backend_printf(xendev, "info", "%u", blockdev->info); xen_device_frontend_printf(xendev, "virtual-device", "%lu", diff --git a/hw/char/meson.build b/hw/char/meson.build index 7b594f51b8..e02c60dd54 100644 --- a/hw/char/meson.build +++ b/hw/char/meson.build @@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c')) softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c')) softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c')) softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c')) softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c')) softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c')) diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 63153dfde4..c7a19c0e7c 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con) /* -------------------------------------------------------------------- */ +static int store_con_info(struct XenConsole *con) +{ + Chardev *cs = qemu_chr_fe_get_driver(&con->chr); + char *pts = NULL; + char *dom_path; + GString *path; + int ret = -1; + + /* Only continue if we're talking to a pty. */ + if (!CHARDEV_IS_PTY(cs)) { + return 0; + } + pts = cs->filename + 4; + + dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid); + if (!dom_path) { + return 0; + } + + path = g_string_new(dom_path); + free(dom_path); + + if (con->xendev.dev) { + g_string_append_printf(path, "/device/console/%d", con->xendev.dev); + } else { + g_string_append(path, "/console"); + } + g_string_append(path, "/tty"); + + if (xenstore_write_str(con->console, path->str, pts)) { + fprintf(stderr, "xenstore_write_str for '%s' fail", path->str); + goto out; + } + ret = 0; + +out: + g_string_free(path, true); + free(path); + + return ret; +} + static int con_init(struct XenLegacyDevice *xendev) { struct XenConsole *con = container_of(xendev, struct XenConsole, xendev); @@ -181,7 +223,7 @@ static int con_init(struct XenLegacyDevice *xendev) const char *output; /* setup */ - dom = xs_get_domain_path(xenstore, con->xendev.dom); + dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom); if (!xendev->dev) { snprintf(con->console, sizeof(con->console), "%s/console", dom); } else { @@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev) &error_abort); } - xenstore_store_pv_console_info(con->xendev.dev, - qemu_chr_fe_get_driver(&con->chr)); + store_con_info(con); out: g_free(type); @@ -237,9 +278,9 @@ static int con_initialise(struct XenLegacyDevice *xendev) if (!xendev->dev) { xen_pfn_t mfn = con->ring_ref; - con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom, - PROT_READ | PROT_WRITE, - 1, &mfn, NULL); + con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL, + PROT_READ | PROT_WRITE, + 1, &mfn, NULL); } else { con->sring = xen_be_map_grant_ref(xendev, con->ring_ref, PROT_READ | PROT_WRITE); @@ -269,9 +310,9 @@ static void con_disconnect(struct XenLegacyDevice *xendev) if (con->sring) { if (!xendev->dev) { - xenforeignmemory_unmap(xen_fmem, con->sring, 1); + qemu_xen_foreignmem_unmap(con->sring, 1); } else { - xen_be_unmap_grant_ref(xendev, con->sring); + xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref); } con->sring = NULL; } diff --git a/hw/display/meson.build b/hw/display/meson.build index f470179122..4191694380 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c')) softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c')) softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c')) softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c')) -softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c')) +softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c')) softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c')) softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c')) diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 17835159fc..dbabbc4339 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -465,6 +465,7 @@ typedef struct SM501State { uint32_t last_width; uint32_t last_height; bool do_full_update; /* perform a full update next time */ + uint8_t use_pixman; I2CBus *i2c_bus; /* mmio registers */ @@ -827,7 +828,7 @@ static void sm501_2d_operation(SM501State *s) de = db + (width + (height - 1) * dst_pitch) * bypp; overlap = (db < se && sb < de); } - if (overlap) { + if (overlap && (s->use_pixman & BIT(2))) { /* pixman can't do reverse blit: copy via temporary */ int tmp_stride = DIV_ROUND_UP(width * bypp, sizeof(uint32_t)); uint32_t *tmp = tmp_buf; @@ -852,13 +853,15 @@ static void sm501_2d_operation(SM501State *s) if (tmp != tmp_buf) { g_free(tmp); } - } else { + } else if (!overlap && (s->use_pixman & BIT(1))) { fallback = !pixman_blt((uint32_t *)&s->local_mem[src_base], (uint32_t *)&s->local_mem[dst_base], src_pitch * bypp / sizeof(uint32_t), dst_pitch * bypp / sizeof(uint32_t), 8 * bypp, 8 * bypp, src_x, src_y, dst_x, dst_y, width, height); + } else { + fallback = true; } if (fallback) { uint8_t *sp = s->local_mem + src_base; @@ -891,7 +894,7 @@ static void sm501_2d_operation(SM501State *s) color = cpu_to_le16(color); } - if ((width == 1 && height == 1) || + if (!(s->use_pixman & BIT(0)) || (width == 1 && height == 1) || !pixman_fill((uint32_t *)&s->local_mem[dst_base], dst_pitch * bypp / sizeof(uint32_t), 8 * bypp, dst_x, dst_y, width, height, color)) { @@ -2035,6 +2038,7 @@ static void sm501_realize_sysbus(DeviceState *dev, Error **errp) static Property sm501_sysbus_properties[] = { DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0), + DEFINE_PROP_UINT8("x-pixman", SM501SysBusState, state.use_pixman, 7), DEFINE_PROP_END_OF_LIST(), }; @@ -2122,6 +2126,7 @@ static void sm501_realize_pci(PCIDevice *dev, Error **errp) static Property sm501_pci_properties[] = { DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * MiB), + DEFINE_PROP_UINT8("x-pixman", SM501PCIState, state.use_pixman, 7), DEFINE_PROP_END_OF_LIST(), }; @@ -2162,11 +2167,18 @@ static void sm501_pci_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_sm501_pci; } +static void sm501_pci_init(Object *o) +{ + object_property_set_description(o, "x-pixman", "Use pixman for: " + "1: fill, 2: blit, 4: overlap blit"); +} + static const TypeInfo sm501_pci_info = { .name = TYPE_PCI_SM501, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(SM501PCIState), .class_init = sm501_pci_class_init, + .instance_init = sm501_pci_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { }, diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 260eb38a76..0074a9b6f8 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -98,8 +98,9 @@ static int common_bind(struct common *c) if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1) return -1; - c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom, - PROT_READ | PROT_WRITE, 1, &mfn, NULL); + c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL, + PROT_READ | PROT_WRITE, 1, &mfn, + NULL); if (c->page == NULL) return -1; @@ -115,7 +116,7 @@ static void common_unbind(struct common *c) { xen_pv_unbind_evtchn(&c->xendev); if (c->page) { - xenforeignmemory_unmap(xen_fmem, c->page, 1); + qemu_xen_foreignmem_unmap(c->page, 1); c->page = NULL; } } @@ -488,27 +489,28 @@ static int xenfb_map_fb(struct XenFB *xenfb) } if (xenfb->pixels) { - munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE); + munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE); xenfb->pixels = NULL; } - xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE); + xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE); n_fbdirs = xenfb->fbpages * mode / 8; - n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE); + n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE); pgmfns = g_new0(xen_pfn_t, n_fbdirs); fbmfns = g_new0(xen_pfn_t, xenfb->fbpages); xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd); - map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, n_fbdirs, pgmfns, NULL); + map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ, + n_fbdirs, pgmfns, NULL); if (map == NULL) goto out; xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map); - xenforeignmemory_unmap(xen_fmem, map, n_fbdirs); + qemu_xen_foreignmem_unmap(map, n_fbdirs); - xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom, - PROT_READ, xenfb->fbpages, fbmfns, NULL); + xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, + PROT_READ, xenfb->fbpages, + fbmfns, NULL); if (xenfb->pixels == NULL) goto out; @@ -526,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, { size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]); size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz; - size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz; - size_t fb_len_max = fb_pages * XC_PAGE_SIZE; + size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz; + size_t fb_len_max = fb_pages * XEN_PAGE_SIZE; int max_width, max_height; if (fb_len_lim > fb_len_max) { @@ -927,8 +929,8 @@ static void fb_disconnect(struct XenLegacyDevice *xendev) * Replacing the framebuffer with anonymous shared memory * instead. This releases the guest pages and keeps qemu happy. */ - xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages); - fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE, + qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages); + fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); if (fb->pixels == MAP_FAILED) { diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build index 82dd6ae7c6..6621ba5cd7 100644 --- a/hw/i386/kvm/meson.build +++ b/hw/i386/kvm/meson.build @@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files( 'xen_evtchn.c', 'xen_gnttab.c', 'xen_xenstore.c', + 'xenstore_impl.c', )) i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events index b83c3eb965..e4c82de6f3 100644 --- a/hw/i386/kvm/trace-events +++ b/hw/i386/kvm/trace-events @@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d" kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d" kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d" kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d" +xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u tx %u err %s" +xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int offset) "tx %u path %s offset %u" +xenstore_transaction_start(unsigned int new_tx) "new_tx %u" +xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d" +xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s" +xenstore_watch(const char *path, const char *token) "path %s token %s" +xenstore_unwatch(const char *path, const char *token) "path %s token %s" +xenstore_reset_watches(void) "" +xenstore_watch_event(const char *path, const char *token) "path %s token %s" diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 886fbf6b3b..98a7b85047 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -34,6 +34,7 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/irq.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_evtchn.h" #include "xen_overlay.h" @@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = { .class_init = xen_evtchn_class_init, }; +static struct evtchn_backend_ops emu_evtchn_backend_ops = { + .open = xen_be_evtchn_open, + .bind_interdomain = xen_be_evtchn_bind_interdomain, + .unbind = xen_be_evtchn_unbind, + .close = xen_be_evtchn_close, + .get_fd = xen_be_evtchn_fd, + .notify = xen_be_evtchn_notify, + .unmask = xen_be_evtchn_unmask, + .pending = xen_be_evtchn_pending, +}; + static void gsi_assert_bh(void *opaque) { struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0); @@ -318,6 +330,9 @@ void xen_evtchn_create(void) s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64); s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words); s->pirq = g_new0(struct pirq_info, s->nr_pirqs); + + /* Set event channel functions for backend drivers to use */ + xen_evtchn_ops = &emu_evtchn_backend_ops; } void xen_evtchn_connect_gsis(qemu_irq *system_gsis) diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 1e691ded32..21c30e3659 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -22,6 +22,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_gnttab.h" @@ -34,11 +35,10 @@ #define TYPE_XEN_GNTTAB "xen-gnttab" OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) +static struct gnttab_backend_ops emu_gnttab_backend_ops; + struct XenGnttabState { /*< private >*/ SysBusDevice busdev; @@ -57,6 +57,8 @@ struct XenGnttabState { MemoryRegion gnt_frames; MemoryRegion *gnt_aliases; uint64_t *gnt_frame_gpas; + + uint8_t *map_track; }; struct XenGnttabState *xen_gnttab_singleton; @@ -70,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) error_setg(errp, "Xen grant table support is for Xen emulation"); return; } - s->nr_frames = 0; s->max_frames = kvm_xen_get_gnttab_max_frames(); memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table", XEN_PAGE_SIZE * s->max_frames, &error_abort); memory_region_set_enabled(&s->gnt_frames, true); s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames); - memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); /* Create individual page-sizes aliases for overlays */ s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames); @@ -88,9 +88,18 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp) s->gnt_frame_gpas[i] = INVALID_GPA; } + s->nr_frames = 0; + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + qemu_mutex_init(&s->gnt_lock); xen_gnttab_singleton = s; + + s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1); + + xen_gnttab_ops = &emu_gnttab_backend_ops; } static int xen_gnttab_post_load(void *opaque, int version_id) @@ -230,3 +239,309 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size) size->max_nr_frames = s->max_frames; return 0; } + +/* Track per-open refs, to allow close() to clean up. */ +struct active_ref { + MemoryRegionSection mrs; + void *virtaddr; + uint32_t refcnt; + int prot; +}; + +static void gnt_unref(XenGnttabState *s, grant_ref_t ref, + MemoryRegionSection *mrs, int prot) +{ + if (mrs && mrs->mr) { + if (prot & PROT_WRITE) { + memory_region_set_dirty(mrs->mr, mrs->offset_within_region, + XEN_PAGE_SIZE); + } + memory_region_unref(mrs->mr); + mrs->mr = NULL; + } + assert(s->map_track[ref] != 0); + + if (--s->map_track[ref] == 0) { + grant_entry_v1_t *gnt_p = &s->entries.v1[ref]; + qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing)); + } +} + +static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot) +{ + uint16_t mask = GTF_type_mask | GTF_sub_page; + grant_entry_v1_t gnt, *gnt_p; + int retries = 0; + + if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 || + s->map_track[ref] == UINT8_MAX) { + return INVALID_GPA; + } + + if (prot & PROT_WRITE) { + mask |= GTF_readonly; + } + + gnt_p = &s->entries.v1[ref]; + + /* + * The guest can legitimately be changing the GTF_readonly flag. Allow + * that, but don't let a malicious guest cause a livelock. + */ + for (retries = 0; retries < 5; retries++) { + uint16_t new_flags; + + /* Read the entry before an atomic operation on its flags */ + gnt = *(volatile grant_entry_v1_t *)gnt_p; + + if ((gnt.flags & mask) != GTF_permit_access || + gnt.domid != DOMID_QEMU) { + return INVALID_GPA; + } + + new_flags = gnt.flags | GTF_reading; + if (prot & PROT_WRITE) { + new_flags |= GTF_writing; + } + + if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) { + return (uint64_t)gnt.frame << XEN_PAGE_SHIFT; + } + } + + return INVALID_GPA; +} + +struct xengntdev_handle { + GHashTable *active_maps; +}; + +static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt, + uint32_t nr_grants) +{ + return 0; +} + +static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, + uint32_t count, uint32_t domid, + uint32_t *refs, int prot) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + errno = ENOTSUP; + return NULL; + } + + if (domid != xen_domid) { + errno = EINVAL; + return NULL; + } + + if (!count || count > 4096) { + errno = EINVAL; + return NULL; + } + + /* + * Making a contiguous mapping from potentially discontiguous grant + * references would be... distinctly non-trivial. We don't support it. + * Even changing the API to return an array of pointers, one per page, + * wouldn't be simple to use in PV backends because some structures + * actually cross page boundaries (e.g. 32-bit blkif_response ring + * entries are 12 bytes). + */ + if (count != 1) { + errno = EINVAL; + return NULL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (act) { + if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) { + if (gnt_ref(s, refs[0], prot) == INVALID_GPA) { + return NULL; + } + act->prot |= PROT_WRITE; + } + act->refcnt++; + } else { + uint64_t gpa = gnt_ref(s, refs[0], prot); + if (gpa == INVALID_GPA) { + errno = EINVAL; + return NULL; + } + + act = g_new0(struct active_ref, 1); + act->prot = prot; + act->refcnt = 1; + act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE); + + if (act->mrs.mr && + !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) && + memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) { + act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block, + act->mrs.offset_within_region); + } + if (!act->virtaddr) { + gnt_unref(s, refs[0], &act->mrs, 0); + g_free(act); + errno = EINVAL; + return NULL; + } + + s->map_track[refs[0]]++; + g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act); + } + + return act->virtaddr; +} + +static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data) +{ + XenGnttabState *s = user_data; + grant_ref_t gref = GPOINTER_TO_INT(key); + struct active_ref *act = value; + + gnt_unref(s, gref, &act->mrs, act->prot); + g_free(act); + return true; +} + +static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + XenGnttabState *s = xen_gnttab_singleton; + struct active_ref *act; + + if (!s) { + return -ENOTSUP; + } + + if (count != 1) { + return -EINVAL; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); + if (!act) { + return -ENOENT; + } + + if (act->virtaddr != start_address) { + return -EINVAL; + } + + if (!--act->refcnt) { + do_unmap(GINT_TO_POINTER(refs[0]), act, s); + g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0])); + } + + return 0; +} + +/* + * This looks a bit like the one for true Xen in xen-operations.c but + * in emulation we don't support multi-page mappings. And under Xen we + * *want* the multi-page mappings so we have fewer bounces through the + * kernel and the hypervisor. So the code paths end up being similar, + * but different. + */ +static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain, + uint32_t domid, XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + int prot = to_domain ? PROT_WRITE : PROT_READ; + unsigned int i; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page; + uint32_t ref = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + + page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot); + if (!page) { + if (errp) { + error_setg_errno(errp, errno, + "xen_be_gnttab_map_refs failed"); + } + return -errno; + } + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + + if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) { + if (errp) { + error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed"); + } + return -errno; + } + } + + return 0; +} + +static struct xengntdev_handle *xen_be_gnttab_open(void) +{ + struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1); + + xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal); + return xgt; +} + +static int xen_be_gnttab_close(struct xengntdev_handle *xgt) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s); + g_hash_table_destroy(xgt->active_maps); + g_free(xgt); + return 0; +} + +static struct gnttab_backend_ops emu_gnttab_backend_ops = { + .open = xen_be_gnttab_open, + .close = xen_be_gnttab_close, + .grant_copy = xen_be_gnttab_copy, + .set_max_grants = xen_be_gnttab_set_max_grants, + .map_refs = xen_be_gnttab_map_refs, + .unmap = xen_be_gnttab_unmap, +}; + +int xen_gnttab_reset(void) +{ + XenGnttabState *s = xen_gnttab_singleton; + + if (!s) { + return -ENOTSUP; + } + + QEMU_LOCK_GUARD(&s->gnt_lock); + + s->nr_frames = 0; + + memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); + + s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; + s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); + + memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); + + return 0; +} diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h index 3bdbe96191..ee215239b0 100644 --- a/hw/i386/kvm/xen_gnttab.h +++ b/hw/i386/kvm/xen_gnttab.h @@ -13,6 +13,7 @@ #define QEMU_XEN_GNTTAB_H void xen_gnttab_create(void); +int xen_gnttab_reset(void); int xen_gnttab_map_page(uint64_t idx, uint64_t gfn); struct gnttab_set_version; diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 14193ef3f9..2cadafd56a 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -21,6 +21,7 @@ #include "hw/sysbus.h" #include "hw/xen/xen.h" +#include "hw/xen/xen_backend_ops.h" #include "xen_overlay.h" #include "xen_evtchn.h" #include "xen_xenstore.h" @@ -28,15 +29,17 @@ #include "sysemu/kvm.h" #include "sysemu/kvm_xen.h" +#include "trace.h" + +#include "xenstore_impl.h" + #include "hw/xen/interface/io/xs_wire.h" #include "hw/xen/interface/event_channel.h" +#include "hw/xen/interface/grant_table.h" #define TYPE_XEN_XENSTORE "xen-xenstore" OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE) -#define XEN_PAGE_SHIFT 12 -#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT) - #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t)) @@ -47,6 +50,9 @@ struct XenXenstoreState { SysBusDevice busdev; /*< public >*/ + XenstoreImplState *impl; + GList *watch_events; /* for the guest */ + MemoryRegion xenstore_page; struct xenstore_domain_interface *xs; uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX]; @@ -59,15 +65,54 @@ struct XenXenstoreState { evtchn_port_t guest_port; evtchn_port_t be_port; struct xenevtchn_handle *eh; + + uint8_t *impl_state; + uint32_t impl_state_size; + + struct xengntdev_handle *gt; + void *granted_xs; }; struct XenXenstoreState *xen_xenstore_singleton; static void xen_xenstore_event(void *opaque); +static void fire_watch_cb(void *opaque, const char *path, const char *token); + +static struct xenstore_backend_ops emu_xenstore_backend_ops; + +static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s, + GList *perms, + const char *relpath, + const char *fmt, ...) +{ + gchar *abspath; + gchar *value; + va_list args; + GByteArray *data; + int err; + + abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath); + va_start(args, fmt); + value = g_strdup_vprintf(fmt, args); + va_end(args); + + data = g_byte_array_new_take((void *)value, strlen(value)); + + err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data); + assert(!err); + + g_byte_array_unref(data); + + err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms); + assert(!err); + + g_free(abspath); +} static void xen_xenstore_realize(DeviceState *dev, Error **errp) { XenXenstoreState *s = XEN_XENSTORE(dev); + GList *perms; if (xen_mode != XEN_EMULATE) { error_setg(errp, "Xen xenstore support is for Xen emulation"); @@ -89,6 +134,50 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp) } aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true, xen_xenstore_event, NULL, NULL, NULL, s); + + s->impl = xs_impl_create(xen_domid); + + /* Populate the default nodes */ + + /* Nodes owned by 'dom0' but readable by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU)); + perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid)); + + relpath_printf(s, perms, "", "%s", ""); + + relpath_printf(s, perms, "domid", "%u", xen_domid); + + relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1); + relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1); + + relpath_printf(s, perms, "platform/acpi", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s3", "%u", 1); + relpath_printf(s, perms, "platform/acpi_s4", "%u", 1); + relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0); + + g_list_free_full(perms, g_free); + + /* Nodes owned by the guest */ + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid)); + + relpath_printf(s, perms, "attr", "%s", ""); + + relpath_printf(s, perms, "control/shutdown", "%s", ""); + relpath_printf(s, perms, "control/feature-poweroff", "%u", 1); + relpath_printf(s, perms, "control/feature-reboot", "%u", 1); + relpath_printf(s, perms, "control/feature-suspend", "%u", 1); + relpath_printf(s, perms, "control/feature-s3", "%u", 1); + relpath_printf(s, perms, "control/feature-s4", "%u", 1); + + relpath_printf(s, perms, "data", "%s", ""); + relpath_printf(s, perms, "device", "%s", ""); + relpath_printf(s, perms, "drivers", "%s", ""); + relpath_printf(s, perms, "error", "%s", ""); + relpath_printf(s, perms, "feature", "%s", ""); + + g_list_free_full(perms, g_free); + + xen_xenstore_ops = &emu_xenstore_backend_ops; } static bool xen_xenstore_is_needed(void *opaque) @@ -99,16 +188,26 @@ static bool xen_xenstore_is_needed(void *opaque) static int xen_xenstore_pre_save(void *opaque) { XenXenstoreState *s = opaque; + GByteArray *save; if (s->eh) { s->guest_port = xen_be_evtchn_get_guest_port(s->eh); } + + g_free(s->impl_state); + save = xs_impl_serialize(s->impl); + s->impl_state = save->data; + s->impl_state_size = save->len; + g_byte_array_free(save, false); + return 0; } static int xen_xenstore_post_load(void *opaque, int ver) { XenXenstoreState *s = opaque; + GByteArray *save; + int ret; /* * As qemu/dom0, rebind to the guest's port. The Windows drivers may @@ -125,11 +224,18 @@ static int xen_xenstore_post_load(void *opaque, int ver) } s->be_port = be_port; } - return 0; + + save = g_byte_array_new_take(s->impl_state, s->impl_state_size); + s->impl_state = NULL; + s->impl_state_size = 0; + + ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s); + return ret; } static const VMStateDescription xen_xenstore_vmstate = { .name = "xen_xenstore", + .unmigratable = 1, /* The PV back ends don't migrate yet */ .version_id = 1, .minimum_version_id = 1, .needed = xen_xenstore_is_needed, @@ -145,6 +251,10 @@ static const VMStateDescription xen_xenstore_vmstate = { VMSTATE_BOOL(rsp_pending, XenXenstoreState), VMSTATE_UINT32(guest_port, XenXenstoreState), VMSTATE_BOOL(fatal_error, XenXenstoreState), + VMSTATE_UINT32(impl_state_size, XenXenstoreState), + VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState, + impl_state_size, 0, + vmstate_info_uint8, uint8_t), VMSTATE_END_OF_LIST() } }; @@ -213,20 +323,761 @@ static void reset_rsp(XenXenstoreState *s) s->rsp_offset = 0; } +static void xs_error(XenXenstoreState *s, unsigned int id, + xs_transaction_t tx_id, int errnum) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *errstr = NULL; + + for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) { + struct xsd_errors *xsd_error = &xsd_errors[i]; + + if (xsd_error->errnum == errnum) { + errstr = xsd_error->errstring; + break; + } + } + assert(errstr); + + trace_xenstore_error(id, tx_id, errstr); + + rsp->type = XS_ERROR; + rsp->req_id = id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(errstr) + 1; + + memcpy(&rsp[1], errstr, rsp->len); +} + +static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id, + xs_transaction_t tx_id) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + const char *okstr = "OK"; + + rsp->type = type; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = (uint32_t)strlen(okstr) + 1; + + memcpy(&rsp[1], okstr, rsp->len); +} + +/* + * The correct request and response formats are documented in xen.git: + * docs/misc/xenstore.txt. A summary is given below for convenience. + * The '|' symbol represents a NUL character. + * + * ---------- Database read, write and permissions operations ---------- + * + * READ <path>| <value|> + * WRITE <path>|<value|> + * Store and read the octet string <value> at <path>. + * WRITE creates any missing parent paths, with empty values. + * + * MKDIR <path>| + * Ensures that the <path> exists, by necessary by creating + * it and any missing parents with empty values. If <path> + * or any parent already exists, its value is left unchanged. + * + * RM <path>| + * Ensures that the <path> does not exist, by deleting + * it and all of its children. It is not an error if <path> does + * not exist, but it _is_ an error if <path>'s immediate parent + * does not exist either. + * + * DIRECTORY <path>| <child-leaf-name>|* + * Gives a list of the immediate children of <path>, as only the + * leafnames. The resulting children are each named + * <path>/<child-leaf-name>. + * + * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|* + * Same as DIRECTORY, but to be used for children lists longer than + * XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into + * the list of children to return. Return values are the generation + * count <gencnt> of the node (to be used to ensure the node hasn't + * changed between two reads: <gencnt> being the same for multiple + * reads guarantees the node hasn't changed) and the list of children + * starting at the specified <offset> of the complete list. + * + * GET_PERMS <path>| <perm-as-string>|+ + * SET_PERMS <path>|<perm-as-string>|+? + * <perm-as-string> is one of the following + * w<domid> write only + * r<domid> read only + * b<domid> both read and write + * n<domid> no access + * See https://wiki.xen.org/wiki/XenBus section + * `Permissions' for details of the permissions system. + * It is possible to set permissions for the special watch paths + * "@introduceDomain" and "@releaseDomain" to enable receiving those + * watches in unprivileged domains. + * + * ---------- Watches ---------- + * + * WATCH <wpath>|<token>|? + * Adds a watch. + * + * When a <path> is modified (including path creation, removal, + * contents change or permissions change) this generates an event + * on the changed <path>. Changes made in transactions cause an + * event only if and when committed. Each occurring event is + * matched against all the watches currently set up, and each + * matching watch results in a WATCH_EVENT message (see below). + * + * The event's path matches the watch's <wpath> if it is an child + * of <wpath>. + * + * <wpath> can be a <path> to watch or @<wspecial>. In the + * latter case <wspecial> may have any syntax but it matches + * (according to the rules above) only the following special + * events which are invented by xenstored: + * @introduceDomain occurs on INTRODUCE + * @releaseDomain occurs on any domain crash or + * shutdown, and also on RELEASE + * and domain destruction + * <wspecial> events are sent to privileged callers or explicitly + * via SET_PERMS enabled domains only. + * + * When a watch is first set up it is triggered once straight + * away, with <path> equal to <wpath>. Watches may be triggered + * spuriously. The tx_id in a WATCH request is ignored. + * + * Watches are supposed to be restricted by the permissions + * system but in practice the implementation is imperfect. + * Applications should not rely on being sent a notification for + * paths that they cannot read; however, an application may rely + * on being sent a watch when a path which it _is_ able to read + * is deleted even if that leaves only a nonexistent unreadable + * parent. A notification may omitted if a node's permissions + * are changed so as to make it unreadable, in which case future + * notifications may be suppressed (and if the node is later made + * readable, some notifications may have been lost). + * + * WATCH_EVENT <epath>|<token>| + * Unsolicited `reply' generated for matching modification events + * as described above. req_id and tx_id are both 0. + * + * <epath> is the event's path, ie the actual path that was + * modified; however if the event was the recursive removal of an + * parent of <wpath>, <epath> is just + * <wpath> (rather than the actual path which was removed). So + * <epath> is a child of <wpath>, regardless. + * + * Iff <wpath> for the watch was specified as a relative pathname, + * the <epath> path will also be relative (with the same base, + * obviously). + * + * UNWATCH <wpath>|<token>|? + * + * RESET_WATCHES | + * Reset all watches and transactions of the caller. + * + * ---------- Transactions ---------- + * + * TRANSACTION_START | <transid>| + * <transid> is an opaque uint32_t allocated by xenstored + * represented as unsigned decimal. After this, transaction may + * be referenced by using <transid> (as 32-bit binary) in the + * tx_id request header field. When transaction is started whole + * db is copied; reads and writes happen on the copy. + * It is not legal to send non-0 tx_id in TRANSACTION_START. + * + * TRANSACTION_END T| + * TRANSACTION_END F| + * tx_id must refer to existing transaction. After this + * request the tx_id is no longer valid and may be reused by + * xenstore. If F, the transaction is discarded. If T, + * it is committed: if there were any other intervening writes + * then our END gets get EAGAIN. + * + * The plan is that in the future only intervening `conflicting' + * writes cause EAGAIN, meaning only writes or other commits + * which changed paths which were read or written in the + * transaction at hand. + * + */ + +static void xs_read(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + g_autoptr(GByteArray) data = g_byte_array_new(); + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_read(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_READ; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + len = data->len; + if (len > XENSTORE_PAYLOAD_MAX) { + xs_error(s, req_id, tx_id, E2BIG); + return; + } + + memcpy(&rsp_data[rsp->len], data->data, len); + rsp->len += len; +} + +static void xs_write(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + g_byte_array_append(data, req_data, len); + + trace_xenstore_write(tx_id, path); + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WRITE, req_id, tx_id); +} + +static void xs_mkdir(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_mkdir(tx_id, path); + err = xs_impl_read(s->impl, xen_domid, tx_id, path, data); + if (err == ENOENT) { + err = xs_impl_write(s->impl, xen_domid, tx_id, path, data); + } + + if (!err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_MKDIR, req_id, tx_id); +} + +static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp, + GList *strings, unsigned int start, bool truncate) +{ + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + GList *l; + + for (l = strings; l; l = l->next) { + size_t len = strlen(l->data) + 1; /* Including the NUL termination */ + char *str = l->data; + + if (rsp->len + len > XENSTORE_PAYLOAD_MAX) { + if (truncate) { + len = XENSTORE_PAYLOAD_MAX - rsp->len; + if (!len) { + return; + } + } else { + xs_error(s, rsp->req_id, rsp->tx_id, E2BIG); + return; + } + } + + if (start) { + if (start >= len) { + start -= len; + continue; + } + + str += start; + len -= start; + start = 0; + } + + memcpy(&rsp_data[rsp->len], str, len); + rsp->len += len; + } + /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */ + if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) { + rsp_data[rsp->len++] = '\0'; + } +} + +static void xs_directory(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *items = NULL; + const char *path; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + path = (const char *)req_data; + + trace_xenstore_directory(tx_id, path); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_DIRECTORY; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, items, 0, false); + + g_list_free_full(items, g_free); +} + +static void xs_directory_part(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *offset_str, *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + uint64_t gencnt = 0; + unsigned int offset; + GList *items = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + offset_str = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + if (len) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_directory_part(tx_id, path, offset); + err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items); + if (err != 0) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_DIRECTORY_PART; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1; + + xs_append_strings(s, rsp, items, offset, true); + + g_list_free_full(items, g_free); +} + +static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + char *rsp_data = (char *)&rsp[1]; + int err; + + if (len != 1 || req_data[0] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + rsp->type = XS_TRANSACTION_START; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + trace_xenstore_transaction_start(tx_id); + + rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id); + assert(rsp->len < XENSTORE_PAYLOAD_MAX); + rsp->len++; +} + +static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + bool commit; + int err; + + if (len != 2 || req_data[1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + switch (req_data[0]) { + case 'T': + commit = true; + break; + case 'F': + commit = false; + break; + default: + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_transaction_end(tx_id, commit); + err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_TRANSACTION_END, req_id, tx_id); +} + +static void xs_rm(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, unsigned int len) +{ + const char *path = (const char *)req_data; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_rm(tx_id, path); + err = xs_impl_rm(s->impl, xen_domid, tx_id, path); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_RM, req_id, tx_id); +} + +static void xs_get_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + GList *perms = NULL; + int err; + + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_get_perms(tx_id, path); + err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + rsp->type = XS_GET_PERMS; + rsp->req_id = req_id; + rsp->tx_id = tx_id; + rsp->len = 0; + + xs_append_strings(s, rsp, perms, 0, false); + + g_list_free_full(perms, g_free); +} + +static void xs_set_perms(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *path = (const char *)req_data; + uint8_t *perm; + GList *perms = NULL; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + perm = req_data; + while (len--) { + if (*req_data++ == '\0') { + perms = g_list_append(perms, perm); + perm = req_data; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * SET_PERMS <path>|<perm-as-string>|+? + */ + + trace_xenstore_set_perms(tx_id, path); + err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms); + g_list_free(perms); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_SET_PERMS, req_id, tx_id); +} + +static void xs_watch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + /* + * Note that there may be trailing garbage at the end of the buffer. + * This is explicitly permitted by the '?' at the end of the definition: + * + * WATCH <wpath>|<token>|? + */ + + trace_xenstore_watch(path, token); + err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_WATCH, req_id, tx_id); +} + +static void xs_unwatch(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + const char *token, *path = (const char *)req_data; + int err; + + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + token = (const char *)req_data; + while (len--) { + if (*req_data++ == '\0') { + break; + } + if (len == 0) { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + } + + trace_xenstore_unwatch(path, token); + err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s); + if (err) { + xs_error(s, req_id, tx_id, err); + return; + } + + xs_ok(s, XS_UNWATCH, req_id, tx_id); +} + +static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *req_data, + unsigned int len) +{ + if (len == 0 || req_data[len - 1] != '\0') { + xs_error(s, req_id, tx_id, EINVAL); + return; + } + + trace_xenstore_reset_watches(); + xs_impl_reset_watches(s->impl, xen_domid); + + xs_ok(s, XS_RESET_WATCHES, req_id, tx_id); +} + +static void xs_priv(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, EACCES); +} + +static void xs_unimpl(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len) +{ + xs_error(s, req_id, tx_id, ENOSYS); +} + +typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id, + xs_transaction_t tx_id, uint8_t *data, + unsigned int len); + +struct xsd_req { + const char *name; + xs_impl fn; +}; +#define XSD_REQ(_type, _fn) \ + [_type] = { .name = #_type, .fn = _fn } + +struct xsd_req xsd_reqs[] = { + XSD_REQ(XS_READ, xs_read), + XSD_REQ(XS_WRITE, xs_write), + XSD_REQ(XS_MKDIR, xs_mkdir), + XSD_REQ(XS_DIRECTORY, xs_directory), + XSD_REQ(XS_DIRECTORY_PART, xs_directory_part), + XSD_REQ(XS_TRANSACTION_START, xs_transaction_start), + XSD_REQ(XS_TRANSACTION_END, xs_transaction_end), + XSD_REQ(XS_RM, xs_rm), + XSD_REQ(XS_GET_PERMS, xs_get_perms), + XSD_REQ(XS_SET_PERMS, xs_set_perms), + XSD_REQ(XS_WATCH, xs_watch), + XSD_REQ(XS_UNWATCH, xs_unwatch), + XSD_REQ(XS_CONTROL, xs_priv), + XSD_REQ(XS_INTRODUCE, xs_priv), + XSD_REQ(XS_RELEASE, xs_priv), + XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv), + XSD_REQ(XS_RESUME, xs_priv), + XSD_REQ(XS_SET_TARGET, xs_priv), + XSD_REQ(XS_RESET_WATCHES, xs_reset_watches), +}; + static void process_req(XenXenstoreState *s) { struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data; - struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; - const char enosys[] = "ENOSYS"; + xs_impl handler = NULL; assert(req_pending(s)); assert(!s->rsp_pending); - rsp->type = XS_ERROR; - rsp->req_id = req->req_id; - rsp->tx_id = req->tx_id; - rsp->len = sizeof(enosys); - memcpy((void *)&rsp[1], enosys, sizeof(enosys)); + if (req->type < ARRAY_SIZE(xsd_reqs)) { + handler = xsd_reqs[req->type].fn; + } + if (!handler) { + handler = &xs_unimpl; + } + + handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len); s->rsp_pending = true; reset_req(s); @@ -415,6 +1266,113 @@ static unsigned int put_rsp(XenXenstoreState *s) return copylen; } +static void deliver_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data; + uint8_t *rsp_data = (uint8_t *)&rsp[1]; + unsigned int len; + + assert(!s->rsp_pending); + + trace_xenstore_watch_event(path, token); + + rsp->type = XS_WATCH_EVENT; + rsp->req_id = 0; + rsp->tx_id = 0; + rsp->len = 0; + + len = strlen(path); + + /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */ + assert(rsp->len + len < XENSTORE_PAYLOAD_MAX); + + memcpy(&rsp_data[rsp->len], path, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + len = strlen(token); + /* + * It is possible for the guest to have chosen a token that will + * not fit (along with the patch) into a watch event. We have no + * choice but to drop the event if this is the case. + */ + if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) { + return; + } + + memcpy(&rsp_data[rsp->len], token, len); + rsp->len += len; + rsp_data[rsp->len] = '\0'; + rsp->len++; + + s->rsp_pending = true; +} + +struct watch_event { + char *path; + char *token; +}; + +static void free_watch_event(struct watch_event *ev) +{ + if (ev) { + g_free(ev->path); + g_free(ev->token); + g_free(ev); + } +} + +static void queue_watch(XenXenstoreState *s, const char *path, + const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + + ev->path = g_strdup(path); + ev->token = g_strdup(token); + + s->watch_events = g_list_append(s->watch_events, ev); +} + +static void fire_watch_cb(void *opaque, const char *path, const char *token) +{ + XenXenstoreState *s = opaque; + + assert(qemu_mutex_iothread_locked()); + + /* + * If there's a response pending, we obviously can't scribble over + * it. But if there's a request pending, it has dibs on the buffer + * too. + * + * In the common case of a watch firing due to backend activity + * when the ring was otherwise idle, we should be able to copy the + * strings directly into the rsp_data and thence the actual ring, + * without needing to perform any allocations and queue them. + */ + if (s->rsp_pending || req_pending(s)) { + queue_watch(s, path, token); + } else { + deliver_watch(s, path, token); + /* + * If the message was queued because there was already ring activity, + * no need to wake the guest. But if not, we need to send the evtchn. + */ + xen_be_evtchn_notify(s->eh, s->be_port); + } +} + +static void process_watch_events(XenXenstoreState *s) +{ + struct watch_event *ev = s->watch_events->data; + + deliver_watch(s, ev->path, ev->token); + + s->watch_events = g_list_remove(s->watch_events, ev); + free_watch_event(ev); +} + static void xen_xenstore_event(void *opaque) { XenXenstoreState *s = opaque; @@ -433,6 +1391,10 @@ static void xen_xenstore_event(void *opaque) copied_to = copied_from = 0; processed = false; + if (!s->rsp_pending && s->watch_events) { + process_watch_events(s); + } + if (s->rsp_pending) { copied_to = put_rsp(s); } @@ -441,7 +1403,7 @@ static void xen_xenstore_event(void *opaque) copied_from = get_req(s); } - if (req_pending(s) && !s->rsp_pending) { + if (req_pending(s) && !s->rsp_pending && !s->watch_events) { process_req(s); processed = true; } @@ -496,5 +1458,270 @@ int xen_xenstore_reset(void) } s->be_port = err; + /* + * We don't actually access the guest's page through the grant, because + * this isn't real Xen, and we can just use the page we gave it in the + * first place. Map the grant anyway, mostly for cosmetic purposes so + * it *looks* like it's in use in the guest-visible grant table. + */ + s->gt = qemu_xen_gnttab_open(); + uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE; + s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref, + PROT_READ | PROT_WRITE); + return 0; } + +struct qemu_xs_handle { + XenstoreImplState *impl; + GList *watches; + QEMUBH *watch_bh; +}; + +struct qemu_xs_watch { + struct qemu_xs_handle *h; + char *path; + xs_watch_fn fn; + void *opaque; + GList *events; +}; + +static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid) +{ + return g_strdup_printf("/local/domain/%u", domid); +} + +static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *num) +{ + GList *items = NULL, *l; + unsigned int i = 0; + char **items_ret; + int err; + + err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items); + if (err) { + errno = err; + return NULL; + } + + items_ret = g_new0(char *, g_list_length(items) + 1); + *num = 0; + for (l = items; l; l = l->next) { + items_ret[i++] = l->data; + (*num)++; + } + g_list_free(items); + return items_ret; +} + +static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + GByteArray *data = g_byte_array_new(); + bool free_segment = false; + int err; + + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err) { + free_segment = true; + errno = err; + } else { + if (len) { + *len = data->len; + } + /* The xen-bus-helper code expects to get NUL terminated string! */ + g_byte_array_append(data, (void *)"", 1); + } + + return g_byte_array_free(data, free_segment); +} + +static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, unsigned int len) +{ + GByteArray *gdata = g_byte_array_new(); + int err; + + g_byte_array_append(gdata, data, len); + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata); + g_byte_array_unref(gdata); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + g_autoptr(GByteArray) data = g_byte_array_new(); + GList *perms_list = NULL; + int err; + + /* mkdir does this */ + err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data); + if (err == ENOENT) { + err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data); + } + if (err) { + errno = err; + return false; + } + + perms_list = g_list_append(perms_list, + xs_perm_as_string(XS_PERM_NONE, owner)); + perms_list = g_list_append(perms_list, + xs_perm_as_string(perms, domid)); + + err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list); + g_list_free_full(perms_list, g_free); + if (err) { + errno = err; + return false; + } + return true; +} + +static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path); + if (err) { + errno = err; + return false; + } + return true; +} + +static void be_watch_bh(void *_h) +{ + struct qemu_xs_handle *h = _h; + GList *l; + + for (l = h->watches; l; l = l->next) { + struct qemu_xs_watch *w = l->data; + + while (w->events) { + struct watch_event *ev = w->events->data; + + w->fn(w->opaque, ev->path); + + w->events = g_list_remove(w->events, ev); + free_watch_event(ev); + } + } +} + +static void xs_be_watch_cb(void *opaque, const char *path, const char *token) +{ + struct watch_event *ev = g_new0(struct watch_event, 1); + struct qemu_xs_watch *w = opaque; + + /* We don't care about the token */ + ev->path = g_strdup(path); + w->events = g_list_append(w->events, ev); + + qemu_bh_schedule(w->h->watch_bh); +} + +static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + int err; + + w->h = h; + w->fn = fn; + w->opaque = opaque; + + err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w); + if (err) { + errno = err; + g_free(w); + return NULL; + } + + w->path = g_strdup(path); + h->watches = g_list_append(h->watches, w); + return w; +} + +static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) +{ + xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w); + + h->watches = g_list_remove(h->watches, w); + g_list_free_full(w->events, (GDestroyNotify)free_watch_event); + g_free(w->path); + g_free(w); +} + +static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h) +{ + unsigned int new_tx = XBT_NULL; + int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx); + if (err) { + errno = err; + return XBT_NULL; + } + return new_tx; +} + +static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t, + bool abort) +{ + int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort); + if (err) { + errno = err; + return false; + } + return true; +} + +static struct qemu_xs_handle *xs_be_open(void) +{ + XenXenstoreState *s = xen_xenstore_singleton; + struct qemu_xs_handle *h; + + if (!s && !s->impl) { + errno = -ENOSYS; + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->impl = s->impl; + + h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h); + + return h; +} + +static void xs_be_close(struct qemu_xs_handle *h) +{ + while (h->watches) { + struct qemu_xs_watch *w = h->watches->data; + xs_be_unwatch(h, w); + } + + qemu_bh_delete(h->watch_bh); + g_free(h); +} + +static struct xenstore_backend_ops emu_xenstore_backend_ops = { + .open = xs_be_open, + .close = xs_be_close, + .get_domain_path = xs_be_get_domain_path, + .directory = xs_be_directory, + .read = xs_be_read, + .write = xs_be_write, + .create = xs_be_create, + .destroy = xs_be_destroy, + .watch = xs_be_watch, + .unwatch = xs_be_unwatch, + .transaction_start = xs_be_transaction_start, + .transaction_end = xs_be_transaction_end, +}; diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c new file mode 100644 index 0000000000..305fe75519 --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.c @@ -0,0 +1,1927 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qom/object.h" + +#include "hw/xen/xen.h" + +#include "xen_xenstore.h" +#include "xenstore_impl.h" + +#include "hw/xen/interface/io/xs_wire.h" + +#define XS_MAX_WATCHES 128 +#define XS_MAX_DOMAIN_NODES 1000 +#define XS_MAX_NODE_SIZE 2048 +#define XS_MAX_TRANSACTIONS 10 +#define XS_MAX_PERMS_PER_NODE 5 + +#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "0123456789-/_" + +typedef struct XsNode { + uint32_t ref; + GByteArray *content; + GList *perms; + GHashTable *children; + uint64_t gencnt; + bool deleted_in_tx; + bool modified_in_tx; + unsigned int serialized_tx; +#ifdef XS_NODE_UNIT_TEST + gchar *name; /* debug only */ +#endif +} XsNode; + +typedef struct XsWatch { + struct XsWatch *next; + xs_impl_watch_fn *cb; + void *cb_opaque; + char *token; + unsigned int dom_id; + int rel_prefix; +} XsWatch; + +typedef struct XsTransaction { + XsNode *root; + unsigned int nr_nodes; + unsigned int base_tx; + unsigned int tx_id; + unsigned int dom_id; +} XsTransaction; + +struct XenstoreImplState { + XsNode *root; + unsigned int nr_nodes; + GHashTable *watches; + unsigned int nr_domu_watches; + GHashTable *transactions; + unsigned int nr_domu_transactions; + unsigned int root_tx; + unsigned int last_tx; + bool serialized; +}; + + +static void nobble_tx(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *new_tx_id = user_data; + XsTransaction *tx = value; + + if (tx->base_tx == *new_tx_id) { + /* Transactions based on XBT_NULL will always fail */ + tx->base_tx = XBT_NULL; + } +} + +static inline unsigned int next_tx(struct XenstoreImplState *s) +{ + unsigned int tx_id; + + /* Find the next TX id which isn't either XBT_NULL or in use. */ + do { + tx_id = ++s->last_tx; + } while (tx_id == XBT_NULL || tx_id == s->root_tx || + g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); + + /* + * It is vanishingly unlikely, but ensure that no outstanding transaction + * is based on the (previous incarnation of the) newly-allocated TX id. + */ + g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); + + return tx_id; +} + +static inline XsNode *xs_node_new(void) +{ + XsNode *n = g_new0(XsNode, 1); + n->ref = 1; + +#ifdef XS_NODE_UNIT_TEST + nr_xs_nodes++; + xs_node_list = g_list_prepend(xs_node_list, n); +#endif + return n; +} + +static inline XsNode *xs_node_ref(XsNode *n) +{ + /* With just 10 transactions, it can never get anywhere near this. */ + g_assert(n->ref < INT_MAX); + + g_assert(n->ref); + n->ref++; + return n; +} + +static inline void xs_node_unref(XsNode *n) +{ + if (!n) { + return; + } + g_assert(n->ref); + if (--n->ref) { + return; + } + + if (n->content) { + g_byte_array_unref(n->content); + } + if (n->perms) { + g_list_free_full(n->perms, g_free); + } + if (n->children) { + g_hash_table_unref(n->children); + } +#ifdef XS_NODE_UNIT_TEST + g_free(n->name); + nr_xs_nodes--; + xs_node_list = g_list_remove(xs_node_list, n); +#endif + g_free(n); +} + +char *xs_perm_as_string(unsigned int perm, unsigned int domid) +{ + char letter; + + switch (perm) { + case XS_PERM_READ | XS_PERM_WRITE: + letter = 'b'; + break; + case XS_PERM_READ: + letter = 'r'; + break; + case XS_PERM_WRITE: + letter = 'w'; + break; + case XS_PERM_NONE: + default: + letter = 'n'; + break; + } + + return g_strdup_printf("%c%u", letter, domid); +} + +static gpointer do_perm_copy(gconstpointer src, gpointer user_data) +{ + return g_strdup(src); +} + +static XsNode *xs_node_create(const char *name, GList *perms) +{ + XsNode *n = xs_node_new(); + +#ifdef XS_NODE_UNIT_TEST + if (name) { + n->name = g_strdup(name); + } +#endif + + n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + + return n; +} + +/* For copying from one hash table to another using g_hash_table_foreach() */ +static void do_child_insert(gpointer key, gpointer value, gpointer user_data) +{ + g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); +} + +static XsNode *xs_node_copy(XsNode *old) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (n->name) { + n->name = g_strdup(old->name); + } +#endif + + assert(old); + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + g_hash_table_foreach(old->children, do_child_insert, n->children); + } + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } + if (old->content) { + n->content = g_byte_array_ref(old->content); + } + return n; +} + +/* Returns true if it made a change to the hash table */ +static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) +{ + assert(!strchr(path_elem, '/')); + + if (!child) { + assert(n->children); + return g_hash_table_remove(n->children, path_elem); + } + +#ifdef XS_NODE_UNIT_TEST + g_free(child->name); + child->name = g_strdup(path_elem); +#endif + if (!n->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + } + + /* + * The documentation for g_hash_table_insert() says that it "returns a + * boolean value to indicate whether the newly added value was already + * in the hash table or not." + * + * It could perhaps be clearer that returning TRUE means it wasn't, + */ + return g_hash_table_insert(n->children, g_strdup(path_elem), child); +} + +struct walk_op { + struct XenstoreImplState *s; + char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ + int (*op_fn)(XsNode **n, struct walk_op *op); + void *op_opaque; + void *op_opaque2; + + GList *watches; + unsigned int dom_id; + unsigned int tx_id; + + /* The number of nodes which will exist in the tree if this op succeeds. */ + unsigned int new_nr_nodes; + + /* + * This is maintained on the way *down* the walk to indicate + * whether nodes can be modified in place or whether COW is + * required. It starts off being true, as we're always going to + * replace the root node. If we walk into a shared subtree it + * becomes false. If we start *creating* new nodes for a write, + * it becomes true again. + * + * Do not use it on the way back up. + */ + bool inplace; + bool mutating; + bool create_dirs; + bool in_transaction; + + /* Tracking during recursion so we know which is first. */ + bool deleted_in_tx; +}; + +static void fire_watches(struct walk_op *op, bool parents) +{ + GList *l = NULL; + XsWatch *w; + + if (!op->mutating || op->in_transaction) { + return; + } + + if (parents) { + l = op->watches; + } + + w = g_hash_table_lookup(op->s->watches, op->path); + while (w || l) { + if (!w) { + /* Fire the parent nodes from 'op' if asked to */ + w = l->data; + l = l->next; + continue; + } + + assert(strlen(op->path) > w->rel_prefix); + w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); + + w = w->next; + } +} + +static int xs_node_add_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + + if (op->dom_id) { + /* + * The real XenStored includes permissions and names of child nodes + * in the calculated datasize but life's too short. For a single + * tenant internal XenStore, we don't have to be quite as pedantic. + */ + if (data->len > XS_MAX_NODE_SIZE) { + return E2BIG; + } + } + /* We *are* the node to be written. Either this or a copy. */ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->content) { + g_byte_array_unref((*n)->content); + } + (*n)->content = g_byte_array_ref(data); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } + return 0; +} + +static int xs_node_get_content(XsNode **n, struct walk_op *op) +{ + GByteArray *data = op->op_opaque; + GByteArray *node_data; + + assert(op->inplace); + assert(*n); + + node_data = (*n)->content; + if (node_data) { + g_byte_array_append(data, node_data->data, node_data->len); + } + + return 0; +} + +static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) +{ + struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); + XsNode *n = value; + bool this_inplace = op->inplace; + + if (n->ref != 1) { + op->inplace = 0; + } + + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + + if (n->children) { + g_hash_table_foreach_remove(n->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + /* + * Fire watches on *this* node but not the parents because they are + * going to be deleted too, so the watch will fire for them anyway. + */ + fire_watches(op, false); + op->path[path_len] = '\0'; + + /* + * Actually deleting the child here is just an optimisation; if we + * don't then the final unref on the topmost victim will just have + * to cascade down again repeating all the g_hash_table_foreach() + * calls. + */ + return this_inplace; +} + +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); +static void copy_deleted_recurse(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + GHashTable *siblings = op->op_opaque2; + XsNode *n = xs_node_copy_deleted(value, op); + + /* + * Reinsert the deleted_in_tx copy of the node into the parent's + * 'children' hash table. Having stashed it from op->op_opaque2 + * before the recursive call to xs_node_copy_deleted() scribbled + * over it. + */ + g_hash_table_insert(siblings, g_strdup(key), n); +} + +static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) +{ + XsNode *n = xs_node_new(); + + n->gencnt = old->gencnt; + +#ifdef XS_NODE_UNIT_TEST + if (old->name) { + n->name = g_strdup(old->name); + } +#endif + + if (old->children) { + n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, + (GDestroyNotify)xs_node_unref); + op->op_opaque2 = n->children; + g_hash_table_foreach(old->children, copy_deleted_recurse, op); + } + if (old->perms) { + n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); + } + n->deleted_in_tx = true; + /* If it gets resurrected we only fire a watch if it lost its content */ + if (old->content) { + n->modified_in_tx = true; + } + op->new_nr_nodes--; + return n; +} + +static int xs_node_rm(XsNode **n, struct walk_op *op) +{ + bool this_inplace = op->inplace; + + if (op->tx_id != XBT_NULL) { + /* It's not trivial to do inplace handling for this one */ + XsNode *old = *n; + *n = xs_node_copy_deleted(old, op); + xs_node_unref(old); + return 0; + } + + /* Fire watches for, and count, nodes in the subtree which get deleted */ + if ((*n)->children) { + g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); + } + op->new_nr_nodes--; + + if (this_inplace) { + xs_node_unref(*n); + } + *n = NULL; + return 0; +} + +static int xs_node_get_perms(XsNode **n, struct walk_op *op) +{ + GList **perms = op->op_opaque; + + assert(op->inplace); + assert(*n); + + *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); + return 0; +} + +static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) +{ + unsigned int n = sscanf(perm, "%c%u", letter, dom_id); + + assert(n == 2); +} + +static bool can_access(unsigned int dom_id, GList *perms, const char *letters) +{ + unsigned int i, n; + char perm_letter; + unsigned int perm_dom_id; + bool access; + + if (dom_id == 0) { + return true; + } + + n = g_list_length(perms); + assert(n >= 1); + + /* + * The dom_id of the first perm is the owner, and the owner always has + * read-write access. + */ + parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); + if (dom_id == perm_dom_id) { + return true; + } + + /* + * The letter of the first perm specified the default access for all other + * domains. + */ + access = !!strchr(letters, perm_letter); + for (i = 1; i < n; i++) { + parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); + if (dom_id != perm_dom_id) { + continue; + } + access = !!strchr(letters, perm_letter); + } + + return access; +} + +static int xs_node_set_perms(XsNode **n, struct walk_op *op) +{ + GList *perms = op->op_opaque; + + if (op->dom_id) { + unsigned int perm_dom_id; + char perm_letter; + + /* A guest may not change permissions on nodes it does not own */ + if (!can_access(op->dom_id, (*n)->perms, "")) { + return EPERM; + } + + /* A guest may not change the owner of a node it owns. */ + parse_perm(perms->data, &perm_letter, &perm_dom_id); + if (perm_dom_id != op->dom_id) { + return EPERM; + } + + if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { + return ENOSPC; + } + } + + /* We *are* the node to be written. Either this or a copy. */ + if (!op->inplace) { + XsNode *old = *n; + *n = xs_node_copy(old); + xs_node_unref(old); + } + + if ((*n)->perms) { + g_list_free_full((*n)->perms, g_free); + } + (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); + if (op->tx_id != XBT_NULL) { + (*n)->modified_in_tx = true; + } + return 0; +} + +/* + * Passed a full reference in *n which it may free if it needs to COW. + * + * When changing the tree, the op->inplace flag indicates whether this + * node may be modified in place (i.e. it and all its parents had a + * refcount of one). If walking down the tree we find a node whose + * refcount is higher, we must clear op->inplace and COW from there + * down. Unless we are creating new nodes as scaffolding for a write + * (which works like 'mkdir -p' does). In which case those newly + * created nodes can (and must) be modified in place again. + */ +static int xs_node_walk(XsNode **n, struct walk_op *op) +{ + char *child_name = NULL; + size_t namelen; + XsNode *old = *n, *child = NULL; + bool stole_child = false; + bool this_inplace; + XsWatch *watch; + int err; + + namelen = strlen(op->path); + watch = g_hash_table_lookup(op->s->watches, op->path); + + /* Is there a child, or do we hit the double-NUL termination? */ + if (op->path[namelen + 1]) { + char *slash; + child_name = op->path + namelen + 1; + slash = strchr(child_name, '/'); + if (slash) { + *slash = '\0'; + } + op->path[namelen] = '/'; + } + + /* If we walk into a subtree which is shared, we must COW */ + if (op->mutating && old->ref != 1) { + op->inplace = false; + } + + if (!child_name) { + const char *letters = op->mutating ? "wb" : "rb"; + + if (!can_access(op->dom_id, old->perms, letters)) { + err = EACCES; + goto out; + } + + /* This is the actual node on which the operation shall be performed */ + err = op->op_fn(n, op); + if (!err) { + fire_watches(op, true); + } + goto out; + } + + /* op->inplace will be further modified during the recursion */ + this_inplace = op->inplace; + + if (old && old->children) { + child = g_hash_table_lookup(old->children, child_name); + /* This is a *weak* reference to 'child', owned by the hash table */ + } + + if (child) { + if (child->deleted_in_tx) { + assert(child->ref == 1); + /* Cannot actually set child->deleted_in_tx = false until later */ + } + xs_node_ref(child); + /* + * Now we own it too. But if we can modify inplace, that's going to + * foil the check and force it to COW. We want to be the *only* owner + * so that it can be modified in place, so remove it from the hash + * table in that case. We'll add it (or its replacement) back later. + */ + if (op->mutating && this_inplace) { + g_hash_table_remove(old->children, child_name); + stole_child = true; + } + } else if (op->create_dirs) { + assert(op->mutating); + + if (!can_access(op->dom_id, old->perms, "wb")) { + err = EACCES; + goto out; + } + + if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { + err = ENOSPC; + goto out; + } + + child = xs_node_create(child_name, old->perms); + op->new_nr_nodes++; + + /* + * If we're creating a new child, we can clearly modify it (and its + * children) in place from here on down. + */ + op->inplace = true; + } else { + err = ENOENT; + goto out; + } + + /* + * If there's a watch on this node, add it to the list to be fired + * (with the correct full pathname for the modified node) at the end. + */ + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + /* + * Except for the temporary child-stealing as noted, our node has not + * changed yet. We don't yet know the overall operation will complete. + */ + err = xs_node_walk(&child, op); + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + + if (err || !op->mutating) { + if (stole_child) { + /* Put it back as it was. */ + g_hash_table_replace(old->children, g_strdup(child_name), child); + } else { + xs_node_unref(child); + } + goto out; + } + + /* + * Now we know the operation has completed successfully and we're on + * the way back up. Make the change, substituting 'child' in the + * node at our level. + */ + if (!this_inplace) { + *n = xs_node_copy(old); + xs_node_unref(old); + } + + /* + * If we resurrected a deleted_in_tx node, we can mark it as no longer + * deleted now that we know the overall operation has succeeded. + */ + if (op->create_dirs && child && child->deleted_in_tx) { + op->new_nr_nodes++; + child->deleted_in_tx = false; + } + + /* + * The child may be NULL here, for a remove operation. Either way, + * xs_node_add_child() will do the right thing and return a value + * indicating whether it changed the parent's hash table or not. + * + * We bump the parent gencnt if it adds a child that we *didn't* + * steal from it in the first place, or if child==NULL and was + * thus removed (whether we stole it earlier and didn't put it + * back, or xs_node_add_child() actually removed it now). + */ + if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { + (*n)->gencnt++; + } + + out: + op->path[namelen] = '\0'; + if (!namelen) { + assert(!op->watches); + /* + * On completing the recursion back up the path walk and reaching the + * top, assign the new node count if the operation was successful. If + * the main tree was changed, bump its tx ID so that outstanding + * transactions correctly fail. But don't bump it every time; only + * if it makes a difference. + */ + if (!err && op->mutating) { + if (!op->in_transaction) { + if (op->s->root_tx != op->s->last_tx) { + op->s->root_tx = next_tx(op->s); + } + op->s->nr_nodes = op->new_nr_nodes; + } else { + XsTransaction *tx = g_hash_table_lookup(op->s->transactions, + GINT_TO_POINTER(op->tx_id)); + assert(tx); + tx->nr_nodes = op->new_nr_nodes; + } + } + } + return err; +} + +static void append_directory_item(gpointer key, gpointer value, + gpointer user_data) +{ + GList **items = user_data; + + *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); +} + +/* Populates items with char * names which caller must free. */ +static int xs_node_directory(XsNode **n, struct walk_op *op) +{ + GList **items = op->op_opaque; + + assert(op->inplace); + assert(*n); + + if ((*n)->children) { + g_hash_table_foreach((*n)->children, append_directory_item, items); + } + + if (op->op_opaque2) { + *(uint64_t *)op->op_opaque2 = (*n)->gencnt; + } + + return 0; +} + +static int validate_path(char *outpath, const char *userpath, + unsigned int dom_id) +{ + size_t i, pathlen = strlen(userpath); + + if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) { + return EINVAL; + } + for (i = 0; i < pathlen; i++) { + if (!strchr(XS_VALID_CHARS, userpath[i])) { + return EINVAL; + } + } + if (userpath[0] == '/') { + if (pathlen > XENSTORE_ABS_PATH_MAX) { + return E2BIG; + } + memcpy(outpath, userpath, pathlen + 1); + } else { + if (pathlen > XENSTORE_REL_PATH_MAX) { + return E2BIG; + } + snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, + userpath); + } + return 0; +} + + +static int init_walk_op(XenstoreImplState *s, struct walk_op *op, + xs_transaction_t tx_id, unsigned int dom_id, + const char *path, XsNode ***rootp) +{ + int ret = validate_path(op->path, path, dom_id); + if (ret) { + return ret; + } + + /* + * We use *two* NUL terminators at the end of the path, as during the walk + * we will temporarily turn each '/' into a NUL to allow us to use that + * path element for the lookup. + */ + op->path[strlen(op->path) + 1] = '\0'; + op->watches = NULL; + op->path[0] = '\0'; + op->inplace = true; + op->mutating = false; + op->create_dirs = false; + op->in_transaction = false; + op->dom_id = dom_id; + op->tx_id = tx_id; + op->s = s; + + if (tx_id == XBT_NULL) { + *rootp = &s->root; + op->new_nr_nodes = s->nr_nodes; + } else { + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + if (!tx) { + return ENOENT; + } + *rootp = &tx->root; + op->new_nr_nodes = tx->nr_nodes; + op->in_transaction = true; + } + + return 0; +} + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, and will be freed by caller. + * Just g_byte_array_append() to it. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_content; + op.op_opaque = data; + return xs_node_walk(n, &op); +} + +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data) +{ + /* + * The data GByteArray shall exist, will be freed by caller. You are + * free to use g_byte_array_steal() and keep the data. Or just ref it. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_add_content; + op.op_opaque = data; + op.mutating = true; + op.create_dirs = true; + return xs_node_walk(n, &op); +} + +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items) +{ + /* + * The items are (char *) to be freed by caller. Although it's consumed + * immediately so if you want to change it to (const char *) and keep + * them, go ahead and change the caller. + */ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_directory; + op.op_opaque = items; + op.op_opaque2 = gencnt; + return xs_node_walk(n, &op); +} + +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id) +{ + XsTransaction *tx; + + if (*tx_id != XBT_NULL) { + return EINVAL; + } + + if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { + return ENOSPC; + } + + tx = g_new0(XsTransaction, 1); + + tx->nr_nodes = s->nr_nodes; + tx->tx_id = next_tx(s); + tx->base_tx = s->root_tx; + tx->root = xs_node_ref(s->root); + tx->dom_id = dom_id; + + g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); + if (dom_id) { + s->nr_domu_transactions++; + } + *tx_id = tx->tx_id; + return 0; +} + +static gboolean tx_commit_walk(gpointer key, gpointer value, + gpointer user_data) +{ + struct walk_op *op = user_data; + int path_len = strlen(op->path); + int key_len = strlen(key); + bool fire_parents = true; + XsWatch *watch; + XsNode *n = value; + + if (n->ref != 1) { + return false; + } + + if (n->deleted_in_tx) { + /* + * We fire watches on our parents if we are the *first* node + * to be deleted (the topmost one). This matches the behaviour + * when deleting in the live tree. + */ + fire_parents = !op->deleted_in_tx; + + /* Only used on the way down so no need to clear it later */ + op->deleted_in_tx = true; + } + + assert(key_len + path_len + 2 <= sizeof(op->path)); + op->path[path_len] = '/'; + memcpy(op->path + path_len + 1, key, key_len + 1); + + watch = g_hash_table_lookup(op->s->watches, op->path); + if (watch) { + op->watches = g_list_append(op->watches, watch); + } + + if (n->children) { + g_hash_table_foreach_remove(n->children, tx_commit_walk, op); + } + + if (watch) { + op->watches = g_list_remove(op->watches, watch); + } + + /* + * Don't fire watches if this node was only copied because a + * descendent was changed. The modified_in_tx flag indicates the + * ones which were really changed. + */ + if (n->modified_in_tx || n->deleted_in_tx) { + fire_watches(op, fire_parents); + n->modified_in_tx = false; + } + op->path[path_len] = '\0'; + + /* Deleted nodes really do get expunged when we commit */ + return n->deleted_in_tx; +} + +static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) +{ + struct walk_op op; + XsNode **n; + + if (s->root_tx != tx->base_tx) { + return EAGAIN; + } + xs_node_unref(s->root); + s->root = tx->root; + tx->root = NULL; + s->root_tx = tx->tx_id; + s->nr_nodes = tx->nr_nodes; + + init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); + op.deleted_in_tx = false; + op.mutating = true; + + /* + * Walk the new root and fire watches on any node which has a + * refcount of one (which is therefore unique to this transaction). + */ + if (s->root->children) { + g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); + } + + return 0; +} + +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit) +{ + int ret = 0; + XsTransaction *tx = g_hash_table_lookup(s->transactions, + GINT_TO_POINTER(tx_id)); + + if (!tx || tx->dom_id != dom_id) { + return ENOENT; + } + + if (commit) { + ret = transaction_commit(s, tx); + } + + g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); + if (dom_id) { + assert(s->nr_domu_transactions); + s->nr_domu_transactions--; + } + return ret; +} + +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path) +{ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_rm; + op.mutating = true; + return xs_node_walk(n, &op); +} + +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms) +{ + struct walk_op op; + XsNode **n; + int ret; + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_get_perms; + op.op_opaque = perms; + return xs_node_walk(n, &op); +} + +static void is_valid_perm(gpointer data, gpointer user_data) +{ + char *perm = data; + bool *valid = user_data; + char letter; + unsigned int dom_id; + + if (!*valid) { + return; + } + + if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { + *valid = false; + return; + } + + switch (letter) { + case 'n': + case 'r': + case 'w': + case 'b': + break; + + default: + *valid = false; + break; + } +} + +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms) +{ + struct walk_op op; + XsNode **n; + bool valid = true; + int ret; + + if (!g_list_length(perms)) { + return EINVAL; + } + + g_list_foreach(perms, is_valid_perm, &valid); + if (!valid) { + return EINVAL; + } + + ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); + if (ret) { + return ret; + } + op.op_fn = xs_node_set_perms; + op.op_opaque = perms; + op.mutating = true; + return xs_node_walk(n, &op); +} + +static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) + +{ + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, *l; + int ret; + + ret = validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + /* Check for duplicates */ + l = w = g_hash_table_lookup(s->watches, abspath); + while (w) { + if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque && + fn == w->cb && dom_id == w->dom_id) { + return EEXIST; + } + w = w->next; + } + + if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { + return E2BIG; + } + + w = g_new0(XsWatch, 1); + w->token = g_strdup(token); + w->cb = fn; + w->cb_opaque = opaque; + w->dom_id = dom_id; + w->rel_prefix = strlen(abspath) - strlen(path); + + /* l was looked up above when checking for duplicates */ + if (l) { + w->next = l->next; + l->next = w; + } else { + g_hash_table_insert(s->watches, g_strdup(abspath), w); + } + if (dom_id) { + s->nr_domu_watches++; + } + + return 0; +} + +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque) +{ + int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); + + if (!ret) { + /* A new watch should fire immediately */ + fn(opaque, path, token); + } + + return ret; +} + +static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) +{ + XsWatch *next = w->next; + + if (w->dom_id) { + assert(s->nr_domu_watches); + s->nr_domu_watches--; + } + + g_free(w->token); + g_free(w); + + return next; +} + +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, + xs_impl_watch_fn fn, void *opaque) +{ + char abspath[XENSTORE_ABS_PATH_MAX + 1]; + XsWatch *w, **l; + int ret; + + ret = validate_path(abspath, path, dom_id); + if (ret) { + return ret; + } + + w = g_hash_table_lookup(s->watches, abspath); + if (!w) { + return ENOENT; + } + + /* + * The hash table contains the first element of a list of + * watches. Removing the first element in the list is a + * special case because we have to update the hash table to + * point to the next (or remove it if there's nothing left). + */ + if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && + dom_id == w->dom_id) { + if (w->next) { + /* Insert the previous 'next' into the hash table */ + g_hash_table_insert(s->watches, g_strdup(abspath), w->next); + } else { + /* Nothing left; remove from hash table */ + g_hash_table_remove(s->watches, abspath); + } + free_watch(s, w); + return 0; + } + + /* + * We're all done messing with the hash table because the element + * it points to has survived the cull. Now it's just a simple + * linked list removal operation. + */ + for (l = &w->next; *l; l = &w->next) { + w = *l; + + if (!g_strcmp0(token, w->token) && fn == w->cb && + opaque != w->cb_opaque && dom_id == w->dom_id) { + *l = free_watch(s, w); + return 0; + } + } + + return ENOENT; +} + +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) +{ + char **watch_paths; + guint nr_watch_paths; + guint i; + + watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, + &nr_watch_paths); + + for (i = 0; i < nr_watch_paths; i++) { + XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); + XsWatch *w2, *w, **l; + + /* + * w1 is the original list. The hash table has this pointer. + * w2 is the head of our newly-filtered list. + * w and l are temporary for processing. w is somewhat redundant + * with *l but makes my eyes bleed less. + */ + + w = w2 = w1; + l = &w; + while (w) { + if (w->dom_id == dom_id) { + /* If we're freeing the head of the list, bump w2 */ + if (w2 == w) { + w2 = w->next; + } + *l = free_watch(s, w); + } else { + l = &w->next; + } + w = *l; + } + /* + * If the head of the list survived the cull, we don't need to + * touch the hash table and we're done with this path. Else... + */ + if (w1 != w2) { + g_hash_table_steal(s->watches, watch_paths[i]); + + /* + * It was already freed. (Don't worry, this whole thing is + * single-threaded and nobody saw it in the meantime). And + * having *stolen* it, we now own the watch_paths[i] string + * so if we don't give it back to the hash table, we need + * to free it. + */ + if (w2) { + g_hash_table_insert(s->watches, watch_paths[i], w2); + } else { + g_free(watch_paths[i]); + } + } + } + g_free(watch_paths); + return 0; +} + +static void xs_tx_free(void *_tx) +{ + XsTransaction *tx = _tx; + if (tx->root) { + xs_node_unref(tx->root); + } + g_free(tx); +} + +XenstoreImplState *xs_impl_create(unsigned int dom_id) +{ + XenstoreImplState *s = g_new0(XenstoreImplState, 1); + GList *perms; + + s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); + s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, xs_tx_free); + + perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); + s->root = xs_node_create("/", perms); + g_list_free_full(perms, g_free); + s->nr_nodes = 1; + + s->root_tx = s->last_tx = 1; + return s; +} + + +static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) +{ + XsNode *n = value; + + n->serialized_tx = XBT_NULL; + if (n->children) { + g_hash_table_foreach(n->children, clear_serialized_tx, NULL); + } +} + +static void clear_tx_serialized_tx(gpointer key, gpointer value, + gpointer opaque) +{ + XsTransaction *t = value; + + clear_serialized_tx(NULL, t->root, NULL); +} + +static void write_be32(GByteArray *save, uint32_t val) +{ + uint32_t be = htonl(val); + g_byte_array_append(save, (void *)&be, sizeof(be)); +} + + +struct save_state { + GByteArray *bytes; + unsigned int tx_id; +}; + +#define MODIFIED_IN_TX (1U << 0) +#define DELETED_IN_TX (1U << 1) +#define NODE_REF (1U << 2) + +static void save_node(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsNode *n = value; + char *name = key; + uint8_t flag = 0; + + /* Child nodes (i.e. anything but the root) have a name */ + if (name) { + g_byte_array_append(ss->bytes, key, strlen(key) + 1); + } + + /* + * If we already wrote this node, refer to the previous copy. + * There's no rename/move in XenStore, so all we need to find + * it is the tx_id of the transation in which it exists. Which + * may be the root tx. + */ + if (n->serialized_tx != XBT_NULL) { + flag = NODE_REF; + g_byte_array_append(ss->bytes, &flag, 1); + write_be32(ss->bytes, n->serialized_tx); + } else { + GList *l; + n->serialized_tx = ss->tx_id; + + if (n->modified_in_tx) { + flag |= MODIFIED_IN_TX; + } + if (n->deleted_in_tx) { + flag |= DELETED_IN_TX; + } + g_byte_array_append(ss->bytes, &flag, 1); + + if (n->content) { + write_be32(ss->bytes, n->content->len); + g_byte_array_append(ss->bytes, n->content->data, n->content->len); + } else { + write_be32(ss->bytes, 0); + } + + for (l = n->perms; l; l = l->next) { + g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); + } + /* NUL termination after perms */ + g_byte_array_append(ss->bytes, (void *)"", 1); + + if (n->children) { + g_hash_table_foreach(n->children, save_node, ss); + } + /* NUL termination after children (child name is NUL) */ + g_byte_array_append(ss->bytes, (void *)"", 1); + } +} + +static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) +{ + write_be32(ss->bytes, tx_id); + ss->tx_id = tx_id; + save_node(NULL, root, ss); +} + +static void save_tx(gpointer key, gpointer value, gpointer opaque) +{ + uint32_t tx_id = GPOINTER_TO_INT(key); + struct save_state *ss = opaque; + XsTransaction *n = value; + + write_be32(ss->bytes, n->base_tx); + write_be32(ss->bytes, n->dom_id); + + save_tree(ss, tx_id, n->root); +} + +static void save_watch(gpointer key, gpointer value, gpointer opaque) +{ + struct save_state *ss = opaque; + XsWatch *w = value; + + /* We only save the *guest* watches. */ + if (w->dom_id) { + gpointer relpath = key + w->rel_prefix; + g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); + g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); + } +} + +GByteArray *xs_impl_serialize(XenstoreImplState *s) +{ + struct save_state ss; + + ss.bytes = g_byte_array_new(); + + /* + * node = flags [ real_node / node_ref ] + * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) + * node_ref = tx_id (in which the original version of this node exists) + * real_node = content perms child* NUL + * content = len data + * len = uint32_t + * data = uint8_t{len} + * perms = perm* NUL + * perm = asciiz + * child = name node + * name = asciiz + * + * tree = tx_id node + * tx_id = uint32_t + * + * transaction = base_tx_id dom_id tree + * base_tx_id = uint32_t + * dom_id = uint32_t + * + * tx_list = tree transaction* XBT_NULL + * + * watch = path token + * path = asciiz + * token = asciiz + * + * watch_list = watch* NUL + * + * xs_serialize_stream = last_tx tx_list watch_list + * last_tx = uint32_t + */ + + /* Clear serialized_tx in every node. */ + if (s->serialized) { + clear_serialized_tx(NULL, s->root, NULL); + g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); + } + + s->serialized = true; + + write_be32(ss.bytes, s->last_tx); + save_tree(&ss, s->root_tx, s->root); + g_hash_table_foreach(s->transactions, save_tx, &ss); + + write_be32(ss.bytes, XBT_NULL); + + g_hash_table_foreach(s->watches, save_watch, &ss); + g_byte_array_append(ss.bytes, (void *)"", 1); + + return ss.bytes; +} + +struct unsave_state { + char path[XENSTORE_ABS_PATH_MAX + 1]; + XenstoreImplState *s; + GByteArray *bytes; + uint8_t *d; + size_t l; + bool root_walk; +}; + +static int consume_be32(struct unsave_state *us, unsigned int *val) +{ + uint32_t d; + + if (us->l < sizeof(d)) { + return -EINVAL; + } + memcpy(&d, us->d, sizeof(d)); + *val = ntohl(d); + us->d += sizeof(d); + us->l -= sizeof(d); + return 0; +} + +static int consume_string(struct unsave_state *us, char **str, size_t *len) +{ + size_t l; + + if (!us->l) { + return -EINVAL; + } + + l = strnlen((void *)us->d, us->l); + if (l == us->l) { + return -EINVAL; + } + + if (str) { + *str = (void *)us->d; + } + if (len) { + *len = l; + } + + us->d += l + 1; + us->l -= l + 1; + return 0; +} + +static XsNode *lookup_node(XsNode *n, char *path) +{ + char *slash = strchr(path, '/'); + XsNode *child; + + if (path[0] == '\0') { + return n; + } + + if (slash) { + *slash = '\0'; + } + + if (!n->children) { + return NULL; + } + child = g_hash_table_lookup(n->children, path); + if (!slash) { + return child; + } + + *slash = '/'; + if (!child) { + return NULL; + } + return lookup_node(child, slash + 1); +} + +static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) +{ + XsTransaction *t; + if (tx_id == us->s->root_tx) { + return lookup_node(us->s->root, us->path + 1); + } + + t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); + if (!t) { + return NULL; + } + g_assert(t->root); + return lookup_node(t->root, us->path + 1); +} + +static void count_child_nodes(gpointer key, gpointer value, gpointer user_data) +{ + unsigned int *nr_nodes = user_data; + XsNode *n = value; + + (*nr_nodes)++; + + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } +} + +static int consume_node(struct unsave_state *us, XsNode **nodep, + unsigned int *nr_nodes) +{ + XsNode *n = NULL; + uint8_t flags; + int ret; + + if (us->l < 1) { + return -EINVAL; + } + flags = us->d[0]; + us->d++; + us->l--; + + if (flags == NODE_REF) { + unsigned int tx; + + ret = consume_be32(us, &tx); + if (ret) { + return ret; + } + + n = lookup_tx_node(us, tx); + if (!n) { + return -EINVAL; + } + n->ref++; + if (n->children) { + g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); + } + } else { + uint32_t datalen; + + if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { + return -EINVAL; + } + n = xs_node_new(); + + if (flags & DELETED_IN_TX) { + n->deleted_in_tx = true; + } + if (flags & MODIFIED_IN_TX) { + n->modified_in_tx = true; + } + ret = consume_be32(us, &datalen); + if (ret) { + xs_node_unref(n); + return -EINVAL; + } + if (datalen) { + if (datalen > us->l) { + xs_node_unref(n); + return -EINVAL; + } + + GByteArray *node_data = g_byte_array_new(); + g_byte_array_append(node_data, us->d, datalen); + us->d += datalen; + us->l -= datalen; + n->content = node_data; + + if (us->root_walk) { + n->modified_in_tx = true; + } + } + while (1) { + char *perm = NULL; + size_t permlen = 0; + + ret = consume_string(us, &perm, &permlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!permlen) { + break; + } + + n->perms = g_list_append(n->perms, g_strdup(perm)); + } + + /* Now children */ + while (1) { + size_t childlen; + char *childname; + char *pathend; + XsNode *child = NULL; + + ret = consume_string(us, &childname, &childlen); + if (ret) { + xs_node_unref(n); + return ret; + } + + if (!childlen) { + break; + } + + pathend = us->path + strlen(us->path); + strncat(us->path, "/", sizeof(us->path) - 1); + strncat(us->path, childname, sizeof(us->path) - 1); + + ret = consume_node(us, &child, nr_nodes); + *pathend = '\0'; + if (ret) { + xs_node_unref(n); + return ret; + } + g_assert(child); + xs_node_add_child(n, childname, child); + } + + /* + * If the node has no data and no children we still want to fire + * a watch on it. + */ + if (us->root_walk && !n->children) { + n->modified_in_tx = true; + } + } + + if (!n->deleted_in_tx) { + (*nr_nodes)++; + } + + *nodep = n; + return 0; +} + +static int consume_tree(struct unsave_state *us, XsTransaction *t) +{ + int ret; + + ret = consume_be32(us, &t->tx_id); + if (ret) { + return ret; + } + + if (t->tx_id > us->s->last_tx) { + return -EINVAL; + } + + us->path[0] = '\0'; + + return consume_node(us, &t->root, &t->nr_nodes); +} + +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque) +{ + struct unsave_state us; + XsTransaction base_t = { 0 }; + int ret; + + us.s = s; + us.bytes = bytes; + us.d = bytes->data; + us.l = bytes->len; + + xs_impl_reset_watches(s, dom_id); + g_hash_table_remove_all(s->transactions); + + xs_node_unref(s->root); + s->root = NULL; + s->root_tx = s->last_tx = XBT_NULL; + + ret = consume_be32(&us, &s->last_tx); + if (ret) { + return ret; + } + + /* + * Consume the base tree into a transaction so that watches can be + * fired as we commit it. By setting us.root_walk we cause the nodes + * to be marked as 'modified_in_tx' as they are created, so that the + * watches are triggered on them. + */ + base_t.dom_id = dom_id; + base_t.base_tx = XBT_NULL; + us.root_walk = true; + ret = consume_tree(&us, &base_t); + if (ret) { + return ret; + } + us.root_walk = false; + + /* + * Commit the transaction now while the refcount on all nodes is 1. + * Note that we haven't yet reinstated the *guest* watches but that's + * OK because we don't want the guest to see any changes. Even any + * backend nodes which get recreated should be *precisely* as they + * were before the migration. Back ends may have been instantiated + * already, and will see the frontend magically blink into existence + * now (well, from the aio_bh which fires the watches). It's their + * responsibility to rebuild everything precisely as it was before. + */ + ret = transaction_commit(s, &base_t); + if (ret) { + return ret; + } + + while (1) { + unsigned int base_tx; + XsTransaction *t; + + ret = consume_be32(&us, &base_tx); + if (ret) { + return ret; + } + if (base_tx == XBT_NULL) { + break; + } + + t = g_new0(XsTransaction, 1); + t->base_tx = base_tx; + + ret = consume_be32(&us, &t->dom_id); + if (!ret) { + ret = consume_tree(&us, t); + } + if (ret) { + g_free(t); + return ret; + } + g_assert(t->root); + if (t->dom_id) { + s->nr_domu_transactions++; + } + g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); + } + + while (1) { + char *path, *token; + size_t pathlen, toklen; + + ret = consume_string(&us, &path, &pathlen); + if (ret) { + return ret; + } + if (!pathlen) { + break; + } + + ret = consume_string(&us, &token, &toklen); + if (ret) { + return ret; + } + + if (!watch_fn) { + continue; + } + + ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); + if (ret) { + return ret; + } + } + + if (us.l) { + return -EINVAL; + } + + return 0; +} diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h new file mode 100644 index 0000000000..0df2a91aae --- /dev/null +++ b/hw/i386/kvm/xenstore_impl.h @@ -0,0 +1,63 @@ +/* + * QEMU Xen emulation: The actual implementation of XenStore + * + * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_XENSTORE_IMPL_H +#define QEMU_XENSTORE_IMPL_H + +#include "hw/xen/xen_backend_ops.h" + +typedef struct XenstoreImplState XenstoreImplState; + +XenstoreImplState *xs_impl_create(unsigned int dom_id); + +char *xs_perm_as_string(unsigned int perm, unsigned int domid); + +/* + * These functions return *positive* error numbers. This is a little + * unconventional but it helps to keep us honest because there is + * also a very limited set of error numbers that they are permitted + * to return (those in xsd_errors). + */ + +int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GByteArray *data); +int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, + uint64_t *gencnt, GList **items); +int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t *tx_id); +int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, bool commit); +int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path); +int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList **perms); +int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, + xs_transaction_t tx_id, const char *path, GList *perms); + +/* This differs from xs_watch_fn because it has the token */ +typedef void(xs_impl_watch_fn)(void *opaque, const char *path, + const char *token); +int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, + const char *token, xs_impl_watch_fn fn, void *opaque); +int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, + const char *path, const char *token, xs_impl_watch_fn fn, + void *opaque); +int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id); + +GByteArray *xs_impl_serialize(XenstoreImplState *s); +int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, + unsigned int dom_id, xs_impl_watch_fn watch_fn, + void *watch_opaque); + +#endif /* QEMU_XENSTORE_IMPL_H */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7bebea57e3..1489abf010 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -102,6 +102,11 @@ #include "trace.h" #include CONFIG_DEVICES +#ifdef CONFIG_XEN_EMU +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen-bus.h" +#endif + /* * Helper for setting model-id for CPU models that changed model-id * depending on QEMU versions up to QEMU 2.4. @@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, if (pcms->bus) { pci_create_simple(pcms->bus, -1, "xen-platform"); } + xen_bus_init(); + xen_be_init(); } #endif diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4bf15f9c1f..30eedd62a3 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -47,8 +47,6 @@ #include "hw/kvm/clock.h" #include "hw/sysbus.h" #include "hw/i2c/smbus_eeprom.h" -#include "hw/xen/xen-x86.h" -#include "hw/xen/xen.h" #include "exec/memory.h" #include "hw/acpi/acpi.h" #include "hw/acpi/piix4.h" @@ -60,6 +58,8 @@ #include <xen/hvm/hvm_info_table.h> #include "hw/xen/xen_pt.h" #endif +#include "hw/xen/xen-x86.h" +#include "hw/xen/xen.h" #include "migration/global_state.h" #include "migration/misc.h" #include "sysemu/numa.h" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index e5a1dd19f4..56641a550e 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -18,7 +18,7 @@ #include "hw/irq.h" #include "hw/hw.h" #include "hw/i386/apic-msidef.h" -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen-bus.h" #include "hw/xen/xen-x86.h" @@ -52,10 +52,11 @@ static bool xen_in_migration; /* Compatibility with older version */ -/* This allows QEMU to build on a system that has Xen 4.5 or earlier - * installed. This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h - * needs to be included before this block and hw/xen/xen_common.h needs to - * be included before xen/hvm/ioreq.h +/* + * This allows QEMU to build on a system that has Xen 4.5 or earlier installed. + * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to + * be included before this block and hw/xen/xen_native.h needs to be included + * before xen/hvm/ioreq.h */ #ifndef IOREQ_TYPE_VMWARE_PORT #define IOREQ_TYPE_VMWARE_PORT 3 @@ -761,7 +762,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) int i; evtchn_port_t port; - port = xenevtchn_pending(state->xce_handle); + port = qemu_xen_evtchn_pending(state->xce_handle); if (port == state->bufioreq_local_port) { timer_mod(state->buffered_io_timer, BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); @@ -780,7 +781,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state) } /* unmask the wanted port again */ - xenevtchn_unmask(state->xce_handle, port); + qemu_xen_evtchn_unmask(state->xce_handle, port); /* get the io packet from shared memory */ state->send_vcpu = i; @@ -1147,7 +1148,7 @@ static void handle_buffered_io(void *opaque) BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); } else { timer_del(state->buffered_io_timer); - xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port); + qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port); } } @@ -1196,8 +1197,8 @@ static void cpu_handle_ioreq(void *opaque) } req->state = STATE_IORESP_READY; - xenevtchn_notify(state->xce_handle, - state->ioreq_local_port[state->send_vcpu]); + qemu_xen_evtchn_notify(state->xce_handle, + state->ioreq_local_port[state->send_vcpu]); } } @@ -1206,7 +1207,7 @@ static void xen_main_loop_prepare(XenIOState *state) int evtchn_fd = -1; if (state->xce_handle != NULL) { - evtchn_fd = xenevtchn_fd(state->xce_handle); + evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle); } state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io, @@ -1249,7 +1250,7 @@ static void xen_exit_notifier(Notifier *n, void *data) xenforeignmemory_unmap_resource(xen_fmem, state->fres); } - xenevtchn_close(state->xce_handle); + qemu_xen_evtchn_close(state->xce_handle); xs_daemon_close(state->xenstore); } @@ -1397,9 +1398,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) xen_pfn_t ioreq_pfn; XenIOState *state; + setup_xen_backend_ops(); + state = g_new0(XenIOState, 1); - state->xce_handle = xenevtchn_open(NULL, 0); + state->xce_handle = qemu_xen_evtchn_open(); if (state->xce_handle == NULL) { perror("xen: event channel open"); goto err; @@ -1463,8 +1466,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) /* FIXME: how about if we overflow the page here? */ for (i = 0; i < max_cpus; i++) { - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - xen_vcpu_eport(state->shared_page, i)); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, + i)); if (rc == -1) { error_report("shared evtchn %d bind error %d", i, errno); goto err; @@ -1472,8 +1476,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory) state->ioreq_local_port[i] = rc; } - rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid, - state->bufioreq_remote_port); + rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid, + state->bufioreq_remote_port); if (rc == -1) { error_report("buffered evtchn bind error %d", errno); goto err; diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 1d0879d234..f7d974677d 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -14,7 +14,7 @@ #include <sys/resource.h> -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #include "qemu/bitmap.h" #include "sysemu/runstate.h" diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index 539f7da374..57f1d742c1 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -28,7 +28,6 @@ #include "hw/ide/pci.h" #include "hw/pci/pci.h" #include "migration/vmstate.h" -#include "hw/xen/xen.h" #include "net/net.h" #include "trace.h" #include "sysemu/xen.h" @@ -38,10 +37,12 @@ #include "qom/object.h" #ifdef CONFIG_XEN -#include "hw/xen/xen_common.h" -#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_native.h" #endif +/* The rule is that xen_native.h must come first */ +#include "hw/xen/xen.h" + //#define DEBUG_PLATFORM #ifdef DEBUG_PLATFORM diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index 17910f3bcb..bbae2d87f4 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -133,7 +133,7 @@ static void pic_set_irq(void *opaque, int irq, int level) } #endif - if (s->elcr & mask) { + if (s->ltim || (s->elcr & mask)) { /* level triggered */ if (level) { s->irr |= mask; @@ -167,7 +167,7 @@ static void pic_intack(PICCommonState *s, int irq) s->isr |= (1 << irq); } /* We don't clear a level sensitive interrupt here */ - if (!(s->elcr & (1 << irq))) { + if (!s->ltim && !(s->elcr & (1 << irq))) { s->irr &= ~(1 << irq); } pic_update_irq(s); @@ -224,6 +224,7 @@ static void pic_reset(DeviceState *dev) PICCommonState *s = PIC_COMMON(dev); s->elcr = 0; + s->ltim = 0; pic_init_reset(s); } @@ -243,10 +244,7 @@ static void pic_ioport_write(void *opaque, hwaddr addr64, s->init_state = 1; s->init4 = val & 1; s->single_mode = val & 2; - if (val & 0x08) { - qemu_log_mask(LOG_UNIMP, - "i8259: level sensitive irq not supported\n"); - } + s->ltim = val & 8; } else if (val & 0x08) { if (val & 0x04) { s->poll = 1; diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index af2e4a2241..c931dc2d07 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -51,7 +51,7 @@ void pic_reset_common(PICCommonState *s) s->special_fully_nested_mode = 0; s->init4 = 0; s->single_mode = 0; - /* Note: ELCR is not reset */ + /* Note: ELCR and LTIM are not reset */ } static int pic_dispatch_pre_save(void *opaque) @@ -144,6 +144,24 @@ static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) s->special_fully_nested_mode); } +static bool ltim_state_needed(void *opaque) +{ + PICCommonState *s = PIC_COMMON(opaque); + + return !!s->ltim; +} + +static const VMStateDescription vmstate_pic_ltim = { + .name = "i8259/ltim", + .version_id = 1, + .minimum_version_id = 1, + .needed = ltim_state_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(ltim, PICCommonState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_pic_common = { .name = "i8259", .version_id = 1, @@ -168,6 +186,10 @@ static const VMStateDescription vmstate_pic_common = { VMSTATE_UINT8(single_mode, PICCommonState), VMSTATE_UINT8(elcr, PICCommonState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_pic_ltim, + NULL } }; diff --git a/hw/intc/mips_gic.c b/hw/intc/mips_gic.c index bda4549925..4bdc3b1bd1 100644 --- a/hw/intc/mips_gic.c +++ b/hw/intc/mips_gic.c @@ -439,8 +439,8 @@ static void mips_gic_realize(DeviceState *dev, Error **errp) } static Property mips_gic_properties[] = { - DEFINE_PROP_INT32("num-vp", MIPSGICState, num_vps, 1), - DEFINE_PROP_INT32("num-irq", MIPSGICState, num_irq, 256), + DEFINE_PROP_UINT32("num-vp", MIPSGICState, num_vps, 1), + DEFINE_PROP_UINT32("num-irq", MIPSGICState, num_irq, 256), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index eee04643cb..b466a6abaf 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -130,7 +130,7 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -173,7 +173,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -231,7 +231,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, /* Check if timer interrupt is triggered for each hart. */ for (i = 0; i < mtimer->num_harts; i++) { - CPUState *cpu = qemu_get_cpu(mtimer->hartid_base + i); + CPUState *cpu = cpu_by_arch_id(mtimer->hartid_base + i); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { continue; @@ -292,7 +292,7 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) s->timecmp = g_new0(uint64_t, s->num_harts); /* Claim timer interrupt bits */ for (i = 0; i < s->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); + RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { error_report("MTIP already claimed"); exit(1); @@ -372,7 +372,7 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); RISCVCPU *rvcpu = RISCV_CPU(cpu); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; riscv_aclint_mtimer_callback *cb = @@ -407,7 +407,7 @@ static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr, if (addr < (swi->num_harts << 2)) { size_t hartid = swi->hartid_base + (addr >> 2); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -430,7 +430,7 @@ static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value, if (addr < (swi->num_harts << 2)) { size_t hartid = swi->hartid_base + (addr >> 2); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -545,7 +545,7 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); RISCVCPU *rvcpu = RISCV_CPU(cpu); qdev_connect_gpio_out(dev, i, diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index cfd007e629..cd7efc4ad4 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -833,7 +833,7 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) /* Claim the CPU interrupt to be triggered by this APLIC */ for (i = 0; i < aplic->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(aplic->hartid_base + i)); + RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { error_report("%s already claimed", @@ -966,7 +966,7 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, if (!msimode) { for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); qdev_connect_gpio_out_named(dev, NULL, i, qdev_get_gpio_in(DEVICE(cpu), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index 4d4d5b50ca..fea3385b51 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -316,8 +316,8 @@ static const MemoryRegionOps riscv_imsic_ops = { static void riscv_imsic_realize(DeviceState *dev, Error **errp) { RISCVIMSICState *imsic = RISCV_IMSIC(dev); - RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid)); - CPUState *cpu = qemu_get_cpu(imsic->hartid); + RISCVCPU *rcpu = RISCV_CPU(cpu_by_arch_id(imsic->hartid)); + CPUState *cpu = cpu_by_arch_id(imsic->hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; imsic->num_eistate = imsic->num_pages * imsic->num_irqs; @@ -413,7 +413,7 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, uint32_t num_pages, uint32_t num_ids) { DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); uint32_t i; assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1))); diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c index 233059c6dc..5432ab5065 100644 --- a/hw/isa/i82378.c +++ b/hw/isa/i82378.c @@ -47,6 +47,12 @@ static const VMStateDescription vmstate_i82378 = { }, }; +static void i82378_request_out0_irq(void *opaque, int irq, int level) +{ + I82378State *s = opaque; + qemu_set_irq(s->cpu_intr, level); +} + static void i82378_request_pic_irq(void *opaque, int irq, int level) { DeviceState *dev = opaque; @@ -88,7 +94,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp) */ /* 2 82C59 (irq) */ - s->isa_irqs_in = i8259_init(isabus, s->cpu_intr); + s->isa_irqs_in = i8259_init(isabus, + qemu_allocate_irq(i82378_request_out0_irq, + s, 0)); isa_bus_register_input_irqs(isabus, s->isa_irqs_in); /* 1 82C54 (pit) */ diff --git a/hw/isa/trace-events b/hw/isa/trace-events index c4567a9b47..1816e8307a 100644 --- a/hw/isa/trace-events +++ b/hw/isa/trace-events @@ -16,6 +16,7 @@ apm_io_write(uint8_t addr, uint8_t val) "write addr=0x%x val=0x%02x" # vt82c686.c via_isa_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x" +via_pm_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x" via_pm_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x" via_pm_io_read(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x" via_pm_io_write(uint32_t addr, uint32_t val, int len) "addr 0x%x val 0x%x len 0x%x" diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index f4c40965cd..ca89119ce0 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -554,7 +554,7 @@ struct ViaISAState { PCIIDEState ide; UHCIState uhci[2]; ViaPMState pm; - PCIDevice ac97; + ViaAC97State ac97; PCIDevice mc97; }; @@ -598,15 +598,63 @@ void via_isa_set_irq(PCIDevice *d, int n, int level) qemu_set_irq(s->isa_irqs_in[n], level); } +static void via_isa_request_i8259_irq(void *opaque, int irq, int level) +{ + ViaISAState *s = opaque; + qemu_set_irq(s->cpu_intr, level); +} + +static int via_isa_get_pci_irq(const ViaISAState *s, int irq_num) +{ + switch (irq_num) { + case 0: + return s->dev.config[0x55] >> 4; + case 1: + return s->dev.config[0x56] & 0xf; + case 2: + return s->dev.config[0x56] >> 4; + case 3: + return s->dev.config[0x57] >> 4; + } + return 0; +} + +static void via_isa_set_pci_irq(void *opaque, int irq_num, int level) +{ + ViaISAState *s = opaque; + PCIBus *bus = pci_get_bus(&s->dev); + int i, pic_level, pic_irq = via_isa_get_pci_irq(s, irq_num); + + /* IRQ 0: disabled, IRQ 2,8,13: reserved */ + if (!pic_irq) { + return; + } + if (unlikely(pic_irq == 2 || pic_irq == 8 || pic_irq == 13)) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid ISA IRQ routing"); + } + + /* The pic level is the logical OR of all the PCI irqs mapped to it. */ + pic_level = 0; + for (i = 0; i < PCI_NUM_PINS; i++) { + if (pic_irq == via_isa_get_pci_irq(s, i)) { + pic_level |= pci_bus_get_irq_level(bus, i); + } + } + /* Now we change the pic irq level according to the via irq mappings. */ + qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level); +} + static void via_isa_realize(PCIDevice *d, Error **errp) { ViaISAState *s = VIA_ISA(d); DeviceState *dev = DEVICE(d); PCIBus *pci_bus = pci_get_bus(d); + qemu_irq *isa_irq; ISABus *isa_bus; int i; qdev_init_gpio_out(dev, &s->cpu_intr, 1); + isa_irq = qemu_allocate_irqs(via_isa_request_i8259_irq, s, 1); isa_bus = isa_bus_new(dev, pci_address_space(d), pci_address_space_io(d), errp); @@ -614,11 +662,13 @@ static void via_isa_realize(PCIDevice *d, Error **errp) return; } - s->isa_irqs_in = i8259_init(isa_bus, s->cpu_intr); + s->isa_irqs_in = i8259_init(isa_bus, *isa_irq); isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); i8254_pit_init(isa_bus, 0x40, 0, NULL); i8257_dma_init(isa_bus, 0); + qdev_init_gpio_in_named(dev, via_isa_set_pci_irq, "pirq", PCI_NUM_PINS); + /* RTC */ qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000); if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) { diff --git a/hw/mips/boston.c b/hw/mips/boston.c index a9d87f3437..21ad844519 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -702,7 +702,7 @@ static void boston_mach_init(MachineState *machine) object_initialize_child(OBJECT(machine), "cps", &s->cps, TYPE_MIPS_CPS); object_property_set_str(OBJECT(&s->cps), "cpu-type", machine->cpu_type, &error_fatal); - object_property_set_int(OBJECT(&s->cps), "num-vp", machine->smp.cpus, + object_property_set_uint(OBJECT(&s->cps), "num-vp", machine->smp.cpus, &error_fatal); qdev_connect_clock_in(DEVICE(&s->cps), "clk-in", qdev_get_clock_out(dev, "cpu-refclk")); diff --git a/hw/mips/cps.c b/hw/mips/cps.c index 2b436700ce..2b5269ebf1 100644 --- a/hw/mips/cps.c +++ b/hw/mips/cps.c @@ -66,20 +66,17 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env) static void mips_cps_realize(DeviceState *dev, Error **errp) { MIPSCPSState *s = MIPS_CPS(dev); - CPUMIPSState *env; - MIPSCPU *cpu; - int i; target_ulong gcr_base; bool itu_present = false; - bool saar_present = false; if (!clock_get(s->clock)) { error_setg(errp, "CPS input clock is not connected to an output clock"); return; } - for (i = 0; i < s->num_vp; i++) { - cpu = MIPS_CPU(object_new(s->cpu_type)); + for (int i = 0; i < s->num_vp; i++) { + MIPSCPU *cpu = MIPS_CPU(object_new(s->cpu_type)); + CPUMIPSState *env = &cpu->env; /* All VPs are halted on reset. Leave powering up to CPC. */ if (!object_property_set_bool(OBJECT(cpu), "start-powered-off", true, @@ -97,7 +94,6 @@ static void mips_cps_realize(DeviceState *dev, Error **errp) cpu_mips_irq_init_cpu(cpu); cpu_mips_clock_init(cpu); - env = &cpu->env; if (cpu_mips_itu_supported(env)) { itu_present = true; /* Attach ITC Tag to the VP */ @@ -107,22 +103,15 @@ static void mips_cps_realize(DeviceState *dev, Error **errp) qemu_register_reset(main_cpu_reset, cpu); } - cpu = MIPS_CPU(first_cpu); - env = &cpu->env; - saar_present = (bool)env->saarp; - /* Inter-Thread Communication Unit */ if (itu_present) { object_initialize_child(OBJECT(dev), "itu", &s->itu, TYPE_MIPS_ITU); - object_property_set_int(OBJECT(&s->itu), "num-fifo", 16, + object_property_set_link(OBJECT(&s->itu), "cpu[0]", + OBJECT(first_cpu), &error_abort); + object_property_set_uint(OBJECT(&s->itu), "num-fifo", 16, &error_abort); - object_property_set_int(OBJECT(&s->itu), "num-semaphores", 16, + object_property_set_uint(OBJECT(&s->itu), "num-semaphores", 16, &error_abort); - object_property_set_bool(OBJECT(&s->itu), "saar-present", saar_present, - &error_abort); - if (saar_present) { - s->itu.saar = &env->CP0_SAAR; - } if (!sysbus_realize(SYS_BUS_DEVICE(&s->itu), errp)) { return; } @@ -133,7 +122,7 @@ static void mips_cps_realize(DeviceState *dev, Error **errp) /* Cluster Power Controller */ object_initialize_child(OBJECT(dev), "cpc", &s->cpc, TYPE_MIPS_CPC); - object_property_set_int(OBJECT(&s->cpc), "num-vp", s->num_vp, + object_property_set_uint(OBJECT(&s->cpc), "num-vp", s->num_vp, &error_abort); object_property_set_int(OBJECT(&s->cpc), "vp-start-running", 1, &error_abort); @@ -146,9 +135,9 @@ static void mips_cps_realize(DeviceState *dev, Error **errp) /* Global Interrupt Controller */ object_initialize_child(OBJECT(dev), "gic", &s->gic, TYPE_MIPS_GIC); - object_property_set_int(OBJECT(&s->gic), "num-vp", s->num_vp, + object_property_set_uint(OBJECT(&s->gic), "num-vp", s->num_vp, &error_abort); - object_property_set_int(OBJECT(&s->gic), "num-irq", 128, + object_property_set_uint(OBJECT(&s->gic), "num-irq", 128, &error_abort); if (!sysbus_realize(SYS_BUS_DEVICE(&s->gic), errp)) { return; @@ -158,10 +147,10 @@ static void mips_cps_realize(DeviceState *dev, Error **errp) sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gic), 0)); /* Global Configuration Registers */ - gcr_base = env->CP0_CMGCRBase << 4; + gcr_base = MIPS_CPU(first_cpu)->env.CP0_CMGCRBase << 4; object_initialize_child(OBJECT(dev), "gcr", &s->gcr, TYPE_MIPS_GCR); - object_property_set_int(OBJECT(&s->gcr), "num-vp", s->num_vp, + object_property_set_uint(OBJECT(&s->gcr), "num-vp", s->num_vp, &error_abort); object_property_set_int(OBJECT(&s->gcr), "gcr-rev", 0x800, &error_abort); diff --git a/hw/mips/malta.c b/hw/mips/malta.c index ec172b111a..af9021316d 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -1066,7 +1066,7 @@ static void create_cps(MachineState *ms, MaltaState *s, object_initialize_child(OBJECT(s), "cps", &s->cps, TYPE_MIPS_CPS); object_property_set_str(OBJECT(&s->cps), "cpu-type", ms->cpu_type, &error_fatal); - object_property_set_int(OBJECT(&s->cps), "num-vp", ms->smp.cpus, + object_property_set_uint(OBJECT(&s->cps), "num-vp", ms->smp.cpus, &error_fatal); qdev_connect_clock_in(DEVICE(&s->cps), "clk-in", s->cpuclk); sysbus_realize(SYS_BUS_DEVICE(&s->cps), &error_fatal); diff --git a/hw/misc/edu.c b/hw/misc/edu.c index e935c418d4..a1f8bc77e7 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, case 0x20: if (val & EDU_STATUS_IRQFACT) { qatomic_or(&edu->status, EDU_STATUS_IRQFACT); + /* Order check of the COMPUTING flag after setting IRQFACT. */ + smp_mb__after_rmw(); } else { qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); } @@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) qemu_mutex_unlock(&edu->thr_mutex); qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); + /* Clear COMPUTING flag before checking IRQFACT. */ + smp_mb__after_rmw(); + if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { qemu_mutex_lock_iothread(); edu_raise_irq(edu, FACT_IRQ); diff --git a/hw/misc/mips_cmgcr.c b/hw/misc/mips_cmgcr.c index 3c8b37f700..66eb11662c 100644 --- a/hw/misc/mips_cmgcr.c +++ b/hw/misc/mips_cmgcr.c @@ -212,7 +212,7 @@ static const VMStateDescription vmstate_mips_gcr = { }; static Property mips_gcr_properties[] = { - DEFINE_PROP_INT32("num-vp", MIPSGCRState, num_vps, 1), + DEFINE_PROP_UINT32("num-vp", MIPSGCRState, num_vps, 1), DEFINE_PROP_INT32("gcr-rev", MIPSGCRState, gcr_rev, 0x800), DEFINE_PROP_UINT64("gcr-base", MIPSGCRState, gcr_base, GCR_BASE_ADDR), DEFINE_PROP_LINK("gic", MIPSGCRState, gic_mr, TYPE_MEMORY_REGION, diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c index badef5c214..0eda302db4 100644 --- a/hw/misc/mips_itu.c +++ b/hw/misc/mips_itu.c @@ -93,10 +93,10 @@ void itc_reconfigure(MIPSITUState *tag) uint64_t size = (1 * KiB) + (am[1] & ITC_AM1_ADDR_MASK_MASK); bool is_enabled = (am[0] & ITC_AM0_EN_MASK) != 0; - if (tag->saar_present) { - address = ((*(uint64_t *) tag->saar) & 0xFFFFFFFFE000ULL) << 4; - size = 1ULL << ((*(uint64_t *) tag->saar >> 1) & 0x1f); - is_enabled = *(uint64_t *) tag->saar & 1; + if (tag->saar) { + address = (tag->saar[0] & 0xFFFFFFFFE000ULL) << 4; + size = 1ULL << ((tag->saar[0] >> 1) & 0x1f); + is_enabled = tag->saar[0] & 1; } memory_region_transaction_begin(); @@ -157,7 +157,7 @@ static inline ITCView get_itc_view(hwaddr addr) static inline int get_cell_stride_shift(const MIPSITUState *s) { /* Minimum interval (for EntryGain = 0) is 128 B */ - if (s->saar_present) { + if (s->saar) { return 7 + ((s->icr0 >> ITC_ICR0_BLK_GRAIN) & ITC_ICR0_BLK_GRAIN_MASK); } else { @@ -515,6 +515,7 @@ static void mips_itu_init(Object *obj) static void mips_itu_realize(DeviceState *dev, Error **errp) { MIPSITUState *s = MIPS_ITU(dev); + CPUMIPSState *env; if (s->num_fifo > ITC_FIFO_NUM_MAX) { error_setg(errp, "Exceed maximum number of FIFO cells: %d", @@ -526,6 +527,15 @@ static void mips_itu_realize(DeviceState *dev, Error **errp) s->num_semaphores); return; } + if (!s->cpu0) { + error_setg(errp, "Missing 'cpu[0]' property"); + return; + } + + env = &s->cpu0->env; + if (env->saarp) { + s->saar = env->CP0_SAAR; + } s->cell = g_new(ITCStorageCell, get_num_cells(s)); } @@ -534,8 +544,8 @@ static void mips_itu_reset(DeviceState *dev) { MIPSITUState *s = MIPS_ITU(dev); - if (s->saar_present) { - *(uint64_t *) s->saar = 0x11 << 1; + if (s->saar) { + s->saar[0] = 0x11 << 1; s->icr0 = get_num_cells(s) << ITC_ICR0_CELL_NUM; } else { s->ITCAddressMap[0] = 0; @@ -549,11 +559,11 @@ static void mips_itu_reset(DeviceState *dev) } static Property mips_itu_properties[] = { - DEFINE_PROP_INT32("num-fifo", MIPSITUState, num_fifo, + DEFINE_PROP_UINT32("num-fifo", MIPSITUState, num_fifo, ITC_FIFO_NUM_MAX), - DEFINE_PROP_INT32("num-semaphores", MIPSITUState, num_semaphores, + DEFINE_PROP_UINT32("num-semaphores", MIPSITUState, num_semaphores, ITC_SEMAPH_NUM_MAX), - DEFINE_PROP_BOOL("saar-present", MIPSITUState, saar_present, false), + DEFINE_PROP_LINK("cpu[0]", MIPSITUState, cpu0, TYPE_MIPS_CPU, MIPSCPU *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 7d92c2d022..9bbf6599fc 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev) continue; } - if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { + if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) { xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n"); net_tx_error(netdev, &txreq, rc); continue; @@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev) if (txreq.flags & NETTXF_csum_blank) { /* have read-only mapping -> can't fill checksum in-place */ if (!tmpbuf) { - tmpbuf = g_malloc(XC_PAGE_SIZE); + tmpbuf = g_malloc(XEN_PAGE_SIZE); } memcpy(tmpbuf, page + txreq.offset, txreq.size); net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL); @@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev) qemu_send_packet(qemu_get_queue(netdev->nic), page + txreq.offset, txreq.size); } - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref); net_tx_response(netdev, &txreq, NETIF_RSP_OKAY); } if (!netdev->tx_work) { @@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) { return 0; } - if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { + if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) { xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", - (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); + (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN); return -1; } @@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size return -1; } memcpy(page + NET_IP_ALIGN, buf, size); - xen_be_unmap_grant_ref(&netdev->xendev, page); + xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref); net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0); return size; @@ -343,12 +343,13 @@ static int net_connect(struct XenLegacyDevice *xendev) netdev->rx_ring_ref, PROT_READ | PROT_WRITE); if (!netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; return -1; } - BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE); - BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE); + BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE); + BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE); xen_be_bind_evtchn(&netdev->xendev); @@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev) xen_pv_unbind_evtchn(&netdev->xendev); if (netdev->txs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs, + netdev->tx_ring_ref); netdev->txs = NULL; } if (netdev->rxs) { - xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs); + xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs, + netdev->rx_ring_ref); netdev->rxs = NULL; } } diff --git a/hw/pci-host/mv64361.c b/hw/pci-host/mv64361.c index 298564f1f5..19e8031a3f 100644 --- a/hw/pci-host/mv64361.c +++ b/hw/pci-host/mv64361.c @@ -873,10 +873,6 @@ static void mv64361_realize(DeviceState *dev, Error **errp) } sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->cpu_irq); qdev_init_gpio_in_named(dev, mv64361_gpp_irq, "gpp", 32); - /* FIXME: PCI IRQ connections may be board specific */ - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci[1].irq[i] = qdev_get_gpio_in_named(dev, "gpp", 12 + i); - } } static void mv64361_reset(DeviceState *dev) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 7cc375df05..f1650be5ee 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -73,6 +73,8 @@ struct Pegasos2MachineState { MachineState parent_obj; PowerPCCPU *cpu; DeviceState *mv; + qemu_irq mv_pirq[PCI_NUM_PINS]; + qemu_irq via_pirq[PCI_NUM_PINS]; Vof *vof; void *fdt_blob; uint64_t kernel_addr; @@ -95,6 +97,15 @@ static void pegasos2_cpu_reset(void *opaque) } } +static void pegasos2_pci_irq(void *opaque, int n, int level) +{ + Pegasos2MachineState *pm = opaque; + + /* PCI interrupt lines are connected to both MV64361 and VT8231 */ + qemu_set_irq(pm->mv_pirq[n], level); + qemu_set_irq(pm->via_pirq[n], level); +} + static void pegasos2_init(MachineState *machine) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); @@ -106,7 +117,7 @@ static void pegasos2_init(MachineState *machine) I2CBus *i2c_bus; const char *fwname = machine->firmware ?: PROM_FILENAME; char *filename; - int sz; + int i, sz; uint8_t *spd_data; /* init CPU */ @@ -156,11 +167,18 @@ static void pegasos2_init(MachineState *machine) /* Marvell Discovery II system controller */ pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, qdev_get_gpio_in(DEVICE(pm->cpu), PPC6xx_INPUT_INT))); + for (i = 0; i < PCI_NUM_PINS; i++) { + pm->mv_pirq[i] = qdev_get_gpio_in_named(pm->mv, "gpp", 12 + i); + } pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_bus_irqs(pci_bus, pegasos2_pci_irq, pm, PCI_NUM_PINS); /* VIA VT8231 South Bridge (multifunction PCI device) */ via = OBJECT(pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, TYPE_VT8231_ISA)); + for (i = 0; i < PCI_NUM_PINS; i++) { + pm->via_pirq[i] = qdev_get_gpio_in_named(DEVICE(via), "pirq", i); + } object_property_add_alias(OBJECT(machine), "rtc-time", object_resolve_path_component(via, "rtc"), "date"); @@ -267,6 +285,12 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason) PCI_INTERRUPT_LINE, 2, 0x9); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | 0x50, 1, 0x2); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x55, 1, 0x90); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x56, 1, 0x99); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x57, 1, 0x90); pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | PCI_INTERRUPT_LINE, 2, 0x109); diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 4550b3b938..6528ebfa3a 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -44,6 +44,7 @@ config RISCV_VIRT select VIRTIO_MMIO select FW_CFG_DMA select PLATFORM_BUS + select ACPI config SHAKTI_C bool diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index ab6cae57ea..2f7ee81be3 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -9,5 +9,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) +riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c new file mode 100644 index 0000000000..82da0a238c --- /dev/null +++ b/hw/riscv/virt-acpi-build.c @@ -0,0 +1,416 @@ +/* + * Support for generating ACPI tables and passing them to Guests + * + * RISC-V virt ACPI generation + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (C) 2021-2023 Ventana Micro Systems Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/utils.h" +#include "qapi/error.h" +#include "sysemu/reset.h" +#include "migration/vmstate.h" +#include "hw/riscv/virt.h" +#include "hw/riscv/numa.h" +#include "hw/intc/riscv_aclint.h" + +#define ACPI_BUILD_TABLE_SIZE 0x20000 + +typedef struct AcpiBuildState { + /* Copy of table in RAM (for patching) */ + MemoryRegion *table_mr; + MemoryRegion *rsdp_mr; + MemoryRegion *linker_mr; + /* Is table patched? */ + bool patched; +} AcpiBuildState; + +static void acpi_align_size(GArray *blob, unsigned align) +{ + /* + * Align size to multiple of given size. This reduces the chance + * we need to change size in the future (breaking cross version migration). + */ + g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); +} + +static void riscv_acpi_madt_add_rintc(uint32_t uid, + const CPUArchIdList *arch_ids, + GArray *entry) +{ + uint64_t hart_id = arch_ids->cpus[uid].arch_id; + + build_append_int_noprefix(entry, 0x18, 1); /* Type */ + build_append_int_noprefix(entry, 20, 1); /* Length */ + build_append_int_noprefix(entry, 1, 1); /* Version */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, 0x1, 4); /* Flags */ + build_append_int_noprefix(entry, hart_id, 8); /* Hart ID */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ +} + +static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + for (int i = 0; i < arch_ids->len; i++) { + Aml *dev; + GArray *madt_buf = g_array_new(0, 1, 1); + + dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", + aml_int(arch_ids->cpus[i].arch_id))); + + /* build _MAT object */ + riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf); + aml_append(dev, aml_name_decl("_MAT", + aml_buffer(madt_buf->len, + (uint8_t *)madt_buf->data))); + g_array_free(madt_buf, true); + + aml_append(scope, dev); + } +} + +static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap) +{ + Aml *dev = aml_device("FWCF"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); + + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, + fw_cfg_memmap->size, AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); +} + +/* RHCT Node[N] starts at offset 56 */ +#define RHCT_NODE_ARRAY_OFFSET 56 + +/* + * ACPI spec, Revision 6.5+ + * 5.2.36 RISC-V Hart Capabilities Table (RHCT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 + * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view + */ +static void build_rhct(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + size_t len, aligned_len; + uint32_t isa_offset, num_rhct_nodes; + RISCVCPU *cpu; + char *isa; + + AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + + build_append_int_noprefix(table_data, 0x0, 4); /* Reserved */ + + /* Time Base Frequency */ + build_append_int_noprefix(table_data, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, 8); + + /* ISA + N hart info */ + num_rhct_nodes = 1 + ms->smp.cpus; + + /* Number of RHCT nodes*/ + build_append_int_noprefix(table_data, num_rhct_nodes, 4); + + /* Offset to the RHCT node array */ + build_append_int_noprefix(table_data, RHCT_NODE_ARRAY_OFFSET, 4); + + /* ISA String Node */ + isa_offset = table_data->len - table.table_offset; + build_append_int_noprefix(table_data, 0, 2); /* Type 0 */ + + cpu = &s->soc[0].harts[0]; + isa = riscv_isa_string(cpu); + len = 8 + strlen(isa) + 1; + aligned_len = (len % 2) ? (len + 1) : len; + + build_append_int_noprefix(table_data, aligned_len, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + + /* ISA string length including NUL */ + build_append_int_noprefix(table_data, strlen(isa) + 1, 2); + g_array_append_vals(table_data, isa, strlen(isa) + 1); /* ISA string */ + + if (aligned_len != len) { + build_append_int_noprefix(table_data, 0x0, 1); /* Optional Padding */ + } + + /* Hart Info Node */ + for (int i = 0; i < arch_ids->len; i++) { + build_append_int_noprefix(table_data, 0xFFFF, 2); /* Type */ + build_append_int_noprefix(table_data, 16, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + build_append_int_noprefix(table_data, 1, 2); /* Number of offsets */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */ + } + + acpi_table_end(linker, &table); +} + +/* FADT */ +static void build_fadt_rev6(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s, + unsigned dsdt_tbl_offset) +{ + AcpiFadtData fadt = { + .rev = 6, + .minor_ver = 5, + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, + .xdsdt_tbl_offset = &dsdt_tbl_offset, + }; + + build_fadt(table_data, linker, &fadt, s->oem_id, s->oem_table_id); +} + +/* DSDT */ +static void build_dsdt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + Aml *scope, *dsdt; + const MemMapEntry *memmap = s->memmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + + acpi_table_begin(&table, table_data); + dsdt = init_aml_allocator(); + + /* + * When booting the VM with UEFI, UEFI takes ownership of the RTC hardware. + * While UEFI can use libfdt to disable the RTC device node in the DTB that + * it passes to the OS, it cannot modify AML. Therefore, we won't generate + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); + acpi_dsdt_add_cpus(scope, s); + + acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]); + + aml_append(dsdt, scope); + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); + + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +/* + * ACPI spec, Revision 6.5+ + * 5.2.12 Multiple APIC Description Table (MADT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 + * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view + */ +static void build_madt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ + build_append_int_noprefix(table_data, 0, 4); + build_append_int_noprefix(table_data, 0, 4); /* MADT Flags */ + + /* RISC-V Local INTC structures per HART */ + for (int i = 0; i < arch_ids->len; i++) { + riscv_acpi_madt_add_rintc(i, arch_ids, table_data); + } + + acpi_table_end(linker, &table); +} + +static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) +{ + GArray *table_offsets; + unsigned dsdt, xsdt; + GArray *tables_blob = tables->table_data; + + table_offsets = g_array_new(false, true, + sizeof(uint32_t)); + + bios_linker_loader_alloc(tables->linker, + ACPI_BUILD_TABLE_FILE, tables_blob, + 64, false); + + /* DSDT is pointed to by FADT */ + dsdt = tables_blob->len; + build_dsdt(tables_blob, tables->linker, s); + + /* FADT and others pointed to by XSDT */ + acpi_add_table(table_offsets, tables_blob); + build_fadt_rev6(tables_blob, tables->linker, s, dsdt); + + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, s); + + acpi_add_table(table_offsets, tables_blob); + build_rhct(tables_blob, tables->linker, s); + + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id, + s->oem_table_id); + + /* RSDP is in FSEG memory, so allocate it separately */ + { + AcpiRsdpData rsdp_data = { + .revision = 2, + .oem_id = s->oem_id, + .xsdt_tbl_offset = &xsdt, + .rsdt_tbl_offset = NULL, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + } + + /* + * The align size is 128, warn if 64k is not enough therefore + * the align size could be resized. + */ + if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", + tables_blob->len, ACPI_BUILD_TABLE_SIZE / 2); + error_printf("Try removing some objects."); + } + + acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE); + + /* Clean up memory that's no longer used */ + g_array_free(table_offsets, true); +} + +static void acpi_ram_update(MemoryRegion *mr, GArray *data) +{ + uint32_t size = acpi_data_len(data); + + /* + * Make sure RAM size is correct - in case it got changed + * e.g. by migration + */ + memory_region_ram_resize(mr, size, &error_abort); + + memcpy(memory_region_get_ram_ptr(mr), data->data, size); + memory_region_set_dirty(mr, 0, size); +} + +static void virt_acpi_build_update(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + AcpiBuildTables tables; + + /* No state to update or already patched? Nothing to do. */ + if (!build_state || build_state->patched) { + return; + } + + build_state->patched = true; + + acpi_build_tables_init(&tables); + + virt_acpi_build(RISCV_VIRT_MACHINE(qdev_get_machine()), &tables); + + acpi_ram_update(build_state->table_mr, tables.table_data); + acpi_ram_update(build_state->rsdp_mr, tables.rsdp); + acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob); + + acpi_build_tables_cleanup(&tables, true); +} + +static void virt_acpi_build_reset(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + build_state->patched = false; +} + +static const VMStateDescription vmstate_virt_acpi_build = { + .name = "virt_acpi_build", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(patched, AcpiBuildState), + VMSTATE_END_OF_LIST() + }, +}; + +void virt_acpi_setup(RISCVVirtState *s) +{ + AcpiBuildTables tables; + AcpiBuildState *build_state; + + build_state = g_malloc0(sizeof *build_state); + + acpi_build_tables_init(&tables); + virt_acpi_build(s, &tables); + + /* Now expose it all to Guest */ + build_state->table_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.table_data, + ACPI_BUILD_TABLE_FILE); + assert(build_state->table_mr != NULL); + + build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, + tables.linker->cmd_blob, + ACPI_BUILD_LOADER_FILE); + + build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.rsdp, + ACPI_BUILD_RSDP_FILE); + + qemu_register_reset(virt_acpi_build_reset, build_state); + virt_acpi_build_reset(build_state); + vmstate_register(NULL, 0, &vmstate_virt_acpi_build, build_state); + + /* + * Clean up tables but don't free the memory: we track it + * in build_state. + */ + acpi_build_tables_cleanup(&tables, false); +} diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 4f8191860b..4e3efbee16 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -49,6 +49,8 @@ #include "hw/pci/pci.h" #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" +#include "hw/acpi/aml-build.h" +#include "qapi/qapi-visit-common.h" /* * The virt machine physical address space used by some of the devices @@ -228,8 +230,9 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, int cpu; uint32_t cpu_phandle; MachineState *ms = MACHINE(s); - char *name, *cpu_name, *core_name, *intc_name; + char *name, *cpu_name, *core_name, *intc_name, *sv_name; bool is_32_bit = riscv_is_32bit(&s->soc[0]); + uint8_t satp_mode_max; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu]; @@ -239,16 +242,29 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, cpu_name = g_strdup_printf("/cpus/cpu@%d", s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); - if (cpu_ptr->cfg.mmu) { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - (is_32_bit) ? "riscv,sv32" : "riscv,sv48"); - } else { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - "riscv,none"); - } + + satp_mode_max = satp_mode_max_from_map( + s->soc[socket].harts[cpu].cfg.satp_mode.map); + sv_name = g_strdup_printf("riscv,%s", + satp_mode_str(satp_mode_max, is_32_bit)); + qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); + g_free(sv_name); + + name = riscv_isa_string(cpu_ptr); qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name); g_free(name); + + if (cpu_ptr->cfg.ext_icbom) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size", + cpu_ptr->cfg.cbom_blocksize); + } + + if (cpu_ptr->cfg.ext_icboz) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size", + cpu_ptr->cfg.cboz_blocksize); + } + qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv"); qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay"); qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg", @@ -1307,6 +1323,10 @@ static void virt_machine_done(Notifier *notifier, void *data) if (kvm_enabled()) { riscv_setup_direct_kernel(kernel_entry, fdt_load_addr); } + + if (virt_is_acpi_enabled(s)) { + virt_acpi_setup(s); + } } static void virt_machine_init(MachineState *machine) @@ -1442,6 +1462,8 @@ static void virt_machine_init(MachineState *machine) ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); } + s->memmap = virt_memmap; + /* register system main memory (actual RAM) */ memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, machine->ram); @@ -1514,6 +1536,11 @@ static void virt_machine_init(MachineState *machine) static void virt_machine_instance_init(Object *obj) { + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + s->acpi = ON_OFF_AUTO_AUTO; } static char *virt_get_aia_guests(Object *obj, Error **errp) @@ -1588,6 +1615,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) s->have_aclint = value; } +bool virt_is_acpi_enabled(RISCVVirtState *s) +{ + return s->acpi != ON_OFF_AUTO_OFF; +} + +static void virt_get_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + OnOffAuto acpi = s->acpi; + + visit_type_OnOffAuto(v, name, &acpi, errp); +} + +static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &s->acpi, errp); +} + static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, DeviceState *dev) { @@ -1659,6 +1708,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value " "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS); object_class_property_set_description(oc, "aia-guests", str); + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 6f8b543243..88d2b4b13c 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1410,6 +1410,18 @@ static void ohci_set_hub_status(OHCIState *ohci, uint32_t val) } } +/* This is the one state transition the controller can do by itself */ +static bool ohci_resume(OHCIState *s) +{ + if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { + trace_usb_ohci_remote_wakeup(s->name); + s->ctl &= ~OHCI_CTL_HCFS; + s->ctl |= OHCI_USB_RESUME; + return true; + } + return false; +} + /* * Sets a flag in a port status reg but only set it if the port is connected. * If not set ConnectStatusChange flag. If flag is enabled return 1. @@ -1426,7 +1438,10 @@ static int ohci_port_set_if_connected(OHCIState *ohci, int i, uint32_t val) if (!(ohci->rhport[i].ctrl & OHCI_PORT_CCS)) { ohci->rhport[i].ctrl |= OHCI_PORT_CSC; if (ohci->rhstatus & OHCI_RHS_DRWE) { - /* TODO: CSC is a wakeup event */ + /* CSC is a wakeup event */ + if (ohci_resume(ohci)) { + ohci_set_interrupt(ohci, OHCI_INTR_RD); + } } return 0; } @@ -1828,11 +1843,7 @@ static void ohci_wakeup(USBPort *port1) intr = OHCI_INTR_RHSC; } /* Note that the controller can be suspended even if this port is not */ - if ((s->ctl & OHCI_CTL_HCFS) == OHCI_USB_SUSPEND) { - trace_usb_ohci_remote_wakeup(s->name); - /* This is the one state transition the controller can do by itself */ - s->ctl &= ~OHCI_CTL_HCFS; - s->ctl |= OHCI_USB_RESUME; + if (ohci_resume(s)) { /* * In suspend mode only ResumeDetected is possible, not RHSC: * see the OHCI spec 5.1.2.3. diff --git a/hw/usb/meson.build b/hw/usb/meson.build index bdf34cbd3e..599dc24f0d 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -84,6 +84,6 @@ if libusb.found() hw_usb_modules += {'host': usbhost_ss} endif -softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c')) +softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: files('xen-usb.c')) modules += { 'hw-usb': hw_usb_modules } diff --git a/hw/usb/vt82c686-uhci-pci.c b/hw/usb/vt82c686-uhci-pci.c index 46a901f56f..b4884c9011 100644 --- a/hw/usb/vt82c686-uhci-pci.c +++ b/hw/usb/vt82c686-uhci-pci.c @@ -1,17 +1,7 @@ #include "qemu/osdep.h" -#include "hw/irq.h" #include "hw/isa/vt82c686.h" #include "hcd-uhci.h" -static void uhci_isa_set_irq(void *opaque, int irq_num, int level) -{ - UHCIState *s = opaque; - uint8_t irq = pci_get_byte(s->dev.config + PCI_INTERRUPT_LINE); - if (irq > 0 && irq < 15) { - via_isa_set_irq(pci_get_function_0(&s->dev), irq, level); - } -} - static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) { UHCIState *s = UHCI(dev); @@ -25,8 +15,6 @@ static void usb_uhci_vt82c686b_realize(PCIDevice *dev, Error **errp) pci_set_long(pci_conf + 0xc0, 0x00002000); usb_uhci_common_realize(dev, errp); - object_unref(s->irq); - s->irq = qemu_allocate_irq(uhci_isa_set_irq, s, 0); } static UHCIInfo uhci_info[] = { diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 0f7369e7ed..66cb3f7c24 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -101,6 +101,8 @@ struct usbback_hotplug { struct usbback_info { struct XenLegacyDevice xendev; /* must be first */ USBBus bus; + uint32_t urb_ring_ref; + uint32_t conn_ring_ref; void *urb_sring; void *conn_sring; struct usbif_urb_back_ring urb_ring; @@ -159,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < nr_segs; i++) { if ((unsigned)usbback_req->req.seg[i].offset + - (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) { + (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) { xen_pv_printf(xendev, 0, "segment crosses page boundary\n"); return -EINVAL; } @@ -183,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < usbback_req->nr_buffer_segs; i++) { seg = usbback_req->req.seg + i; - addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset; + addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset; qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length); } } @@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req) static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, int32_t actual_length, int32_t error_count) { + uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST]; struct usbback_info *usbif; struct usbif_urb_response *res; struct XenLegacyDevice *xendev; - unsigned int notify; + unsigned int notify, i; usbif = usbback_req->usbif; xendev = &usbif->xendev; @@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, } if (usbback_req->buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->buffer, + for (i = 0; i < usbback_req->nr_buffer_segs; i++) { + ref[i] = usbback_req->req.seg[i].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref, usbback_req->nr_buffer_segs); usbback_req->buffer = NULL; } if (usbback_req->isoc_buffer) { - xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, + for (i = 0; i < usbback_req->nr_extra_segs; i++) { + ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref; + } + xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref, usbback_req->nr_extra_segs); usbback_req->isoc_buffer = NULL; } @@ -832,11 +841,11 @@ static void usbback_disconnect(struct XenLegacyDevice *xendev) xen_pv_unbind_evtchn(xendev); if (usbif->urb_sring) { - xen_be_unmap_grant_ref(xendev, usbif->urb_sring); + xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref); usbif->urb_sring = NULL; } if (usbif->conn_sring) { - xen_be_unmap_grant_ref(xendev, usbif->conn_sring); + xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref); usbif->conn_sring = NULL; } @@ -889,10 +898,12 @@ static int usbback_connect(struct XenLegacyDevice *xendev) return -1; } + usbif->urb_ring_ref = urb_ring_ref; + usbif->conn_ring_ref = conn_ring_ref; urb_sring = usbif->urb_sring; conn_sring = usbif->conn_sring; - BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE); - BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE); + BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE); + BACK_RING_INIT(&usbif->conn_ring, conn_sring, XEN_PAGE_SIZE); xen_be_bind_evtchn(xendev); diff --git a/hw/xen/meson.build b/hw/xen/meson.build index ae0ace3046..19c6aabc7c 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -1,4 +1,4 @@ -softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( +softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files( 'xen-backend.c', 'xen-bus-helper.c', 'xen-bus.c', @@ -7,6 +7,10 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( 'xen_pvdev.c', )) +softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( + 'xen-operations.c', +)) + xen_specific_ss = ss.source_set() if have_xen_pci_passthrough xen_specific_ss.add(files( diff --git a/hw/xen/trace-events b/hw/xen/trace-events index 3da3fd8348..55c9e1df68 100644 --- a/hw/xen/trace-events +++ b/hw/xen/trace-events @@ -1,6 +1,6 @@ # See docs/devel/tracing.rst for syntax documentation. -# ../../include/hw/xen/xen_common.h +# ../../include/hw/xen/xen_native.h xen_default_ioreq_server(void) "" xen_ioreq_server_create(uint32_t id) "id: %u" xen_ioreq_server_destroy(uint32_t id) "id: %u" diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c index 5a1e12b374..b2b2cc9c5d 100644 --- a/hw/xen/xen-bus-helper.c +++ b/hw/xen/xen-bus-helper.c @@ -10,6 +10,7 @@ #include "hw/xen/xen-bus.h" #include "hw/xen/xen-bus-helper.h" #include "qapi/error.h" +#include "trace.h" #include <glib/gprintf.h> @@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state) return "INVALID"; } -void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, - const char *node, struct xs_permissions perms[], - unsigned int nr_perms, Error **errp) +void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid, + const char *node, unsigned int owner, unsigned int domid, + unsigned int perms, Error **errp) { trace_xs_node_create(node); - if (!xs_write(xsh, tid, node, "", 0)) { + if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) { error_setg_errno(errp, errno, "failed to create node '%s'", node); - return; - } - - if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) { - error_setg_errno(errp, errno, "failed to set node '%s' permissions", - node); } } -void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, Error **errp) { trace_xs_node_destroy(node); - if (!xs_rm(xsh, tid, node)) { + if (!qemu_xen_xs_destroy(h, tid, node)) { error_setg_errno(errp, errno, "failed to destroy node '%s'", node); } } -void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, trace_xs_node_vprintf(path, value); - if (!xs_write(xsh, tid, path, value, len)) { + if (!qemu_xen_xs_write(h, tid, path, value, len)) { error_setg_errno(errp, errno, "failed to write '%s' to '%s'", value, path); } @@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, g_free(path); } -void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, +void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap); + xs_node_vprintf(h, tid, node, key, errp, fmt, ap); va_end(ap); } -int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, va_list ap) { @@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); - value = xs_read(xsh, tid, path, NULL); + value = qemu_xen_xs_read(h, tid, path, NULL); trace_xs_node_vscanf(path, value); @@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, return rc; } -int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, +int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid, const char *node, const char *key, Error **errp, const char *fmt, ...) { @@ -141,42 +136,35 @@ int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, int rc; va_start(ap, fmt); - rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap); + rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap); va_end(ap); return rc; } -void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, - char *token, Error **errp) +struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, + const char *key, xs_watch_fn fn, + void *opaque, Error **errp) { char *path; + struct qemu_xs_watch *w; path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : g_strdup(key); trace_xs_node_watch(path); - if (!xs_watch(xsh, path, token)) { + w = qemu_xen_xs_watch(h, path, fn, opaque); + if (!w) { error_setg_errno(errp, errno, "failed to watch node '%s'", path); } g_free(path); + + return w; } -void xs_node_unwatch(struct xs_handle *xsh, const char *node, - const char *key, const char *token, Error **errp) +void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w) { - char *path; - - path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : - g_strdup(key); - - trace_xs_node_unwatch(path); - - if (!xs_unwatch(xsh, path, token)) { - error_setg_errno(errp, errno, "failed to unwatch node '%s'", path); - } - - g_free(path); + qemu_xen_xs_unwatch(h, w); } diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index df3f6b9ae0..c59850b1de 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp) /* Mimic the way the Xen toolstack does an unplug */ again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg_errno(errp, errno, "failed xs_transaction_start"); return; @@ -80,7 +80,7 @@ again: goto abort; } - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { if (errno == EAGAIN) { goto again; } @@ -95,7 +95,7 @@ abort: * We only abort if there is already a failure so ignore any error * from ending the transaction. */ - xs_transaction_end(xenbus->xsh, tid, true); + qemu_xen_xs_transaction_end(xenbus->xsh, tid, true); } static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent) @@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev) return xen_device_get_backend_path(XEN_DEVICE(dev)); } -struct XenWatch { - char *node, *key; - char *token; - XenWatchHandler handler; - void *opaque; - Notifier notifier; -}; - -static void watch_notify(Notifier *n, void *data) -{ - XenWatch *watch = container_of(n, XenWatch, notifier); - const char *token = data; - - if (!strcmp(watch->token, token)) { - watch->handler(watch->opaque); - } -} - -static XenWatch *new_watch(const char *node, const char *key, - XenWatchHandler handler, void *opaque) -{ - XenWatch *watch = g_new0(XenWatch, 1); - QemuUUID uuid; - - qemu_uuid_generate(&uuid); - - watch->token = qemu_uuid_unparse_strdup(&uuid); - watch->node = g_strdup(node); - watch->key = g_strdup(key); - watch->handler = handler; - watch->opaque = opaque; - watch->notifier.notify = watch_notify; - - return watch; -} - -static void free_watch(XenWatch *watch) -{ - g_free(watch->token); - g_free(watch->key); - g_free(watch->node); - - g_free(watch); -} - -struct XenWatchList { - struct xs_handle *xsh; - NotifierList notifiers; -}; - -static void watch_list_event(void *opaque) -{ - XenWatchList *watch_list = opaque; - char **v; - const char *token; - - v = xs_check_watch(watch_list->xsh); - if (!v) { - return; - } - - token = v[XS_WATCH_TOKEN]; - - notifier_list_notify(&watch_list->notifiers, (void *)token); - - free(v); -} - -static XenWatchList *watch_list_create(struct xs_handle *xsh) -{ - XenWatchList *watch_list = g_new0(XenWatchList, 1); - - g_assert(xsh); - - watch_list->xsh = xsh; - notifier_list_init(&watch_list->notifiers); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL, - watch_list); - - return watch_list; -} - -static void watch_list_destroy(XenWatchList *watch_list) -{ - g_assert(notifier_list_empty(&watch_list->notifiers)); - qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL); - g_free(watch_list); -} - -static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node, - const char *key, XenWatchHandler handler, - void *opaque, Error **errp) -{ - ERRP_GUARD(); - XenWatch *watch = new_watch(node, key, handler, opaque); - - notifier_list_add(&watch_list->notifiers, &watch->notifier); - - xs_node_watch(watch_list->xsh, node, key, watch->token, errp); - if (*errp) { - notifier_remove(&watch->notifier); - free_watch(watch); - - return NULL; - } - - return watch; -} - -static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch, - Error **errp) -{ - xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token, - errp); - - notifier_remove(&watch->notifier); - free_watch(watch); -} - -static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node, - const char *key, XenWatchHandler handler, - Error **errp) -{ - trace_xen_bus_add_watch(node, key); - - return watch_list_add(xenbus->watch_list, node, key, handler, xenbus, - errp); -} - -static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch, - Error **errp) -{ - trace_xen_bus_remove_watch(watch->node, watch->key); - - watch_list_remove(xenbus->watch_list, watch, errp); -} - static void xen_bus_backend_create(XenBus *xenbus, const char *type, const char *name, char *path, Error **errp) @@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type, trace_xen_bus_backend_create(type, path); again: - tid = xs_transaction_start(xenbus->xsh); + tid = qemu_xen_xs_transaction_start(xenbus->xsh); if (tid == XBT_NULL) { error_setg(errp, "failed xs_transaction_start"); return; } - key = xs_directory(xenbus->xsh, tid, path, &n); + key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n); if (!key) { - if (!xs_transaction_end(xenbus->xsh, tid, true)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) { error_setg_errno(errp, errno, "failed xs_transaction_end"); } return; @@ -300,7 +163,7 @@ again: free(key); - if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) { qobject_unref(opts); if (errno == EAGAIN) { @@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type) trace_xen_bus_type_enumerate(type); - backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); + backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); if (!backend) { goto out; } @@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus) trace_xen_bus_enumerate(); - type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); + type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); if (!type) { return; } @@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus) } } -static void xen_bus_backend_changed(void *opaque) +static void xen_bus_backend_changed(void *opaque, const char *path) { XenBus *xenbus = opaque; @@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus) for (i = 0; i < xenbus->backend_types; i++) { if (xenbus->backend_watch[i]) { - xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL); + xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]); } } @@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus) xenbus->backend_watch = NULL; } - if (xenbus->watch_list) { - watch_list_destroy(xenbus->watch_list); - xenbus->watch_list = NULL; - } - if (xenbus->xsh) { - xs_close(xenbus->xsh); + qemu_xen_xs_close(xenbus->xsh); } } @@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp) trace_xen_bus_realize(); - xenbus->xsh = xs_open(0); + xenbus->xsh = qemu_xen_xs_open(); if (!xenbus->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto fail; @@ -476,19 +334,18 @@ static void xen_bus_realize(BusState *bus, Error **errp) xenbus->backend_id = 0; /* Assume lack of node means dom0 */ } - xenbus->watch_list = watch_list_create(xenbus->xsh); - module_call_init(MODULE_INIT_XEN_BACKEND); type = xen_backend_get_types(&xenbus->backend_types); - xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types); + xenbus->backend_watch = g_new(struct qemu_xs_watch *, + xenbus->backend_types); for (i = 0; i < xenbus->backend_types; i++) { char *node = g_strdup_printf("backend/%s", type[i]); xenbus->backend_watch[i] = - xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed, - &local_err); + xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed, + xenbus, &local_err); if (local_err) { /* This need not be treated as a hard error so don't propagate */ error_reportf_err(local_err, @@ -631,7 +488,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev) } } -static void xen_device_backend_changed(void *opaque) +static void xen_device_backend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; const char *type = object_get_typename(OBJECT(xendev)); @@ -685,66 +542,35 @@ static void xen_device_backend_changed(void *opaque) } } -static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node, - const char *key, - XenWatchHandler handler, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_add_watch(type, xendev->name, node, key); - - return watch_list_add(xendev->watch_list, node, key, handler, xendev, - errp); -} - -static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch, - Error **errp) -{ - const char *type = object_get_typename(OBJECT(xendev)); - - trace_xen_device_remove_watch(type, xendev->name, watch->node, - watch->key); - - watch_list_remove(xendev->watch_list, watch, errp); -} - - static void xen_device_backend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->backend_path = xen_device_get_backend_path(xendev); - perms[0].id = xenbus->backend_id; - perms[0].perms = XS_PERM_NONE; - perms[1].id = xendev->frontend_id; - perms[1].perms = XS_PERM_READ; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, + xenbus->backend_id, xendev->frontend_id, XS_PERM_READ, errp); if (*errp) { error_prepend(errp, "failed to create backend: "); return; } xendev->backend_state_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "state", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "state", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend state: "); return; } xendev->backend_online_watch = - xen_device_add_watch(xendev, xendev->backend_path, - "online", xen_device_backend_changed, - errp); + xs_node_watch(xendev->xsh, xendev->backend_path, + "online", xen_device_backend_changed, xendev, + errp); if (*errp) { error_prepend(errp, "failed to watch backend online: "); return; @@ -757,12 +583,12 @@ static void xen_device_backend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->backend_online_watch) { - xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_online_watch); xendev->backend_online_watch = NULL; } if (xendev->backend_state_watch) { - xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL); + xs_node_unwatch(xendev->xsh, xendev->backend_state_watch); xendev->backend_state_watch = NULL; } @@ -837,7 +663,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev, } } -static void xen_device_frontend_changed(void *opaque) +static void xen_device_frontend_changed(void *opaque, const char *path) { XenDevice *xendev = opaque; XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); @@ -885,7 +711,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) { ERRP_GUARD(); XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); - struct xs_permissions perms[2]; xendev->frontend_path = xen_device_get_frontend_path(xendev); @@ -894,15 +719,11 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) * toolstack. */ if (!xen_device_frontend_exists(xendev)) { - perms[0].id = xendev->frontend_id; - perms[0].perms = XS_PERM_NONE; - perms[1].id = xenbus->backend_id; - perms[1].perms = XS_PERM_READ | XS_PERM_WRITE; - g_assert(xenbus->xsh); - xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms, - ARRAY_SIZE(perms), errp); + xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, + xendev->frontend_id, xenbus->backend_id, + XS_PERM_READ | XS_PERM_WRITE, errp); if (*errp) { error_prepend(errp, "failed to create frontend: "); return; @@ -910,8 +731,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp) } xendev->frontend_state_watch = - xen_device_add_watch(xendev, xendev->frontend_path, "state", - xen_device_frontend_changed, errp); + xs_node_watch(xendev->xsh, xendev->frontend_path, "state", + xen_device_frontend_changed, xendev, errp); if (*errp) { error_prepend(errp, "failed to watch frontend state: "); } @@ -923,8 +744,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) Error *local_err = NULL; if (xendev->frontend_state_watch) { - xen_device_remove_watch(xendev, xendev->frontend_state_watch, - NULL); + xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch); xendev->frontend_state_watch = NULL; } @@ -947,7 +767,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev) void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, Error **errp) { - if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_set_max_grants failed"); } } @@ -956,9 +776,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, unsigned int nr_refs, int prot, Error **errp) { - void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs, - xendev->frontend_id, refs, - prot); + void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs, + xendev->frontend_id, refs, prot); if (!map) { error_setg_errno(errp, errno, @@ -968,112 +787,20 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, return map; } -void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs, unsigned int nr_refs, Error **errp) { - if (xengnttab_unmap(xendev->xgth, map, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) { error_setg_errno(errp, errno, "xengnttab_unmap failed"); } } -static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain, - XenDeviceGrantCopySegment segs[], - unsigned int nr_segs, Error **errp) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? PROT_WRITE : PROT_READ; - void *map; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? seg->dest.foreign.ref : - seg->source.foreign.ref; - } - - map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs, - xendev->frontend_id, refs, - prot); - if (!map) { - error_setg_errno(errp, errno, - "xengnttab_map_domain_grant_refs failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - void *page = map + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->xgth, map, nr_segs)) { - error_setg_errno(errp, errno, "xengnttab_unmap failed"); - } - -done: - g_free(refs); -} - void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, XenDeviceGrantCopySegment segs[], unsigned int nr_segs, Error **errp) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; - - if (!xendev->feature_grant_copy) { - compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp); - return; - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenDeviceGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xendev->frontend_id; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xendev->frontend_id; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) { - error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); - goto done; - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); - break; - } - } - -done: - g_free(xengnttab_segs); + qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id, + (XenGrantCopySegment *)segs, nr_segs, errp); } struct XenEventChannel { @@ -1095,12 +822,12 @@ static bool xen_device_poll(void *opaque) static void xen_device_event(void *opaque) { XenEventChannel *channel = opaque; - unsigned long port = xenevtchn_pending(channel->xeh); + unsigned long port = qemu_xen_evtchn_pending(channel->xeh); if (port == channel->local_port) { xen_device_poll(channel); - xenevtchn_unmask(channel->xeh, port); + qemu_xen_evtchn_unmask(channel->xeh, port); } } @@ -1115,11 +842,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev, } if (channel->ctx) - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); channel->ctx = ctx; - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, xen_device_event, NULL, xen_device_poll, NULL, channel); } @@ -1131,13 +858,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, XenEventChannel *channel = g_new0(XenEventChannel, 1); xenevtchn_port_or_error_t local_port; - channel->xeh = xenevtchn_open(NULL, 0); + channel->xeh = qemu_xen_evtchn_open(); if (!channel->xeh) { error_setg_errno(errp, errno, "failed xenevtchn_open"); goto fail; } - local_port = xenevtchn_bind_interdomain(channel->xeh, + local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh, xendev->frontend_id, port); if (local_port < 0) { @@ -1160,7 +887,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, fail: if (channel->xeh) { - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); } g_free(channel); @@ -1177,7 +904,7 @@ void xen_device_notify_event_channel(XenDevice *xendev, return; } - if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_notify failed"); } } @@ -1193,14 +920,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev, QLIST_REMOVE(channel, list); - aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true, NULL, NULL, NULL, NULL, NULL); - if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { + if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_unbind failed"); } - xenevtchn_close(channel->xeh); + qemu_xen_evtchn_close(channel->xeh); g_free(channel); } @@ -1235,17 +962,12 @@ static void xen_device_unrealize(DeviceState *dev) xen_device_backend_destroy(xendev); if (xendev->xgth) { - xengnttab_close(xendev->xgth); + qemu_xen_gnttab_close(xendev->xgth); xendev->xgth = NULL; } - if (xendev->watch_list) { - watch_list_destroy(xendev->watch_list); - xendev->watch_list = NULL; - } - if (xendev->xsh) { - xs_close(xendev->xsh); + qemu_xen_xs_close(xendev->xsh); xendev->xsh = NULL; } @@ -1290,23 +1012,18 @@ static void xen_device_realize(DeviceState *dev, Error **errp) trace_xen_device_realize(type, xendev->name); - xendev->xsh = xs_open(0); + xendev->xsh = qemu_xen_xs_open(); if (!xendev->xsh) { error_setg_errno(errp, errno, "failed xs_open"); goto unrealize; } - xendev->watch_list = watch_list_create(xendev->xsh); - - xendev->xgth = xengnttab_open(NULL, 0); + xendev->xgth = qemu_xen_gnttab_open(); if (!xendev->xgth) { error_setg_errno(errp, errno, "failed xengnttab_open"); goto unrealize; } - xendev->feature_grant_copy = - (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0); - xen_device_backend_create(xendev, errp); if (*errp) { goto unrealize; @@ -1317,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp) goto unrealize; } - if (xendev_class->realize) { - xendev_class->realize(xendev, errp); - if (*errp) { - goto unrealize; - } - } - xen_device_backend_printf(xendev, "frontend", "%s", xendev->frontend_path); xen_device_backend_printf(xendev, "frontend-id", "%u", @@ -1342,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error **errp) xen_device_frontend_set_state(xendev, XenbusStateInitialising, true); } + if (xendev_class->realize) { + xendev_class->realize(xendev, errp); + if (*errp) { + goto unrealize; + } + } + xendev->exit.notify = xen_device_exit; qemu_add_exit_notifier(&xendev->exit); return; diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index afba71f6eb..4ded3cec23 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -39,11 +39,10 @@ BusState *xen_sysbus; /* ------------------------------------------------------------- */ /* public */ -struct xs_handle *xenstore; +struct qemu_xs_handle *xenstore; const char *xen_protocol; /* private */ -static bool xen_feature_grant_copy; static int debug; int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node, @@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { + if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", strerror(errno)); } @@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs, - xen_domid, refs, prot); + ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs, + prot); if (!ptr) { xen_pv_printf(xendev, 0, "xengnttab_map_domain_grant_refs failed: %s\n", @@ -138,123 +137,31 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, } void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, - unsigned int nr_refs) + uint32_t *refs, unsigned int nr_refs) { assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { + if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) { xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } } -static int compat_copy_grant_refs(struct XenLegacyDevice *xendev, - bool to_domain, - XenGrantCopySegment segs[], - unsigned int nr_segs) -{ - uint32_t *refs = g_new(uint32_t, nr_segs); - int prot = to_domain ? PROT_WRITE : PROT_READ; - void *pages; - unsigned int i; - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - - refs[i] = to_domain ? - seg->dest.foreign.ref : seg->source.foreign.ref; - } - - pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs, - xen_domid, refs, prot); - if (!pages) { - xen_pv_printf(xendev, 0, - "xengnttab_map_domain_grant_refs failed: %s\n", - strerror(errno)); - g_free(refs); - return -1; - } - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - void *page = pages + (i * XC_PAGE_SIZE); - - if (to_domain) { - memcpy(page + seg->dest.foreign.offset, seg->source.virt, - seg->len); - } else { - memcpy(seg->dest.virt, page + seg->source.foreign.offset, - seg->len); - } - } - - if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) { - xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", - strerror(errno)); - } - - g_free(refs); - return 0; -} - int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, bool to_domain, XenGrantCopySegment segs[], unsigned int nr_segs) { - xengnttab_grant_copy_segment_t *xengnttab_segs; - unsigned int i; int rc; assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); - if (!xen_feature_grant_copy) { - return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs); - } - - xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); - - for (i = 0; i < nr_segs; i++) { - XenGrantCopySegment *seg = &segs[i]; - xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; - - if (to_domain) { - xengnttab_seg->flags = GNTCOPY_dest_gref; - xengnttab_seg->dest.foreign.domid = xen_domid; - xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; - xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; - xengnttab_seg->source.virt = seg->source.virt; - } else { - xengnttab_seg->flags = GNTCOPY_source_gref; - xengnttab_seg->source.foreign.domid = xen_domid; - xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; - xengnttab_seg->source.foreign.offset = - seg->source.foreign.offset; - xengnttab_seg->dest.virt = seg->dest.virt; - } - - xengnttab_seg->len = seg->len; - } - - rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs); - + rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid, + segs, nr_segs, NULL); if (rc) { - xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n", - strerror(errno)); - } - - for (i = 0; i < nr_segs; i++) { - xengnttab_grant_copy_segment_t *xengnttab_seg = - &xengnttab_segs[i]; - - if (xengnttab_seg->status != GNTST_okay) { - xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i, - xengnttab_seg->status); - rc = -1; - } + xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n", + strerror(-rc)); } - - g_free(xengnttab_segs); return rc; } @@ -294,13 +201,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, xendev->debug = debug; xendev->local_port = -1; - xendev->evtchndev = xenevtchn_open(NULL, 0); + xendev->evtchndev = qemu_xen_evtchn_open(); if (xendev->evtchndev == NULL) { xen_pv_printf(NULL, 0, "can't open evtchn device\n"); qdev_unplug(DEVICE(xendev), NULL); return NULL; } - qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); + qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev)); xen_pv_insert_xendev(xendev); @@ -367,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, } } +static void xenstore_update_fe(void *opaque, const char *watch) +{ + struct XenLegacyDevice *xendev = opaque; + const char *node; + unsigned int len; + + len = strlen(xendev->fe); + if (strncmp(xendev->fe, watch, len) != 0) { + return; + } + if (watch[len] != '/') { + return; + } + node = watch + len + 1; + + xen_be_frontend_changed(xendev, node); + xen_be_check_state(xendev); +} + /* ------------------------------------------------------------- */ /* Check for possible state transitions and perform them. */ @@ -380,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, */ static int xen_be_try_setup(struct XenLegacyDevice *xendev) { - char token[XEN_BUFSIZE]; int be_state; if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { @@ -401,8 +326,9 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev) } /* setup frontend watch */ - snprintf(token, sizeof(token), "fe:%p", xendev); - if (!xs_watch(xenstore, xendev->fe, token)) { + xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe, + xendev); + if (!xendev->watch) { xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n", xendev->fe); return -1; @@ -466,7 +392,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev) } if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { - xendev->gnttabdev = xengnttab_open(NULL, 0); + xendev->gnttabdev = qemu_xen_gnttab_open(); if (xendev->gnttabdev == NULL) { xen_pv_printf(NULL, 0, "can't open gnttab device\n"); return -1; @@ -524,7 +450,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev, xendev->ops->disconnect(xendev); } if (xendev->gnttabdev) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); xendev->gnttabdev = NULL; } if (xendev->be_state != state) { @@ -591,46 +517,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev) /* ------------------------------------------------------------- */ -static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) -{ - struct XenLegacyDevice *xendev; - char path[XEN_BUFSIZE], token[XEN_BUFSIZE]; - char **dev = NULL; - unsigned int cdev, j; - - /* setup watch */ - snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops); - snprintf(path, sizeof(path), "backend/%s/%d", type, dom); - if (!xs_watch(xenstore, path, token)) { - xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", - path); - return -1; - } - - /* look for backends */ - dev = xs_directory(xenstore, 0, path, &cdev); - if (!dev) { - return 0; - } - for (j = 0; j < cdev; j++) { - xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); - if (xendev == NULL) { - continue; - } - xen_be_check_state(xendev); - } - free(dev); - return 0; -} +struct xenstore_be { + const char *type; + int dom; + struct XenDevOps *ops; +}; -void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops) +static void xenstore_update_be(void *opaque, const char *watch) { + struct xenstore_be *be = opaque; struct XenLegacyDevice *xendev; char path[XEN_BUFSIZE], *bepath; unsigned int len, dev; - len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom); if (strncmp(path, watch, len) != 0) { return; } @@ -644,9 +544,9 @@ void xenstore_update_be(char *watch, char *type, int dom, return; } - xendev = xen_be_get_xendev(type, dom, dev, ops); + xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops); if (xendev != NULL) { - bepath = xs_read(xenstore, 0, xendev->be, &len); + bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len); if (bepath == NULL) { xen_pv_del_xendev(xendev); } else { @@ -657,23 +557,41 @@ void xenstore_update_be(char *watch, char *type, int dom, } } -void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev) +static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) { - char *node; - unsigned int len; + struct XenLegacyDevice *xendev; + char path[XEN_BUFSIZE]; + struct xenstore_be *be = g_new0(struct xenstore_be, 1); + char **dev = NULL; + unsigned int cdev, j; - len = strlen(xendev->fe); - if (strncmp(xendev->fe, watch, len) != 0) { - return; - } - if (watch[len] != '/') { - return; + /* setup watch */ + be->type = type; + be->dom = dom; + be->ops = ops; + snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) { + xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", + path); + return -1; } - node = watch + len + 1; - xen_be_frontend_changed(xendev, node); - xen_be_check_state(xendev); + /* look for backends */ + dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev); + if (!dev) { + return 0; + } + for (j = 0; j < cdev; j++) { + xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); + if (xendev == NULL) { + continue; + } + xen_be_check_state(xendev); + } + free(dev); + return 0; } + /* -------------------------------------------------------------------- */ static void xen_set_dynamic_sysbus(void) @@ -687,29 +605,17 @@ static void xen_set_dynamic_sysbus(void) void xen_be_init(void) { - xengnttab_handle *gnttabdev; - - xenstore = xs_daemon_open(); + xenstore = qemu_xen_xs_open(); if (!xenstore) { xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); exit(1); } - qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); - - if (xen_xc == NULL || xen_fmem == NULL) { + if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) { xen_pv_printf(NULL, 0, "Xen operations not set up\n"); exit(1); } - gnttabdev = xengnttab_open(NULL, 0); - if (gnttabdev != NULL) { - if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) { - xen_feature_grant_copy = true; - } - xengnttab_close(gnttabdev); - } - xen_sysdev = qdev_new(TYPE_XENSYSDEV); sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); @@ -751,14 +657,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port != -1) { return 0; } - xendev->local_port = xenevtchn_bind_interdomain + xendev->local_port = qemu_xen_evtchn_bind_interdomain (xendev->evtchndev, xendev->dom, xendev->remote_port); if (xendev->local_port == -1) { xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); return -1; } xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), xen_pv_evtchn_event, NULL, xendev); return 0; } diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c new file mode 100644 index 0000000000..4b78fbf4bd --- /dev/null +++ b/hw/xen/xen-operations.c @@ -0,0 +1,478 @@ +/* + * QEMU Xen backend support: Operations for true Xen + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse <dwmw2@infradead.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/uuid.h" +#include "qapi/error.h" + +#include "hw/xen/xen_native.h" +#include "hw/xen/xen_backend_ops.h" + +/* + * If we have new enough libxenctrl then we do not want/need these compat + * interfaces, despite what the user supplied cflags might say. They + * must be undefined before including xenctrl.h + */ +#undef XC_WANT_COMPAT_EVTCHN_API +#undef XC_WANT_COMPAT_GNTTAB_API +#undef XC_WANT_COMPAT_MAP_FOREIGN_API + +#include <xenctrl.h> + +/* + * We don't support Xen prior to 4.2.0. + */ + +/* Xen 4.2 through 4.6 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +typedef xc_evtchn xenevtchn_handle; +typedef evtchn_port_or_error_t xenevtchn_port_or_error_t; + +#define xenevtchn_open(l, f) xc_evtchn_open(l, f); +#define xenevtchn_close(h) xc_evtchn_close(h) +#define xenevtchn_fd(h) xc_evtchn_fd(h) +#define xenevtchn_pending(h) xc_evtchn_pending(h) +#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p) +#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p) +#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p) +#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p) + +typedef xc_gnttab xengnttab_handle; + +#define xengnttab_open(l, f) xc_gnttab_open(l, f) +#define xengnttab_close(h) xc_gnttab_close(h) +#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n) +#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p) +#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n) +#define xengnttab_map_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_grant_refs(h, c, d, r, p) +#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \ + xc_gnttab_map_domain_grant_refs(h, c, d, r, p) + +typedef xc_interface xenforeignmemory_handle; + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +#include <xenevtchn.h> +#include <xengnttab.h> +#include <xenforeignmemory.h> + +#endif + +/* Xen before 4.8 */ + +static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment segs[], + unsigned int nr_segs, Error **errp) +{ + uint32_t *refs = g_new(uint32_t, nr_segs); + int prot = to_domain ? PROT_WRITE : PROT_READ; + void *map; + unsigned int i; + int rc = 0; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + + refs[i] = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + } + map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot); + if (!map) { + if (errp) { + error_setg_errno(errp, errno, + "xengnttab_map_domain_grant_refs failed"); + } + rc = -errno; + goto done; + } + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page = map + (i * XEN_PAGE_SIZE); + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + } + + if (xengnttab_unmap(xgt, map, nr_segs)) { + if (errp) { + error_setg_errno(errp, errno, "xengnttab_unmap failed"); + } + rc = -errno; + } + +done: + g_free(refs); + return rc; +} + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + +static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt, + bool to_domain, uint32_t domid, + XenGrantCopySegment *segs, + uint32_t nr_segs, Error **errp) +{ + xengnttab_grant_copy_segment_t *xengnttab_segs; + unsigned int i; + int rc; + + xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (to_domain) { + xengnttab_seg->flags = GNTCOPY_dest_gref; + xengnttab_seg->dest.foreign.domid = domid; + xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; + xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; + xengnttab_seg->source.virt = seg->source.virt; + } else { + xengnttab_seg->flags = GNTCOPY_source_gref; + xengnttab_seg->source.foreign.domid = domid; + xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; + xengnttab_seg->source.foreign.offset = + seg->source.foreign.offset; + xengnttab_seg->dest.virt = seg->dest.virt; + } + + xengnttab_seg->len = seg->len; + } + + if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) { + if (errp) { + error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); + } + rc = -errno; + goto done; + } + + rc = 0; + for (i = 0; i < nr_segs; i++) { + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (xengnttab_seg->status != GNTST_okay) { + if (errp) { + error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); + } + rc = -EIO; + break; + } + } + +done: + g_free(xengnttab_segs); + return rc; +} +#endif + +static xenevtchn_handle *libxenevtchn_backend_open(void) +{ + return xenevtchn_open(NULL, 0); +} + +struct evtchn_backend_ops libxenevtchn_backend_ops = { + .open = libxenevtchn_backend_open, + .close = xenevtchn_close, + .bind_interdomain = xenevtchn_bind_interdomain, + .unbind = xenevtchn_unbind, + .get_fd = xenevtchn_fd, + .notify = xenevtchn_notify, + .unmask = xenevtchn_unmask, + .pending = xenevtchn_pending, +}; + +static xengnttab_handle *libxengnttab_backend_open(void) +{ + return xengnttab_open(NULL, 0); +} + +static int libxengnttab_backend_unmap(xengnttab_handle *xgt, + void *start_address, uint32_t *refs, + uint32_t count) +{ + return xengnttab_unmap(xgt, start_address, count); +} + + +static struct gnttab_backend_ops libxengnttab_backend_ops = { + .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE, + .open = libxengnttab_backend_open, + .close = xengnttab_close, + .grant_copy = libxengnttab_fallback_grant_copy, + .set_max_grants = xengnttab_set_max_grants, + .map_refs = xengnttab_map_domain_grant_refs, + .unmap = libxengnttab_backend_unmap, +}; + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701 + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xfn_pfn_t *pfns, + int *errs) +{ + if (errs) { + return xc_map_foreign_bulk(xen_xc, dom, prot, pfns, errs, pages); + } else { + return xc_map_foreign_pages(xen_xc, dom, prot, pfns, pages); + } +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return munmap(addr, pages * XC_PAGE_SIZE); +} + +#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */ + +static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot, + size_t pages, xen_pfn_t *pfns, + int *errs) +{ + return xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, pfns, + errs); +} + +static int libxenforeignmem_backend_unmap(void *addr, size_t pages) +{ + return xenforeignmemory_unmap(xen_fmem, addr, pages); +} + +#endif + +struct foreignmem_backend_ops libxenforeignmem_backend_ops = { + .map = libxenforeignmem_backend_map, + .unmap = libxenforeignmem_backend_unmap, +}; + +struct qemu_xs_handle { + struct xs_handle *xsh; + NotifierList notifiers; +}; + +static void watch_event(void *opaque) +{ + struct qemu_xs_handle *h = opaque; + + for (;;) { + char **v = xs_check_watch(h->xsh); + + if (!v) { + break; + } + + notifier_list_notify(&h->notifiers, v); + free(v); + } +} + +static struct qemu_xs_handle *libxenstore_open(void) +{ + struct xs_handle *xsh = xs_open(0); + struct qemu_xs_handle *h = g_new0(struct qemu_xs_handle, 1); + + if (!xsh) { + return NULL; + } + + h = g_new0(struct qemu_xs_handle, 1); + h->xsh = xsh; + + notifier_list_init(&h->notifiers); + qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h); + + return h; +} + +static void libxenstore_close(struct qemu_xs_handle *h) +{ + g_assert(notifier_list_empty(&h->notifiers)); + qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL); + xs_close(h->xsh); + g_free(h); +} + +static char *libxenstore_get_domain_path(struct qemu_xs_handle *h, + unsigned int domid) +{ + return xs_get_domain_path(h->xsh, domid); +} + +static char **libxenstore_directory(struct qemu_xs_handle *h, + xs_transaction_t t, const char *path, + unsigned int *num) +{ + return xs_directory(h->xsh, t, path, num); +} + +static void *libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, unsigned int *len) +{ + return xs_read(h->xsh, t, path, len); +} + +static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path, const void *data, + unsigned int len) +{ + return xs_write(h->xsh, t, path, data, len); +} + +static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t, + unsigned int owner, unsigned int domid, + unsigned int perms, const char *path) +{ + struct xs_permissions perms_list[] = { + { + .id = owner, + .perms = XS_PERM_NONE, + }, + { + .id = domid, + .perms = perms, + }, + }; + + if (!xs_mkdir(h->xsh, t, path)) { + return false; + } + + return xs_set_permissions(h->xsh, t, path, perms_list, + ARRAY_SIZE(perms_list)); +} + +static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t, + const char *path) +{ + return xs_rm(h->xsh, t, path); +} + +struct qemu_xs_watch { + char *path; + char *token; + xs_watch_fn fn; + void *opaque; + Notifier notifier; +}; + +static void watch_notify(Notifier *n, void *data) +{ + struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier); + const char **v = data; + + if (!strcmp(w->token, v[XS_WATCH_TOKEN])) { + w->fn(w->opaque, v[XS_WATCH_PATH]); + } +} + +static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1); + QemuUUID uuid; + + qemu_uuid_generate(&uuid); + + w->token = qemu_uuid_unparse_strdup(&uuid); + w->path = g_strdup(path); + w->fn = fn; + w->opaque = opaque; + w->notifier.notify = watch_notify; + + return w; +} + +static void free_watch(struct qemu_xs_watch *w) +{ + g_free(w->token); + g_free(w->path); + + g_free(w); +} + +static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h, + const char *path, xs_watch_fn fn, + void *opaque) +{ + struct qemu_xs_watch *w = new_watch(path, fn, opaque); + + notifier_list_add(&h->notifiers, &w->notifier); + + if (!xs_watch(h->xsh, path, w->token)) { + notifier_remove(&w->notifier); + free_watch(w); + return NULL; + } + + return w; +} + +static void libxenstore_unwatch(struct qemu_xs_handle *h, + struct qemu_xs_watch *w) +{ + xs_unwatch(h->xsh, w->path, w->token); + notifier_remove(&w->notifier); + free_watch(w); +} + +static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h) +{ + return xs_transaction_start(h->xsh); +} + +static bool libxenstore_transaction_end(struct qemu_xs_handle *h, + xs_transaction_t t, bool abort) +{ + return xs_transaction_end(h->xsh, t, abort); +} + +struct xenstore_backend_ops libxenstore_backend_ops = { + .open = libxenstore_open, + .close = libxenstore_close, + .get_domain_path = libxenstore_get_domain_path, + .directory = libxenstore_directory, + .read = libxenstore_read, + .write = libxenstore_write, + .create = libxenstore_create, + .destroy = libxenstore_destroy, + .watch = libxenstore_watch, + .unwatch = libxenstore_unwatch, + .transaction_start = libxenstore_transaction_start, + .transaction_end = libxenstore_transaction_end, +}; + +void setup_xen_backend_ops(void) +{ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800 + xengnttab_handle *xgt = xengnttab_open(NULL, 0); + + if (xgt) { + if (xengnttab_grant_copy(xgt, 0, NULL) == 0) { + libxengnttab_backend_ops.grant_copy = libxengnttab_backend_grant_copy; + } + xengnttab_close(xgt); + } +#endif + xen_evtchn_ops = &libxenevtchn_backend_ops; + xen_gnttab_ops = &libxengnttab_backend_ops; + xen_foreignmem_ops = &libxenforeignmem_backend_ops; + xen_xenstore_ops = &libxenstore_backend_ops; +} diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c index 46ee4a7f02..9b7304e544 100644 --- a/hw/xen/xen_devconfig.c +++ b/hw/xen/xen_devconfig.c @@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev, { char *dom; - dom = xs_get_domain_path(xenstore, xen_domid); + dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid); snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev); free(dom); - dom = xs_get_domain_path(xenstore, 0); + dom = qemu_xen_xs_get_domain_path(xenstore, 0); snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev); free(dom); diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 85c93cffcf..2d33d178ad 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -60,9 +60,9 @@ #include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "xen_pt.h" #include "hw/xen/xen.h" #include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include "qemu/range.h" static bool has_igd_gfx_passthru; diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index e184699740..b20744f7c7 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -1,7 +1,7 @@ #ifndef XEN_PT_H #define XEN_PT_H -#include "hw/xen/xen_common.h" +#include "hw/xen/xen_native.h" #include "xen-host-pci-device.h" #include "qom/object.h" diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index 8b9b554352..2b8680b112 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -15,8 +15,8 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/timer.h" -#include "hw/xen/xen-legacy-backend.h" #include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_MERGE_VALUE(value, data, val_mask) \ (((value) & (val_mask)) | ((data) & ~(val_mask))) diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c index f303f67c9c..0aed3bb6fd 100644 --- a/hw/xen/xen_pt_graphics.c +++ b/hw/xen/xen_pt_graphics.c @@ -5,7 +5,6 @@ #include "qapi/error.h" #include "xen_pt.h" #include "xen-host-pci-device.h" -#include "hw/xen/xen-legacy-backend.h" static unsigned long igd_guest_opregion; static unsigned long igd_host_opregion; diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c index b71563f98a..09cca4eecb 100644 --- a/hw/xen/xen_pt_msi.c +++ b/hw/xen/xen_pt_msi.c @@ -11,9 +11,9 @@ #include "qemu/osdep.h" -#include "hw/xen/xen-legacy-backend.h" -#include "xen_pt.h" #include "hw/i386/apic-msidef.h" +#include "xen_pt.h" +#include "hw/xen/xen-legacy-backend.h" #define XEN_PT_AUTO_ASSIGN -1 diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c index 1a5177b354..be1504b82c 100644 --- a/hw/xen/xen_pvdev.c +++ b/hw/xen/xen_pvdev.c @@ -54,31 +54,17 @@ void xen_config_cleanup(void) struct xs_dirs *d; QTAILQ_FOREACH(d, &xs_cleanup, list) { - xs_rm(xenstore, 0, d->xs_dir); + qemu_xen_xs_destroy(xenstore, 0, d->xs_dir); } } int xenstore_mkdir(char *path, int p) { - struct xs_permissions perms[2] = { - { - .id = 0, /* set owner: dom0 */ - }, { - .id = xen_domid, - .perms = p, - } - }; - - if (!xs_mkdir(xenstore, 0, path)) { + if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) { xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path); return -1; } xenstore_cleanup_dir(g_strdup(path)); - - if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { - xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); - return -1; - } return 0; } @@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val) char abspath[XEN_BUFSIZE]; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) { return -1; } return 0; @@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node) char *str, *ret = NULL; snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - str = xs_read(xenstore, 0, abspath, &len); + str = qemu_xen_xs_read(xenstore, 0, abspath, &len); if (str != NULL) { /* move to qemu-allocated memory to make sure * callers can savely g_free() stuff. */ @@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) return rc; } -void xenstore_update(void *unused) -{ - char **vec = NULL; - intptr_t type, ops, ptr; - unsigned int dom, count; - - vec = xs_read_watch(xenstore, &count); - if (vec == NULL) { - goto cleanup; - } - - if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, - &type, &dom, &ops) == 3) { - xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops); - } - if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { - xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr); - } - -cleanup: - free(vec); -} - const char *xenbus_strstate(enum xenbus_state state) { static const char *const name[] = { @@ -238,14 +201,14 @@ void xen_pv_evtchn_event(void *opaque) struct XenLegacyDevice *xendev = opaque; evtchn_port_t port; - port = xenevtchn_pending(xendev->evtchndev); + port = qemu_xen_evtchn_pending(xendev->evtchndev); if (port != xendev->local_port) { xen_pv_printf(xendev, 0, "xenevtchn_pending returned %d (expected %d)\n", port, xendev->local_port); return; } - xenevtchn_unmask(xendev->evtchndev, port); + qemu_xen_evtchn_unmask(xendev->evtchndev, port); if (xendev->ops->event) { xendev->ops->event(xendev); @@ -257,15 +220,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev) if (xendev->local_port == -1) { return; } - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); - xenevtchn_unbind(xendev->evtchndev, xendev->local_port); + qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL); + qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port); xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); xendev->local_port = -1; } int xen_pv_send_notify(struct XenLegacyDevice *xendev) { - return xenevtchn_notify(xendev->evtchndev, xendev->local_port); + return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port); } /* ------------------------------------------------------------- */ @@ -299,17 +262,15 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev) } if (xendev->fe) { - char token[XEN_BUFSIZE]; - snprintf(token, sizeof(token), "fe:%p", xendev); - xs_unwatch(xenstore, xendev->fe, token); + qemu_xen_xs_unwatch(xenstore, xendev->watch); g_free(xendev->fe); } if (xendev->evtchndev != NULL) { - xenevtchn_close(xendev->evtchndev); + qemu_xen_evtchn_close(xendev->evtchndev); } if (xendev->gnttabdev != NULL) { - xengnttab_close(xendev->gnttabdev); + qemu_xen_gnttab_close(xendev->gnttabdev); } QTAILQ_REMOVE(&xendevs, xendev, next); |