/*
 * QEMU Xen emulation: Xenstore emulation
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define XEN_PAGE_SHIFT 12
#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))

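/*
 * Per-instance state. req_data/rsp_data each buffer one complete
 * XenStore message (wire header plus up to XENSTORE_PAYLOAD_MAX bytes
 * of payload); req_offset/rsp_offset track how much of the current
 * message has been transferred to or from the shared ring so far.
 */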
struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;
};

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
                       xen_xenstore_event, NULL, NULL, NULL, s);
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }
    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }
    return 0;
}

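/*
 * Everything needed to resume mid-message is migrated: the partially
 * transferred request/response buffers and their offsets, plus the
 * guest's event channel port (captured in pre_save so that post_load
 * can rebind to it).
 */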
static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name          = TYPE_XEN_XENSTORE,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init    = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

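/* Returns the guest-side event channel port, or 0 if not yet set up. */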
uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}

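/*
 * A request is complete once we have consumed the wire header plus
 * however many payload bytes the header's len field advertises.
 */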
static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

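/*
 * Stub request handler: every message is currently answered with
 * XS_ERROR and an "ENOSYS" payload. sizeof(enosys) deliberately
 * includes the trailing NUL, which the wire protocol requires for
 * error strings.
 */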
static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char enosys[] = "ENOSYS";

    assert(req_pending(s));
    assert(!s->rsp_pending);

    rsp->type = XS_ERROR;
    rsp->req_id = req->req_id;
    rsp->tx_id = req->tx_id;
    rsp->len = sizeof(enosys);
    memcpy((void *)&rsp[1], enosys, sizeof(enosys));

    s->rsp_pending = true;
    reset_req(s);
}

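/*
 * The ring indices are free-running; they are reduced modulo
 * XENSTORE_RING_SIZE (a power of two) only when used as array offsets,
 * via MASK_XENSTORE_IDX(). Unsigned wraparound makes "prod - cons" the
 * number of unread bytes even when the indices have wrapped.
 *
 * For reference, a guest producer is expected to do roughly the mirror
 * image of this function (a sketch only, ignoring the split needed
 * when the copy crosses the end of the ring; details depend on the
 * guest's xenbus driver):
 *
 *     memcpy(&intf->req[MASK_XENSTORE_IDX(prod)], data, len);
 *     smp_wmb();      // make the data visible before the index update
 *     intf->req_prod = prod + len;
 *     // ... then notify the event channel
 */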
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure reads of the ring contents aren't hoisted above the req_prod read. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}

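/*
 * Mirror image of copy_from_ring(): here we are the producer, so the
 * free space is the ring size minus the bytes still unread, i.e.
 * cons + XENSTORE_RING_SIZE - prod with free-running indices. For
 * example, with a 1024-byte ring, prod == 1030 and cons == 1024 mean
 * 6 bytes are unread and 1024 + 1024 - 1030 = 1018 bytes are free.
 */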
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;

    /*
     * This matches the barrier in copy_from_ring() (or the guest's
     * equivalent) between reading the data from the ring and updating
     * rsp_cons. It protects against the pathological case (which
     * again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}

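/*
 * Pull the next request from the ring in two phases: first the fixed
 * size wire header, then (once req->len is known and validated) the
 * payload. Either phase may span multiple calls as data trickles in.
 */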
static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));

    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}

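/*
 * Push as much of the pending response as the ring has room for; the
 * response state is reset only once the final byte has been queued.
 */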
static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

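/*
 * Event channel upcall: keep moving data in both directions until a
 * full pass makes no progress, then kick the guest if anything was
 * copied either way.
 */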
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

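/*
 * Allocate the guest's end of the XenStore event channel, with
 * DOMID_QEMU as the "remote" domain, much as real Xen binds it to the
 * domain running xenstored.
 */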
static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

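/*
 * Deferred init, called once KVM is set up and the overlay page can
 * be mapped (see xen_xenstore_create() and the note in realize).
 */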
int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    return 0;
}