From ce317461573bac12b10d67699b4ddf1f97cf066c Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 25 Sep 2015 13:21:28 +0800
Subject: virtio: introduce virtqueue_unmap_sg()

Factor out sg unmapping logic. This will be reused by the patch that
can discard descriptor.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Andrew James <andrew.james@hpe.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 7504f8b33a..6f2b96cc7b 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -244,14 +244,12 @@ int virtio_queue_empty(VirtQueue *vq)
     return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
-                    unsigned int len, unsigned int idx)
+static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
+                               unsigned int len)
 {
     unsigned int offset;
     int i;
 
-    trace_virtqueue_fill(vq, elem, len, idx);
-
     offset = 0;
     for (i = 0; i < elem->in_num; i++) {
         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
@@ -267,6 +265,14 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
         cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                   elem->out_sg[i].iov_len,
                                   0, elem->out_sg[i].iov_len);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+                    unsigned int len, unsigned int idx)
+{
+    trace_virtqueue_fill(vq, elem, len, idx);
+
+    virtqueue_unmap_sg(vq, elem, len);
 
     idx = (idx + vring_used_idx(vq)) % vq->vring.num;
 
-- 
cgit v1.2.3


From 29b9f5efd78ae0f9cc02dd169b6e80d2c404bade Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 25 Sep 2015 13:21:29 +0800
Subject: virtio: introduce virtqueue_discard()

This patch introduces virtqueue_discard() to discard a descriptor and
unmap the sgs. This will be used by the patch that will discard
descriptor when packet is truncated.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio.c         | 7 +++++++
 include/hw/virtio/virtio.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6f2b96cc7b..d0bc72e0e4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -267,6 +267,13 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                                   0, elem->out_sg[i].iov_len);
 }
 
+void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
+                       unsigned int len)
+{
+    vq->last_avail_idx--;
+    virtqueue_unmap_sg(vq, elem, len);
+}
+
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx)
 {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 6201ee8ce0..9d09115fab 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -146,6 +146,8 @@ void virtio_del_queue(VirtIODevice *vdev, int n);
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len);
 void virtqueue_flush(VirtQueue *vq, unsigned int count);
+void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
+                       unsigned int len);
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
 
-- 
cgit v1.2.3


From 0cf33fb6b49a19de32859e2cdc6021334f448fb3 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 25 Sep 2015 13:21:30 +0800
Subject: virtio-net: correctly drop truncated packets

When packet is truncated during receiving, we drop the packets but
neither discard the descriptor nor add and signal used
descriptor. This will lead several issues:

- sg mappings are leaked
- rx will be stalled if a lots of packets were truncated

In order to be consistent with vhost, fix by discarding the descriptor
in this case.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/virtio-net.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d388c5571d..a877614e3e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1094,13 +1094,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
          * must have consumed the complete packet.
          * Otherwise, drop it. */
         if (!n->mergeable_rx_bufs && offset < size) {
-#if 0
-            error_report("virtio-net truncated non-mergeable packet: "
-                         "i %zd mergeable %d offset %zd, size %zd, "
-                         "guest hdr len %zd, host hdr len %zd",
-                         i, n->mergeable_rx_bufs,
-                         offset, size, n->guest_hdr_len, n->host_hdr_len);
-#endif
+            virtqueue_discard(q->rx_vq, &elem, total);
             return size;
         }
 
-- 
cgit v1.2.3


From c2dfc5ba3fb3a1b7278c99bfd3bf350202169434 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 10 Sep 2015 16:36:51 +0300
Subject: oslib: rework anonimous RAM allocation

At the moment we first allocate RAM, sometimes more than necessary for
alignment reasons.  We then free the extra RAM.

Rework this to avoid the temporary allocation: reserve the
range by mapping it with PROT_NONE, then use just the
necessary range with MAP_FIXED.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/oslib-posix.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 3ae4987b6b..27972d4add 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -129,9 +129,9 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
     size_t total = size + align - getpagesize();
-    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
-                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
     size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr1;
 
     if (ptr == MAP_FAILED) {
         return NULL;
@@ -140,6 +140,14 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
     if (alignment) {
         *alignment = align;
     }
+
+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+                MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    if (ptr1 == MAP_FAILED) {
+        munmap(ptr, total);
+        return NULL;
+    }
+
     ptr += offset;
     total -= offset;
 
-- 
cgit v1.2.3


From 9fac18f03a9040b67ec38e14d3e1ed34db9c7e06 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 10 Sep 2015 16:41:17 +0300
Subject: oslib: allocate PROT_NONE pages on top of RAM

This inserts a read and write protected page between RAM and QEMU
memory. This makes it harder to exploit QEMU bugs resulting from buffer
overflows in devices using variants of cpu_physical_memory_map,
dma_memory_map etc.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/oslib-posix.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 27972d4add..a0fcdc2ede 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -128,7 +128,7 @@ void *qemu_memalign(size_t alignment, size_t size)
 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    size_t total = size + align - getpagesize();
+    size_t total = size + align;
     void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
     size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
     void *ptr1;
@@ -154,8 +154,8 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
     if (offset > 0) {
         munmap(ptr - offset, offset);
     }
-    if (total > size) {
-        munmap(ptr + size, total - size);
+    if (total > size + getpagesize()) {
+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
     }
 
     trace_qemu_anon_ram_alloc(size, ptr);
@@ -172,7 +172,7 @@ void qemu_anon_ram_free(void *ptr, size_t size)
 {
     trace_qemu_anon_ram_free(ptr, size);
     if (ptr) {
-        munmap(ptr, size);
+        munmap(ptr, size + getpagesize());
     }
 }
 
-- 
cgit v1.2.3


From 8561c9244ddf1122dfe7ccac9b23f506062f1499 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 10 Sep 2015 16:41:17 +0300
Subject: exec: allocate PROT_NONE pages on top of RAM

This inserts a read and write protected page between RAM and QEMU
memory, for file-backend RAM.
This makes it harder to exploit QEMU bugs resulting from buffer
overflows in devices using variants of cpu_physical_memory_map,
dma_memory_map etc.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/exec.c b/exec.c
index 47ada31040..7d90a52252 100644
--- a/exec.c
+++ b/exec.c
@@ -84,6 +84,9 @@ static MemoryRegion io_mem_unassigned;
  */
 #define RAM_RESIZEABLE (1 << 2)
 
+/* An extra page is mapped on top of this RAM.
+ */
+#define RAM_EXTRA (1 << 3)
 #endif
 
 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
@@ -1185,10 +1188,13 @@ static void *file_ram_alloc(RAMBlock *block,
     char *filename;
     char *sanitized_name;
     char *c;
+    void *ptr;
     void *area = NULL;
     int fd;
     uint64_t hpagesize;
+    uint64_t total;
     Error *local_err = NULL;
+    size_t offset;
 
     hpagesize = gethugepagesize(path, &local_err);
     if (local_err) {
@@ -1232,6 +1238,7 @@ static void *file_ram_alloc(RAMBlock *block,
     g_free(filename);
 
     memory = ROUND_UP(memory, hpagesize);
+    total = memory + hpagesize;
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -1243,16 +1250,40 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    area = mmap(0, memory, PROT_READ | PROT_WRITE,
-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
+    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+                -1, 0);
+    if (ptr == MAP_FAILED) {
+        error_setg_errno(errp, errno,
+                         "unable to allocate memory range for hugepages");
+        close(fd);
+        goto error;
+    }
+
+    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
+
+    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
+                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
+                MAP_FIXED,
                 fd, 0);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for hugepages");
+        munmap(ptr, total);
         close(fd);
         goto error;
     }
 
+    if (offset > 0) {
+        munmap(ptr, offset);
+    }
+    ptr += offset;
+    total -= offset;
+
+    if (total > memory + getpagesize()) {
+        munmap(ptr + memory + getpagesize(),
+               total - memory - getpagesize());
+    }
+
     if (mem_prealloc) {
         os_mem_prealloc(fd, area, memory);
     }
@@ -1570,6 +1601,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = share ? RAM_SHARED : 0;
+    new_block->flags |= RAM_EXTRA;
     new_block->host = file_ram_alloc(new_block, size,
                                      mem_path, errp);
     if (!new_block->host) {
@@ -1671,7 +1703,11 @@ static void reclaim_ramblock(RAMBlock *block)
         xen_invalidate_map_cache_entry(block->host);
 #ifndef _WIN32
     } else if (block->fd >= 0) {
-        munmap(block->host, block->max_length);
+        if (block->flags & RAM_EXTRA) {
+            munmap(block->host, block->max_length + getpagesize());
+        } else {
+            munmap(block->host, block->max_length);
+        }
         close(block->fd);
 #endif
     } else {
-- 
cgit v1.2.3


From 798595075bf51c7e3125d260a19d860b9aa63e69 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Mon, 28 Sep 2015 15:07:21 -0300
Subject: pc: Add a comment explaining why pc_compat_2_4() doesn't exist

pc_compat_2_4() doesn't exist, and we shouldn't create one. Add a
comment explaining why the function doesn't exist and why pc_compat_*()
functions are deprecated.

Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/pc_piix.c | 7 +++++++
 hw/i386/pc_q35.c  | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 3ffb05f93e..52fdd558e8 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -301,6 +301,13 @@ static void pc_init1(MachineState *machine,
     }
 }
 
+/* Looking for a pc_compat_2_4() function? It doesn't exist.
+ * pc_compat_*() functions that run on machine-init time and
+ * change global QEMU state are deprecated. Please don't create
+ * one, and implement any pc-*-2.4 (and newer) compat code in
+ * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options().
+ */
+
 static void pc_compat_2_3(MachineState *machine)
 {
     PCMachineState *pcms = PC_MACHINE(machine);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1b7d3b644e..837e430666 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -284,6 +284,13 @@ static void pc_q35_init(MachineState *machine)
     }
 }
 
+/* Looking for a pc_compat_2_4() function? It doesn't exist.
+ * pc_compat_*() functions that run on machine-init time and
+ * change global QEMU state are deprecated. Please don't create
+ * one, and implement any pc-*-2.4 (and newer) compat code in
+ * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options().
+ */
+
 static void pc_compat_2_3(MachineState *machine)
 {
     PCMachineState *pcms = PC_MACHINE(machine);
-- 
cgit v1.2.3


From 0d583647a7fc73f621eeb64ed703556c4bbebfcc Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 19 May 2015 13:29:51 -0700
Subject: virtio: Notice when the system doesn't support MSIx at all

And do not issue an error_report in that case.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-pci.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index eda8205d58..6703806f83 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1491,12 +1491,17 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
         pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
     }
 
-    if (proxy->nvectors &&
-        msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
-                                proxy->msix_bar)) {
-        error_report("unable to init msix vectors to %" PRIu32,
-                     proxy->nvectors);
-        proxy->nvectors = 0;
+    if (proxy->nvectors) {
+        int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors,
+                                          proxy->msix_bar);
+        if (err) {
+            /* Notice when a system that supports MSIx can't initialize it.  */
+            if (err != -ENOTSUP) {
+                error_report("unable to init msix vectors to %" PRIu32,
+                             proxy->nvectors);
+            }
+            proxy->nvectors = 0;
+        }
     }
 
     proxy->pci_dev.config_write = virtio_write_config;
-- 
cgit v1.2.3


From ca06d9cc6691e23b6d02e07b44ea549aeac60151 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 29 Sep 2015 14:12:03 +0200
Subject: vhost-user-test: do not reinvent glib-compat.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

glib-compat.h has the gunk to support both old-style and new-style
gthread functions.  Use it instead of reinventing it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 tests/vhost-user-test.c | 113 +++++++-----------------------------------------
 1 file changed, 16 insertions(+), 97 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index e301db79b9..0e04f06839 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -8,7 +8,6 @@
  *
  */
 
-#define QEMU_GLIB_COMPAT_H
 #include <glib.h>
 
 #include "libqtest.h"
@@ -30,12 +29,6 @@
 #define HAVE_MONOTONIC_TIME
 #endif
 
-#if GLIB_CHECK_VERSION(2, 32, 0)
-#define HAVE_MUTEX_INIT
-#define HAVE_COND_INIT
-#define HAVE_THREAD_NEW
-#endif
-
 #define QEMU_CMD_ACCEL  " -machine accel=tcg"
 #define QEMU_CMD_MEM    " -m 512 -object memory-backend-file,id=mem,size=512M,"\
                         "mem-path=%s,share=on -numa node,memdev=mem"
@@ -113,93 +106,21 @@ static VhostUserMsg m __attribute__ ((unused));
 
 int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS];
 static VhostUserMemory memory;
-static GMutex *data_mutex;
-static GCond *data_cond;
-
-static gint64 _get_time(void)
-{
-#ifdef HAVE_MONOTONIC_TIME
-    return g_get_monotonic_time();
-#else
-    GTimeVal time;
-    g_get_current_time(&time);
-
-    return time.tv_sec * G_TIME_SPAN_SECOND + time.tv_usec;
-#endif
-}
-
-static GMutex *_mutex_new(void)
-{
-    GMutex *mutex;
-
-#ifdef HAVE_MUTEX_INIT
-    mutex = g_new(GMutex, 1);
-    g_mutex_init(mutex);
-#else
-    mutex = g_mutex_new();
-#endif
-
-    return mutex;
-}
+static CompatGMutex data_mutex;
+static CompatGCond data_cond;
 
-static void _mutex_free(GMutex *mutex)
-{
-#ifdef HAVE_MUTEX_INIT
-    g_mutex_clear(mutex);
-    g_free(mutex);
-#else
-    g_mutex_free(mutex);
-#endif
-}
-
-static GCond *_cond_new(void)
-{
-    GCond *cond;
-
-#ifdef HAVE_COND_INIT
-    cond = g_new(GCond, 1);
-    g_cond_init(cond);
-#else
-    cond = g_cond_new();
-#endif
-
-    return cond;
-}
-
-static gboolean _cond_wait_until(GCond *cond, GMutex *mutex, gint64 end_time)
+#if !GLIB_CHECK_VERSION(2, 32, 0)
+static gboolean g_cond_wait_until(CompatGCond cond, CompatGMutex mutex,
+                                  gint64 end_time)
 {
     gboolean ret = FALSE;
-#ifdef HAVE_COND_INIT
-    ret = g_cond_wait_until(cond, mutex, end_time);
-#else
+    end_time -= g_get_monotonic_time();
     GTimeVal time = { end_time / G_TIME_SPAN_SECOND,
                       end_time % G_TIME_SPAN_SECOND };
     ret = g_cond_timed_wait(cond, mutex, &time);
-#endif
     return ret;
 }
-
-static void _cond_free(GCond *cond)
-{
-#ifdef HAVE_COND_INIT
-    g_cond_clear(cond);
-    g_free(cond);
-#else
-    g_cond_free(cond);
 #endif
-}
-
-static GThread *_thread_new(const gchar *name, GThreadFunc func, gpointer data)
-{
-    GThread *thread = NULL;
-    GError *error = NULL;
-#ifdef HAVE_THREAD_NEW
-    thread = g_thread_try_new(name, func, data, &error);
-#else
-    thread = g_thread_create(func, data, TRUE, &error);
-#endif
-    return thread;
-}
 
 static void read_guest_mem(void)
 {
@@ -208,11 +129,11 @@ static void read_guest_mem(void)
     int i, j;
     size_t size;
 
-    g_mutex_lock(data_mutex);
+    g_mutex_lock(&data_mutex);
 
-    end_time = _get_time() + 5 * G_TIME_SPAN_SECOND;
+    end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
     while (!fds_num) {
-        if (!_cond_wait_until(data_cond, data_mutex, end_time)) {
+        if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) {
             /* timeout has passed */
             g_assert(fds_num);
             break;
@@ -252,7 +173,7 @@ static void read_guest_mem(void)
     }
 
     g_assert_cmpint(1, ==, 1);
-    g_mutex_unlock(data_mutex);
+    g_mutex_unlock(&data_mutex);
 }
 
 static void *thread_function(void *data)
@@ -280,7 +201,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         return;
     }
 
-    g_mutex_lock(data_mutex);
+    g_mutex_lock(&data_mutex);
     memcpy(p, buf, VHOST_USER_HDR_SIZE);
 
     if (msg.size) {
@@ -313,7 +234,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
         fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int));
 
         /* signal the test that it can continue */
-        g_cond_signal(data_cond);
+        g_cond_signal(&data_cond);
         break;
 
     case VHOST_USER_SET_VRING_KICK:
@@ -330,7 +251,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     default:
         break;
     }
-    g_mutex_unlock(data_mutex);
+    g_mutex_unlock(&data_mutex);
 }
 
 static const char *init_hugepagefs(void)
@@ -395,9 +316,9 @@ int main(int argc, char **argv)
     qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr);
 
     /* run the main loop thread so the chardev may operate */
-    data_mutex = _mutex_new();
-    data_cond = _cond_new();
-    _thread_new(NULL, thread_function, NULL);
+    g_mutex_init(&data_mutex);
+    g_cond_init(&data_cond);
+    g_thread_new(NULL, thread_function, NULL);
 
     qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path);
     s = qtest_start(qemu_cmd);
@@ -414,8 +335,6 @@ int main(int argc, char **argv)
     /* cleanup */
     unlink(socket_path);
     g_free(socket_path);
-    _cond_free(data_cond);
-    _mutex_free(data_mutex);
 
     return ret;
 }
-- 
cgit v1.2.3


From 8a9b6b37dabf00388e8069a2f5c0f659626693b3 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 24 Sep 2015 18:22:01 +0200
Subject: vhost-user: unit test for new messages

Data is empty for now, but do make sure master
sets the new feature bit flag.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 tests/vhost-user-test.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 0e04f06839..87281b9d9f 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -46,6 +46,8 @@
 
 #define VHOST_MEMORY_MAX_NREGIONS    8
 
+#define VHOST_USER_F_PROTOCOL_FEATURES 30
+
 typedef enum VhostUserRequest {
     VHOST_USER_NONE = 0,
     VHOST_USER_GET_FEATURES = 1,
@@ -62,6 +64,8 @@ typedef enum VhostUserRequest {
     VHOST_USER_SET_VRING_KICK = 12,
     VHOST_USER_SET_VRING_CALL = 13,
     VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
     VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -211,6 +215,20 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
 
     switch (msg.request) {
     case VHOST_USER_GET_FEATURES:
+        /* send back features to qemu */
+        msg.flags |= VHOST_USER_REPLY_MASK;
+        msg.size = sizeof(m.u64);
+        msg.u64 = 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
+        p = (uint8_t *) &msg;
+        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+        break;
+
+    case VHOST_USER_SET_FEATURES:
+	g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES),
+			!=, 0ULL);
+        break;
+
+    case VHOST_USER_GET_PROTOCOL_FEATURES:
         /* send back features to qemu */
         msg.flags |= VHOST_USER_REPLY_MASK;
         msg.size = sizeof(m.u64);
-- 
cgit v1.2.3


From df0acded19ec4b826aa095cfc19d341bd66fafd3 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Tue, 29 Sep 2015 16:53:28 +0200
Subject: memhp: extend address auto assignment to support gaps

setting gap to TRUE will make sparse DIMM
address auto allocation, leaving gaps between
a new DIMM address and preceeding existing DIMM.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/pc.c             |  3 ++-
 hw/mem/pc-dimm.c         | 15 +++++++++------
 hw/ppc/spapr.c           |  2 +-
 include/hw/mem/pc-dimm.h |  7 ++++---
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 461c128d23..ef027364ce 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1644,7 +1644,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, false,
+                        &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index bb04862de8..6cc6ac30e7 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -32,7 +32,8 @@ typedef struct pc_dimms_capacity {
 } pc_dimms_capacity;
 
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp)
+                         MemoryRegion *mr, uint64_t align, bool gap,
+                         Error **errp)
 {
     int slot;
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -48,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
 
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
-                                 !addr ? NULL : &addr, align,
+                                 !addr ? NULL : &addr, align, gap,
                                  memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
@@ -287,8 +288,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque)
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp)
+                               uint64_t *hint, uint64_t align, bool gap,
+                               uint64_t size, Error **errp)
 {
     GSList *list = NULL, *item;
     uint64_t new_addr, ret = 0;
@@ -333,13 +334,15 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
             goto out;
         }
 
-        if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
+        if (ranges_overlap(dimm->addr, dimm_size, new_addr,
+                           size + (gap ? 1 : 0))) {
             if (hint) {
                 DeviceState *d = DEVICE(dimm);
                 error_setg(errp, "address range conflicts with '%s'", d->id);
                 goto out;
             }
-            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
+            new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
+                                     align);
         }
     }
     ret = new_addr;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a9b5f2a669..d1b0e53668 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2096,7 +2096,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
+    pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index d83bf30ea9..c1ee7b0408 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -83,15 +83,16 @@ typedef struct MemoryHotplugState {
 
 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
                                uint64_t address_space_size,
-                               uint64_t *hint, uint64_t align, uint64_t size,
-                               Error **errp);
+                               uint64_t *hint, uint64_t align, bool gap,
+                               uint64_t size, Error **errp);
 
 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
 int qmp_pc_dimm_device_list(Object *obj, void *opaque);
 uint64_t pc_existing_dimms_capacity(Error **errp);
 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-                         MemoryRegion *mr, uint64_t align, Error **errp);
+                         MemoryRegion *mr, uint64_t align, bool gap,
+                         Error **errp);
 void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
                            MemoryRegion *mr);
 #endif
-- 
cgit v1.2.3


From aa8580cddf011e8cedcf87f7a0fdea7549fc4704 Mon Sep 17 00:00:00 2001
From: Igor Mammedov <imammedo@redhat.com>
Date: Tue, 29 Sep 2015 16:53:29 +0200
Subject: pc: memhp: force gaps between DIMM's GPA

mapping DIMMs non contiguously allows to workaround
virtio bug reported earlier:
http://lists.nongnu.org/archive/html/qemu-devel/2015-08/msg00522.html
in this case guest kernel doesn't allocate buffers
that can cross DIMM boundary keeping each buffer
local to a DIMM.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/pc.c         | 6 ++++--
 hw/i386/pc_piix.c    | 1 +
 hw/i386/pc_q35.c     | 1 +
 include/hw/i386/pc.h | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index ef027364ce..efbd41a1f1 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1629,6 +1629,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm);
@@ -1644,8 +1645,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev,
         goto out;
     }
 
-    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, false,
-                        &local_err);
+    pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align,
+                        pcmc->inter_dimm_gap, &local_err);
     if (local_err) {
         goto out;
     }
@@ -1946,6 +1947,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
+    pcmc->inter_dimm_gap = true;
     pcmc->get_hotplug_handler = mc->get_hotplug_handler;
     mc->get_hotplug_handler = pc_get_hotpug_handler;
     mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 52fdd558e8..4514cd1462 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -489,6 +489,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m)
     m->alias = NULL;
     m->is_default = 0;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 837e430666..1f100b1a69 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -392,6 +392,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
     pc_q35_2_5_machine_options(m);
     m->alias = NULL;
     pcmc->broken_reserved_end = true;
+    pcmc->inter_dimm_gap = false;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_4);
 }
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ab5413f561..c13e91ddde 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -60,6 +60,7 @@ struct PCMachineClass {
 
     /*< public >*/
     bool broken_reserved_end;
+    bool inter_dimm_gap;
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
 };
-- 
cgit v1.2.3


From 1b7e1e3b463a6e5c117498b192cb07603c04b668 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 30 Sep 2015 18:01:21 +0300
Subject: vhost-user-test: use tmpfs by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most people don't run make check by default, so they skip vhost-user
unit tests.  Solve this by using tmpfs instead, unless hugetlbfs is
specified (using an environment variable).

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 tests/vhost-user-test.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 87281b9d9f..5e63cbc112 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -272,17 +272,11 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
     g_mutex_unlock(&data_mutex);
 }
 
-static const char *init_hugepagefs(void)
+static const char *init_hugepagefs(const char *path)
 {
-    const char *path;
     struct statfs fs;
     int ret;
 
-    path = getenv("QTEST_HUGETLBFS_PATH");
-    if (!path) {
-        path = "/hugetlbfs";
-    }
-
     if (access(path, R_OK | W_OK | X_OK)) {
         g_test_message("access on path (%s): %s\n", path, strerror(errno));
         return NULL;
@@ -309,19 +303,31 @@ int main(int argc, char **argv)
 {
     QTestState *s = NULL;
     CharDriverState *chr = NULL;
-    const char *hugefs = 0;
+    const char *hugefs;
     char *socket_path = 0;
     char *qemu_cmd = 0;
     char *chr_path = 0;
     int ret;
+    char template[] = "/tmp/vhost-test-XXXXXX";
+    const char *tmpfs;
+    const char *root;
 
     g_test_init(&argc, &argv, NULL);
 
     module_call_init(MODULE_INIT_QOM);
 
-    hugefs = init_hugepagefs();
-    if (!hugefs) {
-        return 0;
+    tmpfs = mkdtemp(template);
+    if (!tmpfs) {
+          g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
+    }
+    g_assert(tmpfs);
+
+    hugefs = getenv("QTEST_HUGETLBFS_PATH");
+    if (hugefs) {
+        root = init_hugepagefs(hugefs);
+        g_assert(root);
+    } else {
+        root = tmpfs;
     }
 
     socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid());
@@ -338,7 +344,7 @@ int main(int argc, char **argv)
     g_cond_init(&data_cond);
     g_thread_new(NULL, thread_function, NULL);
 
-    qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path);
+    qemu_cmd = g_strdup_printf(QEMU_CMD, root, socket_path);
     s = qtest_start(qemu_cmd);
     g_free(qemu_cmd);
 
@@ -354,5 +360,12 @@ int main(int argc, char **argv)
     unlink(socket_path);
     g_free(socket_path);
 
+    ret = rmdir(tmpfs);
+    if (ret != 0) {
+        g_test_message("unable to rmdir: path (%s): %s\n",
+                       tmpfs, strerror(errno));
+    }
+    g_assert_cmpint(ret, ==, 0);
+
     return ret;
 }
-- 
cgit v1.2.3


From 6fdac09370530be0cc6fe9e8d425c0670ba994b1 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 1 Oct 2015 15:50:52 +0300
Subject: vhost-user-test: fix predictable filename on tmpfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

vhost-user-test uses getpid to create a unique filename. This name is
predictable, and a security problem.  Instead, use a tmp directory
created by mkdtemp, which is a suggested best practice.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 tests/vhost-user-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 5e63cbc112..56df5cc552 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -330,7 +330,7 @@ int main(int argc, char **argv)
         root = tmpfs;
     }
 
-    socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid());
+    socket_path = g_strdup_printf("%s/vhost.sock", tmpfs);
 
     /* create char dev and add read handlers */
     qemu_add_opts(&qemu_chardev_opts);
-- 
cgit v1.2.3