From a52a8841038638afe54ffb00e0aca48de0b1539a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 9 Jan 2013 23:50:00 +0200
Subject: e1000: document ICS read behaviour

Add code comment to clarify the reason we set ICS with ICR:
the reason was previously undocumented and git
log confused rather than clarified the comments.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/e1000.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/hw/e1000.c b/hw/e1000.c
index ef06ca1894..ee85c53d38 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -237,7 +237,17 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val)
         val |= E1000_ICR_INT_ASSERTED;
     }
     s->mac_reg[ICR] = val;
+
+    /*
+     * Make sure ICR and ICS registers have the same value.
+     * The spec says that the ICS register is write-only.  However in practice,
+     * on real hardware ICS is readable, and for reads it has the same value as
+     * ICR (except that ICS does not have the clear on read behaviour of ICR).
+     *
+     * The VxWorks PRO/1000 driver uses this behaviour.
+     */
     s->mac_reg[ICS] = val;
+
     qemu_set_irq(s->dev.irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
 }
 
-- 
cgit v1.2.3


From 4b25966ab976f3a7fd9008193b2defcc82f8f04d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Jan 2013 13:12:35 +0200
Subject: rules.mak: cleanup config generation rules

This addresses two issues with config generation:
1. The rule generating the timestamp has a side effect,
   so cleanup on error does not work.
2. The rule for handling the timestamp is too generic:
   it can create any missing .h file.
   As a result, when a .h file is removed, the build
   might try to create it using this rule, which
   results in build errors.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 rules.mak | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/rules.mak b/rules.mak
index 6d82c0d5a0..d11a5b4f00 100644
--- a/rules.mak
+++ b/rules.mak
@@ -82,12 +82,11 @@ TRACETOOL=$(PYTHON) $(SRC_PATH)/scripts/tracetool.py
 
 # Generate timestamp files for .h include files
 
-%.h: %.h-timestamp
-	@test -f $@ || cp $< $@
+config-%.h: config-%.h-timestamp
+	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 
-%.h-timestamp: %.mak
-	$(call quiet-command, sh $(SRC_PATH)/scripts/create_config < $< > $@, "  GEN   $(TARGET_DIR)$*.h")
-	@cmp $@ $*.h >/dev/null 2>&1 || cp $@ $*.h
+config-%.h-timestamp: config-%.mak
+	$(call quiet-command, sh $(SRC_PATH)/scripts/create_config < $< > $@, "  GEN   $(TARGET_DIR)config-$*.h")
 
 # will delete the target of a rule if commands exit with a nonzero exit status
 .DELETE_ON_ERROR:
-- 
cgit v1.2.3


From 6f329a55305c3b14da3c7b35f19379bae745e728 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Jan 2013 14:47:33 +0200
Subject: Makefile: clean timestamp generation rule

Create the timestamp with a rule that has no side effects.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 trace/Makefile.objs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/trace/Makefile.objs b/trace/Makefile.objs
index 27fe26b5c2..dde9d5784e 100644
--- a/trace/Makefile.objs
+++ b/trace/Makefile.objs
@@ -4,24 +4,24 @@
 # Auto-generated header for tracing routines
 
 $(obj)/generated-tracers.h: $(obj)/generated-tracers.h-timestamp
+	@cmp -s $< $@ || cp $< $@
 $(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
 	$(call quiet-command,$(TRACETOOL) \
 		--format=h \
 		--backend=$(TRACE_BACKEND) \
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
-	@cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
 
 ######################################################################
 # Auto-generated tracing routines (non-DTrace)
 
 ifneq ($(TRACE_BACKEND),dtrace)
 $(obj)/generated-tracers.c: $(obj)/generated-tracers.c-timestamp
+	@cmp -s $< $@ || cp $< $@
 $(obj)/generated-tracers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
 	$(call quiet-command,$(TRACETOOL) \
 		--format=c \
 		--backend=$(TRACE_BACKEND) \
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
-	@cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
 
 $(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.h
 endif
-- 
cgit v1.2.3


From 7586317bc0db3b993446b21eec914f5b66645ee4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 15 Jan 2013 13:27:54 +0200
Subject: rules.mak: make clean should blow away timestamp files

Using a global pattern makes it easier to clean out
old generated files.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 rules.mak | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/rules.mak b/rules.mak
index d11a5b4f00..edc2552f08 100644
--- a/rules.mak
+++ b/rules.mak
@@ -88,6 +88,11 @@ config-%.h: config-%.h-timestamp
 config-%.h-timestamp: config-%.mak
 	$(call quiet-command, sh $(SRC_PATH)/scripts/create_config < $< > $@, "  GEN   $(TARGET_DIR)config-$*.h")
 
+.PHONY: clean-timestamp
+clean-timestamp: # generated stamp files end in "-timestamp"
+	rm -f *-timestamp
+clean: clean-timestamp
+
 # will delete the target of a rule if commands exit with a nonzero exit status
 .DELETE_ON_ERROR:
 
-- 
cgit v1.2.3


From 41dc8a67c7dcecdf7ae1cd25db3c46f2b42a221f Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 16 Jan 2013 11:37:40 +0200
Subject: virtio-net: revert mac on reset

Once guest overrides virtio net primary mac,
it retains the value set until qemu exit.
This is inconsistent with standard nic behaviour.
To fix, revert the mac to the original value on reset.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 3bb01b1037..4d80a25744 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -199,6 +199,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
     n->mac_table.multi_overflow = 0;
     n->mac_table.uni_overflow = 0;
     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
+    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
     memset(n->vlans, 0, MAX_VLAN >> 3);
 }
 
-- 
cgit v1.2.3


From 921ac5d0f3a0df869db5ce4edf752f51d8b1596a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 22 Jan 2013 23:44:44 +0800
Subject: virtio-net: remove layout assumptions for ctrl vq

Virtio-net code makes assumption about virtqueue descriptor layout
(e.g. sg[0] is the header, sg[1] is the data buffer).

This patch makes code not rely on the layout of descriptors.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c | 129 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 75 insertions(+), 54 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 4d80a25744..9ea987562f 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -316,44 +316,44 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
 }
 
 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
-                                     VirtQueueElement *elem)
+                                     struct iovec *iov, unsigned int iov_cnt)
 {
     uint8_t on;
+    size_t s;
 
-    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
-        error_report("virtio-net ctrl invalid rx mode command");
-        exit(1);
+    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
+    if (s != sizeof(on)) {
+        return VIRTIO_NET_ERR;
     }
 
-    on = ldub_p(elem->out_sg[1].iov_base);
-
-    if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
+    if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
         n->promisc = on;
-    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
+    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
         n->allmulti = on;
-    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
+    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
         n->alluni = on;
-    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
+    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
         n->nomulti = on;
-    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
+    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
         n->nouni = on;
-    else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
+    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
         n->nobcast = on;
-    else
+    } else {
         return VIRTIO_NET_ERR;
+    }
 
     return VIRTIO_NET_OK;
 }
 
 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
-                                 VirtQueueElement *elem)
+                                 struct iovec *iov, unsigned int iov_cnt)
 {
     struct virtio_net_ctrl_mac mac_data;
+    size_t s;
 
-    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
-        elem->out_sg[1].iov_len < sizeof(mac_data) ||
-        elem->out_sg[2].iov_len < sizeof(mac_data))
+    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
         return VIRTIO_NET_ERR;
+    }
 
     n->mac_table.in_use = 0;
     n->mac_table.first_multi = 0;
@@ -361,54 +361,72 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
     n->mac_table.multi_overflow = 0;
     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 
-    mac_data.entries = ldl_p(elem->out_sg[1].iov_base);
+    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
+                   sizeof(mac_data.entries));
+    if (s != sizeof(mac_data.entries)) {
+        return VIRTIO_NET_ERR; /* short read: entries is incomplete */
+    }
+    mac_data.entries = ldl_p(&mac_data.entries);
+    iov_discard_front(&iov, &iov_cnt, s);
 
-    if (sizeof(mac_data.entries) +
-        (mac_data.entries * ETH_ALEN) > elem->out_sg[1].iov_len)
+    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
         return VIRTIO_NET_ERR;
+    }
 
     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
-        memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
-               mac_data.entries * ETH_ALEN);
+        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+                       mac_data.entries * ETH_ALEN);
+        if (s != mac_data.entries * ETH_ALEN) {
+            return VIRTIO_NET_ERR;
+        }
         n->mac_table.in_use += mac_data.entries;
     } else {
         n->mac_table.uni_overflow = 1;
     }
 
+    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
+
     n->mac_table.first_multi = n->mac_table.in_use;
 
-    mac_data.entries = ldl_p(elem->out_sg[2].iov_base);
+    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
+                   sizeof(mac_data.entries));
+    if (s != sizeof(mac_data.entries)) {
+        return VIRTIO_NET_ERR;
+    }
+    mac_data.entries = ldl_p(&mac_data.entries);
+    iov_discard_front(&iov, &iov_cnt, s);
 
-    if (sizeof(mac_data.entries) +
-        (mac_data.entries * ETH_ALEN) > elem->out_sg[2].iov_len)
+    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
         return VIRTIO_NET_ERR;
+    }
 
-    if (mac_data.entries) {
-        if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
-            memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
-                   elem->out_sg[2].iov_base + sizeof(mac_data),
-                   mac_data.entries * ETH_ALEN);
-            n->mac_table.in_use += mac_data.entries;
-        } else {
-            n->mac_table.multi_overflow = 1;
+    if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
+        s = iov_to_buf(iov, iov_cnt, 0, /* append after unicast entries */
+                       n->mac_table.macs + n->mac_table.in_use * ETH_ALEN,
+                       mac_data.entries * ETH_ALEN);
+        if (s != mac_data.entries * ETH_ALEN) {
+            return VIRTIO_NET_ERR;
         }
+        n->mac_table.in_use += mac_data.entries;
+    } else {
+        n->mac_table.multi_overflow = 1;
     }
 
     return VIRTIO_NET_OK;
 }
 
 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
-                                        VirtQueueElement *elem)
+                                        struct iovec *iov, unsigned int iov_cnt)
 {
     uint16_t vid;
+    size_t s;
 
-    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
-        error_report("virtio-net ctrl invalid vlan command");
+    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
+    vid = lduw_p(&vid);
+    if (s != sizeof(vid)) {
         return VIRTIO_NET_ERR;
     }
 
-    vid = lduw_p(elem->out_sg[1].iov_base);
-
     if (vid >= MAX_VLAN)
         return VIRTIO_NET_ERR;
 
@@ -428,30 +446,33 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
     struct virtio_net_ctrl_hdr ctrl;
     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
     VirtQueueElement elem;
+    size_t s;
+    struct iovec *iov;
+    unsigned int iov_cnt;
 
     while (virtqueue_pop(vq, &elem)) {
-        if ((elem.in_num < 1) || (elem.out_num < 1)) {
+        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
+            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
             error_report("virtio-net ctrl missing headers");
             exit(1);
         }
 
-        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
-            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
-            error_report("virtio-net ctrl header not in correct element");
-            exit(1);
+        iov = elem.out_sg;
+        iov_cnt = elem.out_num;
+        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
+        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
+        if (s != sizeof(ctrl)) {
+            status = VIRTIO_NET_ERR;
+        } else if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE) {
+            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
+        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
+        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
         }
 
-        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
-        ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));
-
-        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE)
-            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
-        else if (ctrl.class == VIRTIO_NET_CTRL_MAC)
-            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
-        else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
-            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
-
-        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);
+        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
+        assert(s == sizeof(status));
 
         virtqueue_push(vq, &elem, sizeof(status));
         virtio_notify(vdev, vq);
-- 
cgit v1.2.3


From c1943a3f3774ee1aad51e8cc5b8cd24e66e198a5 Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Tue, 22 Jan 2013 23:44:45 +0800
Subject: virtio-net: introduce a new macaddr control

In the virtio-net guest driver, we currently write the MAC address to
PCI config space byte by byte, which means there is an intermediate
step where the MAC is wrong. This patch introduces a new control
command that sets the MAC address atomically.

VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility.

"mac" field will be set to read-only when VIRTIO_NET_F_CTRL_MAC_ADDR
is acked.

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pc_piix.c    |  4 ++++
 hw/virtio-net.c | 13 ++++++++++++-
 hw/virtio-net.h | 12 ++++++++++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index b9a9b2efe1..ba09714d6c 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -309,6 +309,10 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
             .driver   = "usb-tablet",\
             .property = "usb_version",\
             .value    = stringify(1),\
+        },{\
+            .driver   = "virtio-net-pci",\
+            .property = "ctrl_mac_addr",\
+            .value    = "off",      \
         }
 
 static QEMUMachine pc_machine_v1_3 = {
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 9ea987562f..04834e99a7 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -93,7 +93,8 @@ static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 
     memcpy(&netcfg, config, sizeof(netcfg));
 
-    if (memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
+    if (!(n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR & 1) &&
+        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
         memcpy(n->mac, netcfg.mac, ETH_ALEN);
         qemu_format_nic_info_str(&n->nic->nc, n->mac);
     }
@@ -351,6 +352,16 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
     struct virtio_net_ctrl_mac mac_data;
     size_t s;
 
+    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
+        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
+            return VIRTIO_NET_ERR;
+        }
+        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
+        assert(s == sizeof(n->mac));
+        qemu_format_nic_info_str(&n->nic->nc, n->mac);
+        return VIRTIO_NET_OK;
+    }
+
     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
         return VIRTIO_NET_ERR;
     }
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index d46fb9840f..1ec632f2f3 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,6 +44,8 @@
 #define VIRTIO_NET_F_CTRL_VLAN  19      /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
 
+#define VIRTIO_NET_F_CTRL_MAC_ADDR   23 /* Set MAC address */
+
 #define VIRTIO_NET_S_LINK_UP    1       /* Link is up */
 
 #define TX_TIMER_INTERVAL 150000 /* 150 us */
@@ -106,7 +108,7 @@ typedef uint8_t virtio_net_ctrl_ack;
  #define VIRTIO_NET_CTRL_RX_MODE_NOBCAST      5
 
 /*
- * Control the MAC filter table.
+ * Control the MAC
  *
  * The MAC filter table is managed by the hypervisor, the guest should
  * assume the size is infinite.  Filtering should be considered
@@ -119,6 +121,10 @@ typedef uint8_t virtio_net_ctrl_ack;
  * first sg list contains unicast addresses, the second is for multicast.
  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
  * is available.
+ *
+ * The ADDR_SET command requires one out scatterlist containing a
+ * 6-byte MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
  */
 struct virtio_net_ctrl_mac {
     uint32_t entries;
@@ -126,6 +132,7 @@ struct virtio_net_ctrl_mac {
 };
 #define VIRTIO_NET_CTRL_MAC    1
  #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
 
 /*
  * Control VLAN filtering
@@ -158,5 +165,6 @@ struct virtio_net_ctrl_mac {
         DEFINE_PROP_BIT("ctrl_vq", _state, _field, VIRTIO_NET_F_CTRL_VQ, true), \
         DEFINE_PROP_BIT("ctrl_rx", _state, _field, VIRTIO_NET_F_CTRL_RX, true), \
         DEFINE_PROP_BIT("ctrl_vlan", _state, _field, VIRTIO_NET_F_CTRL_VLAN, true), \
-        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true)
+        DEFINE_PROP_BIT("ctrl_rx_extra", _state, _field, VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
+        DEFINE_PROP_BIT("ctrl_mac_addr", _state, _field, VIRTIO_NET_F_CTRL_MAC_ADDR, true)
 #endif
-- 
cgit v1.2.3


From dd23454ba2c83168b453155365671e67723b881f Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Tue, 22 Jan 2013 23:44:46 +0800
Subject: virtio-net: rename ctrl rx commands

This patch makes rx commands consistent with specification.

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c | 14 +++++++-------
 hw/virtio-net.h | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 04834e99a7..dfb9687d2f 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -327,17 +327,17 @@ static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
         return VIRTIO_NET_ERR;
     }
 
-    if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
+    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
         n->promisc = on;
-    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
+    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
         n->allmulti = on;
-    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
+    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
         n->alluni = on;
-    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
+    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
         n->nomulti = on;
-    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
+    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
         n->nouni = on;
-    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
+    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
         n->nobcast = on;
     } else {
         return VIRTIO_NET_ERR;
@@ -474,7 +474,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
         if (s != sizeof(ctrl)) {
             status = VIRTIO_NET_ERR;
-        } else if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE) {
+        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 1ec632f2f3..c0bb284df2 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -99,13 +99,13 @@ typedef uint8_t virtio_net_ctrl_ack;
  * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
  * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
  */
-#define VIRTIO_NET_CTRL_RX_MODE    0
- #define VIRTIO_NET_CTRL_RX_MODE_PROMISC      0
- #define VIRTIO_NET_CTRL_RX_MODE_ALLMULTI     1
- #define VIRTIO_NET_CTRL_RX_MODE_ALLUNI       2
- #define VIRTIO_NET_CTRL_RX_MODE_NOMULTI      3
- #define VIRTIO_NET_CTRL_RX_MODE_NOUNI        4
- #define VIRTIO_NET_CTRL_RX_MODE_NOBCAST      5
+#define VIRTIO_NET_CTRL_RX    0
+ #define VIRTIO_NET_CTRL_RX_PROMISC      0
+ #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+ #define VIRTIO_NET_CTRL_RX_ALLUNI       2
+ #define VIRTIO_NET_CTRL_RX_NOMULTI      3
+ #define VIRTIO_NET_CTRL_RX_NOUNI        4
+ #define VIRTIO_NET_CTRL_RX_NOBCAST      5
 
 /*
  * Control the MAC
-- 
cgit v1.2.3


From 91c3f2f00810a9ba5e4404c9611197efd8f694c8 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Tue, 22 Jan 2013 19:11:37 -0700
Subject: ich9: add support for pci assignment

Fills out support for the pci assignment API.  Added:

PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin)

Add calls to pci_bus_fire_intx_routing_notifier() when routing changes
are made.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/ich9.h     |  1 +
 hw/lpc_ich9.c | 33 +++++++++++++++++++++++++++++++++
 hw/pc_q35.c   |  1 +
 3 files changed, 35 insertions(+)

diff --git a/hw/ich9.h b/hw/ich9.h
index b8d8e6d3df..d4509bb606 100644
--- a/hw/ich9.h
+++ b/hw/ich9.h
@@ -18,6 +18,7 @@
 
 void ich9_lpc_set_irq(void *opaque, int irq_num, int level);
 int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx);
+PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin);
 void ich9_lpc_pm_init(PCIDevice *pci_lpc, qemu_irq cmos_s3);
 PCIBus *ich9_d2pbr_init(PCIBus *bus, int devfn, int sec_bus);
 i2c_bus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base);
diff --git a/hw/lpc_ich9.c b/hw/lpc_ich9.c
index 16843d76bc..e25689bf87 100644
--- a/hw/lpc_ich9.c
+++ b/hw/lpc_ich9.c
@@ -158,6 +158,7 @@ static void ich9_cc_write(void *opaque, hwaddr addr,
 
     ich9_cc_addr_len(&addr, &len);
     memcpy(lpc->chip_config + addr, &val, len);
+    pci_bus_fire_intx_routing_notifier(lpc->d.bus);
     ich9_cc_update(lpc);
 }
 
@@ -286,6 +287,32 @@ int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx)
     return lpc->irr[PCI_SLOT(pci_dev->devfn)][intx];
 }
 
+/* Report where PIRQ pin @pirq_pin is routed: a PIC IRQ, a GSI, or nowhere. */
+PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin)
+{
+    ICH9LPCState *lpc = opaque;
+    PCIINTxRoute route;
+    int pic_irq, pic_dis;
+
+    assert(0 <= pirq_pin);
+    assert(pirq_pin < ICH9_LPC_NB_PIRQS);
+
+    ich9_lpc_pic_irq(lpc, pirq_pin, &pic_irq, &pic_dis);
+    route.mode = PCI_INTX_ENABLED;
+    if (pic_dis) {
+        /* pic_dis set (presumably PIC routing is off): use the pin's GSI. */
+        route.irq = ich9_pirq_to_gsi(pirq_pin);
+    } else if (pic_irq < ICH9_LPC_PIC_NUM_PINS) {
+        route.irq = pic_irq;
+    } else {
+        /* Selected PIC IRQ is out of range: report the pin as unrouted. */
+        route.mode = PCI_INTX_DISABLED;
+        route.irq = -1;
+    }
+
+    return route;
+}
+
 static int ich9_lpc_sci_irq(ICH9LPCState *lpc)
 {
     switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] &
@@ -405,6 +432,12 @@ static void ich9_lpc_config_write(PCIDevice *d,
     if (ranges_overlap(addr, len, ICH9_LPC_RCBA, 4)) {
         ich9_lpc_rcba_update(lpc, rbca_old);
     }
+    if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) {
+        pci_bus_fire_intx_routing_notifier(lpc->d.bus);
+    }
+    if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) {
+        pci_bus_fire_intx_routing_notifier(lpc->d.bus);
+    }
 }
 
 static void ich9_lpc_reset(DeviceState *qdev)
diff --git a/hw/pc_q35.c b/hw/pc_q35.c
index d82353e84f..6f5ff8dcae 100644
--- a/hw/pc_q35.c
+++ b/hw/pc_q35.c
@@ -147,6 +147,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     ich9_lpc->ioapic = gsi_state->ioapic_irq;
     pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc,
                  ICH9_LPC_NB_PIRQS);
+    pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
     isa_bus = ich9_lpc->isa_bus;
 
     /*end early*/
-- 
cgit v1.2.3


From 1ec4ba741630699665a6334f3959271da3effec7 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Thu, 24 Jan 2013 10:31:20 +0100
Subject: PIIX3: reset the VM when the Reset Control Register's RCPU bit gets
 set

  Traditional PCI config space access is achieved by writing a 32 bit
  value to io port 0xcf8 to identify the bus, device, function and config
  register. Port 0xcfc then contains the register in question. But if you
  write the appropriate pair of magic values to 0xcf9, the machine will
  reboot. Spectacular! And not standardised in any way (certainly not part
  of the PCI spec), so different chipsets may have different requirements.
  Booo.

In the PIIX3 spec, IO port 0xcf9 is specified as the Reset Control
Register. Bit 1 (System Reset, SRST) would normally differentiate between
soft reset and hard reset, but we ignore the difference beyond allowing
the guest to read it back.

RHBZ reference: 890459

This patch introduces the following overlap between the preexistent
"pci-conf-idx" region and the "piix3-reset-control" region just being
added. Partial output from "info mtree":

  I/O
  0000000000000000-000000000000ffff (prio 0, RW): io
    0000000000000cf8-0000000000000cfb (prio 0, RW): pci-conf-idx
    0000000000000cf9-0000000000000cf9 (prio 1, RW): piix3-reset-control

I sanity-checked the patch by booting a RHEL-6.3 guest and found no
problems. I summoned gdb and set a breakpoint on rcr_write() in order to
gather a bit more confidence. Relevant frames of the stack:

  kvm_handle_io (port=3321, data=0x7f3f5f3de000, direction=1, size=1,
                 count=1)                                 [kvm-all.c:1422]
    cpu_outb (addr=3321, val=6 '\006')                      [ioport.c:289]
      ioport_write (index=0, address=3321, data=6)           [ioport.c:83]
        ioport_writeb_thunk (opaque=0x7f3f622c4680, addr=3321, data=6)
                                                            [ioport.c:212]
          memory_region_iorange_write (iorange=0x7f3f622c4680, offset=0,
                                       width=1, data=6)     [memory.c:439]
            access_with_adjusted_size (addr=0, value=0x7f3f531fbac0,
                                       size=1, access_size_min=1,
                                       access_size_max=4,
                                       access=0x7f3f5f6e0f90
                                           <memory_region_write_accessor>,
                                       opaque=0x7f3f6227b668)
                                                            [memory.c:364]
              memory_region_write_accessor (opaque=0x7f3f6227b668, addr=0,
                                            value=0x7f3f531fbac0, size=1,
                                            shift=0, mask=255)
                                                            [memory.c:334]
                rcr_write (opaque=0x7f3f6227afb0, addr=0, val=6, len=1)
                                                       [hw/piix_pci.c:498]

The dispatch happens in ioport_write(); "index=0" means byte-wide access:

    static void ioport_write(int index, uint32_t address, uint32_t data)
    {
        static IOPortWriteFunc * const default_func[3] = {
            default_ioport_writeb,
            default_ioport_writew,
            default_ioport_writel
        };
        IOPortWriteFunc *func = ioport_write_table[index][address];
        if (!func)
            func = default_func[index];
        func(ioport_opaque[address], address, data);
    }

The "ioport_write_table" and "ioport_opaque" arrays describe the flattened
IO port space. The first array is less interesting (it selects a thunk
function). The "ioport_opaque" array is interesting because it decides how
writing to the port is implemented ultimately.

4-byte wide access to 0xcf8 (pci-conf-idx):

  (gdb) print ioport_write_table[2][0xcf8]
  $1 = (IOPortWriteFunc *) 0x7f3f5f6d99ba <ioport_writel_thunk>

  (gdb) print \
        ((struct MemoryRegionIORange*)ioport_opaque[0xcf8])->mr->ops.write
  $2 = (void (*)(void *, hwaddr, uint64_t, unsigned int))
       0x7f3f5f5575cb <pci_host_config_write>

1-byte wide access to 0xcf9 (piix3-reset-control):

  (gdb) print ioport_write_table[0][0xcf9]
  $3 = (IOPortWriteFunc *) 0x7f3f5f6d98d0 <ioport_writeb_thunk>

  (gdb) print \
        ((struct MemoryRegionIORange*)ioport_opaque[0xcf9])->mr->ops.write
  $4 = (void (*)(void *, hwaddr, uint64_t, unsigned int))
       0x7f3f5f6b42f1 <rcr_write>

The higher priority of "piix3-reset-control" ensures that the 0xcf9
entries in ioport_write_table / ioport_opaque will always belong to it,
independently of its relative registration order versus "pci-conf-idx".

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/piix_pci.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 3d79c73fda..6c77e493e4 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -31,6 +31,7 @@
 #include "qemu/range.h"
 #include "xen.h"
 #include "pam.h"
+#include "sysemu/sysemu.h"
 
 /*
  * I440FX chipset data sheet.
@@ -46,6 +47,12 @@ typedef struct I440FXState {
 #define XEN_PIIX_NUM_PIRQS      128ULL
 #define PIIX_PIRQC              0x60
 
+/*
+ * Reset Control Register: PCI-accessible ISA-Compatible Register at address
+ * 0xcf9, provided by the PCI/ISA bridge (PIIX3 PCI function 0, 8086:7000).
+ */
+#define RCR_IOPORT 0xcf9
+
 typedef struct PIIX3State {
     PCIDevice dev;
 
@@ -67,6 +74,12 @@ typedef struct PIIX3State {
 
     /* This member isn't used. Just for save/load compatibility */
     int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS];
+
+    /* Reset Control Register contents */
+    uint8_t rcr;
+
+    /* IO memory region for Reset Control Register (RCR_IOPORT) */
+    MemoryRegion rcr_mem;
 } PIIX3State;
 
 struct PCII440FXState {
@@ -442,6 +455,7 @@ static void piix3_reset(void *opaque)
     pci_conf[0xae] = 0x00;
 
     d->pic_levels = 0;
+    d->rcr = 0;
 }
 
 static int piix3_post_load(void *opaque, int version_id)
@@ -462,6 +476,23 @@ static void piix3_pre_save(void *opaque)
     }
 }
 
+/* Subsection predicate: true only while the stored RCR value is non-zero. */
+static bool piix3_rcr_needed(void *opaque)
+{
+    const PIIX3State *s = opaque;
+    return s->rcr != 0;
+}
+
+static const VMStateDescription vmstate_piix3_rcr = {
+    .name = "PIIX3/rcr",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_UINT8(rcr, PIIX3State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_piix3 = {
     .name = "PIIX3",
     .version_id = 3,
@@ -469,12 +500,44 @@ static const VMStateDescription vmstate_piix3 = {
     .minimum_version_id_old = 2,
     .post_load = piix3_post_load,
     .pre_save = piix3_pre_save,
-    .fields      = (VMStateField []) {
+    .fields      = (VMStateField[]) {
         VMSTATE_PCI_DEVICE(dev, PIIX3State),
         VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIX3State,
                               PIIX_NUM_PIRQS, 3),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection[]) {
+        {
+            .vmsd = &vmstate_piix3_rcr,
+            .needed = piix3_rcr_needed,
+        },
+        { 0 }
+    }
+};
+
+
+static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len)
+{
+    PIIX3State *d = opaque;
+
+    if (val & 4) {
+        qemu_system_reset_request();
+        return;
     }
+    d->rcr = val & 2; /* keep System Reset type only */
+}
+
+/* Read handler for rcr_ops: returns the stored RCR byte; addr/len unused. */
+static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned len)
+{
+    const PIIX3State *piix3 = opaque;
+    return piix3->rcr;
+}
+
+static const MemoryRegionOps rcr_ops = {
+    .read = rcr_read,
+    .write = rcr_write,
+    .endianness = DEVICE_LITTLE_ENDIAN
 };
 
 static int piix3_initfn(PCIDevice *dev)
@@ -482,6 +545,11 @@ static int piix3_initfn(PCIDevice *dev)
     PIIX3State *d = DO_UPCAST(PIIX3State, dev, dev);
 
     isa_bus_new(&d->dev.qdev, pci_address_space_io(dev));
+
+    memory_region_init_io(&d->rcr_mem, &rcr_ops, d, "piix3-reset-control", 1);
+    memory_region_add_subregion_overlap(pci_address_space_io(dev), RCR_IOPORT,
+                                        &d->rcr_mem, 1);
+
     qemu_register_reset(piix3_reset, d);
     return 0;
 }
-- 
cgit v1.2.3


From 6a659bbff991b0033d1bf1ff71b7d550e0367d99 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 23 Jan 2013 17:46:13 -0700
Subject: vfio-pci: Enable PCIe extended config space

We don't know pre-init time whether the device we're exposing is PCIe
or legacy PCI.  We could ask for it to be specified via a device
option, but that seems like too much to ask of the user.  Instead we
can assume everything will be PCIe, which makes PCI-core allocate
enough config space.  Removing the flag during init leaves the space
allocated, but allows legacy PCI devices to report the real device
config space size to rest of Qemu.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/vfio_pci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index c51ae6761b..66537b7eb5 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -1899,6 +1899,9 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
             (unsigned long)reg_info.flags);
 
     vdev->config_size = reg_info.size;
+    if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
+        vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
+    }
     vdev->config_offset = reg_info.offset;
 
 error:
@@ -2121,6 +2124,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
     pdc->exit = vfio_exitfn;
     pdc->config_read = vfio_pci_read_config;
     pdc->config_write = vfio_pci_write_config;
+    pdc->is_express = 1; /* We might be */
 }
 
 static const TypeInfo vfio_pci_dev_info = {
-- 
cgit v1.2.3