-rw-r--r--  .gitignore | 1
-rw-r--r--  Makefile | 2
-rw-r--r--  coroutine-win32.c | 13
-rw-r--r--  docs/qapi-code-gen.txt | 8
-rw-r--r--  docs/qmp/qmp-events.txt | 559
-rw-r--r--  hw/char/virtio-console.c | 12
-rw-r--r--  hw/core/qdev-properties-system.c | 3
-rw-r--r--  hw/intc/xics.c | 182
-rw-r--r--  hw/intc/xics_kvm.c | 34
-rw-r--r--  hw/misc/vfio.c | 78
-rw-r--r--  hw/net/eepro100.c | 4
-rw-r--r--  hw/net/virtio-net.c | 2
-rw-r--r--  hw/nvram/spapr_nvram.c | 4
-rw-r--r--  hw/pci-host/uninorth.c | 2
-rw-r--r--  hw/ppc/Makefile.objs | 3
-rw-r--r--  hw/ppc/e500.c | 13
-rw-r--r--  hw/ppc/mac_newworld.c | 18
-rw-r--r--  hw/ppc/spapr.c | 104
-rw-r--r--  hw/ppc/spapr_events.c | 5
-rw-r--r--  hw/ppc/spapr_iommu.c | 7
-rw-r--r--  hw/ppc/spapr_pci.c | 217
-rw-r--r--  hw/ppc/spapr_pci_vfio.c | 102
-rw-r--r--  hw/ppc/spapr_rtas.c | 97
-rw-r--r--  hw/ppc/spapr_vio.c | 9
-rw-r--r--  hw/watchdog/watchdog.c | 2
-rw-r--r--  include/block/blockjob.h | 2
-rw-r--r--  include/hw/misc/vfio.h | 9
-rw-r--r--  include/hw/pci-host/spapr.h | 32
-rw-r--r--  include/hw/ppc/spapr.h | 82
-rw-r--r--  include/hw/ppc/xics.h | 9
-rw-r--r--  include/migration/vmstate.h | 11
-rw-r--r--  include/net/net.h | 2
-rw-r--r--  linux-user/elfload.c | 44
-rw-r--r--  monitor.c | 3
-rw-r--r--  net/Makefile.objs | 1
-rw-r--r--  net/clients.h | 2
-rw-r--r--  net/l2tpv3.c | 757
-rw-r--r--  net/net.c | 10
-rw-r--r--  pc-bios/s390-ccw.img | bin 9432 -> 17624 bytes
-rw-r--r--  pc-bios/s390-ccw/bootmap.c | 445
-rw-r--r--  pc-bios/s390-ccw/bootmap.h | 344
-rw-r--r--  pc-bios/s390-ccw/main.c | 13
-rw-r--r--  pc-bios/s390-ccw/s390-ccw.h | 38
-rw-r--r--  pc-bios/s390-ccw/sclp-ascii.c | 4
-rw-r--r--  pc-bios/s390-ccw/virtio.c | 122
-rw-r--r--  pc-bios/s390-ccw/virtio.h | 50
-rw-r--r--  qapi-schema.json | 80
-rw-r--r--  qapi/block-core.json | 3
-rw-r--r--  qapi/event.json (renamed from qapi-event.json) | 18
-rw-r--r--  qemu-bridge-helper.c | 9
-rw-r--r--  qemu-char.c | 4
-rw-r--r--  qemu-options.hx | 82
-rw-r--r--  qmp-commands.hx | 19
-rw-r--r--  savevm.c | 5
-rw-r--r--  scripts/qapi-event.py | 5
-rw-r--r--  scripts/qapi.py | 2
-rw-r--r--  target-i386/cpu-qom.h | 6
-rw-r--r--  target-i386/cpu.c | 566
-rw-r--r--  target-i386/cpu.h | 4
-rw-r--r--  target-i386/kvm.c | 15
-rw-r--r--  target-i386/machine.c | 2
-rw-r--r--  target-ppc/cpu-models.c | 3
-rw-r--r--  target-ppc/cpu-models.h | 7
-rw-r--r--  target-ppc/cpu-qom.h | 2
-rw-r--r--  target-ppc/cpu.h | 2
-rw-r--r--  target-ppc/gdbstub.c | 137
-rw-r--r--  target-ppc/kvm.c | 7
-rw-r--r--  target-ppc/kvm_ppc.h | 6
-rw-r--r--  target-ppc/translate.c | 13
-rw-r--r--  target-ppc/translate_init.c | 28
-rw-r--r--  tcg/ppc/tcg-target.c | 11
-rw-r--r--  tests/qapi-schema/event-nest-struct.err | 2
-rw-r--r--  trace-events | 11
-rw-r--r--  vmstate.c | 13
74 files changed, 3741 insertions, 782 deletions
diff --git a/.gitignore b/.gitignore
index c658613560..2286d0a109 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@
/qapi-generated
/qapi-types.[ch]
/qapi-visit.[ch]
+/qapi-event.[ch]
/qmp-commands.h
/qmp-marshal.c
/qemu-doc.html
diff --git a/Makefile b/Makefile
index 145adb68a2..1eea0c418f 100644
--- a/Makefile
+++ b/Makefile
@@ -248,7 +248,7 @@ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
$(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
- $(SRC_PATH)/qapi-event.json
+ $(SRC_PATH)/qapi/event.json
qapi-types.c qapi-types.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
diff --git a/coroutine-win32.c b/coroutine-win32.c
index edc1f72c18..17ace37dee 100644
--- a/coroutine-win32.c
+++ b/coroutine-win32.c
@@ -36,8 +36,17 @@ typedef struct
static __thread CoroutineWin32 leader;
static __thread Coroutine *current;
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
{
CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
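For illustration, a minimal sketch (not part of this patch) of the hazard the new comment describes: if the compiler caches the address of a __thread variable across SwitchToFiber(), the fiber keeps using the TLS block of the OS thread it started on. The helper names below are hypothetical.

    #include <windows.h>

    static __thread int current_value;      /* one copy per OS thread */

    static void do_step(int *p)             /* stand-in for real work */
    {
        (*p)++;
    }

    static VOID CALLBACK trampoline(PVOID main_fiber)
    {
        while (1) {
            /* &current_value must be recomputed on every iteration:
             * SwitchToFiber() may return on a different OS thread whose
             * TLS block lives at a different address.  If do_step() were
             * inlined and the address hoisted out of the loop, the fiber
             * would silently keep touching the first thread's TLS slot. */
            do_step(&current_value);
            SwitchToFiber(main_fiber);
        }
    }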
diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt
index 3a0c99e1da..a6197a9133 100644
--- a/docs/qapi-code-gen.txt
+++ b/docs/qapi-code-gen.txt
@@ -218,10 +218,10 @@ An example command is:
=== Events ===
Events are defined with the keyword 'event'. When 'data' is also specified,
-additional info will be carried on. Finally there will be C API generated
-in qapi-event.h; when called by QEMU code, a message with timestamp will
-be emitted on the wire. If timestamp is -1, it means failure to retrieve host
-time.
+additional info will be included in the event. Finally there will be C API
+generated in qapi-event.h; when called by QEMU code, a message with timestamp
+will be emitted on the wire. If timestamp is -1, it means failure to retrieve
+host time.
An example event is:
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
new file mode 100644
index 0000000000..44be891261
--- /dev/null
+++ b/docs/qmp/qmp-events.txt
@@ -0,0 +1,559 @@
+ QEMU Machine Protocol Events
+ ============================
+
+ACPI_DEVICE_OST
+---------------
+
+Emitted when the guest executes the ACPI _OST method.
+
+ - data: ACPIOSTInfo type as described in qapi-schema.json
+
+{ "event": "ACPI_DEVICE_OST",
+ "data": { "device": "d1", "slot": "0", "slot-type": "DIMM", "source": 1, "status": 0 } }
+
+BALLOON_CHANGE
+--------------
+
+Emitted when the guest changes the actual BALLOON level. This
+value is equivalent to the 'actual' field returned by the
+'query-balloon' command.
+
+Data:
+
+- "actual": actual level of the guest memory balloon in bytes (json-number)
+
+Example:
+
+{ "event": "BALLOON_CHANGE",
+ "data": { "actual": 944766976 },
+ "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg": Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+ offset into the image (json-int)
+- "size": If the corruption resulted from an image access, this is the access
+ size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+ "data": { "device": "ide0-hd0",
+ "msg": "Prevented active L1 table overwrite", "offset": 196608,
+ "size": 65536 },
+ "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
+BLOCK_IO_ERROR
+--------------
+
+Emitted when a disk I/O error occurs.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+ "ignore": error has been ignored
+ "report": error has been reported to the device
+ "stop": the VM is going to stop because of the error
+
+Example:
+
+{ "event": "BLOCK_IO_ERROR",
+ "data": { "device": "ide0-hd1",
+ "operation": "write",
+ "action": "stop" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: If action is "stop", a STOP event will eventually follow the
+BLOCK_IO_ERROR event.
+
+BLOCK_JOB_CANCELLED
+-------------------
+
+Emitted when a block job has been cancelled.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_CANCELLED",
+ "data": { "type": "stream", "device": "virtio-disk0",
+ "len": 10737418240, "offset": 134217728,
+ "speed": 0 },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_COMPLETED
+-------------------
+
+Emitted when a block job has completed.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+- "error": Error message (json-string, optional)
+ Only present on failure. This field contains a human-readable
+ error message. There are no semantics other than that streaming
+ has failed and clients should not try to interpret the error
+ string.
+
+Example:
+
+{ "event": "BLOCK_JOB_COMPLETED",
+ "data": { "type": "stream", "device": "virtio-disk0",
+ "len": 10737418240, "offset": 10737418240,
+ "speed": 0 },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+ "ignore": error has been ignored, the job may fail later
+ "report": error will be reported and the job canceled
+ "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+ "data": { "device": "ide0-hd1",
+ "operation": "write",
+ "action": "stop" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "type": Job type (json-string; "stream" for image streaming
+ "commit" for block commit)
+- "device": Device name (json-string)
+- "len": Maximum progress value (json-int)
+- "offset": Current progress value (json-int)
+ On success this is equal to len.
+ On failure this is less than len.
+- "speed": Rate limit, bytes per second (json-int)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+ "data": { "device": "drive0", "type": "mirror", "speed": 0,
+ "len": 2097152, "offset": 2097152 }
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
+DEVICE_DELETED
+--------------
+
+Emitted whenever the device removal completion is acknowledged
+by the guest.
+At this point, it's safe to reuse the specified device ID.
+Device removal can be initiated by the guest or by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string, optional)
+- "path": device path (json-string)
+
+{ "event": "DEVICE_DELETED",
+ "data": { "device": "virtio-net-pci-0",
+ "path": "/machine/peripheral/virtio-net-pci-0" },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+DEVICE_TRAY_MOVED
+-----------------
+
+Emitted whenever the tray of a removable device is moved by the guest or
+by HMP/QMP commands.
+
+Data:
+
+- "device": device name (json-string)
+- "tray-open": true if the tray has been opened or false if it has been closed
+ (json-bool)
+
+{ "event": "DEVICE_TRAY_MOVED",
+ "data": { "device": "ide1-cd0",
+ "tray-open": true
+ },
+ "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+GUEST_PANICKED
+--------------
+
+Emitted when a guest OS panic is detected.
+
+Data:
+
+- "action": Action that has been taken (json-string, currently always "pause").
+
+Example:
+
+{ "event": "GUEST_PANICKED",
+ "data": { "action": "pause" } }
+
+NIC_RX_FILTER_CHANGED
+---------------------
+
+Emitted once until the query command is executed; the first event will
+always be emitted.
+
+Data:
+
+- "name": net client name (json-string)
+- "path": device path (json-string)
+
+{ "event": "NIC_RX_FILTER_CHANGED",
+ "data": { "name": "vnet0",
+ "path": "/machine/peripheral/vnet0/virtio-backend" },
+ "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
+
+QUORUM_FAILURE
+--------------
+
+Emitted by the Quorum block driver if it fails to establish a quorum.
+
+Data:
+
+- "reference": device name if defined else node name.
+- "sector-num": Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_FAILURE",
+ "data": { "reference": "usr1", "sector-num": 345435, "sector-count": 5 },
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+QUORUM_REPORT_BAD
+-----------------
+
+Emitted to report a corruption of a Quorum file.
+
+Data:
+
+- "error": Error message (json-string, optional)
+ Only present on failure. This field contains a human-readable
+ error message. There are no semantics other than that the
+ block layer reported an error and clients should not try to
+ interpret the error string.
+- "node-name": The graph node name of the block driver state.
+- "sector-num": Number of the first sector of the failed read operation.
+- "sector-count": Failed read operation sector count.
+
+Example:
+
+{ "event": "QUORUM_REPORT_BAD",
+ "data": { "node-name": "1.raw", "sector-num": 345435, "sector-count": 5 },
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+RESET
+-----
+
+Emitted when the Virtual Machine is reset.
+
+Data: None.
+
+Example:
+
+{ "event": "RESET",
+ "timestamp": { "seconds": 1267041653, "microseconds": 9518 } }
+
+RESUME
+------
+
+Emitted when the Virtual Machine resumes execution.
+
+Data: None.
+
+Example:
+
+{ "event": "RESUME",
+ "timestamp": { "seconds": 1271770767, "microseconds": 582542 } }
+
+RTC_CHANGE
+----------
+
+Emitted when the guest changes the RTC time.
+
+Data:
+
+- "offset": Offset between base RTC clock (as specified by -rtc base), and
+new RTC clock value (json-number)
+
+Example:
+
+{ "event": "RTC_CHANGE",
+ "data": { "offset": 78 },
+ "timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+
+SHUTDOWN
+--------
+
+Emitted when the Virtual Machine is powered down.
+
+Data: None.
+
+Example:
+
+{ "event": "SHUTDOWN",
+ "timestamp": { "seconds": 1267040730, "microseconds": 682951 } }
+
+Note: If the command-line option "-no-shutdown" has been specified, a STOP
+event will eventually follow the SHUTDOWN event.
+
+SPICE_CONNECTED, SPICE_DISCONNECTED
+-----------------------------------
+
+Emitted when a SPICE client connects or disconnects.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 388707},
+ "event": "SPICE_CONNECTED",
+ "data": {
+ "server": { "port": "5920", "family": "ipv4", "host": "127.0.0.1"},
+ "client": {"port": "52873", "family": "ipv4", "host": "127.0.0.1"}
+}}
+
+SPICE_INITIALIZED
+-----------------
+
+Emitted after the initial handshake and authentication take place (if any)
+and the SPICE channel is up and running.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "port": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "connection-id": spice connection id. All channels with the same id
+ belong to the same spice session (json-int)
+ - "channel-type": channel type. "1" is the main control channel, filter for
+ this one if you want track spice sessions only (json-int)
+ - "channel-id": channel id. Usually "0", might be different needed when
+ multiple channels of the same type exist, such as multiple
+ display channels in a multihead setup (json-int)
+ - "tls": whevener the channel is encrypted (json-bool)
+
+Example:
+
+{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
+ "event": "SPICE_INITIALIZED",
+ "data": {"server": {"auth": "spice", "port": "5921",
+ "family": "ipv4", "host": "127.0.0.1"},
+ "client": {"port": "49004", "family": "ipv4", "channel-type": 3,
+ "connection-id": 1804289383, "host": "127.0.0.1",
+ "channel-id": 0, "tls": true}
+}}
+
+STOP
+----
+
+Emitted when the Virtual Machine is stopped.
+
+Data: None.
+
+Example:
+
+{ "event": "STOP",
+ "timestamp": { "seconds": 1267041730, "microseconds": 281295 } }
+
+SUSPEND
+-------
+
+Emitted when the guest enters S3 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND",
+ "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+SUSPEND_DISK
+------------
+
+Emitted when the guest makes a request to enter S4 state.
+
+Data: None.
+
+Example:
+
+{ "event": "SUSPEND_DISK",
+ "timestamp": { "seconds": 1344456160, "microseconds": 309119 } }
+
+Note: QEMU shuts down when entering S4 state.
+
+VNC_CONNECTED
+-------------
+
+Emitted when a VNC client establishes a connection.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+
+Example:
+
+{ "event": "VNC_CONNECTED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0" },
+ "client": { "family": "ipv4", "service": "58425",
+ "host": "127.0.0.1" } },
+ "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+
+Note: This event is emitted before any authentication takes place, thus
+the authentication ID is not provided.
+
+VNC_DISCONNECTED
+----------------
+
+Emitted when the connection is closed.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "x509_dname": TLS dname (json-string, optional)
+ - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_DISCONNECTED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0" },
+ "client": { "family": "ipv4", "service": "58425",
+ "host": "127.0.0.1", "sasl_username": "luiz" } },
+ "timestamp": { "seconds": 1262976601, "microseconds": 975795 } }
+
+VNC_INITIALIZED
+---------------
+
+Emitted after authentication takes place (if any) and the VNC session is
+made active.
+
+Data:
+
+- "server": Server information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "auth": authentication method (json-string, optional)
+- "client": Client information (json-object)
+ - "host": IP address (json-string)
+ - "service": port number (json-string)
+ - "family": address family (json-string, "ipv4" or "ipv6")
+ - "x509_dname": TLS dname (json-string, optional)
+ - "sasl_username": SASL username (json-string, optional)
+
+Example:
+
+{ "event": "VNC_INITIALIZED",
+ "data": {
+ "server": { "auth": "sasl", "family": "ipv4",
+ "service": "5901", "host": "0.0.0.0"},
+ "client": { "family": "ipv4", "service": "46089",
+ "host": "127.0.0.1", "sasl_username": "luiz" } },
+ "timestamp": { "seconds": 1263475302, "microseconds": 150772 } }
+
+WAKEUP
+------
+
+Emitted when the guest has woken up from S3 and is running.
+
+Data: None.
+
+Example:
+
+{ "event": "WAKEUP",
+ "timestamp": { "seconds": 1344522075, "microseconds": 745528 } }
+
+WATCHDOG
+--------
+
+Emitted when the watchdog device's timer expires.
+
+Data:
+
+- "action": Action that has been taken, it's one of the following (json-string):
+ "reset", "shutdown", "poweroff", "pause", "debug", or "none"
+
+Example:
+
+{ "event": "WATCHDOG",
+ "data": { "action": "reset" },
+ "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
+
+Note: If action is "reset", "shutdown", or "pause" the WATCHDOG event is
+followed respectively by the RESET, SHUTDOWN, or STOP events.
diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c
index 6c8be0fe26..54eb15f3af 100644
--- a/hw/char/virtio-console.c
+++ b/hw/char/virtio-console.c
@@ -14,6 +14,7 @@
#include "qemu/error-report.h"
#include "trace.h"
#include "hw/virtio/virtio-serial.h"
+#include "qapi-event.h"
#define TYPE_VIRTIO_CONSOLE_SERIAL_PORT "virtserialport"
#define VIRTIO_CONSOLE(obj) \
@@ -81,11 +82,16 @@ static ssize_t flush_buf(VirtIOSerialPort *port,
static void set_guest_connected(VirtIOSerialPort *port, int guest_connected)
{
VirtConsole *vcon = VIRTIO_CONSOLE(port);
+ DeviceState *dev = DEVICE(port);
- if (!vcon->chr) {
- return;
+ if (vcon->chr) {
+ qemu_chr_fe_set_open(vcon->chr, guest_connected);
+ }
+
+ if (dev->id) {
+ qapi_event_send_vserport_change(dev->id, guest_connected,
+ &error_abort);
}
- qemu_chr_fe_set_open(vcon->chr, guest_connected);
}
/* Readiness of the guest to accept data on a port */
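The hunk above is also a convenient illustration of the generated qapi-event.h API described in the docs/qapi-code-gen.txt change earlier in this patch. A minimal sketch of a caller follows, assuming (not verified here) that the generator produces one qapi_event_send_FOO() function per event taking the event's 'data' members plus an Error **:

    #include "qapi-event.h"
    #include "qapi/error.h"

    /* Emits a VSERPORT_CHANGE event on the QMP wire with a timestamp;
     * a timestamp of -1 means the host time could not be retrieved. */
    static void notify_port_state(const char *id, bool guest_connected)
    {
        qapi_event_send_vserport_change(id, guest_connected, &error_abort);
    }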
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index de433b2e38..52c2f8afa5 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -180,7 +180,6 @@ PropertyInfo qdev_prop_chr = {
static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
{
NICPeers *peers_ptr = (NICPeers *)ptr;
- NICConf *conf = container_of(peers_ptr, NICConf, peers);
NetClientState **ncs = peers_ptr->ncs;
NetClientState *peers[MAX_QUEUE_NUM];
int queues, i = 0;
@@ -219,7 +218,7 @@ static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
ncs[i]->queue_index = i;
}
- conf->queues = queues;
+ peers_ptr->queues = queues;
return 0;
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 76dd6f5708..0fd2a84c7b 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -437,7 +437,7 @@ static void ics_set_irq(void *opaque, int srcno, int val)
{
ICSState *ics = (ICSState *)opaque;
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
set_irq_lsi(ics, srcno, val);
} else {
set_irq_msi(ics, srcno, val);
@@ -474,7 +474,7 @@ static void ics_write_xive(ICSState *ics, int nr, int server,
trace_xics_ics_write_xive(nr, srcno, server, priority);
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
write_xive_lsi(ics, srcno);
} else {
write_xive_msi(ics, srcno);
@@ -496,7 +496,7 @@ static void ics_resend(ICSState *ics)
for (i = 0; i < ics->nr_irqs; i++) {
/* FIXME: filter by server#? */
- if (ics->islsi[i]) {
+ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
resend_lsi(ics, i);
} else {
resend_msi(ics, i);
@@ -511,7 +511,7 @@ static void ics_eoi(ICSState *ics, int nr)
trace_xics_ics_eoi(nr);
- if (ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) {
irq->status &= ~XICS_STATUS_SENT;
}
}
@@ -520,11 +520,18 @@ static void ics_reset(DeviceState *dev)
{
ICSState *ics = ICS(dev);
int i;
+ uint8_t flags[ics->nr_irqs];
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ flags[i] = ics->irqs[i].flags;
+ }
memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
for (i = 0; i < ics->nr_irqs; i++) {
ics->irqs[i].priority = 0xff;
ics->irqs[i].saved_priority = 0xff;
+ ics->irqs[i].flags = flags[i];
}
}
@@ -563,13 +570,14 @@ static int ics_dispatch_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_ics_irq = {
.name = "ics/irq",
- .version_id = 1,
+ .version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(server, ICSIRQState),
VMSTATE_UINT8(priority, ICSIRQState),
VMSTATE_UINT8(saved_priority, ICSIRQState),
VMSTATE_UINT8(status, ICSIRQState),
+ VMSTATE_UINT8(flags, ICSIRQState),
VMSTATE_END_OF_LIST()
},
};
@@ -606,7 +614,6 @@ static void ics_realize(DeviceState *dev, Error **errp)
return;
}
ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
- ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
}
@@ -633,21 +640,166 @@ static const TypeInfo ics_info = {
/*
* Exported functions
*/
+static int xics_find_source(XICSState *icp, int irq)
+{
+ int sources = 1;
+ int src;
+
+ /* FIXME: implement multiple sources */
+ for (src = 0; src < sources; ++src) {
+ ICSState *ics = &icp->ics[src];
+ if (ics_valid_irq(ics, irq)) {
+ return src;
+ }
+ }
+
+ return -1;
+}
qemu_irq xics_get_qirq(XICSState *icp, int irq)
{
- if (!ics_valid_irq(icp->ics, irq)) {
- return NULL;
+ int src = xics_find_source(icp, irq);
+
+ if (src >= 0) {
+ ICSState *ics = &icp->ics[src];
+ return ics->qirqs[irq - ics->offset];
}
- return icp->ics->qirqs[irq - icp->ics->offset];
+ return NULL;
+}
+
+static void ics_set_irq_type(ICSState *ics, int srcno, bool lsi)
+{
+ assert(!(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK));
+
+ ics->irqs[srcno].flags |=
+ lsi ? XICS_FLAGS_IRQ_LSI : XICS_FLAGS_IRQ_MSI;
}
void xics_set_irq_type(XICSState *icp, int irq, bool lsi)
{
- assert(ics_valid_irq(icp->ics, irq));
+ int src = xics_find_source(icp, irq);
+ ICSState *ics;
- icp->ics->islsi[irq - icp->ics->offset] = lsi;
+ assert(src >= 0);
+
+ ics = &icp->ics[src];
+ ics_set_irq_type(ics, irq - ics->offset, lsi);
+}
+
+#define ICS_IRQ_FREE(ics, srcno) \
+ (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
+
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+ int first, i;
+
+ for (first = 0; first < ics->nr_irqs; first += alignnum) {
+ if (num > (ics->nr_irqs - first)) {
+ return -1;
+ }
+ for (i = first; i < first + num; ++i) {
+ if (!ICS_IRQ_FREE(ics, i)) {
+ break;
+ }
+ }
+ if (i == (first + num)) {
+ return first;
+ }
+ }
+
+ return -1;
+}
+
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi)
+{
+ ICSState *ics = &icp->ics[src];
+ int irq;
+
+ if (irq_hint) {
+ assert(src == xics_find_source(icp, irq_hint));
+ if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) {
+ trace_xics_alloc_failed_hint(src, irq_hint);
+ return -1;
+ }
+ irq = irq_hint;
+ } else {
+ irq = ics_find_free_block(ics, 1, 1);
+ if (irq < 0) {
+ trace_xics_alloc_failed_no_left(src);
+ return -1;
+ }
+ irq += ics->offset;
+ }
+
+ ics_set_irq_type(ics, irq - ics->offset, lsi);
+ trace_xics_alloc(src, irq);
+
+ return irq;
+}
+
+/*
+ * Allocate a block of consecutive IRQs; returns the number of the first.
+ * If align==true, aligns the first IRQ number to num.
+ */
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align)
+{
+ int i, first = -1;
+ ICSState *ics = &icp->ics[src];
+
+ assert(src == 0);
+ /*
+ * MSIMessage::data is used for storing VIRQ so
+ * it has to be aligned to num to support multiple
+ * MSI vectors. MSI-X is not affected by this.
+ * The hint is used for the first IRQ, the rest should
+ * be allocated continuously.
+ */
+ if (align) {
+ assert((num == 1) || (num == 2) || (num == 4) ||
+ (num == 8) || (num == 16) || (num == 32));
+ first = ics_find_free_block(ics, num, num);
+ } else {
+ first = ics_find_free_block(ics, num, 1);
+ }
+
+ if (first >= 0) {
+ for (i = first; i < first + num; ++i) {
+ ics_set_irq_type(ics, i, lsi);
+ }
+ }
+ first += ics->offset;
+
+ trace_xics_alloc_block(src, first, num, lsi, align);
+
+ return first;
+}
+
+static void ics_free(ICSState *ics, int srcno, int num)
+{
+ int i;
+
+ for (i = srcno; i < srcno + num; ++i) {
+ if (ICS_IRQ_FREE(ics, i)) {
+ trace_xics_ics_free_warn(ics - ics->icp->ics, i + ics->offset);
+ }
+ memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
+ }
+}
+
+void xics_free(XICSState *icp, int irq, int num)
+{
+ int src = xics_find_source(icp, irq);
+
+ if (src >= 0) {
+ ICSState *ics = &icp->ics[src];
+
+ /* FIXME: implement multiple sources */
+ assert(src == 0);
+
+ trace_xics_ics_free(ics - icp->ics, irq, num);
+ ics_free(ics, irq - ics->offset, num);
+ }
}
/*
@@ -866,10 +1018,10 @@ static void xics_realize(DeviceState *dev, Error **errp)
}
/* Registration of global state belongs into realize */
- spapr_rtas_register("ibm,set-xive", rtas_set_xive);
- spapr_rtas_register("ibm,get-xive", rtas_get_xive);
- spapr_rtas_register("ibm,int-off", rtas_int_off);
- spapr_rtas_register("ibm,int-on", rtas_int_on);
+ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive);
+ spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive);
+ spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_int_off);
+ spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_int_on);
spapr_register_hypercall(H_CPPR, h_cppr);
spapr_register_hypercall(H_IPI, h_ipi);
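A short usage sketch of the new XICS allocator API, mirroring the call sites converted later in this series (spapr_events.c and spapr_pci.c); failure handling is elided:

    #include "hw/ppc/xics.h"

    /* Purely illustrative helper, not part of the patch. */
    static int alloc_msi_block_example(XICSState *icp)
    {
        /* Four MSIs from source 0; align=true keeps the first IRQ number a
         * multiple of 4 so multi-vector MSI data arithmetic works
         * (MSI-X is unaffected).  A single interrupt would instead use
         * xics_alloc(icp, 0, 0, lsi). */
        return xics_alloc_block(icp, 0, 4, false, true);
    }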
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 09476ae34d..20b19e9d4f 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -38,10 +38,6 @@
typedef struct KVMXICSState {
XICSState parent_obj;
- uint32_t set_xive_token;
- uint32_t get_xive_token;
- uint32_t int_off_token;
- uint32_t int_on_token;
int kernel_xics_fd;
} KVMXICSState;
@@ -224,7 +220,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id)
state |= KVM_XICS_MASKED;
}
- if (ics->islsi[i]) {
+ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
state |= KVM_XICS_LEVEL_SENSITIVE;
if (irq->status & XICS_STATUS_ASSERTED) {
state |= KVM_XICS_PENDING;
@@ -253,7 +249,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val)
int rc;
args.irq = srcno + ics->offset;
- if (!ics->islsi[srcno]) {
+ if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
if (!val) {
return;
}
@@ -271,11 +267,18 @@ static void ics_kvm_reset(DeviceState *dev)
{
ICSState *ics = ICS(dev);
int i;
+ uint8_t flags[ics->nr_irqs];
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ flags[i] = ics->irqs[i].flags;
+ }
memset(ics->irqs, 0, sizeof(ICSIRQState) * ics->nr_irqs);
+
for (i = 0; i < ics->nr_irqs; i++) {
ics->irqs[i].priority = 0xff;
ics->irqs[i].saved_priority = 0xff;
+ ics->irqs[i].flags = flags[i];
}
ics_set_kvm_state(ics, 1);
@@ -290,7 +293,6 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
return;
}
ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
- ics->islsi = g_malloc0(ics->nr_irqs * sizeof(bool));
ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
}
@@ -392,32 +394,30 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp)
goto fail;
}
- icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
- icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
- icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
- icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_INT_OFF, "ibm,int-off", rtas_dummy);
+ spapr_rtas_register(RTAS_IBM_INT_ON, "ibm,int-on", rtas_dummy);
- rc = kvmppc_define_rtas_kernel_token(icpkvm->set_xive_token,
- "ibm,set-xive");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,set-xive");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->get_xive_token,
- "ibm,get-xive");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,get-xive");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->int_on_token, "ibm,int-on");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-on");
goto fail;
}
- rc = kvmppc_define_rtas_kernel_token(icpkvm->int_off_token, "ibm,int-off");
+ rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
if (rc < 0) {
error_setg(errp, "kvmppc_define_rtas_kernel_token: ibm,int-off");
goto fail;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 7437c2e3c3..7b279c4f05 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -39,6 +39,7 @@
#include "qemu/range.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
+#include "hw/misc/vfio.h"
/* #define DEBUG_VFIO */
#ifdef DEBUG_VFIO
@@ -3649,6 +3650,39 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
container->iommu_data.type1.initialized = true;
+ } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+ ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+ if (ret) {
+ error_report("vfio: failed to set group container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
+ if (ret) {
+ error_report("vfio: failed to set iommu for container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ /*
+ * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+ * when container fd is closed so we do not call it explicitly
+ * in this file.
+ */
+ ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+ if (ret) {
+ error_report("vfio: failed to enable container: %m");
+ ret = -errno;
+ goto free_container_exit;
+ }
+
+ container->iommu_data.type1.listener = vfio_memory_listener;
+ container->iommu_data.release = vfio_listener_release;
+
+ memory_listener_register(&container->iommu_data.type1.listener,
+ container->space->as);
+
} else {
error_report("vfio: No available IOMMU models");
ret = -EINVAL;
@@ -4318,3 +4352,47 @@ static void register_vfio_pci_dev_type(void)
}
type_init(register_vfio_pci_dev_type)
+
+static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param)
+{
+ VFIOGroup *group;
+ VFIOContainer *container;
+ int ret = -1;
+
+ group = vfio_get_group(groupid, as);
+ if (!group) {
+ error_report("vfio: group %d not registered", groupid);
+ return ret;
+ }
+
+ container = group->container;
+ if (group->container) {
+ ret = ioctl(container->fd, req, param);
+ if (ret < 0) {
+ error_report("vfio: failed to ioctl container: ret=%d, %s",
+ ret, strerror(errno));
+ }
+ }
+
+ vfio_put_group(group);
+
+ return ret;
+}
+
+int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param)
+{
+ /* We allow only certain ioctls to the container */
+ switch (req) {
+ case VFIO_CHECK_EXTENSION:
+ case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
+ break;
+ default:
+ /* Return an error on unknown requests */
+ error_report("vfio: unsupported ioctl %X", req);
+ return -1;
+ }
+
+ return vfio_container_do_ioctl(as, groupid, req, param);
+}
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index aaa3ff2360..3263e3fe90 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -1217,7 +1217,6 @@ static void eepro100_write_mdi(EEPRO100State *s)
break;
case 1: /* Status Register */
missing("not writable");
- data = s->mdimem[reg];
break;
case 2: /* PHY Identification Register (Word 1) */
case 3: /* PHY Identification Register (Word 2) */
@@ -1230,7 +1229,8 @@ static void eepro100_write_mdi(EEPRO100State *s)
default:
missing("not implemented");
}
- s->mdimem[reg] = data;
+ s->mdimem[reg] &= eepro100_mdi_mask[reg];
+ s->mdimem[reg] |= data & ~eepro100_mdi_mask[reg];
} else if (opcode == 2) {
/* MDI read */
switch (reg) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 00b5e07ddd..e51d753cee 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1542,7 +1542,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
- n->max_queues = MAX(n->nic_conf.queues, 1);
+ n->max_queues = MAX(n->nic_conf.peers.queues, 1);
n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
n->curr_queues = 1;
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index af49c4667d..6a72ef4814 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -153,8 +153,8 @@ static int spapr_nvram_init(VIOsPAPRDevice *dev)
return -1;
}
- spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
- spapr_rtas_register("nvram-store", rtas_nvram_store);
+ spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch);
+ spapr_rtas_register(RTAS_NVRAM_STORE, "nvram-store", rtas_nvram_store);
return 0;
}
diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c
index e72fe2a70b..21f805f314 100644
--- a/hw/pci-host/uninorth.c
+++ b/hw/pci-host/uninorth.c
@@ -230,7 +230,7 @@ PCIBus *pci_pmac_init(qemu_irq *pic,
d = UNI_NORTH_PCI_HOST_BRIDGE(dev);
memory_region_init(&d->pci_mmio, OBJECT(d), "pci-mmio", 0x100000000ULL);
memory_region_init_alias(&d->pci_hole, OBJECT(d), "pci-hole", &d->pci_mmio,
- 0x80000000ULL, 0x70000000ULL);
+ 0x80000000ULL, 0x10000000ULL);
memory_region_add_subregion(address_space_mem, 0x80000000ULL,
&d->pci_hole);
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index ea747f0a20..edd44d03e7 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -4,6 +4,9 @@ obj-y += ppc.o ppc_booke.o
obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
obj-$(CONFIG_PSERIES) += spapr_pci.o
+ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
+obj-y += spapr_pci_vfio.o
+endif
# PowerPC 4xx boards
obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index a973c18d88..bb2e75fe1b 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -316,10 +316,15 @@ static int ppce500_load_device_tree(MachineState *machine,
* device it finds in the dt as serial output device. And we generate
* devices in reverse order to the dt.
*/
- dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
- soc, mpic, "serial1", 1, false);
- dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
- soc, mpic, "serial0", 0, true);
+ if (serial_hds[1]) {
+ dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
+ soc, mpic, "serial1", 1, false);
+ }
+
+ if (serial_hds[0]) {
+ dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
+ soc, mpic, "serial0", 0, true);
+ }
snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
MPC8544_UTIL_OFFSET);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index e493dc1b4b..89d3cadf19 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -373,18 +373,11 @@ static void ppc_core99_init(MachineState *machine)
machine_arch = ARCH_MAC99;
}
/* init basic PC hardware */
- pci_vga_init(pci_bus);
-
escc_mem = escc_init(0, pic[0x25], pic[0x24],
serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
memory_region_init_alias(escc_bar, NULL, "escc-bar",
escc_mem, 0, memory_region_size(escc_mem));
- for(i = 0; i < nb_nics; i++)
- pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
-
- ide_drive_get(hd, MAX_IDE_BUS);
-
macio = pci_create(pci_bus, -1, TYPE_NEWWORLD_MACIO);
dev = DEVICE(macio);
qdev_connect_gpio_out(dev, 0, pic[0x19]); /* CUDA */
@@ -395,6 +388,8 @@ static void ppc_core99_init(MachineState *machine)
macio_init(macio, pic_mem, escc_bar);
/* We only emulate 2 out of 3 IDE controllers for now */
+ ide_drive_get(hd, MAX_IDE_BUS);
+
macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
"ide[0]"));
macio_ide_init_drives(macio_ide, hd);
@@ -420,8 +415,15 @@ static void ppc_core99_init(MachineState *machine)
}
}
- if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
+ pci_vga_init(pci_bus);
+
+ if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) {
graphic_depth = 15;
+ }
+
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
+ }
/* The NewWorld NVRAM is not located in the MacIO device */
dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 82f183f173..a8ba916970 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -85,16 +85,16 @@
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
+typedef struct sPAPRMachineState sPAPRMachineState;
-typedef struct SPAPRMachine SPAPRMachine;
#define TYPE_SPAPR_MACHINE "spapr-machine"
#define SPAPR_MACHINE(obj) \
- OBJECT_CHECK(SPAPRMachine, (obj), TYPE_SPAPR_MACHINE)
+ OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
/**
- * SPAPRMachine:
+ * sPAPRMachineState:
*/
-struct SPAPRMachine {
+struct sPAPRMachineState {
/*< private >*/
MachineState parent_obj;
@@ -102,76 +102,8 @@ struct SPAPRMachine {
char *kvm_type;
};
-
sPAPREnvironment *spapr;
-int spapr_allocate_irq(int hint, bool lsi)
-{
- int irq;
-
- if (hint) {
- irq = hint;
- if (hint >= spapr->next_irq) {
- spapr->next_irq = hint + 1;
- }
- /* FIXME: we should probably check for collisions somehow */
- } else {
- irq = spapr->next_irq++;
- }
-
- /* Configure irq type */
- if (!xics_get_qirq(spapr->icp, irq)) {
- return 0;
- }
-
- xics_set_irq_type(spapr->icp, irq, lsi);
-
- return irq;
-}
-
-/*
- * Allocate block of consequtive IRQs, returns a number of the first.
- * If msi==true, aligns the first IRQ number to num.
- */
-int spapr_allocate_irq_block(int num, bool lsi, bool msi)
-{
- int first = -1;
- int i, hint = 0;
-
- /*
- * MSIMesage::data is used for storing VIRQ so
- * it has to be aligned to num to support multiple
- * MSI vectors. MSI-X is not affected by this.
- * The hint is used for the first IRQ, the rest should
- * be allocated continuously.
- */
- if (msi) {
- assert((num == 1) || (num == 2) || (num == 4) ||
- (num == 8) || (num == 16) || (num == 32));
- hint = (spapr->next_irq + num - 1) & ~(num - 1);
- }
-
- for (i = 0; i < num; ++i) {
- int irq;
-
- irq = spapr_allocate_irq(hint, lsi);
- if (!irq) {
- return -1;
- }
-
- if (0 == i) {
- first = irq;
- hint = 0;
- }
-
- /* If the above doesn't create a consecutive block then that's
- * an internal bug */
- assert(irq == (first + i));
- }
-
- return first;
-}
-
static XICSState *try_create_xics(const char *type, int nr_servers,
int nr_irqs)
{
@@ -442,6 +374,9 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
if (boot_device) {
_FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
}
+ if (boot_menu) {
+ _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
+ }
_FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
_FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
_FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));
@@ -956,7 +891,7 @@ static const VMStateDescription vmstate_spapr = {
.version_id = 2,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT32(next_irq, sPAPREnvironment),
+ VMSTATE_UNUSED(4), /* used to be @next_irq */
/* RTC offset */
VMSTATE_UINT64(rtc_offset, sPAPREnvironment),
@@ -1360,7 +1295,6 @@ static void ppc_spapr_init(MachineState *machine)
/* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads,
XICS_IRQS);
- spapr->next_irq = XICS_IRQ_BASE;
/* init CPUs */
if (cpu_model == NULL) {
@@ -1619,14 +1553,14 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
static char *spapr_get_kvm_type(Object *obj, Error **errp)
{
- SPAPRMachine *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *sm = SPAPR_MACHINE(obj);
return g_strdup(sm->kvm_type);
}
static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
{
- SPAPRMachine *sm = SPAPR_MACHINE(obj);
+ sPAPRMachineState *sm = SPAPR_MACHINE(obj);
g_free(sm->kvm_type);
sm->kvm_type = g_strdup(value);
@@ -1660,7 +1594,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
static const TypeInfo spapr_machine_info = {
.name = TYPE_SPAPR_MACHINE,
.parent = TYPE_MACHINE,
- .instance_size = sizeof(SPAPRMachine),
+ .instance_size = sizeof(sPAPRMachineState),
.instance_init = spapr_machine_initfn,
.class_init = spapr_machine_class_init,
.interfaces = (InterfaceInfo[]) {
@@ -1669,9 +1603,25 @@ static const TypeInfo spapr_machine_info = {
},
};
+static void spapr_machine_2_1_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+
+ mc->name = "pseries-2.1";
+ mc->desc = "pSeries Logical Partition (PAPR compliant) v2.1";
+ mc->is_default = 0;
+}
+
+static const TypeInfo spapr_machine_2_1_info = {
+ .name = TYPE_SPAPR_MACHINE "2.1",
+ .parent = TYPE_SPAPR_MACHINE,
+ .class_init = spapr_machine_2_1_class_init,
+};
+
static void spapr_machine_register_types(void)
{
type_register_static(&spapr_machine_info);
+ type_register_static(&spapr_machine_2_1_info);
}
type_init(spapr_machine_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 16fa49e886..1b6157dec4 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -314,8 +314,9 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
void spapr_events_init(sPAPREnvironment *spapr)
{
- spapr->epow_irq = spapr_allocate_msi(0);
+ spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
spapr->epow_notifier.notify = spapr_powerdown_req;
qemu_register_powerdown_notifier(&spapr->epow_notifier);
- spapr_rtas_register("check-exception", check_exception);
+ spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
+ check_exception);
}
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 9e49ec4a5c..698ae60953 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -118,7 +118,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
tcet->nb_table <<
tcet->page_shift,
- &tcet->fd);
+ &tcet->fd,
+ tcet->vfio_accel);
}
if (!tcet->table) {
@@ -142,7 +143,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
- uint32_t nb_table)
+ uint32_t nb_table,
+ bool vfio_accel)
{
sPAPRTCETable *tcet;
@@ -161,6 +163,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
tcet->bus_offset = bus_offset;
tcet->page_shift = page_shift;
tcet->nb_table = nb_table;
+ tcet->vfio_accel = vfio_accel;
object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 988f8cb858..9ed39a93b7 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -220,36 +220,12 @@ static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
/*
- * Find an entry with config_addr or returns the empty one if not found AND
- * alloc_new is set.
- * At the moment the msi_table entries are never released so there is
- * no point to look till the end of the list if we need to find the free entry.
- */
-static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
- bool alloc_new)
-{
- int i;
-
- for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
- if (!phb->msi_table[i].nvec) {
- break;
- }
- if (phb->msi_table[i].config_addr == config_addr) {
- return i;
- }
- }
- if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
- trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
- return i;
- }
-
- return -1;
-}
-
-/*
* Set MSI/MSIX message data.
* This is required for msi_notify()/msix_notify() which
* will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
*/
static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
unsigned first_irq, unsigned req_num)
@@ -263,9 +239,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
return;
}
- for (i = 0; i < req_num; ++i, ++msg.data) {
+ for (i = 0; i < req_num; ++i) {
msix_set_message(pdev, i, msg);
trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+ if (addr) {
+ ++msg.data;
+ }
}
}
@@ -280,9 +259,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
unsigned int seq_num = rtas_ld(args, 5);
unsigned int ret_intr_type;
- int ndev, irq, max_irqs = 0;
+ unsigned int irq, max_irqs = 0, num = 0;
sPAPRPHBState *phb = NULL;
PCIDevice *pdev = NULL;
+ bool msix = false;
+ spapr_pci_msi *msi;
+ int *config_addr_key;
switch (func) {
case RTAS_CHANGE_MSI_FN:
@@ -310,13 +292,18 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
/* Releasing MSIs */
if (!req_num) {
- ndev = spapr_msicfg_find(phb, config_addr, false);
- if (ndev < 0) {
- trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+ msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+ if (!msi) {
+ trace_spapr_pci_msi("Releasing wrong config", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
- trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
+
+ xics_free(spapr->icp, msi->first_irq, msi->num);
+ spapr_msi_setmsg(pdev, 0, msix, 0, num);
+ g_hash_table_remove(phb->msi, &config_addr);
+
+ trace_spapr_pci_msi("Released MSIs", config_addr);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, 0);
return;
@@ -324,15 +311,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
/* Enabling MSI */
- /* Find a device number in the map to add or reuse the existing one */
- ndev = spapr_msicfg_find(phb, config_addr, true);
- if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
- error_report("No free entry for a new MSI device");
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
- trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
-
/* Check if the device supports as many IRQs as requested */
if (ret_intr_type == RTAS_TYPE_MSI) {
max_irqs = msi_nr_vectors_allocated(pdev);
@@ -340,48 +318,47 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
max_irqs = pdev->msix_entries_nr;
}
if (!max_irqs) {
- error_report("Requested interrupt type %d is not enabled for device#%d",
- ret_intr_type, ndev);
+ error_report("Requested interrupt type %d is not enabled for device %x",
+ ret_intr_type, config_addr);
rtas_st(rets, 0, -1); /* Hardware error */
return;
}
/* Correct the number if the guest asked for too many */
if (req_num > max_irqs) {
+ trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
req_num = max_irqs;
+ irq = 0; /* to avoid misleading trace */
+ goto out;
}
- /* Check if there is an old config and MSI number has not changed */
- if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
- /* Unexpected behaviour */
- error_report("Cannot reuse MSI config for device#%d", ndev);
+ /* Allocate MSIs */
+ irq = xics_alloc_block(spapr->icp, 0, req_num, false,
+ ret_intr_type == RTAS_TYPE_MSI);
+ if (!irq) {
+ error_report("Cannot allocate MSIs for device %x", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
- /* There is no cached config, allocate MSIs */
- if (!phb->msi_table[ndev].nvec) {
- irq = spapr_allocate_irq_block(req_num, false,
- ret_intr_type == RTAS_TYPE_MSI);
- if (irq < 0) {
- error_report("Cannot allocate MSIs for device#%d", ndev);
- rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
- return;
- }
- phb->msi_table[ndev].irq = irq;
- phb->msi_table[ndev].nvec = req_num;
- phb->msi_table[ndev].config_addr = config_addr;
- }
-
/* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
- phb->msi_table[ndev].irq, req_num);
+ irq, req_num);
+ /* Add MSI device to cache */
+ msi = g_new(spapr_pci_msi, 1);
+ msi->first_irq = irq;
+ msi->num = req_num;
+ config_addr_key = g_new(int, 1);
+ *config_addr_key = config_addr;
+ g_hash_table_insert(phb->msi, config_addr_key, msi);
+
+out:
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, req_num);
rtas_st(rets, 2, ++seq_num);
rtas_st(rets, 3, ret_intr_type);
- trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
+ trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
}
static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
@@ -395,25 +372,28 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
uint32_t config_addr = rtas_ld(args, 0);
uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
- int ndev;
sPAPRPHBState *phb = NULL;
+ PCIDevice *pdev = NULL;
+ spapr_pci_msi *msi;
- /* Fins sPAPRPHBState */
+ /* Find sPAPRPHBState */
phb = find_phb(spapr, buid);
- if (!phb) {
+ if (phb) {
+ pdev = find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
/* Find device descriptor and start IRQ */
- ndev = spapr_msicfg_find(phb, config_addr, false);
- if (ndev < 0) {
- trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
+ msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr);
+ if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) {
+ trace_spapr_pci_msi("Failed to return vector", config_addr);
rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
return;
}
-
- intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
+ intr_src_num = msi->first_irq + ioa_intr_num;
trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
intr_src_num);
@@ -634,7 +614,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
for (i = 0; i < PCI_NUM_PINS; i++) {
uint32_t irq;
- irq = spapr_allocate_lsi(0);
+ irq = xics_alloc_block(spapr->icp, 0, 1, true, false);
if (!irq) {
error_setg(errp, "spapr_allocate_lsi failed");
return;
@@ -649,6 +629,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
info->finish_realize(sphb, errp);
+
+ sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
}
static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
@@ -658,7 +640,7 @@ static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
0,
SPAPR_TCE_PAGE_SHIFT,
- 0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
+ 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
if (!tcet) {
error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
@@ -712,22 +694,69 @@ static const VMStateDescription vmstate_spapr_pci_lsi = {
};
static const VMStateDescription vmstate_spapr_pci_msi = {
- .name = "spapr_pci/lsi",
+ .name = "spapr_pci/msi",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
- VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
- VMSTATE_UINT32(irq, struct spapr_pci_msi),
- VMSTATE_UINT32(nvec, struct spapr_pci_msi),
-
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(key, spapr_pci_msi_mig),
+ VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig),
+ VMSTATE_UINT32(value.num, spapr_pci_msi_mig),
VMSTATE_END_OF_LIST()
},
};
+static void spapr_pci_pre_save(void *opaque)
+{
+ sPAPRPHBState *sphb = opaque;
+ GHashTableIter iter;
+ gpointer key, value;
+ int i;
+
+ if (sphb->msi_devs) {
+ g_free(sphb->msi_devs);
+ sphb->msi_devs = NULL;
+ }
+ sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+ if (!sphb->msi_devs_num) {
+ return;
+ }
+ sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
+
+ g_hash_table_iter_init(&iter, sphb->msi);
+ for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+ sphb->msi_devs[i].key = *(uint32_t *) key;
+ sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+ }
+}
+
+static int spapr_pci_post_load(void *opaque, int version_id)
+{
+ sPAPRPHBState *sphb = opaque;
+ gpointer key, value;
+ int i;
+
+ for (i = 0; i < sphb->msi_devs_num; ++i) {
+ key = g_memdup(&sphb->msi_devs[i].key,
+ sizeof(sphb->msi_devs[i].key));
+ value = g_memdup(&sphb->msi_devs[i].value,
+ sizeof(sphb->msi_devs[i].value));
+ g_hash_table_insert(sphb->msi, key, value);
+ }
+ if (sphb->msi_devs) {
+ g_free(sphb->msi_devs);
+ sphb->msi_devs = NULL;
+ }
+ sphb->msi_devs_num = 0;
+
+ return 0;
+}
+
static const VMStateDescription vmstate_spapr_pci = {
.name = "spapr_pci",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .pre_save = spapr_pci_pre_save,
+ .post_load = spapr_pci_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
@@ -737,9 +766,9 @@ static const VMStateDescription vmstate_spapr_pci = {
VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
- VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
- vmstate_spapr_pci_msi, struct spapr_pci_msi),
-
+ VMSTATE_INT32(msi_devs_num, sPAPRPHBState),
+ VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0,
+ vmstate_spapr_pci_msi, spapr_pci_msi_mig),
VMSTATE_END_OF_LIST()
},
};
@@ -909,14 +938,20 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
void spapr_pci_rtas_init(void)
{
- spapr_rtas_register("read-pci-config", rtas_read_pci_config);
- spapr_rtas_register("write-pci-config", rtas_write_pci_config);
- spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
- spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
+ spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
+ rtas_read_pci_config);
+ spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
+ rtas_write_pci_config);
+ spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
+ rtas_ibm_read_pci_config);
+ spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
+ rtas_ibm_write_pci_config);
if (msi_supported) {
- spapr_rtas_register("ibm,query-interrupt-source-number",
+ spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+ "ibm,query-interrupt-source-number",
rtas_ibm_query_interrupt_source_number);
- spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
+ spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
+ rtas_ibm_change_msi);
}
}
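Note: the pre_save/post_load pair above exists because VMState cannot walk a GHashTable directly, so the per-device MSI configuration is flattened into a plain array for the wire and rebuilt on the destination. A minimal standalone sketch of that pattern (illustrative names, not part of the patch):

    /* Flatten a GHashTable keyed by uint32_t into a counted array before
     * save, rebuild it after load - the same shape as the hooks above. */
    #include <glib.h>
    #include <stdint.h>

    typedef struct { uint32_t first_irq; uint32_t num; } Msi;
    typedef struct { uint32_t key; Msi value; } MsiMig;

    static MsiMig *msi_flatten(GHashTable *msi, int32_t *count)
    {
        GHashTableIter iter;
        gpointer key, value;
        MsiMig *out;
        int i = 0;

        *count = g_hash_table_size(msi);
        out = g_new0(MsiMig, *count);
        g_hash_table_iter_init(&iter, msi);
        while (g_hash_table_iter_next(&iter, &key, &value)) {
            out[i].key = *(uint32_t *)key;
            out[i].value = *(Msi *)value;
            i++;
        }
        return out;            /* streamed as an int32-counted VARRAY */
    }

    static void msi_rebuild(GHashTable *msi, const MsiMig *in, int32_t count)
    {
        int32_t i;

        for (i = 0; i < count; i++) {
            g_hash_table_insert(msi,
                                g_memdup(&in[i].key, sizeof(in[i].key)),
                                g_memdup(&in[i].value, sizeof(in[i].value)));
        }
    }

The temporary msi_devs array is freed again in both hooks, so it only lives for the duration of the migration stream.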
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 0000000000..d3bddf2887
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,102 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "linux/vfio.h"
+#include "hw/misc/vfio.h"
+
+static Property spapr_phb_vfio_properties[] = {
+ DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
+{
+ sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
+ struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
+ int ret;
+ sPAPRTCETable *tcet;
+ uint32_t liobn = svphb->phb.dma_liobn;
+
+ if (svphb->iommugroupid == -1) {
+ error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
+ return;
+ }
+
+ ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+ VFIO_CHECK_EXTENSION,
+ (void *) VFIO_SPAPR_TCE_IOMMU);
+ if (ret != 1) {
+ error_setg_errno(errp, -ret,
+ "spapr-vfio: SPAPR extension is not supported");
+ return;
+ }
+
+ ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
+ VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "spapr-vfio: get info from container failed");
+ return;
+ }
+
+ tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
+ SPAPR_TCE_PAGE_SHIFT,
+ info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
+ true);
+ if (!tcet) {
+ error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
+ return;
+ }
+
+ /* Register default 32bit DMA window */
+ memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+ spapr_tce_get_iommu(tcet));
+}
+
+static void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+ /* Do nothing */
+}
+
+static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
+
+ dc->props = spapr_phb_vfio_properties;
+ dc->reset = spapr_phb_vfio_reset;
+ spc->finish_realize = spapr_phb_vfio_finish_realize;
+}
+
+static const TypeInfo spapr_phb_vfio_info = {
+ .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
+ .parent = TYPE_SPAPR_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(sPAPRPHBVFIOState),
+ .class_init = spapr_phb_vfio_class_init,
+ .class_size = sizeof(sPAPRPHBClass),
+};
+
+static void spapr_pci_vfio_register_types(void)
+{
+ type_register_static(&spapr_phb_vfio_info);
+}
+
+type_init(spapr_pci_vfio_register_types)
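Note: the two PHB flavours differ only in finish_realize(): the emulated bridge above installs a fixed 1GB 32-bit DMA window at bus offset 0 (0x40000000 >> SPAPR_TCE_PAGE_SHIFT TCE entries), while the VFIO bridge asks the host kernel for the group's real window via VFIO_IOMMU_SPAPR_TCE_GET_INFO and sizes the guest-visible TCE table from that. Worked numbers for a typical host group (illustrative, and assuming the usual 4KB TCE page, i.e. SPAPR_TCE_PAGE_SHIFT == 12):

    info.dma32_window_start = 0x00000000
    info.dma32_window_size  = 0x40000000                        /* 1GB */
    nb_table = info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT
             = 0x40000000 >> 12 = 0x40000 TCE entries

which matches the window the emulated PHB creates for itself.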
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 8d08539baa..9ba1ba69f9 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -36,9 +36,6 @@
#include <libfdt.h>
-#define TOKEN_BASE 0x2000
-#define TOKEN_MAX 0x100
-
static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
@@ -225,8 +222,6 @@ static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
env->msr = 0;
}
-#define DIAGNOSTICS_RUN_MODE 42
-
static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
@@ -236,16 +231,28 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
target_ulong parameter = rtas_ld(args, 0);
target_ulong buffer = rtas_ld(args, 1);
target_ulong length = rtas_ld(args, 2);
- target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
+ target_ulong ret = RTAS_OUT_SUCCESS;
switch (parameter) {
- case DIAGNOSTICS_RUN_MODE:
- if (length == 1) {
- rtas_st(buffer, 0, 0);
- ret = RTAS_OUT_SUCCESS;
- }
+ case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
+ char *param_val = g_strdup_printf("MaxEntCap=%d,MaxPlatProcs=%d",
+ max_cpus, smp_cpus);
+ rtas_st_buffer(buffer, length, (uint8_t *)param_val, strlen(param_val));
+ g_free(param_val);
break;
}
+ case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: {
+ uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED;
+
+ rtas_st_buffer(buffer, length, &param_val, sizeof(param_val));
+ break;
+ }
+ case RTAS_SYSPARM_UUID:
+ rtas_st_buffer(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0));
+ break;
+ default:
+ ret = RTAS_OUT_NOT_SUPPORTED;
+ }
rtas_st(rets, 0, ret);
}
@@ -260,7 +267,9 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
target_ulong ret = RTAS_OUT_NOT_SUPPORTED;
switch (parameter) {
- case DIAGNOSTICS_RUN_MODE:
+ case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS:
+ case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE:
+ case RTAS_SYSPARM_UUID:
ret = RTAS_OUT_NOT_AUTHORIZED;
break;
}
@@ -271,17 +280,14 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
static struct rtas_call {
const char *name;
spapr_rtas_fn fn;
-} rtas_table[TOKEN_MAX];
-
-static struct rtas_call *rtas_next = rtas_table;
+} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
- if ((token >= TOKEN_BASE)
- && ((token - TOKEN_BASE) < TOKEN_MAX)) {
- struct rtas_call *call = rtas_table + (token - TOKEN_BASE);
+ if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) {
+ struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE);
if (call->fn) {
call->fn(cpu, spapr, token, nargs, args, nret, rets);
@@ -303,23 +309,22 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
return H_PARAMETER;
}
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
{
- int i;
-
- for (i = 0; i < (rtas_next - rtas_table); i++) {
- if (strcmp(name, rtas_table[i].name) == 0) {
- fprintf(stderr, "RTAS call \"%s\" registered twice\n", name);
- exit(1);
- }
+ if (!((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX))) {
+ fprintf(stderr, "RTAS invalid token 0x%x\n", token);
+ exit(1);
}
- assert(rtas_next < (rtas_table + TOKEN_MAX));
-
- rtas_next->name = name;
- rtas_next->fn = fn;
+ token -= RTAS_TOKEN_BASE;
+ if (rtas_table[token].name) {
+ fprintf(stderr, "RTAS call \"%s\" is registered already as 0x%x\n",
+ rtas_table[token].name, token);
+ exit(1);
+ }
- return (rtas_next++ - rtas_table) + TOKEN_BASE;
+ rtas_table[token].name = name;
+ rtas_table[token].fn = fn;
}
int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -359,7 +364,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
return ret;
}
- for (i = 0; i < TOKEN_MAX; i++) {
+ for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) {
struct rtas_call *call = &rtas_table[i];
if (!call->name) {
@@ -367,7 +372,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
}
ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name,
- i + TOKEN_BASE);
+ i + RTAS_TOKEN_BASE);
if (ret < 0) {
fprintf(stderr, "Couldn't add rtas token for %s: %s\n",
call->name, fdt_strerror(ret));
@@ -380,18 +385,24 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
static void core_rtas_register_types(void)
{
- spapr_rtas_register("display-character", rtas_display_character);
- spapr_rtas_register("get-time-of-day", rtas_get_time_of_day);
- spapr_rtas_register("set-time-of-day", rtas_set_time_of_day);
- spapr_rtas_register("power-off", rtas_power_off);
- spapr_rtas_register("system-reboot", rtas_system_reboot);
- spapr_rtas_register("query-cpu-stopped-state",
+ spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
+ rtas_display_character);
+ spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day",
+ rtas_get_time_of_day);
+ spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day",
+ rtas_set_time_of_day);
+ spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off);
+ spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot",
+ rtas_system_reboot);
+ spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state",
rtas_query_cpu_stopped_state);
- spapr_rtas_register("start-cpu", rtas_start_cpu);
- spapr_rtas_register("stop-self", rtas_stop_self);
- spapr_rtas_register("ibm,get-system-parameter",
+ spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
+ spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
+ spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
+ "ibm,get-system-parameter",
rtas_ibm_get_system_parameter);
- spapr_rtas_register("ibm,set-system-parameter",
+ spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
+ "ibm,set-system-parameter",
rtas_ibm_set_system_parameter);
}
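Note: every RTAS call now registers at a fixed, compile-time token (declared in include/hw/ppc/spapr.h below) instead of taking the next free slot, so the token a guest sees for a given service no longer depends on registration order. A hedged sketch of how one more call would be added under this scheme (the token name and handler are hypothetical, and RTAS_TOKEN_MAX would have to grow with it):

    #define RTAS_IBM_EXAMPLE_CALL   (RTAS_TOKEN_BASE + 0x21)  /* hypothetical */

    static void rtas_ibm_example_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
                                      uint32_t token, uint32_t nargs,
                                      target_ulong args,
                                      uint32_t nret, target_ulong rets)
    {
        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
    }

    /* in the subsystem's existing registration hook: */
    spapr_rtas_register(RTAS_IBM_EXAMPLE_CALL, "ibm,example-call",
                        rtas_ibm_example_call);

spapr_rtas_register() now aborts on an out-of-range or already-taken token instead of returning a dynamically assigned one.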
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 04e16ae04d..dc9e46a7b1 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -449,7 +449,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
dev->qdev.id = id;
}
- dev->irq = spapr_allocate_msi(dev->irq);
+ dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false);
if (!dev->irq) {
return -1;
}
@@ -460,7 +460,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
0,
SPAPR_TCE_PAGE_SHIFT,
pc->rtce_window_size >>
- SPAPR_TCE_PAGE_SHIFT);
+ SPAPR_TCE_PAGE_SHIFT, false);
address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
}
@@ -517,8 +517,9 @@ VIOsPAPRBus *spapr_vio_bus_init(void)
spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
/* RTAS calls */
- spapr_rtas_register("ibm,set-tce-bypass", rtas_set_tce_bypass);
- spapr_rtas_register("quiesce", rtas_quiesce);
+ spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+ rtas_set_tce_bypass);
+ spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
return bus;
}
diff --git a/hw/watchdog/watchdog.c b/hw/watchdog/watchdog.c
index 4aebd34924..9f607d42bb 100644
--- a/hw/watchdog/watchdog.c
+++ b/hw/watchdog/watchdog.c
@@ -106,7 +106,7 @@ int select_watchdog_action(const char *p)
*/
void watchdog_perform_action(void)
{
- switch(watchdog_action) {
+ switch (watchdog_action) {
case WDT_RESET: /* same as 'system_reset' in monitor */
qapi_event_send_watchdog(WATCHDOG_EXPIRATION_ACTION_RESET, &error_abort);
qemu_system_reset_request();
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 67ca076380..f3cf63f9f5 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -225,7 +225,7 @@ void block_job_pause(BlockJob *job);
void block_job_resume(BlockJob *job);
/**
- * block_job_event_cancle:
+ * block_job_event_cancelled:
* @job: The job whose information is requested.
*
* Send a BLOCK_JOB_CANCELLED event for the specified job.
diff --git a/include/hw/misc/vfio.h b/include/hw/misc/vfio.h
new file mode 100644
index 0000000000..0b26cd8e11
--- /dev/null
+++ b/include/hw/misc/vfio.h
@@ -0,0 +1,9 @@
+#ifndef VFIO_API_H
+#define VFIO_API_H
+
+#include "qemu/typedefs.h"
+
+extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
+ int req, void *param);
+
+#endif
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0934518bbd..32f0aa7d10 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -27,13 +27,15 @@
#include "hw/pci/pci_host.h"
#include "hw/ppc/xics.h"
-#define SPAPR_MSIX_MAX_DEVS 32
-
#define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
#define SPAPR_PCI_HOST_BRIDGE(obj) \
OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+ OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
+
#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
@@ -41,6 +43,7 @@
typedef struct sPAPRPHBClass sPAPRPHBClass;
typedef struct sPAPRPHBState sPAPRPHBState;
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
struct sPAPRPHBClass {
PCIHostBridgeClass parent_class;
@@ -48,6 +51,16 @@ struct sPAPRPHBClass {
void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
};
+typedef struct spapr_pci_msi {
+ uint32_t first_irq;
+ uint32_t num;
+} spapr_pci_msi;
+
+typedef struct spapr_pci_msi_mig {
+ uint32_t key;
+ spapr_pci_msi value;
+} spapr_pci_msi_mig;
+
struct sPAPRPHBState {
PCIHostState parent_obj;
@@ -67,15 +80,20 @@ struct sPAPRPHBState {
uint32_t irq;
} lsi_table[PCI_NUM_PINS];
- struct spapr_pci_msi {
- uint32_t config_addr;
- uint32_t irq;
- uint32_t nvec;
- } msi_table[SPAPR_MSIX_MAX_DEVS];
+ GHashTable *msi;
+ /* Temporary cache for migration purposes */
+ int32_t msi_devs_num;
+ spapr_pci_msi_mig *msi_devs;
QLIST_ENTRY(sPAPRPHBState) list;
};
+struct sPAPRPHBVFIOState {
+ sPAPRPHBState phb;
+
+ int32_t iommugroupid;
+};
+
#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL
#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 08c301f38d..bbba51a703 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -27,7 +27,6 @@ typedef struct sPAPREnvironment {
long rtas_size;
void *fdt_skel;
target_ulong entry_point;
- uint32_t next_irq;
uint64_t rtc_offset;
struct PPCTimebase tb;
bool has_graphics;
@@ -339,16 +338,6 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
int spapr_allocate_irq(int hint, bool lsi);
int spapr_allocate_irq_block(int num, bool lsi, bool msi);
-static inline int spapr_allocate_msi(int hint)
-{
- return spapr_allocate_irq(hint, false);
-}
-
-static inline int spapr_allocate_lsi(int hint)
-{
- return spapr_allocate_irq(hint, true);
-}
-
/* RTAS return codes */
#define RTAS_OUT_SUCCESS 0
#define RTAS_OUT_NO_ERRORS_FOUND 1
@@ -358,6 +347,58 @@ static inline int spapr_allocate_lsi(int hint)
#define RTAS_OUT_NOT_SUPPORTED -3
#define RTAS_OUT_NOT_AUTHORIZED -9002
+/* RTAS tokens */
+#define RTAS_TOKEN_BASE 0x2000
+
+#define RTAS_DISPLAY_CHARACTER (RTAS_TOKEN_BASE + 0x00)
+#define RTAS_GET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x01)
+#define RTAS_SET_TIME_OF_DAY (RTAS_TOKEN_BASE + 0x02)
+#define RTAS_POWER_OFF (RTAS_TOKEN_BASE + 0x03)
+#define RTAS_SYSTEM_REBOOT (RTAS_TOKEN_BASE + 0x04)
+#define RTAS_QUERY_CPU_STOPPED_STATE (RTAS_TOKEN_BASE + 0x05)
+#define RTAS_START_CPU (RTAS_TOKEN_BASE + 0x06)
+#define RTAS_STOP_SELF (RTAS_TOKEN_BASE + 0x07)
+#define RTAS_IBM_GET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x08)
+#define RTAS_IBM_SET_SYSTEM_PARAMETER (RTAS_TOKEN_BASE + 0x09)
+#define RTAS_IBM_SET_XIVE (RTAS_TOKEN_BASE + 0x0A)
+#define RTAS_IBM_GET_XIVE (RTAS_TOKEN_BASE + 0x0B)
+#define RTAS_IBM_INT_OFF (RTAS_TOKEN_BASE + 0x0C)
+#define RTAS_IBM_INT_ON (RTAS_TOKEN_BASE + 0x0D)
+#define RTAS_CHECK_EXCEPTION (RTAS_TOKEN_BASE + 0x0E)
+#define RTAS_EVENT_SCAN (RTAS_TOKEN_BASE + 0x0F)
+#define RTAS_IBM_SET_TCE_BYPASS (RTAS_TOKEN_BASE + 0x10)
+#define RTAS_QUIESCE (RTAS_TOKEN_BASE + 0x11)
+#define RTAS_NVRAM_FETCH (RTAS_TOKEN_BASE + 0x12)
+#define RTAS_NVRAM_STORE (RTAS_TOKEN_BASE + 0x13)
+#define RTAS_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x14)
+#define RTAS_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x15)
+#define RTAS_IBM_READ_PCI_CONFIG (RTAS_TOKEN_BASE + 0x16)
+#define RTAS_IBM_WRITE_PCI_CONFIG (RTAS_TOKEN_BASE + 0x17)
+#define RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER (RTAS_TOKEN_BASE + 0x18)
+#define RTAS_IBM_CHANGE_MSI (RTAS_TOKEN_BASE + 0x19)
+#define RTAS_SET_INDICATOR (RTAS_TOKEN_BASE + 0x1A)
+#define RTAS_SET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1B)
+#define RTAS_GET_POWER_LEVEL (RTAS_TOKEN_BASE + 0x1C)
+#define RTAS_GET_SENSOR_STATE (RTAS_TOKEN_BASE + 0x1D)
+#define RTAS_IBM_CONFIGURE_CONNECTOR (RTAS_TOKEN_BASE + 0x1E)
+#define RTAS_IBM_OS_TERM (RTAS_TOKEN_BASE + 0x1F)
+#define RTAS_IBM_EXTENDED_OS_TERM (RTAS_TOKEN_BASE + 0x20)
+
+#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x21)
+
+/* RTAS ibm,get-system-parameter token values */
+#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20
+#define RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE 42
+#define RTAS_SYSPARM_UUID 48
+
+/* Possible values for the platform-processor-diagnostics-run-mode parameter
+ * of the RTAS ibm,get-system-parameter call.
+ */
+#define DIAGNOSTICS_RUN_MODE_DISABLED 0
+#define DIAGNOSTICS_RUN_MODE_STAGGERED 1
+#define DIAGNOSTICS_RUN_MODE_IMMEDIATE 2
+#define DIAGNOSTICS_RUN_MODE_PERIODIC 3
+
static inline uint64_t ppc64_phys_to_real(uint64_t addr)
{
return addr & ~0xF000000000000000ULL;
@@ -373,11 +414,24 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
stl_be_phys(&address_space_memory, ppc64_phys_to_real(phys + 4*n), val);
}
+
+static inline void rtas_st_buffer(target_ulong phys, target_ulong phys_len,
+ uint8_t *buffer, uint16_t buffer_len)
+{
+ if (phys_len < 2) {
+ return;
+ }
+ stw_be_phys(&address_space_memory,
+ ppc64_phys_to_real(phys), buffer_len);
+ cpu_physical_memory_write(ppc64_phys_to_real(phys + 2),
+ buffer, MIN(buffer_len, phys_len - 2));
+}
+
typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
-int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets);
@@ -407,6 +461,7 @@ struct sPAPRTCETable {
uint32_t page_shift;
uint64_t *table;
bool bypass;
+ bool vfio_accel;
int fd;
MemoryRegion iommu;
QLIST_ENTRY(sPAPRTCETable) list;
@@ -418,7 +473,8 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
uint64_t bus_offset,
uint32_t page_shift,
- uint32_t nb_table);
+ uint32_t nb_table,
+ bool vfio_accel);
MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
int spapr_dma_dt(void *fdt, int node_off, const char *propname,
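Note: rtas_st_buffer() stores an ibm,get-system-parameter result in the form the handlers above rely on: a two-byte big-endian length holding the full value size, followed by as much of the value as fits in the remaining guest buffer. A standalone sketch of that layout, writing into a local array instead of guest memory (values are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Same layout as rtas_st_buffer(), host-side for demonstration. */
    static void st_buffer(uint8_t *dst, size_t dst_len,
                          const uint8_t *val, uint16_t val_len)
    {
        if (dst_len < 2) {
            return;                   /* no room even for the length field */
        }
        dst[0] = val_len >> 8;        /* 2-byte big-endian length ...      */
        dst[1] = val_len & 0xff;
        memcpy(dst + 2, val, MIN(val_len, dst_len - 2));   /* ... then data */
    }

    int main(void)
    {
        uint8_t buf[10] = { 0 };
        const char *v = "MaxEntCap=4";    /* 11 bytes, longer than buf - 2 */

        st_buffer(buf, sizeof(buf), (const uint8_t *)v, strlen(v));
        /* buf now holds 00 0b 'M' 'a' 'x' 'E' 'n' 't' 'C' 'a': the length
         * field reports 11, but only 8 data bytes fit, so a guest can tell
         * the value was truncated. */
        printf("%02x %02x %.8s\n", buf[0], buf[1], (const char *)(buf + 2));
        return 0;
    }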
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 85e4c8a3dd..a214dd7f28 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -136,7 +136,6 @@ struct ICSState {
uint32_t nr_irqs;
uint32_t offset;
qemu_irq *qirqs;
- bool *islsi;
ICSIRQState *irqs;
XICSState *icp;
};
@@ -150,12 +149,20 @@ struct ICSIRQState {
#define XICS_STATUS_REJECTED 0x4
#define XICS_STATUS_MASKED_PENDING 0x8
uint8_t status;
+/* (flags & XICS_FLAGS_IRQ_MASK) == 0 means the interrupt is not allocated */
+#define XICS_FLAGS_IRQ_LSI 0x1
+#define XICS_FLAGS_IRQ_MSI 0x2
+#define XICS_FLAGS_IRQ_MASK 0x3
+ uint8_t flags;
};
#define XICS_IRQS 1024
qemu_irq xics_get_qirq(XICSState *icp, int irq);
void xics_set_irq_type(XICSState *icp, int irq, bool lsi);
+int xics_alloc(XICSState *icp, int src, int irq_hint, bool lsi);
+int xics_alloc_block(XICSState *icp, int src, int num, bool lsi, bool align);
+void xics_free(XICSState *icp, int irq, int num);
void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu);
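Note: these allocator entry points replace the machine-level spapr_allocate_lsi()/spapr_allocate_msi() helpers removed from spapr.h above; callers ask the interrupt controller directly for a single IRQ (optionally at a hinted number) or for a contiguous, optionally aligned block, and the LSI/MSI nature is recorded per IRQ via the new flags field. A hedged sketch of typical call sites, mirroring the spapr_vio and spapr_pci hunks earlier in this patch (error handling trimmed; a return value of 0 means the allocation failed):

    /* Fragment intended for an sPAPR source file; 'spapr', 'hint' and the
     * counts are illustrative. */
    static void example_irq_alloc(sPAPREnvironment *spapr, int hint)
    {
        int msi_irq, lsi_irq, first;

        /* One MSI, honouring an optional hint (0 = no preference). */
        msi_irq = xics_alloc(spapr->icp, 0 /* src */, hint, false /* lsi */);

        /* One LSI for a PCI pin, any free number. */
        lsi_irq = xics_alloc_block(spapr->icp, 0, 1, true, false);

        /* A naturally aligned block of 8 MSIs. */
        first = xics_alloc_block(spapr->icp, 0, 8, false, true /* align */);

        if (!msi_irq || !lsi_irq || !first) {
            return;                        /* allocation failed */
        }
        xics_free(spapr->icp, first, 8);   /* blocks can be handed back */
    }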
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 71a8a95641..9a001bd28f 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -101,6 +101,7 @@ enum VMStateFlags {
VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
VMS_MUST_EXIST = 0x1000, /* Field must exist in input */
+ VMS_ALLOC = 0x2000, /* Alloc a buffer on the destination */
};
typedef struct {
@@ -429,6 +430,16 @@ extern const VMStateInfo vmstate_info_bitmap;
.offset = offsetof(_state, _field), \
}
+#define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_INT32|VMS_ALLOC|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
.name = (stringify(_field)), \
.version_id = (_version), \
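Note: VMS_ALLOC lets the destination allocate the pointed-to array itself, sized from an int32_t count field that must be migrated first, so variable-length state no longer needs a compile-time maximum (this is what lets spapr_pci drop SPAPR_MSIX_MAX_DEVS). A minimal sketch of a descriptor using the new macro (hypothetical device, same shape as the spapr_pci one above):

    typedef struct Elem {
        uint32_t id;
    } Elem;

    typedef struct MyDevState {
        int32_t nb_elems;      /* element count, sent before the array    */
        Elem *elems;           /* allocated on the destination, VMS_ALLOC */
    } MyDevState;

    static const VMStateDescription vmstate_elem = {
        .name = "mydev/elem",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(id, Elem),
            VMSTATE_END_OF_LIST()
        },
    };

    static const VMStateDescription vmstate_mydev = {
        .name = "mydev",
        .version_id = 1,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_INT32(nb_elems, MyDevState),
            VMSTATE_STRUCT_VARRAY_ALLOC(elems, MyDevState, nb_elems, 0,
                                        vmstate_elem, Elem),
            VMSTATE_END_OF_LIST()
        },
    };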
diff --git a/include/net/net.h b/include/net/net.h
index 8b189da5ee..ed594f9bdb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -24,13 +24,13 @@ struct MACAddr {
typedef struct NICPeers {
NetClientState *ncs[MAX_QUEUE_NUM];
+ int32_t queues;
} NICPeers;
typedef struct NICConf {
MACAddr macaddr;
NICPeers peers;
int32_t bootindex;
- int32_t queues;
} NICConf;
#define DEFINE_NIC_PROPERTIES(_state, _conf) \
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 1248eda272..60777fecf6 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -736,6 +736,14 @@ enum {
QEMU_PPC_FEATURE_TRUE_LE = 0x00000002,
QEMU_PPC_FEATURE_PPC_LE = 0x00000001,
+
+ /* Feature definitions in AT_HWCAP2. */
+ QEMU_PPC_FEATURE2_ARCH_2_07 = 0x80000000, /* ISA 2.07 */
+ QEMU_PPC_FEATURE2_HAS_HTM = 0x40000000, /* Hardware Transactional Memory */
+ QEMU_PPC_FEATURE2_HAS_DSCR = 0x20000000, /* Data Stream Control Register */
+ QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */
+ QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */
+ QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */
};
#define ELF_HWCAP get_elf_hwcap()
@@ -749,6 +757,8 @@ static uint32_t get_elf_hwcap(void)
Altivec/FP/SPE support. Anything else is just a bonus. */
#define GET_FEATURE(flag, feature) \
do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature) \
+ do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64);
GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU);
GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC);
@@ -757,7 +767,36 @@ static uint32_t get_elf_hwcap(void)
GET_FEATURE(PPC_SPE_DOUBLE, QEMU_PPC_FEATURE_HAS_EFP_DOUBLE);
GET_FEATURE(PPC_BOOKE, QEMU_PPC_FEATURE_BOOKE);
GET_FEATURE(PPC_405_MAC, QEMU_PPC_FEATURE_HAS_4xxMAC);
+ GET_FEATURE2(PPC2_DFP, QEMU_PPC_FEATURE_HAS_DFP);
+ GET_FEATURE2(PPC2_VSX, QEMU_PPC_FEATURE_HAS_VSX);
+ GET_FEATURE2((PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |
+ PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206),
+ QEMU_PPC_FEATURE_ARCH_2_06);
+#undef GET_FEATURE
+#undef GET_FEATURE2
+
+ return features;
+}
+
+#define ELF_HWCAP2 get_elf_hwcap2()
+
+static uint32_t get_elf_hwcap2(void)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(thread_cpu);
+ uint32_t features = 0;
+
+#define GET_FEATURE(flag, feature) \
+ do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0)
+#define GET_FEATURE2(flag, feature) \
+ do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0)
+
+ GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL);
+ GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR);
+ GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
+ PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07);
+
#undef GET_FEATURE
+#undef GET_FEATURE2
return features;
}
@@ -774,8 +813,9 @@ static uint32_t get_elf_hwcap(void)
#define DLINFO_ARCH_ITEMS 5
#define ARCH_DLINFO \
do { \
- NEW_AUX_ENT(AT_DCACHEBSIZE, 0x20); \
- NEW_AUX_ENT(AT_ICACHEBSIZE, 0x20); \
+ PowerPCCPU *cpu = POWERPC_CPU(thread_cpu); \
+ NEW_AUX_ENT(AT_DCACHEBSIZE, cpu->env.dcache_line_size); \
+ NEW_AUX_ENT(AT_ICACHEBSIZE, cpu->env.icache_line_size); \
NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \
/* \
* Now handle glibc compatibility. \
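Note: the new feature word is surfaced to the guest as AT_HWCAP2 in the ELF auxiliary vector (it is expected to be emitted by the generic part of elfload.c whenever a target defines ELF_HWCAP2), so guest code can probe the ISA 2.07 bits computed above. A standalone guest-side sketch, assuming glibc's getauxval():

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef AT_HWCAP2
    #define AT_HWCAP2 26
    #endif

    int main(void)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        /* 0x80000000 is QEMU_PPC_FEATURE2_ARCH_2_07 in the table above. */
        printf("AT_HWCAP2 = %#lx (ISA 2.07: %s)\n", hwcap2,
               (hwcap2 & 0x80000000) ? "yes" : "no");
        return 0;
    }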
diff --git a/monitor.c b/monitor.c
index a8ab600c88..5718d0b60a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -570,6 +570,7 @@ monitor_qapi_event_throttle(QAPIEvent event, int64_t rate)
trace_monitor_protocol_event_throttle(event, rate);
evstate->event = event;
+ assert(rate * SCALE_MS <= INT64_MAX);
evstate->rate = rate * SCALE_MS;
evstate->last = 0;
evstate->data = NULL;
@@ -585,9 +586,9 @@ static void monitor_qapi_event_init(void)
monitor_qapi_event_throttle(QAPI_EVENT_RTC_CHANGE, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_WATCHDOG, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_BALLOON_CHANGE, 1000);
- /* limit the rate of quorum events to avoid hammering the management */
monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_REPORT_BAD, 1000);
monitor_qapi_event_throttle(QAPI_EVENT_QUORUM_FAILURE, 1000);
+ monitor_qapi_event_throttle(QAPI_EVENT_VSERPORT_CHANGE, 1000);
qmp_event_set_func_emit(monitor_qapi_event_queue);
}
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 301f6b6b51..a06ba59dad 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
common-obj-$(CONFIG_LINUX) += tap-linux.o
common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7f3d4ae9f3..2e8fedad8d 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
int net_init_bridge(const NetClientOptions *opts, const char *name,
NetClientState *peer);
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+ NetClientState *peer);
#ifdef CONFIG_VDE
int net_init_vde(const NetClientOptions *opts, const char *name,
NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000000..528d95b641
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,757 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+
+/* The buffer size needs to be investigated for optimum numbers and
+ * optimum means of paging in on different systems. This size is
+ * chosen to be sufficient to accommodate one packet with some headers
+ */
+
+#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
+#define BUFFER_SIZE 2048
+#define IOVSIZE 2
+#define MAX_L2TPV3_MSGCNT 64
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+/* Header set to 0x30000 signifies a data packet */
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+typedef struct NetL2TPV3State {
+ NetClientState nc;
+ int fd;
+
+ /*
+     * these are used for xmit - that happens one packet at a time
+     * and for the first sign-of-life packet (easier to parse it once)
+ */
+
+ uint8_t *header_buf;
+ struct iovec *vec;
+
+ /*
+     * these are used for receive - try to "eat" up to MAX_L2TPV3_MSGCNT packets at a time
+ */
+
+ struct mmsghdr *msgvec;
+
+ /*
+ * peer address
+ */
+
+ struct sockaddr_storage *dgram_dst;
+ uint32_t dst_size;
+
+ /*
+ * L2TPv3 parameters
+ */
+
+ uint64_t rx_cookie;
+ uint64_t tx_cookie;
+ uint32_t rx_session;
+ uint32_t tx_session;
+ uint32_t header_size;
+ uint32_t counter;
+
+ /*
+ * DOS avoidance in error handling
+ */
+
+ bool header_mismatch;
+
+ /*
+ * Ring buffer handling
+ */
+
+ int queue_head;
+ int queue_tail;
+ int queue_depth;
+
+ /*
+ * Precomputed offsets
+ */
+
+ uint32_t offset;
+ uint32_t cookie_offset;
+ uint32_t counter_offset;
+ uint32_t session_offset;
+
+ /* Poll Control */
+
+ bool read_poll;
+ bool write_poll;
+
+ /* Flags */
+
+ bool ipv6;
+ bool udp;
+ bool has_counter;
+ bool pin_counter;
+ bool cookie;
+ bool cookie_is_64;
+
+} NetL2TPV3State;
+
+static int l2tpv3_can_send(void *opaque);
+static void net_l2tpv3_send(void *opaque);
+static void l2tpv3_writable(void *opaque);
+
+static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
+{
+ qemu_set_fd_handler2(s->fd,
+ s->read_poll ? l2tpv3_can_send : NULL,
+ s->read_poll ? net_l2tpv3_send : NULL,
+ s->write_poll ? l2tpv3_writable : NULL,
+ s);
+}
+
+static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
+{
+ if (s->read_poll != enable) {
+ s->read_poll = enable;
+ l2tpv3_update_fd_handler(s);
+ }
+}
+
+static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
+{
+ if (s->write_poll != enable) {
+ s->write_poll = enable;
+ l2tpv3_update_fd_handler(s);
+ }
+}
+
+static void l2tpv3_writable(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+ l2tpv3_write_poll(s, false);
+ qemu_flush_queued_packets(&s->nc);
+}
+
+static int l2tpv3_can_send(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+
+ return qemu_can_send_packet(&s->nc);
+}
+
+static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ l2tpv3_read_poll(s, true);
+}
+
+static void l2tpv3_poll(NetClientState *nc, bool enable)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ l2tpv3_write_poll(s, enable);
+ l2tpv3_read_poll(s, enable);
+}
+
+static void l2tpv3_form_header(NetL2TPV3State *s)
+{
+ uint32_t *counter;
+
+ if (s->udp) {
+ stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
+ }
+ stl_be_p(
+ (uint32_t *) (s->header_buf + s->session_offset),
+ s->tx_session
+ );
+ if (s->cookie) {
+ if (s->cookie_is_64) {
+ stq_be_p(
+ (uint64_t *)(s->header_buf + s->cookie_offset),
+ s->tx_cookie
+ );
+ } else {
+ stl_be_p(
+ (uint32_t *) (s->header_buf + s->cookie_offset),
+ s->tx_cookie
+ );
+ }
+ }
+ if (s->has_counter) {
+ counter = (uint32_t *)(s->header_buf + s->counter_offset);
+ if (s->pin_counter) {
+ *counter = 0;
+ } else {
+ stl_be_p(counter, ++s->counter);
+ }
+ }
+}
+
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+ const struct iovec *iov,
+ int iovcnt)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ struct msghdr message;
+ int ret;
+
+ if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+ error_report(
+ "iovec too long %d > %d, change l2tpv3.h",
+ iovcnt, MAX_L2TPV3_IOVCNT
+ );
+ return -1;
+ }
+ l2tpv3_form_header(s);
+ memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+ s->vec->iov_base = s->header_buf;
+ s->vec->iov_len = s->offset;
+ message.msg_name = s->dgram_dst;
+ message.msg_namelen = s->dst_size;
+ message.msg_iov = s->vec;
+ message.msg_iovlen = iovcnt + 1;
+ message.msg_control = NULL;
+ message.msg_controllen = 0;
+ message.msg_flags = 0;
+ do {
+ ret = sendmsg(s->fd, &message, 0);
+ } while ((ret == -1) && (errno == EINTR));
+ if (ret > 0) {
+ ret -= s->offset;
+ } else if (ret == 0) {
+ /* belt and braces - should not occur on DGRAM
+ * we should get an error and never a 0 send
+ */
+ ret = iov_size(iov, iovcnt);
+ } else {
+ /* signal upper layer that socket buffer is full */
+ ret = -errno;
+ if (ret == -EAGAIN || ret == -ENOBUFS) {
+ l2tpv3_write_poll(s, true);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+ const uint8_t *buf,
+ size_t size)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ struct iovec *vec;
+ struct msghdr message;
+ ssize_t ret = 0;
+
+ l2tpv3_form_header(s);
+ vec = s->vec;
+ vec->iov_base = s->header_buf;
+ vec->iov_len = s->offset;
+ vec++;
+ vec->iov_base = (void *) buf;
+ vec->iov_len = size;
+ message.msg_name = s->dgram_dst;
+ message.msg_namelen = s->dst_size;
+ message.msg_iov = s->vec;
+ message.msg_iovlen = 2;
+ message.msg_control = NULL;
+ message.msg_controllen = 0;
+ message.msg_flags = 0;
+ do {
+ ret = sendmsg(s->fd, &message, 0);
+ } while ((ret == -1) && (errno == EINTR));
+ if (ret > 0) {
+ ret -= s->offset;
+ } else if (ret == 0) {
+ /* belt and braces - should not occur on DGRAM
+ * we should get an error and never a 0 send
+ */
+ ret = size;
+ } else {
+ ret = -errno;
+ if (ret == -EAGAIN || ret == -ENOBUFS) {
+ /* signal upper layer that socket buffer is full */
+ l2tpv3_write_poll(s, true);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+
+ uint32_t *session;
+ uint64_t cookie;
+
+ if ((!s->udp) && (!s->ipv6)) {
+ buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
+ }
+
+ /* we do not do a strict check for "data" packets as per
+ * the RFC spec because the pure IP spec does not have
+ * that anyway.
+ */
+
+ if (s->cookie) {
+ if (s->cookie_is_64) {
+ cookie = ldq_be_p(buf + s->cookie_offset);
+ } else {
+ cookie = ldl_be_p(buf + s->cookie_offset);
+ }
+ if (cookie != s->rx_cookie) {
+ if (!s->header_mismatch) {
+ error_report("unknown cookie id");
+ }
+ return -1;
+ }
+ }
+ session = (uint32_t *) (buf + s->session_offset);
+ if (ldl_be_p(session) != s->rx_session) {
+ if (!s->header_mismatch) {
+ error_report("session mismatch");
+ }
+ return -1;
+ }
+ return 0;
+}
+
+static void net_l2tpv3_process_queue(NetL2TPV3State *s)
+{
+ int size = 0;
+ struct iovec *vec;
+ bool bad_read;
+ int data_size;
+ struct mmsghdr *msgvec;
+
+ /* go into ring mode only if there is a "pending" tail */
+ if (s->queue_depth > 0) {
+ do {
+ msgvec = s->msgvec + s->queue_tail;
+ if (msgvec->msg_len > 0) {
+ data_size = msgvec->msg_len - s->header_size;
+ vec = msgvec->msg_hdr.msg_iov;
+ if ((data_size > 0) &&
+ (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
+ vec++;
+ /* Use the legacy delivery for now, we will
+ * switch to using our own ring as a queueing mechanism
+ * at a later date
+ */
+ size = qemu_send_packet_async(
+ &s->nc,
+ vec->iov_base,
+ data_size,
+ l2tpv3_send_completed
+ );
+ if (size == 0) {
+ l2tpv3_read_poll(s, false);
+ }
+ bad_read = false;
+ } else {
+ bad_read = true;
+ if (!s->header_mismatch) {
+ /* report error only once */
+ error_report("l2tpv3 header verification failed");
+ s->header_mismatch = true;
+ }
+ }
+ } else {
+ bad_read = true;
+ }
+ s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
+ s->queue_depth--;
+ } while (
+ (s->queue_depth > 0) &&
+ qemu_can_send_packet(&s->nc) &&
+ ((size > 0) || bad_read)
+ );
+ }
+}
+
+static void net_l2tpv3_send(void *opaque)
+{
+ NetL2TPV3State *s = opaque;
+ int target_count, count;
+ struct mmsghdr *msgvec;
+
+ /* go into ring mode only if there is a "pending" tail */
+
+ if (s->queue_depth) {
+
+        /* The ring buffer we use has a variable intake:
+         * how much we can read varies, so adjust the count accordingly
+ */
+
+ target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
+
+ /* Ensure we do not overrun the ring when we have
+ * a lot of enqueued packets
+ */
+
+ if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
+ target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
+ }
+ } else {
+
+ /* we do not have any pending packets - we can use
+ * the whole message vector linearly instead of using
+ * it as a ring
+ */
+
+ s->queue_head = 0;
+ s->queue_tail = 0;
+ target_count = MAX_L2TPV3_MSGCNT;
+ }
+
+ msgvec = s->msgvec + s->queue_head;
+ if (target_count > 0) {
+ do {
+ count = recvmmsg(
+ s->fd,
+ msgvec,
+ target_count, MSG_DONTWAIT, NULL);
+ } while ((count == -1) && (errno == EINTR));
+ if (count < 0) {
+ /* Recv error - we still need to flush packets here,
+ * (re)set queue head to current position
+ */
+ count = 0;
+ }
+ s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
+ s->queue_depth += count;
+ }
+ net_l2tpv3_process_queue(s);
+}
+
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+ int i, j;
+ struct iovec *iov;
+ struct mmsghdr *cleanup = msgvec;
+ if (cleanup) {
+ for (i = 0; i < count; i++) {
+ if (cleanup->msg_hdr.msg_iov) {
+ iov = cleanup->msg_hdr.msg_iov;
+ for (j = 0; j < iovcount; j++) {
+ g_free(iov->iov_base);
+ iov++;
+ }
+ g_free(cleanup->msg_hdr.msg_iov);
+ }
+ cleanup++;
+ }
+ g_free(msgvec);
+ }
+}
+
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+ int i;
+ struct iovec *iov;
+ struct mmsghdr *msgvec, *result;
+
+ msgvec = g_malloc(sizeof(struct mmsghdr) * count);
+ result = msgvec;
+ for (i = 0; i < count ; i++) {
+ msgvec->msg_hdr.msg_name = NULL;
+ msgvec->msg_hdr.msg_namelen = 0;
+ iov = g_malloc(sizeof(struct iovec) * IOVSIZE);
+ msgvec->msg_hdr.msg_iov = iov;
+ iov->iov_base = g_malloc(s->header_size);
+ iov->iov_len = s->header_size;
+ iov++ ;
+ iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
+ iov->iov_len = BUFFER_SIZE;
+ msgvec->msg_hdr.msg_iovlen = 2;
+ msgvec->msg_hdr.msg_control = NULL;
+ msgvec->msg_hdr.msg_controllen = 0;
+ msgvec->msg_hdr.msg_flags = 0;
+ msgvec++;
+ }
+ return result;
+}
+
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+ NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+ qemu_purge_queued_packets(nc);
+ l2tpv3_read_poll(s, false);
+ l2tpv3_write_poll(s, false);
+ if (s->fd > 0) {
+ close(s->fd);
+ }
+ destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+ g_free(s->vec);
+ g_free(s->header_buf);
+ g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+ .size = sizeof(NetL2TPV3State),
+ .receive = net_l2tpv3_receive_dgram,
+ .receive_iov = net_l2tpv3_receive_dgram_iov,
+ .poll = l2tpv3_poll,
+ .cleanup = net_l2tpv3_cleanup,
+};
+
+int net_init_l2tpv3(const NetClientOptions *opts,
+ const char *name,
+ NetClientState *peer)
+{
+
+
+ const NetdevL2TPv3Options *l2tpv3;
+ NetL2TPV3State *s;
+ NetClientState *nc;
+ int fd = -1, gairet;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ char *srcport, *dstport;
+
+ nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+
+ s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+ s->queue_head = 0;
+ s->queue_tail = 0;
+ s->header_mismatch = false;
+
+ assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+ l2tpv3 = opts->l2tpv3;
+
+ if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
+ s->ipv6 = l2tpv3->ipv6;
+ } else {
+ s->ipv6 = false;
+ }
+
+ if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
+ error_report("l2tpv3_open : offset must be less than 256 bytes");
+ goto outerr;
+ }
+
+ if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
+ if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
+ s->cookie = true;
+ } else {
+ goto outerr;
+ }
+ } else {
+ s->cookie = false;
+ }
+
+ if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
+ s->cookie_is_64 = true;
+ } else {
+ s->cookie_is_64 = false;
+ }
+
+ if (l2tpv3->has_udp && l2tpv3->udp) {
+ s->udp = true;
+ if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+ error_report("l2tpv3_open : need both src and dst port for udp");
+ goto outerr;
+ } else {
+ srcport = l2tpv3->srcport;
+ dstport = l2tpv3->dstport;
+ }
+ } else {
+ s->udp = false;
+ srcport = NULL;
+ dstport = NULL;
+ }
+
+
+ s->offset = 4;
+ s->session_offset = 0;
+ s->cookie_offset = 4;
+ s->counter_offset = 4;
+
+ s->tx_session = l2tpv3->txsession;
+ if (l2tpv3->has_rxsession) {
+ s->rx_session = l2tpv3->rxsession;
+ } else {
+ s->rx_session = s->tx_session;
+ }
+
+ if (s->cookie) {
+ s->rx_cookie = l2tpv3->rxcookie;
+ s->tx_cookie = l2tpv3->txcookie;
+ if (s->cookie_is_64 == true) {
+ /* 64 bit cookie */
+ s->offset += 8;
+ s->counter_offset += 8;
+ } else {
+ /* 32 bit cookie */
+ s->offset += 4;
+ s->counter_offset += 4;
+ }
+ }
+
+ memset(&hints, 0, sizeof(hints));
+
+ if (s->ipv6) {
+ hints.ai_family = AF_INET6;
+ } else {
+ hints.ai_family = AF_INET;
+ }
+ if (s->udp) {
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_protocol = 0;
+ s->offset += 4;
+ s->counter_offset += 4;
+ s->session_offset += 4;
+ s->cookie_offset += 4;
+ } else {
+ hints.ai_socktype = SOCK_RAW;
+ hints.ai_protocol = IPPROTO_L2TP;
+ }
+
+ gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+
+ if ((gairet != 0) || (result == NULL)) {
+ error_report(
+ "l2tpv3_open : could not resolve src, errno = %s",
+ gai_strerror(gairet)
+ );
+ goto outerr;
+ }
+ fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+ if (fd == -1) {
+ fd = -errno;
+ error_report("l2tpv3_open : socket creation failed, errno = %d", -fd);
+ freeaddrinfo(result);
+ goto outerr;
+ }
+ if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
+ error_report("l2tpv3_open : could not bind socket err=%i", errno);
+ goto outerr;
+ }
+ if (result) {
+ freeaddrinfo(result);
+ }
+
+ memset(&hints, 0, sizeof(hints));
+
+ if (s->ipv6) {
+ hints.ai_family = AF_INET6;
+ } else {
+ hints.ai_family = AF_INET;
+ }
+ if (s->udp) {
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_protocol = 0;
+ } else {
+ hints.ai_socktype = SOCK_RAW;
+ hints.ai_protocol = IPPROTO_L2TP;
+ }
+
+ result = NULL;
+ gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+ if ((gairet != 0) || (result == NULL)) {
+ error_report(
+ "l2tpv3_open : could not resolve dst, error = %s",
+ gai_strerror(gairet)
+ );
+ goto outerr;
+ }
+
+ s->dgram_dst = g_malloc(sizeof(struct sockaddr_storage));
+ memset(s->dgram_dst, '\0' , sizeof(struct sockaddr_storage));
+ memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+ s->dst_size = result->ai_addrlen;
+
+ if (result) {
+ freeaddrinfo(result);
+ }
+
+ if (l2tpv3->has_counter && l2tpv3->counter) {
+ s->has_counter = true;
+ s->offset += 4;
+ } else {
+ s->has_counter = false;
+ }
+
+ if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
+ s->has_counter = true; /* pin counter implies that there is counter */
+ s->pin_counter = true;
+ } else {
+ s->pin_counter = false;
+ }
+
+ if (l2tpv3->has_offset) {
+ /* extra offset */
+ s->offset += l2tpv3->offset;
+ }
+
+ if ((s->ipv6) || (s->udp)) {
+ s->header_size = s->offset;
+ } else {
+ s->header_size = s->offset + sizeof(struct iphdr);
+ }
+
+ s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+ s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+ s->header_buf = g_malloc(s->header_size);
+
+ qemu_set_nonblock(fd);
+
+ s->fd = fd;
+ s->counter = 0;
+
+ l2tpv3_read_poll(s, true);
+
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+ "l2tpv3: connected");
+ return 0;
+outerr:
+ qemu_del_net_client(nc);
+ if (fd > 0) {
+ close(fd);
+ }
+ if (result) {
+ freeaddrinfo(result);
+ }
+ return -1;
+}
+
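Note: the offset bookkeeping in net_init_l2tpv3() fixes where each optional field lands in the transmit header built by l2tpv3_form_header(). For example, with udp on, a 64-bit cookie and a counter enabled, the layout works out to (derived from the code above, not extra patch content):

    byte  0: 32-bit L2TPV3_DATA_PACKET word      (UDP encapsulation only)
    byte  4: tx_session            session_offset = 0 + 4
    byte  8: tx_cookie (8 bytes)   cookie_offset  = 4 + 4
    byte 16: counter               counter_offset = 4 + 8 + 4
    offset = header_size = 4 + 8 + 4 + 4 = 20 bytes

plus any user-specified extra offset; for raw, non-UDP IPv4 sockets header_size additionally includes sizeof(struct iphdr), because the kernel hands the IP header back on receive. Assuming the -netdev option names follow the QAPI members used here, a matching invocation would look roughly like:

    -netdev l2tpv3,id=vnet0,src=198.51.100.1,dst=198.51.100.2,udp=on,srcport=1701,dstport=1701,txsession=10,rxsession=10,txcookie=0x12345678abcdef01,rxcookie=0x12345678abcdef01,cookie64=on,counter=on -device virtio-net-pci,netdev=vnet0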
diff --git a/net/net.c b/net/net.c
index 3dac29b844..0869c60bee 100644
--- a/net/net.c
+++ b/net/net.c
@@ -250,7 +250,7 @@ NICState *qemu_new_nic(NetClientInfo *info,
{
NetClientState **peers = conf->peers.ncs;
NICState *nic;
- int i, queues = MAX(1, conf->queues);
+ int i, queues = MAX(1, conf->peers.queues);
assert(info->type == NET_CLIENT_OPTIONS_KIND_NIC);
assert(info->size >= sizeof(NICState));
@@ -363,7 +363,7 @@ void qemu_del_net_client(NetClientState *nc)
void qemu_del_nic(NICState *nic)
{
- int i, queues = MAX(nic->conf->queues, 1);
+ int i, queues = MAX(nic->conf->peers.queues, 1);
/* If this is a peer NIC and peer has already been deleted, free it now. */
if (nic->peer_deleted) {
@@ -806,6 +806,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
#ifdef CONFIG_VHOST_NET_USED
[NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
#endif
+#ifdef CONFIG_LINUX
+ [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3,
+#endif
};
@@ -842,6 +845,9 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
#ifdef CONFIG_VHOST_NET_USED
case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
#endif
+#ifdef CONFIG_LINUX
+ case NET_CLIENT_OPTIONS_KIND_L2TPV3:
+#endif
break;
default:
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 1c7f7640fc..603e19e003 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differ
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 5ee3fcb38f..fa54abb4d3 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -9,8 +9,12 @@
*/
#include "s390-ccw.h"
+#include "bootmap.h"
+#include "virtio.h"
-// #define DEBUG_FALLBACK
+#ifdef DEBUG
+/* #define DEBUG_FALLBACK */
+#endif
#ifdef DEBUG_FALLBACK
#define dputs(txt) \
@@ -20,43 +24,8 @@
do { } while (0)
#endif
-struct scsi_blockptr {
- uint64_t blockno;
- uint16_t size;
- uint16_t blockct;
- uint8_t reserved[4];
-} __attribute__ ((packed));
-
-struct component_entry {
- struct scsi_blockptr data;
- uint8_t pad[7];
- uint8_t component_type;
- uint64_t load_address;
-} __attribute((packed));
-
-struct component_header {
- uint8_t magic[4];
- uint8_t type;
- uint8_t reserved[27];
-} __attribute((packed));
-
-struct mbr {
- uint8_t magic[4];
- uint32_t version_id;
- uint8_t reserved[8];
- struct scsi_blockptr blockptr;
-} __attribute__ ((packed));
-
-#define ZIPL_MAGIC "zIPL"
-
-#define ZIPL_COMP_HEADER_IPL 0x00
-#define ZIPL_COMP_HEADER_DUMP 0x01
-
-#define ZIPL_COMP_ENTRY_LOAD 0x02
-#define ZIPL_COMP_ENTRY_EXEC 0x01
-
/* Scratch space */
-static uint8_t sec[SECTOR_SIZE] __attribute__((__aligned__(SECTOR_SIZE)));
+static uint8_t sec[MAX_SECTOR_SIZE*4] __attribute__((__aligned__(PAGE_SIZE)));
typedef struct ResetInfo {
uint32_t ipl_mask;
@@ -104,43 +73,243 @@ static void jump_to_IPL_code(uint64_t address)
virtio_panic("\n! IPL returns !\n");
}
-/* Check for ZIPL magic. Returns 0 if not matched. */
-static int zipl_magic(uint8_t *ptr)
+/***********************************************************************
+ * IPL an ECKD DASD (CDL or LDL/CMS format)
+ */
+
+static unsigned char _bprs[8*1024]; /* guessed "max" ECKD sector size */
+const int max_bprs_entries = sizeof(_bprs) / sizeof(ExtEckdBlockPtr);
+
+static inline void verify_boot_info(BootInfo *bip)
+{
+ IPL_assert(magic_match(bip->magic, ZIPL_MAGIC), "No zIPL magic");
+ IPL_assert(bip->version == BOOT_INFO_VERSION, "Wrong zIPL version");
+ IPL_assert(bip->bp_type == BOOT_INFO_BP_TYPE_IPL, "DASD is not for IPL");
+ IPL_assert(bip->dev_type == BOOT_INFO_DEV_TYPE_ECKD, "DASD is not ECKD");
+ IPL_assert(bip->flags == BOOT_INFO_FLAGS_ARCH, "Not for this arch");
+ IPL_assert(block_size_ok(bip->bp.ipl.bm_ptr.eckd.bptr.size),
+ "Bad block size in zIPL section of the 1st record.");
+}
+
+static bool eckd_valid_address(BootMapPointer *p)
{
- uint32_t *p = (void*)ptr;
- uint32_t *z = (void*)ZIPL_MAGIC;
+ const uint64_t cylinder = p->eckd.cylinder
+ + ((p->eckd.head & 0xfff0) << 12);
+ const uint64_t head = p->eckd.head & 0x000f;
+
+ if (head >= virtio_get_heads()
+ || p->eckd.sector > virtio_get_sectors()
+ || p->eckd.sector <= 0) {
+ return false;
+ }
- if (*p != *z) {
- debug_print_int("invalid magic", *p);
- virtio_panic("invalid magic");
+ if (!virtio_guessed_disk_nature() && cylinder >= virtio_get_cylinders()) {
+ return false;
}
- return 1;
+ return true;
}
-#define FREE_SPACE_FILLER '\xAA'
+static block_number_t eckd_block_num(BootMapPointer *p)
+{
+ const uint64_t sectors = virtio_get_sectors();
+ const uint64_t heads = virtio_get_heads();
+ const uint64_t cylinder = p->eckd.cylinder
+ + ((p->eckd.head & 0xfff0) << 12);
+ const uint64_t head = p->eckd.head & 0x000f;
+ const block_number_t block = sectors * heads * cylinder
+ + sectors * head
+ + p->eckd.sector
+ - 1; /* block nr starts with zero */
+ return block;
+}
-static inline bool unused_space(const void *p, unsigned int size)
+static block_number_t load_eckd_segments(block_number_t blk, uint64_t *address)
{
- int i;
- const unsigned char *m = p;
+ block_number_t block_nr;
+ int j, rc;
+ BootMapPointer *bprs = (void *)_bprs;
+ bool more_data;
+
+ memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+ read_block(blk, bprs, "BPRS read failed");
+
+ do {
+ more_data = false;
+ for (j = 0;; j++) {
+ block_nr = eckd_block_num((void *)&(bprs[j].xeckd));
+ if (is_null_block_number(block_nr)) { /* end of chunk */
+ break;
+ }
+
+ /* we need the updated blockno for the next indirect entry
+ * in the chain, but don't want to advance address
+ */
+ if (j == (max_bprs_entries - 1)) {
+ break;
+ }
+
+ IPL_assert(block_size_ok(bprs[j].xeckd.bptr.size),
+ "bad chunk block size");
+ IPL_assert(eckd_valid_address(&bprs[j]), "bad chunk ECKD addr");
+
+ if ((bprs[j].xeckd.bptr.count == 0) && unused_space(&(bprs[j+1]),
+ sizeof(EckdBlockPtr))) {
+ /* This is a "continue" pointer.
+ * This ptr should be the last one in the current
+ * script section.
+ * I.e. the next ptr must point to the unused memory area
+ */
+ memset(_bprs, FREE_SPACE_FILLER, sizeof(_bprs));
+ read_block(block_nr, bprs, "BPRS continuation read failed");
+ more_data = true;
+ break;
+ }
- for (i = 0; i < size; i++) {
- if (m[i] != FREE_SPACE_FILLER) {
- return false;
+ /* Load (count+1) blocks of code at (block_nr)
+ * to memory (address).
+ */
+ rc = virtio_read_many(block_nr, (void *)(*address),
+ bprs[j].xeckd.bptr.count+1);
+ IPL_assert(rc == 0, "code chunk read failed");
+
+ *address += (bprs[j].xeckd.bptr.count+1) * virtio_get_block_size();
}
+ } while (more_data);
+ return block_nr;
+}
+
+static void run_eckd_boot_script(block_number_t mbr_block_nr)
+{
+ int i;
+ block_number_t block_nr;
+ uint64_t address;
+ ScsiMbr *scsi_mbr = (void *)sec;
+ BootMapScript *bms = (void *)sec;
+
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(mbr_block_nr, sec, "Cannot read MBR");
+
+ block_nr = eckd_block_num((void *)&(scsi_mbr->blockptr));
+
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(block_nr, sec, "Cannot read Boot Map Script");
+
+ for (i = 0; bms->entry[i].type == BOOT_SCRIPT_LOAD; i++) {
+ address = bms->entry[i].address.load_address;
+ block_nr = eckd_block_num(&(bms->entry[i].blkptr));
+
+ do {
+ block_nr = load_eckd_segments(block_nr, &address);
+ } while (block_nr != -1);
+ }
+
+ IPL_assert(bms->entry[i].type == BOOT_SCRIPT_EXEC,
+ "Unknown script entry type");
+ jump_to_IPL_code(bms->entry[i].address.load_address); /* no return */
+}
+
+static void ipl_eckd_cdl(void)
+{
+ XEckdMbr *mbr;
+ Ipl2 *ipl2 = (void *)sec;
+ IplVolumeLabel *vlbl = (void *)sec;
+ block_number_t block_nr;
+
+ /* we have just read the block #0 and recognized it as "IPL1" */
+ sclp_print("CDL\n");
+
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(1, ipl2, "Cannot read IPL2 record at block 1");
+ IPL_assert(magic_match(ipl2, IPL2_MAGIC), "No IPL2 record");
+
+ mbr = &ipl2->u.x.mbr;
+ IPL_assert(magic_match(mbr, ZIPL_MAGIC), "No zIPL section in IPL2 record.");
+ IPL_assert(block_size_ok(mbr->blockptr.xeckd.bptr.size),
+ "Bad block size in zIPL section of IPL2 record.");
+ IPL_assert(mbr->dev_type == DEV_TYPE_ECKD,
+ "Non-ECKD device type in zIPL section of IPL2 record.");
+
+ /* save pointer to Boot Script */
+ block_nr = eckd_block_num((void *)&(mbr->blockptr));
+
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(2, vlbl, "Cannot read Volume Label at block 2");
+ IPL_assert(magic_match(vlbl->key, VOL1_MAGIC),
+ "Invalid magic of volume label block");
+ IPL_assert(magic_match(vlbl->f.key, VOL1_MAGIC),
+ "Invalid magic of volser block");
+ print_volser(vlbl->f.volser);
+
+ run_eckd_boot_script(block_nr);
+ /* no return */
+}
+
+static void ipl_eckd_ldl(ECKD_IPL_mode_t mode)
+{
+ LDL_VTOC *vlbl = (void *)sec; /* already read, 3rd block */
+ char msg[4] = { '?', '.', '\n', '\0' };
+ block_number_t block_nr;
+ BootInfo *bip;
+
+ sclp_print((mode == ECKD_CMS) ? "CMS" : "LDL");
+ sclp_print(" version ");
+ switch (vlbl->LDL_version) {
+ case LDL1_VERSION:
+ msg[0] = '1';
+ break;
+ case LDL2_VERSION:
+ msg[0] = '2';
+ break;
+ default:
+ msg[0] = vlbl->LDL_version;
+ msg[0] &= 0x0f; /* convert EBCDIC */
+ msg[0] |= 0x30; /* to ASCII (digit) */
+ msg[1] = '?';
+ break;
+ }
+ sclp_print(msg);
+ print_volser(vlbl->volser);
+
+ /* DO NOT read BootMap pointer (only one, xECKD) at block #2 */
+
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(0, sec, "Cannot read block 0");
+ bip = (void *)(sec + 0x70); /* "boot info" is "eckd mbr" for LDL */
+ verify_boot_info(bip);
+
+ block_nr = eckd_block_num((void *)&(bip->bp.ipl.bm_ptr.eckd.bptr));
+ run_eckd_boot_script(block_nr);
+ /* no return */
+}
+
+static void ipl_eckd(ECKD_IPL_mode_t mode)
+{
+ switch (mode) {
+ case ECKD_CDL:
+ ipl_eckd_cdl(); /* no return */
+ case ECKD_CMS:
+ case ECKD_LDL:
+ ipl_eckd_ldl(mode); /* no return */
+ default:
+ virtio_panic("\n! Unknown ECKD IPL mode !\n");
}
- return true;
}
-static int zipl_load_segment(struct component_entry *entry)
+/***********************************************************************
+ * IPL a SCSI disk
+ */
+
+static void zipl_load_segment(ComponentEntry *entry)
{
- const int max_entries = (SECTOR_SIZE / sizeof(struct scsi_blockptr));
- struct scsi_blockptr *bprs = (void*)sec;
+ const int max_entries = (MAX_SECTOR_SIZE / sizeof(ScsiBlockPtr));
+ ScsiBlockPtr *bprs = (void *)sec;
const int bprs_size = sizeof(sec);
- uint64_t blockno;
- long address;
+ block_number_t blockno;
+ uint64_t address;
int i;
+ char err_msg[] = "zIPL failed to read BPRS at 0xZZZZZZZZZZZZZZZZ";
+ char *blk_no = &err_msg[30]; /* where to print blockno in (those ZZs) */
blockno = entry->data.blockno;
address = entry->load_address;
@@ -150,25 +319,25 @@ static int zipl_load_segment(struct component_entry *entry)
do {
memset(bprs, FREE_SPACE_FILLER, bprs_size);
- if (virtio_read(blockno, (uint8_t *)bprs)) {
- debug_print_int("failed reading bprs at", blockno);
- goto fail;
- }
+ fill_hex_val(blk_no, &blockno, sizeof(blockno));
+ read_block(blockno, bprs, err_msg);
for (i = 0;; i++) {
- u64 *cur_desc = (void*)&bprs[i];
+ uint64_t *cur_desc = (void *)&bprs[i];
blockno = bprs[i].blockno;
- if (!blockno)
+ if (!blockno) {
break;
+ }
/* we need the updated blockno for the next indirect entry in the
chain, but don't want to advance address */
- if (i == (max_entries - 1))
+ if (i == (max_entries - 1)) {
break;
+ }
if (bprs[i].blockct == 0 && unused_space(&bprs[i + 1],
- sizeof(struct scsi_blockptr))) {
+ sizeof(ScsiBlockPtr))) {
/* This is a "continue" pointer.
* This ptr is the last one in the current script section.
* I.e. the next ptr must point to the unused memory area.
@@ -178,102 +347,66 @@ static int zipl_load_segment(struct component_entry *entry)
break;
}
address = virtio_load_direct(cur_desc[0], cur_desc[1], 0,
- (void*)address);
- if (address == -1)
- goto fail;
+ (void *)address);
+ IPL_assert(address != -1, "zIPL load segment failed");
}
} while (blockno);
-
- return 0;
-
-fail:
- sclp_print("failed loading segment\n");
- return -1;
}
/* Run a zipl program */
-static int zipl_run(struct scsi_blockptr *pte)
+static void zipl_run(ScsiBlockPtr *pte)
{
- struct component_header *header;
- struct component_entry *entry;
- uint8_t tmp_sec[SECTOR_SIZE];
+ ComponentHeader *header;
+ ComponentEntry *entry;
+ uint8_t tmp_sec[MAX_SECTOR_SIZE];
- virtio_read(pte->blockno, tmp_sec);
- header = (struct component_header *)tmp_sec;
+ read_block(pte->blockno, tmp_sec, "Cannot read header");
+ header = (ComponentHeader *)tmp_sec;
- if (!zipl_magic(tmp_sec)) {
- goto fail;
- }
-
- if (header->type != ZIPL_COMP_HEADER_IPL) {
- goto fail;
- }
+ IPL_assert(magic_match(tmp_sec, ZIPL_MAGIC), "No zIPL magic");
+ IPL_assert(header->type == ZIPL_COMP_HEADER_IPL, "Bad header type");
dputs("start loading images\n");
/* Load image(s) into RAM */
- entry = (struct component_entry *)(&header[1]);
+ entry = (ComponentEntry *)(&header[1]);
while (entry->component_type == ZIPL_COMP_ENTRY_LOAD) {
- if (zipl_load_segment(entry) < 0) {
- goto fail;
- }
+ zipl_load_segment(entry);
entry++;
- if ((uint8_t*)(&entry[1]) > (tmp_sec + SECTOR_SIZE)) {
- goto fail;
- }
+ IPL_assert((uint8_t *)(&entry[1]) <= (tmp_sec + MAX_SECTOR_SIZE),
+ "Wrong entry value");
}
- if (entry->component_type != ZIPL_COMP_ENTRY_EXEC) {
- goto fail;
- }
+ IPL_assert(entry->component_type == ZIPL_COMP_ENTRY_EXEC, "No EXEC entry");
/* should not return */
jump_to_IPL_code(entry->load_address);
-
- return 0;
-
-fail:
- sclp_print("failed running zipl\n");
- return -1;
}
-int zipl_load(void)
+static void ipl_scsi(void)
{
- struct mbr *mbr = (void*)sec;
+ ScsiMbr *mbr = (void *)sec;
uint8_t *ns, *ns_end;
int program_table_entries = 0;
- int pte_len = sizeof(struct scsi_blockptr);
- struct scsi_blockptr *prog_table_entry;
- const char *error = "";
-
- /* Grab the MBR */
- virtio_read(0, (void*)mbr);
-
- dputs("checking magic\n");
+ const int pte_len = sizeof(ScsiBlockPtr);
+ ScsiBlockPtr *prog_table_entry;
- if (!zipl_magic(mbr->magic)) {
- error = "zipl_magic 1";
- goto fail;
- }
+ /* The 0-th block (MBR) was already read into sec[] */
+ sclp_print("Using SCSI scheme.\n");
debug_print_int("program table", mbr->blockptr.blockno);
/* Parse the program table */
- if (virtio_read(mbr->blockptr.blockno, sec)) {
- error = "virtio_read";
- goto fail;
- }
+ read_block(mbr->blockptr.blockno, sec,
+ "Error reading Program Table");
- if (!zipl_magic(sec)) {
- error = "zipl_magic 2";
- goto fail;
- }
+ IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic");
- ns_end = sec + SECTOR_SIZE;
+ ns_end = sec + virtio_get_block_size();
for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns++) {
- prog_table_entry = (struct scsi_blockptr *)ns;
+ prog_table_entry = (ScsiBlockPtr *)ns;
if (!prog_table_entry->blockno) {
break;
}
@@ -283,19 +416,55 @@ int zipl_load(void)
debug_print_int("program table entries", program_table_entries);
- if (!program_table_entries) {
- goto fail;
- }
+ IPL_assert(program_table_entries != 0, "Empty Program Table");
/* Run the default entry */
- prog_table_entry = (struct scsi_blockptr *)(sec + pte_len);
+ prog_table_entry = (ScsiBlockPtr *)(sec + pte_len);
- return zipl_run(prog_table_entry);
+ zipl_run(prog_table_entry); /* no return */
+}
-fail:
- sclp_print("failed loading zipl: ");
- sclp_print(error);
- sclp_print("\n");
- return -1;
+/***********************************************************************
+ * IPL starts here
+ */
+
+void zipl_load(void)
+{
+ ScsiMbr *mbr = (void *)sec;
+ LDL_VTOC *vlbl = (void *)sec;
+
+ /* Grab the MBR */
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(0, mbr, "Cannot read block 0");
+
+ dputs("checking magic\n");
+
+ if (magic_match(mbr->magic, ZIPL_MAGIC)) {
+ ipl_scsi(); /* no return */
+ }
+
+    /* We have failed to follow the SCSI scheme, so fall back to ECKD */
+ sclp_print("Using ECKD scheme.\n");
+ if (virtio_guessed_disk_nature()) {
+ sclp_print("Using guessed DASD geometry.\n");
+ virtio_assume_eckd();
+ }
+
+ if (magic_match(mbr->magic, IPL1_MAGIC)) {
+ ipl_eckd(ECKD_CDL); /* no return */
+ }
+
+ /* LDL/CMS? */
+ memset(sec, FREE_SPACE_FILLER, sizeof(sec));
+ read_block(2, vlbl, "Cannot read block 2");
+
+ if (magic_match(vlbl->magic, CMS1_MAGIC)) {
+ ipl_eckd(ECKD_CMS); /* no return */
+ }
+ if (magic_match(vlbl->magic, LNX1_MAGIC)) {
+ ipl_eckd(ECKD_LDL); /* no return */
+ }
+
+ virtio_panic("\n* invalid MBR magic *\n");
}
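
zipl_load() above probes the supported boot schemes in a fixed order: a "zIPL" MBR magic selects the SCSI layout, an "IPL1" label selects ECKD CDL, and only then is block 2 read to tell CMS volumes from LDL ones. A condensed, host-side sketch of that ordering (plain C; ASCII strings stand in for the EBCDIC magics found on a real disk):

    #include <stdio.h>
    #include <string.h>

    /* ASCII stand-ins; the real magics are EBCDIC byte strings. */
    static const char *pick_scheme(const char *block0_magic,
                                   const char *block2_magic)
    {
        if (!memcmp(block0_magic, "zIPL", 4)) {
            return "SCSI";
        }
        if (!memcmp(block0_magic, "IPL1", 4)) {
            return "ECKD CDL";
        }
        if (!memcmp(block2_magic, "CMS1", 4)) {
            return "ECKD CMS";
        }
        if (!memcmp(block2_magic, "LNX1", 4)) {
            return "ECKD LDL";
        }
        return "invalid MBR magic";
    }

    int main(void)
    {
        printf("%s\n", pick_scheme("IPL1", "????"));   /* -> ECKD CDL */
        return 0;
    }
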
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
new file mode 100644
index 0000000000..30ef22fe61
--- /dev/null
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU S390 bootmap interpreter -- declarations
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Eugene (jno) Dvurechenski <jno@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef _PC_BIOS_S390_CCW_BOOTMAP_H
+#define _PC_BIOS_S390_CCW_BOOTMAP_H
+
+#include "s390-ccw.h"
+#include "virtio.h"
+
+typedef uint64_t block_number_t;
+#define NULL_BLOCK_NR 0xffffffffffffffff
+
+#define FREE_SPACE_FILLER '\xAA'
+
+typedef struct ScsiBlockPtr {
+ uint64_t blockno;
+ uint16_t size;
+ uint16_t blockct;
+ uint8_t reserved[4];
+} __attribute__ ((packed)) ScsiBlockPtr;
+
+typedef struct FbaBlockPtr {
+ uint32_t blockno;
+ uint16_t size;
+ uint16_t blockct;
+} __attribute__ ((packed)) FbaBlockPtr;
+
+typedef struct EckdBlockPtr {
+ uint16_t cylinder; /* cylinder/head/sector is an address of the block */
+ uint16_t head;
+ uint8_t sector;
+ uint16_t size;
+ uint8_t count; /* (size_in_blocks-1);
+ * it's 0 for TablePtr, ScriptPtr, and SectionPtr */
+} __attribute__ ((packed)) EckdBlockPtr;
+
+typedef struct ExtEckdBlockPtr {
+ EckdBlockPtr bptr;
+ uint8_t reserved[8];
+} __attribute__ ((packed)) ExtEckdBlockPtr;
+
+typedef union BootMapPointer {
+ ScsiBlockPtr scsi;
+ FbaBlockPtr fba;
+ EckdBlockPtr eckd;
+ ExtEckdBlockPtr xeckd;
+} __attribute__ ((packed)) BootMapPointer;
+
+typedef struct ComponentEntry {
+ ScsiBlockPtr data;
+ uint8_t pad[7];
+ uint8_t component_type;
+ uint64_t load_address;
+} __attribute((packed)) ComponentEntry;
+
+typedef struct ComponentHeader {
+ uint8_t magic[4]; /* == "zIPL" */
+ uint8_t type; /* == ZIPL_COMP_HEADER_* */
+ uint8_t reserved[27];
+} __attribute((packed)) ComponentHeader;
+
+typedef struct ScsiMbr {
+ uint8_t magic[4];
+ uint32_t version_id;
+ uint8_t reserved[8];
+ ScsiBlockPtr blockptr;
+} __attribute__ ((packed)) ScsiMbr;
+
+#define ZIPL_MAGIC "zIPL"
+#define IPL1_MAGIC "\xc9\xd7\xd3\xf1" /* == "IPL1" in EBCDIC */
+#define IPL2_MAGIC "\xc9\xd7\xd3\xf2" /* == "IPL2" in EBCDIC */
+#define VOL1_MAGIC "\xe5\xd6\xd3\xf1" /* == "VOL1" in EBCDIC */
+#define LNX1_MAGIC "\xd3\xd5\xe7\xf1" /* == "LNX1" in EBCDIC */
+#define CMS1_MAGIC "\xc3\xd4\xe2\xf1" /* == "CMS1" in EBCDIC */
+
+#define LDL1_VERSION '\x40' /* == ' ' in EBCDIC */
+#define LDL2_VERSION '\xf2' /* == '2' in EBCDIC */
+
+#define ZIPL_COMP_HEADER_IPL 0x00
+#define ZIPL_COMP_HEADER_DUMP 0x01
+
+#define ZIPL_COMP_ENTRY_LOAD 0x02
+#define ZIPL_COMP_ENTRY_EXEC 0x01
+
+typedef struct XEckdMbr {
+ uint8_t magic[4]; /* == "xIPL" */
+ uint8_t version;
+ uint8_t bp_type;
+ uint8_t dev_type; /* == DEV_TYPE_* */
+#define DEV_TYPE_ECKD 0x00
+#define DEV_TYPE_FBA 0x01
+ uint8_t flags;
+ BootMapPointer blockptr;
+ uint8_t reserved[8];
+} __attribute__ ((packed)) XEckdMbr; /* see also BootInfo */
+
+typedef struct BootMapScriptEntry {
+ BootMapPointer blkptr;
+ uint8_t pad[7];
+ uint8_t type; /* == BOOT_SCRIPT_* */
+#define BOOT_SCRIPT_EXEC 0x01
+#define BOOT_SCRIPT_LOAD 0x02
+ union {
+ uint64_t load_address;
+ uint64_t load_psw;
+ } address;
+} __attribute__ ((packed)) BootMapScriptEntry;
+
+typedef struct BootMapScriptHeader {
+ uint32_t magic;
+ uint8_t type;
+#define BOOT_SCRIPT_HDR_IPL 0x00
+ uint8_t reserved[27];
+} __attribute__ ((packed)) BootMapScriptHeader;
+
+typedef struct BootMapScript {
+ BootMapScriptHeader header;
+ BootMapScriptEntry entry[0];
+} __attribute__ ((packed)) BootMapScript;
+
+/*
+ * These aren't real VTOCs, but referred to this way in some docs.
+ * They are "volume labels" actually.
+ *
+ * Some structures look similar to those described above, but are left
+ * separate as there is no indication that they are the same.
+ * So, the value definitions are left separate too.
+ */
+typedef struct LDL_VTOC { /* @ rec.3 cyl.0 trk.0 for ECKD */
+ char magic[4]; /* "LNX1", EBCDIC */
+ char volser[6]; /* volser, EBCDIC */
+ uint8_t reserved[69]; /* reserved, 0x40 */
+ uint8_t LDL_version; /* 0x40 or 0xF2 */
+ uint64_t formatted_blocks; /* if LDL_version >= 0xF2 */
+} __attribute__ ((packed)) LDL_VTOC;
+
+typedef struct format_date {
+ uint8_t YY;
+ uint8_t MM;
+ uint8_t DD;
+ uint8_t hh;
+ uint8_t mm;
+ uint8_t ss;
+} __attribute__ ((packed)) format_date_t;
+
+typedef struct CMS_VTOC { /* @ rec.3 cyl.0 trk.0 for ECKD */
+ /* @ blk.1 (zero based) for FBA */
+ char magic[4]; /* 'CMS1', EBCDIC */
+ char volser[6]; /* volser, EBCDIC */
+ uint16_t version; /* = 0 */
+ uint32_t block_size; /* = 512, 1024, 2048, or 4096 */
+ uint32_t disk_origin; /* = 4 or 5 */
+ uint32_t blocks; /* Number of usable cyls/blocks */
+ uint32_t formatted; /* Max number of fmtd cyls/blks */
+ uint32_t CMS_blocks; /* disk size in CMS blocks */
+ uint32_t CMS_used; /* Number of CMS blocks in use */
+ uint32_t FST_size; /* = 64, bytes */
+ uint32_t FST_per_CMS_blk; /* */
+ format_date_t format_date; /* YYMMDDhhmmss as 6 bytes */
+ uint8_t reserved1[2]; /* = 0 */
+ uint32_t offset; /* disk offset when reserved */
+ uint32_t next_hole; /* block nr */
+ uint32_t HBLK_hole_offset; /* >> HBLK data of next hole */
+ uint32_t alloc_map_usr_off; /* >> user part of Alloc map */
+ uint8_t reserved2[4]; /* = 0 */
+ char shared_seg_name[8]; /* */
+} __attribute__ ((packed)) CMS_VTOC;
+
+/* from zipl/include/boot.h */
+typedef struct BootInfoBpIpl {
+ union {
+ ExtEckdBlockPtr eckd;
+ ScsiBlockPtr linr;
+ } bm_ptr;
+ uint8_t unused[16];
+} __attribute__ ((packed)) BootInfoBpIpl;
+
+typedef struct EckdDumpParam {
+ uint32_t start_blk;
+ uint32_t end_blk;
+ uint16_t blocksize;
+ uint8_t num_heads;
+ uint8_t bpt;
+ char reserved[4];
+} __attribute((packed, may_alias)) EckdDumpParam;
+
+typedef struct FbaDumpParam {
+ uint64_t start_blk;
+ uint64_t blockct;
+} __attribute((packed)) FbaDumpParam;
+
+typedef struct BootInfoBpDump {
+ union {
+ EckdDumpParam eckd;
+ FbaDumpParam fba;
+ } param;
+ uint8_t unused[16];
+} __attribute__ ((packed)) BootInfoBpDump;
+
+typedef struct BootInfo { /* @ 0x70, record #0 */
+ unsigned char magic[4]; /* = 'zIPL', ASCII */
+ uint8_t version; /* = 1 */
+#define BOOT_INFO_VERSION 1
+ uint8_t bp_type; /* = 0 */
+#define BOOT_INFO_BP_TYPE_IPL 0x00
+#define BOOT_INFO_BP_TYPE_DUMP 0x01
+ uint8_t dev_type; /* = 0 */
+#define BOOT_INFO_DEV_TYPE_ECKD 0x00
+#define BOOT_INFO_DEV_TYPE_FBA 0x01
+ uint8_t flags; /* = 1 */
+#ifdef __s390x__
+#define BOOT_INFO_FLAGS_ARCH 0x01
+#else
+#define BOOT_INFO_FLAGS_ARCH 0x00
+#endif
+ union {
+ BootInfoBpDump dump;
+ BootInfoBpIpl ipl;
+ } bp;
+} __attribute__ ((packed)) BootInfo; /* see also XEckdMbr */
+
+typedef struct Ipl1 {
+ unsigned char key[4]; /* == "IPL1" */
+ unsigned char data[24];
+} __attribute__((packed)) Ipl1;
+
+typedef struct Ipl2 {
+ unsigned char key[4]; /* == "IPL2" */
+ union {
+ unsigned char data[144];
+ struct {
+ unsigned char reserved1[92-4];
+ XEckdMbr mbr;
+ unsigned char reserved2[144-(92-4)-sizeof(XEckdMbr)];
+ } x;
+ } u;
+} __attribute__((packed)) Ipl2;
+
+typedef struct IplVolumeLabel {
+ unsigned char key[4]; /* == "VOL1" */
+ union {
+ unsigned char data[80];
+ struct {
+ unsigned char key[4]; /* == "VOL1" */
+ unsigned char volser[6];
+ unsigned char reserved[6];
+ } f;
+ };
+} __attribute__((packed)) IplVolumeLabel;
+
+typedef enum {
+ ECKD_NO_IPL,
+ ECKD_CDL,
+ ECKD_CMS,
+ ECKD_LDL,
+} ECKD_IPL_mode_t;
+
+/* utility code below */
+
+static inline void IPL_assert(bool term, const char *message)
+{
+ if (!term) {
+ sclp_print("\n! ");
+ sclp_print(message);
+ virtio_panic(" !\n"); /* no return */
+ }
+}
+
+static const unsigned char ebc2asc[256] =
+ /* 0123456789abcdef0123456789abcdef */
+ "................................" /* 1F */
+ "................................" /* 3F */
+ " ...........<(+|&.........!$*);." /* 5F first.chr.here.is.real.space */
+ "-/.........,%_>?.........`:#@'=\""/* 7F */
+ ".abcdefghi.......jklmnopqr......" /* 9F */
+ "..stuvwxyz......................" /* BF */
+ ".ABCDEFGHI.......JKLMNOPQR......" /* DF */
+ "..STUVWXYZ......0123456789......";/* FF */
+
+static inline void ebcdic_to_ascii(const char *src,
+ char *dst,
+ unsigned int size)
+{
+ unsigned int i;
+ for (i = 0; i < size; i++) {
+ unsigned c = src[i];
+ dst[i] = ebc2asc[c];
+ }
+}
+
+static inline void print_volser(const void *volser)
+{
+ char ascii[8];
+
+ ebcdic_to_ascii((char *)volser, ascii, 6);
+ ascii[6] = '\0';
+ sclp_print("VOLSER=[");
+ sclp_print(ascii);
+ sclp_print("]\n");
+}
+
+static inline bool unused_space(const void *p, size_t size)
+{
+ size_t i;
+ const unsigned char *m = p;
+
+ for (i = 0; i < size; i++) {
+ if (m[i] != FREE_SPACE_FILLER) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static inline bool is_null_block_number(block_number_t x)
+{
+ return x == NULL_BLOCK_NR;
+}
+
+static inline void read_block(block_number_t blockno,
+ void *buffer,
+ const char *errmsg)
+{
+ IPL_assert(virtio_read(blockno, buffer) == 0, errmsg);
+}
+
+static inline bool block_size_ok(uint32_t block_size)
+{
+ return block_size == virtio_get_block_size();
+}
+
+static inline bool magic_match(const void *data, const void *magic)
+{
+ return *((uint32_t *)data) == *((uint32_t *)magic);
+}
+
+#endif /* _PC_BIOS_S390_CCW_BOOTMAP_H */
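
magic_match() compares exactly four bytes by loading both operands as a 32-bit word, so the string macros above (ZIPL_MAGIC, IPL1_MAGIC, ...) contribute only their first four characters and the literal's terminating NUL is never examined. A minimal stand-alone illustration of the same idea (plain C; memcpy is used here instead of the header's direct cast to sidestep alignment questions):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same idea as magic_match(): compare the first four bytes as one word. */
    static bool magic4_match(const void *data, const void *magic)
    {
        uint32_t a, b;

        memcpy(&a, data, sizeof(a));    /* memcpy sidesteps alignment issues */
        memcpy(&b, magic, sizeof(b));
        return a == b;
    }

    int main(void)
    {
        const char sector[] = "zIPL....rest of the boot record";

        printf("zIPL magic: %d\n", magic4_match(sector, "zIPL"));  /* 1 */
        printf("IPL1 magic: %d\n", magic4_match(sector, "IPL1"));  /* 0 */
        return 0;
    }
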
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 5c33766533..dbfb40e685 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -9,6 +9,7 @@
*/
#include "s390-ccw.h"
+#include "virtio.h"
char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
uint64_t boot_value;
@@ -64,6 +65,10 @@ static void virtio_setup(uint64_t dev_info)
}
virtio_setup_block(blk_schid);
+
+ if (!virtio_ipl_disk_is_valid()) {
+ virtio_panic("No valid hard disk detected.\n");
+ }
}
int main(void)
@@ -72,8 +77,8 @@ int main(void)
debug_print_int("boot reg[7] ", boot_value);
virtio_setup(boot_value);
- if (zipl_load() < 0)
- sclp_print("Failed to load OS from hard disk\n");
- disabled_wait();
- while (1) { }
+ zipl_load(); /* no return */
+
+ virtio_panic("Failed to load OS from hard disk\n");
+ return 0; /* make compiler happy */
}
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 5e871ac84c..959aed0d0b 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -34,10 +34,10 @@ typedef unsigned long long __u64;
#define PAGE_SIZE 4096
#ifndef EIO
-#define EIO 1
+#define EIO 1
#endif
#ifndef EBUSY
-#define EBUSY 2
+#define EBUSY 2
#endif
#ifndef NULL
#define NULL 0
@@ -57,14 +57,14 @@ void sclp_setup(void);
/* virtio.c */
unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
- ulong subchan_id, void *load_addr);
+ ulong subchan_id, void *load_addr);
bool virtio_is_blk(struct subchannel_id schid);
void virtio_setup_block(struct subchannel_id schid);
int virtio_read(ulong sector, void *load_addr);
int enable_mss_facility(void);
/* bootmap.c */
-int zipl_load(void);
+void zipl_load(void);
static inline void *memset(void *s, int c, size_t n)
{
@@ -86,15 +86,21 @@ static inline void fill_hex(char *out, unsigned char val)
out[1] = hex[val & 0xf];
}
-static inline void print_int(const char *desc, u64 addr)
+static inline void fill_hex_val(char *out, void *ptr, unsigned size)
{
- unsigned char *addr_c = (unsigned char*)&addr;
- char out[] = ": 0xffffffffffffffff\n";
+ unsigned char *value = ptr;
unsigned int i;
- for (i = 0; i < sizeof(addr); i++) {
- fill_hex(&out[4 + (i*2)], addr_c[i]);
+ for (i = 0; i < size; i++) {
+ fill_hex(&out[i*2], value[i]);
}
+}
+
+static inline void print_int(const char *desc, u64 addr)
+{
+ char out[] = ": 0xffffffffffffffff\n";
+
+ fill_hex_val(&out[4], &addr, sizeof(addr));
sclp_print(desc);
sclp_print(out);
@@ -118,18 +124,18 @@ static inline void debug_print_addr(const char *desc, void *p)
* Hypercall functions *
***********************************************/
-#define KVM_S390_VIRTIO_NOTIFY 0
-#define KVM_S390_VIRTIO_RESET 1
-#define KVM_S390_VIRTIO_SET_STATUS 2
+#define KVM_S390_VIRTIO_NOTIFY 0
+#define KVM_S390_VIRTIO_RESET 1
+#define KVM_S390_VIRTIO_SET_STATUS 2
#define KVM_S390_VIRTIO_CCW_NOTIFY 3
static inline void yield(void)
{
- asm volatile ("diag 0,0,0x44"
- : :
- : "memory", "cc");
+ asm volatile ("diag 0,0,0x44"
+ : :
+ : "memory", "cc");
}
-#define SECTOR_SIZE 512
+#define MAX_SECTOR_SIZE 4096
#endif /* S390_CCW_H */
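
fill_hex_val() is what lets print_int() and the BPRS error message in bootmap.c patch a hex value into a placeholder inside a preallocated template string. A small self-contained sketch of that pattern (host C, names mirror the header for clarity):

    #include <stdint.h>
    #include <stdio.h>

    static void fill_hex(char *out, unsigned char val)
    {
        static const char hex[] = "0123456789abcdef";

        out[0] = hex[val >> 4];
        out[1] = hex[val & 0xf];
    }

    /* Render 'size' bytes at 'ptr' as 2*size hex characters, no terminator. */
    static void fill_hex_val(char *out, const void *ptr, unsigned size)
    {
        const unsigned char *value = ptr;
        unsigned i;

        for (i = 0; i < size; i++) {
            fill_hex(&out[i * 2], value[i]);
        }
    }

    int main(void)
    {
        uint64_t addr = 0x1234;
        char out[] = ": 0xffffffffffffffff";

        fill_hex_val(&out[4], &addr, sizeof(addr));
        puts(out);   /* digit order follows the value's in-memory byte order */
        return 0;
    }
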
diff --git a/pc-bios/s390-ccw/sclp-ascii.c b/pc-bios/s390-ccw/sclp-ascii.c
index 1c93937104..761fb44ff5 100644
--- a/pc-bios/s390-ccw/sclp-ascii.c
+++ b/pc-bios/s390-ccw/sclp-ascii.c
@@ -33,7 +33,7 @@ static int sclp_service_call(unsigned int command, void *sccb)
static void sclp_set_write_mask(void)
{
- WriteEventMask *sccb = (void*)_sccb;
+ WriteEventMask *sccb = (void *)_sccb;
sccb->h.length = sizeof(WriteEventMask);
sccb->mask_length = sizeof(unsigned int);
@@ -68,7 +68,7 @@ static void _memcpy(char *dest, const char *src, int len)
void sclp_print(const char *str)
{
int len = _strlen(str);
- WriteEventData *sccb = (void*)_sccb;
+ WriteEventData *sccb = (void *)_sccb;
sccb->h.length = sizeof(WriteEventData) + len;
sccb->h.function_code = SCLP_FC_NORMAL_WRITE;
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index bbb3c4f36d..31b23b086c 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -18,22 +18,22 @@ static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
static long kvm_hypercall(unsigned long nr, unsigned long param1,
unsigned long param2)
{
- register ulong r_nr asm("1") = nr;
- register ulong r_param1 asm("2") = param1;
- register ulong r_param2 asm("3") = param2;
- register long retval asm("2");
+ register ulong r_nr asm("1") = nr;
+ register ulong r_param1 asm("2") = param1;
+ register ulong r_param2 asm("3") = param2;
+ register long retval asm("2");
- asm volatile ("diag 2,4,0x500"
- : "=d" (retval)
- : "d" (r_nr), "0" (r_param1), "r"(r_param2)
- : "memory", "cc");
+ asm volatile ("diag 2,4,0x500"
+ : "=d" (retval)
+ : "d" (r_nr), "0" (r_param1), "r"(r_param2)
+ : "memory", "cc");
- return retval;
+ return retval;
}
static void virtio_notify(struct subchannel_id schid)
{
- kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32*)&schid, 0);
+ kvm_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY, *(u32 *)&schid, 0);
}
/***********************************************
@@ -202,7 +202,7 @@ static int vring_wait_reply(struct vring *vr, int timeout)
* Virtio block *
***********************************************/
-static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
+int virtio_read_many(ulong sector, void *load_addr, int sec_num)
{
struct virtio_blk_outhdr out_hdr;
u8 status;
@@ -211,12 +211,12 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
/* Tell the host we want to read */
out_hdr.type = VIRTIO_BLK_T_IN;
out_hdr.ioprio = 99;
- out_hdr.sector = sector;
+ out_hdr.sector = virtio_sector_adjust(sector);
vring_send_buf(&block, &out_hdr, sizeof(out_hdr), VRING_DESC_F_NEXT);
/* This is where we want to receive data */
- vring_send_buf(&block, load_addr, SECTOR_SIZE * sec_num,
+ vring_send_buf(&block, load_addr, virtio_get_block_size() * sec_num,
VRING_DESC_F_WRITE | VRING_HIDDEN_IS_CHAIN |
VRING_DESC_F_NEXT);
@@ -236,7 +236,7 @@ static int virtio_read_many(ulong sector, void *load_addr, int sec_num)
}
unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
- ulong subchan_id, void *load_addr)
+ ulong subchan_id, void *load_addr)
{
u8 status;
int sec = rec_list1;
@@ -244,16 +244,16 @@ unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
int sec_len = rec_list2 >> 48;
ulong addr = (ulong)load_addr;
- if (sec_len != SECTOR_SIZE) {
+ if (sec_len != virtio_get_block_size()) {
return -1;
}
sclp_print(".");
- status = virtio_read_many(sec, (void*)addr, sec_num);
+ status = virtio_read_many(sec, (void *)addr, sec_num);
if (status) {
virtio_panic("I/O Error");
}
- addr += sec_num * SECTOR_SIZE;
+ addr += sec_num * virtio_get_block_size();
return addr;
}
@@ -263,18 +263,98 @@ int virtio_read(ulong sector, void *load_addr)
return virtio_read_many(sector, load_addr, 1);
}
+static VirtioBlkConfig blk_cfg = {};
+static bool guessed_disk_nature;
+
+bool virtio_guessed_disk_nature(void)
+{
+ return guessed_disk_nature;
+}
+
+void virtio_assume_scsi(void)
+{
+ guessed_disk_nature = true;
+ blk_cfg.blk_size = 512;
+}
+
+void virtio_assume_eckd(void)
+{
+ guessed_disk_nature = true;
+ blk_cfg.blk_size = 4096;
+
+ /* this must be here to calculate code segment position */
+ blk_cfg.geometry.heads = 15;
+ blk_cfg.geometry.sectors = 12;
+}
+
+bool virtio_disk_is_scsi(void)
+{
+ if (guessed_disk_nature) {
+ return (blk_cfg.blk_size == 512);
+ }
+ return (blk_cfg.geometry.heads == 255)
+ && (blk_cfg.geometry.sectors == 63)
+ && (blk_cfg.blk_size == 512);
+}
+
+bool virtio_disk_is_eckd(void)
+{
+ if (guessed_disk_nature) {
+ return (blk_cfg.blk_size == 4096);
+ }
+ return (blk_cfg.geometry.heads == 15)
+ && (blk_cfg.geometry.sectors == 12)
+ && (blk_cfg.blk_size == 4096);
+}
+
+bool virtio_ipl_disk_is_valid(void)
+{
+ return blk_cfg.blk_size && (virtio_disk_is_scsi() || virtio_disk_is_eckd());
+}
+
+int virtio_get_block_size(void)
+{
+ return blk_cfg.blk_size;
+}
+
+uint16_t virtio_get_cylinders(void)
+{
+ return blk_cfg.geometry.cylinders;
+}
+
+uint8_t virtio_get_heads(void)
+{
+ return blk_cfg.geometry.heads;
+}
+
+uint8_t virtio_get_sectors(void)
+{
+ return blk_cfg.geometry.sectors;
+}
+
void virtio_setup_block(struct subchannel_id schid)
{
struct vq_info_block info;
struct vq_config_block config = {};
+ blk_cfg.blk_size = 0; /* mark "illegal" - setup started... */
+
virtio_reset(schid);
+ /*
+ * Skipping CCW_CMD_READ_FEAT. We're not doing anything fancy, and
+ * we'll just stop dead anyway if anything does not work like we
+     * we'll just stop dead anyway if anything does not work as we
+     * expect.
+
config.index = 0;
if (run_ccw(schid, CCW_CMD_READ_VQ_CONF, &config, sizeof(config))) {
+ virtio_panic("Could not get block device VQ configuration\n");
+ }
+ if (run_ccw(schid, CCW_CMD_READ_CONF, &blk_cfg, sizeof(blk_cfg))) {
virtio_panic("Could not get block device configuration\n");
}
- vring_init(&block, config.num, (void*)(100 * 1024 * 1024),
+ vring_init(&block, config.num, (void *)(100 * 1024 * 1024),
KVM_S390_VIRTIO_RING_ALIGN);
info.queue = (100ULL * 1024ULL* 1024ULL);
@@ -286,6 +366,12 @@ void virtio_setup_block(struct subchannel_id schid)
if (!run_ccw(schid, CCW_CMD_SET_VQ, &info, sizeof(info))) {
virtio_set_status(schid, VIRTIO_CONFIG_S_DRIVER_OK);
}
+
+ if (!virtio_ipl_disk_is_valid()) {
+ /* make sure all getters but blocksize return 0 for invalid IPL disk */
+ memset(&blk_cfg, 0, sizeof(blk_cfg));
+ virtio_assume_scsi();
+ }
}
bool virtio_is_blk(struct subchannel_id schid)
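
The new helpers classify the IPL disk purely from the virtio-blk configuration space: 512-byte blocks with a 255/63 geometry are treated as a SCSI-style disk, 4096-byte blocks with a 15/12 geometry as an ECKD DASD, and anything else is later replaced by a guessed geometry. A condensed sketch of that decision (stand-alone C; the struct only mirrors the fields used here):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
        uint32_t blk_size;
    } DiskCfg;

    static bool looks_like_scsi(const DiskCfg *c)
    {
        return c->blk_size == 512 && c->heads == 255 && c->sectors == 63;
    }

    static bool looks_like_eckd(const DiskCfg *c)
    {
        return c->blk_size == 4096 && c->heads == 15 && c->sectors == 12;
    }

    int main(void)
    {
        DiskCfg dasd = { .cylinders = 1113, .heads = 15, .sectors = 12,
                         .blk_size = 4096 };

        if (looks_like_scsi(&dasd)) {
            puts("SCSI scheme");
        } else if (looks_like_eckd(&dasd)) {
            puts("ECKD scheme");            /* printed for this geometry */
        } else {
            puts("unknown - fall back to a guessed geometry");
        }
        return 0;
    }
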
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 772a63f152..f1fb1b08fa 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -66,7 +66,7 @@ struct virtio_dev {
char *config;
};
-#define KVM_S390_VIRTIO_RING_ALIGN 4096
+#define KVM_S390_VIRTIO_RING_ALIGN 4096
#define VRING_USED_F_NO_NOTIFY 1
@@ -161,4 +161,52 @@ struct virtio_blk_outhdr {
u64 sector;
};
+typedef struct VirtioBlkConfig {
+ u64 capacity; /* in 512-byte sectors */
+ u32 size_max; /* max segment size (if VIRTIO_BLK_F_SIZE_MAX) */
+ u32 seg_max; /* max number of segments (if VIRTIO_BLK_F_SEG_MAX) */
+
+ struct virtio_blk_geometry {
+ u16 cylinders;
+ u8 heads;
+ u8 sectors;
+ } geometry; /* (if VIRTIO_BLK_F_GEOMETRY) */
+
+ u32 blk_size; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+
+ /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */
+ u8 physical_block_exp; /* exponent for physical block per logical block */
+ u8 alignment_offset; /* alignment offset in logical blocks */
+ u16 min_io_size; /* min I/O size without performance penalty
+ in logical blocks */
+ u32 opt_io_size; /* optimal sustained I/O size in logical blocks */
+
+ u8 wce; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
+} __attribute__((packed)) VirtioBlkConfig;
+
+bool virtio_guessed_disk_nature(void);
+void virtio_assume_scsi(void);
+void virtio_assume_eckd(void);
+
+extern bool virtio_disk_is_scsi(void);
+extern bool virtio_disk_is_eckd(void);
+extern bool virtio_ipl_disk_is_valid(void);
+extern int virtio_get_block_size(void);
+extern uint16_t virtio_get_cylinders(void);
+extern uint8_t virtio_get_heads(void);
+extern uint8_t virtio_get_sectors(void);
+extern int virtio_read_many(ulong sector, void *load_addr, int sec_num);
+
+#define VIRTIO_SECTOR_SIZE 512
+
+static inline ulong virtio_eckd_sector_adjust(ulong sector)
+{
+ return sector * (virtio_get_block_size() / VIRTIO_SECTOR_SIZE);
+}
+
+static inline ulong virtio_sector_adjust(ulong sector)
+{
+ return virtio_disk_is_eckd() ? virtio_eckd_sector_adjust(sector) : sector;
+}
+
#endif /* VIRTIO_H */
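
virtio_sector_adjust() converts a block number given in device blocks (for instance 4096-byte ECKD blocks) into the 512-byte sector units that virtio-blk requests are addressed in, so block N of a 4 KiB-block disk starts at virtio sector N * 8. The arithmetic in isolation (host C, values purely for illustration):

    #include <stdio.h>

    #define VIRTIO_SECTOR_SIZE 512

    static unsigned long sector_adjust(unsigned long block, unsigned block_size)
    {
        /* Each device block covers block_size / 512 virtio sectors. */
        return block * (block_size / VIRTIO_SECTOR_SIZE);
    }

    int main(void)
    {
        /* Block 3 of a 4096-byte-block DASD starts at virtio sector 24. */
        printf("%lu\n", sector_adjust(3, 4096));
        /* On a 512-byte-block disk the two numberings are identical. */
        printf("%lu\n", sector_adjust(3, 512));
        return 0;
    }
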
diff --git a/qapi-schema.json b/qapi-schema.json
index e7727a1153..a83befc05b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -8,6 +8,9 @@
# QAPI block definitions
{ 'include': 'qapi/block.json' }
+# QAPI event definitions
+{ 'include': 'qapi/event.json' }
+
##
# LostTickPolicy:
#
@@ -211,12 +214,18 @@
#
# @filename: the filename of the character device
#
+# @frontend-open: shows whether the frontend device attached to this backend
+#                 (e.g. with the chardev=... option) is in open or closed state
+# (since 2.1)
+#
# Notes: @filename is encoded using the QEMU command line character device
# encoding. See the QEMU man page for details.
#
# Since: 0.14.0
##
-{ 'type': 'ChardevInfo', 'data': {'label': 'str', 'filename': 'str'} }
+{ 'type': 'ChardevInfo', 'data': {'label': 'str',
+ 'filename': 'str',
+ 'frontend-open': 'bool'} }
##
# @query-chardev:
@@ -654,8 +663,9 @@
#
# @host: IP address
#
-# @service: The service name of vnc port. This may depend on the host system's
-# service database so symbolic names should not be relied on.
+# @service: The service name of the vnc port. This may depend on the host
+# system's service database so symbolic names should not be relied
+# on.
#
# @family: address family
#
@@ -694,7 +704,7 @@
##
{ 'type': 'VncClientInfo',
'base': 'VncBasicInfo',
- 'data': { '*x509_dname' : 'str', '*sasl_username': 'str' } }
+ 'data': { '*x509_dname': 'str', '*sasl_username': 'str' } }
##
# @VncInfo:
@@ -2040,6 +2050,62 @@
'*udp': 'str' } }
##
+# @NetdevL2TPv3Options
+#
+# Connect the VLAN to an Ethernet over L2TPv3 static tunnel
+#
+# @src: source address
+#
+# @dst: destination address
+#
+# @srcport: #optional source port - mandatory for udp, optional for ip
+#
+# @dstport: #optional destination port - mandatory for udp, optional for ip
+#
+# @ipv6: #optional - force the use of ipv6
+#
+# @udp: #optional - use the udp version of l2tpv3 encapsulation
+#
+# @cookie64: #optional - use 64 bit cookies
+#
+# @counter: #optional enable sequence counter
+#
+# @pincounter: #optional pin sequence counter to zero -
+# workaround for buggy implementations or
+#              networks that reorder packets
+#
+# @txcookie: #optional 32 or 64 bit transmit cookie
+#
+# @rxcookie: #optional 32 or 64 bit receive cookie
+#
+# @txsession: 32 bit transmit session
+#
+# @rxsession: #optional 32 bit receive session - if not specified
+# set to the same value as transmit
+#
+# @offset: #optional additional offset - allows the insertion of
+# additional application-specific data before the packet payload
+#
+# Since: 2.1
+##
+{ 'type': 'NetdevL2TPv3Options',
+ 'data': {
+ 'src': 'str',
+ 'dst': 'str',
+ '*srcport': 'str',
+ '*dstport': 'str',
+ '*ipv6': 'bool',
+ '*udp': 'bool',
+ '*cookie64': 'bool',
+ '*counter': 'bool',
+ '*pincounter': 'bool',
+ '*txcookie': 'uint64',
+ '*rxcookie': 'uint64',
+ 'txsession': 'uint32',
+ '*rxsession': 'uint32',
+ '*offset': 'uint32' } }
+
+##
# @NetdevVdeOptions
#
# Connect the VLAN to a vde switch running on the host.
@@ -2150,6 +2216,9 @@
# A discriminated record of network device traits.
#
# Since 1.2
+#
+# 'l2tpv3' - since 2.1
+#
##
{ 'union': 'NetClientOptions',
'data': {
@@ -2157,6 +2226,7 @@
'nic': 'NetLegacyNicOptions',
'user': 'NetdevUserOptions',
'tap': 'NetdevTapOptions',
+ 'l2tpv3': 'NetdevL2TPv3Options',
'socket': 'NetdevSocketOptions',
'vde': 'NetdevVdeOptions',
'dump': 'NetdevDumpOptions',
@@ -3398,5 +3468,3 @@
##
{ 'enum': 'GuestPanicAction',
'data': [ 'pause' ] }
-
-{ 'include': 'qapi-event.json' }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 406ce03951..faf394cc76 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1447,7 +1447,8 @@
# @device: device name
#
# @msg: informative message for human consumption, such as the kind of
-# corruption being detected
+#       corruption being detected. It should not be machine-parsed, as it is
+# not guaranteed to be stable
#
# @offset: #optional, if the corruption resulted from an image access, this is
# the access offset into the image
diff --git a/qapi-event.json b/qapi/event.json
index e7a47f9927..ff97aeb377 100644
--- a/qapi-event.json
+++ b/qapi/event.json
@@ -1,8 +1,8 @@
##
# @SHUTDOWN
#
-# Emitted when the virtual machine has shutdown, possibly indicating that QEMU
-# is about about to exit.
+# Emitted when the virtual machine has shut down, indicating that qemu is
+# about to exit.
#
# Note: If the command-line option "-no-shutdown" has been specified, qemu will
# not exit, and a STOP event will eventually follow the SHUTDOWN event
@@ -316,3 +316,17 @@
{ 'event': 'QUORUM_REPORT_BAD',
'data': { '*error': 'str', 'node-name': 'str',
'sector-num': 'int', 'sector-count': 'int' } }
+
+##
+# @VSERPORT_CHANGE
+#
+# Emitted when the guest opens or closes a virtio-serial port.
+#
+# @id: device identifier of the virtio-serial port
+#
+# @open: true if the guest has opened the virtio-serial port
+#
+# Since: 2.1
+##
+{ 'event': 'VSERPORT_CHANGE',
+ 'data': { 'id': 'str', 'open': 'bool' } }
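
Given this schema entry, the qapi-event generator is expected to emit a send helper for the event. The name, parameter order and the omission of the timestamp below are assumptions inferred from the generator change earlier in this series, so treat the sketch as a mock rather than the real generated code:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Mock of the helper scripts/qapi-event.py is expected to generate for
     * VSERPORT_CHANGE.  The real one builds a QObject and hands it to the
     * monitor (and adds a timestamp); name and parameters here are an
     * assumption for illustration only.
     */
    static void qapi_event_send_vserport_change(const char *id, bool open)
    {
        printf("{\"event\": \"VSERPORT_CHANGE\", "
               "\"data\": {\"id\": \"%s\", \"open\": %s}}\n",
               id, open ? "true" : "false");
    }

    int main(void)
    {
        qapi_event_send_vserport_change("channel0", true);
        return 0;
    }
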
diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c
index 6a0974eb48..36eb3bcfd6 100644
--- a/qemu-bridge-helper.c
+++ b/qemu-bridge-helper.c
@@ -229,7 +229,7 @@ int main(int argc, char **argv)
unsigned long ifargs[4];
#endif
int ifindex;
- int fd, ctlfd, unixfd = -1;
+ int fd = -1, ctlfd = -1, unixfd = -1;
int use_vnet = 0;
int mtu;
const char *bridge = NULL;
@@ -436,7 +436,12 @@ int main(int argc, char **argv)
/* profit! */
cleanup:
-
+ if (fd >= 0) {
+ close(fd);
+ }
+ if (ctlfd >= 0) {
+ close(ctlfd);
+ }
while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
g_free(acl_rule);
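
The bridge-helper fix is the usual "initialize descriptors to -1, close only what was opened" pattern, which keeps the shared cleanup label from closing stray descriptors on early error paths. A generic stand-alone sketch of the pattern (hypothetical resources, not the helper's real ones):

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = -1, ctlfd = -1;
        int ret = 1;

        fd = open("/dev/null", O_RDWR);
        if (fd < 0) {
            goto cleanup;               /* ctlfd is still -1 here */
        }
        ctlfd = open("/dev/null", O_RDWR);
        if (ctlfd < 0) {
            goto cleanup;
        }
        ret = 0;                        /* the real work would go here */

    cleanup:
        if (ctlfd >= 0) {
            close(ctlfd);
        }
        if (fd >= 0) {
            close(fd);
        }
        return ret;
    }
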
diff --git a/qemu-char.c b/qemu-char.c
index 2e50a1093e..51917de462 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -94,6 +94,7 @@ static QTAILQ_HEAD(CharDriverStateHead, CharDriverState) chardevs =
CharDriverState *qemu_chr_alloc(void)
{
CharDriverState *chr = g_malloc0(sizeof(CharDriverState));
+ qemu_mutex_init(&chr->chr_write_lock);
return chr;
}
@@ -132,7 +133,7 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len)
int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len)
{
int offset = 0;
- int res;
+ int res = 0;
qemu_mutex_lock(&s->chr_write_lock);
while (offset < len) {
@@ -3704,6 +3705,7 @@ ChardevInfoList *qmp_query_chardev(Error **errp)
info->value = g_malloc0(sizeof(*info->value));
info->value->label = g_strdup(chr->label);
info->value->filename = g_strdup(chr->filename);
+ info->value->frontend_open = chr->fe_open;
info->next = chr_list;
chr_list = info;
diff --git a/qemu-options.hx b/qemu-options.hx
index ff76ad4830..9e5468678b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1433,6 +1433,29 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
" (default=" DEFAULT_BRIDGE_INTERFACE ") using the program 'helper'\n"
" (default=" DEFAULT_BRIDGE_HELPER ")\n"
#endif
+#ifdef __linux__
+ "-net l2tpv3[,vlan=n][,name=str],src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport],txsession=txsession[,rxsession=rxsession][,ipv6=on/off][,udp=on/off][,cookie64=on/off][,counter][,pincounter][,txcookie=txcookie][,rxcookie=rxcookie][,offset=offset]\n"
+ " connect the VLAN to an Ethernet over L2TPv3 pseudowire\n"
+ " Linux kernel 3.3+ as well as most routers can talk\n"
+    "                L2TPv3. This transport allows connecting a VM to a VM, a VM\n"
+    "                to a router and even a VM to the host. It is a nearly-universal\n"
+    "                standard (RFC3931). Note - this implementation uses static\n"
+ " pre-configured tunnels (same as the Linux kernel).\n"
+ " use 'src=' to specify source address\n"
+ " use 'dst=' to specify destination address\n"
+ " use 'udp=on' to specify udp encapsulation\n"
+    "                use 'srcport=' to specify source udp port\n"
+ " use 'dstport=' to specify destination udp port\n"
+ " use 'ipv6=on' to force v6\n"
+    "                L2TPv3 uses cookies to prevent misconfiguration and\n"
+    "                as a weak security measure\n"
+ " use 'rxcookie=0x012345678' to specify a rxcookie\n"
+ " use 'txcookie=0x012345678' to specify a txcookie\n"
+ " use 'cookie64=on' to set cookie size to 64 bit, otherwise 32\n"
+ " use 'counter=off' to force a 'cut-down' L2TPv3 with no counter\n"
+ " use 'pincounter=on' to work around broken counter handling in peer\n"
+ " use 'offset=X' to add an extra offset between header and data\n"
+#endif
"-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
" connect the vlan 'n' to another VLAN using a socket connection\n"
"-net socket[,vlan=n][,name=str][,fd=h][,mcast=maddr:port[,localaddr=addr]]\n"
@@ -1778,6 +1801,65 @@ qemu-system-i386 linux.img \
-net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
@end example
+@item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+@item -net l2tpv3[,vlan=@var{n}][,name=@var{name}],src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}]
+Connect VLAN @var{n} to an L2TPv3 pseudowire. L2TPv3 (RFC3931) is a popular
+protocol to transport Ethernet (and other Layer 2) data frames between
+two systems. It is present in routers, firewalls and the Linux kernel
+(from version 3.3 onwards).
+
+This transport allows a VM to communicate directly with another VM, router or firewall.
+
+@item src=@var{srcaddr}
+ source address (mandatory)
+@item dst=@var{dstaddr}
+ destination address (mandatory)
+@item udp
+ select udp encapsulation (default is ip).
+@item srcport=@var{srcport}
+ source udp port.
+@item dstport=@var{dstport}
+ destination udp port.
+@item ipv6
+ force v6, otherwise defaults to v4.
+@item rxcookie=@var{rxcookie}
+@item txcookie=@var{txcookie}
+ Cookies are a weak form of security in the l2tpv3 specification.
+Their function is mostly to prevent misconfiguration. By default they are 32
+bit.
+@item cookie64
+ Set cookie size to 64 bit instead of the default 32
+@item counter=off
+ Force a 'cut-down' L2TPv3 with no counter as in
+draft-mkonstan-l2tpext-keyed-ipv6-tunnel-00
+@item pincounter=on
+ Work around broken counter handling in the peer. This may also help on
+networks which reorder packets.
+@item offset=@var{offset}
+ Add an extra offset between header and data
+
+For example, to attach a VM running on host 4.3.2.1 via L2TPv3 to the bridge br-lan
+on the remote Linux host 1.2.3.4:
+@example
+# Set up the tunnel on the Linux host using udp encapsulation
+# on 1.2.3.4
+ip l2tp add tunnel remote 4.3.2.1 local 1.2.3.4 tunnel_id 1 peer_tunnel_id 1 \
+ encap udp udp_sport 16384 udp_dport 16384
+ip l2tp add session tunnel_id 1 name vmtunnel0 session_id \
+ 0xFFFFFFFF peer_session_id 0xFFFFFFFF
+ifconfig vmtunnel0 mtu 1500
+ifconfig vmtunnel0 up
+brctl addif br-lan vmtunnel0
+
+
+# on 4.3.2.1
+# launch QEMU instance - if your network reorders packets or is very lossy, add ,pincounter
+
+qemu-system-i386 linux.img -net nic -net l2tpv3,src=4.3.2.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter
+
+
+@end example
+
@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
@item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d342b8a067..65218bc147 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1926,19 +1926,28 @@ Each json-object contain the following:
- "label": device's label (json-string)
- "filename": device's file (json-string)
+- "frontend-open": open/closed state of the frontend device attached to this
+ backend (json-bool)
Example:
-> { "execute": "query-chardev" }
<- {
- "return":[
+ "return": [
+ {
+ "label": "charchannel0",
+ "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.agent,server",
+ "frontend-open": false
+ },
{
- "label":"monitor",
- "filename":"stdio"
+ "label": "charmonitor",
+ "filename": "unix:/var/lib/libvirt/qemu/seabios.rhel6.monitor,server",
+ "frontend-open": true
},
{
- "label":"serial0",
- "filename":"vc"
+ "label": "charserial0",
+ "filename": "pty:/dev/pts/2",
+ "frontend-open": true
}
]
}
diff --git a/savevm.c b/savevm.c
index ba900d304b..e19ae0a9b2 100644
--- a/savevm.c
+++ b/savevm.c
@@ -232,7 +232,6 @@ typedef struct SaveStateEntry {
const VMStateDescription *vmsd;
void *opaque;
CompatEntry *compat;
- int no_migrate;
int is_ram;
} SaveStateEntry;
@@ -430,7 +429,6 @@ int register_savevm_live(DeviceState *dev,
se->ops = ops;
se->opaque = opaque;
se->vmsd = NULL;
- se->no_migrate = 0;
/* if this is a live_savem then set is_ram */
if (ops->save_live_setup != NULL) {
se->is_ram = 1;
@@ -521,7 +519,6 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
se->opaque = opaque;
se->vmsd = vmsd;
se->alias_id = alias_id;
- se->no_migrate = vmsd->unmigratable;
if (dev) {
char *id = qdev_get_dev_path(dev);
@@ -590,7 +587,7 @@ bool qemu_savevm_state_blocked(Error **errp)
SaveStateEntry *se;
QTAILQ_FOREACH(se, &savevm_handlers, entry) {
- if (se->no_migrate) {
+ if (se->vmsd && se->vmsd->unmigratable) {
error_setg(errp, "State blocked by non-migratable device '%s'",
se->idstr);
return true;
diff --git a/scripts/qapi-event.py b/scripts/qapi-event.py
index 3a1cd61914..601e3076ab 100644
--- a/scripts/qapi-event.py
+++ b/scripts/qapi-event.py
@@ -26,9 +26,8 @@ def _generate_event_api_name(event_name, params):
api_name += "bool has_%s,\n" % c_var(argname)
api_name += "".ljust(l)
- if argentry == "str":
- api_name += "const "
- api_name += "%s %s,\n" % (c_type(argentry), c_var(argname))
+ api_name += "%s %s,\n" % (c_type(argentry, is_param=True),
+ c_var(argname))
api_name += "".ljust(l)
api_name += "Error **errp)"
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 54b97cb48e..f2c6d1f840 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -255,7 +255,7 @@ def check_event(expr, expr_info):
if structured:
raise QAPIExprError(expr_info,
"Nested structure define in event is not "
- "supported now, event '%s', argname '%s'"
+ "supported, event '%s', argname '%s'"
% (expr['event'], argname))
def check_union(expr, expr_info):
diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h
index 0808cfc67d..71a1b97cfc 100644
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -71,6 +71,9 @@ typedef struct X86CPUClass {
/**
* X86CPU:
* @env: #CPUX86State
+ * @migratable: If set, only migratable flags will be accepted when "enforce"
+ * mode is used, and only migratable flags will be included in the "host"
+ * CPU model.
*
* An x86 CPU.
*/
@@ -88,6 +91,7 @@ typedef struct X86CPU {
bool check_cpuid;
bool enforce_cpuid;
bool expose_kvm;
+ bool migratable;
/* if true the CPUID code directly forward host cache leaves to the guest */
bool cache_info_passthrough;
@@ -117,7 +121,7 @@ static inline X86CPU *x86_env_get_cpu(CPUX86State *env)
#define ENV_OFFSET offsetof(X86CPU, env)
#ifndef CONFIG_USER_ONLY
-extern const struct VMStateDescription vmstate_x86_cpu;
+extern struct VMStateDescription vmstate_x86_cpu;
#endif
/**
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 8983457e23..45c662dad4 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -263,48 +263,133 @@ static const char *cpuid_7_0_ebx_feature_name[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
+static const char *cpuid_apm_edx_feature_name[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "invtsc", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+};
+
+#define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
+#define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
+ CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
+#define PENTIUM2_FEATURES (PENTIUM_FEATURES | CPUID_PAE | CPUID_SEP | \
+ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
+ CPUID_PSE36 | CPUID_FXSR)
+#define PENTIUM3_FEATURES (PENTIUM2_FEATURES | CPUID_SSE)
+#define PPRO_FEATURES (CPUID_FP87 | CPUID_DE | CPUID_PSE | CPUID_TSC | \
+ CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_PGE | CPUID_CMOV | \
+ CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
+ CPUID_PAE | CPUID_SEP | CPUID_APIC)
+
+#define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
+ CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
+ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
+ CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
+ CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS)
+ /* partly implemented:
+ CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
+ /* missing:
+ CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
+#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
+ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
+ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
+ /* missing:
+ CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
+ CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
+ CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
+ CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
+ CPUID_EXT_RDRAND */
+
+#ifdef TARGET_X86_64
+#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
+#else
+#define TCG_EXT2_X86_64_FEATURES 0
+#endif
+
+#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
+ CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
+ CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \
+ TCG_EXT2_X86_64_FEATURES)
+#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
+ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
+#define TCG_EXT4_FEATURES 0
+#define TCG_SVM_FEATURES 0
+#define TCG_KVM_FEATURES 0
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
+ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
+ /* missing:
+ CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
+ CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
+ CPUID_7_0_EBX_RDSEED */
+#define TCG_APM_FEATURES 0
+
+
typedef struct FeatureWordInfo {
const char **feat_names;
uint32_t cpuid_eax; /* Input EAX for CPUID */
bool cpuid_needs_ecx; /* CPUID instruction uses ECX as input */
uint32_t cpuid_ecx; /* Input ECX value for CPUID */
int cpuid_reg; /* output register (R_* constant) */
+ uint32_t tcg_features; /* Feature flags supported by TCG */
+ uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */
} FeatureWordInfo;
static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[FEAT_1_EDX] = {
.feat_names = feature_name,
.cpuid_eax = 1, .cpuid_reg = R_EDX,
+ .tcg_features = TCG_FEATURES,
},
[FEAT_1_ECX] = {
.feat_names = ext_feature_name,
.cpuid_eax = 1, .cpuid_reg = R_ECX,
+ .tcg_features = TCG_EXT_FEATURES,
},
[FEAT_8000_0001_EDX] = {
.feat_names = ext2_feature_name,
.cpuid_eax = 0x80000001, .cpuid_reg = R_EDX,
+ .tcg_features = TCG_EXT2_FEATURES,
},
[FEAT_8000_0001_ECX] = {
.feat_names = ext3_feature_name,
.cpuid_eax = 0x80000001, .cpuid_reg = R_ECX,
+ .tcg_features = TCG_EXT3_FEATURES,
},
[FEAT_C000_0001_EDX] = {
.feat_names = ext4_feature_name,
.cpuid_eax = 0xC0000001, .cpuid_reg = R_EDX,
+ .tcg_features = TCG_EXT4_FEATURES,
},
[FEAT_KVM] = {
.feat_names = kvm_feature_name,
.cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX,
+ .tcg_features = TCG_KVM_FEATURES,
},
[FEAT_SVM] = {
.feat_names = svm_feature_name,
.cpuid_eax = 0x8000000A, .cpuid_reg = R_EDX,
+ .tcg_features = TCG_SVM_FEATURES,
},
[FEAT_7_0_EBX] = {
.feat_names = cpuid_7_0_ebx_feature_name,
.cpuid_eax = 7,
.cpuid_needs_ecx = true, .cpuid_ecx = 0,
.cpuid_reg = R_EBX,
+ .tcg_features = TCG_7_0_EBX_FEATURES,
+ },
+ [FEAT_8000_0007_EDX] = {
+ .feat_names = cpuid_apm_edx_feature_name,
+ .cpuid_eax = 0x80000007,
+ .cpuid_reg = R_EDX,
+ .tcg_features = TCG_APM_FEATURES,
+ .unmigratable_flags = CPUID_APM_INVTSC,
},
};
@@ -373,11 +458,42 @@ static uint32_t kvm_default_features[FEATURE_WORDS] = {
[FEAT_1_ECX] = CPUID_EXT_X2APIC,
};
+/* Features that are not added by default to any CPU model when KVM is enabled.
+ */
+static uint32_t kvm_default_unset_features[FEATURE_WORDS] = {
+ [FEAT_1_ECX] = CPUID_EXT_MONITOR,
+};
+
void x86_cpu_compat_disable_kvm_features(FeatureWord w, uint32_t features)
{
kvm_default_features[w] &= ~features;
}
+/*
+ * Returns the set of feature flags that are supported and migratable by
+ * QEMU, for a given FeatureWord.
+ */
+static uint32_t x86_cpu_get_migratable_flags(FeatureWord w)
+{
+ FeatureWordInfo *wi = &feature_word_info[w];
+ uint32_t r = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ uint32_t f = 1U << i;
+ /* If the feature name is unknown, it is not supported by QEMU yet */
+ if (!wi->feat_names[i]) {
+ continue;
+ }
+ /* Skip features known to QEMU, but explicitly marked as unmigratable */
+ if (wi->unmigratable_flags & f) {
+ continue;
+ }
+ r |= f;
+ }
+ return r;
+}
+
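
x86_cpu_get_migratable_flags() keeps every bit that has a name in feat_names[] and is not listed in unmigratable_flags; with the new FEAT_8000_0007_EDX word this is what later filters "invtsc" out of the migratable "host" CPU model. A reduced, self-contained sketch of the same filtering (illustrative table, not the real feature words):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative table: bit 8 plays the role of an unmigratable flag. */
    static const char *demo_feat_names[32] = {
        [0] = "featA", [3] = "featB", [8] = "invtsc-like",
    };
    static const uint32_t demo_unmigratable = 1U << 8;

    static uint32_t migratable_flags(void)
    {
        uint32_t r = 0;
        int i;

        for (i = 0; i < 32; i++) {
            uint32_t f = 1U << i;

            if (!demo_feat_names[i]) {      /* unknown to QEMU: drop */
                continue;
            }
            if (demo_unmigratable & f) {    /* known but unmigratable: drop */
                continue;
            }
            r |= f;
        }
        return r;
    }

    int main(void)
    {
        printf("migratable mask: 0x%x\n", migratable_flags());  /* 0x9 */
        return 0;
    }
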
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
@@ -534,51 +650,6 @@ struct X86CPUDefinition {
bool cache_info_passthrough;
};
-#define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
-#define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
- CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
-#define PENTIUM2_FEATURES (PENTIUM_FEATURES | CPUID_PAE | CPUID_SEP | \
- CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
- CPUID_PSE36 | CPUID_FXSR)
-#define PENTIUM3_FEATURES (PENTIUM2_FEATURES | CPUID_SSE)
-#define PPRO_FEATURES (CPUID_FP87 | CPUID_DE | CPUID_PSE | CPUID_TSC | \
- CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_PGE | CPUID_CMOV | \
- CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
- CPUID_PAE | CPUID_SEP | CPUID_APIC)
-
-#define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
- CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
- CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
- CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS)
- /* partly implemented:
- CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
- /* missing:
- CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
-#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
- CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
- CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
- CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
- /* missing:
- CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
- CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
- CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
- CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
- CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
- CPUID_EXT_RDRAND */
-#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
- CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
- CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB)
-#define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
- CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
-#define TCG_SVM_FEATURES 0
-#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \
- CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
- /* missing:
- CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
- CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
- CPUID_7_0_EBX_RDSEED */
-
static X86CPUDefinition builtin_x86_defs[] = {
{
.name = "qemu64",
@@ -827,10 +898,10 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 3,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_SSSE3 | CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
@@ -849,13 +920,13 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 3,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
- CPUID_EXT_SSE3,
+ CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
.features[FEAT_8000_0001_ECX] =
@@ -872,13 +943,13 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 3,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
- CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_SSE3,
+ CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
.features[FEAT_8000_0001_ECX] =
@@ -895,14 +966,14 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
- CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
- CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
.features[FEAT_8000_0001_ECX] =
@@ -919,19 +990,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
- CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT |
- CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
- CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
- CPUID_EXT_SSE3,
+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT |
+ CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+ CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
+ CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
- CPUID_EXT2_SYSCALL,
+ CPUID_EXT2_SYSCALL,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_LAHF_LM,
.xlevel = 0x8000000A,
@@ -946,20 +1017,20 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
- CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 |
- CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
- CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 |
- CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE |
- CPUID_EXT_PCID,
+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 |
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 |
+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE |
+ CPUID_EXT_PCID,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
- CPUID_EXT2_SYSCALL,
+ CPUID_EXT2_SYSCALL,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_LAHF_LM,
.features[FEAT_7_0_EBX] =
@@ -971,6 +1042,40 @@ static X86CPUDefinition builtin_x86_defs[] = {
.model_id = "Intel Core Processor (Haswell)",
},
{
+ .name = "Broadwell",
+ .level = 0xd,
+ .vendor = CPUID_VENDOR_INTEL,
+ .family = 6,
+ .model = 61,
+ .stepping = 2,
+ .features[FEAT_1_EDX] =
+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
+ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 |
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 |
+ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE |
+ CPUID_EXT_PCID,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
+ CPUID_EXT2_SYSCALL,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
+ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP |
+ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP,
+ .xlevel = 0x8000000A,
+ .model_id = "Intel Core Processor (Broadwell)",
+ },
+ {
.name = "Opteron_G1",
.level = 5,
.vendor = CPUID_VENDOR_AMD,
@@ -979,19 +1084,19 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
- CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
- CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
- CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
- CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
- CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+ CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+ CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+ CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+ CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+ CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
.xlevel = 0x80000008,
.model_id = "AMD Opteron 240 (Gen 1 Class Opteron)",
},
@@ -1004,20 +1109,20 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_CX16 | CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR |
- CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
- CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
- CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
- CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
- CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
- CPUID_EXT2_DE | CPUID_EXT2_FPU,
+ CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
+ CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
+ CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
+ CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
+ CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
+ CPUID_EXT2_DE | CPUID_EXT2_FPU,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
.xlevel = 0x80000008,
@@ -1032,24 +1137,24 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 1,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR |
- CPUID_EXT_SSE3,
+ CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR |
- CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
- CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
- CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
- CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
- CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
- CPUID_EXT2_DE | CPUID_EXT2_FPU,
+ CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
+ CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
+ CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
+ CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
+ CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
+ CPUID_EXT2_DE | CPUID_EXT2_FPU,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A |
- CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
+ CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
.xlevel = 0x80000008,
.model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
},
@@ -1062,28 +1167,28 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 2,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
- CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
- CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
- CPUID_EXT_SSE3,
+ CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+ CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
+ CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP |
- CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
- CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
- CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
- CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
- CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
- CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+ CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
+ CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+ CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+ CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+ CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+ CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_FMA4 | CPUID_EXT3_XOP |
- CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
- CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
- CPUID_EXT3_LAHF_LM,
+ CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
+ CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
+ CPUID_EXT3_LAHF_LM,
.xlevel = 0x8000001A,
.model_id = "AMD Opteron 62xx class CPU",
},
@@ -1096,28 +1201,28 @@ static X86CPUDefinition builtin_x86_defs[] = {
.stepping = 0,
.features[FEAT_1_EDX] =
CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
- CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
- CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
- CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
- CPUID_DE | CPUID_FP87,
+ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+ CPUID_DE | CPUID_FP87,
.features[FEAT_1_ECX] =
CPUID_EXT_F16C | CPUID_EXT_AVX | CPUID_EXT_XSAVE |
- CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
- CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA |
- CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
+ CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
+ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA |
+ CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_LM | CPUID_EXT2_RDTSCP |
- CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
- CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
- CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
- CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
- CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
- CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+ CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
+ CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+ CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+ CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+ CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+ CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP |
- CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
- CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
- CPUID_EXT3_LAHF_LM,
+ CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
+ CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
+ CPUID_EXT3_LAHF_LM,
.xlevel = 0x8000001A,
.model_id = "AMD Opteron 63xx class CPU",
},
@@ -1168,12 +1273,18 @@ static int cpu_x86_fill_model_id(char *str)
static X86CPUDefinition host_cpudef;
+static Property host_x86_cpu_properties[] = {
+ DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true),
+ DEFINE_PROP_END_OF_LIST()
+};
+
/* class_init for the "host" CPU model
*
* This function may be called before KVM is initialized.
*/
static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
{
+ DeviceClass *dc = DEVICE_CLASS(oc);
X86CPUClass *xcc = X86_CPU_CLASS(oc);
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
@@ -1195,8 +1306,13 @@ static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
/* level, xlevel, xlevel2, and the feature words are initialized on
* instance_init, because they require KVM to be initialized.
*/
+
+ dc->props = host_x86_cpu_properties;
}
+static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
+ bool migratable_only);
+
static void host_x86_cpu_initfn(Object *obj)
{
X86CPU *cpu = X86_CPU(obj);
@@ -1211,10 +1327,8 @@ static void host_x86_cpu_initfn(Object *obj)
env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
for (w = 0; w < FEATURE_WORDS; w++) {
- FeatureWordInfo *wi = &feature_word_info[w];
env->features[w] =
- kvm_arch_get_supported_cpuid(s, wi->cpuid_eax, wi->cpuid_ecx,
- wi->cpuid_reg);
+ x86_cpu_get_supported_feature_word(w, cpu->migratable);
}
object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
}
@@ -1228,53 +1342,23 @@ static const TypeInfo host_x86_cpu_type_info = {
#endif
-static int unavailable_host_feature(FeatureWordInfo *f, uint32_t mask)
+static void report_unavailable_features(FeatureWord w, uint32_t mask)
{
+ FeatureWordInfo *f = &feature_word_info[w];
int i;
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 32; ++i) {
if (1 << i & mask) {
const char *reg = get_register_name_32(f->cpuid_reg);
assert(reg);
- fprintf(stderr, "warning: host doesn't support requested feature: "
+ fprintf(stderr, "warning: %s doesn't support requested feature: "
"CPUID.%02XH:%s%s%s [bit %d]\n",
+ kvm_enabled() ? "host" : "TCG",
f->cpuid_eax, reg,
f->feat_names[i] ? "." : "",
f->feat_names[i] ? f->feat_names[i] : "", i);
- break;
- }
- return 0;
-}
-
-/* Check if all requested cpu flags are making their way to the guest
- *
- * Returns 0 if all flags are supported by the host, non-zero otherwise.
- *
- * This function may be called only if KVM is enabled.
- */
-static int kvm_check_features_against_host(KVMState *s, X86CPU *cpu)
-{
- CPUX86State *env = &cpu->env;
- int rv = 0;
- FeatureWord w;
-
- assert(kvm_enabled());
-
- for (w = 0; w < FEATURE_WORDS; w++) {
- FeatureWordInfo *wi = &feature_word_info[w];
- uint32_t guest_feat = env->features[w];
- uint32_t host_feat = kvm_arch_get_supported_cpuid(s, wi->cpuid_eax,
- wi->cpuid_ecx,
- wi->cpuid_reg);
- uint32_t mask;
- for (mask = 1; mask; mask <<= 1) {
- if (guest_feat & mask && !(host_feat & mask)) {
- unavailable_host_feature(wi, mask);
- rv = 1;
- }
}
}
- return rv;
}
static void x86_cpuid_version_get_family(Object *obj, Visitor *v, void *opaque,
@@ -1663,6 +1747,7 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char *features,
{
X86CPU *cpu = X86_CPU(cs);
char *featurestr; /* Single 'key=value" string being parsed */
+ FeatureWord w;
/* Features to be added */
FeatureWordArray plus_features = { 0 };
/* Features to be removed */
@@ -1742,22 +1827,11 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char *features,
}
featurestr = strtok(NULL, ",");
}
- env->features[FEAT_1_EDX] |= plus_features[FEAT_1_EDX];
- env->features[FEAT_1_ECX] |= plus_features[FEAT_1_ECX];
- env->features[FEAT_8000_0001_EDX] |= plus_features[FEAT_8000_0001_EDX];
- env->features[FEAT_8000_0001_ECX] |= plus_features[FEAT_8000_0001_ECX];
- env->features[FEAT_C000_0001_EDX] |= plus_features[FEAT_C000_0001_EDX];
- env->features[FEAT_KVM] |= plus_features[FEAT_KVM];
- env->features[FEAT_SVM] |= plus_features[FEAT_SVM];
- env->features[FEAT_7_0_EBX] |= plus_features[FEAT_7_0_EBX];
- env->features[FEAT_1_EDX] &= ~minus_features[FEAT_1_EDX];
- env->features[FEAT_1_ECX] &= ~minus_features[FEAT_1_ECX];
- env->features[FEAT_8000_0001_EDX] &= ~minus_features[FEAT_8000_0001_EDX];
- env->features[FEAT_8000_0001_ECX] &= ~minus_features[FEAT_8000_0001_ECX];
- env->features[FEAT_C000_0001_EDX] &= ~minus_features[FEAT_C000_0001_EDX];
- env->features[FEAT_KVM] &= ~minus_features[FEAT_KVM];
- env->features[FEAT_SVM] &= ~minus_features[FEAT_SVM];
- env->features[FEAT_7_0_EBX] &= ~minus_features[FEAT_7_0_EBX];
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ env->features[w] |= plus_features[w];
+ env->features[w] &= ~minus_features[w];
+ }
}
/* generate a composite string into buf of all cpuid names in featureset
@@ -1840,21 +1914,53 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
return cpu_list;
}
-static void filter_features_for_kvm(X86CPU *cpu)
+static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
+ bool migratable_only)
+{
+ FeatureWordInfo *wi = &feature_word_info[w];
+ uint32_t r;
+
+ if (kvm_enabled()) {
+ r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid_eax,
+ wi->cpuid_ecx,
+ wi->cpuid_reg);
+ } else if (tcg_enabled()) {
+ r = wi->tcg_features;
+ } else {
+ return ~0;
+ }
+ if (migratable_only) {
+ r &= x86_cpu_get_migratable_flags(w);
+ }
+ return r;
+}
+
+/*
+ * Filters CPU feature words based on host availability of each feature.
+ *
+ * Returns: 0 if all flags are supported by the host, non-zero otherwise.
+ */
+static int x86_cpu_filter_features(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
- KVMState *s = kvm_state;
FeatureWord w;
+ int rv = 0;
for (w = 0; w < FEATURE_WORDS; w++) {
- FeatureWordInfo *wi = &feature_word_info[w];
- uint32_t host_feat = kvm_arch_get_supported_cpuid(s, wi->cpuid_eax,
- wi->cpuid_ecx,
- wi->cpuid_reg);
+ uint32_t host_feat =
+ x86_cpu_get_supported_feature_word(w, cpu->migratable);
uint32_t requested_features = env->features[w];
env->features[w] &= host_feat;
cpu->filtered_features[w] = requested_features & ~env->features[w];
+ if (cpu->filtered_features[w]) {
+ if (cpu->check_cpuid || cpu->enforce_cpuid) {
+ report_unavailable_features(w, cpu->filtered_features[w]);
+ }
+ rv = 1;
+ }
}
+
+ return rv;
}
/* Load data from X86CPUDefinition
@@ -1864,30 +1970,26 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
CPUX86State *env = &cpu->env;
const char *vendor;
char host_vendor[CPUID_VENDOR_SZ + 1];
+ FeatureWord w;
object_property_set_int(OBJECT(cpu), def->level, "level", errp);
object_property_set_int(OBJECT(cpu), def->family, "family", errp);
object_property_set_int(OBJECT(cpu), def->model, "model", errp);
object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp);
- env->features[FEAT_1_EDX] = def->features[FEAT_1_EDX];
- env->features[FEAT_1_ECX] = def->features[FEAT_1_ECX];
- env->features[FEAT_8000_0001_EDX] = def->features[FEAT_8000_0001_EDX];
- env->features[FEAT_8000_0001_ECX] = def->features[FEAT_8000_0001_ECX];
object_property_set_int(OBJECT(cpu), def->xlevel, "xlevel", errp);
- env->features[FEAT_KVM] = def->features[FEAT_KVM];
- env->features[FEAT_SVM] = def->features[FEAT_SVM];
- env->features[FEAT_C000_0001_EDX] = def->features[FEAT_C000_0001_EDX];
- env->features[FEAT_7_0_EBX] = def->features[FEAT_7_0_EBX];
env->cpuid_xlevel2 = def->xlevel2;
cpu->cache_info_passthrough = def->cache_info_passthrough;
-
object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp);
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ env->features[w] = def->features[w];
+ }
/* Special cases not set in the X86CPUDefinition structs: */
if (kvm_enabled()) {
FeatureWord w;
for (w = 0; w < FEATURE_WORDS; w++) {
env->features[w] |= kvm_default_features[w];
+ env->features[w] &= ~kvm_default_unset_features[w];
}
}
@@ -2336,6 +2438,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
(AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \
(L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE);
break;
+ case 0x80000007:
+ *eax = 0;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = env->features[FEAT_8000_0007_EDX];
+ break;
case 0x80000008:
/* virtual & phys address size in low 2 bytes. */
/* XXX: This value must match the one used in the MMU code. */
@@ -2593,25 +2701,13 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
& CPUID_EXT2_AMD_ALIASES);
}
- if (!kvm_enabled()) {
- env->features[FEAT_1_EDX] &= TCG_FEATURES;
- env->features[FEAT_1_ECX] &= TCG_EXT_FEATURES;
- env->features[FEAT_8000_0001_EDX] &= (TCG_EXT2_FEATURES
-#ifdef TARGET_X86_64
- | CPUID_EXT2_SYSCALL | CPUID_EXT2_LM
-#endif
- );
- env->features[FEAT_8000_0001_ECX] &= TCG_EXT3_FEATURES;
- env->features[FEAT_SVM] &= TCG_SVM_FEATURES;
- } else {
- KVMState *s = kvm_state;
- if ((cpu->check_cpuid || cpu->enforce_cpuid)
- && kvm_check_features_against_host(s, cpu) && cpu->enforce_cpuid) {
- error_setg(&local_err,
- "Host's CPU doesn't support requested features");
- goto out;
- }
- filter_features_for_kvm(cpu);
+
+ if (x86_cpu_filter_features(cpu) && cpu->enforce_cpuid) {
+ error_setg(&local_err,
+ kvm_enabled() ?
+ "Host doesn't support requested features" :
+ "TCG doesn't support requested features");
+ goto out;
}
#ifndef CONFIG_USER_ONLY
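
Note on the target-i386/cpu.c hunks above: the per-feature-word copy/paste is replaced by loops over FEATURE_WORDS, and x86_cpu_filter_features() now masks the requested feature words against what the accelerator (KVM or TCG) can actually supply, recording anything it had to drop in filtered_features[]. The stand-alone sketch below shows just that mask-and-record pattern with made-up word names and values; it is not QEMU code and does not use QEMU's types.

    #include <stdint.h>
    #include <stdio.h>

    enum { WORD_A, WORD_B, NUM_WORDS };

    static uint32_t requested[NUM_WORDS] = { 0x0000000f, 0x00000101 };
    static uint32_t supported[NUM_WORDS] = { 0x0000000b, 0x00000001 };
    static uint32_t filtered[NUM_WORDS];

    static int filter_features(void)
    {
        int some_filtered = 0;

        for (int w = 0; w < NUM_WORDS; w++) {
            uint32_t before = requested[w];

            requested[w] &= supported[w];          /* keep only what the accelerator offers */
            filtered[w] = before & ~requested[w];  /* remember what was dropped */
            if (filtered[w]) {
                some_filtered = 1;                 /* caller may warn or refuse to start */
            }
        }
        return some_filtered;
    }

    int main(void)
    {
        if (filter_features()) {
            for (int w = 0; w < NUM_WORDS; w++) {
                printf("word %d: dropped bits 0x%08x\n", w, filtered[w]);
            }
        }
        return 0;
    }

With enforce_cpuid set, a non-zero return from the real filter function becomes a hard error; otherwise the dropped bits are only reported.
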
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index b5e1b411fb..e634d83e86 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -402,6 +402,7 @@ typedef enum FeatureWord {
FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */
FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
+ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */
FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
FEAT_SVM, /* CPUID[8000_000A].EDX */
@@ -561,6 +562,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EBX_ADX (1U << 19)
#define CPUID_7_0_EBX_SMAP (1U << 20)
+/* CPUID[0x80000007].EDX flags: */
+#define CPUID_APM_INVTSC (1U << 8)
+
#define CPUID_VENDOR_SZ 12
#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
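
The cpu.h change adds a feature word for CPUID leaf 0x80000007 EDX, currently carrying only CPUID_APM_INVTSC (bit 8, the invariant-TSC flag). As a quick way to see what that bit reports on a given host, the sketch below probes the leaf with the compiler's <cpuid.h> intrinsics; it assumes GCC or Clang on x86 and is not part of QEMU.

    #include <cpuid.h>
    #include <stdio.h>

    #define APM_INVTSC (1U << 8)   /* same bit as CPUID_APM_INVTSC above */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx)) {
            printf("invariant TSC: %s\n", (edx & APM_INVTSC) ? "yes" : "no");
        } else {
            printf("leaf 0x80000007 not available\n");
        }
        return 0;
    }
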
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 4bf0ac9e76..097fe1188d 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -35,6 +35,8 @@
#include "exec/ioport.h"
#include <asm/hyperv.h>
#include "hw/pci/pci.h"
+#include "migration/migration.h"
+#include "qapi/qmp/qerror.h"
//#define DEBUG_KVM
@@ -448,6 +450,8 @@ static bool hyperv_enabled(X86CPU *cpu)
cpu->hyperv_relaxed_timing);
}
+static Error *invtsc_mig_blocker;
+
#define KVM_MAX_CPUID_ENTRIES 100
int kvm_arch_init_vcpu(CPUState *cs)
@@ -705,6 +709,17 @@ int kvm_arch_init_vcpu(CPUState *cs)
!!(c->ecx & CPUID_EXT_SMX);
}
+ c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
+ if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
+ /* for migration */
+ error_setg(&invtsc_mig_blocker,
+ "State blocked by non-migratable CPU device"
+ " (invtsc flag)");
+ migrate_add_blocker(invtsc_mig_blocker);
+ /* for savevm */
+ vmstate_x86_cpu.unmigratable = 1;
+ }
+
cpuid_data.cpuid.padding = 0;
r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
if (r) {
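
Note on the kvm.c hunk: when the guest is handed the invtsc bit, a migration blocker is registered exactly once (guarded by the static pointer) and the CPU vmstate is marked unmigratable so savevm refuses as well. The sketch below shows only the register-once guard, with a plain string standing in for QEMU's Error object and migrate_add_blocker() call.

    #include <stddef.h>
    #include <stdio.h>

    static const char *invtsc_blocker;   /* NULL until the first invtsc vCPU */

    static void maybe_block_migration(unsigned int edx)
    {
        if ((edx & (1U << 8)) && invtsc_blocker == NULL) {
            invtsc_blocker = "State blocked by non-migratable CPU device (invtsc flag)";
            /* the real code calls migrate_add_blocker() and flags the vmstate here */
        }
    }

    int main(void)
    {
        maybe_block_migration(1U << 8);
        maybe_block_migration(1U << 8);  /* second call is a no-op */
        printf("%s\n", invtsc_blocker ? invtsc_blocker : "migratable");
        return 0;
    }
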
diff --git a/target-i386/machine.c b/target-i386/machine.c
index b8dcd2f943..16d2f6a809 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -603,7 +603,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
}
};
-const VMStateDescription vmstate_x86_cpu = {
+VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
.minimum_version_id = 3,
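
The machine.c change drops the const qualifier from vmstate_x86_cpu precisely so the kvm.c hunk above can flip .unmigratable at runtime. A minimal illustration with a hypothetical stand-in struct (not QEMU's VMStateDescription):

    #include <stdio.h>

    struct vmstate_desc { int unmigratable; };

    static struct vmstate_desc vmstate_cpu = { 0 };   /* was 'static const' */

    int main(void)
    {
        vmstate_cpu.unmigratable = 1;   /* would not compile against a const object */
        printf("unmigratable = %d\n", vmstate_cpu.unmigratable);
        return 0;
    }
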
diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index 97a81d8660..9a91af9dbf 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1138,6 +1138,8 @@
"POWER7 v2.3")
POWERPC_DEF("POWER7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7P,
"POWER7+ v2.1")
+ POWERPC_DEF("POWER8E_v1.0", CPU_POWERPC_POWER8E_v10, POWER8E,
+ "POWER8E v1.0")
POWERPC_DEF("POWER8_v1.0", CPU_POWERPC_POWER8_v10, POWER8,
"POWER8 v1.0")
POWERPC_DEF("970", CPU_POWERPC_970, 970,
@@ -1386,6 +1388,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "POWER5gs", "POWER5+" },
{ "POWER7", "POWER7_v2.3" },
{ "POWER7+", "POWER7+_v2.1" },
+ { "POWER8E", "POWER8E_v1.0" },
{ "POWER8", "POWER8_v1.0" },
{ "970fx", "970fx_v3.1" },
{ "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index db75896012..c39d03a504 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -559,9 +559,12 @@ enum {
CPU_POWERPC_POWER7P_BASE = 0x004A0000,
CPU_POWERPC_POWER7P_MASK = 0xFFFF0000,
CPU_POWERPC_POWER7P_v21 = 0x004A0201,
- CPU_POWERPC_POWER8_BASE = 0x004B0000,
+ CPU_POWERPC_POWER8E_BASE = 0x004B0000,
+ CPU_POWERPC_POWER8E_MASK = 0xFFFF0000,
+ CPU_POWERPC_POWER8E_v10 = 0x004B0100,
+ CPU_POWERPC_POWER8_BASE = 0x004D0000,
CPU_POWERPC_POWER8_MASK = 0xFFFF0000,
- CPU_POWERPC_POWER8_v10 = 0x004B0100,
+ CPU_POWERPC_POWER8_v10 = 0x004D0100,
CPU_POWERPC_970 = 0x00390202,
CPU_POWERPC_970FX_v10 = 0x00391100,
CPU_POWERPC_970FX_v20 = 0x003C0200,
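
Note on the cpu-models.h hunk: PVR family 0x004Bxxxx is reassigned to POWER8E and plain POWER8 moves to 0x004Dxxxx; CPU class lookup matches on (pvr & mask) == base. The sketch below exercises that masked match with the exact values from the diff; the classify() helper is illustrative, not a QEMU function.

    #include <stdint.h>
    #include <stdio.h>

    #define POWER8E_BASE 0x004B0000u
    #define POWER8_BASE  0x004D0000u
    #define PVR_MASK     0xFFFF0000u

    static const char *classify(uint32_t pvr)
    {
        if ((pvr & PVR_MASK) == POWER8E_BASE) {
            return "POWER8E";
        }
        if ((pvr & PVR_MASK) == POWER8_BASE) {
            return "POWER8";
        }
        return "unknown";
    }

    int main(void)
    {
        printf("%s\n", classify(0x004B0100));   /* POWER8E v1.0 */
        printf("%s\n", classify(0x004D0100));   /* POWER8 v1.0 */
        return 0;
    }
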
diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
index 13c7031265..f1f0a52998 100644
--- a/target-ppc/cpu-qom.h
+++ b/target-ppc/cpu-qom.h
@@ -119,7 +119,9 @@ void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f,
fprintf_function cpu_fprintf, int flags);
hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr);
int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg);
int ppc64_cpu_write_elf64_qemunote(WriteCoreDumpFunction f,
CPUState *cpu, void *opaque);
int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 74407ee209..08ae527cb6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2012,7 +2012,7 @@ enum {
PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
- PPC2_ALTIVEC_207 | PPC2_ISA207S)
+ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP)
};
/*****************************************************************************/
diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
index 381a3c7e31..694d303e19 100644
--- a/target-ppc/gdbstub.c
+++ b/target-ppc/gdbstub.c
@@ -21,6 +21,31 @@
#include "qemu-common.h"
#include "exec/gdbstub.h"
+static int ppc_gdb_register_len_apple(int n)
+{
+ switch (n) {
+ case 0 ... 31:
+ /* gprs */
+ return 8;
+ case 32 ... 63:
+ /* fprs */
+ return 8;
+ case 64 ... 95:
+ return 16;
+ case 64+32: /* nip */
+ case 65+32: /* msr */
+ case 67+32: /* lr */
+ case 68+32: /* ctr */
+ case 69+32: /* xer */
+ case 70+32: /* fpscr */
+ return 8;
+ case 66+32: /* cr */
+ return 4;
+ default:
+ return 0;
+ }
+}
+
static int ppc_gdb_register_len(int n)
{
switch (n) {
@@ -132,6 +157,65 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
return r;
}
+int ppc_cpu_gdb_read_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ int r = ppc_gdb_register_len_apple(n);
+
+ if (!r) {
+ return r;
+ }
+
+ if (n < 32) {
+ /* gprs */
+ gdb_get_reg64(mem_buf, env->gpr[n]);
+ } else if (n < 64) {
+ /* fprs */
+ stfq_p(mem_buf, env->fpr[n-32]);
+ } else if (n < 96) {
+ /* Altivec */
+ stq_p(mem_buf, n - 64);
+ stq_p(mem_buf + 8, 0);
+ } else {
+ switch (n) {
+ case 64 + 32:
+ gdb_get_reg64(mem_buf, env->nip);
+ break;
+ case 65 + 32:
+ gdb_get_reg64(mem_buf, env->msr);
+ break;
+ case 66 + 32:
+ {
+ uint32_t cr = 0;
+ int i;
+ for (i = 0; i < 8; i++) {
+ cr |= env->crf[i] << (32 - ((i + 1) * 4));
+ }
+ gdb_get_reg32(mem_buf, cr);
+ break;
+ }
+ case 67 + 32:
+ gdb_get_reg64(mem_buf, env->lr);
+ break;
+ case 68 + 32:
+ gdb_get_reg64(mem_buf, env->ctr);
+ break;
+ case 69 + 32:
+ gdb_get_reg64(mem_buf, env->xer);
+ break;
+ case 70 + 32:
+ gdb_get_reg64(mem_buf, env->fpscr);
+ break;
+ }
+ }
+ if (msr_le) {
+ /* If cpu is in LE mode, convert memory contents to LE. */
+ ppc_gdb_swap_register(mem_buf, n, r);
+ }
+ return r;
+}
+
int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -185,3 +269,56 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
}
return r;
}
+int ppc_cpu_gdb_write_register_apple(CPUState *cs, uint8_t *mem_buf, int n)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ int r = ppc_gdb_register_len_apple(n);
+
+ if (!r) {
+ return r;
+ }
+ if (msr_le) {
+ /* If cpu is in LE mode, convert memory contents to LE. */
+ ppc_gdb_swap_register(mem_buf, n, r);
+ }
+ if (n < 32) {
+ /* gprs */
+ env->gpr[n] = ldq_p(mem_buf);
+ } else if (n < 64) {
+ /* fprs */
+ env->fpr[n-32] = ldfq_p(mem_buf);
+ } else {
+ switch (n) {
+ case 64 + 32:
+ env->nip = ldq_p(mem_buf);
+ break;
+ case 65 + 32:
+ ppc_store_msr(env, ldq_p(mem_buf));
+ break;
+ case 66 + 32:
+ {
+ uint32_t cr = ldl_p(mem_buf);
+ int i;
+ for (i = 0; i < 8; i++) {
+ env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
+ }
+ break;
+ }
+ case 67 + 32:
+ env->lr = ldq_p(mem_buf);
+ break;
+ case 68 + 32:
+ env->ctr = ldq_p(mem_buf);
+ break;
+ case 69 + 32:
+ env->xer = ldq_p(mem_buf);
+ break;
+ case 70 + 32:
+ /* fpscr */
+ store_fpscr(env, ldq_p(mem_buf), 0xffffffff);
+ break;
+ }
+ }
+ return r;
+}
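
Both Apple-layout accessors above pack and unpack the eight 4-bit CR fields into a single 32-bit value with the shift 32 - ((i + 1) * 4). The stand-alone sketch below round-trips a CR value through that packing to show the two directions agree; it is not QEMU code.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_cr(const uint8_t crf[8])
    {
        uint32_t cr = 0;
        for (int i = 0; i < 8; i++) {
            cr |= (uint32_t)crf[i] << (32 - ((i + 1) * 4));   /* crf[0] in the top nibble */
        }
        return cr;
    }

    static void unpack_cr(uint32_t cr, uint8_t crf[8])
    {
        for (int i = 0; i < 8; i++) {
            crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
        }
    }

    int main(void)
    {
        uint8_t in[8] = { 0x8, 0x4, 0x2, 0x1, 0x0, 0xF, 0x3, 0xC };
        uint8_t out[8];

        unpack_cr(pack_cr(in), out);
        for (int i = 0; i < 8; i++) {
            printf("crf[%d] = 0x%x\n", i, out[i]);
        }
        return 0;
    }
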
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 561f8ccf2f..2d87108d8b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -63,6 +63,7 @@ static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
+static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
@@ -101,6 +102,7 @@ int kvm_arch_init(KVMState *s)
cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
+ cap_spapr_vfio = false;
cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
@@ -1660,7 +1662,8 @@ bool kvmppc_spapr_use_multitce(void)
return cap_spapr_multitce;
}
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+ bool vfio_accel)
{
struct kvm_create_spapr_tce args = {
.liobn = liobn,
@@ -1674,7 +1677,7 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
* destroying the table, which the upper layers -will- do
*/
*pfd = -1;
- if (!cap_spapr_tce) {
+ if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
return NULL;
}
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index 412cc7f3c1..1118122d89 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -33,7 +33,8 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
#ifndef CONFIG_USER_ONLY
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
bool kvmppc_spapr_use_multitce(void);
-void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
+void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
+ bool vfio_accel);
int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
int kvmppc_reset_htab(int shift_hint);
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
@@ -144,7 +145,8 @@ static inline bool kvmppc_spapr_use_multitce(void)
}
static inline void *kvmppc_create_spapr_tce(uint32_t liobn,
- uint32_t window_size, int *fd)
+ uint32_t window_size, int *fd,
+ bool vfio_accel)
{
return NULL;
}
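
Note on the ppc KVM hunks: kvmppc_create_spapr_tce() grows a vfio_accel flag, and returns NULL when acceleration is requested but the corresponding KVM capability is absent (the diff hard-codes cap_spapr_vfio to false for now), so callers fall back to a userspace TCE table. The toy sketch below shows that capability-gated fallback with stand-in names; the calloc() stands in for the real in-kernel fd/mmap path.

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static bool cap_spapr_tce  = true;
    static bool cap_spapr_vfio = false;

    /* NULL means "use a userspace table instead". */
    static void *create_tce_table(size_t bytes, bool vfio_accel)
    {
        if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
            return NULL;
        }
        return calloc(1, bytes);
    }

    int main(void)
    {
        void *t = create_tce_table(4096, true);
        printf("%s\n", t ? "in-kernel table" : "falling back to userspace table");
        free(t);
        return 0;
    }
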
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 48017219a4..b23933f7bd 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -426,7 +426,6 @@ static inline uint32_t SPR(uint32_t opcode)
return ((sprn >> 5) & 0x1F) | ((sprn & 0x1F) << 5);
}
/*** Get constants ***/
-EXTRACT_HELPER(IMM, 12, 8);
/* 16 bits signed immediate value */
EXTRACT_SHELPER(SIMM, 0, 16);
/* 16 bits unsigned immediate value */
@@ -459,8 +458,6 @@ EXTRACT_HELPER(FPFLM, 17, 8);
EXTRACT_HELPER(FPW, 16, 1);
/*** Jump target decoding ***/
-/* Displacement */
-EXTRACT_SHELPER(d, 0, 16);
/* Immediate address */
static inline target_ulong LI(uint32_t opcode)
{
@@ -2665,11 +2662,6 @@ static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2)
tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx);
}
-static inline void gen_qemu_ld8s(DisasContext *ctx, TCGv arg1, TCGv arg2)
-{
- tcg_gen_qemu_ld8s(arg1, arg2, ctx->mem_idx);
-}
-
static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2)
{
TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask;
@@ -4123,8 +4115,9 @@ static void gen_mcrxr(DisasContext *ctx)
tcg_gen_trunc_tl_i32(t0, cpu_so);
tcg_gen_trunc_tl_i32(t1, cpu_ov);
tcg_gen_trunc_tl_i32(dst, cpu_ca);
- tcg_gen_shri_i32(t0, t0, 2);
- tcg_gen_shri_i32(t1, t1, 1);
+ tcg_gen_shli_i32(t0, t0, 3);
+ tcg_gen_shli_i32(t1, t1, 2);
+ tcg_gen_shli_i32(dst, dst, 1);
tcg_gen_or_i32(dst, dst, t0);
tcg_gen_or_i32(dst, dst, t1);
tcg_temp_free_i32(t0);
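
The mcrxr hunk fixes the placement of the XER flags in the destination CR field: within the 4-bit field, SO lands in bit 3, OV in bit 2 and CA in bit 1, so the 0/1 flag values must be shifted left as above rather than right. A tiny stand-alone illustration of the corrected packing (pack_crf() is not a QEMU helper):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_crf(uint32_t so, uint32_t ov, uint32_t ca)
    {
        return (so << 3) | (ov << 2) | (ca << 1);   /* bit 0 stays clear */
    }

    int main(void)
    {
        printf("crf = 0x%x\n", pack_crf(1, 0, 1));   /* expect 0xa */
        return 0;
    }
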
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 85581c9537..a3bb336e16 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -34,6 +34,7 @@
//#define PPC_DUMP_CPU
//#define PPC_DEBUG_SPR
//#define PPC_DUMP_SPR_ACCESSES
+/* #define USE_APPLE_GDB */
/* For user-mode emulation, we don't emulate any IRQ controller */
#if defined(CONFIG_USER_ONLY)
@@ -8188,16 +8189,16 @@ static void init_proc_POWER8(CPUPPCState *env)
init_proc_book3s_64(env, BOOK3S_CPU_POWER8);
}
-POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+POWERPC_FAMILY(POWER8E)(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
dc->fw_name = "PowerPC,POWER8";
- dc->desc = "POWER8";
+ dc->desc = "POWER8E";
dc->props = powerpc_servercpu_properties;
- pcc->pvr = CPU_POWERPC_POWER8_BASE;
- pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+ pcc->pvr = CPU_POWERPC_POWER8E_BASE;
+ pcc->pvr_mask = CPU_POWERPC_POWER8E_MASK;
pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06;
pcc->init_proc = init_proc_POWER8;
pcc->check_pow = check_pow_nocheck;
@@ -8251,6 +8252,18 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
pcc->l1_icache_size = 0x8000;
pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
}
+
+POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+ PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
+
+ ppc_POWER8E_cpu_family_class_init(oc, data);
+
+ dc->desc = "POWER8";
+ pcc->pvr = CPU_POWERPC_POWER8_BASE;
+ pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
+}
#endif /* defined (TARGET_PPC64) */
@@ -9667,6 +9680,13 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
#endif
cc->gdb_num_core_regs = 71;
+
+#ifdef USE_APPLE_GDB
+ cc->gdb_read_register = ppc_cpu_gdb_read_register_apple;
+ cc->gdb_write_register = ppc_cpu_gdb_write_register_apple;
+ cc->gdb_num_core_regs = 71 + 32;
+#endif
+
#if defined(TARGET_PPC64)
cc->gdb_core_xml_file = "power64-core.xml";
#else
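
Note on the translate_init.c hunks: the POWER8 family class is now built by calling the POWER8E class initializer and overriding only the description and PVR, and the Apple gdbstub layout is an opt-in compile-time switch (USE_APPLE_GDB). The sketch below shows the "chain to the sibling's class_init, then override" shape with plain structs and stand-in names; it is not QOM code.

    #include <stdio.h>

    struct cpu_class { const char *desc; unsigned pvr; unsigned pvr_mask; };

    static void power8e_class_init(struct cpu_class *c)
    {
        c->desc = "POWER8E";
        c->pvr = 0x004B0000;
        c->pvr_mask = 0xFFFF0000;
        /* ...shared init_proc, SPRs, cache sizes, etc. would go here... */
    }

    static void power8_class_init(struct cpu_class *c)
    {
        power8e_class_init(c);      /* inherit everything from POWER8E */
        c->desc = "POWER8";         /* then override only what differs */
        c->pvr = 0x004D0000;
    }

    int main(void)
    {
        struct cpu_class c;

        power8_class_init(&c);
        printf("%s pvr=0x%08x\n", c.desc, c.pvr);
        return 0;
    }
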
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index c83fd9fdc0..203027eb3e 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -805,7 +805,10 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
/* For unaligned, or very large offsets, use the indexed form. */
if (offset & align || offset != (int32_t)offset) {
- tcg_debug_assert(rs != base && (!is_store || rs != rt));
+ if (rs == base) {
+ rs = TCG_REG_R0;
+ }
+ tcg_debug_assert(!is_store || rs != rt);
tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
tcg_out32(s, opx | TAB(rt, base, rs));
return;
@@ -1716,6 +1719,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
# define LINK_AREA_SIZE (6 * SZR)
# define LR_OFFSET (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET (LINK_AREA_SIZE + 8 * SZR)
+#elif defined(TCG_TARGET_CALL_DARWIN)
+# define LINK_AREA_SIZE (6 * SZR)
+# define LR_OFFSET (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
# define LINK_AREA_SIZE (4 * SZR)
@@ -1725,9 +1731,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
# if defined(_CALL_SYSV)
# define LINK_AREA_SIZE (2 * SZR)
# define LR_OFFSET (1 * SZR)
-# elif defined(TCG_TARGET_CALL_DARWIN)
-# define LINK_AREA_SIZE 24
-# define LR_OFFSET 8
# endif
#endif
#ifndef LR_OFFSET
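
Two things change in tcg/ppc/tcg-target.c: tcg_out_mem_long() now falls back to R0 when the chosen scratch register would alias the base register instead of asserting, and the Darwin link area is expressed in SZR units so the same definition serves 32- and 64-bit hosts. The toy sketch below covers only the scratch-register fallback, with illustrative register names:

    #include <assert.h>
    #include <stdio.h>

    enum { R0 = 0, R2 = 2, R14 = 14 };

    /* When the scratch register would clobber the base, use R0 as the index. */
    static int pick_index_reg(int scratch, int base, int value, int is_store)
    {
        if (scratch == base) {
            scratch = R0;
        }
        assert(!is_store || scratch != value);   /* must not clobber the data register */
        return scratch;
    }

    int main(void)
    {
        printf("index reg = %d\n", pick_index_reg(R14, R14, R2, 1));
        return 0;
    }
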
diff --git a/tests/qapi-schema/event-nest-struct.err b/tests/qapi-schema/event-nest-struct.err
index e4a0faac9c..91bde1c967 100644
--- a/tests/qapi-schema/event-nest-struct.err
+++ b/tests/qapi-schema/event-nest-struct.err
@@ -1 +1 @@
-tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported now, event 'EVENT_A', argname 'a'
+tests/qapi-schema/event-nest-struct.json:1: Nested structure define in event is not supported, event 'EVENT_A', argname 'a'
diff --git a/trace-events b/trace-events
index ba01ad52cf..d071b97dd7 100644
--- a/trace-events
+++ b/trace-events
@@ -1169,12 +1169,13 @@ qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride,
qxl_render_update_area_done(void *cookie) "%p"
# hw/ppc/spapr_pci.c
-spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, cfg=%x)"
+spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=%x)"
spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=%"PRIx64
-spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %u"
+spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr %x func %u, requested %u, first irq %u"
spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u"
# hw/intc/xics.c
xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
@@ -1188,6 +1189,12 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+xics_alloc(int src, int irq) "source#%d, irq %d"
+xics_alloc_failed_hint(int src, int irq) "source#%d, irq %d is already in use"
+xics_alloc_failed_no_left(int src) "source#%d, no irq left"
+xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d"
+xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs"
+xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free"
# hw/ppc/spapr.c
spapr_cas_failed(unsigned long n) "DT diff buffer is too small: %ld bytes"
diff --git a/vmstate.c b/vmstate.c
index c9965205df..ef2f87bdad 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -43,11 +43,18 @@ static int vmstate_size(void *opaque, VMStateField *field)
return size;
}
-static void *vmstate_base_addr(void *opaque, VMStateField *field)
+static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc)
{
void *base_addr = opaque + field->offset;
if (field->flags & VMS_POINTER) {
+ if (alloc && (field->flags & VMS_ALLOC)) {
+ int n_elems = vmstate_n_elems(opaque, field);
+ if (n_elems) {
+ gsize size = n_elems * field->size;
+ *((void **)base_addr + field->start) = g_malloc(size);
+ }
+ }
base_addr = *(void **)base_addr + field->start;
}
@@ -81,7 +88,7 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field);
+ void *base_addr = vmstate_base_addr(opaque, field, true);
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
@@ -135,7 +142,7 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
while (field->name) {
if (!field->field_exists ||
field->field_exists(opaque, vmsd->version_id)) {
- void *base_addr = vmstate_base_addr(opaque, field);
+ void *base_addr = vmstate_base_addr(opaque, field, false);
int i, n_elems = vmstate_n_elems(opaque, field);
int size = vmstate_size(opaque, field);
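
Note on the vmstate.c hunks: vmstate_base_addr() gains an alloc flag. On the load path (alloc == true), a field marked VMS_POINTER with the new VMS_ALLOC flag gets its backing array allocated before the elements are read; the save path passes false and behaves as before. The sketch below shows that allocate-then-dereference step with stand-in flag values and a toy field struct; it is not QEMU's VMStateField API.

    #include <stdio.h>
    #include <stdlib.h>

    enum { VMS_POINTER = 1 << 0, VMS_ALLOC = 1 << 1 };

    struct field { int flags; size_t size; size_t n_elems; };

    /* Allocate the pointed-to array (load side only), then follow the pointer. */
    static void *base_addr(void **slot, const struct field *f, int alloc)
    {
        if (f->flags & VMS_POINTER) {
            if (alloc && (f->flags & VMS_ALLOC) && f->n_elems) {
                *slot = calloc(f->n_elems, f->size);
            }
            return *slot;
        }
        return slot;
    }

    int main(void)
    {
        struct field f = { VMS_POINTER | VMS_ALLOC, sizeof(int), 4 };
        void *storage = NULL;
        int *arr = base_addr(&storage, &f, 1);   /* load path: alloc == true */

        arr[3] = 42;
        printf("arr[3] = %d\n", arr[3]);
        free(storage);
        return 0;
    }
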