aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- cpus.c 4
-rw-r--r-- fsdev/virtfs-proxy-helper.texi 2
-rw-r--r-- nbd/server.c 2
-rw-r--r-- numa.c 11
-rw-r--r-- qapi-schema.json 2
-rw-r--r-- qemu-char.c 926
-rw-r--r-- qemu-doc.texi 8
-rw-r--r-- qemu-ga.texi 2
-rw-r--r-- qemu-img.texi 2
-rw-r--r-- qemu-options.hx 9
-rw-r--r-- scripts/dump-guest-memory.py 762
-rwxr-xr-x scripts/kvm/kvm_stat 1199
-rw-r--r-- tests/Makefile 2
13 files changed, 1663 insertions, 1268 deletions
diff --git a/cpus.c b/cpus.c
index 3efff6b109..1e97cc4821 100644
--- a/cpus.c
+++ b/cpus.c
@@ -986,7 +986,7 @@ static void qemu_wait_io_event_common(CPUState *cpu)
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
- qemu_cond_signal(&qemu_pause_cond);
+ qemu_cond_broadcast(&qemu_pause_cond);
}
flush_queued_work(cpu);
cpu->thread_kicked = false;
@@ -1396,7 +1396,7 @@ void cpu_stop_current(void)
current_cpu->stop = false;
current_cpu->stopped = true;
cpu_exit(current_cpu);
- qemu_cond_signal(&qemu_pause_cond);
+ qemu_cond_broadcast(&qemu_pause_cond);
}
}
diff --git a/fsdev/virtfs-proxy-helper.texi b/fsdev/virtfs-proxy-helper.texi
index e60e3b9465..9a25d7ecf4 100644
--- a/fsdev/virtfs-proxy-helper.texi
+++ b/fsdev/virtfs-proxy-helper.texi
@@ -1,6 +1,6 @@
@example
@c man begin SYNOPSIS
-usage: virtfs-proxy-helper options
+@command{virtfs-proxy-helper} @var{options}
@c man end
@end example
diff --git a/nbd/server.c b/nbd/server.c
index 2265cb0680..256feafcec 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -671,7 +671,9 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
* that BDRV_O_INACTIVE is cleared and the image is ready for write
* access since the export could be available before migration handover.
*/
+ aio_context_acquire(exp->ctx);
blk_invalidate_cache(blk, NULL);
+ aio_context_release(exp->ctx);
return exp;
fail:
diff --git a/numa.c b/numa.c
index 425ef8dc21..23a5d83024 100644
--- a/numa.c
+++ b/numa.c
@@ -418,12 +418,15 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
Error *err = NULL;
memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
mem_path, &err);
-
- /* Legacy behavior: if allocation failed, fall back to
- * regular RAM allocation.
- */
if (err) {
error_report_err(err);
+ if (mem_prealloc) {
+ exit(1);
+ }
+
+ /* Legacy behavior: if allocation failed, fall back to
+ * regular RAM allocation.
+ */
memory_region_init_ram(mr, owner, name, ram_size, &error_fatal);
}
#else
diff --git a/qapi-schema.json b/qapi-schema.json
index b3038b215a..8d04897922 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3146,6 +3146,7 @@
#
# @addr: socket address to listen on (server=true)
# or connect to (server=false)
+# @tls-creds: #optional the ID of the TLS credentials object (since 2.6)
# @server: #optional create server socket (default: true)
# @wait: #optional wait for incoming connection on server
# sockets (default: false).
@@ -3160,6 +3161,7 @@
# Since: 1.4
##
{ 'struct': 'ChardevSocket', 'data': { 'addr' : 'SocketAddress',
+ '*tls-creds' : 'str',
'*server' : 'bool',
'*wait' : 'bool',
'*nodelay' : 'bool',
diff --git a/qemu-char.c b/qemu-char.c
index e133f4fc35..ca53e8c376 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -33,6 +33,9 @@
#include "qapi/qmp-output-visitor.h"
#include "qapi-visit.h"
#include "qemu/base64.h"
+#include "io/channel-socket.h"
+#include "io/channel-file.h"
+#include "io/channel-tls.h"
#include <unistd.h>
#include <fcntl.h>
@@ -88,39 +91,37 @@
#define READ_BUF_LEN 4096
#define READ_RETRIES 10
-#define CHR_MAX_FILENAME_SIZE 256
#define TCP_MAX_FDS 16
/***********************************************************/
/* Socket address helpers */
-static int SocketAddress_to_str(char *dest, int max_len,
- const char *prefix, SocketAddress *addr,
- bool is_listen, bool is_telnet)
+static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr,
+ bool is_listen, bool is_telnet)
{
switch (addr->type) {
case SOCKET_ADDRESS_KIND_INET:
- return snprintf(dest, max_len, "%s%s:%s:%s%s", prefix,
- is_telnet ? "telnet" : "tcp", addr->u.inet->host,
- addr->u.inet->port, is_listen ? ",server" : "");
+ return g_strdup_printf("%s%s:%s:%s%s", prefix,
+ is_telnet ? "telnet" : "tcp", addr->u.inet->host,
+ addr->u.inet->port, is_listen ? ",server" : "");
break;
case SOCKET_ADDRESS_KIND_UNIX:
- return snprintf(dest, max_len, "%sunix:%s%s", prefix,
- addr->u.q_unix->path, is_listen ? ",server" : "");
+ return g_strdup_printf("%sunix:%s%s", prefix,
+ addr->u.q_unix->path,
+ is_listen ? ",server" : "");
break;
case SOCKET_ADDRESS_KIND_FD:
- return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->u.fd->str,
- is_listen ? ",server" : "");
+ return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd->str,
+ is_listen ? ",server" : "");
break;
default:
abort();
}
}
-static int sockaddr_to_str(char *dest, int max_len,
- struct sockaddr_storage *ss, socklen_t ss_len,
- struct sockaddr_storage *ps, socklen_t ps_len,
- bool is_listen, bool is_telnet)
+static char *sockaddr_to_str(struct sockaddr_storage *ss, socklen_t ss_len,
+ struct sockaddr_storage *ps, socklen_t ps_len,
+ bool is_listen, bool is_telnet)
{
char shost[NI_MAXHOST], sserv[NI_MAXSERV];
char phost[NI_MAXHOST], pserv[NI_MAXSERV];
@@ -129,9 +130,9 @@ static int sockaddr_to_str(char *dest, int max_len,
switch (ss->ss_family) {
#ifndef _WIN32
case AF_UNIX:
- return snprintf(dest, max_len, "unix:%s%s",
- ((struct sockaddr_un *)(ss))->sun_path,
- is_listen ? ",server" : "");
+ return g_strdup_printf("unix:%s%s",
+ ((struct sockaddr_un *)(ss))->sun_path,
+ is_listen ? ",server" : "");
#endif
case AF_INET6:
left = "[";
@@ -142,14 +143,14 @@ static int sockaddr_to_str(char *dest, int max_len,
sserv, sizeof(sserv), NI_NUMERICHOST | NI_NUMERICSERV);
getnameinfo((struct sockaddr *) ps, ps_len, phost, sizeof(phost),
pserv, sizeof(pserv), NI_NUMERICHOST | NI_NUMERICSERV);
- return snprintf(dest, max_len, "%s:%s%s%s:%s%s <-> %s%s%s:%s",
- is_telnet ? "telnet" : "tcp",
- left, shost, right, sserv,
- is_listen ? ",server" : "",
- left, phost, right, pserv);
+ return g_strdup_printf("%s:%s%s%s:%s%s <-> %s%s%s:%s",
+ is_telnet ? "telnet" : "tcp",
+ left, shost, right, sserv,
+ is_listen ? ",server" : "",
+ left, phost, right, pserv);
default:
- return snprintf(dest, max_len, "unknown");
+ return g_strdup_printf("unknown");
}
}
@@ -768,7 +769,7 @@ typedef struct IOWatchPoll
{
GSource parent;
- GIOChannel *channel;
+ QIOChannel *ioc;
GSource *src;
IOCanReadHandler *fd_can_read;
@@ -791,8 +792,8 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
}
if (now_active) {
- iwp->src = g_io_create_watch(iwp->channel,
- G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
+ iwp->src = qio_channel_create_watch(
+ iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
g_source_attach(iwp->src, NULL);
} else {
@@ -838,9 +839,9 @@ static GSourceFuncs io_watch_poll_funcs = {
};
/* Can only be used for read */
-static guint io_add_watch_poll(GIOChannel *channel,
+static guint io_add_watch_poll(QIOChannel *ioc,
IOCanReadHandler *fd_can_read,
- GIOFunc fd_read,
+ QIOChannelFunc fd_read,
gpointer user_data)
{
IOWatchPoll *iwp;
@@ -849,7 +850,7 @@ static guint io_add_watch_poll(GIOChannel *channel,
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll));
iwp->fd_can_read = fd_can_read;
iwp->opaque = user_data;
- iwp->channel = channel;
+ iwp->ioc = ioc;
iwp->fd_read = (GSourceFunc) fd_read;
iwp->src = NULL;
@@ -885,79 +886,50 @@ static void remove_fd_in_watch(CharDriverState *chr)
}
}
-#ifndef _WIN32
-static GIOChannel *io_channel_from_fd(int fd)
-{
- GIOChannel *chan;
-
- if (fd == -1) {
- return NULL;
- }
-
- chan = g_io_channel_unix_new(fd);
-
- g_io_channel_set_encoding(chan, NULL, NULL);
- g_io_channel_set_buffered(chan, FALSE);
-
- return chan;
-}
-#endif
-static GIOChannel *io_channel_from_socket(int fd)
+static int io_channel_send_full(QIOChannel *ioc,
+ const void *buf, size_t len,
+ int *fds, size_t nfds)
{
- GIOChannel *chan;
+ size_t offset = 0;
- if (fd == -1) {
- return NULL;
- }
+ while (offset < len) {
+ ssize_t ret = 0;
+ struct iovec iov = { .iov_base = (char *)buf + offset,
+ .iov_len = len - offset };
+
+ ret = qio_channel_writev_full(
+ ioc, &iov, 1,
+ fds, nfds, NULL);
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ errno = EAGAIN;
+ return -1;
+ } else if (ret < 0) {
+ if (offset) {
+ return offset;
+ }
-#ifdef _WIN32
- chan = g_io_channel_win32_new_socket(fd);
-#else
- chan = g_io_channel_unix_new(fd);
-#endif
+ errno = EINVAL;
+ return -1;
+ }
- g_io_channel_set_encoding(chan, NULL, NULL);
- g_io_channel_set_buffered(chan, FALSE);
+ offset += ret;
+ }
- return chan;
+ return offset;
}
-static int io_channel_send(GIOChannel *fd, const void *buf, size_t len)
-{
- size_t offset = 0;
- GIOStatus status = G_IO_STATUS_NORMAL;
- while (offset < len && status == G_IO_STATUS_NORMAL) {
- gsize bytes_written = 0;
-
- status = g_io_channel_write_chars(fd, buf + offset, len - offset,
- &bytes_written, NULL);
- offset += bytes_written;
- }
-
- if (offset > 0) {
- return offset;
- }
- switch (status) {
- case G_IO_STATUS_NORMAL:
- g_assert(len == 0);
- return 0;
- case G_IO_STATUS_AGAIN:
- errno = EAGAIN;
- return -1;
- default:
- break;
- }
- errno = EINVAL;
- return -1;
+#ifndef _WIN32
+static int io_channel_send(QIOChannel *ioc, const void *buf, size_t len)
+{
+ return io_channel_send_full(ioc, buf, len, NULL, 0);
}
-#ifndef _WIN32
typedef struct FDCharDriver {
CharDriverState *chr;
- GIOChannel *fd_in, *fd_out;
+ QIOChannel *ioc_in, *ioc_out;
int max_size;
} FDCharDriver;
@@ -966,17 +938,16 @@ static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
{
FDCharDriver *s = chr->opaque;
- return io_channel_send(s->fd_out, buf, len);
+ return io_channel_send(s->ioc_out, buf, len);
}
-static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
CharDriverState *chr = opaque;
FDCharDriver *s = chr->opaque;
int len;
uint8_t buf[READ_BUF_LEN];
- GIOStatus status;
- gsize bytes_read;
+ ssize_t ret;
len = sizeof(buf);
if (len > s->max_size) {
@@ -986,15 +957,15 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
return TRUE;
}
- status = g_io_channel_read_chars(chan, (gchar *)buf,
- len, &bytes_read, NULL);
- if (status == G_IO_STATUS_EOF) {
+ ret = qio_channel_read(
+ chan, (gchar *)buf, len, NULL);
+ if (ret == 0) {
remove_fd_in_watch(chr);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
return FALSE;
}
- if (status == G_IO_STATUS_NORMAL) {
- qemu_chr_be_write(chr, buf, bytes_read);
+ if (ret > 0) {
+ qemu_chr_be_write(chr, buf, ret);
}
return TRUE;
@@ -1012,7 +983,7 @@ static int fd_chr_read_poll(void *opaque)
static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond)
{
FDCharDriver *s = chr->opaque;
- return g_io_create_watch(s->fd_out, cond);
+ return qio_channel_create_watch(s->ioc_out, cond);
}
static void fd_chr_update_read_handler(CharDriverState *chr)
@@ -1020,8 +991,9 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
FDCharDriver *s = chr->opaque;
remove_fd_in_watch(chr);
- if (s->fd_in) {
- chr->fd_in_tag = io_add_watch_poll(s->fd_in, fd_chr_read_poll,
+ if (s->ioc_in) {
+ chr->fd_in_tag = io_add_watch_poll(s->ioc_in,
+ fd_chr_read_poll,
fd_chr_read, chr);
}
}
@@ -1031,11 +1003,11 @@ static void fd_chr_close(struct CharDriverState *chr)
FDCharDriver *s = chr->opaque;
remove_fd_in_watch(chr);
- if (s->fd_in) {
- g_io_channel_unref(s->fd_in);
+ if (s->ioc_in) {
+ object_unref(OBJECT(s->ioc_in));
}
- if (s->fd_out) {
- g_io_channel_unref(s->fd_out);
+ if (s->ioc_out) {
+ object_unref(OBJECT(s->ioc_out));
}
g_free(s);
@@ -1054,8 +1026,8 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out,
return NULL;
}
s = g_new0(FDCharDriver, 1);
- s->fd_in = io_channel_from_fd(fd_in);
- s->fd_out = io_channel_from_fd(fd_out);
+ s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in));
+ s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out));
qemu_set_nonblock(fd_out);
s->chr = chr;
chr->opaque = s;
@@ -1074,15 +1046,18 @@ static CharDriverState *qemu_chr_open_pipe(const char *id,
{
ChardevHostdev *opts = backend->u.pipe;
int fd_in, fd_out;
- char filename_in[CHR_MAX_FILENAME_SIZE];
- char filename_out[CHR_MAX_FILENAME_SIZE];
+ char *filename_in;
+ char *filename_out;
const char *filename = opts->device;
ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe);
- snprintf(filename_in, CHR_MAX_FILENAME_SIZE, "%s.in", filename);
- snprintf(filename_out, CHR_MAX_FILENAME_SIZE, "%s.out", filename);
+
+ filename_in = g_strdup_printf("%s.in", filename);
+ filename_out = g_strdup_printf("%s.out", filename);
TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY));
TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY));
+ g_free(filename_in);
+ g_free(filename_out);
if (fd_in < 0 || fd_out < 0) {
if (fd_in >= 0)
close(fd_in);
@@ -1195,7 +1170,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id,
#define HAVE_CHARDEV_PTY 1
typedef struct {
- GIOChannel *fd;
+ QIOChannel *ioc;
int read_bytes;
/* Protected by the CharDriverState chr_write_lock. */
@@ -1246,8 +1221,9 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr)
PtyCharDriver *s = chr->opaque;
GPollFD pfd;
int rc;
+ QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc);
- pfd.fd = g_io_channel_unix_get_fd(s->fd);
+ pfd.fd = fioc->fd;
pfd.events = G_IO_OUT;
pfd.revents = 0;
do {
@@ -1281,7 +1257,7 @@ static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
return 0;
}
}
- return io_channel_send(s->fd, buf, len);
+ return io_channel_send(s->ioc, buf, len);
}
static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond)
@@ -1290,7 +1266,7 @@ static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond)
if (!s->connected) {
return NULL;
}
- return g_io_create_watch(s->fd, cond);
+ return qio_channel_create_watch(s->ioc, cond);
}
static int pty_chr_read_poll(void *opaque)
@@ -1302,13 +1278,13 @@ static int pty_chr_read_poll(void *opaque)
return s->read_bytes;
}
-static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
CharDriverState *chr = opaque;
PtyCharDriver *s = chr->opaque;
- gsize size, len;
+ gsize len;
uint8_t buf[READ_BUF_LEN];
- GIOStatus status;
+ ssize_t ret;
len = sizeof(buf);
if (len > s->read_bytes)
@@ -1316,13 +1292,13 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
if (len == 0) {
return TRUE;
}
- status = g_io_channel_read_chars(s->fd, (gchar *)buf, len, &size, NULL);
- if (status != G_IO_STATUS_NORMAL) {
+ ret = qio_channel_read(s->ioc, (char *)buf, len, NULL);
+ if (ret <= 0) {
pty_chr_state(chr, 0);
return FALSE;
} else {
pty_chr_state(chr, 1);
- qemu_chr_be_write(chr, buf, size);
+ qemu_chr_be_write(chr, buf, ret);
}
return TRUE;
}
@@ -1364,7 +1340,8 @@ static void pty_chr_state(CharDriverState *chr, int connected)
s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr);
}
if (!chr->fd_in_tag) {
- chr->fd_in_tag = io_add_watch_poll(s->fd, pty_chr_read_poll,
+ chr->fd_in_tag = io_add_watch_poll(s->ioc,
+ pty_chr_read_poll,
pty_chr_read, chr);
}
}
@@ -1373,13 +1350,10 @@ static void pty_chr_state(CharDriverState *chr, int connected)
static void pty_chr_close(struct CharDriverState *chr)
{
PtyCharDriver *s = chr->opaque;
- int fd;
qemu_mutex_lock(&chr->chr_write_lock);
pty_chr_state(chr, 0);
- fd = g_io_channel_unix_get_fd(s->fd);
- g_io_channel_unref(s->fd);
- close(fd);
+ object_unref(OBJECT(s->ioc));
if (s->timer_tag) {
g_source_remove(s->timer_tag);
s->timer_tag = 0;
@@ -1430,7 +1404,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id,
chr->chr_add_watch = pty_chr_add_watch;
chr->explicit_be_open = true;
- s->fd = io_channel_from_fd(master_fd);
+ s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd));
s->timer_tag = 0;
return chr;
@@ -1554,12 +1528,13 @@ static void tty_serial_init(int fd, int speed,
static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
{
FDCharDriver *s = chr->opaque;
+ QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in);
switch(cmd) {
case CHR_IOCTL_SERIAL_SET_PARAMS:
{
QEMUSerialSetParams *ssp = arg;
- tty_serial_init(g_io_channel_unix_get_fd(s->fd_in),
+ tty_serial_init(fioc->fd,
ssp->speed, ssp->parity,
ssp->data_bits, ssp->stop_bits);
}
@@ -1568,7 +1543,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
{
int enable = *(int *)arg;
if (enable) {
- tcsendbreak(g_io_channel_unix_get_fd(s->fd_in), 1);
+ tcsendbreak(fioc->fd, 1);
}
}
break;
@@ -1576,7 +1551,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
{
int sarg = 0;
int *targ = (int *)arg;
- ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &sarg);
+ ioctl(fioc->fd, TIOCMGET, &sarg);
*targ = 0;
if (sarg & TIOCM_CTS)
*targ |= CHR_TIOCM_CTS;
@@ -1596,7 +1571,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
{
int sarg = *(int *)arg;
int targ = 0;
- ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &targ);
+ ioctl(fioc->fd, TIOCMGET, &targ);
targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR
| CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS);
if (sarg & CHR_TIOCM_CTS)
@@ -1611,7 +1586,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
targ |= TIOCM_DTR;
if (sarg & CHR_TIOCM_RTS)
targ |= TIOCM_RTS;
- ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMSET, &targ);
+ ioctl(fioc->fd, TIOCMSET, &targ);
}
break;
default:
@@ -1622,18 +1597,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg)
static void qemu_chr_close_tty(CharDriverState *chr)
{
- FDCharDriver *s = chr->opaque;
- int fd = -1;
-
- if (s) {
- fd = g_io_channel_unix_get_fd(s->fd_in);
- }
-
fd_chr_close(chr);
-
- if (fd >= 0) {
- close(fd);
- }
}
static CharDriverState *qemu_chr_open_tty_fd(int fd,
@@ -1776,18 +1740,19 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd,
return NULL;
}
- drv = g_new0(ParallelCharDriver, 1);
- drv->fd = fd;
- drv->mode = IEEE1284_MODE_COMPAT;
-
chr = qemu_chr_alloc(backend, errp);
if (!chr) {
return NULL;
}
+
+ drv = g_new0(ParallelCharDriver, 1);
+ chr->opaque = drv;
chr->chr_write = null_chr_write;
chr->chr_ioctl = pp_ioctl;
chr->chr_close = pp_close;
- chr->opaque = drv;
+
+ drv->fd = fd;
+ drv->mode = IEEE1284_MODE_COMPAT;
return chr;
}
@@ -2115,7 +2080,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename,
OVERLAPPED ov;
int ret;
DWORD size;
- char openname[CHR_MAX_FILENAME_SIZE];
+ char *openname;
s->fpipe = TRUE;
@@ -2130,11 +2095,12 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename,
goto fail;
}
- snprintf(openname, sizeof(openname), "\\\\.\\pipe\\%s", filename);
+ openname = g_strdup_printf("\\\\.\\pipe\\%s", filename);
s->hcom = CreateNamedPipe(openname, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_BYTE | PIPE_READMODE_BYTE |
PIPE_WAIT,
MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL);
+ g_free(openname);
if (s->hcom == INVALID_HANDLE_VALUE) {
error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError());
s->hcom = NULL;
@@ -2454,8 +2420,7 @@ err1:
/* UDP Net console */
typedef struct {
- int fd;
- GIOChannel *chan;
+ QIOChannel *ioc;
uint8_t buf[READ_BUF_LEN];
int bufcnt;
int bufptr;
@@ -2466,17 +2431,9 @@ typedef struct {
static int udp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
{
NetCharDriver *s = chr->opaque;
- gsize bytes_written;
- GIOStatus status;
-
- status = g_io_channel_write_chars(s->chan, (const gchar *)buf, len, &bytes_written, NULL);
- if (status == G_IO_STATUS_EOF) {
- return 0;
- } else if (status != G_IO_STATUS_NORMAL) {
- return -1;
- }
- return bytes_written;
+ return qio_channel_write(
+ s->ioc, (const char *)buf, len, NULL);
}
static int udp_chr_read_poll(void *opaque)
@@ -2497,24 +2454,22 @@ static int udp_chr_read_poll(void *opaque)
return s->max_size;
}
-static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
CharDriverState *chr = opaque;
NetCharDriver *s = chr->opaque;
- gsize bytes_read = 0;
- GIOStatus status;
+ ssize_t ret;
if (s->max_size == 0) {
return TRUE;
}
- status = g_io_channel_read_chars(s->chan, (gchar *)s->buf, sizeof(s->buf),
- &bytes_read, NULL);
- s->bufcnt = bytes_read;
- s->bufptr = s->bufcnt;
- if (status != G_IO_STATUS_NORMAL) {
+ ret = qio_channel_read(
+ s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
+ if (ret <= 0) {
remove_fd_in_watch(chr);
return FALSE;
}
+ s->bufcnt = ret;
s->bufptr = 0;
while (s->max_size > 0 && s->bufptr < s->bufcnt) {
@@ -2531,8 +2486,9 @@ static void udp_chr_update_read_handler(CharDriverState *chr)
NetCharDriver *s = chr->opaque;
remove_fd_in_watch(chr);
- if (s->chan) {
- chr->fd_in_tag = io_add_watch_poll(s->chan, udp_chr_read_poll,
+ if (s->ioc) {
+ chr->fd_in_tag = io_add_watch_poll(s->ioc,
+ udp_chr_read_poll,
udp_chr_read, chr);
}
}
@@ -2542,17 +2498,16 @@ static void udp_chr_close(CharDriverState *chr)
NetCharDriver *s = chr->opaque;
remove_fd_in_watch(chr);
- if (s->chan) {
- g_io_channel_unref(s->chan);
- closesocket(s->fd);
+ if (s->ioc) {
+ object_unref(OBJECT(s->ioc));
}
g_free(s);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
-static CharDriverState *qemu_chr_open_udp_fd(int fd,
- ChardevCommon *backend,
- Error **errp)
+static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc,
+ ChardevCommon *backend,
+ Error **errp)
{
CharDriverState *chr = NULL;
NetCharDriver *s = NULL;
@@ -2563,8 +2518,7 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd,
}
s = g_new0(NetCharDriver, 1);
- s->fd = fd;
- s->chan = io_channel_from_socket(s->fd);
+ s->ioc = QIO_CHANNEL(sioc);
s->bufcnt = 0;
s->bufptr = 0;
chr->opaque = s;
@@ -2580,19 +2534,20 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd,
/* TCP Net console */
typedef struct {
-
- GIOChannel *chan, *listen_chan;
+ QIOChannel *ioc; /* Client I/O channel */
+ QIOChannelSocket *sioc; /* Client master channel */
+ QIOChannelSocket *listen_ioc;
guint listen_tag;
- int fd, listen_fd;
+ QCryptoTLSCreds *tls_creds;
int connected;
int max_size;
int do_telnetopt;
int do_nodelay;
int is_unix;
int *read_msgfds;
- int read_msgfds_num;
+ size_t read_msgfds_num;
int *write_msgfds;
- int write_msgfds_num;
+ size_t write_msgfds_num;
SocketAddress *addr;
bool is_listen;
@@ -2626,68 +2581,27 @@ static void check_report_connect_error(CharDriverState *chr,
qemu_chr_socket_restart_timer(chr);
}
-static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque);
-
-#ifndef _WIN32
-static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len)
-{
- TCPCharDriver *s = chr->opaque;
- struct msghdr msgh;
- struct iovec iov;
- int r;
-
- size_t fd_size = s->write_msgfds_num * sizeof(int);
- char control[CMSG_SPACE(fd_size)];
- struct cmsghdr *cmsg;
-
- memset(&msgh, 0, sizeof(msgh));
- memset(control, 0, sizeof(control));
-
- /* set the payload */
- iov.iov_base = (uint8_t *) buf;
- iov.iov_len = len;
-
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
-
- msgh.msg_control = control;
- msgh.msg_controllen = sizeof(control);
-
- cmsg = CMSG_FIRSTHDR(&msgh);
-
- cmsg->cmsg_len = CMSG_LEN(fd_size);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size);
-
- do {
- r = sendmsg(s->fd, &msgh, 0);
- } while (r < 0 && errno == EINTR);
-
- /* free the written msgfds, no matter what */
- if (s->write_msgfds_num) {
- g_free(s->write_msgfds);
- s->write_msgfds = 0;
- s->write_msgfds_num = 0;
- }
-
- return r;
-}
-#endif
+static gboolean tcp_chr_accept(QIOChannel *chan,
+ GIOCondition cond,
+ void *opaque);
/* Called with chr_write_lock held. */
static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
{
TCPCharDriver *s = chr->opaque;
if (s->connected) {
-#ifndef _WIN32
- if (s->is_unix && s->write_msgfds_num) {
- return unix_send_msgfds(chr, buf, len);
- } else
-#endif
- {
- return io_channel_send(s->chan, buf, len);
+ int ret = io_channel_send_full(s->ioc, buf, len,
+ s->write_msgfds,
+ s->write_msgfds_num);
+
+ /* free the written msgfds, no matter what */
+ if (s->write_msgfds_num) {
+ g_free(s->write_msgfds);
+ s->write_msgfds = 0;
+ s->write_msgfds_num = 0;
}
+
+ return ret;
} else {
/* XXX: indicate an error ? */
return len;
@@ -2783,6 +2697,10 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
{
TCPCharDriver *s = chr->opaque;
+ if (!qio_channel_has_feature(s->ioc,
+ QIO_CHANNEL_FEATURE_FD_PASS)) {
+ return -1;
+ }
/* clear old pending fd array */
g_free(s->write_msgfds);
@@ -2796,27 +2714,26 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num)
return 0;
}
-#ifndef _WIN32
-static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
+static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
{
TCPCharDriver *s = chr->opaque;
- struct cmsghdr *cmsg;
-
- for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
- int fd_size, i;
-
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
- cmsg->cmsg_level != SOL_SOCKET ||
- cmsg->cmsg_type != SCM_RIGHTS) {
- continue;
- }
-
- fd_size = cmsg->cmsg_len - CMSG_LEN(0);
-
- if (!fd_size) {
- continue;
- }
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
+ int ret;
+ size_t i;
+ int *msgfds = NULL;
+ size_t msgfds_num = 0;
+
+ if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+ ret = qio_channel_readv_full(s->ioc, &iov, 1,
+ &msgfds, &msgfds_num,
+ NULL);
+ } else {
+ ret = qio_channel_readv_full(s->ioc, &iov, 1,
+ NULL, NULL,
+ NULL);
+ }
+ if (msgfds_num) {
/* close and clean read_msgfds */
for (i = 0; i < s->read_msgfds_num; i++) {
close(s->read_msgfds[i]);
@@ -2826,77 +2743,31 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg)
g_free(s->read_msgfds);
}
- s->read_msgfds_num = fd_size / sizeof(int);
- s->read_msgfds = g_malloc(fd_size);
- memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size);
-
- for (i = 0; i < s->read_msgfds_num; i++) {
- int fd = s->read_msgfds[i];
- if (fd < 0) {
- continue;
- }
-
- /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
- qemu_set_block(fd);
-
- #ifndef MSG_CMSG_CLOEXEC
- qemu_set_cloexec(fd);
- #endif
- }
+ s->read_msgfds = msgfds;
+ s->read_msgfds_num = msgfds_num;
}
-}
-
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
- TCPCharDriver *s = chr->opaque;
- struct msghdr msg = { NULL, };
- struct iovec iov[1];
- union {
- struct cmsghdr cmsg;
- char control[CMSG_SPACE(sizeof(int) * TCP_MAX_FDS)];
- } msg_control;
- int flags = 0;
- ssize_t ret;
- iov[0].iov_base = buf;
- iov[0].iov_len = len;
+ for (i = 0; i < s->read_msgfds_num; i++) {
+ int fd = s->read_msgfds[i];
+ if (fd < 0) {
+ continue;
+ }
- msg.msg_iov = iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &msg_control;
- msg.msg_controllen = sizeof(msg_control);
+ /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
+ qemu_set_block(fd);
-#ifdef MSG_CMSG_CLOEXEC
- flags |= MSG_CMSG_CLOEXEC;
+#ifndef MSG_CMSG_CLOEXEC
+ qemu_set_cloexec(fd);
#endif
- do {
- ret = recvmsg(s->fd, &msg, flags);
- } while (ret == -1 && errno == EINTR);
-
- if (ret > 0 && s->is_unix) {
- unix_process_msgfd(chr, &msg);
}
return ret;
}
-#else
-static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len)
-{
- TCPCharDriver *s = chr->opaque;
- ssize_t ret;
-
- do {
- ret = qemu_recv(s->fd, buf, len, 0);
- } while (ret == -1 && socket_error() == EINTR);
-
- return ret;
-}
-#endif
static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond)
{
TCPCharDriver *s = chr->opaque;
- return g_io_create_watch(s->chan, cond);
+ return qio_channel_create_watch(s->ioc, cond);
}
static void tcp_chr_disconnect(CharDriverState *chr)
@@ -2904,24 +2775,25 @@ static void tcp_chr_disconnect(CharDriverState *chr)
TCPCharDriver *s = chr->opaque;
s->connected = 0;
- if (s->listen_chan) {
- s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN,
- tcp_chr_accept, chr);
+ if (s->listen_ioc) {
+ s->listen_tag = qio_channel_add_watch(
+ QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
}
remove_fd_in_watch(chr);
- g_io_channel_unref(s->chan);
- s->chan = NULL;
- closesocket(s->fd);
- s->fd = -1;
- SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE,
- "disconnected:", s->addr, s->is_listen, s->is_telnet);
+ object_unref(OBJECT(s->sioc));
+ s->sioc = NULL;
+ object_unref(OBJECT(s->ioc));
+ s->ioc = NULL;
+ g_free(chr->filename);
+ chr->filename = SocketAddress_to_str("disconnected:", s->addr,
+ s->is_listen, s->is_telnet);
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
if (s->reconnect_time) {
qemu_chr_socket_restart_timer(chr);
}
}
-static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
{
CharDriverState *chr = opaque;
TCPCharDriver *s = chr->opaque;
@@ -2935,9 +2807,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque)
if (len > s->max_size)
len = s->max_size;
size = tcp_chr_recv(chr, (void *)buf, len);
- if (size == 0 ||
- (size < 0 &&
- socket_error() != EAGAIN && socket_error() != EWOULDBLOCK)) {
+ if (size == 0 || size == -1) {
/* connection closed */
tcp_chr_disconnect(chr);
} else if (size > 0) {
@@ -2985,25 +2855,17 @@ static void tcp_chr_connect(void *opaque)
{
CharDriverState *chr = opaque;
TCPCharDriver *s = chr->opaque;
- struct sockaddr_storage ss, ps;
- socklen_t ss_len = sizeof(ss), ps_len = sizeof(ps);
-
- memset(&ss, 0, ss_len);
- if (getsockname(s->fd, (struct sockaddr *) &ss, &ss_len) != 0) {
- snprintf(chr->filename, CHR_MAX_FILENAME_SIZE,
- "Error in getsockname: %s\n", strerror(errno));
- } else if (getpeername(s->fd, (struct sockaddr *) &ps, &ps_len) != 0) {
- snprintf(chr->filename, CHR_MAX_FILENAME_SIZE,
- "Error in getpeername: %s\n", strerror(errno));
- } else {
- sockaddr_to_str(chr->filename, CHR_MAX_FILENAME_SIZE,
- &ss, ss_len, &ps, ps_len,
- s->is_listen, s->is_telnet);
- }
+
+ g_free(chr->filename);
+ chr->filename = sockaddr_to_str(
+ &s->sioc->localAddr, s->sioc->localAddrLen,
+ &s->sioc->remoteAddr, s->sioc->remoteAddrLen,
+ s->is_listen, s->is_telnet);
s->connected = 1;
- if (s->chan) {
- chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll,
+ if (s->ioc) {
+ chr->fd_in_tag = io_add_watch_poll(s->ioc,
+ tcp_chr_read_poll,
tcp_chr_read, chr);
}
qemu_chr_be_generic_open(chr);
@@ -3014,82 +2876,195 @@ static void tcp_chr_update_read_handler(CharDriverState *chr)
TCPCharDriver *s = chr->opaque;
remove_fd_in_watch(chr);
- if (s->chan) {
- chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll,
+ if (s->ioc) {
+ chr->fd_in_tag = io_add_watch_poll(s->ioc,
+ tcp_chr_read_poll,
tcp_chr_read, chr);
}
}
-#define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c;
-static void tcp_chr_telnet_init(int fd)
+typedef struct {
+ CharDriverState *chr;
+ char buf[12];
+ size_t buflen;
+} TCPCharDriverTelnetInit;
+
+static gboolean tcp_chr_telnet_init_io(QIOChannel *ioc,
+ GIOCondition cond G_GNUC_UNUSED,
+ gpointer user_data)
{
- char buf[3];
- /* Send the telnet negotion to put telnet in binary, no echo, single char mode */
- IACSET(buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */
- send(fd, (char *)buf, 3, 0);
- IACSET(buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */
- send(fd, (char *)buf, 3, 0);
- IACSET(buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */
- send(fd, (char *)buf, 3, 0);
- IACSET(buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */
- send(fd, (char *)buf, 3, 0);
+ TCPCharDriverTelnetInit *init = user_data;
+ ssize_t ret;
+
+ ret = qio_channel_write(ioc, init->buf, init->buflen, NULL);
+ if (ret < 0) {
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ ret = 0;
+ } else {
+ tcp_chr_disconnect(init->chr);
+ return FALSE;
+ }
+ }
+ init->buflen -= ret;
+
+ if (init->buflen == 0) {
+ tcp_chr_connect(init->chr);
+ return FALSE;
+ }
+
+ memmove(init->buf, init->buf + ret, init->buflen);
+
+ return TRUE;
}
-static int tcp_chr_add_client(CharDriverState *chr, int fd)
+/*
+ * Stage the telnet option negotiation for the channel in s->ioc.
+ *
+ * The negotiation bytes are placed in a heap-allocated
+ * TCPCharDriverTelnetInit and written out by tcp_chr_telnet_init_io
+ * once the channel signals G_IO_OUT; that callback calls
+ * tcp_chr_connect() when the whole buffer has been sent.
+ *
+ * The state is handed to the watch with g_free as its destroy
+ * notifier, so it is released when the watch source is destroyed
+ * (callback returned FALSE, or the source was removed) rather than
+ * being leaked.
+ */
+static void tcp_chr_telnet_init(CharDriverState *chr)
+{
+    TCPCharDriver *s = chr->opaque;
+    TCPCharDriverTelnetInit *init =
+        g_new0(TCPCharDriverTelnetInit, 1);
+    size_t n = 0;
+
+    init->chr = chr;
+    init->buflen = 12;
+
+#define IACSET(x, a, b, c) \
+    do { \
+        x[n++] = a; \
+        x[n++] = b; \
+        x[n++] = c; \
+    } while (0)
+
+    /* Prep the telnet negotiation to put telnet in binary,
+     * no echo, single char mode */
+    IACSET(init->buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */
+    IACSET(init->buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */
+    IACSET(init->buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */
+    IACSET(init->buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */
+
+#undef IACSET
+
+    /* g_free as destroy notifier: the watch owns 'init' from here on */
+    qio_channel_add_watch(
+        s->ioc, G_IO_OUT,
+        tcp_chr_telnet_init_io,
+        init, g_free);
+}
+
+
+static void tcp_chr_tls_handshake(Object *source,
+ Error *err,
+ gpointer user_data)
{
+ CharDriverState *chr = user_data;
TCPCharDriver *s = chr->opaque;
- if (s->fd != -1)
+
+ if (err) {
+ tcp_chr_disconnect(chr);
+ } else {
+ if (s->do_telnetopt) {
+ tcp_chr_telnet_init(chr);
+ } else {
+ tcp_chr_connect(chr);
+ }
+ }
+}
+
+
+/*
+ * Wrap the current channel (s->ioc) in a TLS channel and start the
+ * TLS handshake.
+ *
+ * A server-side TLS channel is created when s->is_listen is set,
+ * otherwise a client-side one that uses the configured inet host name.
+ * On success the reference held on the old s->ioc is dropped, s->ioc is
+ * replaced by the TLS channel, and tcp_chr_tls_handshake() is invoked
+ * when the handshake completes.
+ *
+ * If the TLS channel cannot be created the chardev is disconnected and
+ * the function returns immediately; without that early return a NULL
+ * channel would be stored in s->ioc and passed to the handshake.
+ */
+static void tcp_chr_tls_init(CharDriverState *chr)
+{
+    TCPCharDriver *s = chr->opaque;
+    QIOChannelTLS *tioc;
+    Error *err = NULL;
+
+    if (s->is_listen) {
+        tioc = qio_channel_tls_new_server(
+            s->ioc, s->tls_creds,
+            NULL, /* XXX Use an ACL */
+            &err);
+    } else {
+        tioc = qio_channel_tls_new_client(
+            s->ioc, s->tls_creds,
+            s->addr->u.inet->host,
+            &err);
+    }
+    if (tioc == NULL) {
+        error_free(err);
+        tcp_chr_disconnect(chr);
+        /* Nothing to hand over: do not touch s->ioc or start a handshake */
+        return;
+    }
+    object_unref(OBJECT(s->ioc));
+    s->ioc = QIO_CHANNEL(tioc);
+
+    qio_channel_tls_handshake(tioc,
+                              tcp_chr_tls_handshake,
+                              chr,
+                              NULL);
+}
+
+
+static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc)
+{
+ TCPCharDriver *s = chr->opaque;
+ if (s->ioc != NULL) {
return -1;
+ }
- qemu_set_nonblock(fd);
- if (s->do_nodelay)
- socket_set_nodelay(fd);
- s->fd = fd;
- s->chan = io_channel_from_socket(fd);
+ s->ioc = QIO_CHANNEL(sioc);
+ object_ref(OBJECT(sioc));
+ s->sioc = sioc;
+ object_ref(OBJECT(sioc));
+
+ if (s->do_nodelay) {
+ qio_channel_set_delay(s->ioc, false);
+ }
if (s->listen_tag) {
g_source_remove(s->listen_tag);
s->listen_tag = 0;
}
- tcp_chr_connect(chr);
+
+ if (s->tls_creds) {
+ tcp_chr_tls_init(chr);
+ } else {
+ if (s->do_telnetopt) {
+ tcp_chr_telnet_init(chr);
+ } else {
+ tcp_chr_connect(chr);
+ }
+ }
return 0;
}
-static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opaque)
+
+static int tcp_chr_add_client(CharDriverState *chr, int fd)
+{
+ int ret;
+ QIOChannelSocket *sioc;
+
+ sioc = qio_channel_socket_new_fd(fd, NULL);
+ if (!sioc) {
+ return -1;
+ }
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+ ret = tcp_chr_new_client(chr, sioc);
+ object_unref(OBJECT(sioc));
+ return ret;
+}
+
+static gboolean tcp_chr_accept(QIOChannel *channel,
+ GIOCondition cond,
+ void *opaque)
{
CharDriverState *chr = opaque;
- TCPCharDriver *s = chr->opaque;
- struct sockaddr_in saddr;
-#ifndef _WIN32
- struct sockaddr_un uaddr;
-#endif
- struct sockaddr *addr;
- socklen_t len;
- int fd;
+ QIOChannelSocket *sioc;
- for(;;) {
-#ifndef _WIN32
- if (s->is_unix) {
- len = sizeof(uaddr);
- addr = (struct sockaddr *)&uaddr;
- } else
-#endif
- {
- len = sizeof(saddr);
- addr = (struct sockaddr *)&saddr;
- }
- fd = qemu_accept(s->listen_fd, addr, &len);
- if (fd < 0 && errno != EINTR) {
- s->listen_tag = 0;
- return FALSE;
- } else if (fd >= 0) {
- if (s->do_telnetopt)
- tcp_chr_telnet_init(fd);
- break;
- }
+ sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel),
+ NULL);
+ if (!sioc) {
+ return TRUE;
}
- if (tcp_chr_add_client(chr, fd) < 0)
- close(fd);
+
+ tcp_chr_new_client(chr, sioc);
+
+ object_unref(OBJECT(sioc));
return TRUE;
}
@@ -3104,22 +3079,16 @@ static void tcp_chr_close(CharDriverState *chr)
s->reconnect_timer = 0;
}
qapi_free_SocketAddress(s->addr);
- if (s->fd >= 0) {
- remove_fd_in_watch(chr);
- if (s->chan) {
- g_io_channel_unref(s->chan);
- }
- closesocket(s->fd);
+ remove_fd_in_watch(chr);
+ if (s->ioc) {
+ object_unref(OBJECT(s->ioc));
}
- if (s->listen_fd >= 0) {
- if (s->listen_tag) {
- g_source_remove(s->listen_tag);
- s->listen_tag = 0;
- }
- if (s->listen_chan) {
- g_io_channel_unref(s->listen_chan);
- }
- closesocket(s->listen_fd);
+ if (s->listen_tag) {
+ g_source_remove(s->listen_tag);
+ s->listen_tag = 0;
+ }
+ if (s->listen_ioc) {
+ object_unref(OBJECT(s->listen_ioc));
}
if (s->read_msgfds_num) {
for (i = 0; i < s->read_msgfds_num; i++) {
@@ -3127,6 +3096,9 @@ static void tcp_chr_close(CharDriverState *chr)
}
g_free(s->read_msgfds);
}
+ if (s->tls_creds) {
+ object_unref(OBJECT(s->tls_creds));
+ }
if (s->write_msgfds_num) {
g_free(s->write_msgfds);
}
@@ -3134,57 +3106,63 @@ static void tcp_chr_close(CharDriverState *chr)
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
}
-static void qemu_chr_finish_socket_connection(CharDriverState *chr, int fd)
+static void qemu_chr_finish_socket_connection(CharDriverState *chr,
+ QIOChannelSocket *sioc)
{
TCPCharDriver *s = chr->opaque;
if (s->is_listen) {
- s->listen_fd = fd;
- s->listen_chan = io_channel_from_socket(s->listen_fd);
- s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN,
- tcp_chr_accept, chr);
+ s->listen_ioc = sioc;
+ s->listen_tag = qio_channel_add_watch(
+ QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL);
} else {
- s->connected = 1;
- s->fd = fd;
- socket_set_nodelay(fd);
- s->chan = io_channel_from_socket(s->fd);
- tcp_chr_connect(chr);
+ tcp_chr_new_client(chr, sioc);
+ object_unref(OBJECT(sioc));
}
}
-static void qemu_chr_socket_connected(int fd, Error *err, void *opaque)
+static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque)
{
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(src);
CharDriverState *chr = opaque;
TCPCharDriver *s = chr->opaque;
- if (fd < 0) {
+ if (err) {
check_report_connect_error(chr, err);
+ object_unref(src);
return;
}
s->connect_err_reported = false;
- qemu_chr_finish_socket_connection(chr, fd);
+ qemu_chr_finish_socket_connection(chr, sioc);
}
static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp)
{
TCPCharDriver *s = chr->opaque;
- int fd;
+ QIOChannelSocket *sioc = qio_channel_socket_new();
if (s->is_listen) {
- fd = socket_listen(s->addr, errp);
+ if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) {
+ goto fail;
+ }
+ qemu_chr_finish_socket_connection(chr, sioc);
} else if (s->reconnect_time) {
- fd = socket_connect(s->addr, errp, qemu_chr_socket_connected, chr);
- return fd >= 0;
+ qio_channel_socket_connect_async(sioc, s->addr,
+ qemu_chr_socket_connected,
+ chr, NULL);
} else {
- fd = socket_connect(s->addr, errp, NULL, NULL);
- }
- if (fd < 0) {
- return false;
+ if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) {
+ goto fail;
+ }
+ qemu_chr_finish_socket_connection(chr, sioc);
}
- qemu_chr_finish_socket_connection(chr, fd);
return true;
+
+ fail:
+ object_unref(OBJECT(sioc));
+ return false;
}
/*********************************************************/
@@ -3651,6 +3629,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
const char *path = qemu_opt_get(opts, "path");
const char *host = qemu_opt_get(opts, "host");
const char *port = qemu_opt_get(opts, "port");
+ const char *tls_creds = qemu_opt_get(opts, "tls-creds");
SocketAddress *addr;
if (!path) {
@@ -3662,6 +3641,11 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
error_setg(errp, "chardev: socket: no port given");
return;
}
+ } else {
+ if (tls_creds) {
+ error_setg(errp, "TLS can only be used over TCP socket");
+ return;
+ }
}
backend->u.socket = g_new0(ChardevSocket, 1);
@@ -3677,6 +3661,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend,
backend->u.socket->wait = is_waitconnect;
backend->u.socket->has_reconnect = true;
backend->u.socket->reconnect = reconnect;
+ backend->u.socket->tls_creds = g_strdup(tls_creds);
addr = g_new0(SocketAddress, 1);
if (path) {
@@ -4104,6 +4089,9 @@ QemuOptsList qemu_chardev_opts = {
.name = "telnet",
.type = QEMU_OPT_BOOL,
},{
+ .name = "tls-creds",
+ .type = QEMU_OPT_STRING,
+ },{
.name = "width",
.type = QEMU_OPT_NUMBER,
},{
@@ -4315,12 +4303,43 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
}
s = g_new0(TCPCharDriver, 1);
- s->fd = -1;
- s->listen_fd = -1;
s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX;
s->is_listen = is_listen;
s->is_telnet = is_telnet;
s->do_nodelay = do_nodelay;
+ if (sock->tls_creds) {
+ Object *creds;
+ creds = object_resolve_path_component(
+ object_get_objects_root(), sock->tls_creds);
+ if (!creds) {
+ error_setg(errp, "No TLS credentials with id '%s'",
+ sock->tls_creds);
+ goto error;
+ }
+ s->tls_creds = (QCryptoTLSCreds *)
+ object_dynamic_cast(creds,
+ TYPE_QCRYPTO_TLS_CREDS);
+ if (!s->tls_creds) {
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
+ sock->tls_creds);
+ goto error;
+ }
+ object_ref(OBJECT(s->tls_creds));
+ if (is_listen) {
+ if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+ error_setg(errp, "%s",
+ "Expected TLS credentials for server endpoint");
+ goto error;
+ }
+ } else {
+ if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+ error_setg(errp, "%s",
+ "Expected TLS credentials for client endpoint");
+ goto error;
+ }
+ }
+ }
+
qapi_copy_SocketAddress(&s->addr, sock->addr);
chr->opaque = s;
@@ -4335,9 +4354,8 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
/* be isn't opened until we get a connection */
chr->explicit_be_open = true;
- chr->filename = g_malloc(CHR_MAX_FILENAME_SIZE);
- SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, "disconnected:",
- addr, is_listen, is_telnet);
+ chr->filename = SocketAddress_to_str("disconnected:",
+ addr, is_listen, is_telnet);
if (is_listen) {
if (is_telnet) {
@@ -4350,19 +4368,25 @@ static CharDriverState *qmp_chardev_open_socket(const char *id,
if (s->reconnect_time) {
socket_try_connect(chr);
} else if (!qemu_chr_open_socket_fd(chr, errp)) {
- g_free(s);
- qemu_chr_free_common(chr);
- return NULL;
+ goto error;
}
if (is_listen && is_waitconnect) {
fprintf(stderr, "QEMU waiting for connection on: %s\n",
chr->filename);
- tcp_chr_accept(s->listen_chan, G_IO_IN, chr);
- qemu_set_nonblock(s->listen_fd);
+ tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
+ qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL);
}
return chr;
+
+ error:
+ if (s->tls_creds) {
+ object_unref(OBJECT(s->tls_creds));
+ }
+ g_free(s);
+ qemu_chr_free_common(chr);
+ return NULL;
}
static CharDriverState *qmp_chardev_open_udp(const char *id,
@@ -4372,13 +4396,15 @@ static CharDriverState *qmp_chardev_open_udp(const char *id,
{
ChardevUdp *udp = backend->u.udp;
ChardevCommon *common = qapi_ChardevUdp_base(backend->u.udp);
- int fd;
+ QIOChannelSocket *sioc = qio_channel_socket_new();
- fd = socket_dgram(udp->remote, udp->local, errp);
- if (fd < 0) {
+ if (qio_channel_socket_dgram_sync(sioc,
+ udp->remote, udp->local,
+ errp) < 0) {
+ object_unref(OBJECT(sioc));
return NULL;
}
- return qemu_chr_open_udp_fd(fd, common, errp);
+ return qemu_chr_open_udp(sioc, common, errp);
}
ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 7bc388231f..ca4d9de15e 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -259,7 +259,7 @@ Linux should boot and give you a prompt.
@example
@c man begin SYNOPSIS
-usage: qemu-system-i386 [options] [@var{disk_image}]
+@command{qemu-system-i386} [@var{options}] [@var{disk_image}]
@c man end
@end example
@@ -1406,7 +1406,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}.
This USB device implements the USB Transport Layer of HCI. Example
usage:
@example
-qemu-system-i386 [...OPTIONS...] -usbdevice bt:hci,vlan=3 -bt device:keyboard,vlan=3
+@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3
@end example
@end table
@@ -2755,7 +2755,7 @@ qemu-i386 /usr/local/qemu-i386/wine/bin/wine \
@subsection Command line options
@example
-usage: qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g port] [-B offset] [-R size] program [arguments...]
+@command{qemu-i386} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...]
@end example
@table @option
@@ -2897,7 +2897,7 @@ qemu-sparc64 /bin/ls
@subsection Command line options
@example
-usage: qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] program [arguments...]
+@command{qemu-sparc64} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...]
@end example
@table @option
diff --git a/qemu-ga.texi b/qemu-ga.texi
index 536a9b5241..0e53bf6b2c 100644
--- a/qemu-ga.texi
+++ b/qemu-ga.texi
@@ -1,6 +1,6 @@
@example
@c man begin SYNOPSIS
-usage: qemu-ga [OPTIONS]
+@command{qemu-ga} [@var{OPTIONS}]
@c man end
@end example
diff --git a/qemu-img.texi b/qemu-img.texi
index 55c6be391d..7163a108e2 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -1,6 +1,6 @@
@example
@c man begin SYNOPSIS
-usage: qemu-img command [command options]
+@command{qemu-img} @var{command} [@var{command} @var{options}]
@c man end
@end example
diff --git a/qemu-options.hx b/qemu-options.hx
index b4763ba226..f31a240bed 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2092,7 +2092,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
"-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
"-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n"
" [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n"
- " [,logfile=PATH][,logappend=on|off] (tcp)\n"
+ " [,logfile=PATH][,logappend=on|off][,tls-creds=ID] (tcp)\n"
"-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds]\n"
" [,mux=on|off][,logfile=PATH][,logappend=on|off] (unix)\n"
"-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n"
@@ -2172,7 +2172,7 @@ Further options to each backend are described below.
A void device. This device will not emit any data, and will drop any data it
receives. The null backend does not take any options.
-@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}]
+@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] [,tls-creds=@var{id}]
Create a two-way stream socket, which can be either a TCP or a unix socket. A
unix socket will be created if @option{path} is specified. Behaviour is
@@ -2190,6 +2190,11 @@ escape sequences.
the remote end goes away. qemu will delay this many seconds and then attempt
to reconnect. Zero disables reconnecting, and is the default.
+@option{tls-creds} requests enablement of the TLS protocol for encryption,
+and specifies the id of the TLS credentials to use for the handshake. The
+credentials must be previously created with the @option{-object tls-creds}
+argument.
+
TCP and unix socket options are given below:
@table @option
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 08796fff8c..f274bf80fa 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -1,39 +1,456 @@
-# This python script adds a new gdb command, "dump-guest-memory". It
-# should be loaded with "source dump-guest-memory.py" at the (gdb)
-# prompt.
-#
-# Copyright (C) 2013, Red Hat, Inc.
-#
-# Authors:
-# Laszlo Ersek <lersek@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or later. See
-# the COPYING file in the top-level directory.
-#
+"""
+This python script adds a new gdb command, "dump-guest-memory". It
+should be loaded with "source dump-guest-memory.py" at the (gdb)
+prompt.
+
+Copyright (C) 2013, Red Hat, Inc.
+
+Authors:
+ Laszlo Ersek <lersek@redhat.com>
+ Janosch Frank <frankja@linux.vnet.ibm.com>
+
+This work is licensed under the terms of the GNU GPL, version 2 or later. See
+the COPYING file in the top-level directory.
+"""
+
+import ctypes
+
+UINTPTR_T = gdb.lookup_type("uintptr_t")
+
+TARGET_PAGE_SIZE = 0x1000
+TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
+
+# Special value for e_phnum. This indicates that the real number of
+# program headers is too large to fit into e_phnum. Instead the real
+# value is in the field sh_info of section 0.
+PN_XNUM = 0xFFFF
+
+EV_CURRENT = 1
+
+ELFCLASS32 = 1
+ELFCLASS64 = 2
+
+ELFDATA2LSB = 1
+ELFDATA2MSB = 2
+
+ET_CORE = 4
+
+PT_LOAD = 1
+PT_NOTE = 4
+
+EM_386 = 3
+EM_PPC = 20
+EM_PPC64 = 21
+EM_S390 = 22
+EM_AARCH = 183
+EM_X86_64 = 62
+
+class ELF(object):
+ """Representation of an ELF file."""
+
+ def __init__(self, arch):
+ self.ehdr = None
+ self.notes = []
+ self.segments = []
+ self.notes_size = 0
+ self.endianess = None
+ self.elfclass = ELFCLASS64
+
+ if arch == 'aarch64-le':
+ self.endianess = ELFDATA2LSB
+ self.elfclass = ELFCLASS64
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_AARCH
+
+ elif arch == 'aarch64-be':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_AARCH
+
+ elif arch == 'X86_64':
+ self.endianess = ELFDATA2LSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_X86_64
+
+ elif arch == '386':
+ self.endianess = ELFDATA2LSB
+ self.elfclass = ELFCLASS32
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_386
+
+ elif arch == 's390':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_S390
+
+ elif arch == 'ppc64-le':
+ self.endianess = ELFDATA2LSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_PPC64
+
+ elif arch == 'ppc64-be':
+ self.endianess = ELFDATA2MSB
+ self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
+ self.ehdr.e_machine = EM_PPC64
+
+ else:
+ raise gdb.GdbError("No valid arch type specified.\n"
+ "Currently supported types:\n"
+ "aarch64-be, aarch64-le, X86_64, 386, s390, "
+ "ppc64-be, ppc64-le")
+
+ self.add_segment(PT_NOTE, 0, 0)
+
+ def add_note(self, n_name, n_desc, n_type):
+ """Adds a note to the ELF."""
+
+ note = get_arch_note(self.endianess, len(n_name), len(n_desc))
+ note.n_namesz = len(n_name) + 1
+ note.n_descsz = len(n_desc)
+ note.n_name = n_name.encode()
+ note.n_type = n_type
+
+ # Desc needs to be 4 byte aligned (although the 64bit spec
+ # specifies 8 byte). When defining n_desc as uint32 it will be
+ # automatically aligned but we need the memmove to copy the
+ # string into it.
+ ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc))
+
+ self.notes.append(note)
+ self.segments[0].p_filesz += ctypes.sizeof(note)
+ self.segments[0].p_memsz += ctypes.sizeof(note)
+
+ def add_segment(self, p_type, p_paddr, p_size):
+ """Adds a segment to the elf."""
+
+ phdr = get_arch_phdr(self.endianess, self.elfclass)
+ phdr.p_type = p_type
+ phdr.p_paddr = p_paddr
+ phdr.p_filesz = p_size
+ phdr.p_memsz = p_size
+ self.segments.append(phdr)
+ self.ehdr.e_phnum += 1
+
+ def to_file(self, elf_file):
+ """Writes all ELF structures to the passed file.
+
+ Structure:
+ Ehdr
+ Segment 0:PT_NOTE
+ Segment 1:PT_LOAD
+ Segment N:PT_LOAD
+ Note 0..N
+ Dump contents
+ """
+ elf_file.write(self.ehdr)
+ off = ctypes.sizeof(self.ehdr) + \
+ len(self.segments) * ctypes.sizeof(self.segments[0])
+
+ for phdr in self.segments:
+ phdr.p_offset = off
+ elf_file.write(phdr)
+ off += phdr.p_filesz
+
+ for note in self.notes:
+ elf_file.write(note)
+
+
+def get_arch_note(endianess, len_name, len_desc):
+ """Returns a Note class with the specified endianess."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ len_name = len_name + 1
+
+ class Note(superclass):
+ """Represents an ELF note, includes the content."""
+
+ _fields_ = [("n_namesz", ctypes.c_uint32),
+ ("n_descsz", ctypes.c_uint32),
+ ("n_type", ctypes.c_uint32),
+ ("n_name", ctypes.c_char * len_name),
+ ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
+ return Note()
+
+
+class Ident(ctypes.Structure):
+ """Represents the ELF ident array in the ehdr structure."""
+
+ _fields_ = [('ei_mag0', ctypes.c_ubyte),
+ ('ei_mag1', ctypes.c_ubyte),
+ ('ei_mag2', ctypes.c_ubyte),
+ ('ei_mag3', ctypes.c_ubyte),
+ ('ei_class', ctypes.c_ubyte),
+ ('ei_data', ctypes.c_ubyte),
+ ('ei_version', ctypes.c_ubyte),
+ ('ei_osabi', ctypes.c_ubyte),
+ ('ei_abiversion', ctypes.c_ubyte),
+ ('ei_pad', ctypes.c_ubyte * 7)]
+
+ def __init__(self, endianess, elfclass):
+ self.ei_mag0 = 0x7F
+ self.ei_mag1 = ord('E')
+ self.ei_mag2 = ord('L')
+ self.ei_mag3 = ord('F')
+ self.ei_class = elfclass
+ self.ei_data = endianess
+ self.ei_version = EV_CURRENT
+
+
+def get_arch_ehdr(endianess, elfclass):
+ """Returns a 32 or 64 bit EHDR class with the specified endianess."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ class EHDR64(superclass):
+ """Represents the 64 bit ELF header struct."""
+
+ _fields_ = [('e_ident', Ident),
+ ('e_type', ctypes.c_uint16),
+ ('e_machine', ctypes.c_uint16),
+ ('e_version', ctypes.c_uint32),
+ ('e_entry', ctypes.c_uint64),
+ ('e_phoff', ctypes.c_uint64),
+ ('e_shoff', ctypes.c_uint64),
+ ('e_flags', ctypes.c_uint32),
+ ('e_ehsize', ctypes.c_uint16),
+ ('e_phentsize', ctypes.c_uint16),
+ ('e_phnum', ctypes.c_uint16),
+ ('e_shentsize', ctypes.c_uint16),
+ ('e_shnum', ctypes.c_uint16),
+ ('e_shstrndx', ctypes.c_uint16)]
+
+ def __init__(self):
+ super(superclass, self).__init__()
+ self.e_ident = Ident(endianess, elfclass)
+ self.e_type = ET_CORE
+ self.e_version = EV_CURRENT
+ self.e_ehsize = ctypes.sizeof(self)
+ self.e_phoff = ctypes.sizeof(self)
+ self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+ self.e_phnum = 0
+
+
+ class EHDR32(superclass):
+ """Represents the 32 bit ELF header struct."""
+
+ _fields_ = [('e_ident', Ident),
+ ('e_type', ctypes.c_uint16),
+ ('e_machine', ctypes.c_uint16),
+ ('e_version', ctypes.c_uint32),
+ ('e_entry', ctypes.c_uint32),
+ ('e_phoff', ctypes.c_uint32),
+ ('e_shoff', ctypes.c_uint32),
+ ('e_flags', ctypes.c_uint32),
+ ('e_ehsize', ctypes.c_uint16),
+ ('e_phentsize', ctypes.c_uint16),
+ ('e_phnum', ctypes.c_uint16),
+ ('e_shentsize', ctypes.c_uint16),
+ ('e_shnum', ctypes.c_uint16),
+ ('e_shstrndx', ctypes.c_uint16)]
+
+ def __init__(self):
+ super(superclass, self).__init__()
+ self.e_ident = Ident(endianess, elfclass)
+ self.e_type = ET_CORE
+ self.e_version = EV_CURRENT
+ self.e_ehsize = ctypes.sizeof(self)
+ self.e_phoff = ctypes.sizeof(self)
+ self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
+ self.e_phnum = 0
+
+ # End get_arch_ehdr
+ if elfclass == ELFCLASS64:
+ return EHDR64()
+ else:
+ return EHDR32()
+
+
+def get_arch_phdr(endianess, elfclass):
+ """Returns a 32 or 64 bit PHDR class with the specified endianess."""
+
+ if endianess == ELFDATA2LSB:
+ superclass = ctypes.LittleEndianStructure
+ else:
+ superclass = ctypes.BigEndianStructure
+
+ class PHDR64(superclass):
+ """Represents the 64 bit ELF program header struct."""
+
+ _fields_ = [('p_type', ctypes.c_uint32),
+ ('p_flags', ctypes.c_uint32),
+ ('p_offset', ctypes.c_uint64),
+ ('p_vaddr', ctypes.c_uint64),
+ ('p_paddr', ctypes.c_uint64),
+ ('p_filesz', ctypes.c_uint64),
+ ('p_memsz', ctypes.c_uint64),
+ ('p_align', ctypes.c_uint64)]
+
+ class PHDR32(superclass):
+ """Represents the 32 bit ELF program header struct."""
+
+ _fields_ = [('p_type', ctypes.c_uint32),
+ ('p_offset', ctypes.c_uint32),
+ ('p_vaddr', ctypes.c_uint32),
+ ('p_paddr', ctypes.c_uint32),
+ ('p_filesz', ctypes.c_uint32),
+ ('p_memsz', ctypes.c_uint32),
+ ('p_flags', ctypes.c_uint32),
+ ('p_align', ctypes.c_uint32)]
+
+ # End get_arch_phdr
+ if elfclass == ELFCLASS64:
+ return PHDR64()
+ else:
+ return PHDR32()
+
+
+def int128_get64(val):
+ """Returns low 64bit part of Int128 struct."""
+
+ assert val["hi"] == 0
+ return val["lo"]
+
+
+def qlist_foreach(head, field_str):
+ """Generator for qlists."""
+
+ var_p = head["lh_first"]
+ while var_p != 0:
+ var = var_p.dereference()
+ var_p = var[field_str]["le_next"]
+ yield var
+
+
+def qemu_get_ram_block(ram_addr):
+ """Returns the RAMBlock struct to which the given address belongs."""
+
+ ram_blocks = gdb.parse_and_eval("ram_list.blocks")
+
+ for block in qlist_foreach(ram_blocks, "next"):
+ if (ram_addr - block["offset"]) < block["used_length"]:
+ return block
+
+ raise gdb.GdbError("Bad ram offset %x" % ram_addr)
+
+
+def qemu_get_ram_ptr(ram_addr):
+ """Returns qemu vaddr for given guest physical address."""
+
+ block = qemu_get_ram_block(ram_addr)
+ return block["host"] + (ram_addr - block["offset"])
+
+
+def memory_region_get_ram_ptr(memory_region):
+ if memory_region["alias"] != 0:
+ return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+ + memory_region["alias_offset"])
+
+ return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)
+
+
+def get_guest_phys_blocks():
+ """Returns a list of ram blocks.
+
+ Each block entry contains:
+ 'target_start': guest block phys start address
+ 'target_end': guest block phys end address
+ 'host_addr': qemu vaddr of the block's start
+ """
+
+ guest_phys_blocks = []
+
+ print("guest RAM blocks:")
+ print("target_start target_end host_addr message "
+ "count")
+ print("---------------- ---------------- ---------------- ------- "
+ "-----")
+
+ current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
+ current_map = current_map_p.dereference()
+
+ # Conversion to int is needed for python 3
+ # compatibility. Otherwise range doesn't cast the value itself and
+ # breaks.
+ for cur in range(int(current_map["nr"])):
+ flat_range = (current_map["ranges"] + cur).dereference()
+ memory_region = flat_range["mr"].dereference()
+
+ # we only care about RAM
+ if not memory_region["ram"]:
+ continue
+
+ section_size = int128_get64(flat_range["addr"]["size"])
+ target_start = int128_get64(flat_range["addr"]["start"])
+ target_end = target_start + section_size
+ host_addr = (memory_region_get_ram_ptr(memory_region)
+ + flat_range["offset_in_region"])
+ predecessor = None
+
+ # find continuity in guest physical address space
+ if len(guest_phys_blocks) > 0:
+ predecessor = guest_phys_blocks[-1]
+ predecessor_size = (predecessor["target_end"] -
+ predecessor["target_start"])
+
+ # the memory API guarantees monotonically increasing
+ # traversal
+ assert predecessor["target_end"] <= target_start
+
+ # we want continuity in both guest-physical and
+ # host-virtual memory
+ if (predecessor["target_end"] < target_start or
+ predecessor["host_addr"] + predecessor_size != host_addr):
+ predecessor = None
+
+ if predecessor is None:
+ # isolated mapping, add it to the list
+ guest_phys_blocks.append({"target_start": target_start,
+ "target_end": target_end,
+ "host_addr": host_addr})
+ message = "added"
+ else:
+ # expand predecessor until @target_end; predecessor's
+ # start doesn't change
+ predecessor["target_end"] = target_end
+ message = "joined"
+
+ print("%016x %016x %016x %-7s %5u" %
+ (target_start, target_end, host_addr.cast(UINTPTR_T),
+ message, len(guest_phys_blocks)))
+
+ return guest_phys_blocks
+
+
# The leading docstring doesn't have idiomatic Python formatting. It is
# printed by gdb's "help" command (the first line is printed in the
# "help data" summary), and it should match how other help texts look in
# gdb.
-
-import struct
-
class DumpGuestMemory(gdb.Command):
"""Extract guest vmcore from qemu process coredump.
-The sole argument is FILE, identifying the target file to write the
-guest vmcore to.
+The two required arguments are FILE and ARCH:
+FILE identifies the target file to write the guest vmcore to.
+ARCH specifies the architecture for which the core will be generated.
This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
-command line option "-machine dump-guest-core=on".
+command line option "-machine dump-guest-core=on" which is the default.
For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.
-Only x86_64 guests are supported.
+Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
+ppc64-le guests are supported.
The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
@@ -47,293 +464,66 @@ deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""
- TARGET_PAGE_SIZE = 0x1000
- TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000
-
- # Various ELF constants
- EM_X86_64 = 62 # AMD x86-64 target machine
- ELFDATA2LSB = 1 # little endian
- ELFCLASS64 = 2
- ELFMAG = "\x7FELF"
- EV_CURRENT = 1
- ET_CORE = 4
- PT_LOAD = 1
- PT_NOTE = 4
-
- # Special value for e_phnum. This indicates that the real number of
- # program headers is too large to fit into e_phnum. Instead the real
- # value is in the field sh_info of section 0.
- PN_XNUM = 0xFFFF
-
- # Format strings for packing and header size calculation.
- ELF64_EHDR = ("4s" # e_ident/magic
- "B" # e_ident/class
- "B" # e_ident/data
- "B" # e_ident/version
- "B" # e_ident/osabi
- "8s" # e_ident/pad
- "H" # e_type
- "H" # e_machine
- "I" # e_version
- "Q" # e_entry
- "Q" # e_phoff
- "Q" # e_shoff
- "I" # e_flags
- "H" # e_ehsize
- "H" # e_phentsize
- "H" # e_phnum
- "H" # e_shentsize
- "H" # e_shnum
- "H" # e_shstrndx
- )
- ELF64_PHDR = ("I" # p_type
- "I" # p_flags
- "Q" # p_offset
- "Q" # p_vaddr
- "Q" # p_paddr
- "Q" # p_filesz
- "Q" # p_memsz
- "Q" # p_align
- )
-
def __init__(self):
super(DumpGuestMemory, self).__init__("dump-guest-memory",
gdb.COMMAND_DATA,
gdb.COMPLETE_FILENAME)
- self.uintptr_t = gdb.lookup_type("uintptr_t")
- self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR)
- self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR)
-
- def int128_get64(self, val):
- assert (val["hi"] == 0)
- return val["lo"]
-
- def qlist_foreach(self, head, field_str):
- var_p = head["lh_first"]
- while (var_p != 0):
- var = var_p.dereference()
- yield var
- var_p = var[field_str]["le_next"]
-
- def qemu_get_ram_block(self, ram_addr):
- ram_blocks = gdb.parse_and_eval("ram_list.blocks")
- for block in self.qlist_foreach(ram_blocks, "next"):
- if (ram_addr - block["offset"] < block["used_length"]):
- return block
- raise gdb.GdbError("Bad ram offset %x" % ram_addr)
-
- def qemu_get_ram_ptr(self, ram_addr):
- block = self.qemu_get_ram_block(ram_addr)
- return block["host"] + (ram_addr - block["offset"])
-
- def memory_region_get_ram_ptr(self, mr):
- if (mr["alias"] != 0):
- return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) +
- mr["alias_offset"])
- return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK)
-
- def guest_phys_blocks_init(self):
- self.guest_phys_blocks = []
-
- def guest_phys_blocks_append(self):
- print "guest RAM blocks:"
- print ("target_start target_end host_addr message "
- "count")
- print ("---------------- ---------------- ---------------- ------- "
- "-----")
-
- current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
- current_map = current_map_p.dereference()
- for cur in range(current_map["nr"]):
- flat_range = (current_map["ranges"] + cur).dereference()
- mr = flat_range["mr"].dereference()
-
- # we only care about RAM
- if (not mr["ram"]):
- continue
-
- section_size = self.int128_get64(flat_range["addr"]["size"])
- target_start = self.int128_get64(flat_range["addr"]["start"])
- target_end = target_start + section_size
- host_addr = (self.memory_region_get_ram_ptr(mr) +
- flat_range["offset_in_region"])
- predecessor = None
-
- # find continuity in guest physical address space
- if (len(self.guest_phys_blocks) > 0):
- predecessor = self.guest_phys_blocks[-1]
- predecessor_size = (predecessor["target_end"] -
- predecessor["target_start"])
-
- # the memory API guarantees monotonically increasing
- # traversal
- assert (predecessor["target_end"] <= target_start)
-
- # we want continuity in both guest-physical and
- # host-virtual memory
- if (predecessor["target_end"] < target_start or
- predecessor["host_addr"] + predecessor_size != host_addr):
- predecessor = None
-
- if (predecessor is None):
- # isolated mapping, add it to the list
- self.guest_phys_blocks.append({"target_start": target_start,
- "target_end" : target_end,
- "host_addr" : host_addr})
- message = "added"
- else:
- # expand predecessor until @target_end; predecessor's
- # start doesn't change
- predecessor["target_end"] = target_end
- message = "joined"
-
- print ("%016x %016x %016x %-7s %5u" %
- (target_start, target_end, host_addr.cast(self.uintptr_t),
- message, len(self.guest_phys_blocks)))
-
- def cpu_get_dump_info(self):
- # We can't synchronize the registers with KVM post-mortem, and
- # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
- # may not reflect long mode for example. Hence just assume the
- # most common values. This also means that instruction pointer
- # etc. will be bogus in the dump, but at least the RAM contents
- # should be valid.
- self.dump_info = {"d_machine": self.EM_X86_64,
- "d_endian" : self.ELFDATA2LSB,
- "d_class" : self.ELFCLASS64}
-
- def encode_elf64_ehdr_le(self):
- return self.elf64_ehdr_le.pack(
- self.ELFMAG, # e_ident/magic
- self.dump_info["d_class"], # e_ident/class
- self.dump_info["d_endian"], # e_ident/data
- self.EV_CURRENT, # e_ident/version
- 0, # e_ident/osabi
- "", # e_ident/pad
- self.ET_CORE, # e_type
- self.dump_info["d_machine"], # e_machine
- self.EV_CURRENT, # e_version
- 0, # e_entry
- self.elf64_ehdr_le.size, # e_phoff
- 0, # e_shoff
- 0, # e_flags
- self.elf64_ehdr_le.size, # e_ehsize
- self.elf64_phdr_le.size, # e_phentsize
- self.phdr_num, # e_phnum
- 0, # e_shentsize
- 0, # e_shnum
- 0 # e_shstrndx
- )
-
- def encode_elf64_note_le(self):
- return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type
- 0, # p_flags
- (self.memory_offset -
- len(self.note)), # p_offset
- 0, # p_vaddr
- 0, # p_paddr
- len(self.note), # p_filesz
- len(self.note), # p_memsz
- 0 # p_align
- )
-
- def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
- return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type
- 0, # p_flags
- offset, # p_offset
- 0, # p_vaddr
- start_hwaddr, # p_paddr
- range_size, # p_filesz
- range_size, # p_memsz
- 0 # p_align
- )
-
- def note_init(self, name, desc, type):
- # name must include a trailing NUL
- namesz = (len(name) + 1 + 3) / 4 * 4
- descsz = (len(desc) + 3) / 4 * 4
- fmt = ("<" # little endian
- "I" # n_namesz
- "I" # n_descsz
- "I" # n_type
- "%us" # name
- "%us" # desc
- % (namesz, descsz))
- self.note = struct.pack(fmt,
- len(name) + 1, len(desc), type, name, desc)
-
- def dump_init(self):
- self.guest_phys_blocks_init()
- self.guest_phys_blocks_append()
- self.cpu_get_dump_info()
- # we have no way to retrieve the VCPU status from KVM
- # post-mortem
- self.note_init("NONE", "EMPTY", 0)
-
- # Account for PT_NOTE.
- self.phdr_num = 1
-
- # We should never reach PN_XNUM for paging=false dumps: there's
- # just a handful of discontiguous ranges after merging.
- self.phdr_num += len(self.guest_phys_blocks)
- assert (self.phdr_num < self.PN_XNUM)
-
- # Calculate the ELF file offset where the memory dump commences:
- #
- # ELF header
- # PT_NOTE
- # PT_LOAD: 1
- # PT_LOAD: 2
- # ...
- # PT_LOAD: len(self.guest_phys_blocks)
- # ELF note
- # memory dump
- self.memory_offset = (self.elf64_ehdr_le.size +
- self.elf64_phdr_le.size * self.phdr_num +
- len(self.note))
-
- def dump_begin(self, vmcore):
- vmcore.write(self.encode_elf64_ehdr_le())
- vmcore.write(self.encode_elf64_note_le())
- running = self.memory_offset
+ self.elf = None
+ self.guest_phys_blocks = None
+
+ def dump_init(self, vmcore):
+ """Prepares and writes ELF structures to core file."""
+
+ # Needed to make crash happy, data for more useful notes is
+ # not available in a qemu core.
+ self.elf.add_note("NONE", "EMPTY", 0)
+
+ # We should never reach PN_XNUM for paging=false dumps,
+ # there's just a handful of discontiguous ranges after
+ # merging.
+ # The constant is needed to account for the PT_NOTE segment.
+ phdr_num = len(self.guest_phys_blocks) + 1
+ assert phdr_num < PN_XNUM
+
for block in self.guest_phys_blocks:
- range_size = block["target_end"] - block["target_start"]
- vmcore.write(self.encode_elf64_load_le(running,
- block["target_start"],
- range_size))
- running += range_size
- vmcore.write(self.note)
+ block_size = block["target_end"] - block["target_start"]
+ self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
+
+ self.elf.to_file(vmcore)
def dump_iterate(self, vmcore):
+ """Writes guest core to file."""
+
qemu_core = gdb.inferiors()[0]
for block in self.guest_phys_blocks:
- cur = block["host_addr"]
+ cur = block["host_addr"]
left = block["target_end"] - block["target_start"]
- print ("dumping range at %016x for length %016x" %
- (cur.cast(self.uintptr_t), left))
- while (left > 0):
- chunk_size = min(self.TARGET_PAGE_SIZE, left)
+ print("dumping range at %016x for length %016x" %
+ (cur.cast(UINTPTR_T), left))
+
+ while left > 0:
+ chunk_size = min(TARGET_PAGE_SIZE, left)
chunk = qemu_core.read_memory(cur, chunk_size)
vmcore.write(chunk)
- cur += chunk_size
+ cur += chunk_size
left -= chunk_size
- def create_vmcore(self, filename):
- vmcore = open(filename, "wb")
- self.dump_begin(vmcore)
- self.dump_iterate(vmcore)
- vmcore.close()
-
def invoke(self, args, from_tty):
+ """Handles command invocation from gdb."""
+
# Unwittingly pressing the Enter key after the command should
# not dump the same multi-gig coredump to the same file.
self.dont_repeat()
argv = gdb.string_to_argv(args)
- if (len(argv) != 1):
- raise gdb.GdbError("usage: dump-guest-memory FILE")
+ if len(argv) != 2:
+ raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
+
+ self.elf = ELF(argv[1])
+ self.guest_phys_blocks = get_guest_phys_blocks()
- self.dump_init()
- self.create_vmcore(argv[0])
+ with open(argv[0], "wb") as vmcore:
+ self.dump_init(vmcore)
+ self.dump_iterate(vmcore)
DumpGuestMemory()
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index 7e5d25612b..d43e8f3e85 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -12,285 +12,311 @@
# the COPYING file in the top-level directory.
import curses
-import sys, os, time, optparse, ctypes
-from ctypes import *
-
-class DebugfsProvider(object):
- def __init__(self):
- self.base = '/sys/kernel/debug/kvm'
- self._fields = os.listdir(self.base)
- def fields(self):
- return self._fields
- def select(self, fields):
- self._fields = fields
- def read(self):
- def val(key):
- return int(file(self.base + '/' + key).read())
- return dict([(key, val(key)) for key in self._fields])
-
-vmx_exit_reasons = {
- 0: 'EXCEPTION_NMI',
- 1: 'EXTERNAL_INTERRUPT',
- 2: 'TRIPLE_FAULT',
- 7: 'PENDING_INTERRUPT',
- 8: 'NMI_WINDOW',
- 9: 'TASK_SWITCH',
- 10: 'CPUID',
- 12: 'HLT',
- 14: 'INVLPG',
- 15: 'RDPMC',
- 16: 'RDTSC',
- 18: 'VMCALL',
- 19: 'VMCLEAR',
- 20: 'VMLAUNCH',
- 21: 'VMPTRLD',
- 22: 'VMPTRST',
- 23: 'VMREAD',
- 24: 'VMRESUME',
- 25: 'VMWRITE',
- 26: 'VMOFF',
- 27: 'VMON',
- 28: 'CR_ACCESS',
- 29: 'DR_ACCESS',
- 30: 'IO_INSTRUCTION',
- 31: 'MSR_READ',
- 32: 'MSR_WRITE',
- 33: 'INVALID_STATE',
- 36: 'MWAIT_INSTRUCTION',
- 39: 'MONITOR_INSTRUCTION',
- 40: 'PAUSE_INSTRUCTION',
- 41: 'MCE_DURING_VMENTRY',
- 43: 'TPR_BELOW_THRESHOLD',
- 44: 'APIC_ACCESS',
- 48: 'EPT_VIOLATION',
- 49: 'EPT_MISCONFIG',
- 54: 'WBINVD',
- 55: 'XSETBV',
- 56: 'APIC_WRITE',
- 58: 'INVPCID',
+import sys
+import os
+import time
+import optparse
+import ctypes
+import fcntl
+import resource
+import struct
+import re
+from collections import defaultdict
+
+VMX_EXIT_REASONS = {
+ 'EXCEPTION_NMI': 0,
+ 'EXTERNAL_INTERRUPT': 1,
+ 'TRIPLE_FAULT': 2,
+ 'PENDING_INTERRUPT': 7,
+ 'NMI_WINDOW': 8,
+ 'TASK_SWITCH': 9,
+ 'CPUID': 10,
+ 'HLT': 12,
+ 'INVLPG': 14,
+ 'RDPMC': 15,
+ 'RDTSC': 16,
+ 'VMCALL': 18,
+ 'VMCLEAR': 19,
+ 'VMLAUNCH': 20,
+ 'VMPTRLD': 21,
+ 'VMPTRST': 22,
+ 'VMREAD': 23,
+ 'VMRESUME': 24,
+ 'VMWRITE': 25,
+ 'VMOFF': 26,
+ 'VMON': 27,
+ 'CR_ACCESS': 28,
+ 'DR_ACCESS': 29,
+ 'IO_INSTRUCTION': 30,
+ 'MSR_READ': 31,
+ 'MSR_WRITE': 32,
+ 'INVALID_STATE': 33,
+ 'MWAIT_INSTRUCTION': 36,
+ 'MONITOR_INSTRUCTION': 39,
+ 'PAUSE_INSTRUCTION': 40,
+ 'MCE_DURING_VMENTRY': 41,
+ 'TPR_BELOW_THRESHOLD': 43,
+ 'APIC_ACCESS': 44,
+ 'EPT_VIOLATION': 48,
+ 'EPT_MISCONFIG': 49,
+ 'WBINVD': 54,
+ 'XSETBV': 55,
+ 'APIC_WRITE': 56,
+ 'INVPCID': 58,
}
-svm_exit_reasons = {
- 0x000: 'READ_CR0',
- 0x003: 'READ_CR3',
- 0x004: 'READ_CR4',
- 0x008: 'READ_CR8',
- 0x010: 'WRITE_CR0',
- 0x013: 'WRITE_CR3',
- 0x014: 'WRITE_CR4',
- 0x018: 'WRITE_CR8',
- 0x020: 'READ_DR0',
- 0x021: 'READ_DR1',
- 0x022: 'READ_DR2',
- 0x023: 'READ_DR3',
- 0x024: 'READ_DR4',
- 0x025: 'READ_DR5',
- 0x026: 'READ_DR6',
- 0x027: 'READ_DR7',
- 0x030: 'WRITE_DR0',
- 0x031: 'WRITE_DR1',
- 0x032: 'WRITE_DR2',
- 0x033: 'WRITE_DR3',
- 0x034: 'WRITE_DR4',
- 0x035: 'WRITE_DR5',
- 0x036: 'WRITE_DR6',
- 0x037: 'WRITE_DR7',
- 0x040: 'EXCP_BASE',
- 0x060: 'INTR',
- 0x061: 'NMI',
- 0x062: 'SMI',
- 0x063: 'INIT',
- 0x064: 'VINTR',
- 0x065: 'CR0_SEL_WRITE',
- 0x066: 'IDTR_READ',
- 0x067: 'GDTR_READ',
- 0x068: 'LDTR_READ',
- 0x069: 'TR_READ',
- 0x06a: 'IDTR_WRITE',
- 0x06b: 'GDTR_WRITE',
- 0x06c: 'LDTR_WRITE',
- 0x06d: 'TR_WRITE',
- 0x06e: 'RDTSC',
- 0x06f: 'RDPMC',
- 0x070: 'PUSHF',
- 0x071: 'POPF',
- 0x072: 'CPUID',
- 0x073: 'RSM',
- 0x074: 'IRET',
- 0x075: 'SWINT',
- 0x076: 'INVD',
- 0x077: 'PAUSE',
- 0x078: 'HLT',
- 0x079: 'INVLPG',
- 0x07a: 'INVLPGA',
- 0x07b: 'IOIO',
- 0x07c: 'MSR',
- 0x07d: 'TASK_SWITCH',
- 0x07e: 'FERR_FREEZE',
- 0x07f: 'SHUTDOWN',
- 0x080: 'VMRUN',
- 0x081: 'VMMCALL',
- 0x082: 'VMLOAD',
- 0x083: 'VMSAVE',
- 0x084: 'STGI',
- 0x085: 'CLGI',
- 0x086: 'SKINIT',
- 0x087: 'RDTSCP',
- 0x088: 'ICEBP',
- 0x089: 'WBINVD',
- 0x08a: 'MONITOR',
- 0x08b: 'MWAIT',
- 0x08c: 'MWAIT_COND',
- 0x08d: 'XSETBV',
- 0x400: 'NPF',
+SVM_EXIT_REASONS = {
+ 'READ_CR0': 0x000,
+ 'READ_CR3': 0x003,
+ 'READ_CR4': 0x004,
+ 'READ_CR8': 0x008,
+ 'WRITE_CR0': 0x010,
+ 'WRITE_CR3': 0x013,
+ 'WRITE_CR4': 0x014,
+ 'WRITE_CR8': 0x018,
+ 'READ_DR0': 0x020,
+ 'READ_DR1': 0x021,
+ 'READ_DR2': 0x022,
+ 'READ_DR3': 0x023,
+ 'READ_DR4': 0x024,
+ 'READ_DR5': 0x025,
+ 'READ_DR6': 0x026,
+ 'READ_DR7': 0x027,
+ 'WRITE_DR0': 0x030,
+ 'WRITE_DR1': 0x031,
+ 'WRITE_DR2': 0x032,
+ 'WRITE_DR3': 0x033,
+ 'WRITE_DR4': 0x034,
+ 'WRITE_DR5': 0x035,
+ 'WRITE_DR6': 0x036,
+ 'WRITE_DR7': 0x037,
+ 'EXCP_BASE': 0x040,
+ 'INTR': 0x060,
+ 'NMI': 0x061,
+ 'SMI': 0x062,
+ 'INIT': 0x063,
+ 'VINTR': 0x064,
+ 'CR0_SEL_WRITE': 0x065,
+ 'IDTR_READ': 0x066,
+ 'GDTR_READ': 0x067,
+ 'LDTR_READ': 0x068,
+ 'TR_READ': 0x069,
+ 'IDTR_WRITE': 0x06a,
+ 'GDTR_WRITE': 0x06b,
+ 'LDTR_WRITE': 0x06c,
+ 'TR_WRITE': 0x06d,
+ 'RDTSC': 0x06e,
+ 'RDPMC': 0x06f,
+ 'PUSHF': 0x070,
+ 'POPF': 0x071,
+ 'CPUID': 0x072,
+ 'RSM': 0x073,
+ 'IRET': 0x074,
+ 'SWINT': 0x075,
+ 'INVD': 0x076,
+ 'PAUSE': 0x077,
+ 'HLT': 0x078,
+ 'INVLPG': 0x079,
+ 'INVLPGA': 0x07a,
+ 'IOIO': 0x07b,
+ 'MSR': 0x07c,
+ 'TASK_SWITCH': 0x07d,
+ 'FERR_FREEZE': 0x07e,
+ 'SHUTDOWN': 0x07f,
+ 'VMRUN': 0x080,
+ 'VMMCALL': 0x081,
+ 'VMLOAD': 0x082,
+ 'VMSAVE': 0x083,
+ 'STGI': 0x084,
+ 'CLGI': 0x085,
+ 'SKINIT': 0x086,
+ 'RDTSCP': 0x087,
+ 'ICEBP': 0x088,
+ 'WBINVD': 0x089,
+ 'MONITOR': 0x08a,
+ 'MWAIT': 0x08b,
+ 'MWAIT_COND': 0x08c,
+ 'XSETBV': 0x08d,
+ 'NPF': 0x400,
}
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
-aarch64_exit_reasons = {
- 0x00: 'UNKNOWN',
- 0x01: 'WFI',
- 0x03: 'CP15_32',
- 0x04: 'CP15_64',
- 0x05: 'CP14_MR',
- 0x06: 'CP14_LS',
- 0x07: 'FP_ASIMD',
- 0x08: 'CP10_ID',
- 0x0C: 'CP14_64',
- 0x0E: 'ILL_ISS',
- 0x11: 'SVC32',
- 0x12: 'HVC32',
- 0x13: 'SMC32',
- 0x15: 'SVC64',
- 0x16: 'HVC64',
- 0x17: 'SMC64',
- 0x18: 'SYS64',
- 0x20: 'IABT',
- 0x21: 'IABT_HYP',
- 0x22: 'PC_ALIGN',
- 0x24: 'DABT',
- 0x25: 'DABT_HYP',
- 0x26: 'SP_ALIGN',
- 0x28: 'FP_EXC32',
- 0x2C: 'FP_EXC64',
- 0x2F: 'SERROR',
- 0x30: 'BREAKPT',
- 0x31: 'BREAKPT_HYP',
- 0x32: 'SOFTSTP',
- 0x33: 'SOFTSTP_HYP',
- 0x34: 'WATCHPT',
- 0x35: 'WATCHPT_HYP',
- 0x38: 'BKPT32',
- 0x3A: 'VECTOR32',
- 0x3C: 'BRK64',
+AARCH64_EXIT_REASONS = {
+ 'UNKNOWN': 0x00,
+ 'WFI': 0x01,
+ 'CP15_32': 0x03,
+ 'CP15_64': 0x04,
+ 'CP14_MR': 0x05,
+ 'CP14_LS': 0x06,
+ 'FP_ASIMD': 0x07,
+ 'CP10_ID': 0x08,
+ 'CP14_64': 0x0C,
+ 'ILL_ISS': 0x0E,
+ 'SVC32': 0x11,
+ 'HVC32': 0x12,
+ 'SMC32': 0x13,
+ 'SVC64': 0x15,
+ 'HVC64': 0x16,
+ 'SMC64': 0x17,
+ 'SYS64': 0x18,
+ 'IABT': 0x20,
+ 'IABT_HYP': 0x21,
+ 'PC_ALIGN': 0x22,
+ 'DABT': 0x24,
+ 'DABT_HYP': 0x25,
+ 'SP_ALIGN': 0x26,
+ 'FP_EXC32': 0x28,
+ 'FP_EXC64': 0x2C,
+ 'SERROR': 0x2F,
+ 'BREAKPT': 0x30,
+ 'BREAKPT_HYP': 0x31,
+ 'SOFTSTP': 0x32,
+ 'SOFTSTP_HYP': 0x33,
+ 'WATCHPT': 0x34,
+ 'WATCHPT_HYP': 0x35,
+ 'BKPT32': 0x38,
+ 'VECTOR32': 0x3A,
+ 'BRK64': 0x3C,
}
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
-userspace_exit_reasons = {
- 0: 'UNKNOWN',
- 1: 'EXCEPTION',
- 2: 'IO',
- 3: 'HYPERCALL',
- 4: 'DEBUG',
- 5: 'HLT',
- 6: 'MMIO',
- 7: 'IRQ_WINDOW_OPEN',
- 8: 'SHUTDOWN',
- 9: 'FAIL_ENTRY',
- 10: 'INTR',
- 11: 'SET_TPR',
- 12: 'TPR_ACCESS',
- 13: 'S390_SIEIC',
- 14: 'S390_RESET',
- 15: 'DCR',
- 16: 'NMI',
- 17: 'INTERNAL_ERROR',
- 18: 'OSI',
- 19: 'PAPR_HCALL',
- 20: 'S390_UCONTROL',
- 21: 'WATCHDOG',
- 22: 'S390_TSCH',
- 23: 'EPR',
- 24: 'SYSTEM_EVENT',
+USERSPACE_EXIT_REASONS = {
+ 'UNKNOWN': 0,
+ 'EXCEPTION': 1,
+ 'IO': 2,
+ 'HYPERCALL': 3,
+ 'DEBUG': 4,
+ 'HLT': 5,
+ 'MMIO': 6,
+ 'IRQ_WINDOW_OPEN': 7,
+ 'SHUTDOWN': 8,
+ 'FAIL_ENTRY': 9,
+ 'INTR': 10,
+ 'SET_TPR': 11,
+ 'TPR_ACCESS': 12,
+ 'S390_SIEIC': 13,
+ 'S390_RESET': 14,
+ 'DCR': 15,
+ 'NMI': 16,
+ 'INTERNAL_ERROR': 17,
+ 'OSI': 18,
+ 'PAPR_HCALL': 19,
+ 'S390_UCONTROL': 20,
+ 'WATCHDOG': 21,
+ 'S390_TSCH': 22,
+ 'EPR': 23,
+ 'SYSTEM_EVENT': 24,
}
-x86_exit_reasons = {
- 'vmx': vmx_exit_reasons,
- 'svm': svm_exit_reasons,
+IOCTL_NUMBERS = {
+ 'SET_FILTER': 0x40082406,
+ 'ENABLE': 0x00002400,
+ 'DISABLE': 0x00002401,
+ 'RESET': 0x00002403,
}
-sc_perf_evt_open = None
-exit_reasons = None
+class Arch(object):
+ """Class that encapsulates global architecture specific data like
+ syscall and ioctl numbers.
+
+ """
+ @staticmethod
+ def get_arch():
+ machine = os.uname()[4]
+
+ if machine.startswith('ppc'):
+ return ArchPPC()
+ elif machine.startswith('aarch64'):
+ return ArchA64()
+ elif machine.startswith('s390'):
+ return ArchS390()
+ else:
+ # X86_64
+ for line in open('/proc/cpuinfo'):
+ if not line.startswith('flags'):
+ continue
+
+ flags = line.split()
+ if 'vmx' in flags:
+ return ArchX86(VMX_EXIT_REASONS)
+ if 'svm' in flags:
+ return ArchX86(SVM_EXIT_REASONS)
+ return
+
+class ArchX86(Arch):
+ def __init__(self, exit_reasons):
+ self.sc_perf_evt_open = 298
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = exit_reasons
+
+class ArchPPC(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 319
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.ioctl_numbers['ENABLE'] = 0x20002400
+ self.ioctl_numbers['DISABLE'] = 0x20002401
-ioctl_numbers = {
- 'SET_FILTER' : 0x40082406,
- 'ENABLE' : 0x00002400,
- 'DISABLE' : 0x00002401,
- 'RESET' : 0x00002403,
-}
+ # PPC comes in 32 and 64 bit and some generated ioctl
+ # numbers depend on the wordsize.
+ char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+ self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
+
+class ArchA64(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 241
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = AARCH64_EXIT_REASONS
+
+class ArchS390(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 331
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reasons = None
+
+ARCH = Arch.get_arch()
+
+
+def walkdir(path):
+ """Returns os.walk() data for specified directory.
+
+ As it is only a wrapper it returns the same 3-tuple of (dirpath,
+ dirnames, filenames).
+ """
+ return next(os.walk(path))
+
+
+def parse_int_list(list_string):
+ """Returns an int list from a string of comma separated integers and
+ integer ranges."""
+ integers = []
+ members = list_string.split(',')
-def x86_init(flag):
- globals().update({
- 'sc_perf_evt_open' : 298,
- 'exit_reasons' : x86_exit_reasons[flag],
- })
-
-def s390_init():
- globals().update({
- 'sc_perf_evt_open' : 331
- })
-
-def ppc_init():
- globals().update({
- 'sc_perf_evt_open' : 319,
- 'ioctl_numbers' : {
- 'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
- 'ENABLE' : 0x20002400,
- 'DISABLE' : 0x20002401,
- }
- })
-
-def aarch64_init():
- globals().update({
- 'sc_perf_evt_open' : 241,
- 'exit_reasons' : aarch64_exit_reasons,
- })
-
-def detect_platform():
- if os.uname()[4].startswith('ppc'):
- ppc_init()
- return
- elif os.uname()[4].startswith('aarch64'):
- aarch64_init()
- return
-
- for line in file('/proc/cpuinfo').readlines():
- if line.startswith('flags'):
- for flag in line.split():
- if flag in x86_exit_reasons:
- x86_init(flag)
- return
- elif line.startswith('vendor_id'):
- for flag in line.split():
- if flag == 'IBM/S390':
- s390_init()
- return
-
-detect_platform()
-
-def invert(d):
- return dict((x[1], x[0]) for x in d.iteritems())
-
-filters = {}
-filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons))
-if exit_reasons:
- filters['kvm_exit'] = ('exit_reason', invert(exit_reasons))
-
-import struct, array
-
-libc = ctypes.CDLL('libc.so.6')
+ for member in members:
+ if '-' not in member:
+ integers.append(int(member))
+ else:
+ int_range = member.split('-')
+ integers.extend(range(int(int_range[0]),
+ int(int_range[1]) + 1))
+
+ return integers
+
+
+def get_online_cpus():
+ with open('/sys/devices/system/cpu/online') as cpu_list:
+ cpu_string = cpu_list.readline()
+ return parse_int_list(cpu_string)
+
+
+def get_filters():
+ filters = {}
+ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+ if ARCH.exit_reasons:
+ filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
+ return filters
+
+libc = ctypes.CDLL('libc.so.6', use_errno=True)
syscall = libc.syscall
-get_errno = libc.__errno_location
-get_errno.restype = POINTER(c_int)
class perf_event_attr(ctypes.Structure):
_fields_ = [('type', ctypes.c_uint32),
@@ -305,262 +331,350 @@ class perf_event_attr(ctypes.Structure):
('bp_addr', ctypes.c_uint64),
('bp_len', ctypes.c_uint64),
]
-def _perf_event_open(attr, pid, cpu, group_fd, flags):
- return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
- ctypes.c_int(cpu), ctypes.c_int(group_fd),
- ctypes.c_long(flags))
-
-PERF_TYPE_HARDWARE = 0
-PERF_TYPE_SOFTWARE = 1
-PERF_TYPE_TRACEPOINT = 2
-PERF_TYPE_HW_CACHE = 3
-PERF_TYPE_RAW = 4
-PERF_TYPE_BREAKPOINT = 5
-
-PERF_SAMPLE_IP = 1 << 0
-PERF_SAMPLE_TID = 1 << 1
-PERF_SAMPLE_TIME = 1 << 2
-PERF_SAMPLE_ADDR = 1 << 3
-PERF_SAMPLE_READ = 1 << 4
-PERF_SAMPLE_CALLCHAIN = 1 << 5
-PERF_SAMPLE_ID = 1 << 6
-PERF_SAMPLE_CPU = 1 << 7
-PERF_SAMPLE_PERIOD = 1 << 8
-PERF_SAMPLE_STREAM_ID = 1 << 9
-PERF_SAMPLE_RAW = 1 << 10
-
-PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
-PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
-PERF_FORMAT_ID = 1 << 2
-PERF_FORMAT_GROUP = 1 << 3
-import re
+ def __init__(self):
+ super(self.__class__, self).__init__()
+ self.type = PERF_TYPE_TRACEPOINT
+ self.size = ctypes.sizeof(self)
+ self.read_format = PERF_FORMAT_GROUP
+
+def perf_event_open(attr, pid, cpu, group_fd, flags):
+ return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
+ ctypes.c_int(pid), ctypes.c_int(cpu),
+ ctypes.c_int(group_fd), ctypes.c_long(flags))
-sys_tracing = '/sys/kernel/debug/tracing'
+PERF_TYPE_TRACEPOINT = 2
+PERF_FORMAT_GROUP = 1 << 3
+
+PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
+PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
class Group(object):
- def __init__(self, cpu):
+ def __init__(self):
self.events = []
- self.group_leader = None
- self.cpu = cpu
- def add_event(self, name, event_set, tracepoint, filter = None):
- self.events.append(Event(group = self,
- name = name, event_set = event_set,
- tracepoint = tracepoint, filter = filter))
- if len(self.events) == 1:
- self.file = os.fdopen(self.events[0].fd)
+
+ def add_event(self, event):
+ self.events.append(event)
+
def read(self):
- bytes = 8 * (1 + len(self.events))
- fmt = 'xxxxxxxx' + 'q' * len(self.events)
+ length = 8 * (1 + len(self.events))
+ read_format = 'xxxxxxxx' + 'Q' * len(self.events)
return dict(zip([event.name for event in self.events],
- struct.unpack(fmt, self.file.read(bytes))))
+ struct.unpack(read_format,
+ os.read(self.events[0].fd, length))))
class Event(object):
- def __init__(self, group, name, event_set, tracepoint, filter = None):
+ def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
+ trace_set='kvm'):
self.name = name
- attr = perf_event_attr()
- attr.type = PERF_TYPE_TRACEPOINT
- attr.size = ctypes.sizeof(attr)
- id_path = os.path.join(sys_tracing, 'events', event_set,
- tracepoint, 'id')
- id = int(file(id_path).read())
- attr.config = id
- attr.sample_type = (PERF_SAMPLE_RAW
- | PERF_SAMPLE_TIME
- | PERF_SAMPLE_CPU)
- attr.sample_period = 1
- attr.read_format = PERF_FORMAT_GROUP
+ self.fd = None
+ self.setup_event(group, trace_cpu, trace_point, trace_filter,
+ trace_set)
+
+ def setup_event_attribute(self, trace_set, trace_point):
+ id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
+ trace_point, 'id')
+
+ event_attr = perf_event_attr()
+ event_attr.config = int(open(id_path).read())
+ return event_attr
+
+ def setup_event(self, group, trace_cpu, trace_point, trace_filter,
+ trace_set):
+ event_attr = self.setup_event_attribute(trace_set, trace_point)
+
group_leader = -1
if group.events:
group_leader = group.events[0].fd
- fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
+
+ fd = perf_event_open(event_attr, -1, trace_cpu,
+ group_leader, 0)
if fd == -1:
- err = get_errno()[0]
- raise Exception('perf_event_open failed, errno = ' + err.__str__())
- if filter:
- import fcntl
- fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
+ err = ctypes.get_errno()
+ raise OSError(err, os.strerror(err),
+ 'while calling sys_perf_event_open().')
+
+ if trace_filter:
+ fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
+ trace_filter)
+
self.fd = fd
+
def enable(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
+
def disable(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
+
def reset(self):
- import fcntl
- fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0)
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
class TracepointProvider(object):
def __init__(self):
- path = os.path.join(sys_tracing, 'events', 'kvm')
- fields = [f
- for f in os.listdir(path)
- if os.path.isdir(os.path.join(path, f))]
+ self.group_leaders = []
+ self.filters = get_filters()
+ self._fields = self.get_available_fields()
+ self.setup_traces()
+ self.fields = self._fields
+
+ def get_available_fields(self):
+ path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
+ fields = walkdir(path)[1]
extra = []
- for f in fields:
- if f in filters:
- subfield, values = filters[f]
- for name, number in values.iteritems():
- extra.append(f + '(' + name + ')')
+ for field in fields:
+ if field in self.filters:
+ filter_name_, filter_dicts = self.filters[field]
+ for name in filter_dicts:
+ extra.append(field + '(' + name + ')')
fields += extra
- self._setup(fields)
- self.select(fields)
- def fields(self):
- return self._fields
+ return fields
+
+ def setup_traces(self):
+ cpus = get_online_cpus()
+
+ # The constant is needed as a buffer for python libs, std
+ # streams and other files that the script opens.
+ newlim = len(cpus) * len(self._fields) + 50
+ try:
+ softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+ if hardlim < newlim:
+ # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
+ else:
+ # Raising the soft limit is sufficient.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
+
+ except ValueError:
+ sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
- def _online_cpus(self):
- l = []
- pattern = r'cpu([0-9]+)'
- basedir = '/sys/devices/system/cpu'
- for entry in os.listdir(basedir):
- match = re.match(pattern, entry)
- if not match:
- continue
- path = os.path.join(basedir, entry, 'online')
- if os.path.exists(path) and open(path).read().strip() != '1':
- continue
- l.append(int(match.group(1)))
- return l
-
- def _setup(self, _fields):
- self._fields = _fields
- cpus = self._online_cpus()
- import resource
- nfiles = len(cpus) * 1000
- resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
- events = []
- self.group_leaders = []
for cpu in cpus:
- group = Group(cpu)
- for name in _fields:
+ group = Group()
+ for name in self._fields:
tracepoint = name
- filter = None
- m = re.match(r'(.*)\((.*)\)', name)
- if m:
- tracepoint, sub = m.groups()
- filter = '%s==%d\0' % (filters[tracepoint][0],
- filters[tracepoint][1][sub])
- event = group.add_event(name, event_set = 'kvm',
- tracepoint = tracepoint,
- filter = filter)
+ tracefilter = None
+ match = re.match(r'(.*)\((.*)\)', name)
+ if match:
+ tracepoint, sub = match.groups()
+ tracefilter = ('%s==%d\0' %
+ (self.filters[tracepoint][0],
+ self.filters[tracepoint][1][sub]))
+
+ group.add_event(Event(name=name,
+ group=group,
+ trace_cpu=cpu,
+ trace_point=tracepoint,
+ trace_filter=tracefilter))
self.group_leaders.append(group)
- def select(self, fields):
+
+ def available_fields(self):
+ return self.get_available_fields()
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ self._fields = fields
for group in self.group_leaders:
- for event in group.events:
+ for index, event in enumerate(group.events):
if event.name in fields:
event.reset()
event.enable()
else:
- event.disable()
+ # Do not disable the group leader.
+ # It would disable all of its events.
+ if index != 0:
+ event.disable()
+
def read(self):
- from collections import defaultdict
ret = defaultdict(int)
for group in self.group_leaders:
for name, val in group.read().iteritems():
- ret[name] += val
+ if name in self._fields:
+ ret[name] += val
return ret
-class Stats:
- def __init__(self, providers, fields = None):
+class DebugfsProvider(object):
+ def __init__(self):
+ self._fields = self.get_available_fields()
+
+ def get_available_fields(self):
+ return walkdir(PATH_DEBUGFS_KVM)[2]
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ self._fields = fields
+
+ def read(self):
+ def val(key):
+ return int(file(PATH_DEBUGFS_KVM + '/' + key).read())
+ return dict([(key, val(key)) for key in self._fields])
+
+class Stats(object):
+ def __init__(self, providers, fields=None):
self.providers = providers
- self.fields_filter = fields
- self._update()
- def _update(self):
+ self._fields_filter = fields
+ self.values = {}
+ self.update_provider_filters()
+
+ def update_provider_filters(self):
def wanted(key):
- import re
- if not self.fields_filter:
+ if not self._fields_filter:
return True
- return re.match(self.fields_filter, key) is not None
- self.values = dict()
- for d in providers:
- provider_fields = [key for key in d.fields() if wanted(key)]
- for key in provider_fields:
- self.values[key] = None
- d.select(provider_fields)
- def set_fields_filter(self, fields_filter):
- self.fields_filter = fields_filter
- self._update()
+ return re.match(self._fields_filter, key) is not None
+
+ # As we reset the counters when updating the fields we can
+ # also clear the cache of old values.
+ self.values = {}
+ for provider in self.providers:
+ provider_fields = [key for key in provider.get_available_fields()
+ if wanted(key)]
+ provider.fields = provider_fields
+
+ @property
+ def fields_filter(self):
+ return self._fields_filter
+
+ @fields_filter.setter
+ def fields_filter(self, fields_filter):
+ self._fields_filter = fields_filter
+ self.update_provider_filters()
+
def get(self):
- for d in providers:
- new = d.read()
- for key in d.fields():
+ for provider in self.providers:
+ new = provider.read()
+ for key in provider.fields:
oldval = self.values.get(key, (0, 0))
- newval = new[key]
+ newval = new.get(key, 0)
newdelta = None
if oldval is not None:
newdelta = newval - oldval[0]
self.values[key] = (newval, newdelta)
return self.values
-if not os.access('/sys/kernel/debug', os.F_OK):
- print 'Please enable CONFIG_DEBUG_FS in your kernel'
- sys.exit(1)
-if not os.access('/sys/kernel/debug/kvm', os.F_OK):
- print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')"
- print "and ensure the kvm modules are loaded"
- sys.exit(1)
-
-label_width = 40
-number_width = 10
-
-def tui(screen, stats):
- curses.use_default_colors()
- curses.noecho()
- drilldown = False
- fields_filter = stats.fields_filter
- def update_drilldown():
- if not fields_filter:
- if drilldown:
- stats.set_fields_filter(None)
- else:
- stats.set_fields_filter(r'^[^\(]*$')
- update_drilldown()
- def refresh(sleeptime):
- screen.erase()
- screen.addstr(0, 0, 'kvm statistics')
- screen.addstr(2, 1, 'Event')
- screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total')
- screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current')
+LABEL_WIDTH = 40
+NUMBER_WIDTH = 10
+
+class Tui(object):
+ def __init__(self, stats):
+ self.stats = stats
+ self.screen = None
+ self.drilldown = False
+ self.update_drilldown()
+
+ def __enter__(self):
+ """Initialises curses for later use. Based on curses.wrapper
+ implementation from the Python standard library."""
+ self.screen = curses.initscr()
+ curses.noecho()
+ curses.cbreak()
+
+ # The try/except works around a minor bit of
+ # over-conscientiousness in the curses module, the error
+ # return from C start_color() is ignorable.
+ try:
+ curses.start_color()
+ except:
+ pass
+
+ curses.use_default_colors()
+ return self
+
+ def __exit__(self, *exception):
+ """Resets the terminal to its normal state. Based on curses.wrapper
+ implementation from the Python standard library."""
+ if self.screen:
+ self.screen.keypad(0)
+ curses.echo()
+ curses.nocbreak()
+ curses.endwin()
+
+ def update_drilldown(self):
+ if not self.stats.fields_filter:
+ self.stats.fields_filter = r'^[^\(]*$'
+
+ elif self.stats.fields_filter == r'^[^\(]*$':
+ self.stats.fields_filter = None
+
+ def refresh(self, sleeptime):
+ self.screen.erase()
+ self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+ self.screen.addstr(2, 1, 'Event')
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH -
+ len('Total'), 'Total')
+ self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 -
+ len('Current'), 'Current')
row = 3
- s = stats.get()
+ stats = self.stats.get()
def sortkey(x):
- if s[x][1]:
- return (-s[x][1], -s[x][0])
+ if stats[x][1]:
+ return (-stats[x][1], -stats[x][0])
else:
- return (0, -s[x][0])
- for key in sorted(s.keys(), key = sortkey):
- if row >= screen.getmaxyx()[0]:
+ return (0, -stats[x][0])
+ for key in sorted(stats.keys(), key=sortkey):
+
+ if row >= self.screen.getmaxyx()[0]:
break
- values = s[key]
+ values = stats[key]
if not values[0] and not values[1]:
break
col = 1
- screen.addstr(row, col, key)
- col += label_width
- screen.addstr(row, col, '%10d' % (values[0],))
- col += number_width
+ self.screen.addstr(row, col, key)
+ col += LABEL_WIDTH
+ self.screen.addstr(row, col, '%10d' % (values[0],))
+ col += NUMBER_WIDTH
if values[1] is not None:
- screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
+ self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
row += 1
- screen.refresh()
+ self.screen.refresh()
+
+ def show_filter_selection(self):
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0,
+ "Show statistics for events matching a regex.",
+ curses.A_BOLD)
+ self.screen.addstr(2, 0,
+ "Current regex: {0}"
+ .format(self.stats.fields_filter))
+ self.screen.addstr(3, 0, "New regex: ")
+ curses.echo()
+ regex = self.screen.getstr()
+ curses.noecho()
+ if len(regex) == 0:
+ return
+ try:
+ re.compile(regex)
+ self.stats.fields_filter = regex
+ return
+ except re.error:
+ continue
- sleeptime = 0.25
- while True:
- refresh(sleeptime)
- curses.halfdelay(int(sleeptime * 10))
- sleeptime = 3
- try:
- c = screen.getkey()
- if c == 'x':
- drilldown = not drilldown
- update_drilldown()
- if c == 'q':
+ def show_stats(self):
+ sleeptime = 0.25
+ while True:
+ self.refresh(sleeptime)
+ curses.halfdelay(int(sleeptime * 10))
+ sleeptime = 3
+ try:
+ char = self.screen.getkey()
+ if char == 'x':
+ self.drilldown = not self.drilldown
+ self.update_drilldown()
+ if char == 'q':
+ break
+ if char == 'f':
+ self.show_filter_selection()
+ except KeyboardInterrupt:
break
- except KeyboardInterrupt:
- break
- except curses.error:
- continue
+ except curses.error:
+ continue
def batch(stats):
s = stats.get()
@@ -568,13 +682,13 @@ def batch(stats):
s = stats.get()
for key in sorted(s.keys()):
values = s[key]
- print '%-22s%10d%10d' % (key, values[0], values[1])
+ print '%-42s%10d%10d' % (key, values[0], values[1])
def log(stats):
keys = sorted(stats.get().iterkeys())
def banner():
for k in keys:
- print '%10s' % k[0:9],
+ print '%s' % k,
print
def statline():
s = stats.get()
@@ -590,57 +704,110 @@ def log(stats):
statline()
line += 1
-options = optparse.OptionParser()
-options.add_option('-1', '--once', '--batch',
- action = 'store_true',
- default = False,
- dest = 'once',
- help = 'run in batch mode for one second',
- )
-options.add_option('-l', '--log',
- action = 'store_true',
- default = False,
- dest = 'log',
- help = 'run in logging mode (like vmstat)',
- )
-options.add_option('-t', '--tracepoints',
- action = 'store_true',
- default = False,
- dest = 'tracepoints',
- help = 'retrieve statistics from tracepoints',
- )
-options.add_option('-d', '--debugfs',
- action = 'store_true',
- default = False,
- dest = 'debugfs',
- help = 'retrieve statistics from debugfs',
- )
-options.add_option('-f', '--fields',
- action = 'store',
- default = None,
- dest = 'fields',
- help = 'fields to display (regex)',
- )
-(options, args) = options.parse_args(sys.argv)
-
-providers = []
-if options.tracepoints:
- providers.append(TracepointProvider())
-if options.debugfs:
- providers.append(DebugfsProvider())
-
-if len(providers) == 0:
- try:
- providers = [TracepointProvider()]
- except:
- providers = [DebugfsProvider()]
-
-stats = Stats(providers, fields = options.fields)
-
-if options.log:
- log(stats)
-elif not options.once:
- import curses.wrapper
- curses.wrapper(tui, stats)
-else:
- batch(stats)
+def get_options():
+ description_text = """
+This script displays various statistics about VMs running under KVM.
+The statistics are gathered from the KVM debugfs entries and / or the
+currently available perf traces.
+
+The monitoring takes additional cpu cycles and might affect the VM's
+performance.
+
+Requirements:
+- Access to:
+ /sys/kernel/debug/kvm
+ /sys/kernel/debug/trace/events/*
+ /proc/pid/task
+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
+ CAP_SYS_ADMIN and perf events are used.
+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
+ the large number of files that are possibly opened.
+"""
+
+ class PlainHelpFormatter(optparse.IndentedHelpFormatter):
+ def format_description(self, description):
+ if description:
+ return description + "\n"
+ else:
+ return ""
+
+ optparser = optparse.OptionParser(description=description_text,
+ formatter=PlainHelpFormatter())
+ optparser.add_option('-1', '--once', '--batch',
+ action='store_true',
+ default=False,
+ dest='once',
+ help='run in batch mode for one second',
+ )
+ optparser.add_option('-l', '--log',
+ action='store_true',
+ default=False,
+ dest='log',
+ help='run in logging mode (like vmstat)',
+ )
+ optparser.add_option('-t', '--tracepoints',
+ action='store_true',
+ default=False,
+ dest='tracepoints',
+ help='retrieve statistics from tracepoints',
+ )
+ optparser.add_option('-d', '--debugfs',
+ action='store_true',
+ default=False,
+ dest='debugfs',
+ help='retrieve statistics from debugfs',
+ )
+ optparser.add_option('-f', '--fields',
+ action='store',
+ default=None,
+ dest='fields',
+ help='fields to display (regex)',
+ )
+ (options, _) = optparser.parse_args(sys.argv)
+ return options
+
+def get_providers(options):
+ providers = []
+
+ if options.tracepoints:
+ providers.append(TracepointProvider())
+ if options.debugfs:
+ providers.append(DebugfsProvider())
+ if len(providers) == 0:
+ providers.append(TracepointProvider())
+
+ return providers
+
+def check_access():
+ if not os.path.exists('/sys/kernel/debug'):
+ sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
+ sys.exit(1)
+
+ if not os.path.exists(PATH_DEBUGFS_KVM):
+ sys.stderr.write("Please make sure, that debugfs is mounted and "
+ "readable by the current user:\n"
+ "('mount -t debugfs debugfs /sys/kernel/debug')\n"
+ "Also ensure, that the kvm modules are loaded.\n")
+ sys.exit(1)
+
+ if not os.path.exists(PATH_DEBUGFS_TRACING):
+ sys.stderr.write("Please make {0} readable by the current user.\n"
+ .format(PATH_DEBUGFS_TRACING))
+ sys.exit(1)
+
+def main():
+ check_access()
+ options = get_options()
+ providers = get_providers(options)
+ stats = Stats(providers, fields=options.fields)
+
+ if options.log:
+ log(stats)
+ elif not options.once:
+ with Tui(stats) as tui:
+ tui.show_stats()
+ else:
+ batch(stats)
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/Makefile b/tests/Makefile
index b7352f1a35..650e654ec2 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -558,7 +558,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y)
tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y)
tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y)
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o
-tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y)
tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)