diff options
author | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-17 10:20:48 -0500 |
---|---|---|
committer | Anthony Liguori <aliguori@us.ibm.com> | 2012-09-17 10:20:48 -0500 |
commit | de71bd6f77739cc5addb6d70bf29be59aed41722 (patch) | |
tree | 83a53c6fa78028ffbcf4cb9413754d025ad8796b | |
parent | 31e165f1770cd10ee4fabf7ef4c1a48566be3336 (diff) | |
parent | 45a7f54a8bb3928ffa58d522e0d61acaee8277bb (diff) |
Merge remote-tracking branch 'stefanha/net' into staging
* stefanha/net:
net: EAGAIN handling for net/socket.c TCP
net: EAGAIN handling for net/socket.c UDP
net: asynchronous send/receive infrastructure for net/socket.c
net: broadcast hub packets if at least one port can receive
net: fix usbnet_receive() packet drops
net: clean up usbnet_receive()
net: add -netdev options to man page
net: do not report queued packets as sent
net: add receive_disabled logic to iov delivery path
eepro100: Fix network hang when rx buffers run out
xen: flush queue when getting an event
e1000: flush queue whenever can_receive can go from false to true
net: notify iothread after flushing queue
-rw-r--r-- | hw/e1000.c | 4 |
-rw-r--r-- | hw/eepro100.c | 4 |
-rw-r--r-- | hw/usb/dev-network.c | 49 |
-rw-r--r-- | hw/virtio-net.c | 4 |
-rw-r--r-- | hw/xen_nic.c | 1 |
-rw-r--r-- | net.c | 22 |
-rw-r--r-- | net/hub.c | 6 |
-rw-r--r-- | net/queue.c | 40 |
-rw-r--r-- | net/queue.h | 2 |
-rw-r--r-- | net/socket.c | 110 |
-rw-r--r-- | qemu-options.hx | 7 |
11 files changed, 186 insertions, 63 deletions
diff --git a/hw/e1000.c b/hw/e1000.c index ae8a6c5523..ec3a7c4ecc 100644 --- a/hw/e1000.c +++ b/hw/e1000.c @@ -295,6 +295,7 @@ set_rx_control(E1000State *s, int index, uint32_t val) s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], s->mac_reg[RCTL]); + qemu_flush_queued_packets(&s->nic->nc); } static void @@ -926,6 +927,9 @@ set_rdt(E1000State *s, int index, uint32_t val) { s->check_rxov = 0; s->mac_reg[index] = val & 0xffff; + if (e1000_has_rxbufs(s, 1)) { + qemu_flush_queued_packets(&s->nic->nc); + } } static void diff --git a/hw/eepro100.c b/hw/eepro100.c index 50d117e35e..5b231163d8 100644 --- a/hw/eepro100.c +++ b/hw/eepro100.c @@ -1036,6 +1036,7 @@ static void eepro100_ru_command(EEPRO100State * s, uint8_t val) } set_ru_state(s, ru_ready); s->ru_offset = e100_read_reg4(s, SCBPointer); + qemu_flush_queued_packets(&s->nic->nc); TRACE(OTHER, logout("val=0x%02x (rx start)\n", val)); break; case RX_RESUME: @@ -1770,7 +1771,8 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size) if (rfd_command & COMMAND_EL) { /* EL bit is set, so this was the last frame. */ logout("receive: Running out of frames\n"); - set_ru_state(s, ru_suspended); + set_ru_state(s, ru_no_resources); + eepro100_rnr_interrupt(s); } if (rfd_command & COMMAND_S) { /* S bit is set. 
*/ diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c index c84892c98d..e4a43599b5 100644 --- a/hw/usb/dev-network.c +++ b/hw/usb/dev-network.c @@ -1001,6 +1001,13 @@ static int rndis_keepalive_response(USBNetState *s, return 0; } +/* Prepare to receive the next packet */ +static void usb_net_reset_in_buf(USBNetState *s) +{ + s->in_ptr = s->in_len = 0; + qemu_flush_queued_packets(&s->nic->nc); +} + static int rndis_parse(USBNetState *s, uint8_t *data, int length) { uint32_t msg_type; @@ -1025,7 +1032,8 @@ static int rndis_parse(USBNetState *s, uint8_t *data, int length) case RNDIS_RESET_MSG: rndis_clear_responsequeue(s); - s->out_ptr = s->in_ptr = s->in_len = 0; + s->out_ptr = 0; + usb_net_reset_in_buf(s); return rndis_reset_response(s, (rndis_reset_msg_type *) data); case RNDIS_KEEPALIVE_MSG: @@ -1135,7 +1143,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p) int ret = USB_RET_NAK; if (s->in_ptr > s->in_len) { - s->in_ptr = s->in_len = 0; + usb_net_reset_in_buf(s); ret = USB_RET_NAK; return ret; } @@ -1152,7 +1160,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p) if (s->in_ptr >= s->in_len && (is_rndis(s) || (s->in_len & (64 - 1)) || !ret)) { /* no short packet necessary */ - s->in_ptr = s->in_len = 0; + usb_net_reset_in_buf(s); } #ifdef TRAFFIC_DEBUG @@ -1250,20 +1258,32 @@ static int usb_net_handle_data(USBDevice *dev, USBPacket *p) static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t size) { USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque; - struct rndis_packet_msg_type *msg; + uint8_t *in_buf = s->in_buf; + size_t total_size = size; if (is_rndis(s)) { - msg = (struct rndis_packet_msg_type *) s->in_buf; if (s->rndis_state != RNDIS_DATA_INITIALIZED) { return -1; } - if (size + sizeof(struct rndis_packet_msg_type) > sizeof(s->in_buf)) - return -1; + total_size += sizeof(struct rndis_packet_msg_type); + } + if (total_size > sizeof(s->in_buf)) { + return -1; + } + /* Only accept packet if input 
buffer is empty */ + if (s->in_len > 0) { + return 0; + } + + if (is_rndis(s)) { + struct rndis_packet_msg_type *msg; + + msg = (struct rndis_packet_msg_type *)in_buf; memset(msg, 0, sizeof(struct rndis_packet_msg_type)); msg->MessageType = cpu_to_le32(RNDIS_PACKET_MSG); - msg->MessageLength = cpu_to_le32(size + sizeof(struct rndis_packet_msg_type)); - msg->DataOffset = cpu_to_le32(sizeof(struct rndis_packet_msg_type) - 8); + msg->MessageLength = cpu_to_le32(size + sizeof(*msg)); + msg->DataOffset = cpu_to_le32(sizeof(*msg) - 8); msg->DataLength = cpu_to_le32(size); /* msg->OOBDataOffset; * msg->OOBDataLength; @@ -1273,14 +1293,11 @@ static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t siz * msg->VcHandle; * msg->Reserved; */ - memcpy(msg + 1, buf, size); - s->in_len = size + sizeof(struct rndis_packet_msg_type); - } else { - if (size > sizeof(s->in_buf)) - return -1; - memcpy(s->in_buf, buf, size); - s->in_len = size; + in_buf += sizeof(*msg); } + + memcpy(in_buf, buf, size); + s->in_len = total_size; s->in_ptr = 0; return size; } diff --git a/hw/virtio-net.c b/hw/virtio-net.c index b1998b27d3..6490743290 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -447,10 +447,6 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) VirtIONet *n = to_virtio_net(vdev); qemu_flush_queued_packets(&n->nic->nc); - - /* We now have RX buffers, signal to the IO thread to break out of the - * select to re-poll the tap file descriptor */ - qemu_notify_event(); } static int virtio_net_can_receive(NetClientState *nc) diff --git a/hw/xen_nic.c b/hw/xen_nic.c index 8b79bfb73e..cf7d5591b3 100644 --- a/hw/xen_nic.c +++ b/hw/xen_nic.c @@ -415,6 +415,7 @@ static void net_event(struct XenDevice *xendev) { struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); net_tx_packets(netdev); + qemu_flush_queued_packets(&netdev->nic->nc); } static int net_free(struct XenDevice *xendev) @@ -357,7 +357,12 @@ void 
qemu_flush_queued_packets(NetClientState *nc) { nc->receive_disabled = 0; - qemu_net_queue_flush(nc->send_queue); + if (qemu_net_queue_flush(nc->send_queue)) { + /* We emptied the queue successfully, signal to the IO thread to repoll + * the file descriptor (for tap, for example). + */ + qemu_notify_event(); + } } static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, @@ -418,16 +423,27 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, void *opaque) { NetClientState *nc = opaque; + int ret; if (nc->link_down) { return iov_size(iov, iovcnt); } + if (nc->receive_disabled) { + return 0; + } + if (nc->info->receive_iov) { - return nc->info->receive_iov(nc, iov, iovcnt); + ret = nc->info->receive_iov(nc, iov, iovcnt); } else { - return nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, iov, iovcnt); } + + if (ret == 0) { + nc->receive_disabled = 1; + } + + return ret; } ssize_t qemu_sendv_packet_async(NetClientState *sender, @@ -97,12 +97,12 @@ static int net_hub_port_can_receive(NetClientState *nc) continue; } - if (!qemu_can_send_packet(&port->nc)) { - return 0; + if (qemu_can_send_packet(&port->nc)) { + return 1; } } - return 1; + return 0; } static ssize_t net_hub_port_receive(NetClientState *nc, diff --git a/net/queue.c b/net/queue.c index e8030aafe4..254f28013a 100644 --- a/net/queue.c +++ b/net/queue.c @@ -83,12 +83,12 @@ void qemu_del_net_queue(NetQueue *queue) g_free(queue); } -static ssize_t qemu_net_queue_append(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const uint8_t *buf, - size_t size, - NetPacketSent *sent_cb) +static void qemu_net_queue_append(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const uint8_t *buf, + size_t size, + NetPacketSent *sent_cb) { NetPacket *packet; @@ -100,16 +100,14 @@ static ssize_t qemu_net_queue_append(NetQueue *queue, memcpy(packet->data, buf, size); QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - - return size; } -static ssize_t 
qemu_net_queue_append_iov(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const struct iovec *iov, - int iovcnt, - NetPacketSent *sent_cb) +static void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { NetPacket *packet; size_t max_len = 0; @@ -133,8 +131,6 @@ static ssize_t qemu_net_queue_append_iov(NetQueue *queue, } QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); - - return packet->size; } static ssize_t qemu_net_queue_deliver(NetQueue *queue, @@ -177,7 +173,8 @@ ssize_t qemu_net_queue_send(NetQueue *queue, ssize_t ret; if (queue->delivering || !qemu_can_send_packet(sender)) { - return qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + qemu_net_queue_append(queue, sender, flags, data, size, sent_cb); + return 0; } ret = qemu_net_queue_deliver(queue, sender, flags, data, size); @@ -201,8 +198,8 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue, ssize_t ret; if (queue->delivering || !qemu_can_send_packet(sender)) { - return qemu_net_queue_append_iov(queue, sender, flags, - iov, iovcnt, sent_cb); + qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb); + return 0; } ret = qemu_net_queue_deliver_iov(queue, sender, flags, iov, iovcnt); @@ -228,7 +225,7 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from) } } -void qemu_net_queue_flush(NetQueue *queue) +bool qemu_net_queue_flush(NetQueue *queue) { while (!QTAILQ_EMPTY(&queue->packets)) { NetPacket *packet; @@ -244,7 +241,7 @@ void qemu_net_queue_flush(NetQueue *queue) packet->size); if (ret == 0) { QTAILQ_INSERT_HEAD(&queue->packets, packet, entry); - break; + return false; } if (packet->sent_cb) { @@ -253,4 +250,5 @@ void qemu_net_queue_flush(NetQueue *queue) g_free(packet); } + return true; } diff --git a/net/queue.h b/net/queue.h index 9d44a9b3b8..fc02b33915 100644 --- a/net/queue.h +++ b/net/queue.h @@ -53,6 +53,6 @@ ssize_t 
qemu_net_queue_send_iov(NetQueue *queue, NetPacketSent *sent_cb); void qemu_net_queue_purge(NetQueue *queue, NetClientState *from); -void qemu_net_queue_flush(NetQueue *queue); +bool qemu_net_queue_flush(NetQueue *queue); #endif /* QEMU_NET_QUEUE_H */ diff --git a/net/socket.c b/net/socket.c index 7c602e4c3a..5e0c92e062 100644 --- a/net/socket.c +++ b/net/socket.c @@ -32,6 +32,7 @@ #include "qemu-error.h" #include "qemu-option.h" #include "qemu_socket.h" +#include "iov.h" typedef struct NetSocketState { NetClientState nc; @@ -40,29 +41,106 @@ typedef struct NetSocketState { int state; /* 0 = getting length, 1 = getting data */ unsigned int index; unsigned int packet_len; + unsigned int send_index; /* number of bytes sent (only SOCK_STREAM) */ uint8_t buf[4096]; struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */ + IOHandler *send_fn; /* differs between SOCK_STREAM/SOCK_DGRAM */ + bool read_poll; /* waiting to receive data? */ + bool write_poll; /* waiting to transmit data? */ } NetSocketState; static void net_socket_accept(void *opaque); +static void net_socket_writable(void *opaque); + +/* Only read packets from socket when peer can receive them */ +static int net_socket_can_send(void *opaque) +{ + NetSocketState *s = opaque; + + return qemu_can_send_packet(&s->nc); +} + +static void net_socket_update_fd_handler(NetSocketState *s) +{ + qemu_set_fd_handler2(s->fd, + s->read_poll ? net_socket_can_send : NULL, + s->read_poll ? s->send_fn : NULL, + s->write_poll ? 
net_socket_writable : NULL, + s); +} + +static void net_socket_read_poll(NetSocketState *s, bool enable) +{ + s->read_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_write_poll(NetSocketState *s, bool enable) +{ + s->write_poll = enable; + net_socket_update_fd_handler(s); +} + +static void net_socket_writable(void *opaque) +{ + NetSocketState *s = opaque; + + net_socket_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} -/* XXX: we consider we can send the whole packet without blocking */ static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); - uint32_t len; - len = htonl(size); - - send_all(s->fd, (const uint8_t *)&len, sizeof(len)); - return send_all(s->fd, buf, size); + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - s->send_index; + ret = iov_send(s->fd, iov, 2, s->send_index, remaining); + + if (ret == -1 && errno == EAGAIN) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + s->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + s->send_index += ret; + net_socket_write_poll(s, true); + return 0; + } + s->send_index = 0; + return size; } static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); + ssize_t ret; + + do { + ret = sendto(s->fd, buf, size, 0, + (struct sockaddr *)&s->dgram_dst, + sizeof(s->dgram_dst)); + } while (ret == -1 && errno == EINTR); - return sendto(s->fd, (const void *)buf, size, 0, - (struct sockaddr *)&s->dgram_dst, sizeof(s->dgram_dst)); + if (ret == -1 && errno == EAGAIN) { + net_socket_write_poll(s, true); + return 0; + } + return ret; } static void net_socket_send(void *opaque) @@ -81,7 +159,8 
@@ static void net_socket_send(void *opaque) } else if (size == 0) { /* end of connection */ eoc: - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); if (s->listen_fd != -1) { qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); } @@ -152,7 +231,8 @@ static void net_socket_send_dgram(void *opaque) return; if (size == 0) { /* end of connection */ - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); return; } qemu_send_packet(&s->nc, s->buf, size); @@ -243,7 +323,8 @@ static void net_socket_cleanup(NetClientState *nc) { NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc); if (s->fd != -1) { - qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + net_socket_read_poll(s, false); + net_socket_write_poll(s, false); close(s->fd); s->fd = -1; } @@ -314,8 +395,8 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, s->fd = fd; s->listen_fd = -1; - - qemu_set_fd_handler(s->fd, net_socket_send_dgram, NULL, s); + s->send_fn = net_socket_send_dgram; + net_socket_read_poll(s, true); /* mcast: save bound address as dst */ if (is_connected) { @@ -332,7 +413,8 @@ err: static void net_socket_connect(void *opaque) { NetSocketState *s = opaque; - qemu_set_fd_handler(s->fd, net_socket_send, NULL, s); + s->send_fn = net_socket_send; + net_socket_read_poll(s, true); } static NetClientInfo net_socket_info = { diff --git a/qemu-options.hx b/qemu-options.hx index 5f96f356c3..09c86c4cb0 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1362,6 +1362,7 @@ Valid values for @var{type} are Not all devices are supported on all targets. Use -net nic,model=? for a list of available devices for your target. +@item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...] @item -net user[,@var{option}][,@var{option}][,...] Use the user mode network stack which requires no administrator privilege to run. 
Valid options are: @@ -1370,6 +1371,7 @@ privilege to run. Valid options are: @item vlan=@var{n} Connect user mode stack to VLAN @var{n} (@var{n} = 0 is the default). +@item id=@var{id} @item name=@var{name} Assign symbolic name for use in monitor commands. @@ -1495,6 +1497,7 @@ processed and applied to -net user. Mixing them with the new configuration syntax gives undefined results. Their use for new applications is discouraged as they will be removed from future versions. +@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] @item -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] Connect the host TAP network interface @var{name} to VLAN @var{n}. @@ -1534,6 +1537,7 @@ qemu-system-i386 linux.img \ -net nic -net tap,"helper=/usr/local/libexec/qemu-bridge-helper" @end example +@item -netdev bridge,id=@var{id}[,br=@var{bridge}][,helper=@var{helper}] @item -net bridge[,vlan=@var{n}][,name=@var{name}][,br=@var{bridge}][,helper=@var{helper}] Connect a host TAP network interface to a host bridge device. 
@@ -1556,6 +1560,7 @@ qemu-system-i386 linux.img -net bridge -net nic,model=virtio qemu-system-i386 linux.img -net bridge,br=qemubr0 -net nic,model=virtio @end example +@item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}] [,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] Connect the VLAN @var{n} to a remote VLAN in another QEMU virtual @@ -1578,6 +1583,7 @@ qemu-system-i386 linux.img \ -net socket,connect=127.0.0.1:1234 @end example +@item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] Create a VLAN @var{n} shared with another QEMU virtual @@ -1629,6 +1635,7 @@ qemu-system-i386 linux.img \ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 @end example +@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}] Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and listening for incoming connections on @var{socketpath}. Use GROUP @var{groupname} |