diff options
-rw-r--r-- | hw/net/e1000.c | 8 | ||||
-rw-r--r-- | hw/net/vmxnet3.c | 19 | ||||
-rw-r--r-- | hw/net/vmxnet3.h | 6 | ||||
-rw-r--r-- | hw/net/vmxnet_tx_pkt.c | 19 | ||||
-rw-r--r-- | include/net/filter.h | 77 | ||||
-rw-r--r-- | include/net/net.h | 6 | ||||
-rw-r--r-- | include/net/queue.h | 20 | ||||
-rw-r--r-- | include/qemu/typedefs.h | 1 | ||||
-rw-r--r-- | net/Makefile.objs | 2 | ||||
-rw-r--r-- | net/filter-buffer.c | 186 | ||||
-rw-r--r-- | net/filter.c | 233 | ||||
-rw-r--r-- | net/net.c | 121 | ||||
-rw-r--r-- | net/queue.c | 24 | ||||
-rw-r--r-- | qapi-schema.json | 20 | ||||
-rw-r--r-- | qdev-monitor.c | 1 | ||||
-rw-r--r-- | qemu-options.hx | 17 | ||||
-rw-r--r-- | tests/.gitignore | 1 | ||||
-rw-r--r-- | tests/Makefile | 2 | ||||
-rw-r--r-- | tests/test-netfilter.c | 200 | ||||
-rw-r--r-- | vl.c | 19 |
20 files changed, 917 insertions, 65 deletions
diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 09c9e9d53b..910de3a7be 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1647,7 +1647,7 @@ static const TypeInfo e1000_base_info = { static const E1000Info e1000_devices[] = { { - .name = "e1000-82540em", + .name = "e1000", .device_id = E1000_DEV_ID_82540EM, .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, @@ -1666,11 +1666,6 @@ static const E1000Info e1000_devices[] = { }, }; -static const TypeInfo e1000_default_info = { - .name = "e1000", - .parent = "e1000-82540em", -}; - static void e1000_register_types(void) { int i; @@ -1688,7 +1683,6 @@ static void e1000_register_types(void) type_register(&type_info); } - type_register_static(&e1000_default_info); } type_init(e1000_register_types) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 04159c8222..3c5e10dd6d 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -729,9 +729,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) } if (txd.eop) { - if (!s->skip_current_tx_pkt) { - vmxnet_tx_pkt_parse(s->tx_pkt); - + if (!s->skip_current_tx_pkt && vmxnet_tx_pkt_parse(s->tx_pkt)) { if (s->needs_vlan) { vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); } @@ -1165,9 +1163,13 @@ vmxnet3_io_bar0_write(void *opaque, hwaddr addr, static uint64_t vmxnet3_io_bar0_read(void *opaque, hwaddr addr, unsigned size) { + VMXNET3State *s = opaque; + if (VMW_IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR, VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) { - g_assert_not_reached(); + int l = VMW_MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, + VMXNET3_REG_ALIGN); + return s->interrupt_states[l].is_masked; } VMW_CBPRN("BAR0 unknown read [%" PRIx64 "], size %d", addr, size); @@ -1629,6 +1631,11 @@ static void vmxnet3_handle_command(VMXNET3State *s, uint64_t cmd) VMW_CBPRN("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration"); break; + case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO: + VMW_CBPRN("Set: VMXNET3_CMD_GET_ADAPTIVE_RING_INFO - " + "adaptive ring info flags"); + break; + default: VMW_CBPRN("Received unknown command: %" PRIx64, cmd); break; @@ -1668,6 +1675,10 @@ static uint64_t vmxnet3_get_command_status(VMXNET3State *s) ret = vmxnet3_get_interrupt_config(s); break; + case VMXNET3_CMD_GET_ADAPTIVE_RING_INFO: + ret = VMXNET3_DISABLE_ADAPTIVE_RING; + break; + default: VMW_WRPRN("Received request for unknown command: %x", s->last_command); ret = -1; diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h index f987d71269..f7006afe96 100644 --- a/hw/net/vmxnet3.h +++ b/hw/net/vmxnet3.h @@ -198,9 +198,13 @@ enum { VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005 */ VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006 */ VMXNET3_CMD_GET_DEV_EXTRA_INFO, /* 0xF00D0007 */ - VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008 */ + VMXNET3_CMD_GET_CONF_INTR, /* 0xF00D0008 */ + VMXNET3_CMD_GET_ADAPTIVE_RING_INFO /* 0xF00D0009 */ }; +/* Adaptive Ring Info Flags */ +#define VMXNET3_DISABLE_ADAPTIVE_RING 1 + /* * Little Endian layout of bitfields - * Byte 0 : 7.....len.....0 diff --git a/hw/net/vmxnet_tx_pkt.c b/hw/net/vmxnet_tx_pkt.c index f7344c4cb3..eb88ddf254 100644 --- a/hw/net/vmxnet_tx_pkt.c +++ b/hw/net/vmxnet_tx_pkt.c @@ -142,11 +142,24 @@ static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, ETH_MAX_L2_HDR_LEN); - if (bytes_read < ETH_MAX_L2_HDR_LEN) { + if (bytes_read < sizeof(struct eth_header)) { + l2_hdr->iov_len = 0; + return false; + } + + l2_hdr->iov_len = sizeof(struct eth_header); + switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) { + case ETH_P_VLAN: + l2_hdr->iov_len += sizeof(struct vlan_header); + break; + case ETH_P_DVLAN: + l2_hdr->iov_len += 2 * sizeof(struct vlan_header); + break; + } + + if (bytes_read < l2_hdr->iov_len) { l2_hdr->iov_len = 0; return false; - } else { - l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); } l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); diff --git a/include/net/filter.h b/include/net/filter.h new file mode 100644 index 0000000000..2deda362a6 --- /dev/null +++ b/include/net/filter.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_NET_FILTER_H +#define QEMU_NET_FILTER_H + +#include "qom/object.h" +#include "qemu-common.h" +#include "qemu/typedefs.h" +#include "net/queue.h" + +#define TYPE_NETFILTER "netfilter" +#define NETFILTER(obj) \ + OBJECT_CHECK(NetFilterState, (obj), TYPE_NETFILTER) +#define NETFILTER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(NetFilterClass, (obj), TYPE_NETFILTER) +#define NETFILTER_CLASS(klass) \ + OBJECT_CLASS_CHECK(NetFilterClass, (klass), TYPE_NETFILTER) + +typedef void (FilterSetup) (NetFilterState *nf, Error **errp); +typedef void (FilterCleanup) (NetFilterState *nf); +/* + * Return: + * 0: finished handling the packet, we should continue + * size: filter stolen this packet, we stop pass this packet further + */ +typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +typedef struct NetFilterClass { + ObjectClass parent_class; + + /* optional */ + FilterSetup *setup; + FilterCleanup *cleanup; + /* mandatory */ + FilterReceiveIOV *receive_iov; +} NetFilterClass; + + +struct NetFilterState { + /* private */ + Object parent; + + /* protected */ + char *netdev_id; + NetClientState *netdev; + NetFilterDirection direction; + char info_str[256]; + QTAILQ_ENTRY(NetFilterState) next; +}; + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); + +/* pass the packet to the next filter */ +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque); + +#endif /* QEMU_NET_FILTER_H */ diff --git a/include/net/net.h b/include/net/net.h index 6a6cbef24a..7af3e15f83 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -92,6 +92,7 @@ struct NetClientState { NetClientDestructor *destructor; unsigned int queue_index; unsigned rxfilter_notify_enabled:1; + QTAILQ_HEAD(, NetFilterState) filters; }; typedef struct NICState { @@ -151,11 +152,6 @@ void qemu_check_nic_model(NICInfo *nd, const char *model); int qemu_find_nic_model(NICInfo *nd, const char * const *models, const char *default_model); -ssize_t qemu_deliver_packet(NetClientState *sender, - unsigned flags, - const uint8_t *data, - size_t size, - void *opaque); ssize_t qemu_deliver_packet_iov(NetClientState *sender, unsigned flags, const struct iovec *iov, diff --git a/include/net/queue.h b/include/net/queue.h index fc02b33915..5469fdbeaa 100644 --- a/include/net/queue.h +++ b/include/net/queue.h @@ -34,7 +34,25 @@ typedef void (NetPacketSent) (NetClientState *sender, ssize_t ret); #define QEMU_NET_PACKET_FLAG_NONE 0 #define QEMU_NET_PACKET_FLAG_RAW (1<<0) -NetQueue *qemu_new_net_queue(void *opaque); +/* Returns: + * >0 - success + * 0 - queue packet for future redelivery + * <0 - failure (discard packet) + */ +typedef ssize_t (NetQueueDeliverFunc)(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque); + +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque); + +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb); void qemu_del_net_queue(NetQueue *queue); diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 3a835ffb9b..ee1ce1d44d 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -45,6 +45,7 @@ typedef struct Monitor Monitor; typedef struct MouseTransformInfo MouseTransformInfo; typedef struct MSIMessage MSIMessage; typedef struct NetClientState NetClientState; +typedef struct NetFilterState NetFilterState; typedef struct NICInfo NICInfo; typedef struct PcGuestInfo PcGuestInfo; typedef struct PCIBridge PCIBridge; diff --git a/net/Makefile.objs b/net/Makefile.objs index ec19cb31d9..5fa2f9731d 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -13,3 +13,5 @@ common-obj-$(CONFIG_HAIKU) += tap-haiku.o common-obj-$(CONFIG_SLIRP) += slirp.o common-obj-$(CONFIG_VDE) += vde.o common-obj-$(CONFIG_NETMAP) += netmap.o +common-obj-y += filter.o +common-obj-y += filter-buffer.o diff --git a/net/filter-buffer.c b/net/filter-buffer.c new file mode 100644 index 0000000000..57be149413 --- /dev/null +++ b/net/filter-buffer.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "net/filter.h" +#include "net/queue.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/iov.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" + +#define TYPE_FILTER_BUFFER "filter-buffer" + +#define FILTER_BUFFER(obj) \ + OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER) + +typedef struct FilterBufferState { + NetFilterState parent_obj; + + NetQueue *incoming_queue; + uint32_t interval; + QEMUTimer release_timer; +} FilterBufferState; + +static void filter_buffer_flush(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +static void filter_buffer_release_timer(void *opaque) +{ + NetFilterState *nf = opaque; + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * Note: filter_buffer_flush() drops packets that can't be sent + * TODO: We should leave them queued. But currently there's no way + * for the next filter or receiver to notify us that it can receive + * more packets. + */ + filter_buffer_flush(nf); + /* Timer rearmed to fire again in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); +} + +/* filter APIs */ +static ssize_t filter_buffer_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We return size when buffer a packet, the sender will take it as + * a already sent packet, so sent_cb should not be called later. + * + * FIXME: Even if the guest can't receive packets for some reasons, + * the filter can still accept packets until its internal queue is full. + * For example: + * For some reason, receiver could not receive more packets + * (.can_receive() returns zero). Without a filter, at most one packet + * will be queued in incoming queue and sender's poll will be disabled + * unit its sent_cb() was called. With a filter, it will keep receiving + * the packets without caring about the receiver. This is suboptimal. + * May need more thoughts (e.g keeping sent_cb). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, flags, + iov, iovcnt, NULL); + return iov_size(iov, iovcnt); +} + +static void filter_buffer_cleanup(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_del(&s->release_timer); + } + + /* flush packets */ + if (s->incoming_queue) { + filter_buffer_flush(nf); + g_free(s->incoming_queue); + } +} + +static void filter_buffer_setup(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We may want to accept zero interval when VM FT solutions like MC + * or COLO use this filter to release packets on demand. + */ + if (!s->interval) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval", + "a non-zero interval"); + return; + } + + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + if (s->interval) { + timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL, + filter_buffer_release_timer, nf); + /* Timer armed to fire in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); + } +} + +static void filter_buffer_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_buffer_setup; + nfc->cleanup = filter_buffer_cleanup; + nfc->receive_iov = filter_buffer_receive_iov; +} + +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + uint32_t value = s->interval; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->interval = value; + +out: + error_propagate(errp, local_err); +} + +static void filter_buffer_init(Object *obj) +{ + object_property_add(obj, "interval", "int", + filter_buffer_get_interval, + filter_buffer_set_interval, NULL, NULL, NULL); +} + +static const TypeInfo filter_buffer_info = { + .name = TYPE_FILTER_BUFFER, + .parent = TYPE_NETFILTER, + .class_init = filter_buffer_class_init, + .instance_init = filter_buffer_init, + .instance_size = sizeof(FilterBufferState), +}; + +static void register_types(void) +{ + type_register_static(&filter_buffer_info); +} + +type_init(register_types); diff --git a/net/filter.c b/net/filter.c new file mode 100644 index 0000000000..326f2b5ac8 --- /dev/null +++ b/net/filter.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" + +#include "net/filter.h" +#include "net/net.h" +#include "net/vhost_net.h" +#include "qom/object_interfaces.h" +#include "qemu/iov.h" +#include "qapi/string-output-visitor.h" + +ssize_t qemu_netfilter_receive(NetFilterState *nf, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + if (nf->direction == direction || + nf->direction == NET_FILTER_DIRECTION_ALL) { + return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov( + nf, sender, flags, iov, iovcnt, sent_cb); + } + + return 0; +} + +ssize_t qemu_netfilter_pass_to_next(NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + void *opaque) +{ + int ret = 0; + int direction; + NetFilterState *nf = opaque; + NetFilterState *next = QTAILQ_NEXT(nf, next); + + if (!sender || !sender->peer) { + /* no receiver, or sender been deleted, no need to pass it further */ + goto out; + } + + if (nf->direction == NET_FILTER_DIRECTION_ALL) { + if (sender == nf->netdev) { + /* This packet is sent by netdev itself */ + direction = NET_FILTER_DIRECTION_TX; + } else { + direction = NET_FILTER_DIRECTION_RX; + } + } else { + direction = nf->direction; + } + + while (next) { + /* + * if qemu_netfilter_pass_to_next been called, means that + * the packet has been hold by filter and has already retured size + * to the sender, so sent_cb shouldn't be called later, just + * pass NULL to next. + */ + ret = qemu_netfilter_receive(next, direction, sender, flags, iov, + iovcnt, NULL); + if (ret) { + return ret; + } + next = QTAILQ_NEXT(next, next); + } + + /* + * We have gone through all filters, pass it to receiver. + * Do the valid check again incase sender or receiver been + * deleted while we go through filters. + */ + if (sender && sender->peer) { + qemu_net_queue_send_iov(sender->peer->incoming_queue, + sender, flags, iov, iovcnt, NULL); + } + +out: + /* no receiver, or sender been deleted */ + return iov_size(iov, iovcnt); +} + +static char *netfilter_get_netdev_id(Object *obj, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + return g_strdup(nf->netdev_id); +} + +static void netfilter_set_netdev_id(Object *obj, const char *str, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + + nf->netdev_id = g_strdup(str); +} + +static int netfilter_get_direction(Object *obj, Error **errp G_GNUC_UNUSED) +{ + NetFilterState *nf = NETFILTER(obj); + return nf->direction; +} + +static void netfilter_set_direction(Object *obj, int direction, Error **errp) +{ + NetFilterState *nf = NETFILTER(obj); + nf->direction = direction; +} + +static void netfilter_init(Object *obj) +{ + object_property_add_str(obj, "netdev", + netfilter_get_netdev_id, netfilter_set_netdev_id, + NULL); + object_property_add_enum(obj, "queue", "NetFilterDirection", + NetFilterDirection_lookup, + netfilter_get_direction, netfilter_set_direction, + NULL); +} + +static void netfilter_complete(UserCreatable *uc, Error **errp) +{ + NetFilterState *nf = NETFILTER(uc); + NetClientState *ncs[MAX_QUEUE_NUM]; + NetFilterClass *nfc = NETFILTER_GET_CLASS(uc); + int queues; + Error *local_err = NULL; + char *str, *info; + ObjectProperty *prop; + StringOutputVisitor *ov; + + if (!nf->netdev_id) { + error_setg(errp, "Parameter 'netdev' is required"); + return; + } + + queues = qemu_find_net_clients_except(nf->netdev_id, ncs, + NET_CLIENT_OPTIONS_KIND_NIC, + MAX_QUEUE_NUM); + if (queues < 1) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "netdev", + "a network backend id"); + return; + } else if (queues > 1) { + error_setg(errp, "multiqueue is not supported"); + return; + } + + if (get_vhost_net(ncs[0])) { + error_setg(errp, "Vhost is not supported"); + return; + } + + nf->netdev = ncs[0]; + + if (nfc->setup) { + nfc->setup(nf, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + QTAILQ_INSERT_TAIL(&nf->netdev->filters, nf, next); + + /* generate info str */ + QTAILQ_FOREACH(prop, &OBJECT(nf)->properties, node) { + if (!strcmp(prop->name, "type")) { + continue; + } + ov = string_output_visitor_new(false); + object_property_get(OBJECT(nf), string_output_get_visitor(ov), + prop->name, errp); + str = string_output_get_string(ov); + string_output_visitor_cleanup(ov); + info = g_strdup_printf(",%s=%s", prop->name, str); + g_strlcat(nf->info_str, info, sizeof(nf->info_str)); + g_free(str); + g_free(info); + } +} + +static void netfilter_finalize(Object *obj) +{ + NetFilterState *nf = NETFILTER(obj); + NetFilterClass *nfc = NETFILTER_GET_CLASS(obj); + + if (nfc->cleanup) { + nfc->cleanup(nf); + } + + if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters)) { + QTAILQ_REMOVE(&nf->netdev->filters, nf, next); + } +} + +static void netfilter_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = netfilter_complete; +} + +static const TypeInfo netfilter_info = { + .name = TYPE_NETFILTER, + .parent = TYPE_OBJECT, + .abstract = true, + .class_size = sizeof(NetFilterClass), + .class_init = netfilter_class_init, + .instance_size = sizeof(NetFilterState), + .instance_init = netfilter_init, + .instance_finalize = netfilter_finalize, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&netfilter_info); +} + +type_init(register_types); @@ -44,6 +44,7 @@ #include "qapi/opts-visitor.h" #include "qapi/dealloc-visitor.h" #include "sysemu/sysemu.h" +#include "net/filter.h" /* Net bridge is currently not supported for W32. */ #if !defined(_WIN32) @@ -285,8 +286,9 @@ static void qemu_net_client_setup(NetClientState *nc, } QTAILQ_INSERT_TAIL(&net_clients, nc, next); - nc->incoming_queue = qemu_new_net_queue(nc); + nc->incoming_queue = qemu_new_net_queue(qemu_deliver_packet_iov, nc); nc->destructor = destructor; + QTAILQ_INIT(&nc->filters); } NetClientState *qemu_new_net_client(NetClientInfo *info, @@ -384,6 +386,7 @@ void qemu_del_net_client(NetClientState *nc) { NetClientState *ncs[MAX_QUEUE_NUM]; int queues, i; + NetFilterState *nf, *next; assert(nc->info->type != NET_CLIENT_OPTIONS_KIND_NIC); @@ -395,6 +398,10 @@ void qemu_del_net_client(NetClientState *nc) MAX_QUEUE_NUM); assert(queues != 0); + QTAILQ_FOREACH_SAFE(nf, &nc->filters, next, next) { + object_unparent(OBJECT(nf)); + } + /* If there is a peer NIC, delete and cleanup client, but do not free. */ if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_NIC) { NICState *nic = qemu_get_nic(nc->peer); @@ -554,34 +561,42 @@ int qemu_can_send_packet(NetClientState *sender) return 1; } -ssize_t qemu_deliver_packet(NetClientState *sender, - unsigned flags, - const uint8_t *data, - size_t size, - void *opaque) +static ssize_t filter_receive_iov(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { - NetClientState *nc = opaque; - ssize_t ret; + ssize_t ret = 0; + NetFilterState *nf = NULL; - if (nc->link_down) { - return size; - } - - if (nc->receive_disabled) { - return 0; + QTAILQ_FOREACH(nf, &nc->filters, next) { + ret = qemu_netfilter_receive(nf, direction, sender, flags, iov, + iovcnt, sent_cb); + if (ret) { + return ret; + } } - if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { - ret = nc->info->receive_raw(nc, data, size); - } else { - ret = nc->info->receive(nc, data, size); - } + return ret; +} - if (ret == 0) { - nc->receive_disabled = 1; - } +static ssize_t filter_receive(NetClientState *nc, + NetFilterDirection direction, + NetClientState *sender, + unsigned flags, + const uint8_t *data, + size_t size, + NetPacketSent *sent_cb) +{ + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; - return ret; + return filter_receive_iov(nc, direction, sender, flags, &iov, 1, sent_cb); } void qemu_purge_queued_packets(NetClientState *nc) @@ -625,6 +640,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; #ifdef DEBUG_NET printf("qemu_send_packet_async:\n"); @@ -635,6 +651,19 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender, return size; } + /* Let filters handle the packet first */ + ret = filter_receive(sender, NET_FILTER_DIRECTION_TX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive(sender->peer, NET_FILTER_DIRECTION_RX, + sender, flags, buf, size, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb); @@ -660,14 +689,25 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const uint8_t *buf, int size) } static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, - int iovcnt) + int iovcnt, unsigned flags) { - uint8_t buffer[NET_BUFSIZE]; + uint8_t buf[NET_BUFSIZE]; + uint8_t *buffer; size_t offset; - offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); + if (iovcnt == 1) { + buffer = iov[0].iov_base; + offset = iov[0].iov_len; + } else { + buffer = buf; + offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); + } - return nc->info->receive(nc, buffer, offset); + if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { + return nc->info->receive_raw(nc, buffer, offset); + } else { + return nc->info->receive(nc, buffer, offset); + } } ssize_t qemu_deliver_packet_iov(NetClientState *sender, @@ -690,7 +730,7 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, if (nc->info->receive_iov) { ret = nc->info->receive_iov(nc, iov, iovcnt); } else { - ret = nc_sendv_compat(nc, iov, iovcnt); + ret = nc_sendv_compat(nc, iov, iovcnt, flags); } if (ret == 0) { @@ -705,11 +745,25 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender, NetPacketSent *sent_cb) { NetQueue *queue; + int ret; if (sender->link_down || !sender->peer) { return iov_size(iov, iovcnt); } + /* Let filters handle the packet first */ + ret = filter_receive_iov(sender, NET_FILTER_DIRECTION_TX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + + ret = filter_receive_iov(sender->peer, NET_FILTER_DIRECTION_RX, sender, + QEMU_NET_PACKET_FLAG_NONE, iov, iovcnt, sent_cb); + if (ret) { + return ret; + } + queue = sender->peer->incoming_queue; return qemu_net_queue_send_iov(queue, sender, @@ -1125,10 +1179,21 @@ void qmp_netdev_del(const char *id, Error **errp) void print_net_client(Monitor *mon, NetClientState *nc) { + NetFilterState *nf; + monitor_printf(mon, "%s: index=%d,type=%s,%s\n", nc->name, nc->queue_index, NetClientOptionsKind_lookup[nc->info->type], nc->info_str); + if (!QTAILQ_EMPTY(&nc->filters)) { + monitor_printf(mon, "filters:\n"); + } + QTAILQ_FOREACH(nf, &nc->filters, next) { + monitor_printf(mon, " - %s: type=%s%s\n", + object_get_canonical_path_component(OBJECT(nf)), + object_get_typename(OBJECT(nf)), + nf->info_str); + } } RxFilterInfoList *qmp_query_rx_filter(bool has_name, const char *name, diff --git a/net/queue.c b/net/queue.c index ebbe2bb93b..de8b9d31c6 100644 --- a/net/queue.c +++ b/net/queue.c @@ -52,13 +52,14 @@ struct NetQueue { void *opaque; uint32_t nq_maxlen; uint32_t nq_count; + NetQueueDeliverFunc *deliver; QTAILQ_HEAD(packets, NetPacket) packets; unsigned delivering : 1; }; -NetQueue *qemu_new_net_queue(void *opaque) +NetQueue *qemu_new_net_queue(NetQueueDeliverFunc *deliver, void *opaque) { NetQueue *queue; @@ -67,6 +68,7 @@ NetQueue *qemu_new_net_queue(void *opaque) queue->opaque = opaque; queue->nq_maxlen = 10000; queue->nq_count = 0; + queue->deliver = deliver; QTAILQ_INIT(&queue->packets); @@ -110,12 +112,12 @@ static void qemu_net_queue_append(NetQueue *queue, QTAILQ_INSERT_TAIL(&queue->packets, packet, entry); } -static void qemu_net_queue_append_iov(NetQueue *queue, - NetClientState *sender, - unsigned flags, - const struct iovec *iov, - int iovcnt, - NetPacketSent *sent_cb) +void qemu_net_queue_append_iov(NetQueue *queue, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) { NetPacket *packet; size_t max_len = 0; @@ -152,9 +154,13 @@ static ssize_t qemu_net_queue_deliver(NetQueue *queue, size_t size) { ssize_t ret = -1; + struct iovec iov = { + .iov_base = (void *)data, + .iov_len = size + }; queue->delivering = 1; - ret = qemu_deliver_packet(sender, flags, data, size, queue->opaque); + ret = queue->deliver(sender, flags, &iov, 1, queue->opaque); queue->delivering = 0; return ret; @@ -169,7 +175,7 @@ static ssize_t qemu_net_queue_deliver_iov(NetQueue *queue, ssize_t ret = -1; queue->delivering = 1; - ret = qemu_deliver_packet_iov(sender, flags, iov, iovcnt, queue->opaque); + ret = queue->deliver(sender, flags, iov, iovcnt, queue->opaque); queue->delivering = 0; return ret; diff --git a/qapi-schema.json b/qapi-schema.json index 8b0520c2d0..a386605b6a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2588,6 +2588,26 @@ 'opts': 'NetClientOptions' } } ## +# @NetFilterDirection +# +# Indicates whether a netfilter is attached to a netdev's transmit queue or +# receive queue or both. +# +# @all: the filter is attached both to the receive and the transmit +# queue of the netdev (default). +# +# @rx: the filter is attached to the receive queue of the netdev, +# where it will receive packets sent to the netdev. +# +# @tx: the filter is attached to the transmit queue of the netdev, +# where it will receive packets sent by the netdev. +# +# Since 2.5 +## +{ 'enum': 'NetFilterDirection', + 'data': [ 'all', 'rx', 'tx' ] } + +## # @InetSocketAddress # # Captures a socket address or address range in the Internet namespace. diff --git a/qdev-monitor.c b/qdev-monitor.c index 1cadefbb13..a35098f711 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -50,6 +50,7 @@ static const QDevAlias qdev_alias_table[] = { { "lsi53c895a", "lsi" }, { "ich9-ahci", "ahci" }, { "kvm-pci-assign", "pci-assign" }, + { "e1000", "e1000-82540em" }, { } }; diff --git a/qemu-options.hx b/qemu-options.hx index 328404ca18..2485b94b16 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3643,6 +3643,23 @@ in PEM format, in filenames @var{ca-cert.pem}, @var{ca-crl.pem} (optional), @var{server-cert.pem} (only servers), @var{server-key.pem} (only servers), @var{client-cert.pem} (only clients), and @var{client-key.pem} (only clients). +@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}] + +Interval @var{t} can't be 0, this filter batches the packet delivery: all +packets arriving in a given interval on netdev @var{netdevid} are delayed +until the end of the interval. Interval is in microseconds. + +queue @var{all|rx|tx} is an option that can be applied to any netfilter. + +@option{all}: the filter is attached both to the receive and the transmit + queue of the netdev (default). + +@option{rx}: the filter is attached to the receive queue of the netdev, + where it will receive packets sent to the netdev. + +@option{tx}: the filter is attached to the transmit queue of the netdev, + where it will receive packets sent by the netdev. + @end table ETEXI diff --git a/tests/.gitignore b/tests/.gitignore index a607bddce4..65496aa5a6 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -49,5 +49,6 @@ test-vmstate test-write-threshold test-x86-cpuid test-xbzrle +test-netfilter *-test qapi-schema/*.test.* diff --git a/tests/Makefile b/tests/Makefile index 5a4732f75a..dbd32a670a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -194,6 +194,7 @@ gcov-files-i386-y += hw/pci-host/q35.c ifeq ($(CONFIG_VHOST_NET),y) check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) endif +check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) @@ -438,6 +439,7 @@ tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) +tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) ifeq ($(CONFIG_POSIX),y) LIBS += -lutil diff --git a/tests/test-netfilter.c b/tests/test-netfilter.c new file mode 100644 index 0000000000..303deb7e09 --- /dev/null +++ b/tests/test-netfilter.c @@ -0,0 +1,200 @@ +/* + * QTest testcase for netfilter + * + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include <glib.h> +#include "libqtest.h" + +/* add a netfilter to a netdev and then remove it */ +static void add_one_netfilter(void) +{ + QDict *response; + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f0'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'object-del'," + " 'arguments': {" + " 'id': 'qtest-f0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); +} + +/* add a netfilter to a netdev and then remove the netdev */ +static void remove_netdev_with_one_netfilter(void) +{ + QDict *response; + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f0'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'netdev_del'," + " 'arguments': {" + " 'id': 'qtest-bn0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + /* add back the netdev */ + response = qmp("{'execute': 'netdev_add'," + " 'arguments': {" + " 'type': 'user'," + " 'id': 'qtest-bn0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); +} + +/* add multi(2) netfilters to a netdev and then remove them */ +static void add_multi_netfilter(void) +{ + QDict *response; + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f0'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f1'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'object-del'," + " 'arguments': {" + " 'id': 'qtest-f0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'object-del'," + " 'arguments': {" + " 'id': 'qtest-f1'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); +} + +/* add multi(2) netfilters to a netdev and then remove the netdev */ +static void remove_netdev_with_multi_netfilter(void) +{ + QDict *response; + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f0'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'object-add'," + " 'arguments': {" + " 'qom-type': 'filter-buffer'," + " 'id': 'qtest-f1'," + " 'props': {" + " 'netdev': 'qtest-bn0'," + " 'queue': 'rx'," + " 'interval': 1000" + "}}}"); + + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + response = qmp("{'execute': 'netdev_del'," + " 'arguments': {" + " 'id': 'qtest-bn0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + /* add back the netdev */ + response = qmp("{'execute': 'netdev_add'," + " 'arguments': {" + " 'type': 'user'," + " 'id': 'qtest-bn0'" + "}}"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); +} + +int main(int argc, char **argv) +{ + int ret; + + g_test_init(&argc, &argv, NULL); + qtest_add_func("/netfilter/addremove_one", add_one_netfilter); + qtest_add_func("/netfilter/remove_netdev_one", + remove_netdev_with_one_netfilter); + qtest_add_func("/netfilter/addremove_multi", add_multi_netfilter); + qtest_add_func("/netfilter/remove_netdev_multi", + remove_netdev_with_multi_netfilter); + + qtest_start("-netdev user,id=qtest-bn0 -device e1000,netdev=qtest-bn0"); + ret = g_test_run(); + + qtest_end(); + + return ret; +} @@ -2747,13 +2747,18 @@ static bool object_create_initial(const char *type) if (g_str_equal(type, "rng-egd")) { return false; } + + if (g_str_equal(type, "filter-buffer")) { + return false; + } + return true; } /* * The remainder of object creation happens after the - * creation of chardev, fsdev and device data types. + * creation of chardev, fsdev, net clients and device data types. */ static bool object_create_delayed(const char *type) { @@ -4259,12 +4264,6 @@ int main(int argc, char **argv, char **envp) exit(0); } - if (qemu_opts_foreach(qemu_find_opts("object"), - object_create, - object_create_delayed, NULL)) { - exit(1); - } - machine_opts = qemu_get_machine_opts(); if (qemu_opt_foreach(machine_opts, machine_set_property, current_machine, NULL)) { @@ -4370,6 +4369,12 @@ int main(int argc, char **argv, char **envp) exit(1); } + if (qemu_opts_foreach(qemu_find_opts("object"), + object_create, + object_create_delayed, NULL)) { + exit(1); + } + #ifdef CONFIG_TPM if (tpm_init() < 0) { exit(1); |