diff options
author | Yang Hongyang <yanghy@cn.fujitsu.com> | 2015-10-07 11:52:21 +0800 |
---|---|---|
committer | Jason Wang <jasowang@redhat.com> | 2015-10-12 13:31:28 +0800 |
commit | 7dbb11c84f25e20301b47a77102db00d68a2c4a4 (patch) | |
tree | 422dfd4290eef95e33f27c929af3fe2178f0dc09 | |
parent | b68c7f76926dee3f234ccee88f3167b640d9318e (diff) |
netfilter: add a netbuffer filter
This filter is to buffer/release packets. Can be used when using
MicroCheckpointing or other Remus like VM FT solutions.
You can also use it to crudely simulate network delay. Doesn't
actually delay individual packets, but batches them together, which is
a delay of sorts.
Usage:
-netdev tap,id=bn0
-object filter-buffer,id=f0,netdev=bn0,queue=rx,interval=1000
NOTE:
Interval is in microseconds, it can't be omitted currently, and can't be 0.
Signed-off-by: Yang Hongyang <yanghy@cn.fujitsu.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
-rw-r--r-- | net/Makefile.objs | 1 | ||||
-rw-r--r-- | net/filter-buffer.c | 186 | ||||
-rw-r--r-- | qemu-options.hx | 17 | ||||
-rw-r--r-- | vl.c | 6 |
4 files changed, 209 insertions, 1 deletions
diff --git a/net/Makefile.objs b/net/Makefile.objs index 914aec00a4..5fa2f9731d 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -14,3 +14,4 @@ common-obj-$(CONFIG_SLIRP) += slirp.o common-obj-$(CONFIG_VDE) += vde.o common-obj-$(CONFIG_NETMAP) += netmap.o common-obj-y += filter.o +common-obj-y += filter-buffer.o diff --git a/net/filter-buffer.c b/net/filter-buffer.c new file mode 100644 index 0000000000..57be149413 --- /dev/null +++ b/net/filter-buffer.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <yanghy@cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "net/filter.h" +#include "net/queue.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/iov.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" + +#define TYPE_FILTER_BUFFER "filter-buffer" + +#define FILTER_BUFFER(obj) \ + OBJECT_CHECK(FilterBufferState, (obj), TYPE_FILTER_BUFFER) + +typedef struct FilterBufferState { + NetFilterState parent_obj; + + NetQueue *incoming_queue; + uint32_t interval; + QEMUTimer release_timer; +} FilterBufferState; + +static void filter_buffer_flush(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +static void filter_buffer_release_timer(void *opaque) +{ + NetFilterState *nf = opaque; + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * Note: filter_buffer_flush() drops packets that can't be sent + * TODO: We should leave them queued. But currently there's no way + * for the next filter or receiver to notify us that it can receive + * more packets. + */ + filter_buffer_flush(nf); + /* Timer rearmed to fire again in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); +} + +/* filter APIs */ +static ssize_t filter_buffer_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We return size when buffer a packet, the sender will take it as + * a already sent packet, so sent_cb should not be called later. + * + * FIXME: Even if the guest can't receive packets for some reasons, + * the filter can still accept packets until its internal queue is full. + * For example: + * For some reason, receiver could not receive more packets + * (.can_receive() returns zero). Without a filter, at most one packet + * will be queued in incoming queue and sender's poll will be disabled + * unit its sent_cb() was called. With a filter, it will keep receiving + * the packets without caring about the receiver. This is suboptimal. + * May need more thoughts (e.g keeping sent_cb). + */ + qemu_net_queue_append_iov(s->incoming_queue, sender, flags, + iov, iovcnt, NULL); + return iov_size(iov, iovcnt); +} + +static void filter_buffer_cleanup(NetFilterState *nf) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + if (s->interval) { + timer_del(&s->release_timer); + } + + /* flush packets */ + if (s->incoming_queue) { + filter_buffer_flush(nf); + g_free(s->incoming_queue); + } +} + +static void filter_buffer_setup(NetFilterState *nf, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(nf); + + /* + * We may want to accept zero interval when VM FT solutions like MC + * or COLO use this filter to release packets on demand. + */ + if (!s->interval) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "interval", + "a non-zero interval"); + return; + } + + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); + if (s->interval) { + timer_init_us(&s->release_timer, QEMU_CLOCK_VIRTUAL, + filter_buffer_release_timer, nf); + /* Timer armed to fire in s->interval microseconds. */ + timer_mod(&s->release_timer, + qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval); + } +} + +static void filter_buffer_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = filter_buffer_setup; + nfc->cleanup = filter_buffer_cleanup; + nfc->receive_iov = filter_buffer_receive_iov; +} + +static void filter_buffer_get_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + uint32_t value = s->interval; + + visit_type_uint32(v, &value, name, errp); +} + +static void filter_buffer_set_interval(Object *obj, Visitor *v, void *opaque, + const char *name, Error **errp) +{ + FilterBufferState *s = FILTER_BUFFER(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, &value, name, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->interval = value; + +out: + error_propagate(errp, local_err); +} + +static void filter_buffer_init(Object *obj) +{ + object_property_add(obj, "interval", "int", + filter_buffer_get_interval, + filter_buffer_set_interval, NULL, NULL, NULL); +} + +static const TypeInfo filter_buffer_info = { + .name = TYPE_FILTER_BUFFER, + .parent = TYPE_NETFILTER, + .class_init = filter_buffer_class_init, + .instance_init = filter_buffer_init, + .instance_size = sizeof(FilterBufferState), +}; + +static void register_types(void) +{ + type_register_static(&filter_buffer_info); +} + +type_init(register_types); diff --git a/qemu-options.hx b/qemu-options.hx index 328404ca18..2485b94b16 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3643,6 +3643,23 @@ in PEM format, in filenames @var{ca-cert.pem}, @var{ca-crl.pem} (optional), @var{server-cert.pem} (only servers), @var{server-key.pem} (only servers), @var{client-cert.pem} (only clients), and @var{client-key.pem} (only clients). +@item -object filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}] + +Interval @var{t} can't be 0, this filter batches the packet delivery: all +packets arriving in a given interval on netdev @var{netdevid} are delayed +until the end of the interval. Interval is in microseconds. + +queue @var{all|rx|tx} is an option that can be applied to any netfilter. + +@option{all}: the filter is attached both to the receive and the transmit + queue of the netdev (default). + +@option{rx}: the filter is attached to the receive queue of the netdev, + where it will receive packets sent to the netdev. + +@option{tx}: the filter is attached to the transmit queue of the netdev, + where it will receive packets sent by the netdev. + @end table ETEXI @@ -2767,7 +2767,11 @@ static bool object_create_initial(const char *type) if (g_str_equal(type, "rng-egd")) { return false; } - /* TODO: implement netfilters */ + + if (g_str_equal(type, "filter-buffer")) { + return false; + } + return true; } |