-rw-r--r--  arch_init.c                    | 81
-rw-r--r--  cpus.c                         | 29
-rw-r--r--  include/migration/migration.h  |  2
-rw-r--r--  include/qemu-common.h          |  1
-rw-r--r--  include/qom/cpu.h              | 10
-rw-r--r--  migration.c                    |  9
-rw-r--r--  qapi-schema.json               |  2
-rw-r--r--  trace-events                   |  1
8 files changed, 134 insertions(+), 1 deletion(-)
diff --git a/arch_init.c b/arch_init.c
index 0e553c9285..e9dd96fb28 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -104,6 +104,9 @@ int graphic_depth = 32;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
 
 /***********************************************************/
 /* ram save/restore */
@@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
+    static int64_t bytes_xfer_prev;
     static int64_t num_dirty_pages_period;
     int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
 
     if (!start_time) {
         start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,25 @@ static void migration_bitmap_sync(void)
 
     /* more than 1 second = 1000 millisecons */
     if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes is 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+                (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                    (bytes_xfer_now - bytes_xfer_prev)/2) &&
+                (dirty_rate_high_cnt++ > 4)) {
+                trace_migration_throttle();
+                mig_throttle_on = true;
+                dirty_rate_high_cnt = 0;
+            }
+            bytes_xfer_prev = bytes_xfer_now;
+        } else {
+            mig_throttle_on = false;
+        }
         s->dirty_pages_rate = num_dirty_pages_period * 1000
             / (end_time - start_time);
         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_pages);
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         total_sent += bytes_sent;
         acct_info.iterations++;
+        check_guest_throttling();
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp)
 
     return info;
 }
+
+/* Stub function that's gets run on the vcpu when its brought out of the
+   VM to run inside qemu via async_run_on_cpu()*/
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catchup.
+   Workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+    qemu_mutex_lock_iothread();
+    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0) {
+        t0 = qemu_get_clock_ns(rt_clock);
+        return;
+    }
+
+    t1 = qemu_get_clock_ns(rt_clock);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1-t0)/1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}
diff --git a/cpus.c b/cpus.c
--- a/cpus.c
+++ b/cpus.c
@@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
 
     wi.func = func;
     wi.data = data;
+    wi.free = false;
     if (cpu->queued_work_first == NULL) {
         cpu->queued_work_first = &wi;
     } else {
@@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
     }
 }
 
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+{
+    struct qemu_work_item *wi;
+
+    if (qemu_cpu_is_self(cpu)) {
+        func(data);
+        return;
+    }
+
+    wi = g_malloc0(sizeof(struct qemu_work_item));
+    wi->func = func;
+    wi->data = data;
+    wi->free = true;
+    if (cpu->queued_work_first == NULL) {
+        cpu->queued_work_first = wi;
+    } else {
+        cpu->queued_work_last->next = wi;
+    }
+    cpu->queued_work_last = wi;
+    wi->next = NULL;
+    wi->done = false;
+
+    qemu_cpu_kick(cpu);
+}
+
 static void flush_queued_work(CPUState *cpu)
 {
     struct qemu_work_item *wi;
@@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu)
         cpu->queued_work_first = wi->next;
         wi->func(wi->data);
         wi->done = true;
+        if (wi->free) {
+            g_free(wi);
+        }
     }
     cpu->queued_work_last = NULL;
     qemu_cond_broadcast(&qemu_work_cond);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index f0640e0eec..bc9fde0b2a 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -125,6 +125,8 @@ void migrate_del_blocker(Error *reason);
 
 bool migrate_rdma_pin_all(void);
 
+bool migrate_auto_converge(void);
+
 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index f4397388f5..6948bb9177 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -293,6 +293,7 @@ struct qemu_work_item {
     void (*func)(void *data);
     void *data;
     int done;
+    bool free;
 };
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 147c256949..dfd81a1d2f 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -379,6 +379,16 @@ bool cpu_is_stopped(CPUState *cpu);
 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
 
 /**
+ * async_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously.
+ */
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+
+/**
  * qemu_for_each_cpu:
  * @func: The function to be executed.
  * @data: Data to pass to the function.
diff --git a/migration.c b/migration.c
index 0681d8e5f1..9f5a4230d1 100644
--- a/migration.c
+++ b/migration.c
@@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void)
 
     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
 }
 
+bool migrate_auto_converge(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index cf57783caf..7b9fef1bd1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -616,7 +616,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all'] }
+  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
 
 ##
 # @MigrationCapabilityStatus
diff --git a/trace-events b/trace-events
index 0acce7b350..7f6d9625a7 100644
--- a/trace-events
+++ b/trace-events
@@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u"
 # arch_init.c
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
+migration_throttle(void) ""
 
 # hw/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
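
A worked example of the trigger condition in migration_bitmap_sync() (the numbers below are illustrative only, not taken from the patch): with a 4 KiB TARGET_PAGE_SIZE, a sync period that dirtied 60,000 pages corresponds to roughly 245 MB of newly dirtied memory. If about 300 MB were transferred over the same period, the test num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev)/2 compares 245 MB against 150 MB and holds, so dirty_rate_high_cnt is bumped. Once the counter has exceeded 4, mig_throttle_on is set, and check_guest_throttling(), called from ram_save_iterate(), puts each vCPU to sleep for 30 ms via async_run_on_cpu(), at most once every 40 ms. The flag is cleared again in ram_save_setup() and whenever the capability is found to be off at the next sync.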
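
A minimal usage sketch, assuming a QMP monitor attached to the source VM and using the standard migrate-set-capabilities and migrate commands (the destination URI is a placeholder):

    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
          { "capability": "auto-converge", "state": true } ] } }
    { "execute": "migrate",
      "arguments": { "uri": "tcp:dest-host:4444" } }

The capability defaults to off; in that case migration_bitmap_sync() takes the else branch, mig_throttle_on stays false, and guest vCPUs are never slowed down.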