author    Peter Xu <peterx@redhat.com>          2019-06-03 14:50:56 +0800
committer Juan Quintela <quintela@redhat.com>   2019-07-15 15:39:03 +0200
commit    002cad6b16ca01d8dc8160038e139b47e5ca557e (patch)
tree      edf2fe1449cd7a6ab57d50530f0cc1a7c6c50b35 /include
parent    ff4aa11419242c835b03d274f08f797c129ed7ba (diff)
migration: Split log_clear() into smaller chunks
Currently we do log_clear() right after log_sync(), which mostly keeps the old behavior from when log_clear() was still part of log_sync(). This patch further optimizes the migration log_clear() code path by splitting huge log_clear()s into smaller chunks.

We do this by splitting the whole guest memory region into memory chunks whose size is decided by MigrationState.clear_bitmap_shift (an example is given below). With that, we don't do the dirty bitmap clear operation on the remote node (e.g., KVM) when we fetch the dirty bitmap; instead we explicitly clear the dirty bitmap for a memory chunk the first time we send a page in that chunk.

Here comes an example. Assume the guest has 64G of memory. Before this patch, the KVM ioctl KVM_CLEAR_DIRTY_LOG is a single call covering all 64G. After this patch, assuming a clear bitmap shift of 18, the memory chunk size on x86_64 will be (1UL << 18) * 4K = 1GB. Then, instead of sending one big 64G ioctl, we'll send 64 small ioctls, each covering 1G of guest memory. Each of the 64 small ioctls is only sent if a page in that small chunk is about to be sent right away.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20190603065056.25211-12-peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
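To make the arithmetic above concrete, the following standalone C sketch (not QEMU code; all names are illustrative) reproduces the chunk-size and ioctl-count calculation for a 64G guest with a clear bitmap shift of 18:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    const uint64_t guest_mem_bytes = 64ULL << 30;  /* 64G of guest memory */
    const uint64_t page_size       = 4096;         /* 4K pages on x86_64 */
    const unsigned clear_shift     = 18;           /* clear bitmap shift */

    /* One clear-bitmap bit covers (1 << shift) guest pages worth of memory. */
    uint64_t chunk_bytes = (1ULL << clear_shift) * page_size;  /* 1G */
    uint64_t num_ioctls  = guest_mem_bytes / chunk_bytes;      /* 64 */

    printf("chunk size: %" PRIu64 " bytes\n", chunk_bytes);
    printf("worst-case KVM_CLEAR_DIRTY_LOG ioctls: %" PRIu64 "\n", num_ioctls);
    return 0;
}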
Diffstat (limited to 'include')
-rw-r--r--  include/exec/ram_addr.h | 76
 1 file changed, 74 insertions(+), 2 deletions(-)
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 222b4338fb..b7b2e60ff6 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -51,8 +51,70 @@ struct RAMBlock {
unsigned long *unsentmap;
/* bitmap of already received pages in postcopy */
unsigned long *receivedmap;
+
+ /*
+ * Bitmap tracking which chunks of the dirty bitmap still need a
+ * log-clear. When a bit is set, the corresponding memory chunk
+ * needs a log-clear. Set this field to non-NULL to enable
+ * postponing and splitting the clearing of the dirty bitmap on
+ * the remote node (e.g., KVM). The bitmap is set only when doing
+ * a global sync.
+ *
+ * NOTE: this bitmap is different from the other bitmaps in that
+ * one bit can represent multiple guest pages (as decided by the
+ * `clear_bmap_shift' variable below). On the destination side,
+ * this should always be NULL, and the variable `clear_bmap_shift'
+ * is meaningless.
+ */
+ unsigned long *clear_bmap;
+ uint8_t clear_bmap_shift;
};
+/**
+ * clear_bmap_size: calculate clear bitmap size
+ *
+ * @pages: number of guest pages
+ * @shift: guest page number shift
+ *
+ * Returns: number of bits for the clear bitmap
+ */
+static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
+{
+ return DIV_ROUND_UP(pages, 1UL << shift);
+}
+
+/**
+ * clear_bmap_set: set clear bitmap for the page range
+ *
+ * @rb: the ramblock to operate on
+ * @start: the start page number
+ * @npages: number of pages to set in the bitmap
+ *
+ * Returns: None
+ */
+static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
+ uint64_t npages)
+{
+ uint8_t shift = rb->clear_bmap_shift;
+
+ bitmap_set_atomic(rb->clear_bmap, start >> shift,
+ clear_bmap_size(npages, shift));
+}
+
+/**
+ * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
+ *
+ * @rb: the ramblock to operate on
+ * @page: the page number to check
+ *
+ * Returns: true if the bit was set, false otherwise
+ */
+static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
+{
+ uint8_t shift = rb->clear_bmap_shift;
+
+ return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
+}
+
static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
return (b && b->host && offset < b->used_length) ? true : false;
@@ -463,8 +525,18 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
}
}
- /* TODO: split the huge bitmap into smaller chunks */
- memory_region_clear_dirty_bitmap(rb->mr, start, length);
+ if (rb->clear_bmap) {
+ /*
+ * Postpone the dirty bitmap clear to the point right before we
+ * really send the pages; also split the clear-dirty procedure
+ * into smaller chunks.
+ */
+ clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
+ length >> TARGET_PAGE_BITS);
+ } else {
+ /* Slow path - still do it in one huge chunk */
+ memory_region_clear_dirty_bitmap(rb->mr, start, length);
+ }
} else {
ram_addr_t offset = rb->offset;
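
For context, here is a hedged, illustrative C sketch (not the actual migration/ram.c code from this series) of how a sender-side path is expected to consume these helpers: bits are set chunk-wise at global-sync time via clear_bmap_set() above, and the first page sent out of a chunk triggers clear_bmap_test_and_clear() followed by a chunk-sized memory_region_clear_dirty_bitmap() call. The function name and its placement are assumptions for illustration; RAMBlock, hwaddr, TARGET_PAGE_BITS and memory_region_clear_dirty_bitmap() are QEMU-internal symbols.

/*
 * Illustrative only: clear the remote (e.g., KVM) dirty log for a whole
 * chunk the first time any page of that chunk is about to be sent.
 * Assumes QEMU-internal symbols; the function name is hypothetical.
 */
static void ramblock_clear_dirty_log_for_page(RAMBlock *rb, uint64_t page)
{
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        uint8_t shift = rb->clear_bmap_shift;
        /* One clear_bmap bit covers (1 << shift) guest pages. */
        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
        /* Align the page's address down to the start of its chunk. */
        hwaddr start = (page << TARGET_PAGE_BITS) & (-size);

        memory_region_clear_dirty_bitmap(rb->mr, start, size);
    }
}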