diff options
60 files changed, 2093 insertions, 749 deletions
diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst index a9acaf618e..9d1abd2587 100644 --- a/docs/devel/migration/features.rst +++ b/docs/devel/migration/features.rst @@ -10,3 +10,4 @@ Migration has plenty of features to support different use cases. dirty-limit vfio virtio + mapped-ram diff --git a/docs/devel/migration/mapped-ram.rst b/docs/devel/migration/mapped-ram.rst new file mode 100644 index 0000000000..fa4cefd9fc --- /dev/null +++ b/docs/devel/migration/mapped-ram.rst @@ -0,0 +1,138 @@ +Mapped-ram +========== + +Mapped-ram is a new stream format for the RAM section designed to +supplement the existing ``file:`` migration and make it compatible +with ``multifd``. This enables parallel migration of a guest's RAM to +a file. + +The core of the feature is to ensure that RAM pages are mapped +directly to offsets in the resulting migration file. This enables the +``multifd`` threads to write exclusively to those offsets even if the +guest is constantly dirtying pages (i.e. live migration). Another +benefit is that the resulting file will have a bounded size, since +pages which are dirtied multiple times will always go to a fixed +location in the file, rather than constantly being added to a +sequential stream. Having the pages at fixed offsets also allows the +usage of O_DIRECT for save/restore of the migration stream as the +pages are ensured to be written respecting O_DIRECT alignment +restrictions (direct-io support not yet implemented). + +Usage +----- + +On both source and destination, enable the ``multifd`` and +``mapped-ram`` capabilities: + + ``migrate_set_capability multifd on`` + + ``migrate_set_capability mapped-ram on`` + +Use a ``file:`` URL for migration: + + ``migrate file:/path/to/migration/file`` + +Mapped-ram migration is best done non-live, i.e. by stopping the VM on +the source side before migrating. + +Use-cases +--------- + +The mapped-ram feature was designed for use cases where the migration +stream will be directed to a file in the filesystem and not +immediately restored on the destination VM [#]_. These could be +thought of as snapshots. We can further categorize them into live and +non-live. + +- Non-live snapshot + +If the use case requires a VM to be stopped before taking a snapshot, +that's the ideal scenario for mapped-ram migration. Not having to +track dirty pages, the migration will write the RAM pages to the disk +as fast as it can. + +Note: if a snapshot is taken of a running VM, but the VM will be +stopped after the snapshot by the admin, then consider stopping it +right before the snapshot to take benefit of the performance gains +mentioned above. + +- Live snapshot + +If the use case requires that the VM keeps running during and after +the snapshot operation, then mapped-ram migration can still be used, +but will be less performant. Other strategies such as +background-snapshot should be evaluated as well. One benefit of +mapped-ram in this scenario is portability since background-snapshot +depends on async dirty tracking (KVM_GET_DIRTY_LOG) which is not +supported outside of Linux. + +.. [#] While this same effect could be obtained with the usage of + snapshots or the ``file:`` migration alone, mapped-ram provides + a performance increase for VMs with larger RAM sizes (10s to + 100s of GiBs), specially if the VM has been stopped beforehand. + +RAM section format +------------------ + +Instead of having a sequential stream of pages that follow the +RAMBlock headers, the dirty pages for a RAMBlock follow its header +instead. This ensures that each RAM page has a fixed offset in the +resulting migration file. + +A bitmap is introduced to track which pages have been written in the +migration file. Pages are written at a fixed location for every +ramblock. Zero pages are ignored as they'd be zero in the destination +migration as well. + +:: + + Without mapped-ram: With mapped-ram: + + --------------------- -------------------------------- + | ramblock 1 header | | ramblock 1 header | + --------------------- -------------------------------- + | ramblock 2 header | | ramblock 1 mapped-ram header | + --------------------- -------------------------------- + | ... | | padding to next 1MB boundary | + --------------------- | ... | + | ramblock n header | -------------------------------- + --------------------- | ramblock 1 pages | + | RAM_SAVE_FLAG_EOS | | ... | + --------------------- -------------------------------- + | stream of pages | | ramblock 2 header | + | (iter 1) | -------------------------------- + | ... | | ramblock 2 mapped-ram header | + --------------------- -------------------------------- + | RAM_SAVE_FLAG_EOS | | padding to next 1MB boundary | + --------------------- | ... | + | stream of pages | -------------------------------- + | (iter 2) | | ramblock 2 pages | + | ... | | ... | + --------------------- -------------------------------- + | ... | | ... | + --------------------- -------------------------------- + | RAM_SAVE_FLAG_EOS | + -------------------------------- + | ... | + -------------------------------- + +where: + - ramblock header: the generic information for a ramblock, such as + idstr, used_len, etc. + + - ramblock mapped-ram header: the information added by this feature: + bitmap of pages written, bitmap size and offset of pages in the + migration file. + +Restrictions +------------ + +Since pages are written to their relative offsets and out of order +(due to the memory dirtying patterns), streaming channels such as +sockets are not supported. A seekable channel such as a file is +required. This can be verified in the QIOChannel by the presence of +the QIO_CHANNEL_FEATURE_SEEKABLE. + +The improvements brought by this feature apply only to guest physical +RAM. Other types of memory such as VRAM are migrated as part of device +states. diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst index 6804a4b596..f453bd3546 100644 --- a/docs/devel/qapi-code-gen.rst +++ b/docs/devel/qapi-code-gen.rst @@ -996,7 +996,8 @@ line "Features:", like this:: A tagged section begins with a paragraph that starts with one of the following words: "Note:"/"Notes:", "Since:", "Example:"/"Examples:", -"Returns:", "TODO:". It ends with the start of a new section. +"Returns:", "Errors:", "TODO:". It ends with the start of a new +section. The second and subsequent lines of tagged sections must be indented like this:: @@ -1007,6 +1008,9 @@ like this:: # Duis aute irure dolor in reprehenderit in voluptate velit esse # cillum dolore eu fugiat nulla pariatur. +"Returns" and "Errors" sections are only valid for commands. They +document the success and the error response, respectively. + A "Since: x.y.z" tagged section lists the release that introduced the definition. diff --git a/docs/devel/writing-monitor-commands.rst b/docs/devel/writing-monitor-commands.rst index b6ee4fa063..930da5cd06 100644 --- a/docs/devel/writing-monitor-commands.rst +++ b/docs/devel/writing-monitor-commands.rst @@ -66,12 +66,13 @@ Then, in a different terminal:: "version": { "qemu": { "micro": 50, - "minor": 15, - "major": 0 + "minor": 2, + "major": 8 }, - "package": "" + "package": ... }, "capabilities": [ + "oob" ] } } @@ -107,10 +108,14 @@ The first step is defining the command in the appropriate QAPI schema module. We pick module qapi/misc.json, and add the following line at the bottom:: + ## + # @hello-world: + # + # Since: 9.0 + ## { 'command': 'hello-world' } -The "command" keyword defines a new QMP command. It's an JSON object. All -schema entries are JSON objects. The line above will instruct the QAPI to +The "command" keyword defines a new QMP command. It instructs QAPI to generate any prototypes and the necessary code to marshal and unmarshal protocol data. @@ -132,57 +137,70 @@ There are a few things to be noticed: 3. It takes an "Error \*\*" argument. This is required. Later we will see how to return errors and take additional arguments. The Error argument should not be touched if the command doesn't return errors -4. We won't add the function's prototype. That's automatically done by the QAPI +4. We won't add the function's prototype. That's automatically done by QAPI 5. Printing to the terminal is discouraged for QMP commands, we do it here because it's the easiest way to demonstrate a QMP command -You're done. Now build qemu, run it as suggested in the "Testing" section, +You're done. Now build QEMU, run it as suggested in the "Testing" section, and then type the following QMP command:: { "execute": "hello-world" } -Then check the terminal running qemu and look for the "Hello, world" string. If +Then check the terminal running QEMU and look for the "Hello, world" string. If you don't see it then something went wrong. Arguments ~~~~~~~~~ -Let's add an argument called "message" to our "hello-world" command. The new -argument will contain the string to be printed to stdout. It's an optional -argument, if it's not present we print our default "Hello, World" string. +Let's add arguments to our "hello-world" command. The first change we have to do is to modify the command specification in the schema file to the following:: - { 'command': 'hello-world', 'data': { '*message': 'str' } } + ## + # @hello-world: + # + # @message: message to be printed (default: "Hello, world!") + # + # @times: how many times to print the message (default: 1) + # + # Since: 9.0 + ## + { 'command': 'hello-world', + 'data': { '*message': 'str', '*times': 'int' } } -Notice the new 'data' member in the schema. It's an JSON object whose each -element is an argument to the command in question. Also notice the asterisk, -it's used to mark the argument optional (that means that you shouldn't use it -for mandatory arguments). Finally, 'str' is the argument's type, which -stands for "string". The QAPI also supports integers, booleans, enumerations -and user defined types. +Notice the new 'data' member in the schema. It specifies an argument +'message' of QAPI type 'str', and an argument 'times' of QAPI type +'int'. Also notice the asterisk, it's used to mark the argument +optional. Now, let's update our C implementation in monitor/qmp-cmds.c:: - void qmp_hello_world(const char *message, Error **errp) + void qmp_hello_world(const char *message, bool has_times, int64_t times, + Error **errp) { - if (message) { + if (!message) { + message = "Hello, world"; + } + if (!has_times) { + times = 1; + } + + for (int i = 0; i < times; i++) { printf("%s\n", message); - } else { - printf("Hello, world\n"); } } There are two important details to be noticed: -1. All optional arguments are accompanied by a 'has\_' boolean, which is set - if the optional argument is present or false otherwise +1. Optional arguments other than pointers are accompanied by a 'has\_' + boolean, which is set if the optional argument is present or false + otherwise 2. The C implementation signature must follow the schema's argument ordering, which is defined by the "data" member -Time to test our new version of the "hello-world" command. Build qemu, run it as +Time to test our new version of the "hello-world" command. Build QEMU, run it as described in the "Testing" section and then send two commands:: { "execute": "hello-world" } @@ -191,13 +209,13 @@ described in the "Testing" section and then send two commands:: } } - { "execute": "hello-world", "arguments": { "message": "We love qemu" } } + { "execute": "hello-world", "arguments": { "message": "We love QEMU" } } { "return": { } } -You should see "Hello, world" and "We love qemu" in the terminal running qemu, +You should see "Hello, world" and "We love QEMU" in the terminal running QEMU, if you don't see these strings, then something went wrong. @@ -227,7 +245,7 @@ The first argument to the error_setg() function is the Error pointer to pointer, which is passed to all QMP functions. The next argument is a human description of the error, this is a free-form printf-like string. -Let's test the example above. Build qemu, run it as defined in the "Testing" +Let's test the example above. Build QEMU, run it as defined in the "Testing" section, and then issue the following command:: { "execute": "hello-world", "arguments": { "message": "all you need is love" } } @@ -254,44 +272,14 @@ If the failure you want to report falls into one of the two cases above, use error_set() with a second argument of an ErrorClass value. -Command Documentation -~~~~~~~~~~~~~~~~~~~~~ - -There's only one step missing to make "hello-world"'s implementation complete, -and that's its documentation in the schema file. - -There are many examples of such documentation in the schema file already, but -here goes "hello-world"'s new entry for qapi/misc.json:: - - ## - # @hello-world: - # - # Print a client provided string to the standard output stream. - # - # @message: string to be printed - # - # Returns: Nothing on success. - # - # Notes: if @message is not provided, the "Hello, world" string will - # be printed instead - # - # Since: <next qemu stable release, eg. 1.0> - ## - { 'command': 'hello-world', 'data': { '*message': 'str' } } - -Please, note that the "Returns" clause is optional if a command doesn't return -any data nor any errors. - - Implementing the HMP command ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Now that the QMP command is in place, we can also make it available in the human monitor (HMP). -With the introduction of the QAPI, HMP commands make QMP calls. Most of the -time HMP commands are simple wrappers. All HMP commands implementation exist in -the monitor/hmp-cmds.c file. +With the introduction of QAPI, HMP commands make QMP calls. Most of the +time HMP commands are simple wrappers. Here's the implementation of the "hello-world" HMP command:: @@ -306,18 +294,20 @@ Here's the implementation of the "hello-world" HMP command:: } } -Also, you have to add the function's prototype to the hmp.h file. +Add it to monitor/hmp-cmds.c. Also, add its prototype to +include/monitor/hmp.h. -There are three important points to be noticed: +There are four important points to be noticed: 1. The "mon" and "qdict" arguments are mandatory for all HMP functions. The former is the monitor object. The latter is how the monitor passes arguments entered by the user to the command implementation -2. hmp_hello_world() performs error checking. In this example we just call +2. We chose not to support the "times" argument in HMP +3. hmp_hello_world() performs error checking. In this example we just call hmp_handle_error() which prints a message to the user, but we could do more, like taking different actions depending on the error qmp_hello_world() returns -3. The "err" variable must be initialized to NULL before performing the +4. The "err" variable must be initialized to NULL before performing the QMP call There's one last step to actually make the command available to monitor users, @@ -340,17 +330,17 @@ To test this you have to open a user monitor and issue the "hello-world" command. It might be instructive to check the command's documentation with HMP's "help" command. -Please, check the "-monitor" command-line option to know how to open a user +Please check the "-monitor" command-line option to know how to open a user monitor. Writing more complex commands ----------------------------- -A QMP command is capable of returning any data the QAPI supports like integers, +A QMP command is capable of returning any data QAPI supports like integers, strings, booleans, enumerations and user defined types. -In this section we will focus on user defined types. Please, check the QAPI +In this section we will focus on user defined types. Please check the QAPI documentation for information about the other types. @@ -372,7 +362,7 @@ data, it is not expected that machines will need to parse the result. The overhead of defining a fine grained QAPI type for the data may not be justified by the potential benefit. In such cases, it is permitted to have a command return a simple string that contains formatted data, -however, it is mandatory for the command to use the 'x-' name prefix. +however, it is mandatory for the command to be marked unstable. This indicates that the command is not guaranteed to be long term stable / liable to change in future and is not following QAPI design best practices. An example where this approach is taken is the QMP @@ -386,302 +376,207 @@ an illustration. User Defined Types ~~~~~~~~~~~~~~~~~~ -FIXME This example needs to be redone after commit 6d32717 +For this example we will write the query-option-roms command, which +returns information about ROMs loaded into the option ROM space. For +more information about it, please check the "-option-rom" command-line +option. -For this example we will write the query-alarm-clock command, which returns -information about QEMU's timer alarm. For more information about it, please -check the "-clock" command-line option. - -We want to return two pieces of information. The first one is the alarm clock's -name. The second one is when the next alarm will fire. The former information is -returned as a string, the latter is an integer in nanoseconds (which is not -very useful in practice, as the timer has probably already fired when the -information reaches the client). - -The best way to return that data is to create a new QAPI type, as shown below:: +For each option ROM, we want to return two pieces of information: the +ROM image's file name, and its bootindex, if any. We need to create a +new QAPI type for that, as shown below:: ## - # @QemuAlarmClock - # - # QEMU alarm clock information. + # @OptionRomInfo: # - # @clock-name: The alarm clock method's name. + # @filename: option ROM image file name # - # @next-deadline: The time (in nanoseconds) the next alarm will fire. + # @bootindex: option ROM's bootindex # - # Since: 1.0 + # Since: 9.0 ## - { 'type': 'QemuAlarmClock', - 'data': { 'clock-name': 'str', '*next-deadline': 'int' } } + { 'struct': 'OptionRomInfo', + 'data': { 'filename': 'str', '*bootindex': 'int' } } -The "type" keyword defines a new QAPI type. Its "data" member contains the -type's members. In this example our members are the "clock-name" and the -"next-deadline" one, which is optional. +The "struct" keyword defines a new QAPI type. Its "data" member +contains the type's members. In this example our members are +"filename" and "bootindex". The latter is optional. -Now let's define the query-alarm-clock command:: +Now let's define the query-option-roms command:: ## - # @query-alarm-clock + # @query-option-roms: # - # Return information about QEMU's alarm clock. + # Query information on ROMs loaded into the option ROM space. # - # Returns a @QemuAlarmClock instance describing the alarm clock method - # being currently used by QEMU (this is usually set by the '-clock' - # command-line option). + # Returns: OptionRomInfo # - # Since: 1.0 + # Since: 9.0 ## - { 'command': 'query-alarm-clock', 'returns': 'QemuAlarmClock' } + { 'command': 'query-option-roms', + 'returns': ['OptionRomInfo'] } Notice the "returns" keyword. As its name suggests, it's used to define the data returned by a command. -It's time to implement the qmp_query_alarm_clock() function, you can put it -in the qemu-timer.c file:: +Notice the syntax ['OptionRomInfo']". This should be read as "returns +a list of OptionRomInfo". - QemuAlarmClock *qmp_query_alarm_clock(Error **errp) - { - QemuAlarmClock *clock; - int64_t deadline; - - clock = g_malloc0(sizeof(*clock)); +It's time to implement the qmp_query_option_roms() function. Add to +monitor/qmp-cmds.c:: - deadline = qemu_next_alarm_deadline(); - if (deadline > 0) { - clock->has_next_deadline = true; - clock->next_deadline = deadline; + OptionRomInfoList *qmp_query_option_roms(Error **errp) + { + OptionRomInfoList *info_list = NULL; + OptionRomInfoList **tailp = &info_list; + OptionRomInfo *info; + + for (int i = 0; i < nb_option_roms; i++) { + info = g_malloc0(sizeof(*info)); + info->filename = g_strdup(option_rom[i].name); + info->has_bootindex = option_rom[i].bootindex >= 0; + if (info->has_bootindex) { + info->bootindex = option_rom[i].bootindex; + } + QAPI_LIST_APPEND(tailp, info); } - clock->clock_name = g_strdup(alarm_timer->name); - return clock; + return info_list; } There are a number of things to be noticed: -1. The QemuAlarmClock type is automatically generated by the QAPI framework, - its members correspond to the type's specification in the schema file -2. As specified in the schema file, the function returns a QemuAlarmClock - instance and takes no arguments (besides the "errp" one, which is mandatory - for all QMP functions) -3. The "clock" variable (which will point to our QAPI type instance) is - allocated by the regular g_malloc0() function. Note that we chose to - initialize the memory to zero. This is recommended for all QAPI types, as - it helps avoiding bad surprises (specially with booleans) -4. Remember that "next_deadline" is optional? Non-pointer optional - members have a 'has_TYPE_NAME' member that should be properly set +1. Type OptionRomInfo is automatically generated by the QAPI framework, + its members correspond to the type's specification in the schema + file +2. Type OptionRomInfoList is also generated. It's a singly linked + list. +3. As specified in the schema file, the function returns a + OptionRomInfoList, and takes no arguments (besides the "errp" one, + which is mandatory for all QMP functions) +4. The returned object is dynamically allocated +5. All strings are dynamically allocated. This is so because QAPI also + generates a function to free its types and it cannot distinguish + between dynamically or statically allocated strings +6. Remember that "bootindex" is optional? As a non-pointer optional + member, it comes with a 'has_bootindex' member that needs to be set by the implementation, as shown above -5. Even static strings, such as "alarm_timer->name", should be dynamically - allocated by the implementation. This is so because the QAPI also generates - a function to free its types and it cannot distinguish between dynamically - or statically allocated strings -6. You have to include "qapi/qapi-commands-misc.h" in qemu-timer.c -Time to test the new command. Build qemu, run it as described in the "Testing" +Time to test the new command. Build QEMU, run it as described in the "Testing" section and try this:: - { "execute": "query-alarm-clock" } + { "execute": "query-option-rom" } { - "return": { - "next-deadline": 2368219, - "clock-name": "dynticks" - } + "return": [ + { + "filename": "kvmvapic.bin" + } + ] } The HMP command ~~~~~~~~~~~~~~~ -Here's the HMP counterpart of the query-alarm-clock command:: +Here's the HMP counterpart of the query-option-roms command:: - void hmp_info_alarm_clock(Monitor *mon) + void hmp_info_option_roms(Monitor *mon, const QDict *qdict) { - QemuAlarmClock *clock; Error *err = NULL; + OptionRomInfoList *info_list, *tail; + OptionRomInfo *info; - clock = qmp_query_alarm_clock(&err); + info_list = qmp_query_option_roms(&err); if (hmp_handle_error(mon, err)) { return; } - monitor_printf(mon, "Alarm clock method in use: '%s'\n", clock->clock_name); - if (clock->has_next_deadline) { - monitor_printf(mon, "Next alarm will fire in %" PRId64 " nanoseconds\n", - clock->next_deadline); + for (tail = info_list; tail; tail = tail->next) { + info = tail->value; + monitor_printf(mon, "%s", info->filename); + if (info->has_bootindex) { + monitor_printf(mon, " %" PRId64, info->bootindex); + } + monitor_printf(mon, "\n"); } - qapi_free_QemuAlarmClock(clock); + qapi_free_OptionRomInfoList(info_list); } -It's important to notice that hmp_info_alarm_clock() calls -qapi_free_QemuAlarmClock() to free the data returned by qmp_query_alarm_clock(). -For user defined types, the QAPI will generate a qapi_free_QAPI_TYPE_NAME() -function and that's what you have to use to free the types you define and -qapi_free_QAPI_TYPE_NAMEList() for list types (explained in the next section). -If the QMP call returns a string, then you should g_free() to free it. - -Also note that hmp_info_alarm_clock() performs error handling. That's not -strictly required if you're sure the QMP function doesn't return errors, but -it's good practice to always check for errors. - -Another important detail is that HMP's "info" commands don't go into the -hmp-commands.hx. Instead, they go into the info_cmds[] table, which is defined -in the monitor/misc.c file. The entry for the "info alarmclock" follows:: - - { - .name = "alarmclock", - .args_type = "", - .params = "", - .help = "show information about the alarm clock", - .cmd = hmp_info_alarm_clock, - }, +It's important to notice that hmp_info_option_roms() calls +qapi_free_OptionRomInfoList() to free the data returned by +qmp_query_option_roms(). For user defined types, QAPI will generate a +qapi_free_QAPI_TYPE_NAME() function, and that's what you have to use to +free the types you define and qapi_free_QAPI_TYPE_NAMEList() for list +types (explained in the next section). If the QMP function returns a +string, then you should g_free() to free it. + +Also note that hmp_info_option_roms() performs error handling. That's +not strictly required when you're sure the QMP function doesn't return +errors; you could instead pass it &error_abort then. + +Another important detail is that HMP's "info" commands go into +hmp-commands-info.hx, not hmp-commands.hx. The entry for the "info +option-roms" follows:: + + { + .name = "option-roms", + .args_type = "", + .params = "", + .help = "show roms", + .cmd = hmp_info_option_roms, + }, + SRST + ``info option-roms`` + Show the option ROMs. + ERST -To test this, run qemu and type "info alarmclock" in the user monitor. +To test this, run QEMU and type "info option-roms" in the user monitor. -Returning Lists -~~~~~~~~~~~~~~~ +Writing a debugging aid returning unstructured text +--------------------------------------------------- -For this example, we're going to return all available methods for the timer -alarm, which is pretty much what the command-line option "-clock ?" does, -except that we're also going to inform which method is in use. +As discussed in section `Modelling data in QAPI`_, it is required that +commands expecting machine usage be using fine-grained QAPI data types. +The exception to this rule applies when the command is solely intended +as a debugging aid and allows for returning unstructured text, such as +a query command that report aspects of QEMU's internal state that are +useful only to human operators. -This first step is to define a new type:: +In this example we will consider the existing QMP command +``x-query-roms`` in qapi/machine.json. It has no parameters and +returns a ``HumanReadableText``:: ## - # @TimerAlarmMethod - # - # Timer alarm method information. + # @x-query-roms: # - # @method-name: The method's name. + # Query information on the registered ROMS # - # @current: true if this alarm method is currently in use, false otherwise + # Features: # - # Since: 1.0 - ## - { 'type': 'TimerAlarmMethod', - 'data': { 'method-name': 'str', 'current': 'bool' } } - -The command will be called "query-alarm-methods", here is its schema -specification:: - - ## - # @query-alarm-methods + # @unstable: This command is meant for debugging. # - # Returns information about available alarm methods. + # Returns: registered ROMs # - # Returns: a list of @TimerAlarmMethod for each method - # - # Since: 1.0 + # Since: 6.2 ## - { 'command': 'query-alarm-methods', 'returns': ['TimerAlarmMethod'] } - -Notice the syntax for returning lists "'returns': ['TimerAlarmMethod']", this -should be read as "returns a list of TimerAlarmMethod instances". - -The C implementation follows:: - - TimerAlarmMethodList *qmp_query_alarm_methods(Error **errp) - { - TimerAlarmMethodList *method_list = NULL; - const struct qemu_alarm_timer *p; - bool current = true; - - for (p = alarm_timers; p->name; p++) { - TimerAlarmMethod *value = g_malloc0(*value); - value->method_name = g_strdup(p->name); - value->current = current; - QAPI_LIST_PREPEND(method_list, value); - current = false; - } - - return method_list; - } - -The most important difference from the previous examples is the -TimerAlarmMethodList type, which is automatically generated by the QAPI from -the TimerAlarmMethod type. - -Each list node is represented by a TimerAlarmMethodList instance. We have to -allocate it, and that's done inside the for loop: the "info" pointer points to -an allocated node. We also have to allocate the node's contents, which is -stored in its "value" member. In our example, the "value" member is a pointer -to an TimerAlarmMethod instance. - -Notice that the "current" variable is used as "true" only in the first -iteration of the loop. That's because the alarm timer method in use is the -first element of the alarm_timers array. Also notice that QAPI lists are handled -by hand and we return the head of the list. - -Now Build qemu, run it as explained in the "Testing" section and try our new -command:: - - { "execute": "query-alarm-methods" } - { - "return": [ - { - "current": false, - "method-name": "unix" - }, - { - "current": true, - "method-name": "dynticks" - } - ] - } - -The HMP counterpart is a bit more complex than previous examples because it -has to traverse the list, it's shown below for reference:: - - void hmp_info_alarm_methods(Monitor *mon) - { - TimerAlarmMethodList *method_list, *method; - Error *err = NULL; - - method_list = qmp_query_alarm_methods(&err); - if (hmp_handle_error(mon, err)) { - return; - } - - for (method = method_list; method; method = method->next) { - monitor_printf(mon, "%c %s\n", method->value->current ? '*' : ' ', - method->value->method_name); - } - - qapi_free_TimerAlarmMethodList(method_list); - } - -Writing a debugging aid returning unstructured text ---------------------------------------------------- - -As discussed in section `Modelling data in QAPI`_, it is required that -commands expecting machine usage be using fine-grained QAPI data types. -The exception to this rule applies when the command is solely intended -as a debugging aid and allows for returning unstructured text. This is -commonly needed for query commands that report aspects of QEMU's -internal state that are useful to human operators. - -In this example we will consider a simplified variant of the HMP -command ``info roms``. Following the earlier rules, this command will -need to live under the ``x-`` name prefix, so its QMP implementation -will be called ``x-query-roms``. It will have no parameters and will -return a single text string:: - - { 'struct': 'HumanReadableText', - 'data': { 'human-readable-text': 'str' } } - { 'command': 'x-query-roms', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } -The ``HumanReadableText`` struct is intended to be used for all -commands, under the ``x-`` name prefix that are returning unstructured -text targeted at humans. It should never be used for commands outside -the ``x-`` name prefix, as those should be using structured QAPI types. +The ``HumanReadableText`` struct is defined in qapi/common.json as a +struct with a string member. It is intended to be used for all +commands that are returning unstructured text targeted at +humans. These should all have feature 'unstable'. Note that the +feature's documentation states why the command is unstable. We +commonly use a ``x-`` command name prefix to make lack of stability +obvious to human users. Implementing the QMP command ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The QMP implementation will typically involve creating a ``GString`` -object and printing formatted data into it:: +object and printing formatted data into it, like this:: HumanReadableText *qmp_x_query_roms(Error **errp) { @@ -698,6 +593,9 @@ object and printing formatted data into it:: return human_readable_text_from_str(buf); } +The actual implementation emits more information. You can find it in +hw/core/loader.c. + Implementing the HMP command ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -706,7 +604,7 @@ Now that the QMP command is in place, we can also make it available in the human monitor (HMP) as shown in previous examples. The HMP implementations will all look fairly similar, as all they need do is invoke the QMP command and then print the resulting text or error -message. Here's the implementation of the "info roms" HMP command:: +message. Here's an implementation of the "info roms" HMP command:: void hmp_info_roms(Monitor *mon, const QDict *qdict) { @@ -746,3 +644,5 @@ field NULL:: .help = "show roms", .cmd_info_hrt = qmp_x_query_roms, }, + +This is how the actual HMP command is done. diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 3eb79723c6..848915ea5b 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -44,6 +44,19 @@ struct RAMBlock { size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; + + /* + * Below fields are only used by mapped-ram migration + */ + /* bitmap of pages present in the migration file */ + unsigned long *file_bmap; + /* + * offset in the file pages belonging to this ramblock are saved, + * used only during migration to a file. + */ + off_t bitmap_offset; + uint64_t pages_offset; + /* bitmap of already received pages in postcopy */ unsigned long *receivedmap; diff --git a/include/io/channel.h b/include/io/channel.h index 5f9dbaab65..7986c49c71 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -44,6 +44,7 @@ enum QIOChannelFeature { QIO_CHANNEL_FEATURE_LISTEN, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + QIO_CHANNEL_FEATURE_SEEKABLE, }; @@ -130,6 +131,16 @@ struct QIOChannelClass { Error **errp); /* Optional callbacks */ + ssize_t (*io_pwritev)(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + off_t offset, + Error **errp); + ssize_t (*io_preadv)(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + off_t offset, + Error **errp); int (*io_shutdown)(QIOChannel *ioc, QIOChannelShutdown how, Error **errp); @@ -529,6 +540,78 @@ int qio_channel_close(QIOChannel *ioc, Error **errp); /** + * qio_channel_pwritev + * @ioc: the channel object + * @iov: the array of memory regions to write data from + * @niov: the length of the @iov array + * @offset: offset in the channel where writes should begin + * @errp: pointer to a NULL-initialized error object + * + * Not all implementations will support this facility, so may report + * an error. To avoid errors, the caller may check for the feature + * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method. + * + * Behaves as qio_channel_writev_full, apart from not supporting + * sending of file handles as well as beginning the write at the + * passed @offset + * + */ +ssize_t qio_channel_pwritev(QIOChannel *ioc, const struct iovec *iov, + size_t niov, off_t offset, Error **errp); + +/** + * qio_channel_pwrite + * @ioc: the channel object + * @buf: the memory region to write data into + * @buflen: the number of bytes to @buf + * @offset: offset in the channel where writes should begin + * @errp: pointer to a NULL-initialized error object + * + * Not all implementations will support this facility, so may report + * an error. To avoid errors, the caller may check for the feature + * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method. + * + */ +ssize_t qio_channel_pwrite(QIOChannel *ioc, char *buf, size_t buflen, + off_t offset, Error **errp); + +/** + * qio_channel_preadv + * @ioc: the channel object + * @iov: the array of memory regions to read data into + * @niov: the length of the @iov array + * @offset: offset in the channel where writes should begin + * @errp: pointer to a NULL-initialized error object + * + * Not all implementations will support this facility, so may report + * an error. To avoid errors, the caller may check for the feature + * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method. + * + * Behaves as qio_channel_readv_full, apart from not supporting + * receiving of file handles as well as beginning the read at the + * passed @offset + * + */ +ssize_t qio_channel_preadv(QIOChannel *ioc, const struct iovec *iov, + size_t niov, off_t offset, Error **errp); + +/** + * qio_channel_pread + * @ioc: the channel object + * @buf: the memory region to write data into + * @buflen: the number of bytes to @buf + * @offset: offset in the channel where writes should begin + * @errp: pointer to a NULL-initialized error object + * + * Not all implementations will support this facility, so may report + * an error. To avoid errors, the caller may check for the feature + * flag QIO_CHANNEL_FEATURE_SEEKABLE prior to calling this method. + * + */ +ssize_t qio_channel_pread(QIOChannel *ioc, char *buf, size_t buflen, + off_t offset, Error **errp); + +/** * qio_channel_shutdown: * @ioc: the channel object * @how: the direction to shutdown diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h index 9ba163f333..adec5abc07 100644 --- a/include/migration/qemu-file-types.h +++ b/include/migration/qemu-file-types.h @@ -50,6 +50,8 @@ unsigned int qemu_get_be16(QEMUFile *f); unsigned int qemu_get_be32(QEMUFile *f); uint64_t qemu_get_be64(QEMUFile *f); +bool qemu_file_is_seekable(QEMUFile *f); + static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { qemu_put_be64(f, *pv); diff --git a/include/qapi/type-helpers.h b/include/qapi/type-helpers.h index be1f181526..fc8352cdec 100644 --- a/include/qapi/type-helpers.h +++ b/include/qapi/type-helpers.h @@ -12,3 +12,11 @@ #include "qapi/qapi-types-common.h" HumanReadableText *human_readable_text_from_str(GString *str); + +/* + * Produce and return a NULL-terminated array of strings from @list. + * The result is g_malloc()'d and all strings are g_strdup()'d. It + * can be freed with g_strfreev(), or by g_auto(GStrv) automatic + * cleanup. + */ +char **strv_from_str_list(const strList *list); diff --git a/include/qapi/util.h b/include/qapi/util.h index 81a2b13a33..20dfea8a54 100644 --- a/include/qapi/util.h +++ b/include/qapi/util.h @@ -56,4 +56,17 @@ int parse_qapi_name(const char *name, bool complete); (tail) = &(*(tail))->next; \ } while (0) +/* + * For any GenericList @list, return its length. + */ +#define QAPI_LIST_LENGTH(list) \ + ({ \ + size_t _len = 0; \ + typeof(list) _tail; \ + for (_tail = list; _tail != NULL; _tail = _tail->next) { \ + _len++; \ + } \ + _len; \ + }) + #endif diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index cb3526d1f4..2c0a2fe751 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -68,6 +68,19 @@ static inline void clear_bit(long nr, unsigned long *addr) } /** + * clear_bit_atomic - Clears a bit in memory atomically + * @nr: Bit to clear + * @addr: Address to start counting from + */ +static inline void clear_bit_atomic(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + return qatomic_and(p, ~mask); +} + +/** * change_bit - Toggle a bit in memory * @nr: Bit to change * @addr: Address to start counting from diff --git a/io/channel-file.c b/io/channel-file.c index 4a12c61886..d4706fa592 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -36,6 +36,10 @@ qio_channel_file_new_fd(int fd) ioc->fd = fd; + if (lseek(fd, 0, SEEK_CUR) != (off_t)-1) { + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SEEKABLE); + } + trace_qio_channel_file_new_fd(ioc, fd); return ioc; @@ -60,6 +64,10 @@ qio_channel_file_new_path(const char *path, return NULL; } + if (lseek(ioc->fd, 0, SEEK_CUR) != (off_t)-1) { + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SEEKABLE); + } + trace_qio_channel_file_new_path(ioc, path, flags, mode, ioc->fd); return ioc; @@ -138,6 +146,58 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, return ret; } +#ifdef CONFIG_PREADV +static ssize_t qio_channel_file_preadv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + off_t offset, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + ssize_t ret; + + retry: + ret = preadv(fioc->fd, iov, niov, offset); + if (ret < 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + + error_setg_errno(errp, errno, "Unable to read from file"); + return -1; + } + + return ret; +} + +static ssize_t qio_channel_file_pwritev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + off_t offset, + Error **errp) +{ + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + ssize_t ret; + + retry: + ret = pwritev(fioc->fd, iov, niov, offset); + if (ret <= 0) { + if (errno == EAGAIN) { + return QIO_CHANNEL_ERR_BLOCK; + } + if (errno == EINTR) { + goto retry; + } + error_setg_errno(errp, errno, "Unable to write to file"); + return -1; + } + return ret; +} +#endif /* CONFIG_PREADV */ + static int qio_channel_file_set_blocking(QIOChannel *ioc, bool enabled, Error **errp) @@ -182,6 +242,11 @@ static int qio_channel_file_close(QIOChannel *ioc, { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); + if (qemu_fdatasync(fioc->fd) < 0) { + error_setg_errno(errp, errno, + "Unable to synchronize file data with storage device"); + return -1; + } if (qemu_close(fioc->fd) < 0) { error_setg_errno(errp, errno, "Unable to close file"); @@ -223,6 +288,10 @@ static void qio_channel_file_class_init(ObjectClass *klass, ioc_klass->io_writev = qio_channel_file_writev; ioc_klass->io_readv = qio_channel_file_readv; ioc_klass->io_set_blocking = qio_channel_file_set_blocking; +#ifdef CONFIG_PREADV + ioc_klass->io_pwritev = qio_channel_file_pwritev; + ioc_klass->io_preadv = qio_channel_file_preadv; +#endif ioc_klass->io_seek = qio_channel_file_seek; ioc_klass->io_close = qio_channel_file_close; ioc_klass->io_create_watch = qio_channel_file_create_watch; diff --git a/io/channel.c b/io/channel.c index 86c5834510..a1f12f8e90 100644 --- a/io/channel.c +++ b/io/channel.c @@ -454,6 +454,64 @@ GSource *qio_channel_add_watch_source(QIOChannel *ioc, } +ssize_t qio_channel_pwritev(QIOChannel *ioc, const struct iovec *iov, + size_t niov, off_t offset, Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (!klass->io_pwritev) { + error_setg(errp, "Channel does not support pwritev"); + return -1; + } + + if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SEEKABLE)) { + error_setg_errno(errp, EINVAL, "Requested channel is not seekable"); + return -1; + } + + return klass->io_pwritev(ioc, iov, niov, offset, errp); +} + +ssize_t qio_channel_pwrite(QIOChannel *ioc, char *buf, size_t buflen, + off_t offset, Error **errp) +{ + struct iovec iov = { + .iov_base = buf, + .iov_len = buflen + }; + + return qio_channel_pwritev(ioc, &iov, 1, offset, errp); +} + +ssize_t qio_channel_preadv(QIOChannel *ioc, const struct iovec *iov, + size_t niov, off_t offset, Error **errp) +{ + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + + if (!klass->io_preadv) { + error_setg(errp, "Channel does not support preadv"); + return -1; + } + + if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_SEEKABLE)) { + error_setg_errno(errp, EINVAL, "Requested channel is not seekable"); + return -1; + } + + return klass->io_preadv(ioc, iov, niov, offset, errp); +} + +ssize_t qio_channel_pread(QIOChannel *ioc, char *buf, size_t buflen, + off_t offset, Error **errp) +{ + struct iovec iov = { + .iov_base = buf, + .iov_len = buflen + }; + + return qio_channel_preadv(ioc, &iov, 1, offset, errp); +} + int qio_channel_shutdown(QIOChannel *ioc, QIOChannelShutdown how, Error **errp) diff --git a/migration/exec.c b/migration/exec.c index 47d2f3b8fb..20e6cccf8c 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qapi/type-helpers.h" #include "qemu/error-report.h" #include "channel.h" #include "exec.h" @@ -39,51 +40,16 @@ const char *exec_get_cmd_path(void) } #endif -/* provides the length of strList */ -static int -str_list_length(strList *list) -{ - int len = 0; - strList *elem; - - for (elem = list; elem != NULL; elem = elem->next) { - len++; - } - - return len; -} - -static void -init_exec_array(strList *command, char **argv, Error **errp) -{ - int i = 0; - strList *lst; - - for (lst = command; lst; lst = lst->next) { - argv[i++] = lst->value; - } - - argv[i] = NULL; - return; -} - void exec_start_outgoing_migration(MigrationState *s, strList *command, Error **errp) { - QIOChannel *ioc; - - int length = str_list_length(command); - g_auto(GStrv) argv = (char **) g_new0(const char *, length + 1); - - init_exec_array(command, argv, errp); + QIOChannel *ioc = NULL; + g_auto(GStrv) argv = strv_from_str_list(command); + const char * const *args = (const char * const *) argv; g_autofree char *new_command = g_strjoinv(" ", (char **)argv); trace_migration_exec_outgoing(new_command); - ioc = QIO_CHANNEL( - qio_channel_command_new_spawn( - (const char * const *) g_steal_pointer(&argv), - O_RDWR, - errp)); + ioc = QIO_CHANNEL(qio_channel_command_new_spawn(args, O_RDWR, errp)); if (!ioc) { return; } @@ -105,19 +71,12 @@ static gboolean exec_accept_incoming_migration(QIOChannel *ioc, void exec_start_incoming_migration(strList *command, Error **errp) { QIOChannel *ioc; - - int length = str_list_length(command); - g_auto(GStrv) argv = (char **) g_new0(const char *, length + 1); - - init_exec_array(command, argv, errp); + g_auto(GStrv) argv = strv_from_str_list(command); + const char * const *args = (const char * const *) argv; g_autofree char *new_command = g_strjoinv(" ", (char **)argv); trace_migration_exec_incoming(new_command); - ioc = QIO_CHANNEL( - qio_channel_command_new_spawn( - (const char * const *) g_steal_pointer(&argv), - O_RDWR, - errp)); + ioc = QIO_CHANNEL(qio_channel_command_new_spawn(args, O_RDWR, errp)); if (!ioc) { return; } diff --git a/migration/fd.c b/migration/fd.c index 0eb677dcae..d4ae72d132 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -15,18 +15,41 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "channel.h" #include "fd.h" #include "migration.h" #include "monitor/monitor.h" +#include "io/channel-file.h" #include "io/channel-util.h" +#include "options.h" #include "trace.h" +static struct FdOutgoingArgs { + int fd; +} outgoing_args; + +int fd_args_get_fd(void) +{ + return outgoing_args.fd; +} + +void fd_cleanup_outgoing_migration(void) +{ + if (outgoing_args.fd > 0) { + close(outgoing_args.fd); + outgoing_args.fd = -1; + } +} + void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp) { QIOChannel *ioc; int fd = monitor_get_fd(monitor_cur(), fdname, errp); + + outgoing_args.fd = -1; + if (fd == -1) { return; } @@ -38,6 +61,8 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error ** return; } + outgoing_args.fd = fd; + qio_channel_set_name(ioc, "migration-fd-outgoing"); migration_channel_connect(s, ioc, NULL, NULL); object_unref(OBJECT(ioc)); @@ -73,4 +98,23 @@ void fd_start_incoming_migration(const char *fdname, Error **errp) fd_accept_incoming_migration, NULL, NULL, g_main_context_get_thread_default()); + + if (migrate_multifd()) { + int channels = migrate_multifd_channels(); + + while (channels--) { + ioc = QIO_CHANNEL(qio_channel_file_new_fd(dup(fd))); + + if (QIO_CHANNEL_FILE(ioc)->fd == -1) { + error_setg(errp, "Failed to duplicate fd %d", fd); + return; + } + + qio_channel_set_name(ioc, "migration-fd-incoming"); + qio_channel_add_watch_full(ioc, G_IO_IN, + fd_accept_incoming_migration, + NULL, NULL, + g_main_context_get_thread_default()); + } + } } diff --git a/migration/fd.h b/migration/fd.h index b901bc014e..0c0a18d9e7 100644 --- a/migration/fd.h +++ b/migration/fd.h @@ -20,4 +20,6 @@ void fd_start_incoming_migration(const char *fdname, Error **errp); void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp); +void fd_cleanup_outgoing_migration(void); +int fd_args_get_fd(void); #endif diff --git a/migration/file.c b/migration/file.c index 5d4975f43e..164b079966 100644 --- a/migration/file.c +++ b/migration/file.c @@ -6,17 +6,25 @@ */ #include "qemu/osdep.h" +#include "exec/ramblock.h" #include "qemu/cutils.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "channel.h" +#include "fd.h" #include "file.h" #include "migration.h" #include "io/channel-file.h" #include "io/channel-util.h" +#include "options.h" #include "trace.h" #define OFFSET_OPTION ",offset=" +static struct FileOutgoingArgs { + char *fname; +} outgoing_args; + /* Remove the offset option from @filespec and return it in @offsetp. */ int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp) @@ -36,6 +44,41 @@ int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp) return 0; } +void file_cleanup_outgoing_migration(void) +{ + g_free(outgoing_args.fname); + outgoing_args.fname = NULL; +} + +bool file_send_channel_create(gpointer opaque, Error **errp) +{ + QIOChannelFile *ioc; + int flags = O_WRONLY; + bool ret = false; + int fd = fd_args_get_fd(); + + if (fd && fd != -1) { + ioc = qio_channel_file_new_fd(dup(fd)); + } else { + ioc = qio_channel_file_new_path(outgoing_args.fname, flags, 0, errp); + if (!ioc) { + goto out; + } + } + + multifd_channel_connect(opaque, QIO_CHANNEL(ioc)); + ret = true; + +out: + /* + * File channel creation is synchronous. However posting this + * semaphore here is simpler than adding a special case. + */ + multifd_send_channel_created(); + + return ret; +} + void file_start_outgoing_migration(MigrationState *s, FileMigrationArgs *file_args, Error **errp) { @@ -52,6 +95,8 @@ void file_start_outgoing_migration(MigrationState *s, return; } + outgoing_args.fname = g_strdup(filename); + ioc = QIO_CHANNEL(fioc); if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) { return; @@ -74,7 +119,8 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp) g_autofree char *filename = g_strdup(file_args->filename); QIOChannelFile *fioc = NULL; uint64_t offset = file_args->offset; - QIOChannel *ioc; + int channels = 1; + int i = 0; trace_migration_file_incoming(filename); @@ -83,13 +129,100 @@ void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp) return; } - ioc = QIO_CHANNEL(fioc); - if (offset && qio_channel_io_seek(ioc, offset, SEEK_SET, errp) < 0) { + if (offset && + qio_channel_io_seek(QIO_CHANNEL(fioc), offset, SEEK_SET, errp) < 0) { return; } - qio_channel_set_name(QIO_CHANNEL(ioc), "migration-file-incoming"); - qio_channel_add_watch_full(ioc, G_IO_IN, - file_accept_incoming_migration, - NULL, NULL, - g_main_context_get_thread_default()); + + if (migrate_multifd()) { + channels += migrate_multifd_channels(); + } + + do { + QIOChannel *ioc = QIO_CHANNEL(fioc); + + qio_channel_set_name(ioc, "migration-file-incoming"); + qio_channel_add_watch_full(ioc, G_IO_IN, + file_accept_incoming_migration, + NULL, NULL, + g_main_context_get_thread_default()); + + fioc = qio_channel_file_new_fd(dup(fioc->fd)); + + if (!fioc || fioc->fd == -1) { + error_setg(errp, "Error creating migration incoming channel"); + break; + } + } while (++i < channels); +} + +int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov, + int niov, RAMBlock *block, Error **errp) +{ + ssize_t ret = -1; + int i, slice_idx, slice_num; + uintptr_t base, next, offset; + size_t len; + + slice_idx = 0; + slice_num = 1; + + /* + * If the iov array doesn't have contiguous elements, we need to + * split it in slices because we only have one file offset for the + * whole iov. Do this here so callers don't need to break the iov + * array themselves. + */ + for (i = 0; i < niov; i++, slice_num++) { + base = (uintptr_t) iov[i].iov_base; + + if (i != niov - 1) { + len = iov[i].iov_len; + next = (uintptr_t) iov[i + 1].iov_base; + + if (base + len == next) { + continue; + } + } + + /* + * Use the offset of the first element of the segment that + * we're sending. + */ + offset = (uintptr_t) iov[slice_idx].iov_base - (uintptr_t) block->host; + if (offset >= block->used_length) { + error_setg(errp, "offset " RAM_ADDR_FMT + "outside of ramblock %s range", offset, block->idstr); + ret = -1; + break; + } + + ret = qio_channel_pwritev(ioc, &iov[slice_idx], slice_num, + block->pages_offset + offset, errp); + if (ret < 0) { + break; + } + + slice_idx += slice_num; + slice_num = 0; + } + + return (ret < 0) ? ret : 0; +} + +int multifd_file_recv_data(MultiFDRecvParams *p, Error **errp) +{ + MultiFDRecvData *data = p->data; + size_t ret; + + ret = qio_channel_pread(p->c, (char *) data->opaque, + data->size, data->file_offset, errp); + if (ret != data->size) { + error_prepend(errp, + "multifd recv (%u): read 0x%zx, expected 0x%zx", + p->id, ret, data->size); + return -1; + } + + return 0; } diff --git a/migration/file.h b/migration/file.h index 37d6a08bfc..9f71e87f74 100644 --- a/migration/file.h +++ b/migration/file.h @@ -9,10 +9,18 @@ #define QEMU_MIGRATION_FILE_H #include "qapi/qapi-types-migration.h" +#include "io/task.h" +#include "channel.h" +#include "multifd.h" void file_start_incoming_migration(FileMigrationArgs *file_args, Error **errp); void file_start_outgoing_migration(MigrationState *s, FileMigrationArgs *file_args, Error **errp); int file_parse_offset(char *filespec, uint64_t *offsetp, Error **errp); +void file_cleanup_outgoing_migration(void); +bool file_send_channel_create(gpointer opaque, Error **errp); +int file_write_ramblock_iov(QIOChannel *ioc, const struct iovec *iov, + int niov, RAMBlock *block, Error **errp); +int multifd_file_recv_data(MultiFDRecvParams *p, Error **errp); #endif diff --git a/migration/migration.c b/migration/migration.c index bab68bcbef..a49fcd53ee 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -140,9 +140,38 @@ static bool transport_supports_multi_channels(MigrationAddress *addr) if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { SocketAddress *saddr = &addr->u.socket; - return saddr->type == SOCKET_ADDRESS_TYPE_INET || - saddr->type == SOCKET_ADDRESS_TYPE_UNIX || - saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; + if (saddr->type == SOCKET_ADDRESS_TYPE_FD) { + return migrate_mapped_ram(); + } + + return (saddr->type == SOCKET_ADDRESS_TYPE_INET || + saddr->type == SOCKET_ADDRESS_TYPE_UNIX || + saddr->type == SOCKET_ADDRESS_TYPE_VSOCK); + } else if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + return migrate_mapped_ram(); + } else { + return false; + } +} + +static bool migration_needs_seekable_channel(void) +{ + return migrate_mapped_ram(); +} + +static bool transport_supports_seeking(MigrationAddress *addr) +{ + if (addr->transport == MIGRATION_ADDRESS_TYPE_FILE) { + return true; + } + + /* + * At this point, the user might not yet have passed the file + * descriptor to QEMU, so we cannot know for sure whether it + * refers to a plain file or a socket. Let it through anyway. + */ + if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { + return addr->u.socket.type == SOCKET_ADDRESS_TYPE_FD; } return false; @@ -152,6 +181,12 @@ static bool migration_channels_and_transport_compatible(MigrationAddress *addr, Error **errp) { + if (migration_needs_seekable_channel() && + !transport_supports_seeking(addr)) { + error_setg(errp, "Migration requires seekable transport (e.g. file)"); + return false; + } + if (migration_needs_multiple_sockets() && !transport_supports_multi_channels(addr)) { error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)"); @@ -881,7 +916,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) uint32_t channel_magic = 0; int ret = 0; - if (migrate_multifd() && !migrate_postcopy_ram() && + if (migrate_multifd() && !migrate_mapped_ram() && + !migrate_postcopy_ram() && qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { /* * With multiple channels, it is possible that we receive channels @@ -1950,6 +1986,18 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (migrate_mapped_ram()) { + if (migrate_tls()) { + error_setg(errp, "Cannot use TLS with mapped-ram"); + return false; + } + + if (migrate_multifd_compression()) { + error_setg(errp, "Cannot use compression with mapped-ram"); + return false; + } + } + if (migrate_mode_is_cpr(s)) { const char *conflict = NULL; diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index 012e3bdea1..6120faad65 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -69,7 +69,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) err_msg = "out of memory for buf"; goto err_free_zbuff; } - p->data = z; + p->compress_data = z; return 0; err_free_zbuff: @@ -92,15 +92,15 @@ err_free_z: */ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) { - struct zlib_data *z = p->data; + struct zlib_data *z = p->compress_data; deflateEnd(&z->zs); g_free(z->zbuff); z->zbuff = NULL; g_free(z->buf); z->buf = NULL; - g_free(p->data); - p->data = NULL; + g_free(p->compress_data); + p->compress_data = NULL; } /** @@ -117,7 +117,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) { MultiFDPages_t *pages = p->pages; - struct zlib_data *z = p->data; + struct zlib_data *z = p->compress_data; z_stream *zs = &z->zs; uint32_t out_size = 0; int ret; @@ -194,7 +194,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) struct zlib_data *z = g_new0(struct zlib_data, 1); z_stream *zs = &z->zs; - p->data = z; + p->compress_data = z; zs->zalloc = Z_NULL; zs->zfree = Z_NULL; zs->opaque = Z_NULL; @@ -224,17 +224,17 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) */ static void zlib_recv_cleanup(MultiFDRecvParams *p) { - struct zlib_data *z = p->data; + struct zlib_data *z = p->compress_data; inflateEnd(&z->zs); g_free(z->zbuff); z->zbuff = NULL; - g_free(p->data); - p->data = NULL; + g_free(p->compress_data); + p->compress_data = NULL; } /** - * zlib_recv_pages: read the data from the channel into actual pages + * zlib_recv: read the data from the channel into actual pages * * Read the compressed buffer, and uncompress it into the actual * pages. @@ -244,9 +244,9 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) * @p: Params for the channel that we are using * @errp: pointer to an error */ -static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) +static int zlib_recv(MultiFDRecvParams *p, Error **errp) { - struct zlib_data *z = p->data; + struct zlib_data *z = p->compress_data; z_stream *zs = &z->zs; uint32_t in_size = p->next_packet_size; /* we measure the change of total_out */ @@ -319,7 +319,7 @@ static MultiFDMethods multifd_zlib_ops = { .send_prepare = zlib_send_prepare, .recv_setup = zlib_recv_setup, .recv_cleanup = zlib_recv_cleanup, - .recv_pages = zlib_recv_pages + .recv = zlib_recv }; static void multifd_zlib_register(void) diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index dc8fe43e94..cac236833d 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -52,7 +52,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) struct zstd_data *z = g_new0(struct zstd_data, 1); int res; - p->data = z; + p->compress_data = z; z->zcs = ZSTD_createCStream(); if (!z->zcs) { g_free(z); @@ -90,14 +90,14 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) */ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) { - struct zstd_data *z = p->data; + struct zstd_data *z = p->compress_data; ZSTD_freeCStream(z->zcs); z->zcs = NULL; g_free(z->zbuff); z->zbuff = NULL; - g_free(p->data); - p->data = NULL; + g_free(p->compress_data); + p->compress_data = NULL; } /** @@ -114,7 +114,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) { MultiFDPages_t *pages = p->pages; - struct zstd_data *z = p->data; + struct zstd_data *z = p->compress_data; int ret; uint32_t i; @@ -183,7 +183,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) struct zstd_data *z = g_new0(struct zstd_data, 1); int ret; - p->data = z; + p->compress_data = z; z->zds = ZSTD_createDStream(); if (!z->zds) { g_free(z); @@ -221,18 +221,18 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) */ static void zstd_recv_cleanup(MultiFDRecvParams *p) { - struct zstd_data *z = p->data; + struct zstd_data *z = p->compress_data; ZSTD_freeDStream(z->zds); z->zds = NULL; g_free(z->zbuff); z->zbuff = NULL; - g_free(p->data); - p->data = NULL; + g_free(p->compress_data); + p->compress_data = NULL; } /** - * zstd_recv_pages: read the data from the channel into actual pages + * zstd_recv: read the data from the channel into actual pages * * Read the compressed buffer, and uncompress it into the actual * pages. @@ -242,13 +242,13 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) * @p: Params for the channel that we are using * @errp: pointer to an error */ -static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) +static int zstd_recv(MultiFDRecvParams *p, Error **errp) { uint32_t in_size = p->next_packet_size; uint32_t out_size = 0; uint32_t expected_size = p->normal_num * p->page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; - struct zstd_data *z = p->data; + struct zstd_data *z = p->compress_data; int ret; int i; @@ -310,7 +310,7 @@ static MultiFDMethods multifd_zstd_ops = { .send_prepare = zstd_send_prepare, .recv_setup = zstd_recv_setup, .recv_cleanup = zstd_recv_cleanup, - .recv_pages = zstd_recv_pages + .recv = zstd_recv }; static void multifd_zstd_register(void) diff --git a/migration/multifd.c b/migration/multifd.c index 6c07f19af1..d4a44da559 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -17,7 +17,8 @@ #include "exec/ramblock.h" #include "qemu/error-report.h" #include "qapi/error.h" -#include "ram.h" +#include "fd.h" +#include "file.h" #include "migration.h" #include "migration-stats.h" #include "socket.h" @@ -28,6 +29,7 @@ #include "threadinfo.h" #include "options.h" #include "qemu/yank.h" +#include "io/channel-file.h" #include "io/channel-socket.h" #include "yank_functions.h" @@ -81,9 +83,13 @@ struct { struct { MultiFDRecvParams *params; + MultiFDRecvData *data; /* number of created threads */ int count; - /* syncs main thread and channels */ + /* + * This is always posted by the recv threads, the migration thread + * uses it to wait for recv threads to finish assigned tasks. + */ QemuSemaphore sem_sync; /* global number of generated multifd packets */ uint64_t packet_num; @@ -92,6 +98,27 @@ struct { MultiFDMethods *ops; } *multifd_recv_state; +static bool multifd_use_packets(void) +{ + return !migrate_mapped_ram(); +} + +void multifd_send_channel_created(void) +{ + qemu_sem_post(&multifd_send_state->channels_created); +} + +static void multifd_set_file_bitmap(MultiFDSendParams *p) +{ + MultiFDPages_t *pages = p->pages; + + assert(pages->block); + + for (int i = 0; i < p->pages->num; i++) { + ramblock_set_file_bmap_atomic(pages->block, pages->offset[i]); + } +} + /* Multifd without compression */ /** @@ -122,6 +149,19 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) return; } +static void multifd_send_prepare_iovs(MultiFDSendParams *p) +{ + MultiFDPages_t *pages = p->pages; + + for (int i = 0; i < pages->num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; + p->iov[p->iovs_num].iov_len = p->page_size; + p->iovs_num++; + } + + p->next_packet_size = pages->num * p->page_size; +} + /** * nocomp_send_prepare: prepare date to be able to send * @@ -136,9 +176,15 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) { bool use_zero_copy_send = migrate_zero_copy_send(); - MultiFDPages_t *pages = p->pages; int ret; + if (!multifd_use_packets()) { + multifd_send_prepare_iovs(p); + multifd_set_file_bitmap(p); + + return 0; + } + if (!use_zero_copy_send) { /* * Only !zerocopy needs the header in IOV; zerocopy will @@ -147,13 +193,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) multifd_send_prepare_header(p); } - for (int i = 0; i < pages->num; i++) { - p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; - p->iov[p->iovs_num].iov_len = p->page_size; - p->iovs_num++; - } - - p->next_packet_size = pages->num * p->page_size; + multifd_send_prepare_iovs(p); p->flags |= MULTIFD_FLAG_NOCOMP; multifd_send_fill_packet(p); @@ -197,7 +237,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) } /** - * nocomp_recv_pages: read the data from the channel into actual pages + * nocomp_recv: read the data from the channel * * For no compression we just need to read things into the correct place. * @@ -206,9 +246,15 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) * @p: Params for the channel that we are using * @errp: pointer to an error */ -static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) +static int nocomp_recv(MultiFDRecvParams *p, Error **errp) { - uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + uint32_t flags; + + if (!multifd_use_packets()) { + return multifd_file_recv_data(p, errp); + } + + flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; if (flags != MULTIFD_FLAG_NOCOMP) { error_setg(errp, "multifd %u: flags received %x flags expected %x", @@ -228,7 +274,7 @@ static MultiFDMethods multifd_nocomp_ops = { .send_prepare = nocomp_send_prepare, .recv_setup = nocomp_recv_setup, .recv_cleanup = nocomp_recv_cleanup, - .recv_pages = nocomp_recv_pages + .recv = nocomp_recv }; static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = { @@ -663,6 +709,19 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) { if (p->c) { migration_ioc_unregister_yank(p->c); + /* + * An explicit close() on the channel here is normally not + * required, but can be helpful for "file:" iochannels, where it + * will include fdatasync() to make sure the data is flushed to the + * disk backend. + * + * The object_unref() cannot guarantee that because: (1) finalize() + * of the iochannel is only triggered on the last reference, and + * it's not guaranteed that we always hold the last refcount when + * reaching here, and, (2) even if finalize() is invoked, it only + * does a close(fd) without data flush. + */ + qio_channel_close(p->c, &error_abort); object_unref(OBJECT(p->c)); p->c = NULL; } @@ -684,6 +743,8 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) static void multifd_send_cleanup_state(void) { + file_cleanup_outgoing_migration(); + fd_cleanup_outgoing_migration(); socket_cleanup_outgoing_migration(); qemu_sem_destroy(&multifd_send_state->channels_created); qemu_sem_destroy(&multifd_send_state->channels_ready); @@ -795,15 +856,18 @@ static void *multifd_send_thread(void *opaque) MigrationThread *thread = NULL; Error *local_err = NULL; int ret = 0; + bool use_packets = multifd_use_packets(); thread = migration_threads_add(p->name, qemu_get_thread_id()); trace_multifd_send_thread_start(p->id); rcu_register_thread(); - if (multifd_send_initial_packet(p, &local_err) < 0) { - ret = -1; - goto out; + if (use_packets) { + if (multifd_send_initial_packet(p, &local_err) < 0) { + ret = -1; + goto out; + } } while (true) { @@ -829,8 +893,15 @@ static void *multifd_send_thread(void *opaque) break; } - ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, - 0, p->write_flags, &local_err); + if (migrate_mapped_ram()) { + ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num, + p->pages->block, &local_err); + } else { + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, + NULL, 0, p->write_flags, + &local_err); + } + if (ret != 0) { break; } @@ -854,16 +925,20 @@ static void *multifd_send_thread(void *opaque) * it doesn't require explicit memory barriers. */ assert(qatomic_read(&p->pending_sync)); - p->flags = MULTIFD_FLAG_SYNC; - multifd_send_fill_packet(p); - ret = qio_channel_write_all(p->c, (void *)p->packet, - p->packet_len, &local_err); - if (ret != 0) { - break; + + if (use_packets) { + p->flags = MULTIFD_FLAG_SYNC; + multifd_send_fill_packet(p); + ret = qio_channel_write_all(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret != 0) { + break; + } + /* p->next_packet_size will always be zero for a SYNC packet */ + stat64_add(&mig_stats.multifd_bytes, p->packet_len); + p->flags = 0; } - /* p->next_packet_size will always be zero for a SYNC packet */ - stat64_add(&mig_stats.multifd_bytes, p->packet_len); - p->flags = 0; + qatomic_set(&p->pending_sync, false); qemu_sem_post(&p->sem_sync); } @@ -939,7 +1014,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, return true; } -static void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc) +void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc) { qio_channel_set_delay(ioc, false); @@ -990,7 +1065,7 @@ out: * Here we're not interested whether creation succeeded, only that * it happened at all. */ - qemu_sem_post(&multifd_send_state->channels_created); + multifd_send_channel_created(); if (ret) { return; @@ -1007,9 +1082,14 @@ out: error_free(local_err); } -static void multifd_new_send_channel_create(gpointer opaque) +static bool multifd_new_send_channel_create(gpointer opaque, Error **errp) { + if (!multifd_use_packets()) { + return file_send_channel_create(opaque, errp); + } + socket_send_channel_create(multifd_new_send_channel_async, opaque); + return true; } bool multifd_send_setup(void) @@ -1018,6 +1098,7 @@ bool multifd_send_setup(void) Error *local_err = NULL; int thread_count, ret = 0; uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + bool use_packets = multifd_use_packets(); uint8_t i; if (!migrate_multifd()) { @@ -1040,18 +1121,27 @@ bool multifd_send_setup(void) qemu_sem_init(&p->sem_sync, 0); p->id = i; p->pages = multifd_pages_init(page_count); - p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(uint64_t) * page_count; - p->packet = g_malloc0(p->packet_len); - p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); - p->packet->version = cpu_to_be32(MULTIFD_VERSION); + + if (use_packets) { + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(uint64_t) * page_count; + p->packet = g_malloc0(p->packet_len); + p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + } else { + p->iov = g_new0(struct iovec, page_count); + } p->name = g_strdup_printf("multifdsend_%d", i); - /* We need one extra place for the packet header */ - p->iov = g_new0(struct iovec, page_count + 1); p->page_size = qemu_target_page_size(); p->page_count = page_count; p->write_flags = 0; - multifd_new_send_channel_create(p); + + if (!multifd_new_send_channel_create(p, &local_err)) { + return false; + } } /* @@ -1083,6 +1173,57 @@ bool multifd_send_setup(void) return true; } +bool multifd_recv(void) +{ + int i; + static int next_recv_channel; + MultiFDRecvParams *p = NULL; + MultiFDRecvData *data = multifd_recv_state->data; + + /* + * next_channel can remain from a previous migration that was + * using more channels, so ensure it doesn't overflow if the + * limit is lower now. + */ + next_recv_channel %= migrate_multifd_channels(); + for (i = next_recv_channel;; i = (i + 1) % migrate_multifd_channels()) { + if (multifd_recv_should_exit()) { + return false; + } + + p = &multifd_recv_state->params[i]; + + if (qatomic_read(&p->pending_job) == false) { + next_recv_channel = (i + 1) % migrate_multifd_channels(); + break; + } + } + + /* + * Order pending_job read before manipulating p->data below. Pairs + * with qatomic_store_release() at multifd_recv_thread(). + */ + smp_mb_acquire(); + + assert(!p->data->size); + multifd_recv_state->data = p->data; + p->data = data; + + /* + * Order p->data update before setting pending_job. Pairs with + * qatomic_load_acquire() at multifd_recv_thread(). + */ + qatomic_store_release(&p->pending_job, true); + qemu_sem_post(&p->sem); + + return true; +} + +MultiFDRecvData *multifd_get_recv_data(void) +{ + return multifd_recv_state->data; +} + static void multifd_recv_terminate_threads(Error *err) { int i; @@ -1107,10 +1248,27 @@ static void multifd_recv_terminate_threads(Error *err) MultiFDRecvParams *p = &multifd_recv_state->params[i]; /* - * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, - * however try to wakeup it without harm in cleanup phase. + * The migration thread and channels interact differently + * depending on the presence of packets. */ - qemu_sem_post(&p->sem_sync); + if (multifd_use_packets()) { + /* + * The channel receives as long as there are packets. When + * packets end (i.e. MULTIFD_FLAG_SYNC is reached), the + * channel waits for the migration thread to sync. If the + * sync never happens, do it here. + */ + qemu_sem_post(&p->sem_sync); + } else { + /* + * The channel waits for the migration thread to give it + * work. When the migration thread runs out of work, it + * releases the channel and waits for any pending work to + * finish. If we reach here (e.g. due to error) before the + * work runs out, release the channel. + */ + qemu_sem_post(&p->sem); + } /* * We could arrive here for two reasons: @@ -1138,6 +1296,7 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) p->c = NULL; qemu_mutex_destroy(&p->mutex); qemu_sem_destroy(&p->sem_sync); + qemu_sem_destroy(&p->sem); g_free(p->name); p->name = NULL; p->packet_len = 0; @@ -1155,6 +1314,8 @@ static void multifd_recv_cleanup_state(void) qemu_sem_destroy(&multifd_recv_state->sem_sync); g_free(multifd_recv_state->params); multifd_recv_state->params = NULL; + g_free(multifd_recv_state->data); + multifd_recv_state->data = NULL; g_free(multifd_recv_state); multifd_recv_state = NULL; } @@ -1182,18 +1343,53 @@ void multifd_recv_cleanup(void) void multifd_recv_sync_main(void) { + int thread_count = migrate_multifd_channels(); + bool file_based = !multifd_use_packets(); int i; if (!migrate_multifd()) { return; } - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDRecvParams *p = &multifd_recv_state->params[i]; - trace_multifd_recv_sync_main_wait(p->id); + /* + * File-based channels don't use packets and therefore need to + * wait for more work. Release them to start the sync. + */ + if (file_based) { + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + trace_multifd_recv_sync_main_signal(p->id); + qemu_sem_post(&p->sem); + } + } + + /* + * Initiate the synchronization by waiting for all channels. + * + * For socket-based migration this means each channel has received + * the SYNC packet on the stream. + * + * For file-based migration this means each channel is done with + * the work (pending_job=false). + */ + for (i = 0; i < thread_count; i++) { + trace_multifd_recv_sync_main_wait(i); qemu_sem_wait(&multifd_recv_state->sem_sync); } - for (i = 0; i < migrate_multifd_channels(); i++) { + + if (file_based) { + /* + * For file-based loading is done in one iteration. We're + * done. + */ + return; + } + + /* + * Sync done. Release the channels for the next iteration. + */ + for (i = 0; i < thread_count; i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; WITH_QEMU_LOCK_GUARD(&p->mutex) { @@ -1211,46 +1407,87 @@ static void *multifd_recv_thread(void *opaque) { MultiFDRecvParams *p = opaque; Error *local_err = NULL; + bool use_packets = multifd_use_packets(); int ret; trace_multifd_recv_thread_start(p->id); rcu_register_thread(); while (true) { - uint32_t flags; + uint32_t flags = 0; + bool has_data = false; + p->normal_num = 0; - if (multifd_recv_should_exit()) { - break; - } + if (use_packets) { + if (multifd_recv_should_exit()) { + break; + } - ret = qio_channel_read_all_eof(p->c, (void *)p->packet, - p->packet_len, &local_err); - if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ - break; - } + ret = qio_channel_read_all_eof(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ + break; + } - qemu_mutex_lock(&p->mutex); - ret = multifd_recv_unfill_packet(p, &local_err); - if (ret) { + qemu_mutex_lock(&p->mutex); + ret = multifd_recv_unfill_packet(p, &local_err); + if (ret) { + qemu_mutex_unlock(&p->mutex); + break; + } + + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; + has_data = !!p->normal_num; qemu_mutex_unlock(&p->mutex); - break; - } + } else { + /* + * No packets, so we need to wait for the vmstate code to + * give us work. + */ + qemu_sem_wait(&p->sem); + + if (multifd_recv_should_exit()) { + break; + } + + /* pairs with qatomic_store_release() at multifd_recv() */ + if (!qatomic_load_acquire(&p->pending_job)) { + /* + * Migration thread did not send work, this is + * equivalent to pending_sync on the sending + * side. Post sem_sync to notify we reached this + * point. + */ + qemu_sem_post(&multifd_recv_state->sem_sync); + continue; + } - flags = p->flags; - /* recv methods don't know how to handle the SYNC flag */ - p->flags &= ~MULTIFD_FLAG_SYNC; - qemu_mutex_unlock(&p->mutex); + has_data = !!p->data->size; + } - if (p->normal_num) { - ret = multifd_recv_state->ops->recv_pages(p, &local_err); + if (has_data) { + ret = multifd_recv_state->ops->recv(p, &local_err); if (ret != 0) { break; } } - if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&multifd_recv_state->sem_sync); - qemu_sem_wait(&p->sem_sync); + if (use_packets) { + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&multifd_recv_state->sem_sync); + qemu_sem_wait(&p->sem_sync); + } + } else { + p->total_normal_pages += p->data->size / qemu_target_page_size(); + p->data->size = 0; + /* + * Order data->size update before clearing + * pending_job. Pairs with smp_mb_acquire() at + * multifd_recv(). + */ + qatomic_store_release(&p->pending_job, false); } } @@ -1269,6 +1506,7 @@ int multifd_recv_setup(Error **errp) { int thread_count; uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + bool use_packets = multifd_use_packets(); uint8_t i; /* @@ -1282,6 +1520,10 @@ int multifd_recv_setup(Error **errp) thread_count = migrate_multifd_channels(); multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); + + multifd_recv_state->data = g_new0(MultiFDRecvData, 1); + multifd_recv_state->data->size = 0; + qatomic_set(&multifd_recv_state->count, 0); qatomic_set(&multifd_recv_state->exiting, 0); qemu_sem_init(&multifd_recv_state->sem_sync, 0); @@ -1292,10 +1534,18 @@ int multifd_recv_setup(Error **errp) qemu_mutex_init(&p->mutex); qemu_sem_init(&p->sem_sync, 0); + qemu_sem_init(&p->sem, 0); + p->pending_job = false; p->id = i; - p->packet_len = sizeof(MultiFDPacket_t) - + sizeof(uint64_t) * page_count; - p->packet = g_malloc0(p->packet_len); + + p->data = g_new0(MultiFDRecvData, 1); + p->data->size = 0; + + if (use_packets) { + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(uint64_t) * page_count; + p->packet = g_malloc0(p->packet_len); + } p->name = g_strdup_printf("multifdrecv_%d", i); p->iov = g_new0(struct iovec, page_count); p->normal = g_new0(ram_addr_t, page_count); @@ -1339,18 +1589,23 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) { MultiFDRecvParams *p; Error *local_err = NULL; + bool use_packets = multifd_use_packets(); int id; - id = multifd_recv_initial_packet(ioc, &local_err); - if (id < 0) { - multifd_recv_terminate_threads(local_err); - error_propagate_prepend(errp, local_err, - "failed to receive packet" - " via multifd channel %d: ", - qatomic_read(&multifd_recv_state->count)); - return; + if (use_packets) { + id = multifd_recv_initial_packet(ioc, &local_err); + if (id < 0) { + multifd_recv_terminate_threads(local_err); + error_propagate_prepend(errp, local_err, + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); + return; + } + trace_multifd_recv_new_channel(id); + } else { + id = qatomic_read(&multifd_recv_state->count); } - trace_multifd_recv_new_channel(id); p = &multifd_recv_state->params[id]; if (p->c != NULL) { diff --git a/migration/multifd.h b/migration/multifd.h index b3fe27ae93..7447c2bea3 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -13,8 +13,13 @@ #ifndef QEMU_MIGRATION_MULTIFD_H #define QEMU_MIGRATION_MULTIFD_H +#include "ram.h" + +typedef struct MultiFDRecvData MultiFDRecvData; + bool multifd_send_setup(void); void multifd_send_shutdown(void); +void multifd_send_channel_created(void); int multifd_recv_setup(Error **errp); void multifd_recv_cleanup(void); void multifd_recv_shutdown(void); @@ -23,6 +28,8 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); void multifd_recv_sync_main(void); int multifd_send_sync_main(void); bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); +bool multifd_recv(void); +MultiFDRecvData *multifd_get_recv_data(void); /* Multifd Compression flags */ #define MULTIFD_FLAG_SYNC (1 << 0) @@ -63,6 +70,13 @@ typedef struct { RAMBlock *block; } MultiFDPages_t; +struct MultiFDRecvData { + void *opaque; + size_t size; + /* for preadv */ + off_t file_offset; +}; + typedef struct { /* Fields are only written at creating/deletion time */ /* No lock required for them, they are read only */ @@ -127,7 +141,7 @@ typedef struct { /* number of iovs used */ uint32_t iovs_num; /* used for compression methods */ - void *data; + void *compress_data; } MultiFDSendParams; typedef struct { @@ -152,6 +166,8 @@ typedef struct { /* syncs main thread and channels */ QemuSemaphore sem_sync; + /* sem where to wait for more work */ + QemuSemaphore sem; /* this mutex protects the following parameters */ QemuMutex mutex; @@ -161,6 +177,8 @@ typedef struct { uint32_t flags; /* global number of generated multifd packets */ uint64_t packet_num; + int pending_job; + MultiFDRecvData *data; /* thread local variables. No locking required */ @@ -183,7 +201,7 @@ typedef struct { /* num of non zero pages */ uint32_t normal_num; /* used for de-compression methods */ - void *data; + void *compress_data; } MultiFDRecvParams; typedef struct { @@ -197,8 +215,8 @@ typedef struct { int (*recv_setup)(MultiFDRecvParams *p, Error **errp); /* Cleanup for receiving side */ void (*recv_cleanup)(MultiFDRecvParams *p); - /* Read all pages */ - int (*recv_pages)(MultiFDRecvParams *p, Error **errp); + /* Read all data */ + int (*recv)(MultiFDRecvParams *p, Error **errp); } MultiFDMethods; void multifd_register_ops(int method, MultiFDMethods *ops); @@ -211,5 +229,6 @@ static inline void multifd_send_prepare_header(MultiFDSendParams *p) p->iovs_num++; } +void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc); #endif diff --git a/migration/options.c b/migration/options.c index 3e3e0b93b4..40eb930940 100644 --- a/migration/options.c +++ b/migration/options.c @@ -204,6 +204,7 @@ Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-switchover-ack", MIGRATION_CAPABILITY_SWITCHOVER_ACK), DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT), + DEFINE_PROP_MIG_CAP("mapped-ram", MIGRATION_CAPABILITY_MAPPED_RAM), DEFINE_PROP_END_OF_LIST(), }; @@ -263,6 +264,13 @@ bool migrate_events(void) return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; } +bool migrate_mapped_ram(void) +{ + MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_MAPPED_RAM]; +} + bool migrate_ignore_shared(void) { MigrationState *s = migrate_get_current(); @@ -645,6 +653,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) } } + if (new_caps[MIGRATION_CAPABILITY_MAPPED_RAM]) { + if (new_caps[MIGRATION_CAPABILITY_XBZRLE]) { + error_setg(errp, + "Mapped-ram migration is incompatible with xbzrle"); + return false; + } + + if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, + "Mapped-ram migration is incompatible with compression"); + return false; + } + + if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + error_setg(errp, + "Mapped-ram migration is incompatible with postcopy"); + return false; + } + } + return true; } @@ -1218,6 +1246,13 @@ bool migrate_params_check(MigrationParameters *params, Error **errp) } #endif + if (migrate_mapped_ram() && + (migrate_multifd_compression() || migrate_tls())) { + error_setg(errp, + "Mapped-ram only available for non-compressed non-TLS multifd migration"); + return false; + } + if (params->has_x_vcpu_dirty_limit_period && (params->x_vcpu_dirty_limit_period < 1 || params->x_vcpu_dirty_limit_period > 1000)) { @@ -1312,6 +1347,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_compression) { dest->multifd_compression = params->multifd_compression; } + if (params->has_multifd_zlib_level) { + dest->multifd_zlib_level = params->multifd_zlib_level; + } + if (params->has_multifd_zstd_level) { + dest->multifd_zstd_level = params->multifd_zstd_level; + } if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } @@ -1447,6 +1488,12 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_compression) { s->parameters.multifd_compression = params->multifd_compression; } + if (params->has_multifd_zlib_level) { + s->parameters.multifd_zlib_level = params->multifd_zlib_level; + } + if (params->has_multifd_zstd_level) { + s->parameters.multifd_zstd_level = params->multifd_zstd_level; + } if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); diff --git a/migration/options.h b/migration/options.h index 246c160aee..6ddd8dad9b 100644 --- a/migration/options.h +++ b/migration/options.h @@ -31,6 +31,7 @@ bool migrate_compress(void); bool migrate_dirty_bitmaps(void); bool migrate_dirty_limit(void); bool migrate_events(void); +bool migrate_mapped_ram(void); bool migrate_ignore_shared(void); bool migrate_late_block_activate(void); bool migrate_multifd(void); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 94231ff295..b10c882629 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -33,6 +33,7 @@ #include "options.h" #include "qapi/error.h" #include "rdma.h" +#include "io/channel-file.h" #define IO_BUF_SIZE 32768 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64) @@ -255,6 +256,10 @@ static void qemu_iovec_release_ram(QEMUFile *f) memset(f->may_free, 0, sizeof(f->may_free)); } +bool qemu_file_is_seekable(QEMUFile *f) +{ + return qio_channel_has_feature(f->ioc, QIO_CHANNEL_FEATURE_SEEKABLE); +} /** * Flushes QEMUFile buffer @@ -447,6 +452,107 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) } } +void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, + off_t pos) +{ + Error *err = NULL; + size_t ret; + + if (f->last_error) { + return; + } + + qemu_fflush(f); + ret = qio_channel_pwrite(f->ioc, (char *)buf, buflen, pos, &err); + + if (err) { + qemu_file_set_error_obj(f, -EIO, err); + return; + } + + if ((ssize_t)ret == QIO_CHANNEL_ERR_BLOCK) { + qemu_file_set_error_obj(f, -EAGAIN, NULL); + return; + } + + if (ret != buflen) { + error_setg(&err, "Partial write of size %zu, expected %zu", ret, + buflen); + qemu_file_set_error_obj(f, -EIO, err); + return; + } + + stat64_add(&mig_stats.qemu_file_transferred, buflen); + + return; +} + + +size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, + off_t pos) +{ + Error *err = NULL; + size_t ret; + + if (f->last_error) { + return 0; + } + + ret = qio_channel_pread(f->ioc, (char *)buf, buflen, pos, &err); + + if ((ssize_t)ret == -1 || err) { + qemu_file_set_error_obj(f, -EIO, err); + return 0; + } + + if ((ssize_t)ret == QIO_CHANNEL_ERR_BLOCK) { + qemu_file_set_error_obj(f, -EAGAIN, NULL); + return 0; + } + + if (ret != buflen) { + error_setg(&err, "Partial read of size %zu, expected %zu", ret, buflen); + qemu_file_set_error_obj(f, -EIO, err); + return 0; + } + + return ret; +} + +void qemu_set_offset(QEMUFile *f, off_t off, int whence) +{ + Error *err = NULL; + off_t ret; + + if (qemu_file_is_writable(f)) { + qemu_fflush(f); + } else { + /* Drop all cached buffers if existed; will trigger a re-fill later */ + f->buf_index = 0; + f->buf_size = 0; + } + + ret = qio_channel_io_seek(f->ioc, off, whence, &err); + if (ret == (off_t)-1) { + qemu_file_set_error_obj(f, -EIO, err); + } +} + +off_t qemu_get_offset(QEMUFile *f) +{ + Error *err = NULL; + off_t ret; + + qemu_fflush(f); + + ret = qio_channel_io_seek(f->ioc, 0, SEEK_CUR, &err); + if (ret == (off_t)-1) { + qemu_file_set_error_obj(f, -EIO, err); + } + return ret; +} + + void qemu_put_byte(QEMUFile *f, int v) { if (f->last_error) { diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 8aec9fabf7..32fd4a34fd 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -75,6 +75,12 @@ QEMUFile *qemu_file_get_return_path(QEMUFile *f); int qemu_fflush(QEMUFile *f); void qemu_file_set_blocking(QEMUFile *f, bool block); int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size); +void qemu_set_offset(QEMUFile *f, off_t off, int whence); +off_t qemu_get_offset(QEMUFile *f); +void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, + off_t pos); +size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen, + off_t pos); QIOChannel *qemu_file_get_ioc(QEMUFile *file); diff --git a/migration/ram.c b/migration/ram.c index 83fd780fc2..003c28e133 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -94,6 +94,24 @@ #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 /* We can't use any flag that is bigger than 0x200 */ +/* + * mapped-ram migration supports O_DIRECT, so we need to make sure the + * userspace buffer, the IO operation size and the file offset are + * aligned according to the underlying device's block size. The first + * two are already aligned to page size, but we need to add padding to + * the file to align the offset. We cannot read the block size + * dynamically because the migration file can be moved between + * different systems, so use 1M to cover most block sizes and to keep + * the file offset aligned at page size as well. + */ +#define MAPPED_RAM_FILE_OFFSET_ALIGNMENT 0x100000 + +/* + * When doing mapped-ram migration, this is the amount we read from + * the pages region in the migration file at a time. + */ +#define MAPPED_RAM_LOAD_BUF_SIZE 0x100000 + XBZRLECacheStats xbzrle_counters; /* used by the search for pages to send */ @@ -1126,12 +1144,18 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, return 0; } + stat64_add(&mig_stats.zero_pages, 1); + + if (migrate_mapped_ram()) { + /* zero pages are not transferred with mapped-ram */ + clear_bit_atomic(offset >> TARGET_PAGE_BITS, pss->block->file_bmap); + return 1; + } + len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; ram_release_page(pss->block->idstr, offset); - - stat64_add(&mig_stats.zero_pages, 1); ram_transferred_add(len); /* @@ -1189,14 +1213,20 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, { QEMUFile *file = pss->pss_channel; - ram_transferred_add(save_page_header(pss, pss->pss_channel, block, - offset | RAM_SAVE_FLAG_PAGE)); - if (async) { - qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, - migrate_release_ram() && - migration_in_postcopy()); + if (migrate_mapped_ram()) { + qemu_put_buffer_at(file, buf, TARGET_PAGE_SIZE, + block->pages_offset + offset); + set_bit(offset >> TARGET_PAGE_BITS, block->file_bmap); } else { - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + ram_transferred_add(save_page_header(pss, pss->pss_channel, block, + offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, + migrate_release_ram() && + migration_in_postcopy()); + } else { + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } } ram_transferred_add(TARGET_PAGE_SIZE); stat64_add(&mig_stats.normal_pages, 1); @@ -1332,14 +1362,18 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { if (migrate_multifd() && - !migrate_multifd_flush_after_each_section()) { + (!migrate_multifd_flush_after_each_section() || + migrate_mapped_ram())) { QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; int ret = multifd_send_sync_main(); if (ret < 0) { return ret; } - qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); - qemu_fflush(f); + + if (!migrate_mapped_ram()) { + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); + qemu_fflush(f); + } } /* * If memory migration starts over, we will meet a dirtied page @@ -2778,6 +2812,9 @@ static void ram_list_init_bitmaps(void) */ block->bmap = bitmap_new(pages); bitmap_set(block->bmap, 0, pages); + if (migrate_mapped_ram()) { + block->file_bmap = bitmap_new(pages); + } block->clear_bmap_shift = shift; block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift)); } @@ -2915,6 +2952,89 @@ void qemu_guest_free_page_hint(void *addr, size_t len) } } +#define MAPPED_RAM_HDR_VERSION 1 +struct MappedRamHeader { + uint32_t version; + /* + * The target's page size, so we know how many pages are in the + * bitmap. + */ + uint64_t page_size; + /* + * The offset in the migration file where the pages bitmap is + * stored. + */ + uint64_t bitmap_offset; + /* + * The offset in the migration file where the actual pages (data) + * are stored. + */ + uint64_t pages_offset; +} QEMU_PACKED; +typedef struct MappedRamHeader MappedRamHeader; + +static void mapped_ram_setup_ramblock(QEMUFile *file, RAMBlock *block) +{ + g_autofree MappedRamHeader *header = NULL; + size_t header_size, bitmap_size; + long num_pages; + + header = g_new0(MappedRamHeader, 1); + header_size = sizeof(MappedRamHeader); + + num_pages = block->used_length >> TARGET_PAGE_BITS; + bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + /* + * Save the file offsets of where the bitmap and the pages should + * go as they are written at the end of migration and during the + * iterative phase, respectively. + */ + block->bitmap_offset = qemu_get_offset(file) + header_size; + block->pages_offset = ROUND_UP(block->bitmap_offset + + bitmap_size, + MAPPED_RAM_FILE_OFFSET_ALIGNMENT); + + header->version = cpu_to_be32(MAPPED_RAM_HDR_VERSION); + header->page_size = cpu_to_be64(TARGET_PAGE_SIZE); + header->bitmap_offset = cpu_to_be64(block->bitmap_offset); + header->pages_offset = cpu_to_be64(block->pages_offset); + + qemu_put_buffer(file, (uint8_t *) header, header_size); + + /* prepare offset for next ramblock */ + qemu_set_offset(file, block->pages_offset + block->used_length, SEEK_SET); +} + +static bool mapped_ram_read_header(QEMUFile *file, MappedRamHeader *header, + Error **errp) +{ + size_t ret, header_size = sizeof(MappedRamHeader); + + ret = qemu_get_buffer(file, (uint8_t *)header, header_size); + if (ret != header_size) { + error_setg(errp, "Could not read whole mapped-ram migration header " + "(expected %zd, got %zd bytes)", header_size, ret); + return false; + } + + /* migration stream is big-endian */ + header->version = be32_to_cpu(header->version); + + if (header->version > MAPPED_RAM_HDR_VERSION) { + error_setg(errp, "Migration mapped-ram capability version not " + "supported (expected <= %d, got %d)", MAPPED_RAM_HDR_VERSION, + header->version); + return false; + } + + header->page_size = be64_to_cpu(header->page_size); + header->bitmap_offset = be64_to_cpu(header->bitmap_offset); + header->pages_offset = be64_to_cpu(header->pages_offset); + + return true; +} + /* * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code @@ -2970,6 +3090,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) if (migrate_ignore_shared()) { qemu_put_be64(f, block->mr->addr); } + + if (migrate_mapped_ram()) { + mapped_ram_setup_ramblock(f, block); + } } } @@ -2995,7 +3119,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return ret; } - if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && !migrate_multifd_flush_after_each_section() + && !migrate_mapped_ram()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } @@ -3003,6 +3128,33 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return qemu_fflush(f); } +static void ram_save_file_bmap(QEMUFile *f) +{ + RAMBlock *block; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + long num_pages = block->used_length >> TARGET_PAGE_BITS; + long bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + qemu_put_buffer_at(f, (uint8_t *)block->file_bmap, bitmap_size, + block->bitmap_offset); + ram_transferred_add(bitmap_size); + + /* + * Free the bitmap here to catch any synchronization issues + * with multifd channels. No channels should be sending pages + * after we've written the bitmap to file. + */ + g_free(block->file_bmap); + block->file_bmap = NULL; + } +} + +void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset) +{ + set_bit_atomic(offset >> TARGET_PAGE_BITS, block->file_bmap); +} + /** * ram_save_iterate: iterative stage for migration * @@ -3112,7 +3264,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - if (migrate_multifd() && migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && migrate_multifd_flush_after_each_section() && + !migrate_mapped_ram()) { ret = multifd_send_sync_main(); if (ret < 0) { return ret; @@ -3192,7 +3345,20 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return ret; } - if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { + if (migrate_mapped_ram()) { + ram_save_file_bmap(f); + + if (qemu_file_get_error(f)) { + Error *local_err = NULL; + int err = qemu_file_get_error_obj(f, &local_err); + + error_reportf_err(local_err, "Failed to write bitmap to file: "); + return -err; + } + } + + if (migrate_multifd() && !migrate_multifd_flush_after_each_section() && + !migrate_mapped_ram()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3792,23 +3958,149 @@ void colo_flush_ram_cache(void) trace_colo_flush_ram_cache_end(); } +static size_t ram_load_multifd_pages(void *host_addr, size_t size, + uint64_t offset) +{ + MultiFDRecvData *data = multifd_get_recv_data(); + + data->opaque = host_addr; + data->file_offset = offset; + data->size = size; + + if (!multifd_recv()) { + return 0; + } + + return size; +} + +static bool read_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block, + long num_pages, unsigned long *bitmap, + Error **errp) +{ + ERRP_GUARD(); + unsigned long set_bit_idx, clear_bit_idx; + ram_addr_t offset; + void *host; + size_t read, unread, size; + + for (set_bit_idx = find_first_bit(bitmap, num_pages); + set_bit_idx < num_pages; + set_bit_idx = find_next_bit(bitmap, num_pages, clear_bit_idx + 1)) { + + clear_bit_idx = find_next_zero_bit(bitmap, num_pages, set_bit_idx + 1); + + unread = TARGET_PAGE_SIZE * (clear_bit_idx - set_bit_idx); + offset = set_bit_idx << TARGET_PAGE_BITS; + + while (unread > 0) { + host = host_from_ram_block_offset(block, offset); + if (!host) { + error_setg(errp, "page outside of ramblock %s range", + block->idstr); + return false; + } + + size = MIN(unread, MAPPED_RAM_LOAD_BUF_SIZE); + + if (migrate_multifd()) { + read = ram_load_multifd_pages(host, size, + block->pages_offset + offset); + } else { + read = qemu_get_buffer_at(f, host, size, + block->pages_offset + offset); + } + + if (!read) { + goto err; + } + offset += read; + unread -= read; + } + } + + return true; + +err: + qemu_file_get_error_obj(f, errp); + error_prepend(errp, "(%s) failed to read page " RAM_ADDR_FMT + "from file offset %" PRIx64 ": ", block->idstr, offset, + block->pages_offset + offset); + return false; +} + +static void parse_ramblock_mapped_ram(QEMUFile *f, RAMBlock *block, + ram_addr_t length, Error **errp) +{ + g_autofree unsigned long *bitmap = NULL; + MappedRamHeader header; + size_t bitmap_size; + long num_pages; + + if (!mapped_ram_read_header(f, &header, errp)) { + return; + } + + block->pages_offset = header.pages_offset; + + /* + * Check the alignment of the file region that contains pages. We + * don't enforce MAPPED_RAM_FILE_OFFSET_ALIGNMENT to allow that + * value to change in the future. Do only a sanity check with page + * size alignment. + */ + if (!QEMU_IS_ALIGNED(block->pages_offset, TARGET_PAGE_SIZE)) { + error_setg(errp, + "Error reading ramblock %s pages, region has bad alignment", + block->idstr); + return; + } + + num_pages = length / header.page_size; + bitmap_size = BITS_TO_LONGS(num_pages) * sizeof(unsigned long); + + bitmap = g_malloc0(bitmap_size); + if (qemu_get_buffer_at(f, (uint8_t *)bitmap, bitmap_size, + header.bitmap_offset) != bitmap_size) { + error_setg(errp, "Error reading dirty bitmap"); + return; + } + + if (!read_ramblock_mapped_ram(f, block, num_pages, bitmap, errp)) { + return; + } + + /* Skip pages array */ + qemu_set_offset(f, block->pages_offset + length, SEEK_SET); + + return; +} + static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) { int ret = 0; /* ADVISE is earlier, it shows the source has the postcopy capability on */ bool postcopy_advised = migration_incoming_postcopy_advised(); int max_hg_page_size; + Error *local_err = NULL; assert(block); + if (migrate_mapped_ram()) { + parse_ramblock_mapped_ram(f, block, length, &local_err); + if (local_err) { + error_report_err(local_err); + return -EINVAL; + } + return 0; + } + if (!qemu_ram_is_migratable(block)) { error_report("block %s should not be migrated !", block->idstr); return -EINVAL; } if (length != block->used_length) { - Error *local_err = NULL; - ret = qemu_ram_resize(block, length, &local_err); if (local_err) { error_report_err(local_err); @@ -3899,6 +4191,12 @@ static int ram_load_precopy(QEMUFile *f) invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; } + if (migrate_mapped_ram()) { + invalid_flags |= (RAM_SAVE_FLAG_HOOK | RAM_SAVE_FLAG_MULTIFD_FLUSH | + RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_XBZRLE | + RAM_SAVE_FLAG_ZERO); + } + while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; void *host = NULL, *host_bak = NULL; @@ -3920,6 +4218,8 @@ static int ram_load_precopy(QEMUFile *f) addr &= TARGET_PAGE_MASK; if (flags & invalid_flags) { + error_report("Unexpected RAM flags: %d", flags & invalid_flags); + if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) { error_report("Received an unexpected compressed page"); } @@ -3972,6 +4272,16 @@ static int ram_load_precopy(QEMUFile *f) switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_MEM_SIZE: ret = parse_ramblocks(f, addr); + /* + * For mapped-ram migration (to a file) using multifd, we sync + * once and for all here to make sure all tasks we queued to + * multifd threads are completed, so that all the ramblocks + * (including all the guest memory pages within) are fully + * loaded after this sync returns. + */ + if (migrate_mapped_ram()) { + multifd_recv_sync_main(); + } break; case RAM_SAVE_FLAG_ZERO: @@ -4012,7 +4322,12 @@ static int ram_load_precopy(QEMUFile *f) case RAM_SAVE_FLAG_EOS: /* normal exit */ if (migrate_multifd() && - migrate_multifd_flush_after_each_section()) { + migrate_multifd_flush_after_each_section() && + /* + * Mapped-ram migration flushes once and for all after + * parsing ramblocks. Always ignore EOS for it. + */ + !migrate_mapped_ram()) { multifd_recv_sync_main(); } break; diff --git a/migration/ram.h b/migration/ram.h index 9b937a446b..b9ac0da587 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -75,6 +75,7 @@ bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start); void postcopy_preempt_shutdown_file(MigrationState *s); void *postcopy_preempt_thread(void *opaque); +void ramblock_set_file_bmap_atomic(RAMBlock *block, ram_addr_t offset); /* ram cache */ int colo_init_ram_cache(void); diff --git a/migration/savevm.c b/migration/savevm.c index d612c8a902..dc1fb9c0d3 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -245,6 +245,7 @@ static bool should_validate_capability(int capability) /* Validate only new capabilities to keep compatibility. */ switch (capability) { case MIGRATION_CAPABILITY_X_IGNORE_SHARED: + case MIGRATION_CAPABILITY_MAPPED_RAM: return true; default: return false; diff --git a/migration/trace-events b/migration/trace-events index 298ad2b0dd..bf1a069632 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -132,7 +132,7 @@ multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uin multifd_recv_new_channel(uint8_t id) "channel %u" multifd_recv_sync_main(long packet_num) "packet num %ld" multifd_recv_sync_main_signal(uint8_t id) "channel %u" -multifd_recv_sync_main_wait(uint8_t id) "channel %u" +multifd_recv_sync_main_wait(uint8_t id) "iter %u" multifd_recv_terminate_threads(bool error) "error %d" multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 multifd_recv_thread_start(uint8_t id) "%u" diff --git a/qapi/block-core.json b/qapi/block-core.json index 22b8634422..1874f880a8 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1456,8 +1456,7 @@ # # @size: new image size in bytes # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, DeviceNotFound # # Since: 0.14 @@ -1674,8 +1673,7 @@ # # For the arguments, see the documentation of BlockdevSnapshotSync. # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, DeviceNotFound # # Since: 0.14 @@ -1754,8 +1752,7 @@ # is not validated, so care should be taken when specifying the # string or the image chain may not be able to be reopened again. # -# Returns: -# - Nothing on success +# Errors: # - If "device" does not exist or cannot be determined, # DeviceNotFound # @@ -1854,8 +1851,7 @@ # @deprecated: Members @base and @top are deprecated. Use @base-node # and @top-node instead. # -# Returns: -# - Nothing on success +# Errors: # - If @device does not exist, DeviceNotFound # - Any other error returns a GenericError. # @@ -1894,8 +1890,7 @@ # @deprecated: This command is deprecated. Use @blockdev-backup # instead. # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, GenericError # # Since: 1.6 @@ -1921,8 +1916,7 @@ # 'backup'. The operation can be stopped before it has completed using # the block-job-cancel command. # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, DeviceNotFound # # Since: 2.3 @@ -2127,8 +2121,7 @@ # specifies the format of the mirror image, default is to probe if # mode='existing', else the format of the source. # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, GenericError # # Since: 1.3 @@ -2304,8 +2297,7 @@ # Create a dirty bitmap with a name on the node, and start tracking # the writes. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device or node, DeviceNotFound # - If @name is already taken, GenericError with an explanation # @@ -2328,8 +2320,7 @@ # with block-dirty-bitmap-add. If the bitmap is persistent, remove it # from its storage too. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device or node, DeviceNotFound # - If @name is not found, GenericError with an explanation # - if @name is frozen by an operation, GenericError @@ -2353,8 +2344,7 @@ # backup from this point in time forward will only backup clusters # modified after this clear operation. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device, DeviceNotFound # - If @name is not found, GenericError with an explanation # @@ -2375,8 +2365,7 @@ # # Enables a dirty bitmap so that it will begin tracking disk changes. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device, DeviceNotFound # - If @name is not found, GenericError with an explanation # @@ -2397,8 +2386,7 @@ # # Disables a dirty bitmap so that it will stop tracking disk changes. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device, DeviceNotFound # - If @name is not found, GenericError with an explanation # @@ -2427,8 +2415,7 @@ # dirty in any of the source bitmaps. This can be used to achieve # backup checkpoints, or in simpler usages, to copy bitmaps. # -# Returns: -# - nothing on success +# Errors: # - If @node is not a valid block device, DeviceNotFound # - If any bitmap in @bitmaps or @target is not found, # GenericError @@ -2470,7 +2457,9 @@ # @unstable: This command is meant for debugging. # # Returns: -# - BlockDirtyBitmapSha256 on success +# BlockDirtyBitmapSha256 +# +# Errors: # - If @node is not a valid block device, DeviceNotFound # - If @name is not found or if hashing has failed, GenericError # with an explanation @@ -2542,8 +2531,6 @@ # disappear from the query list without user intervention. # Defaults to true. (Since 3.1) # -# Returns: nothing on success. -# # Since: 2.6 # # Example: @@ -2866,8 +2853,7 @@ # disappear from the query list without user intervention. # Defaults to true. (Since 3.1) # -# Returns: -# - Nothing on success. +# Errors: # - If @device does not exist, DeviceNotFound. # # Since: 1.1 @@ -2905,8 +2891,7 @@ # @speed: the maximum speed, in bytes per second, or 0 for unlimited. # Defaults to 0. # -# Returns: -# - Nothing on success +# Errors: # - If no background operation is active on this device, # DeviceNotActive # @@ -2950,8 +2935,7 @@ # paused) instead of waiting for the destination to complete its # final synchronization (since 1.3) # -# Returns: -# - Nothing on success +# Errors: # - If no background operation is active on this device, # DeviceNotActive # @@ -2977,8 +2961,7 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # -# Returns: -# - Nothing on success +# Errors: # - If no background operation is active on this device, # DeviceNotActive # @@ -3002,8 +2985,7 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # -# Returns: -# - Nothing on success +# Errors: # - If no background operation is active on this device, # DeviceNotActive # @@ -3034,8 +3016,7 @@ # the name of the parameter), but since QEMU 2.7 it can have other # values. # -# Returns: -# - Nothing on success +# Errors: # - If no background operation is active on this device, # DeviceNotActive # @@ -3059,8 +3040,6 @@ # # @id: The job identifier. # -# Returns: Nothing on success -# # Since: 2.12 ## { 'command': 'block-job-dismiss', 'data': { 'id': 'str' }, @@ -3078,8 +3057,6 @@ # # @id: The job identifier. # -# Returns: Nothing on success -# # Since: 2.12 ## { 'command': 'block-job-finalize', 'data': { 'id': 'str' }, @@ -6070,8 +6047,7 @@ # For the arguments, see the documentation of # BlockdevSnapshotInternal. # -# Returns: -# - nothing on success +# Errors: # - If @device is not a valid block device, GenericError # - If any snapshot matching @name exists, or @name is empty, # GenericError @@ -6108,7 +6084,9 @@ # @name: optional the snapshot's name to be deleted # # Returns: -# - SnapshotInfo on success +# SnapshotInfo +# +# Errors: # - If @device is not a valid block device, GenericError # - If snapshot not found, GenericError # - If the format of the image used does not support it, diff --git a/qapi/block-export.json b/qapi/block-export.json index d9bd376b48..3919a2d5b9 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -65,7 +65,8 @@ # server from advertising multiple client support (since 5.2; # default: 0). # -# Returns: error if the server is already running. +# Errors: +# - if the server is already running # # Since: 1.3 ## @@ -247,8 +248,9 @@ # @deprecated: This command is deprecated. Use @block-export-add # instead. # -# Returns: error if the server is not running, or export with the same -# name already exists. +# Errors: +# - if the server is not running +# - if an export with the same name already exists # # Since: 1.3 ## @@ -294,11 +296,10 @@ # @deprecated: This command is deprecated. Use @block-export-del # instead. # -# Returns: error if -# -# - the server is not running -# - export is not found -# - mode is 'safe' and there are existing connections +# Errors: +# - if the server is not running +# - if export is not found +# - if mode is 'safe' and there are existing connections # # Since: 2.12 ## @@ -415,8 +416,10 @@ # @mode: Mode of command operation. See @BlockExportRemoveMode # description. Default is 'safe'. # -# Returns: Error if the export is not found or @mode is 'safe' and the -# export is still in use (e.g. by existing client connections) +# Errors: +# - if the export is not found +# - if @mode is 'safe' and the export is still in use (e.g. by +# existing client connections) # # Since: 5.2 ## diff --git a/qapi/block.json b/qapi/block.json index 79a0bcc208..65d9804bdf 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -110,8 +110,7 @@ # # @deprecated: Member @device is deprecated. Use @id instead. # -# Returns: -# - Nothing on success +# Errors: # - If @device is not a valid block device, DeviceNotFound # # Notes: Ejecting a device with no media results in success @@ -459,8 +458,7 @@ # the device will be removed from its group and the rest of its # members will not be affected. The 'group' parameter is ignored. # -# Returns: -# - Nothing on success +# Errors: # - If @device is not a valid block device, DeviceNotFound # # Since: 1.1 @@ -540,8 +538,8 @@ # @boundaries-flush: list of interval boundary values for flush # latency histogram. # -# Returns: error if device is not found or any boundary arrays are -# invalid. +# Errors: +# - if device is not found or any boundary arrays are invalid. # # Since: 4.0 # diff --git a/qapi/char.json b/qapi/char.json index 4873bc635a..777dde55d9 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -139,8 +139,6 @@ # - data itself is always Unicode regardless of format, like any # other string. # -# Returns: Nothing on success -# # Since: 1.4 # # Example: @@ -772,8 +770,6 @@ # # @id: the chardev's ID, must exist and not be in use # -# Returns: Nothing on success -# # Since: 1.4 # # Example: @@ -791,8 +787,6 @@ # # @id: the chardev's ID, must exist # -# Returns: Nothing on success -# # Since: 2.10 # # Example: diff --git a/qapi/dump.json b/qapi/dump.json index f82dd6a1af..4c021dd53c 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -92,8 +92,6 @@ # # Note: All boolean arguments default to false # -# Returns: nothing on success -# # Since: 1.2 # # Example: diff --git a/qapi/machine-target.json b/qapi/machine-target.json index 2c5dda735e..519adf3220 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -154,10 +154,13 @@ # Some architectures may not support comparing CPU models. s390x # supports comparing CPU models. # -# Returns: a CpuModelBaselineInfo. Returns an error if comparing CPU -# models is not supported, if a model cannot be used, if a model -# contains an unknown cpu definition name, unknown properties or -# properties with wrong types. +# Returns: a CpuModelBaselineInfo +# +# Errors: +# - if comparing CPU models is not supported +# - if a model cannot be used +# - if a model contains an unknown cpu definition name, unknown +# properties or properties with wrong types. # # Note: this command isn't specific to s390x, but is only implemented # on this architecture currently. @@ -201,10 +204,13 @@ # Some architectures may not support baselining CPU models. s390x # supports baselining CPU models. # -# Returns: a CpuModelBaselineInfo. Returns an error if baselining CPU -# models is not supported, if a model cannot be used, if a model -# contains an unknown cpu definition name, unknown properties or -# properties with wrong types. +# Returns: a CpuModelBaselineInfo +# +# Errors: +# - if baselining CPU models is not supported +# - if a model cannot be used +# - if a model contains an unknown cpu definition name, unknown +# properties or properties with wrong types. # # Note: this command isn't specific to s390x, but is only implemented # on this architecture currently. @@ -263,11 +269,14 @@ # Some architectures may not support all expansion types. s390x # supports "full" and "static". Arm only supports "full". # -# Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU -# models is not supported, if the model cannot be expanded, if the -# model contains an unknown CPU definition name, unknown -# properties or properties with a wrong type. Also returns an -# error if an expansion type is not supported. +# Returns: a CpuModelExpansionInfo +# +# Errors: +# - if expanding CPU models is not supported +# - if the model cannot be expanded +# - if the model contains an unknown CPU definition name, unknown +# properties or properties with a wrong type +# - if an expansion type is not supported # # Since: 2.8 ## @@ -405,8 +414,6 @@ # # @unstable: This command is experimental. # -# Returns: Nothing on success. -# # Since: 8.2 ## { 'command': 'set-cpu-topology', diff --git a/qapi/machine.json b/qapi/machine.json index 93b4677286..bb5a178909 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -326,8 +326,6 @@ # # Since: 1.1 # -# Returns: nothing. -# # Note: prior to 4.0, this command does nothing in case the guest # isn't suspended. # @@ -377,8 +375,6 @@ # all CPUs (ppc64). The command fails when the guest doesn't support # injecting. # -# Returns: If successful, nothing -# # Since: 0.14 # # Note: prior to 2.1, this command was only supported for x86 and s390 @@ -778,8 +774,6 @@ # @cpu-index: the index of the virtual CPU to use for translating the # virtual address (defaults to CPU 0) # -# Returns: Nothing on success -# # Since: 0.14 # # Notes: Errors were not reliably returned until 1.1 @@ -806,8 +800,6 @@ # # @filename: the file to save the memory to as binary data # -# Returns: Nothing on success -# # Since: 0.14 # # Notes: Errors were not reliably returned until 1.1 @@ -1060,8 +1052,7 @@ # # From it we have: balloon_size = vm_ram_size - @value # -# Returns: -# - Nothing on success +# Errors: # - If the balloon driver is enabled but not functional because # the KVM kernel module cannot support it, KVMMissingCap # - If no balloon device is present, DeviceNotActive @@ -1099,7 +1090,9 @@ # Return information about the balloon device. # # Returns: -# - @BalloonInfo on success +# @BalloonInfo +# +# Errors: # - If the balloon driver is enabled but not functional because # the KVM kernel module cannot support it, KVMMissingCap # - If no balloon device is present, DeviceNotActive @@ -1163,7 +1156,9 @@ # message from the guest. # # Returns: -# - @HvBalloonInfo on success +# @HvBalloonInfo +# +# Errors: # - If no hv-balloon device is present, guest memory status # reporting is not enabled or no guest memory status report # received yet, GenericError diff --git a/qapi/migration.json b/qapi/migration.json index 0b33a71ab4..51d188b902 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -531,6 +531,10 @@ # and can result in more stable read performance. Requires KVM # with accelerator property "dirty-ring-size" set. (Since 8.1) # +# @mapped-ram: Migrate using fixed offsets in the migration file for +# each RAM page. Requires a migration URI that supports seeking, +# such as a file. (since 9.0) +# # Features: # # @deprecated: Member @block is deprecated. Use blockdev-mirror with @@ -555,7 +559,7 @@ { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, 'validate-uuid', 'background-snapshot', 'zero-copy-send', 'postcopy-preempt', 'switchover-ack', - 'dirty-limit'] } + 'dirty-limit', 'mapped-ram'] } ## # @MigrationCapabilityStatus: @@ -636,28 +640,30 @@ # # @normal: the original form of migration. (since 8.2) # -# @cpr-reboot: The migrate command stops the VM and saves state to the URI. -# After quitting qemu, the user resumes by running qemu -incoming. +# @cpr-reboot: The migrate command stops the VM and saves state to +# the URI. After quitting QEMU, the user resumes by running +# QEMU -incoming. # -# This mode allows the user to quit qemu, and restart an updated version -# of qemu. The user may even update and reboot the OS before restarting, -# as long as the URI persists across a reboot. +# This mode allows the user to quit QEMU, optionally update and +# reboot the OS, and restart QEMU. If the user reboots, the URI +# must persist across the reboot, such as by using a file. # -# Unlike normal mode, the use of certain local storage options does not -# block the migration, but the user must not modify guest block devices -# between the quit and restart. +# Unlike normal mode, the use of certain local storage options +# does not block the migration, but the user must not modify the +# contents of guest block devices between the quit and restart. # -# This mode supports vfio devices provided the user first puts the guest -# in the suspended runstate, such as by issuing guest-suspend-ram to the -# qemu guest agent. +# This mode supports VFIO devices provided the user first puts +# the guest in the suspended runstate, such as by issuing +# guest-suspend-ram to the QEMU guest agent. # -# Best performance is achieved when the memory backend is shared and the -# @x-ignore-shared migration capability is set, but this is not required. -# Further, if the user reboots before restarting such a configuration, the -# shared backend must be be non-volatile across reboot, such as by backing -# it with a dax device. +# Best performance is achieved when the memory backend is shared +# and the @x-ignore-shared migration capability is set, but this +# is not required. Further, if the user reboots before restarting +# such a configuration, the shared memory must persist across the +# reboot, such as by backing it with a dax device. # -# cpr-reboot may not be used with postcopy, colo, or background-snapshot. +# @cpr-reboot may not be used with postcopy, background-snapshot, +# or COLO. # # (since 8.2) ## @@ -1566,8 +1572,6 @@ # # Cancel the current executing migration process. # -# Returns: nothing on success -# # Notes: This command succeeds even if there is no migration process # running. # @@ -1587,8 +1591,6 @@ # # @state: The state the migration is currently expected to be in # -# Returns: nothing on success -# # Since: 2.11 # # Example: @@ -1710,8 +1712,6 @@ # @deprecated: Members @inc and @blk are deprecated. Use # blockdev-mirror with NBD instead. # -# Returns: nothing on success -# # Since: 0.14 # # Notes: @@ -1793,8 +1793,6 @@ # @channels: list of migration stream channels with each stream in the # list connected to a destination interface endpoint. # -# Returns: nothing on success -# # Since: 2.3 # # Notes: @@ -1862,8 +1860,6 @@ # @live: Optional argument to ask QEMU to treat this command as part # of a live migration. Default to true. (since 2.11) # -# Returns: Nothing on success -# # Since: 1.1 # # Example: @@ -1882,8 +1878,6 @@ # # @enable: true to enable, false to disable. # -# Returns: nothing -# # Since: 1.3 # # Example: @@ -1926,8 +1920,6 @@ # @failover: true to do failover, false to stop. but cannot be # specified if 'enable' is true. default value is false. # -# Returns: nothing. -# # Example: # # -> { "execute": "xen-set-replication", @@ -1979,8 +1971,6 @@ # # Xen uses this command to notify replication to trigger a checkpoint. # -# Returns: nothing. -# # Example: # # -> { "execute": "xen-colo-do-checkpoint" } @@ -2037,8 +2027,6 @@ # # @uri: the URI to be used for the recovery of migration stream. # -# Returns: nothing. -# # Example: # # -> { "execute": "migrate-recover", @@ -2056,8 +2044,6 @@ # # Pause a migration. Currently it only supports postcopy. # -# Returns: nothing. -# # Example: # # -> { "execute": "migrate-pause" } @@ -2426,8 +2412,6 @@ # # If @tag already exists, an error will be reported # -# Returns: nothing -# # Example: # # -> { "execute": "snapshot-save", @@ -2498,8 +2482,6 @@ # device nodes that can have changed since the original @snapshot-save # command execution. # -# Returns: nothing -# # Example: # # -> { "execute": "snapshot-load", @@ -2561,8 +2543,6 @@ # to determine completion and to fetch details of any errors that # arise. # -# Returns: nothing -# # Example: # # -> { "execute": "snapshot-delete", diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 542a3e42f2..4e0a6492a9 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -472,9 +472,6 @@ # # @port: The port number # -# Returns: -# - Nothing on success. -# # Since: 8.0 # # Example: diff --git a/qapi/misc.json b/qapi/misc.json index 11c55c2b6c..1b0c5dad88 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -28,8 +28,6 @@ # # @tls: whether to perform TLS. Only applies to the "spice" protocol # -# Returns: nothing on success. -# # Since: 0.14 # # Example: @@ -160,8 +158,6 @@ # # Since: 0.14 # -# Returns: If successful, nothing -# # Notes: This command will succeed if the guest is currently running. # It will also succeed if the guest is in the "inmigrate" state; # in this case, the effect of the command is to make sure the @@ -196,8 +192,6 @@ # # Since: 3.0 # -# Returns: nothing -# # Example: # # -> { "execute": "x-exit-preconfig" } @@ -256,8 +250,6 @@ # # @fdname: file descriptor name # -# Returns: Nothing on success -# # Since: 0.14 # # Notes: If @fdname already exists, the file descriptor assigned to it @@ -285,8 +277,6 @@ # # @fdname: file descriptor name # -# Returns: Nothing on success -# # Since: 8.0 # # Notes: If @fdname already exists, the file descriptor assigned to it @@ -309,8 +299,6 @@ # # @fdname: file descriptor name # -# Returns: Nothing on success -# # Since: 0.14 # # Example: @@ -344,7 +332,9 @@ # @opaque: A free-form string that can be used to describe the fd. # # Returns: -# - @AddfdInfo on success +# @AddfdInfo +# +# Errors: # - If file descriptor was not received, GenericError # - If @fdset-id is a negative value, GenericError # @@ -374,8 +364,7 @@ # # @fd: The file descriptor that is to be removed. # -# Returns: -# - Nothing on success +# Errors: # - If @fdset-id or @fd is not found, GenericError # # Since: 1.2 @@ -528,8 +517,10 @@ # @option: option name # # Returns: list of @CommandLineOptionInfo for all options (or for the -# given @option). Returns an error if the given @option doesn't -# exist. +# given @option). +# +# Errors: +# - if the given @option doesn't exist # # Since: 1.5 # diff --git a/qapi/net.json b/qapi/net.json index 1374caac64..417b61a321 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -17,8 +17,7 @@ # # @up: true to set the link status to be up # -# Returns: -# - Nothing on success +# Errors: # - If @name is not a valid network device, DeviceNotFound # # Since: 0.14 @@ -44,8 +43,7 @@ # # Since: 0.14 # -# Returns: -# - Nothing on success +# Errors: # - If @type is not a valid network backend, DeviceNotFound # # Example: @@ -65,8 +63,7 @@ # # @id: the name of the network backend to remove # -# Returns: -# - Nothing on success +# Errors: # - If @id is not a valid network backend, DeviceNotFound # # Since: 0.14 @@ -828,9 +825,11 @@ # @name: net client name # # Returns: list of @RxFilterInfo for all NICs (or for the given NIC). -# Returns an error if the given @name doesn't exist, or given NIC -# doesn't support rx-filter querying, or given net client isn't a -# NIC. +# +# Errors: +# - if the given @name doesn't exist +# - if the given NIC doesn't support rx-filter querying +# - if the given net client isn't a NIC # # Since: 1.6 # diff --git a/qapi/qapi-type-helpers.c b/qapi/qapi-type-helpers.c index f76b34f647..266da013ad 100644 --- a/qapi/qapi-type-helpers.c +++ b/qapi/qapi-type-helpers.c @@ -21,3 +21,17 @@ HumanReadableText *human_readable_text_from_str(GString *str) return ret; } + +char **strv_from_str_list(const strList *list) +{ + const strList *tail; + int i = 0; + char **strv = g_new(char *, QAPI_LIST_LENGTH(list) + 1); + + for (tail = list; tail != NULL; tail = tail->next) { + strv[i++] = g_strdup(tail->value); + } + strv[i] = NULL; + + return strv; +} diff --git a/qapi/qdev.json b/qapi/qdev.json index 32ffaee644..facaa0bc6a 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -89,8 +89,7 @@ # # @id: the device's ID or QOM path # -# Returns: -# - Nothing on success +# Errors: # - If @id is not a valid device, DeviceNotFound # # Notes: When this command completes, the device may not be removed diff --git a/qapi/qom.json b/qapi/qom.json index 2a6e49365a..032c6fa037 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -1056,8 +1056,7 @@ # # Create a QOM object. # -# Returns: -# - Nothing on success +# Errors: # - Error if @qom-type is not a valid class name # # Since: 2.0 @@ -1079,8 +1078,7 @@ # # @id: the name of the QOM object to remove # -# Returns: -# - Nothing on success +# Errors: # - Error if @id is not a valid id for a QOM object # # Since: 2.0 diff --git a/qapi/run-state.json b/qapi/run-state.json index dd0770b379..789fc34559 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -395,10 +395,7 @@ # # @panic: @PanicAction action taken on guest panic. # -# @watchdog: @WatchdogAction action taken when watchdog timer expires -# . -# -# Returns: Nothing on success. +# @watchdog: @WatchdogAction action taken when watchdog timer expires. # # Since: 6.0 # diff --git a/qapi/tpm.json b/qapi/tpm.json index 07a73e5f2b..1577b5c259 100644 --- a/qapi/tpm.json +++ b/qapi/tpm.json @@ -166,8 +166,6 @@ # # Return information about the TPM device # -# Returns: @TPMInfo on success -# # Since: 1.5 # # Example: diff --git a/qapi/transaction.json b/qapi/transaction.json index 45233ddd2a..5749c133d4 100644 --- a/qapi/transaction.json +++ b/qapi/transaction.json @@ -234,9 +234,8 @@ # execution of the transaction. See @TransactionProperties for # additional detail. # -# Returns: nothing on success -# -# Errors depend on the operations of the transaction +# Errors: +# Any errors from commands in the transaction # # Note: The transaction aborts on the first failure. Therefore, there # will be information on only one failed operation returned in an diff --git a/qapi/ui.json b/qapi/ui.json index e3999b7c07..1726f15429 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -78,8 +78,7 @@ # # Set the password of a remote display server. # -# Returns: -# - Nothing on success +# Errors: # - If Spice is not enabled, DeviceNotFound # # Since: 0.14 @@ -140,8 +139,7 @@ # # Expire the password of a remote display server. # -# Returns: -# - Nothing on success +# Errors: # - If @protocol is 'spice' and Spice is not active, # DeviceNotFound # @@ -187,8 +185,6 @@ # # @format: image format for screendump. (default: ppm) (Since 7.1) # -# Returns: Nothing on success -# # Since: 0.14 # # Example: @@ -1036,8 +1032,7 @@ # @hold-time: time to delay key up events, milliseconds. Defaults to # 100 # -# Returns: -# - Nothing on success +# Errors: # - If key is unknown or redundant, GenericError # # Since: 1.3 @@ -1259,8 +1254,6 @@ # # @events: List of InputEvent union. # -# Returns: Nothing on success. -# # Since: 2.6 # # Note: The consoles are visible in the qom tree, under @@ -1605,8 +1598,6 @@ # # Reload display configuration. # -# Returns: Nothing on success. -# # Since: 6.0 # # Example: @@ -1664,8 +1655,6 @@ # # Update display configuration. # -# Returns: Nothing on success. -# # Since: 7.1 # # Example: diff --git a/qapi/yank.json b/qapi/yank.json index b7aeb9ceef..89f2f4d199 100644 --- a/qapi/yank.json +++ b/qapi/yank.json @@ -78,9 +78,8 @@ # # @instances: the instances to be yanked # -# Returns: -# - Nothing on success -# - @DeviceNotFound error, if any of the YankInstances doesn't exist +# Errors: +# - If any of the YankInstances doesn't exist, DeviceNotFound # # Example: # diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index b8efe31897..9554b566a7 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -153,8 +153,6 @@ # @time: time of nanoseconds, relative to the Epoch of 1970-01-01 in # UTC. # -# Returns: Nothing on success. -# # Since: 1.5 ## { 'command': 'guest-set-time', @@ -230,7 +228,7 @@ # # @mode: open mode, as per fopen(), "r" is the default. # -# Returns: Guest file handle on success. +# Returns: Guest file handle # # Since: 0.15.0 ## @@ -245,8 +243,6 @@ # # @handle: filehandle returned by guest-file-open # -# Returns: Nothing on success. -# # Since: 0.15.0 ## { 'command': 'guest-file-close', @@ -281,7 +277,7 @@ # @count: maximum number of bytes to read (default is 4KB, maximum is # 48MB) # -# Returns: @GuestFileRead on success. +# Returns: @GuestFileRead # # Since: 0.15.0 ## @@ -316,7 +312,7 @@ # @count: bytes to write (actual bytes, after base64-decode), default # is all content in buf-b64 buffer after base64 decoding # -# Returns: @GuestFileWrite on success. +# Returns: @GuestFileWrite # # Since: 0.15.0 ## @@ -383,7 +379,7 @@ # # @whence: Symbolic or numeric code for interpreting offset # -# Returns: @GuestFileSeek on success. +# Returns: @GuestFileSeek # # Since: 0.15.0 ## @@ -399,8 +395,6 @@ # # @handle: filehandle returned by guest-file-open # -# Returns: Nothing on success. -# # Since: 0.15.0 ## { 'command': 'guest-file-flush', @@ -443,15 +437,16 @@ # command succeeded, you may call @guest-fsfreeze-thaw later to # unfreeze. # +# On error, all filesystems will be thawed. If no filesystems are +# frozen as a result of this call, then @guest-fsfreeze-status will +# remain "thawed" and calling @guest-fsfreeze-thaw is not necessary. +# +# Returns: Number of file systems currently frozen. +# # Note: On Windows, the command is implemented with the help of a # Volume Shadow-copy Service DLL helper. The frozen state is # limited for up to 10 seconds by VSS. # -# Returns: Number of file systems currently frozen. On error, all -# filesystems will be thawed. If no filesystems are frozen as a -# result of this call, then @guest-fsfreeze-status will remain -# "thawed" and calling @guest-fsfreeze-thaw is not necessary. -# # Since: 0.15.0 ## { 'command': 'guest-fsfreeze-freeze', @@ -463,12 +458,13 @@ # Sync and freeze specified guest filesystems. See also # @guest-fsfreeze-freeze. # +# On error, all filesystems will be thawed. +# # @mountpoints: an array of mountpoints of filesystems to be frozen. # If omitted, every mounted filesystem is frozen. Invalid mount # points are ignored. # -# Returns: Number of file systems currently frozen. On error, all -# filesystems will be thawed. +# Returns: Number of file systems currently frozen. # # Since: 2.2 ## @@ -561,9 +557,8 @@ # could also exit (or set its status to "shutdown") due to other # reasons. # -# The following errors may be returned: -# -# - If suspend to disk is not supported, Unsupported +# Errors: +# - If suspend to disk is not supported, Unsupported # # Notes: It's strongly recommended to issue the guest-sync command # before sending commands when the guest resumes @@ -598,9 +593,8 @@ # 2. Issue the query-status QMP command to confirm the VM status is # "suspended" # -# The following errors may be returned: -# -# - If suspend to ram is not supported, Unsupported +# Errors: +# - If suspend to ram is not supported, Unsupported # # Notes: It's strongly recommended to issue the guest-sync command # before sending commands when the guest resumes @@ -634,9 +628,8 @@ # 2. Issue the query-status QMP command to confirm the VM status is # "suspended" # -# The following errors may be returned: -# -# - If hybrid suspend is not supported, Unsupported +# Errors: +# - If hybrid suspend is not supported, Unsupported # # Notes: It's strongly recommended to issue the guest-sync command # before sending commands when the guest resumes @@ -732,7 +725,7 @@ # # Get list of guest IP addresses, MAC addresses and netmasks. # -# Returns: List of GuestNetworkInterface on success. +# Returns: List of GuestNetworkInterface # # Since: 1.1 ## @@ -796,9 +789,6 @@ # - 0: # if the @vcpus list was empty on input. Guest state has not # been changed. Otherwise, -# - Error: -# processing the first node of @vcpus failed for the reason -# returned. Guest state has not been changed. Otherwise, # - < length(@vcpus): # more than zero initial nodes have been processed, but not the # entire @vcpus list. Guest state has changed accordingly. To @@ -808,6 +798,10 @@ # - length(@vcpus): # call successful. # +# Errors: +# - If the reconfiguration of the first node in @vcpus failed. +# Guest state has not been changed. +# # Since: 1.5 ## { 'command': 'guest-set-vcpus', @@ -1079,8 +1073,6 @@ # transmission, even if already crypt()d, to ensure it is 8-bit safe # when passed as JSON. # -# Returns: Nothing on success. -# # Since: 2.3 ## { 'command': 'guest-set-user-password', @@ -1184,9 +1176,9 @@ # @GuestMemoryBlockResponse, which is corresponding to the input # list. # -# Note: it will return NULL if the @mem-blks list was empty on -# input, or there is an error, and in this case, guest state will -# not be changed. +# Note: it will return an empty list if the @mem-blks list was +# empty on input, or there is an error, and in this case, guest +# state will not be changed. # # Since: 2.3 ## @@ -1257,7 +1249,7 @@ # # @pid: pid returned from guest-exec # -# Returns: GuestExecStatus on success. +# Returns: GuestExecStatus # # Since: 2.5 ## @@ -1325,7 +1317,7 @@ # @capture-output: bool flag to enable capture of stdout/stderr of # running process. defaults to false. # -# Returns: PID on success. +# Returns: PID # # Since: 2.5 ## @@ -1354,7 +1346,7 @@ # or even present in DNS or some other name service at all. It need # not even be unique on your local network or site, but usually it is. # -# Returns: the host name of the machine on success +# Returns: the host name of the machine # # Since: 2.10 ## @@ -1604,8 +1596,6 @@ # # @reset: ignore the existing content, set it with the given keys only # -# Returns: Nothing on success. -# # Since: 5.2 ## { 'command': 'guest-ssh-add-authorized-keys', @@ -1624,8 +1614,6 @@ # @keys: the public keys to remove (in OpenSSH/sshd(8) authorized_keys # format) # -# Returns: Nothing on success. -# # Since: 5.2 ## { 'command': 'guest-ssh-remove-authorized-keys', diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index 11707418fb..d8f76060b8 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -543,7 +543,7 @@ class QAPISchemaParser: line = self.get_doc_indented(doc) no_more_args = True elif match := re.match( - r'(Returns|Since|Notes?|Examples?|TODO): *', + r'(Returns|Errors|Since|Notes?|Examples?|TODO): *', line): # tagged section doc.new_tagged_section(self.info, match.group(1)) @@ -639,6 +639,11 @@ class QAPIDoc: # dicts mapping parameter/feature names to their description self.args: Dict[str, QAPIDoc.ArgSection] = {} self.features: Dict[str, QAPIDoc.ArgSection] = {} + # a command's "Returns" and "Errors" section + self.returns: Optional[QAPIDoc.Section] = None + self.errors: Optional[QAPIDoc.Section] = None + # "Since" section + self.since: Optional[QAPIDoc.Section] = None # sections other than .body, .args, .features self.sections: List[QAPIDoc.Section] = [] @@ -660,14 +665,22 @@ class QAPIDoc: self.all_sections.append(section) def new_tagged_section(self, info: QAPISourceInfo, tag: str) -> None: - if tag in ('Returns', 'Since'): - for section in self.all_sections: - if isinstance(section, self.ArgSection): - continue - if section.tag == tag: - raise QAPISemError( - info, "duplicated '%s' section" % tag) section = self.Section(info, tag) + if tag == 'Returns': + if self.returns: + raise QAPISemError( + info, "duplicated '%s' section" % tag) + self.returns = section + elif tag == 'Errors': + if self.errors: + raise QAPISemError( + info, "duplicated '%s' section" % tag) + self.errors = section + elif tag == 'Since': + if self.since: + raise QAPISemError( + info, "duplicated '%s' section" % tag) + self.since = section self.sections.append(section) self.all_sections.append(section) @@ -708,13 +721,20 @@ class QAPIDoc: self.features[feature.name].connect(feature) def check_expr(self, expr: QAPIExpression) -> None: - if 'command' not in expr: - sec = next((sec for sec in self.sections - if sec.tag == 'Returns'), - None) - if sec: - raise QAPISemError(sec.info, - "'Returns:' is only valid for commands") + if 'command' in expr: + if self.returns and 'returns' not in expr: + raise QAPISemError( + self.returns.info, + "'Returns' section, but command doesn't return anything") + else: + if self.returns: + raise QAPISemError( + self.returns.info, + "'Returns' section is only valid for commands") + if self.errors: + raise QAPISemError( + self.returns.info, + "'Errors' section is only valid for commands") def check(self) -> None: diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json index 5bb2b69071..de38a386e8 100644 --- a/tests/qapi-schema/doc-good.json +++ b/tests/qapi-schema/doc-good.json @@ -159,6 +159,8 @@ # # Returns: @Object # +# Errors: some +# # TODO: frobnicate # # Notes: diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out index 34ee74af4b..716a9a4102 100644 --- a/tests/qapi-schema/doc-good.out +++ b/tests/qapi-schema/doc-good.out @@ -173,6 +173,8 @@ another feature @arg3 is undocumented section=Returns @Object + section=Errors +some section=TODO frobnicate section=Notes diff --git a/tests/qapi-schema/doc-good.txt b/tests/qapi-schema/doc-good.txt index 879f6ff50a..847db70412 100644 --- a/tests/qapi-schema/doc-good.txt +++ b/tests/qapi-schema/doc-good.txt @@ -206,6 +206,12 @@ Returns "Object" +Errors +~~~~~~ + +some + + Notes ~~~~~ diff --git a/tests/qapi-schema/doc-invalid-return.err b/tests/qapi-schema/doc-invalid-return.err index 3d9e71c2b3..aafd57b135 100644 --- a/tests/qapi-schema/doc-invalid-return.err +++ b/tests/qapi-schema/doc-invalid-return.err @@ -1 +1 @@ -doc-invalid-return.json:6: 'Returns:' is only valid for commands +doc-invalid-return.json:6: 'Returns' section is only valid for commands diff --git a/tests/qapi-schema/doc-invalid-return2.err b/tests/qapi-schema/doc-invalid-return2.err new file mode 100644 index 0000000000..c3d0c7a452 --- /dev/null +++ b/tests/qapi-schema/doc-invalid-return2.err @@ -0,0 +1 @@ +doc-invalid-return2.json:5: 'Returns' section, but command doesn't return anything diff --git a/tests/qapi-schema/doc-invalid-return2.json b/tests/qapi-schema/doc-invalid-return2.json new file mode 100644 index 0000000000..37883d4fea --- /dev/null +++ b/tests/qapi-schema/doc-invalid-return2.json @@ -0,0 +1,7 @@ +# Command doesn't return anything + +## +# @foo: +# Returns: blah +## +{ 'command': 'foo' } diff --git a/tests/qapi-schema/doc-invalid-return2.out b/tests/qapi-schema/doc-invalid-return2.out new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/doc-invalid-return2.out diff --git a/tests/qapi-schema/meson.build b/tests/qapi-schema/meson.build index 4b8329d070..0f479d9317 100644 --- a/tests/qapi-schema/meson.build +++ b/tests/qapi-schema/meson.build @@ -79,6 +79,7 @@ schemas = [ 'doc-invalid-end.json', 'doc-invalid-end2.json', 'doc-invalid-return.json', + 'doc-invalid-return2.json', 'doc-invalid-section.json', 'doc-invalid-start.json', 'doc-missing-colon.json', diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 83512bce85..4023d808f9 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -2200,6 +2200,14 @@ static void *test_mode_reboot_start(QTestState *from, QTestState *to) return NULL; } +static void *migrate_mapped_ram_start(QTestState *from, QTestState *to) +{ + migrate_set_capability(from, "mapped-ram", true); + migrate_set_capability(to, "mapped-ram", true); + + return NULL; +} + static void test_mode_reboot(void) { g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, @@ -2214,6 +2222,72 @@ static void test_mode_reboot(void) test_file_common(&args, true); } +static void test_precopy_file_mapped_ram_live(void) +{ + g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, + FILE_TEST_FILENAME); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = "defer", + .start_hook = migrate_mapped_ram_start, + }; + + test_file_common(&args, false); +} + +static void test_precopy_file_mapped_ram(void) +{ + g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, + FILE_TEST_FILENAME); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = "defer", + .start_hook = migrate_mapped_ram_start, + }; + + test_file_common(&args, true); +} + +static void *migrate_multifd_mapped_ram_start(QTestState *from, QTestState *to) +{ + migrate_mapped_ram_start(from, to); + + migrate_set_parameter_int(from, "multifd-channels", 4); + migrate_set_parameter_int(to, "multifd-channels", 4); + + migrate_set_capability(from, "multifd", true); + migrate_set_capability(to, "multifd", true); + + return NULL; +} + +static void test_multifd_file_mapped_ram_live(void) +{ + g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, + FILE_TEST_FILENAME); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = "defer", + .start_hook = migrate_multifd_mapped_ram_start, + }; + + test_file_common(&args, false); +} + +static void test_multifd_file_mapped_ram(void) +{ + g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs, + FILE_TEST_FILENAME); + MigrateCommon args = { + .connect_uri = uri, + .listen_uri = "defer", + .start_hook = migrate_multifd_mapped_ram_start, + }; + + test_file_common(&args, true); +} + + static void test_precopy_tcp_plain(void) { MigrateCommon args = { @@ -2462,6 +2536,13 @@ static void *migrate_precopy_fd_file_start(QTestState *from, QTestState *to) return NULL; } +static void *migrate_fd_file_mapped_ram_start(QTestState *from, QTestState *to) +{ + migrate_mapped_ram_start(from, to); + + return migrate_precopy_fd_file_start(from, to); +} + static void test_migrate_precopy_fd_file(void) { MigrateCommon args = { @@ -2472,6 +2553,36 @@ static void test_migrate_precopy_fd_file(void) }; test_file_common(&args, true); } + +static void test_migrate_precopy_fd_file_mapped_ram(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .connect_uri = "fd:fd-mig", + .start_hook = migrate_fd_file_mapped_ram_start, + .finish_hook = test_migrate_fd_finish_hook + }; + test_file_common(&args, true); +} + +static void *migrate_multifd_fd_mapped_ram_start(QTestState *from, + QTestState *to) +{ + migrate_multifd_mapped_ram_start(from, to); + return migrate_precopy_fd_file_start(from, to); +} + +static void test_multifd_fd_mapped_ram(void) +{ + MigrateCommon args = { + .connect_uri = "fd:fd-mig", + .listen_uri = "defer", + .start_hook = migrate_multifd_fd_mapped_ram_start, + .finish_hook = test_migrate_fd_finish_hook + }; + + test_file_common(&args, true); +} #endif /* _WIN32 */ static void do_test_validate_uuid(MigrateStart *args, bool should_fail) @@ -2664,6 +2775,13 @@ static void * test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from, QTestState *to) { + /* + * Overloading this test to also check that set_parameter does not error. + * This is also done in the tests for the other compression methods. + */ + migrate_set_parameter_int(from, "multifd-zlib-level", 2); + migrate_set_parameter_int(to, "multifd-zlib-level", 2); + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zlib"); } @@ -2672,6 +2790,9 @@ static void * test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, QTestState *to) { + migrate_set_parameter_int(from, "multifd-zstd-level", 2); + migrate_set_parameter_int(to, "multifd-zstd-level", 2); + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zstd"); } #endif /* CONFIG_ZSTD */ @@ -3509,6 +3630,20 @@ int main(int argc, char **argv) migration_test_add("/migration/mode/reboot", test_mode_reboot); } + migration_test_add("/migration/precopy/file/mapped-ram", + test_precopy_file_mapped_ram); + migration_test_add("/migration/precopy/file/mapped-ram/live", + test_precopy_file_mapped_ram_live); + + migration_test_add("/migration/multifd/file/mapped-ram", + test_multifd_file_mapped_ram); + migration_test_add("/migration/multifd/file/mapped-ram/live", + test_multifd_file_mapped_ram_live); +#ifndef _WIN32 + migration_test_add("/migration/multifd/fd/mapped-ram", + test_multifd_fd_mapped_ram); +#endif + #ifdef CONFIG_GNUTLS migration_test_add("/migration/precopy/unix/tls/psk", test_precopy_unix_tls_psk); @@ -3570,6 +3705,8 @@ int main(int argc, char **argv) test_migrate_precopy_fd_socket); migration_test_add("/migration/precopy/fd/file", test_migrate_precopy_fd_file); + migration_test_add("/migration/precopy/fd/file/mapped-ram", + test_migrate_precopy_fd_file_mapped_ram); #endif migration_test_add("/migration/validate_uuid", test_validate_uuid); migration_test_add("/migration/validate_uuid_error", |