diff options
53 files changed, 895 insertions, 650 deletions
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index ecac3ec50c..544385f5be 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -110,7 +110,8 @@ crash-test-debian: IMAGE: debian-amd64 script: - cd build - - scripts/device-crash-test -q ./qemu-system-i386 + - make check-venv + - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-i386 build-system-fedora: extends: .native_build_job_template @@ -155,8 +156,9 @@ crash-test-fedora: IMAGE: fedora script: - cd build - - scripts/device-crash-test -q ./qemu-system-ppc - - scripts/device-crash-test -q ./qemu-system-riscv32 + - make check-venv + - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-ppc + - tests/venv/bin/python3 scripts/device-crash-test -q ./qemu-system-riscv32 build-system-centos: extends: .native_build_job_template diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index 24520ea112..8f1dda4344 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -84,8 +84,7 @@ void icount_handle_deadline(void) * Don't interrupt cpu thread, when these events are waiting * (i.e., there is no checkpoint) */ - if (deadline == 0 - && (replay_mode != REPLAY_MODE_PLAY || replay_has_checkpoint())) { + if (deadline == 0) { icount_notify_aio_contexts(); } } @@ -109,7 +108,7 @@ void icount_prepare_for_run(CPUState *cpu) replay_mutex_lock(); - if (cpu->icount_budget == 0 && replay_has_checkpoint()) { + if (cpu->icount_budget == 0) { icount_notify_aio_contexts(); } } @@ -1035,7 +1035,6 @@ Advanced options (experts only): --with-git-submodules=ignore do not update or check git submodules (default if no .git dir) --static enable static build [$static] --bindir=PATH install binaries in PATH - --efi-aarch64=PATH PATH of efi file to use for aarch64 VMs. --with-suffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir/docdir [$qemu_suffix] --without-default-features default all --enable-* options to "disabled" --without-default-devices do not include any device that is not needed to diff --git a/docs/devel/index-tcg.rst b/docs/devel/index-tcg.rst index 0b0ad12c22..7b9760b26f 100644 --- a/docs/devel/index-tcg.rst +++ b/docs/devel/index-tcg.rst @@ -13,3 +13,4 @@ are only implementing things for HW accelerated hypervisors. multi-thread-tcg tcg-icount tcg-plugins + replay diff --git a/docs/devel/replay.rst b/docs/devel/replay.rst new file mode 100644 index 0000000000..0244be8b9c --- /dev/null +++ b/docs/devel/replay.rst @@ -0,0 +1,306 @@ +.. + Copyright (c) 2022, ISP RAS + Written by Pavel Dovgalyuk and Alex Bennée + +======================= +Execution Record/Replay +======================= + +Core concepts +============= + +Record/replay functions are used for the deterministic replay of qemu +execution. Execution recording writes a non-deterministic events log, which +can be later used for replaying the execution anywhere and for unlimited +number of times. Execution replaying reads the log and replays all +non-deterministic events including external input, hardware clocks, +and interrupts. + +Several parts of QEMU include function calls to make event log recording +and replaying. +Devices' models that have non-deterministic input from external devices were +changed to write every external event into the execution log immediately. +E.g. network packets are written into the log when they arrive into the virtual +network adapter. + +All non-deterministic events are coming from these devices. But to +replay them we need to know at which moments they occur. We specify +these moments by counting the number of instructions executed between +every pair of consecutive events. + +Academic papers with description of deterministic replay implementation: + +* `Deterministic Replay of System's Execution with Multi-target QEMU Simulator for Dynamic Analysis and Reverse Debugging <https://www.computer.org/csdl/proceedings/csmr/2012/4666/00/4666a553-abs.html>`_ +* `Don't panic: reverse debugging of kernel drivers <https://dl.acm.org/citation.cfm?id=2786805.2803179>`_ + +Modifications of qemu include: + + * wrappers for clock and time functions to save their return values in the log + * saving different asynchronous events (e.g. system shutdown) into the log + * synchronization of the bottom halves execution + * synchronization of the threads from thread pool + * recording/replaying user input (mouse, keyboard, and microphone) + * adding internal checkpoints for cpu and io synchronization + * network filter for recording and replaying the packets + * block driver for making block layer deterministic + * serial port input record and replay + * recording of random numbers obtained from the external sources + +Instruction counting +-------------------- + +QEMU should work in icount mode to use record/replay feature. icount was +designed to allow deterministic execution in absence of external inputs +of the virtual machine. We also use icount to control the occurrence of the +non-deterministic events. The number of instructions elapsed from the last event +is written to the log while recording the execution. In replay mode we +can predict when to inject that event using the instruction counter. + +Locking and thread synchronisation +---------------------------------- + +Previously the synchronisation of the main thread and the vCPU thread +was ensured by the holding of the BQL. However the trend has been to +reduce the time the BQL was held across the system including under TCG +system emulation. As it is important that batches of events are kept +in sequence (e.g. expiring timers and checkpoints in the main thread +while instruction checkpoints are written by the vCPU thread) we need +another lock to keep things in lock-step. This role is now handled by +the replay_mutex_lock. It used to be held only for each event being +written but now it is held for a whole execution period. This results +in a deterministic ping-pong between the two main threads. + +As the BQL is now a finer grained lock than the replay_lock it is almost +certainly a bug, and a source of deadlocks, to take the +replay_mutex_lock while the BQL is held. This is enforced by an assert. +While the unlocks are usually in the reverse order, this is not +necessary; you can drop the replay_lock while holding the BQL, without +doing a more complicated unlock_iothread/replay_unlock/lock_iothread +sequence. + +Checkpoints +----------- + +Replaying the execution of virtual machine is bound by sources of +non-determinism. These are inputs from clock and peripheral devices, +and QEMU thread scheduling. Thread scheduling affect on processing events +from timers, asynchronous input-output, and bottom halves. + +Invocations of timers are coupled with clock reads and changing the state +of the virtual machine. Reads produce non-deterministic data taken from +host clock. And VM state changes should preserve their order. Their relative +order in replay mode must replicate the order of callbacks in record mode. +To preserve this order we use checkpoints. When a specific clock is processed +in record mode we save to the log special "checkpoint" event. +Checkpoints here do not refer to virtual machine snapshots. They are just +record/replay events used for synchronization. + +QEMU in replay mode will try to invoke timers processing in random moment +of time. That's why we do not process a group of timers until the checkpoint +event will be read from the log. Such an event allows synchronizing CPU +execution and timer events. + +Two other checkpoints govern the "warping" of the virtual clock. +While the virtual machine is idle, the virtual clock increments at +1 ns per *real time* nanosecond. This is done by setting up a timer +(called the warp timer) on the virtual real time clock, so that the +timer fires at the next deadline of the virtual clock; the virtual clock +is then incremented (which is called "warping" the virtual clock) as +soon as the timer fires or the CPUs need to go out of the idle state. +Two functions are used for this purpose; because these actions change +virtual machine state and must be deterministic, each of them creates a +checkpoint. ``icount_start_warp_timer`` checks if the CPUs are idle and if so +starts accounting real time to virtual clock. ``icount_account_warp_timer`` +is called when the CPUs get an interrupt or when the warp timer fires, +and it warps the virtual clock by the amount of real time that has passed +since ``icount_start_warp_timer``. + +Virtual devices +=============== + +Record/replay mechanism, that could be enabled through icount mode, expects +the virtual devices to satisfy the following requirement: +everything that affects +the guest state during execution in icount mode should be deterministic. + +Timers +------ + +Timers are used to execute callbacks from different subsystems of QEMU +at the specified moments of time. There are several kinds of timers: + + * Real time clock. Based on host time and used only for callbacks that + do not change the virtual machine state. For this reason real time + clock and timers does not affect deterministic replay at all. + * Virtual clock. These timers run only during the emulation. In icount + mode virtual clock value is calculated using executed instructions counter. + That is why it is completely deterministic and does not have to be recorded. + * Host clock. This clock is used by device models that simulate real time + sources (e.g. real time clock chip). Host clock is the one of the sources + of non-determinism. Host clock read operations should be logged to + make the execution deterministic. + * Virtual real time clock. This clock is similar to real time clock but + it is used only for increasing virtual clock while virtual machine is + sleeping. Due to its nature it is also non-deterministic as the host clock + and has to be logged too. + +All virtual devices should use virtual clock for timers that change the guest +state. Virtual clock is deterministic, therefore such timers are deterministic +too. + +Virtual devices can also use realtime clock for the events that do not change +the guest state directly. When the clock ticking should depend on VM execution +speed, use virtual clock with EXTERNAL attribute. It is not deterministic, +but its speed depends on the guest execution. This clock is used by +the virtual devices (e.g., slirp routing device) that lie outside the +replayed guest. + +Block devices +------------- + +Block devices record/replay module (``blkreplay``) intercepts calls of +bdrv coroutine functions at the top of block drivers stack. + +All block completion operations are added to the queue in the coroutines. +When the queue is flushed the information about processed requests +is recorded to the log. In replay phase the queue is matched with +events read from the log. Therefore block devices requests are processed +deterministically. + +Bottom halves +------------- + +Bottom half callbacks, that affect the guest state, should be invoked through +``replay_bh_schedule_event`` or ``replay_bh_schedule_oneshot_event`` functions. +Their invocations are saved in record mode and synchronized with the existing +log in replay mode. + +Disk I/O events are completely deterministic in our model, because +in both record and replay modes we start virtual machine from the same +disk state. But callbacks that virtual disk controller uses for reading and +writing the disk may occur at different moments of time in record and replay +modes. + +Reading and writing requests are created by CPU thread of QEMU. Later these +requests proceed to block layer which creates "bottom halves". Bottom +halves consist of callback and its parameters. They are processed when +main loop locks the global mutex. These locks are not synchronized with +replaying process because main loop also processes the events that do not +affect the virtual machine state (like user interaction with monitor). + +That is why we had to implement saving and replaying bottom halves callbacks +synchronously to the CPU execution. When the callback is about to execute +it is added to the queue in the replay module. This queue is written to the +log when its callbacks are executed. In replay mode callbacks are not processed +until the corresponding event is read from the events log file. + +Sometimes the block layer uses asynchronous callbacks for its internal purposes +(like reading or writing VM snapshots or disk image cluster tables). In this +case bottom halves are not marked as "replayable" and do not saved +into the log. + +Saving/restoring the VM state +----------------------------- + +All fields in the device state structure (including virtual timers) +should be restored by loadvm to the same values they had before savevm. + +Avoid accessing other devices' state, because the order of saving/restoring +is not defined. It means that you should not call functions like +``update_irq`` in ``post_load`` callback. Save everything explicitly to avoid +the dependencies that may make restoring the VM state non-deterministic. + +Stopping the VM +--------------- + +Stopping the guest should not interfere with its state (with the exception +of the network connections, that could be broken by the remote timeouts). +VM can be stopped at any moment of replay by the user. Restarting the VM +after that stop should not break the replay by the unneeded guest state change. + +Replay log format +================= + +Record/replay log consists of the header and the sequence of execution +events. The header includes 4-byte replay version id and 8-byte reserved +field. Version is updated every time replay log format changes to prevent +using replay log created by another build of qemu. + +The sequence of the events describes virtual machine state changes. +It includes all non-deterministic inputs of VM, synchronization marks and +instruction counts used to correctly inject inputs at replay. + +Synchronization marks (checkpoints) are used for synchronizing qemu threads +that perform operations with virtual hardware. These operations may change +system's state (e.g., change some register or generate interrupt) and +therefore should execute synchronously with CPU thread. + +Every event in the log includes 1-byte event id and optional arguments. +When argument is an array, it is stored as 4-byte array length +and corresponding number of bytes with data. +Here is the list of events that are written into the log: + + - EVENT_INSTRUCTION. Instructions executed since last event. Followed by: + + - 4-byte number of executed instructions. + + - EVENT_INTERRUPT. Used to synchronize interrupt processing. + - EVENT_EXCEPTION. Used to synchronize exception handling. + - EVENT_ASYNC. This is a group of events. When such an event is generated, + it is stored in the queue and processed in icount_account_warp_timer(). + Every such event has it's own id from the following list: + + - REPLAY_ASYNC_EVENT_BH. Bottom-half callback. This event synchronizes + callbacks that affect virtual machine state, but normally called + asynchronously. Followed by: + + - 8-byte operation id. + + - REPLAY_ASYNC_EVENT_INPUT. Input device event. Contains + parameters of keyboard and mouse input operations + (key press/release, mouse pointer movement). Followed by: + + - 9-16 bytes depending of input event. + + - REPLAY_ASYNC_EVENT_INPUT_SYNC. Internal input synchronization event. + - REPLAY_ASYNC_EVENT_CHAR_READ. Character (e.g., serial port) device input + initiated by the sender. Followed by: + + - 1-byte character device id. + - Array with bytes were read. + + - REPLAY_ASYNC_EVENT_BLOCK. Block device operation. Used to synchronize + operations with disk and flash drives with CPU. Followed by: + + - 8-byte operation id. + + - REPLAY_ASYNC_EVENT_NET. Incoming network packet. Followed by: + + - 1-byte network adapter id. + - 4-byte packet flags. + - Array with packet bytes. + + - EVENT_SHUTDOWN. Occurs when user sends shutdown event to qemu, + e.g., by closing the window. + - EVENT_CHAR_WRITE. Used to synchronize character output operations. Followed by: + + - 4-byte output function return value. + - 4-byte offset in the output array. + + - EVENT_CHAR_READ_ALL. Used to synchronize character input operations, + initiated by qemu. Followed by: + + - Array with bytes that were read. + + - EVENT_CHAR_READ_ALL_ERROR. Unsuccessful character input operation, + initiated by qemu. Followed by: + + - 4-byte error code. + + - EVENT_CLOCK + clock_id. Group of events for host clock read operations. Followed by: + + - 8-byte clock value. + + - EVENT_CHECKPOINT + checkpoint_id. Checkpoint for synchronization of + CPU, internal threads, and asynchronous input events. + - EVENT_END. Last event in the log. diff --git a/docs/devel/replay.txt b/docs/devel/replay.txt deleted file mode 100644 index e641c35add..0000000000 --- a/docs/devel/replay.txt +++ /dev/null @@ -1,46 +0,0 @@ -Record/replay mechanism, that could be enabled through icount mode, expects -the virtual devices to satisfy the following requirements. - -The main idea behind this document is that everything that affects -the guest state during execution in icount mode should be deterministic. - -Timers -====== - -All virtual devices should use virtual clock for timers that change the guest -state. Virtual clock is deterministic, therefore such timers are deterministic -too. - -Virtual devices can also use realtime clock for the events that do not change -the guest state directly. When the clock ticking should depend on VM execution -speed, use virtual clock with EXTERNAL attribute. It is not deterministic, -but its speed depends on the guest execution. This clock is used by -the virtual devices (e.g., slirp routing device) that lie outside the -replayed guest. - -Bottom halves -============= - -Bottom half callbacks, that affect the guest state, should be invoked through -replay_bh_schedule_event or replay_bh_schedule_oneshot_event functions. -Their invocations are saved in record mode and synchronized with the existing -log in replay mode. - -Saving/restoring the VM state -============================= - -All fields in the device state structure (including virtual timers) -should be restored by loadvm to the same values they had before savevm. - -Avoid accessing other devices' state, because the order of saving/restoring -is not defined. It means that you should not call functions like -'update_irq' in post_load callback. Save everything explicitly to avoid -the dependencies that may make restoring the VM state non-deterministic. - -Stopping the VM -=============== - -Stopping the guest should not interfere with its state (with the exception -of the network connections, that could be broken by the remote timeouts). -VM can be stopped at any moment of replay by the user. Restarting the VM -after that stop should not break the replay by the unneeded guest state change. diff --git a/docs/replay.txt b/docs/replay.txt deleted file mode 100644 index 5b008ca491..0000000000 --- a/docs/replay.txt +++ /dev/null @@ -1,410 +0,0 @@ -Copyright (c) 2010-2015 Institute for System Programming - of the Russian Academy of Sciences. - -This work is licensed under the terms of the GNU GPL, version 2 or later. -See the COPYING file in the top-level directory. - -Record/replay -------------- - -Record/replay functions are used for the deterministic replay of qemu execution. -Execution recording writes a non-deterministic events log, which can be later -used for replaying the execution anywhere and for unlimited number of times. -It also supports checkpointing for faster rewind to the specific replay moment. -Execution replaying reads the log and replays all non-deterministic events -including external input, hardware clocks, and interrupts. - -Deterministic replay has the following features: - * Deterministically replays whole system execution and all contents of - the memory, state of the hardware devices, clocks, and screen of the VM. - * Writes execution log into the file for later replaying for multiple times - on different machines. - * Supports i386, x86_64, and Arm hardware platforms. - * Performs deterministic replay of all operations with keyboard and mouse - input devices. - -Usage of the record/replay: - * First, record the execution with the following command line: - qemu-system-i386 \ - -icount shift=7,rr=record,rrfile=replay.bin \ - -drive file=disk.qcow2,if=none,snapshot,id=img-direct \ - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ - -device ide-hd,drive=img-blkreplay \ - -netdev user,id=net1 -device rtl8139,netdev=net1 \ - -object filter-replay,id=replay,netdev=net1 - * After recording, you can replay it by using another command line: - qemu-system-i386 \ - -icount shift=7,rr=replay,rrfile=replay.bin \ - -drive file=disk.qcow2,if=none,snapshot,id=img-direct \ - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ - -device ide-hd,drive=img-blkreplay \ - -netdev user,id=net1 -device rtl8139,netdev=net1 \ - -object filter-replay,id=replay,netdev=net1 - The only difference with recording is changing the rr option - from record to replay. - * Block device images are not actually changed in the recording mode, - because all of the changes are written to the temporary overlay file. - This behavior is enabled by using blkreplay driver. It should be used - for every enabled block device, as described in 'Block devices' section. - * '-net none' option should be specified when network is not used, - because QEMU adds network card by default. When network is needed, - it should be configured explicitly with replay filter, as described - in 'Network devices' section. - * Interaction with audio devices and serial ports are recorded and replayed - automatically when such devices are enabled. - -Academic papers with description of deterministic replay implementation: -http://www.computer.org/csdl/proceedings/csmr/2012/4666/00/4666a553-abs.html -http://dl.acm.org/citation.cfm?id=2786805.2803179 - -Modifications of qemu include: - * wrappers for clock and time functions to save their return values in the log - * saving different asynchronous events (e.g. system shutdown) into the log - * synchronization of the bottom halves execution - * synchronization of the threads from thread pool - * recording/replaying user input (mouse, keyboard, and microphone) - * adding internal checkpoints for cpu and io synchronization - * network filter for recording and replaying the packets - * block driver for making block layer deterministic - * serial port input record and replay - * recording of random numbers obtained from the external sources - -Locking and thread synchronisation ----------------------------------- - -Previously the synchronisation of the main thread and the vCPU thread -was ensured by the holding of the BQL. However the trend has been to -reduce the time the BQL was held across the system including under TCG -system emulation. As it is important that batches of events are kept -in sequence (e.g. expiring timers and checkpoints in the main thread -while instruction checkpoints are written by the vCPU thread) we need -another lock to keep things in lock-step. This role is now handled by -the replay_mutex_lock. It used to be held only for each event being -written but now it is held for a whole execution period. This results -in a deterministic ping-pong between the two main threads. - -As the BQL is now a finer grained lock than the replay_lock it is almost -certainly a bug, and a source of deadlocks, to take the -replay_mutex_lock while the BQL is held. This is enforced by an assert. -While the unlocks are usually in the reverse order, this is not -necessary; you can drop the replay_lock while holding the BQL, without -doing a more complicated unlock_iothread/replay_unlock/lock_iothread -sequence. - -Non-deterministic events ------------------------- - -Our record/replay system is based on saving and replaying non-deterministic -events (e.g. keyboard input) and simulating deterministic ones (e.g. reading -from HDD or memory of the VM). Saving only non-deterministic events makes -log file smaller and simulation faster. - -The following non-deterministic data from peripheral devices is saved into -the log: mouse and keyboard input, network packets, audio controller input, -serial port input, and hardware clocks (they are non-deterministic -too, because their values are taken from the host machine). Inputs from -simulated hardware, memory of VM, software interrupts, and execution of -instructions are not saved into the log, because they are deterministic and -can be replayed by simulating the behavior of virtual machine starting from -initial state. - -We had to solve three tasks to implement deterministic replay: recording -non-deterministic events, replaying non-deterministic events, and checking -that there is no divergence between record and replay modes. - -We changed several parts of QEMU to make event log recording and replaying. -Devices' models that have non-deterministic input from external devices were -changed to write every external event into the execution log immediately. -E.g. network packets are written into the log when they arrive into the virtual -network adapter. - -All non-deterministic events are coming from these devices. But to -replay them we need to know at which moments they occur. We specify -these moments by counting the number of instructions executed between -every pair of consecutive events. - -Instruction counting --------------------- - -QEMU should work in icount mode to use record/replay feature. icount was -designed to allow deterministic execution in absence of external inputs -of the virtual machine. We also use icount to control the occurrence of the -non-deterministic events. The number of instructions elapsed from the last event -is written to the log while recording the execution. In replay mode we -can predict when to inject that event using the instruction counter. - -Timers ------- - -Timers are used to execute callbacks from different subsystems of QEMU -at the specified moments of time. There are several kinds of timers: - * Real time clock. Based on host time and used only for callbacks that - do not change the virtual machine state. For this reason real time - clock and timers does not affect deterministic replay at all. - * Virtual clock. These timers run only during the emulation. In icount - mode virtual clock value is calculated using executed instructions counter. - That is why it is completely deterministic and does not have to be recorded. - * Host clock. This clock is used by device models that simulate real time - sources (e.g. real time clock chip). Host clock is the one of the sources - of non-determinism. Host clock read operations should be logged to - make the execution deterministic. - * Virtual real time clock. This clock is similar to real time clock but - it is used only for increasing virtual clock while virtual machine is - sleeping. Due to its nature it is also non-deterministic as the host clock - and has to be logged too. - -Checkpoints ------------ - -Replaying of the execution of virtual machine is bound by sources of -non-determinism. These are inputs from clock and peripheral devices, -and QEMU thread scheduling. Thread scheduling affect on processing events -from timers, asynchronous input-output, and bottom halves. - -Invocations of timers are coupled with clock reads and changing the state -of the virtual machine. Reads produce non-deterministic data taken from -host clock. And VM state changes should preserve their order. Their relative -order in replay mode must replicate the order of callbacks in record mode. -To preserve this order we use checkpoints. When a specific clock is processed -in record mode we save to the log special "checkpoint" event. -Checkpoints here do not refer to virtual machine snapshots. They are just -record/replay events used for synchronization. - -QEMU in replay mode will try to invoke timers processing in random moment -of time. That's why we do not process a group of timers until the checkpoint -event will be read from the log. Such an event allows synchronizing CPU -execution and timer events. - -Two other checkpoints govern the "warping" of the virtual clock. -While the virtual machine is idle, the virtual clock increments at -1 ns per *real time* nanosecond. This is done by setting up a timer -(called the warp timer) on the virtual real time clock, so that the -timer fires at the next deadline of the virtual clock; the virtual clock -is then incremented (which is called "warping" the virtual clock) as -soon as the timer fires or the CPUs need to go out of the idle state. -Two functions are used for this purpose; because these actions change -virtual machine state and must be deterministic, each of them creates a -checkpoint. icount_start_warp_timer checks if the CPUs are idle and if so -starts accounting real time to virtual clock. icount_account_warp_timer -is called when the CPUs get an interrupt or when the warp timer fires, -and it warps the virtual clock by the amount of real time that has passed -since icount_start_warp_timer. - -Bottom halves -------------- - -Disk I/O events are completely deterministic in our model, because -in both record and replay modes we start virtual machine from the same -disk state. But callbacks that virtual disk controller uses for reading and -writing the disk may occur at different moments of time in record and replay -modes. - -Reading and writing requests are created by CPU thread of QEMU. Later these -requests proceed to block layer which creates "bottom halves". Bottom -halves consist of callback and its parameters. They are processed when -main loop locks the global mutex. These locks are not synchronized with -replaying process because main loop also processes the events that do not -affect the virtual machine state (like user interaction with monitor). - -That is why we had to implement saving and replaying bottom halves callbacks -synchronously to the CPU execution. When the callback is about to execute -it is added to the queue in the replay module. This queue is written to the -log when its callbacks are executed. In replay mode callbacks are not processed -until the corresponding event is read from the events log file. - -Sometimes the block layer uses asynchronous callbacks for its internal purposes -(like reading or writing VM snapshots or disk image cluster tables). In this -case bottom halves are not marked as "replayable" and do not saved -into the log. - -Block devices -------------- - -Block devices record/replay module intercepts calls of -bdrv coroutine functions at the top of block drivers stack. -To record and replay block operations the drive must be configured -as following: - -drive file=disk.qcow2,if=none,snapshot,id=img-direct - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay - -device ide-hd,drive=img-blkreplay - -blkreplay driver should be inserted between disk image and virtual driver -controller. Therefore all disk requests may be recorded and replayed. - -All block completion operations are added to the queue in the coroutines. -Queue is flushed at checkpoints and information about processed requests -is recorded to the log. In replay phase the queue is matched with -events read from the log. Therefore block devices requests are processed -deterministically. - -Snapshotting ------------- - -New VM snapshots may be created in replay mode. They can be used later -to recover the desired VM state. All VM states created in replay mode -are associated with the moment of time in the replay scenario. -After recovering the VM state replay will start from that position. - -Default starting snapshot name may be specified with icount field -rrsnapshot as follows: - -icount shift=7,rr=record,rrfile=replay.bin,rrsnapshot=snapshot_name - -This snapshot is created at start of recording and restored at start -of replaying. It also can be loaded while replaying to roll back -the execution. - -'snapshot' flag of the disk image must be removed to save the snapshots -in the overlay (or original image) instead of using the temporary overlay. - -drive file=disk.ovl,if=none,id=img-direct - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay - -device ide-hd,drive=img-blkreplay - -Use QEMU monitor to create additional snapshots. 'savevm <name>' command -created the snapshot and 'loadvm <name>' restores it. To prevent corruption -of the original disk image, use overlay files linked to the original images. -Therefore all new snapshots (including the starting one) will be saved in -overlays and the original image remains unchanged. - -When you need to use snapshots with diskless virtual machine, -it must be started with 'orphan' qcow2 image. This image will be used -for storing VM snapshots. Here is the example of the command line for this: - - qemu-system-i386 -icount shift=3,rr=replay,rrfile=record.bin,rrsnapshot=init \ - -net none -drive file=empty.qcow2,if=none,id=rr - -empty.qcow2 drive does not connected to any virtual block device and used -for VM snapshots only. - -Network devices ---------------- - -Record and replay for network interactions is performed with the network filter. -Each backend must have its own instance of the replay filter as follows: - -netdev user,id=net1 -device rtl8139,netdev=net1 - -object filter-replay,id=replay,netdev=net1 - -Replay network filter is used to record and replay network packets. While -recording the virtual machine this filter puts all packets coming from -the outer world into the log. In replay mode packets from the log are -injected into the network device. All interactions with network backend -in replay mode are disabled. - -Audio devices -------------- - -Audio data is recorded and replay automatically. The command line for recording -and replaying must contain identical specifications of audio hardware, e.g.: - -soundhw ac97 - -Serial ports ------------- - -Serial ports input is recorded and replay automatically. The command lines -for recording and replaying must contain identical number of ports in record -and replay modes, but their backends may differ. -E.g., '-serial stdio' in record mode, and '-serial null' in replay mode. - -Reverse debugging ------------------ - -Reverse debugging allows "executing" the program in reverse direction. -GDB remote protocol supports "reverse step" and "reverse continue" -commands. The first one steps single instruction backwards in time, -and the second one finds the last breakpoint in the past. - -Recorded executions may be used to enable reverse debugging. QEMU can't -execute the code in backwards direction, but can load a snapshot and -replay forward to find the desired position or breakpoint. - -The following GDB commands are supported: - - reverse-stepi (or rsi) - step one instruction backwards - - reverse-continue (or rc) - find last breakpoint in the past - -Reverse step loads the nearest snapshot and replays the execution until -the required instruction is met. - -Reverse continue may include several passes of examining the execution -between the snapshots. Each of the passes include the following steps: - 1. loading the snapshot - 2. replaying to examine the breakpoints - 3. if breakpoint or watchpoint was met - - loading the snapshot again - - replaying to the required breakpoint - 4. else - - proceeding to the p.1 with the earlier snapshot - -Therefore usage of the reverse debugging requires at least one snapshot -created in advance. This can be done by omitting 'snapshot' option -for the block drives and adding 'rrsnapshot' for both record and replay -command lines. -See the "Snapshotting" section to learn more about running record/replay -and creating the snapshot in these modes. - -Replay log format ------------------ - -Record/replay log consists of the header and the sequence of execution -events. The header includes 4-byte replay version id and 8-byte reserved -field. Version is updated every time replay log format changes to prevent -using replay log created by another build of qemu. - -The sequence of the events describes virtual machine state changes. -It includes all non-deterministic inputs of VM, synchronization marks and -instruction counts used to correctly inject inputs at replay. - -Synchronization marks (checkpoints) are used for synchronizing qemu threads -that perform operations with virtual hardware. These operations may change -system's state (e.g., change some register or generate interrupt) and -therefore should execute synchronously with CPU thread. - -Every event in the log includes 1-byte event id and optional arguments. -When argument is an array, it is stored as 4-byte array length -and corresponding number of bytes with data. -Here is the list of events that are written into the log: - - - EVENT_INSTRUCTION. Instructions executed since last event. - Argument: 4-byte number of executed instructions. - - EVENT_INTERRUPT. Used to synchronize interrupt processing. - - EVENT_EXCEPTION. Used to synchronize exception handling. - - EVENT_ASYNC. This is a group of events. They are always processed - together with checkpoints. When such an event is generated, it is - stored in the queue and processed only when checkpoint occurs. - Every such event is followed by 1-byte checkpoint id and 1-byte - async event id from the following list: - - REPLAY_ASYNC_EVENT_BH. Bottom-half callback. This event synchronizes - callbacks that affect virtual machine state, but normally called - asynchronously. - Argument: 8-byte operation id. - - REPLAY_ASYNC_EVENT_INPUT. Input device event. Contains - parameters of keyboard and mouse input operations - (key press/release, mouse pointer movement). - Arguments: 9-16 bytes depending of input event. - - REPLAY_ASYNC_EVENT_INPUT_SYNC. Internal input synchronization event. - - REPLAY_ASYNC_EVENT_CHAR_READ. Character (e.g., serial port) device input - initiated by the sender. - Arguments: 1-byte character device id. - Array with bytes were read. - - REPLAY_ASYNC_EVENT_BLOCK. Block device operation. Used to synchronize - operations with disk and flash drives with CPU. - Argument: 8-byte operation id. - - REPLAY_ASYNC_EVENT_NET. Incoming network packet. - Arguments: 1-byte network adapter id. - 4-byte packet flags. - Array with packet bytes. - - EVENT_SHUTDOWN. Occurs when user sends shutdown event to qemu, - e.g., by closing the window. - - EVENT_CHAR_WRITE. Used to synchronize character output operations. - Arguments: 4-byte output function return value. - 4-byte offset in the output array. - - EVENT_CHAR_READ_ALL. Used to synchronize character input operations, - initiated by qemu. - Argument: Array with bytes that were read. - - EVENT_CHAR_READ_ALL_ERROR. Unsuccessful character input operation, - initiated by qemu. - Argument: 4-byte error code. - - EVENT_CLOCK + clock_id. Group of events for host clock read operations. - Argument: 8-byte clock value. - - EVENT_CHECKPOINT + checkpoint_id. Checkpoint for synchronization of - CPU, internal threads, and asynchronous input events. May be followed - by one or more EVENT_ASYNC events. - - EVENT_END. Last event in the log. diff --git a/docs/system/index.rst b/docs/system/index.rst index 23e30e26e5..e3695649c5 100644 --- a/docs/system/index.rst +++ b/docs/system/index.rst @@ -27,6 +27,7 @@ or Hypervisor.Framework. secrets authz gdb + replay managed-startup bootindex cpu-hotplug diff --git a/docs/system/replay.rst b/docs/system/replay.rst new file mode 100644 index 0000000000..3105327423 --- /dev/null +++ b/docs/system/replay.rst @@ -0,0 +1,237 @@ +.. _replay: + +.. + Copyright (c) 2010-2022 Institute for System Programming + of the Russian Academy of Sciences. + + This work is licensed under the terms of the GNU GPL, version 2 or later. + See the COPYING file in the top-level directory. + +Record/replay +============= + +Record/replay functions are used for the deterministic replay of qemu execution. +Execution recording writes a non-deterministic events log, which can be later +used for replaying the execution anywhere and for unlimited number of times. +It also supports checkpointing for faster rewind to the specific replay moment. +Execution replaying reads the log and replays all non-deterministic events +including external input, hardware clocks, and interrupts. + +Deterministic replay has the following features: + + * Deterministically replays whole system execution and all contents of + the memory, state of the hardware devices, clocks, and screen of the VM. + * Writes execution log into the file for later replaying for multiple times + on different machines. + * Supports i386, x86_64, ARM, AArch64, Risc-V, MIPS, MIPS64, S390X, Alpha, + PowerPC, PowerPC64, M68000, Microblaze, OpenRISC, Nios II, SPARC, + and Xtensa hardware platforms. + * Performs deterministic replay of all operations with keyboard and mouse + input devices, serial ports, and network. + +Usage of the record/replay: + + * First, record the execution with the following command line: + + .. parsed-literal:: + |qemu_system| \\ + -icount shift=auto,rr=record,rrfile=replay.bin \\ + -drive file=disk.qcow2,if=none,snapshot,id=img-direct \\ + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \\ + -device ide-hd,drive=img-blkreplay \\ + -netdev user,id=net1 -device rtl8139,netdev=net1 \\ + -object filter-replay,id=replay,netdev=net1 + + * After recording, you can replay it by using another command line: + + .. parsed-literal:: + |qemu_system| \\ + -icount shift=auto,rr=replay,rrfile=replay.bin \\ + -drive file=disk.qcow2,if=none,snapshot,id=img-direct \\ + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \\ + -device ide-hd,drive=img-blkreplay \\ + -netdev user,id=net1 -device rtl8139,netdev=net1 \\ + -object filter-replay,id=replay,netdev=net1 + + The only difference with recording is changing the rr option + from record to replay. + * Block device images are not actually changed in the recording mode, + because all of the changes are written to the temporary overlay file. + This behavior is enabled by using blkreplay driver. It should be used + for every enabled block device, as described in :ref:`block-label` section. + * ``-net none`` option should be specified when network is not used, + because QEMU adds network card by default. When network is needed, + it should be configured explicitly with replay filter, as described + in :ref:`network-label` section. + * Interaction with audio devices and serial ports are recorded and replayed + automatically when such devices are enabled. + +Core idea +--------- + +Record/replay system is based on saving and replaying non-deterministic +events (e.g. keyboard input) and simulating deterministic ones (e.g. reading +from HDD or memory of the VM). Saving only non-deterministic events makes +log file smaller and simulation faster. + +The following non-deterministic data from peripheral devices is saved into +the log: mouse and keyboard input, network packets, audio controller input, +serial port input, and hardware clocks (they are non-deterministic +too, because their values are taken from the host machine). Inputs from +simulated hardware, memory of VM, software interrupts, and execution of +instructions are not saved into the log, because they are deterministic and +can be replayed by simulating the behavior of virtual machine starting from +initial state. + +Instruction counting +-------------------- + +QEMU should work in icount mode to use record/replay feature. icount was +designed to allow deterministic execution in absence of external inputs +of the virtual machine. Record/replay feature is enabled through ``-icount`` +command-line option, making possible deterministic execution of the machine, +interacting with user or network. + +.. _block-label: + +Block devices +------------- + +Block devices record/replay module intercepts calls of +bdrv coroutine functions at the top of block drivers stack. +To record and replay block operations the drive must be configured +as following: + +.. parsed-literal:: + -drive file=disk.qcow2,if=none,snapshot,id=img-direct + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay + -device ide-hd,drive=img-blkreplay + +blkreplay driver should be inserted between disk image and virtual driver +controller. Therefore all disk requests may be recorded and replayed. + +.. _snapshotting-label: + +Snapshotting +------------ + +New VM snapshots may be created in replay mode. They can be used later +to recover the desired VM state. All VM states created in replay mode +are associated with the moment of time in the replay scenario. +After recovering the VM state replay will start from that position. + +Default starting snapshot name may be specified with icount field +rrsnapshot as follows: + +.. parsed-literal:: + -icount shift=auto,rr=record,rrfile=replay.bin,rrsnapshot=snapshot_name + +This snapshot is created at start of recording and restored at start +of replaying. It also can be loaded while replaying to roll back +the execution. + +``snapshot`` flag of the disk image must be removed to save the snapshots +in the overlay (or original image) instead of using the temporary overlay. + +.. parsed-literal:: + -drive file=disk.ovl,if=none,id=img-direct + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay + -device ide-hd,drive=img-blkreplay + +Use QEMU monitor to create additional snapshots. ``savevm <name>`` command +created the snapshot and ``loadvm <name>`` restores it. To prevent corruption +of the original disk image, use overlay files linked to the original images. +Therefore all new snapshots (including the starting one) will be saved in +overlays and the original image remains unchanged. + +When you need to use snapshots with diskless virtual machine, +it must be started with "orphan" qcow2 image. This image will be used +for storing VM snapshots. Here is the example of the command line for this: + +.. parsed-literal:: + |qemu_system| \\ + -icount shift=auto,rr=replay,rrfile=record.bin,rrsnapshot=init \\ + -net none -drive file=empty.qcow2,if=none,id=rr + +``empty.qcow2`` drive does not connected to any virtual block device and used +for VM snapshots only. + +.. _network-label: + +Network devices +--------------- + +Record and replay for network interactions is performed with the network filter. +Each backend must have its own instance of the replay filter as follows: + +.. parsed-literal:: + -netdev user,id=net1 -device rtl8139,netdev=net1 + -object filter-replay,id=replay,netdev=net1 + +Replay network filter is used to record and replay network packets. While +recording the virtual machine this filter puts all packets coming from +the outer world into the log. In replay mode packets from the log are +injected into the network device. All interactions with network backend +in replay mode are disabled. + +Audio devices +------------- + +Audio data is recorded and replay automatically. The command line for recording +and replaying must contain identical specifications of audio hardware, e.g.: + +.. parsed-literal:: + -soundhw ac97 + +Serial ports +------------ + +Serial ports input is recorded and replay automatically. The command lines +for recording and replaying must contain identical number of ports in record +and replay modes, but their backends may differ. +E.g., ``-serial stdio`` in record mode, and ``-serial null`` in replay mode. + +Reverse debugging +----------------- + +Reverse debugging allows "executing" the program in reverse direction. +GDB remote protocol supports "reverse step" and "reverse continue" +commands. The first one steps single instruction backwards in time, +and the second one finds the last breakpoint in the past. + +Recorded executions may be used to enable reverse debugging. QEMU can't +execute the code in backwards direction, but can load a snapshot and +replay forward to find the desired position or breakpoint. + +The following GDB commands are supported: + + - ``reverse-stepi`` (or ``rsi``) - step one instruction backwards + - ``reverse-continue`` (or ``rc``) - find last breakpoint in the past + +Reverse step loads the nearest snapshot and replays the execution until +the required instruction is met. + +Reverse continue may include several passes of examining the execution +between the snapshots. Each of the passes include the following steps: + + #. loading the snapshot + #. replaying to examine the breakpoints + #. if breakpoint or watchpoint was met + + * loading the snapshot again + * replaying to the required breakpoint + + #. else + + * proceeding to the p.1 with the earlier snapshot + +Therefore usage of the reverse debugging requires at least one snapshot +created. This can be done by omitting ``snapshot`` option +for the block drives and adding ``rrsnapshot`` for both record and replay +command lines. +See the :ref:`snapshotting-label` section to learn more about running record/replay +and creating the snapshot in these modes. + +When ``rrsnapshot`` is not used, then snapshot named ``start_debugging`` +created in temporary overlay. This allows using reverse debugging, but with +temporary snapshots (existing within the session). diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 2db34714fb..5b10f697f1 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -2515,6 +2515,7 @@ static const TypeInfo qxl_primary_info = { .class_init = qxl_primary_class_init, }; module_obj("qxl-vga"); +module_kconfig(QXL); static void qxl_secondary_class_init(ObjectClass *klass, void *data) { diff --git a/hw/display/vhost-user-gpu-pci.c b/hw/display/vhost-user-gpu-pci.c index daefcf7101..d119bcae45 100644 --- a/hw/display/vhost-user-gpu-pci.c +++ b/hw/display/vhost-user-gpu-pci.c @@ -44,6 +44,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_gpu_pci_info = { .instance_init = vhost_user_gpu_pci_initfn, }; module_obj(TYPE_VHOST_USER_GPU_PCI); +module_kconfig(VHOST_USER_GPU); static void vhost_user_gpu_pci_register_types(void) { diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 96e56c4467..3340ef9e5f 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -606,6 +606,7 @@ static const TypeInfo vhost_user_gpu_info = { .class_init = vhost_user_gpu_class_init, }; module_obj(TYPE_VHOST_USER_GPU); +module_kconfig(VHOST_USER_GPU); static void vhost_user_gpu_register_types(void) { diff --git a/hw/display/vhost-user-vga.c b/hw/display/vhost-user-vga.c index 072c9c65bc..0c146080fd 100644 --- a/hw/display/vhost-user-vga.c +++ b/hw/display/vhost-user-vga.c @@ -45,6 +45,7 @@ static const VirtioPCIDeviceTypeInfo vhost_user_vga_info = { .instance_init = vhost_user_vga_inst_initfn, }; module_obj(TYPE_VHOST_USER_VGA); +module_kconfig(VHOST_USER_VGA); static void vhost_user_vga_register_types(void) { diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 8ba5da4312..790cec333c 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -260,6 +260,7 @@ static const TypeInfo virtio_gpu_base_info = { .abstract = true }; module_obj(TYPE_VIRTIO_GPU_BASE); +module_kconfig(VIRTIO_GPU); static void virtio_register_types(void) diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c index 0bca887703..e06be60dfb 100644 --- a/hw/display/virtio-gpu-gl.c +++ b/hw/display/virtio-gpu-gl.c @@ -160,6 +160,7 @@ static const TypeInfo virtio_gpu_gl_info = { .class_init = virtio_gpu_gl_class_init, }; module_obj(TYPE_VIRTIO_GPU_GL); +module_kconfig(VIRTIO_GPU); static void virtio_register_types(void) { diff --git a/hw/display/virtio-gpu-pci-gl.c b/hw/display/virtio-gpu-pci-gl.c index 99b14a0718..a2819e1ca9 100644 --- a/hw/display/virtio-gpu-pci-gl.c +++ b/hw/display/virtio-gpu-pci-gl.c @@ -47,6 +47,7 @@ static const VirtioPCIDeviceTypeInfo virtio_gpu_gl_pci_info = { .instance_init = virtio_gpu_gl_initfn, }; module_obj(TYPE_VIRTIO_GPU_GL_PCI); +module_kconfig(VIRTIO_PCI); static void virtio_gpu_gl_pci_register_types(void) { diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index e36eee0c40..93f214ff58 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -65,6 +65,7 @@ static const TypeInfo virtio_gpu_pci_base_info = { .abstract = true }; module_obj(TYPE_VIRTIO_GPU_PCI_BASE); +module_kconfig(VIRTIO_PCI); #define TYPE_VIRTIO_GPU_PCI "virtio-gpu-pci" typedef struct VirtIOGPUPCI VirtIOGPUPCI; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 529b5246b2..cd4a56056f 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1452,6 +1452,7 @@ static const TypeInfo virtio_gpu_info = { .class_init = virtio_gpu_class_init, }; module_obj(TYPE_VIRTIO_GPU); +module_kconfig(VIRTIO_GPU); static void virtio_register_types(void) { diff --git a/hw/display/virtio-vga-gl.c b/hw/display/virtio-vga-gl.c index f22549097c..984faa6b39 100644 --- a/hw/display/virtio-vga-gl.c +++ b/hw/display/virtio-vga-gl.c @@ -37,6 +37,7 @@ static VirtioPCIDeviceTypeInfo virtio_vga_gl_info = { .instance_init = virtio_vga_gl_inst_initfn, }; module_obj(TYPE_VIRTIO_VGA_GL); +module_kconfig(VIRTIO_VGA); static void virtio_vga_register_types(void) { diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 7b55c8d0e7..c206b5da38 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -231,6 +231,7 @@ static const TypeInfo virtio_vga_base_info = { .abstract = true, }; module_obj(TYPE_VIRTIO_VGA_BASE); +module_kconfig(VIRTIO_VGA); #define TYPE_VIRTIO_VGA "virtio-vga" diff --git a/hw/s390x/virtio-ccw-gpu.c b/hw/s390x/virtio-ccw-gpu.c index 8d995fcb33..0642c5281d 100644 --- a/hw/s390x/virtio-ccw-gpu.c +++ b/hw/s390x/virtio-ccw-gpu.c @@ -69,6 +69,7 @@ static const TypeInfo virtio_ccw_gpu = { .class_init = virtio_ccw_gpu_class_init, }; module_obj(TYPE_VIRTIO_GPU_CCW); +module_kconfig(VIRTIO_CCW); static void virtio_ccw_gpu_register(void) { diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 6c8c0355e0..1ddf7297f6 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -613,6 +613,7 @@ static const TypeInfo emulated_card_info = { .class_init = emulated_class_initfn, }; module_obj(TYPE_EMULATED_CCID); +module_kconfig(USB); static void ccid_card_emulated_register_types(void) { diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c index f530ab2565..07ee42f304 100644 --- a/hw/usb/ccid-card-passthru.c +++ b/hw/usb/ccid-card-passthru.c @@ -415,6 +415,7 @@ static const TypeInfo passthru_card_info = { .class_init = passthru_class_initfn, }; module_obj(TYPE_CCID_PASSTHRU); +module_kconfig(USB); static void ccid_card_passthru_register_types(void) { diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 2b35cb6cdd..28f8af8941 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1809,6 +1809,7 @@ static const TypeInfo usb_host_dev_info = { .instance_init = usb_host_instance_init, }; module_obj(TYPE_USB_HOST_DEVICE); +module_kconfig(USB); static void usb_host_register_types(void) { diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 3bc4dee7fe..fd7df599bc 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -2620,6 +2620,7 @@ static const TypeInfo usbredir_dev_info = { .instance_init = usbredir_instance_init, }; module_obj(TYPE_USB_REDIR); +module_kconfig(USB); static void usbredir_register_types(void) { diff --git a/include/qemu/module.h b/include/qemu/module.h index 5fcc323b2a..bd73607104 100644 --- a/include/qemu/module.h +++ b/include/qemu/module.h @@ -135,6 +135,16 @@ void module_allow_arch(const char *arch); */ #define module_opts(name) modinfo(opts, name) +/** + * module_kconfig + * + * @name: Kconfig requirement necessary to load the module + * + * This module requires a core module that should be implemented and + * enabled in Kconfig. + */ +#define module_kconfig(name) modinfo(kconfig, name) + /* * module info database * diff --git a/include/sysemu/cpu-timers.h b/include/sysemu/cpu-timers.h index ed6ee5c46c..2e786fe7fb 100644 --- a/include/sysemu/cpu-timers.h +++ b/include/sysemu/cpu-timers.h @@ -59,6 +59,7 @@ int64_t icount_round(int64_t count); /* if the CPUs are idle, start accounting real time to virtual clock. */ void icount_start_warp_timer(void); void icount_account_warp_timer(void); +void icount_notify_exit(void); /* * CPU Ticks and Clock diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 0f3b0f7eac..73dee9ccdf 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -160,9 +160,14 @@ void replay_shutdown_request(ShutdownCause cause); Returns 0 in PLAY mode if checkpoint was not found. Returns 1 in all other cases. */ bool replay_checkpoint(ReplayCheckpoint checkpoint); -/*! Used to determine that checkpoint is pending. +/*! Used to determine that checkpoint or async event is pending. Does not proceed to the next event in the log. */ -bool replay_has_checkpoint(void); +bool replay_has_event(void); +/* + * Processes the async events added to the queue (while recording) + * or reads the events from the file (while replaying). + */ +void replay_async_events(void); /* Asynchronous events queue */ diff --git a/meson.build b/meson.build index bc6234c85e..d738391810 100644 --- a/meson.build +++ b/meson.build @@ -3175,14 +3175,23 @@ foreach d, list : target_modules endforeach if enable_modules - modinfo_src = custom_target('modinfo.c', - output: 'modinfo.c', - input: modinfo_files, - command: [modinfo_generate, '@INPUT@'], - capture: true) - modinfo_lib = static_library('modinfo', modinfo_src) - modinfo_dep = declare_dependency(link_whole: modinfo_lib) - softmmu_ss.add(modinfo_dep) + foreach target : target_dirs + if target.endswith('-softmmu') + config_target = config_target_mak[target] + config_devices_mak = target + '-config-devices.mak' + modinfo_src = custom_target('modinfo-' + target + '.c', + output: 'modinfo-' + target + '.c', + input: modinfo_files, + command: [modinfo_generate, '--devices', config_devices_mak, '@INPUT@'], + capture: true) + + modinfo_lib = static_library('modinfo-' + target + '.c', modinfo_src) + modinfo_dep = declare_dependency(link_with: modinfo_lib) + + arch = config_target['TARGET_NAME'] == 'sparc64' ? 'sparc64' : config_target['TARGET_BASE_ARCH'] + hw_arch[arch].add(modinfo_dep) + endif + endforeach endif nm = find_program('nm') @@ -3285,6 +3294,9 @@ foreach m : block_mods + softmmu_mods install: true, install_dir: qemu_moddir) endforeach +if emulator_modules.length() > 0 + alias_target('modules', emulator_modules) +endif softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp) common_ss.add(qom, qemuutil) diff --git a/python/qemu/qmp/util.py b/python/qemu/qmp/util.py index eaa5fc7d5f..ca6225e9cd 100644 --- a/python/qemu/qmp/util.py +++ b/python/qemu/qmp/util.py @@ -40,7 +40,9 @@ async def flush(writer: asyncio.StreamWriter) -> None: drain. The flow control limits are restored after the call is completed. """ - transport = cast(asyncio.WriteTransport, writer.transport) + transport = cast( # type: ignore[redundant-cast] + asyncio.WriteTransport, writer.transport + ) # https://github.com/python/typeshed/issues/5779 low, high = transport.get_write_buffer_limits() # type: ignore diff --git a/python/setup.cfg b/python/setup.cfg index e877ea5647..c2c61c7519 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -79,6 +79,7 @@ strict = True python_version = 3.6 warn_unused_configs = True namespace_packages = True +warn_unused_ignores = False [mypy-qemu.utils.qom_fuse] # fusepy has no type stubs: diff --git a/qga/meson.build b/qga/meson.build index 619ff095bc..65c1e93846 100644 --- a/qga/meson.build +++ b/qga/meson.build @@ -125,7 +125,7 @@ if targetos == 'windows' wixl, '-o', '@OUTPUT0@', '@INPUT0@', qemu_ga_msi_arch[cpu], qemu_ga_msi_vss, - '-D', 'BUILD_DIR=' + meson.build_root(), + '-D', 'BUILD_DIR=' + meson.project_build_root(), '-D', 'BIN_DIR=' + glib.get_variable('bindir'), '-D', 'QEMU_GA_VERSION=' + config_host['QEMU_GA_VERSION'], '-D', 'QEMU_GA_MANUFACTURER=' + config_host['QEMU_GA_MANUFACTURER'], diff --git a/replay/replay-events.c b/replay/replay-events.c index ac47c89834..af0721cc1a 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -170,13 +170,12 @@ void replay_block_event(QEMUBH *bh, uint64_t id) } } -static void replay_save_event(Event *event, int checkpoint) +static void replay_save_event(Event *event) { if (replay_mode != REPLAY_MODE_PLAY) { /* put the event into the file */ - replay_put_event(EVENT_ASYNC); - replay_put_byte(checkpoint); - replay_put_byte(event->event_kind); + g_assert(event->event_kind < REPLAY_ASYNC_COUNT); + replay_put_event(EVENT_ASYNC + event->event_kind); /* save event-specific data */ switch (event->event_kind) { @@ -206,36 +205,25 @@ static void replay_save_event(Event *event, int checkpoint) } /* Called with replay mutex locked */ -void replay_save_events(int checkpoint) +void replay_save_events(void) { g_assert(replay_mutex_locked()); - g_assert(checkpoint != CHECKPOINT_CLOCK_WARP_START); - g_assert(checkpoint != CHECKPOINT_CLOCK_VIRTUAL); while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); - replay_save_event(event, checkpoint); + replay_save_event(event); replay_run_event(event); QTAILQ_REMOVE(&events_list, event, events); g_free(event); } } -static Event *replay_read_event(int checkpoint) +static Event *replay_read_event(void) { Event *event; - if (replay_state.read_event_kind == -1) { - replay_state.read_event_checkpoint = replay_get_byte(); - replay_state.read_event_kind = replay_get_byte(); - replay_state.read_event_id = -1; - replay_check_error(); - } - - if (checkpoint != replay_state.read_event_checkpoint) { - return NULL; - } + ReplayAsyncEventKind event_kind = replay_state.data_kind - EVENT_ASYNC; /* Events that has not to be in the queue */ - switch (replay_state.read_event_kind) { + switch (event_kind) { case REPLAY_ASYNC_EVENT_BH: case REPLAY_ASYNC_EVENT_BH_ONESHOT: if (replay_state.read_event_id == -1) { @@ -244,17 +232,17 @@ static Event *replay_read_event(int checkpoint) break; case REPLAY_ASYNC_EVENT_INPUT: event = g_new0(Event, 1); - event->event_kind = replay_state.read_event_kind; + event->event_kind = event_kind; event->opaque = replay_read_input_event(); return event; case REPLAY_ASYNC_EVENT_INPUT_SYNC: event = g_new0(Event, 1); - event->event_kind = replay_state.read_event_kind; + event->event_kind = event_kind; event->opaque = 0; return event; case REPLAY_ASYNC_EVENT_CHAR_READ: event = g_new0(Event, 1); - event->event_kind = replay_state.read_event_kind; + event->event_kind = event_kind; event->opaque = replay_event_char_read_load(); return event; case REPLAY_ASYNC_EVENT_BLOCK: @@ -264,18 +252,17 @@ static Event *replay_read_event(int checkpoint) break; case REPLAY_ASYNC_EVENT_NET: event = g_new0(Event, 1); - event->event_kind = replay_state.read_event_kind; + event->event_kind = event_kind; event->opaque = replay_event_net_load(); return event; default: - error_report("Unknown ID %d of replay event", - replay_state.read_event_kind); + error_report("Unknown ID %d of replay event", event_kind); exit(1); break; } QTAILQ_FOREACH(event, &events_list, events) { - if (event->event_kind == replay_state.read_event_kind + if (event->event_kind == event_kind && (replay_state.read_event_id == -1 || replay_state.read_event_id == event->id)) { break; @@ -284,26 +271,23 @@ static Event *replay_read_event(int checkpoint) if (event) { QTAILQ_REMOVE(&events_list, event, events); - } else { - return NULL; } - /* Read event-specific data */ - return event; } /* Called with replay mutex locked */ -void replay_read_events(int checkpoint) +void replay_read_events(void) { g_assert(replay_mutex_locked()); - while (replay_state.data_kind == EVENT_ASYNC) { - Event *event = replay_read_event(checkpoint); + while (replay_state.data_kind >= EVENT_ASYNC + && replay_state.data_kind <= EVENT_ASYNC_LAST) { + Event *event = replay_read_event(); if (!event) { break; } replay_finish_event(); - replay_state.read_event_kind = -1; + replay_state.read_event_id = -1; replay_run_event(event); g_free(event); @@ -312,7 +296,7 @@ void replay_read_events(int checkpoint) void replay_init_events(void) { - replay_state.read_event_kind = -1; + replay_state.read_event_id = -1; } void replay_finish_events(void) diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 97649ed8d7..89e377be90 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -12,6 +12,19 @@ * */ +/* Asynchronous events IDs */ + +typedef enum ReplayAsyncEventKind { + REPLAY_ASYNC_EVENT_BH, + REPLAY_ASYNC_EVENT_BH_ONESHOT, + REPLAY_ASYNC_EVENT_INPUT, + REPLAY_ASYNC_EVENT_INPUT_SYNC, + REPLAY_ASYNC_EVENT_CHAR_READ, + REPLAY_ASYNC_EVENT_BLOCK, + REPLAY_ASYNC_EVENT_NET, + REPLAY_ASYNC_COUNT +} ReplayAsyncEventKind; + /* Any changes to order/number of events will need to bump REPLAY_VERSION */ enum ReplayEvents { /* for instruction event */ @@ -22,6 +35,7 @@ enum ReplayEvents { EVENT_EXCEPTION, /* for async events */ EVENT_ASYNC, + EVENT_ASYNC_LAST = EVENT_ASYNC + REPLAY_ASYNC_COUNT - 1, /* for shutdown requests, range allows recovery of ShutdownCause */ EVENT_SHUTDOWN, EVENT_SHUTDOWN_LAST = EVENT_SHUTDOWN + SHUTDOWN_CAUSE__MAX, @@ -49,21 +63,6 @@ enum ReplayEvents { EVENT_COUNT }; -/* Asynchronous events IDs */ - -enum ReplayAsyncEventKind { - REPLAY_ASYNC_EVENT_BH, - REPLAY_ASYNC_EVENT_BH_ONESHOT, - REPLAY_ASYNC_EVENT_INPUT, - REPLAY_ASYNC_EVENT_INPUT_SYNC, - REPLAY_ASYNC_EVENT_CHAR_READ, - REPLAY_ASYNC_EVENT_BLOCK, - REPLAY_ASYNC_EVENT_NET, - REPLAY_ASYNC_COUNT -}; - -typedef enum ReplayAsyncEventKind ReplayAsyncEventKind; - typedef struct ReplayState { /*! Cached clock values. */ int64_t cached_clock[REPLAY_CLOCK_COUNT]; @@ -83,12 +82,8 @@ typedef struct ReplayState { uint64_t block_request_id; /*! Prior value of the host clock */ uint64_t host_clock_last; - /*! Asynchronous event type read from the log */ - int32_t read_event_kind; /*! Asynchronous event id read from the log */ uint64_t read_event_id; - /*! Asynchronous event checkpoint id read from the log */ - int32_t read_event_checkpoint; } ReplayState; extern ReplayState replay_state; @@ -152,9 +147,9 @@ void replay_finish_events(void); /*! Returns true if there are any unsaved events in the queue */ bool replay_has_events(void); /*! Saves events from queue into the file */ -void replay_save_events(int checkpoint); +void replay_save_events(void); /*! Read events from the file into the input queue */ -void replay_read_events(int checkpoint); +void replay_read_events(void); /*! Adds specified async event to the queue */ void replay_add_event(ReplayAsyncEventKind event_kind, void *opaque, void *opaque2, uint64_t id); diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c index e8767a1937..10a7cf7992 100644 --- a/replay/replay-snapshot.c +++ b/replay/replay-snapshot.c @@ -59,9 +59,7 @@ static const VMStateDescription vmstate_replay = { VMSTATE_UINT32(has_unread_data, ReplayState), VMSTATE_UINT64(file_offset, ReplayState), VMSTATE_UINT64(block_request_id, ReplayState), - VMSTATE_INT32(read_event_kind, ReplayState), VMSTATE_UINT64(read_event_id, ReplayState), - VMSTATE_INT32(read_event_checkpoint, ReplayState), VMSTATE_END_OF_LIST() }, }; diff --git a/replay/replay.c b/replay/replay.c index 6df2abc18c..4c396bb376 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -22,7 +22,7 @@ /* Current version of the replay mechanism. Increase it when file format changes. */ -#define REPLAY_VERSION 0xe0200a +#define REPLAY_VERSION 0xe0200c /* Size of replay log header */ #define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) @@ -171,64 +171,49 @@ void replay_shutdown_request(ShutdownCause cause) bool replay_checkpoint(ReplayCheckpoint checkpoint) { - bool res = false; - static bool in_checkpoint; assert(EVENT_CHECKPOINT + checkpoint <= EVENT_CHECKPOINT_LAST); - if (!replay_file) { - return true; - } - - if (in_checkpoint) { - /* - Recursion occurs when HW event modifies timers. - Prevent performing icount warp in this case and - wait for another invocation of the checkpoint. - */ - g_assert(replay_mode == REPLAY_MODE_PLAY); - return false; - } - in_checkpoint = true; - replay_save_instructions(); if (replay_mode == REPLAY_MODE_PLAY) { g_assert(replay_mutex_locked()); if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) { replay_finish_event(); - } else if (replay_state.data_kind != EVENT_ASYNC) { - res = false; - goto out; + } else { + return false; } - replay_read_events(checkpoint); - /* replay_read_events may leave some unread events. - Return false if not all of the events associated with - checkpoint were processed */ - res = replay_state.data_kind != EVENT_ASYNC; } else if (replay_mode == REPLAY_MODE_RECORD) { g_assert(replay_mutex_locked()); replay_put_event(EVENT_CHECKPOINT + checkpoint); - /* This checkpoint belongs to several threads. - Processing events from different threads is - non-deterministic */ - if (checkpoint != CHECKPOINT_CLOCK_WARP_START - /* FIXME: this is temporary fix, other checkpoints - may also be invoked from the different threads someday. - Asynchronous event processing should be refactored - to create additional replay event kind which is - nailed to the one of the threads and which processes - the event queue. */ - && checkpoint != CHECKPOINT_CLOCK_VIRTUAL) { - replay_save_events(checkpoint); - } - res = true; } -out: - in_checkpoint = false; - return res; + return true; +} + +void replay_async_events(void) +{ + static bool processing = false; + /* + * If we are already processing the events, recursion may occur + * in case of incorrect implementation when HW event modifies timers. + * Timer modification may invoke the icount warp, event processing, + * and cause the recursion. + */ + g_assert(!processing); + processing = true; + + replay_save_instructions(); + + if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); + replay_read_events(); + } else if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); + replay_save_events(); + } + processing = false; } -bool replay_has_checkpoint(void) +bool replay_has_event(void) { bool res = false; if (replay_mode == REPLAY_MODE_PLAY) { @@ -236,6 +221,8 @@ bool replay_has_checkpoint(void) replay_account_executed_instructions(); res = EVENT_CHECKPOINT <= replay_state.data_kind && replay_state.data_kind <= EVENT_CHECKPOINT_LAST; + res = res || (EVENT_ASYNC <= replay_state.data_kind + && replay_state.data_kind <= EVENT_ASYNC_LAST); } return res; } @@ -387,9 +374,8 @@ void replay_finish(void) g_free(replay_snapshot); replay_snapshot = NULL; - replay_mode = REPLAY_MODE_NONE; - replay_finish_events(); + replay_mode = REPLAY_MODE_NONE; } void replay_add_blocker(Error *reason) diff --git a/scripts/device-crash-test b/scripts/device-crash-test index a203b3fdea..73bcb98693 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -33,10 +33,18 @@ import re import random import argparse from itertools import chain - -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) -from qemu.machine import QEMUMachine -from qemu.qmp import ConnectError +from pathlib import Path + +try: + from qemu.machine import QEMUMachine + from qemu.qmp import ConnectError +except ModuleNotFoundError as exc: + path = Path(__file__).resolve() + print(f"Module '{exc.name}' not found.") + print(" Try 'make check-venv' from your build directory,") + print(" and then one way to run this script is like so:") + print(f' > $builddir/tests/venv/bin/python3 "{path}"') + sys.exit(1) logger = logging.getLogger('device-crash-test') dbg = logger.debug diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 731e5ea1cf..00ea4d8cd1 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -156,13 +156,13 @@ meson_options_help() { printf "%s\n" ' vhost-kernel vhost kernel backend support' printf "%s\n" ' vhost-net vhost-net kernel acceleration support' printf "%s\n" ' vhost-user vhost-user backend support' - printf "%s\n" ' vmnet vmnet.framework network backend support' printf "%s\n" ' vhost-user-blk-server' printf "%s\n" ' build vhost-user-blk server' printf "%s\n" ' vhost-vdpa vhost-vdpa kernel backend support' printf "%s\n" ' virglrenderer virgl rendering support' printf "%s\n" ' virtfs virtio-9p support' printf "%s\n" ' virtiofsd build virtiofs daemon (virtiofsd)' + printf "%s\n" ' vmnet vmnet.framework network backend support' printf "%s\n" ' vnc VNC server' printf "%s\n" ' vnc-jpeg JPEG lossy compression for VNC server' printf "%s\n" ' vnc-sasl SASL authentication for VNC server' @@ -430,6 +430,8 @@ _meson_option_parse() { --disable-virtfs) printf "%s" -Dvirtfs=disabled ;; --enable-virtiofsd) printf "%s" -Dvirtiofsd=enabled ;; --disable-virtiofsd) printf "%s" -Dvirtiofsd=disabled ;; + --enable-vmnet) printf "%s" -Dvmnet=enabled ;; + --disable-vmnet) printf "%s" -Dvmnet=disabled ;; --enable-vnc) printf "%s" -Dvnc=enabled ;; --disable-vnc) printf "%s" -Dvnc=disabled ;; --enable-vnc-jpeg) printf "%s" -Dvnc_jpeg=enabled ;; diff --git a/scripts/modinfo-generate.py b/scripts/modinfo-generate.py index f559eed007..b1538fcced 100755 --- a/scripts/modinfo-generate.py +++ b/scripts/modinfo-generate.py @@ -32,7 +32,7 @@ def parse_line(line): continue return (kind, data) -def generate(name, lines): +def generate(name, lines, enabled): arch = "" objs = [] deps = [] @@ -48,6 +48,14 @@ def generate(name, lines): opts.append(data) elif kind == 'arch': arch = data; + elif kind == 'kconfig': + # don't add a module which dependency is not enabled + # in kconfig + if data.strip() not in enabled: + print(" /* module {} isn't enabled in Kconfig. */" + .format(data.strip())) + print("/* },{ */") + return None else: print("unknown:", kind) exit(1) @@ -58,8 +66,8 @@ def generate(name, lines): print_array("objs", objs) print_array("deps", deps) print_array("opts", opts) - print("},{"); - return deps + print("},{") + return {dep.strip('" ') for dep in deps} def print_pre(): print("/* generated by scripts/modinfo-generate.py */") @@ -72,23 +80,38 @@ def print_post(): print("}};") def main(args): - deps = {} + if len(args) < 3 or args[0] != '--devices': + print('Expected: modinfo-generate.py --devices ' + 'config-device.mak [modinfo files]', file=sys.stderr) + exit(1) + + # get all devices enabled in kconfig, from *-config-device.mak + enabled = set() + with open(args[1]) as file: + for line in file.readlines(): + config = line.split('=') + if config[1].rstrip() == 'y': + enabled.add(config[0][7:]) # remove CONFIG_ + + deps = set() + modules = set() print_pre() - for modinfo in args: + for modinfo in args[2:]: with open(modinfo) as f: lines = f.readlines() print(" /* %s */" % modinfo) - (basename, ext) = os.path.splitext(modinfo) - deps[basename] = generate(basename, lines) + (basename, _) = os.path.splitext(modinfo) + moddeps = generate(basename, lines, enabled) + if moddeps is not None: + modules.add(basename) + deps.update(moddeps) print_post() - flattened_deps = {flat.strip('" ') for dep in deps.values() for flat in dep} error = False - for dep in flattened_deps: - if dep not in deps.keys(): - print("Dependency {} cannot be satisfied".format(dep), - file=sys.stderr) - error = True + for dep in deps.difference(modules): + print("Dependency {} cannot be satisfied".format(dep), + file=sys.stderr) + error = True if error: exit(1) diff --git a/softmmu/icount.c b/softmmu/icount.c index 5ca271620d..4504433e16 100644 --- a/softmmu/icount.c +++ b/softmmu/icount.c @@ -322,7 +322,7 @@ void icount_start_warp_timer(void) * to vCPU was processed in advance and vCPU went to sleep. * Therefore we have to wake it up for doing someting. */ - if (replay_has_checkpoint()) { + if (replay_has_event()) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } return; @@ -404,6 +404,8 @@ void icount_account_warp_timer(void) return; } + replay_async_events(); + /* warp clock deterministically in record/replay mode */ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { return; @@ -486,3 +488,11 @@ void icount_configure(QemuOpts *opts, Error **errp) qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + NANOSECONDS_PER_SECOND / 10); } + +void icount_notify_exit(void) +{ + if (icount_enabled() && current_cpu) { + qemu_cpu_kick(current_cpu); + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + } +} diff --git a/stubs/icount.c b/stubs/icount.c index f13c43568b..6df8c2bf7d 100644 --- a/stubs/icount.c +++ b/stubs/icount.c @@ -43,3 +43,7 @@ void icount_account_warp_timer(void) { abort(); } + +void icount_notify_exit(void) +{ +} diff --git a/target/i386/cpu.c b/target/i386/cpu.c index bb6a5dd498..6a57ef13af 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5284,10 +5284,22 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); - /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */ - *eax &= ~0xFC000000; - if ((*eax & 31) && cs->nr_cores > 1) { - *eax |= (cs->nr_cores - 1) << 26; + /* + * QEMU has its own number of cores/logical cpus, + * set 24..14, 31..26 bit to configured values + */ + if (*eax & 31) { + int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); + int vcpus_per_socket = env->nr_dies * cs->nr_cores * + cs->nr_threads; + if (cs->nr_cores > 1) { + *eax &= ~0xFC000000; + *eax |= (pow2ceil(cs->nr_cores) - 1) << 26; + } + if (host_vcpus_per_cache > vcpus_per_socket) { + *eax &= ~0x3FFC000; + *eax |= (pow2ceil(vcpus_per_socket) - 1) << 14; + } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { *eax = *ebx = *ecx = *edx = 0; @@ -5559,7 +5571,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, * supports. Features can be further restricted by userspace, but not * made more permissive. */ - x86_cpu_get_supported_cpuid(0x12, index, eax, ebx, ecx, edx); + x86_cpu_get_supported_cpuid(0x12, count, eax, ebx, ecx, edx); if (count == 0) { *eax &= env->features[FEAT_SGX_12_0_EAX]; diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index e1b6d88683..48feba7e75 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -359,6 +359,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size, CPUX86State *env = &cpu->env; int error_code = PG_ERROR_OK; int pg_mode, prot, page_size; + int32_t a20_mask; hwaddr paddr; hwaddr vaddr; @@ -368,7 +369,8 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size, #endif if (!(env->cr[0] & CR0_PG_MASK)) { - paddr = addr; + a20_mask = x86_get_a20_mask(env); + paddr = addr & a20_mask; #ifdef TARGET_X86_64 if (!(env->hflags & HF_LMA_MASK)) { /* Without long mode we can only address 32bits in real mode */ diff --git a/tests/Makefile.include b/tests/Makefile.include index 6a1688e33e..3accb83b13 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -3,28 +3,28 @@ .PHONY: check-help check-help: @echo "Regression testing targets:" - @echo " $(MAKE) check Run block, qapi-schema, unit, softfloat, qtest and decodetree tests" - @echo " $(MAKE) bench Run speed tests" + @echo " $(MAKE) check Run block, qapi-schema, unit, softfloat, qtest and decodetree tests" + @echo " $(MAKE) bench Run speed tests" @echo @echo "Individual test suites:" - @echo " $(MAKE) check-qtest-TARGET Run qtest tests for given target" - @echo " $(MAKE) check-qtest Run qtest tests" - @echo " $(MAKE) check-unit Run qobject tests" - @echo " $(MAKE) check-qapi-schema Run QAPI schema tests" - @echo " $(MAKE) check-block Run block tests" + @echo " $(MAKE) check-qtest-TARGET Run qtest tests for given target" + @echo " $(MAKE) check-qtest Run qtest tests" + @echo " $(MAKE) check-unit Run qobject tests" + @echo " $(MAKE) check-qapi-schema Run QAPI schema tests" + @echo " $(MAKE) check-block Run block tests" ifneq ($(filter $(all-check-targets), check-softfloat),) - @echo " $(MAKE) check-tcg Run TCG tests" - @echo " $(MAKE) check-softfloat Run FPU emulation tests" + @echo " $(MAKE) check-tcg Run TCG tests" + @echo " $(MAKE) check-softfloat Run FPU emulation tests" endif - @echo " $(MAKE) check-avocado Run avocado (integration) tests for currently configured targets" + @echo " $(MAKE) check-avocado Run avocado (integration) tests for currently configured targets" @echo - @echo " $(MAKE) check-report.tap Generates an aggregated TAP test report" - @echo " $(MAKE) check-venv Creates a Python venv for tests" - @echo " $(MAKE) check-clean Clean the tests and related data" + @echo " $(MAKE) check-report.junit.xml Generates an aggregated XML test report" + @echo " $(MAKE) check-venv Creates a Python venv for tests" + @echo " $(MAKE) check-clean Clean the tests and related data" @echo @echo "The following are useful for CI builds" - @echo " $(MAKE) check-build Build most test binaries" - @echo " $(MAKE) get-vm-images Downloads all images used by avocado tests, according to configured targets (~350 MB each, 1.5 GB max)" + @echo " $(MAKE) check-build Build most test binaries" + @echo " $(MAKE) get-vm-images Downloads all images used by avocado tests, according to configured targets (~350 MB each, 1.5 GB max)" @echo @echo @echo "The variable SPEED can be set to control the gtester speed setting." @@ -89,6 +89,7 @@ TARGETS=$(patsubst libqemu-%.fa, %, $(filter libqemu-%.fa, $(ninja-targets))) TESTS_VENV_DIR=$(BUILD_DIR)/tests/venv TESTS_VENV_REQ=$(SRC_PATH)/tests/requirements.txt TESTS_RESULTS_DIR=$(BUILD_DIR)/tests/results +TESTS_PYTHON=$(TESTS_VENV_DIR)/bin/python3 ifndef AVOCADO_TESTS AVOCADO_TESTS=tests/avocado endif @@ -103,13 +104,14 @@ else AVOCADO_CMDLINE_TAGS=$(addprefix -t , $(AVOCADO_TAGS)) endif +quiet-venv-pip = $(quiet-@)$(call quiet-command-run, \ + $(TESTS_PYTHON) -m pip -q --disable-pip-version-check $1, \ + "VENVPIP","$1") + $(TESTS_VENV_DIR): $(TESTS_VENV_REQ) - $(call quiet-command, \ - $(PYTHON) -m venv $@, \ - VENV, $@) - $(call quiet-command, \ - $(TESTS_VENV_DIR)/bin/python -m pip -q install -r $(TESTS_VENV_REQ), \ - PIP, $(TESTS_VENV_REQ)) + $(call quiet-command, $(PYTHON) -m venv $@, VENV, $@) + $(call quiet-venv-pip,install -e "$(SRC_PATH)/python/") + $(call quiet-venv-pip,install -r $(TESTS_VENV_REQ)) $(call quiet-command, touch $@) $(TESTS_RESULTS_DIR): @@ -126,7 +128,7 @@ FEDORA_31_DOWNLOAD=$(filter $(FEDORA_31_ARCHES),$(FEDORA_31_ARCHES_CANDIDATES)) # download one specific Fedora 31 image get-vm-image-fedora-31-%: check-venv $(call quiet-command, \ - $(TESTS_VENV_DIR)/bin/python -m avocado vmimage get \ + $(TESTS_PYTHON) -m avocado vmimage get \ --distro=fedora --distro-version=31 --arch=$*, \ "AVOCADO", "Downloading avocado tests VM image for $*") @@ -135,7 +137,7 @@ get-vm-images: check-venv $(patsubst %,get-vm-image-fedora-31-%, $(FEDORA_31_DOW check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images $(call quiet-command, \ - $(TESTS_VENV_DIR)/bin/python -m avocado \ + $(TESTS_PYTHON) -m avocado \ --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \ $(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \ --filter-by-tags-include-empty-key) \ diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py index 39f15c1d51..b656a70c55 100644 --- a/tests/avocado/avocado_qemu/__init__.py +++ b/tests/avocado/avocado_qemu/__init__.py @@ -21,6 +21,11 @@ import avocado from avocado.utils import cloudinit, datadrainer, process, ssh, vmimage from avocado.utils.path import find_command +from qemu.machine import QEMUMachine +from qemu.utils import (get_info_usernet_hostfwd_port, kvm_available, + tcg_available) + + #: The QEMU build root directory. It may also be the source directory #: if building from the source dir, but it's safer to use BUILD_DIR for #: that purpose. Be aware that if this code is moved outside of a source @@ -35,12 +40,6 @@ if os.path.islink(os.path.dirname(os.path.dirname(__file__))): else: SOURCE_DIR = BUILD_DIR -sys.path.append(os.path.join(SOURCE_DIR, 'python')) - -from qemu.machine import QEMUMachine -from qemu.utils import (get_info_usernet_hostfwd_port, kvm_available, - tcg_available) - def has_cmd(name, args=None): """ diff --git a/tests/avocado/replay_linux.py b/tests/avocado/replay_linux.py index 15953f9e49..40e4f6908e 100644 --- a/tests/avocado/replay_linux.py +++ b/tests/avocado/replay_linux.py @@ -13,6 +13,7 @@ import logging import time from avocado import skipUnless +from avocado_qemu import BUILD_DIR from avocado.utils import cloudinit from avocado.utils import network from avocado.utils import vmimage @@ -32,9 +33,16 @@ class ReplayLinux(LinuxTest): bus = 'ide' def setUp(self): - super(ReplayLinux, self).setUp() + # LinuxTest does many replay-incompatible things, but includes + # useful methods. Do not setup LinuxTest here and just + # call some functions. + super(LinuxTest, self).setUp() + self._set_distro() self.boot_path = self.download_boot() - self.cloudinit_path = self.prepare_cloudinit() + self.phone_server = cloudinit.PhoneHomeServer(('0.0.0.0', 0), + self.name) + ssh_pubkey, self.ssh_key = self.set_up_existing_ssh_keys() + self.cloudinit_path = self.prepare_cloudinit(ssh_pubkey) def vm_add_disk(self, vm, path, id, device): bus_string = '' @@ -50,7 +58,9 @@ class ReplayLinux(LinuxTest): vm = self.get_vm() vm.add_args('-smp', '1') vm.add_args('-m', '1024') - vm.add_args('-object', 'filter-replay,id=replay,netdev=hub0port0') + vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22', + '-device', 'virtio-net,netdev=vnet') + vm.add_args('-object', 'filter-replay,id=replay,netdev=vnet') if args: vm.add_args(*args) self.vm_add_disk(vm, self.boot_path, 0, self.hdd) @@ -75,8 +85,8 @@ class ReplayLinux(LinuxTest): stop_check=(lambda : not vm.is_running())) console_drainer.start() if record: - cloudinit.wait_for_phone_home(('0.0.0.0', self.phone_home_port), - self.name) + while not self.phone_server.instance_phoned_back: + self.phone_server.handle_request() vm.shutdown() logger.info('finished the recording with log size %s bytes' % os.path.getsize(replay_path)) @@ -114,3 +124,68 @@ class ReplayLinuxX8664(ReplayLinux): :avocado: tags=machine:q35 """ self.run_rr(shift=3) + +@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout') +class ReplayLinuxX8664Virtio(ReplayLinux): + """ + :avocado: tags=arch:x86_64 + :avocado: tags=virtio + :avocado: tags=accel:tcg + """ + + hdd = 'virtio-blk-pci' + cd = 'virtio-blk-pci' + bus = None + + chksum = 'e3c1b309d9203604922d6e255c2c5d098a309c2d46215d8fc026954f3c5c27a0' + + def test_pc_i440fx(self): + """ + :avocado: tags=machine:pc + """ + self.run_rr(shift=1) + + def test_pc_q35(self): + """ + :avocado: tags=machine:q35 + """ + self.run_rr(shift=3) + +@skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout') +class ReplayLinuxAarch64(ReplayLinux): + """ + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt + :avocado: tags=cpu:max + """ + + chksum = '1e18d9c0cf734940c4b5d5ec592facaed2af0ad0329383d5639c997fdf16fe49' + + hdd = 'virtio-blk-device' + cd = 'virtio-blk-device' + bus = None + + def get_common_args(self): + return ('-bios', + os.path.join(BUILD_DIR, 'pc-bios', 'edk2-aarch64-code.fd'), + "-cpu", "max,lpa2=off", + '-device', 'virtio-rng-pci,rng=rng0', + '-object', 'rng-builtin,id=rng0') + + def test_virt_gicv2(self): + """ + :avocado: tags=machine:gic-version=2 + """ + + self.run_rr(shift=3, + args=(*self.get_common_args(), + "-machine", "virt,gic-version=2")) + + def test_virt_gicv3(self): + """ + :avocado: tags=machine:gic-version=3 + """ + + self.run_rr(shift=3, + args=(*self.get_common_args(), diff --git a/tests/avocado/virtio_check_params.py b/tests/avocado/virtio_check_params.py index e869690473..4093da8a67 100644 --- a/tests/avocado/virtio_check_params.py +++ b/tests/avocado/virtio_check_params.py @@ -22,7 +22,6 @@ import os import re import logging -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) from qemu.machine import QEMUMachine from avocado_qemu import QemuSystemTest from avocado import skip diff --git a/tests/avocado/virtio_version.py b/tests/avocado/virtio_version.py index 208910bb84..c84e48813a 100644 --- a/tests/avocado/virtio_version.py +++ b/tests/avocado/virtio_version.py @@ -11,7 +11,6 @@ Check compatibility of virtio device types import sys import os -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) from qemu.machine import QEMUMachine from avocado_qemu import QemuSystemTest diff --git a/tests/docker/dockerfiles/debian10.docker b/tests/docker/dockerfiles/debian10.docker index b414af1b9f..03be923066 100644 --- a/tests/docker/dockerfiles/debian10.docker +++ b/tests/docker/dockerfiles/debian10.docker @@ -34,4 +34,5 @@ RUN apt update && \ python3 \ python3-sphinx \ python3-sphinx-rtd-theme \ + python3-venv \ $(apt-get -s build-dep --arch-only qemu | egrep ^Inst | fgrep '[all]' | cut -d\ -f2) diff --git a/tests/requirements.txt b/tests/requirements.txt index a21b59b443..0ba561b6bd 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,5 +1,6 @@ # Add Python module requirements, one per line, to be installed # in the tests/venv Python virtual environment. For more info, # refer to: https://pip.pypa.io/en/stable/user_guide/#id1 +# Note that qemu.git/python/ is always implicitly installed. avocado-framework==88.1 pycdlib==1.11.0 diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include index ae91f5043e..588bc999cc 100644 --- a/tests/vm/Makefile.include +++ b/tests/vm/Makefile.include @@ -84,10 +84,11 @@ vm-clean-all: $(IMAGES_DIR)/%.img: $(SRC_PATH)/tests/vm/% \ $(SRC_PATH)/tests/vm/basevm.py \ - $(SRC_PATH)/tests/vm/Makefile.include + $(SRC_PATH)/tests/vm/Makefile.include \ + check-venv @mkdir -p $(IMAGES_DIR) $(call quiet-command, \ - $(PYTHON) $< \ + $(TESTS_PYTHON) $< \ $(if $(V)$(DEBUG), --debug) \ $(if $(GENISOIMAGE),--genisoimage $(GENISOIMAGE)) \ $(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \ @@ -101,9 +102,9 @@ $(IMAGES_DIR)/%.img: $(SRC_PATH)/tests/vm/% \ # Build in VM $(IMAGE) -vm-build-%: $(IMAGES_DIR)/%.img +vm-build-%: $(IMAGES_DIR)/%.img check-venv $(call quiet-command, \ - $(PYTHON) $(SRC_PATH)/tests/vm/$* \ + $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \ $(if $(V)$(DEBUG), --debug) \ $(if $(DEBUG), --interactive) \ $(if $(J),--jobs $(J)) \ @@ -127,9 +128,9 @@ vm-boot-serial-%: $(IMAGES_DIR)/%.img -device virtio-net-pci,netdev=vnet \ || true -vm-boot-ssh-%: $(IMAGES_DIR)/%.img +vm-boot-ssh-%: $(IMAGES_DIR)/%.img check-venv $(call quiet-command, \ - $(PYTHON) $(SRC_PATH)/tests/vm/$* \ + $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \ $(if $(J),--jobs $(J)) \ $(if $(V)$(DEBUG), --debug) \ $(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \ diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py index 254e11c932..d7d0413df3 100644 --- a/tests/vm/basevm.py +++ b/tests/vm/basevm.py @@ -18,9 +18,6 @@ import socket import logging import time import datetime -sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python')) -from qemu.machine import QEMUMachine -from qemu.utils import get_info_usernet_hostfwd_port, kvm_available import subprocess import hashlib import argparse @@ -31,6 +28,9 @@ import multiprocessing import traceback import shlex +from qemu.machine import QEMUMachine +from qemu.utils import get_info_usernet_hostfwd_port, kvm_available + SSH_KEY_FILE = os.path.join(os.path.dirname(__file__), "..", "keys", "id_rsa") SSH_PUB_KEY_FILE = os.path.join(os.path.dirname(__file__), diff --git a/util/async.c b/util/async.c index 554ba70cca..63434ddae4 100644 --- a/util/async.c +++ b/util/async.c @@ -33,6 +33,7 @@ #include "block/raw-aio.h" #include "qemu/coroutine_int.h" #include "qemu/coroutine-tls.h" +#include "sysemu/cpu-timers.h" #include "trace.h" /***********************************************************/ @@ -84,6 +85,13 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) } aio_notify(ctx); + /* + * Workaround for record/replay. + * vCPU execution should be suspended when new BH is set. + * This is needed to avoid guest timeouts caused + * by the long cycles of the execution. + */ + icount_notify_exit(); } /* Only called from aio_bh_poll() and aio_ctx_finalize() */ |