diff options
243 files changed, 3724 insertions, 1391 deletions
diff --git a/.gitlab-ci.d/cirrus.yml b/.gitlab-ci.d/cirrus.yml index e7b25e7427..d273a9e713 100644 --- a/.gitlab-ci.d/cirrus.yml +++ b/.gitlab-ci.d/cirrus.yml @@ -14,6 +14,7 @@ stage: build image: registry.gitlab.com/libvirt/libvirt-ci/cirrus-run:master needs: [] + timeout: 80m allow_failure: true script: - source .gitlab-ci.d/cirrus/$NAME.vars @@ -40,6 +41,9 @@ - cat .gitlab-ci.d/cirrus/$NAME.yml - cirrus-run -v --show-build-log always .gitlab-ci.d/cirrus/$NAME.yml rules: + # Allow on 'staging' branch and 'stable-X.Y-staging' branches only + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/' + when: never - if: "$CIRRUS_GITHUB_REPO && $CIRRUS_API_TOKEN" x64-freebsd-12-build: diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml index a89a20da48..056c374619 100644 --- a/.gitlab-ci.d/custom-runners.yml +++ b/.gitlab-ci.d/custom-runners.yml @@ -13,238 +13,7 @@ variables: GIT_STRATEGY: clone -# All ubuntu-18.04 jobs should run successfully in an environment -# setup by the scripts/ci/setup/build-environment.yml task -# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" -ubuntu-18.04-s390x-all-linux-static: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$S390X_RUNNER_AVAILABLE" - script: - # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 - # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages - - mkdir build - - cd build - - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - - make --output-sync -j`nproc` check-tcg V=1 - -ubuntu-18.04-s390x-all: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$S390X_RUNNER_AVAILABLE" - script: - - mkdir build - - cd build - - ../configure --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-alldbg: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --enable-debug --disable-libssh - - make clean - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-clang: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-18.04-s390x-tci: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --enable-tcg-interpreter - - make --output-sync -j`nproc` - -ubuntu-18.04-s390x-notcg: - needs: [] - stage: build - tags: - - ubuntu_18.04 - - s390x - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$S390X_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --disable-tcg - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -# All ubuntu-20.04 jobs should run successfully in an environment -# setup by the scripts/ci/setup/qemu/build-environment.yml task -# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" -ubuntu-20.04-aarch64-all-linux-static: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$AARCH64_RUNNER_AVAILABLE" - script: - # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 - # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages - - mkdir build - - cd build - - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - - make --output-sync -j`nproc` check-tcg V=1 - -ubuntu-20.04-aarch64-all: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-alldbg: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - - if: "$AARCH64_RUNNER_AVAILABLE" - script: - - mkdir build - - cd build - - ../configure --enable-debug --disable-libssh - - make clean - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-clang: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 - -ubuntu-20.04-aarch64-tci: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --enable-tcg-interpreter - - make --output-sync -j`nproc` - -ubuntu-20.04-aarch64-notcg: - needs: [] - stage: build - tags: - - ubuntu_20.04 - - aarch64 - rules: - - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' - when: manual - allow_failure: true - - if: "$AARCH64_RUNNER_AVAILABLE" - when: manual - allow_failure: true - script: - - mkdir build - - cd build - - ../configure --disable-libssh --disable-tcg - - make --output-sync -j`nproc` - - make --output-sync -j`nproc` check V=1 +include: + - local: '/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml' + - local: '/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml' + - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml' diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml new file mode 100644 index 0000000000..49aa703f55 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml @@ -0,0 +1,28 @@ +centos-stream-8-x86_64: + allow_failure: true + needs: [] + stage: build + tags: + - centos_stream_8 + - x86_64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE" + artifacts: + name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + when: on_failure + expire_in: 7 days + paths: + - build/tests/results/latest/results.xml + - build/tests/results/latest/test-results + reports: + junit: build/tests/results/latest/results.xml + before_script: + - JOBS=$(expr $(nproc) + 1) + script: + - mkdir build + - cd build + - ../scripts/ci/org.centos/stream/8/x86_64/configure + - make -j"$JOBS" + - make NINJA=":" check + - ../scripts/ci/org.centos/stream/8/x86_64/test-avocado diff --git a/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml new file mode 100644 index 0000000000..f39d874a1e --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-18.04-s390x.yml @@ -0,0 +1,118 @@ +# All ubuntu-18.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-18.04-s390x-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-18.04-s390x-all: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$S390X_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-alldbg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-clang: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-18.04-s390x-tci: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-18.04-s390x-notcg: + needs: [] + stage: build + tags: + - ubuntu_18.04 + - s390x + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$S390X_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml new file mode 100644 index 0000000000..920e388bd0 --- /dev/null +++ b/.gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml @@ -0,0 +1,118 @@ +# All ubuntu-20.04 jobs should run successfully in an environment +# setup by the scripts/ci/setup/qemu/build-environment.yml task +# "Install basic packages to build QEMU on Ubuntu 18.04/20.04" + +ubuntu-20.04-aarch64-all-linux-static: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + # --disable-libssh is needed because of https://bugs.launchpad.net/qemu/+bug/1838763 + # --disable-glusterfs is needed because there's no static version of those libs in distro supplied packages + - mkdir build + - cd build + - ../configure --enable-debug --static --disable-system --disable-glusterfs --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + - make --output-sync -j`nproc` check-tcg V=1 + +ubuntu-20.04-aarch64-all: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-alldbg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + - if: "$AARCH64_RUNNER_AVAILABLE" + script: + - mkdir build + - cd build + - ../configure --enable-debug --disable-libssh + - make clean + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-clang: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --cc=clang-10 --cxx=clang++-10 --enable-sanitizers + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 + +ubuntu-20.04-aarch64-tci: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --enable-tcg-interpreter + - make --output-sync -j`nproc` + +ubuntu-20.04-aarch64-notcg: + needs: [] + stage: build + tags: + - ubuntu_20.04 + - aarch64 + rules: + - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/' + when: manual + allow_failure: true + - if: "$AARCH64_RUNNER_AVAILABLE" + when: manual + allow_failure: true + script: + - mkdir build + - cd build + - ../configure --disable-libssh --disable-tcg + - make --output-sync -j`nproc` + - make --output-sync -j`nproc` check V=1 diff --git a/MAINTAINERS b/MAINTAINERS index d3879aa3c1..7543eb4d59 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -324,7 +324,7 @@ F: disas/sparc.c X86 TCG CPUs M: Paolo Bonzini <pbonzini@redhat.com> M: Richard Henderson <richard.henderson@linaro.org> -M: Eduardo Habkost <ehabkost@redhat.com> +M: Eduardo Habkost <eduardo@habkost.net> S: Maintained F: target/i386/tcg/ F: tests/tcg/i386/ @@ -1628,7 +1628,7 @@ F: include/hw/i386/microvm.h F: pc-bios/bios-microvm.bin Machine core -M: Eduardo Habkost <ehabkost@redhat.com> +M: Eduardo Habkost <eduardo@habkost.net> M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> R: Philippe Mathieu-Daudé <philmd@redhat.com> S: Supported @@ -2648,13 +2648,13 @@ F: backends/cryptodev*.c Python library M: John Snow <jsnow@redhat.com> M: Cleber Rosa <crosa@redhat.com> -R: Eduardo Habkost <ehabkost@redhat.com> +R: Eduardo Habkost <eduardo@habkost.net> S: Maintained F: python/ T: git https://gitlab.com/jsnow/qemu.git python Python scripts -M: Eduardo Habkost <ehabkost@redhat.com> +M: Eduardo Habkost <eduardo@habkost.net> M: Cleber Rosa <crosa@redhat.com> S: Odd Fixes F: scripts/*.py @@ -2730,7 +2730,7 @@ T: git https://github.com/mdroth/qemu.git qga QOM M: Paolo Bonzini <pbonzini@redhat.com> R: Daniel P. Berrange <berrange@redhat.com> -R: Eduardo Habkost <ehabkost@redhat.com> +R: Eduardo Habkost <eduardo@habkost.net> S: Supported F: docs/qdev-device-use.txt F: hw/core/qdev* @@ -2750,7 +2750,7 @@ F: tests/unit/check-qom-proplist.c F: tests/unit/test-qdev-global-props.c QOM boilerplate conversion script -M: Eduardo Habkost <ehabkost@redhat.com> +M: Eduardo Habkost <eduardo@habkost.net> S: Maintained F: scripts/codeconverter/ @@ -3469,7 +3469,7 @@ M: Alex Bennée <alex.bennee@linaro.org> M: Philippe Mathieu-Daudé <f4bug@amsat.org> M: Thomas Huth <thuth@redhat.com> R: Wainer dos Santos Moschetta <wainersm@redhat.com> -R: Willian Rampazzo <willianr@redhat.com> +R: Beraldo Leal <bleal@redhat.com> S: Maintained F: .github/lockdown.yml F: .gitlab-ci.yml @@ -3507,10 +3507,16 @@ W: https://trello.com/b/6Qi1pxVn/avocado-qemu R: Cleber Rosa <crosa@redhat.com> R: Philippe Mathieu-Daudé <philmd@redhat.com> R: Wainer dos Santos Moschetta <wainersm@redhat.com> -R: Willian Rampazzo <willianr@redhat.com> +R: Beraldo Leal <bleal@redhat.com> S: Odd Fixes F: tests/avocado/ +GitLab custom runner (Works On Arm Sponsored) +M: Alex Bennée <alex.bennee@linaro.org> +M: Philippe Mathieu-Daudé <f4bug@amsat.org> +S: Maintained +F: .gitlab-ci.d/custom-runners/ubuntu-20.04-aarch64.yml + Documentation ------------- Build system architecture @@ -1 +1 @@ -6.1.90 +6.2.50 diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 2d14d02f6c..409ec8c38c 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -721,6 +721,15 @@ static inline bool need_replay_interrupt(int interrupt_request) static inline bool cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { + /* + * If we have requested custom cflags with CF_NOIRQ we should + * skip checking here. Any pending interrupts will get picked up + * by the next TB we execute under normal cflags. + */ + if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) { + return false; + } + /* Clear the interrupt flag now since we're processing * cpu->interrupt_request and cpu->exit_request. * Ensure zeroing happens before reading cpu->exit_request or diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 137a1a44cc..7a0a79d731 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -10,7 +10,7 @@ tcg_ss.add(files( )) tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c')) -tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c'), libdl]) +tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')]) specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss) specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files( diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 847d2079d2..29632bd4c0 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -28,6 +28,7 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" #include "hw/boards.h" @@ -35,6 +36,26 @@ #include "tcg-accel-ops.h" #include "tcg-accel-ops-mttcg.h" +typedef struct MttcgForceRcuNotifier { + Notifier notifier; + CPUState *cpu; +} MttcgForceRcuNotifier; + +static void do_nothing(CPUState *cpu, run_on_cpu_data d) +{ +} + +static void mttcg_force_rcu(Notifier *notify, void *data) +{ + CPUState *cpu = container_of(notify, MttcgForceRcuNotifier, notifier)->cpu; + + /* + * Called with rcu_registry_lock held, using async_run_on_cpu() ensures + * that there are no deadlocks. + */ + async_run_on_cpu(cpu, do_nothing, RUN_ON_CPU_NULL); +} + /* * In the multi-threaded case each vCPU has its own thread. The TLS * variable current_cpu can be used deep in the code to find the @@ -43,12 +64,16 @@ static void *mttcg_cpu_thread_fn(void *arg) { + MttcgForceRcuNotifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); g_assert(!icount_enabled()); rcu_register_thread(); + force_rcu.notifier.notify = mttcg_force_rcu; + force_rcu.cpu = cpu; + rcu_add_force_rcu_notifier(&force_rcu.notifier); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -100,6 +125,7 @@ static void *mttcg_cpu_thread_fn(void *arg) tcg_cpus_destroy(cpu); qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index a5fd26190e..bf59f53dbc 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -28,6 +28,7 @@ #include "sysemu/tcg.h" #include "sysemu/replay.h" #include "qemu/main-loop.h" +#include "qemu/notify.h" #include "qemu/guest-random.h" #include "exec/exec-all.h" @@ -133,6 +134,11 @@ static void rr_deal_with_unplugged_cpus(void) } } +static void rr_force_rcu(Notifier *notify, void *data) +{ + rr_kick_next_cpu(); +} + /* * In the single-threaded case each vCPU is simulated in turn. If * there is more than a single vCPU we create a simple timer to kick @@ -143,10 +149,13 @@ static void rr_deal_with_unplugged_cpus(void) static void *rr_cpu_thread_fn(void *arg) { + Notifier force_rcu; CPUState *cpu = arg; assert(tcg_enabled()); rcu_register_thread(); + force_rcu.notify = rr_force_rcu; + rcu_add_force_rcu_notifier(&force_rcu); tcg_register_thread(); qemu_mutex_lock_iothread(); @@ -255,6 +264,7 @@ static void *rr_cpu_thread_fn(void *arg) rr_deal_with_unplugged_cpus(); } + rcu_remove_force_rcu_notifier(&force_rcu); rcu_unregister_thread(); return NULL; } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index bd0bb81d08..bd71db59a9 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1738,7 +1738,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } @@ -1906,7 +1906,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu); return true; } #endif @@ -87,8 +87,10 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); +static void bdrv_child_free(BdrvChild *child); +static void bdrv_replace_child_noperm(BdrvChild **child, + BlockDriverState *new_bs, + bool free_empty_child); static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran); @@ -1387,6 +1389,8 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + QLIST_INSERT_HEAD(&bs->children, child, next); + if (child->role & BDRV_CHILD_COW) { bdrv_backing_attach(child); } @@ -1403,6 +1407,8 @@ static void bdrv_child_cb_detach(BdrvChild *child) } bdrv_unapply_subtree_drain(child, bs); + + QLIST_REMOVE(child, next); } static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, @@ -2250,13 +2256,18 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, typedef struct BdrvReplaceChildState { BdrvChild *child; + BdrvChild **childp; BlockDriverState *old_bs; + bool free_empty_child; } BdrvReplaceChildState; static void bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; + if (s->free_empty_child && !s->child->bs) { + bdrv_child_free(s->child); + } bdrv_unref(s->old_bs); } @@ -2265,8 +2276,34 @@ static void bdrv_replace_child_abort(void *opaque) BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; - /* old_bs reference is transparently moved from @s to @s->child */ - bdrv_replace_child_noperm(s->child, s->old_bs); + /* + * old_bs reference is transparently moved from @s to s->child. + * + * Pass &s->child here instead of s->childp, because: + * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not + * modify the BdrvChild * pointer we indirectly pass to it, i.e. it + * will not modify s->child. From that perspective, it does not matter + * whether we pass s->childp or &s->child. + * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use + * it here. + * (3) If new_bs is NULL, *s->childp will have been NULLed by + * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we + * must not pass a NULL *s->childp here. + * + * So whether new_bs was NULL or not, we cannot pass s->childp here; and in + * any case, there is no reason to pass it anyway. + */ + bdrv_replace_child_noperm(&s->child, s->old_bs, true); + /* + * The child was pre-existing, so s->old_bs must be non-NULL, and + * s->child thus must not have been freed + */ + assert(s->child != NULL); + if (!new_bs) { + /* As described above, *s->childp was cleared, so restore it */ + assert(s->childp != NULL); + *s->childp = s->child; + } bdrv_unref(new_bs); } @@ -2282,22 +2319,46 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Note: real unref of old_bs is done only on commit. * * The function doesn't update permissions, caller is responsible for this. + * + * (*childp)->bs must not be NULL. + * + * Note that if new_bs == NULL, @childp is stored in a state object attached + * to @tran, so that the old child can be reinstated in the abort handler. + * Therefore, if @new_bs can be NULL, @childp must stay valid until the + * transaction is committed or aborted. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed (on commit). @free_empty_child should only be false if the + * caller will free the BDrvChild themselves (which may be important + * if this is in turn called in another transactional context). */ -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) +static void bdrv_replace_child_tran(BdrvChild **childp, + BlockDriverState *new_bs, + Transaction *tran, + bool free_empty_child) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, + .child = *childp, + .childp = new_bs == NULL ? childp : NULL, + .old_bs = (*childp)->bs, + .free_empty_child = free_empty_child, }; tran_add(tran, &bdrv_replace_child_drv, s); + /* The abort handler relies on this */ + assert(s->old_bs != NULL); + if (new_bs) { bdrv_ref(new_bs); } - bdrv_replace_child_noperm(child, new_bs); - /* old_bs reference is transparently moved from @child to @s */ + /* + * Pass free_empty_child=false, we will free the child (if + * necessary) in bdrv_replace_child_commit() (if our + * @free_empty_child parameter was true). + */ + bdrv_replace_child_noperm(childp, new_bs, false); + /* old_bs reference is transparently moved from *childp to @s */ } /* @@ -2668,9 +2729,24 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) +/** + * Replace (*childp)->bs by @new_bs. + * + * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents + * generally cannot handle a BdrvChild with .bs == NULL, so clearing + * BdrvChild.bs should generally immediately be followed by the + * BdrvChild pointer being cleared as well. + * + * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is + * freed. @free_empty_child should only be false if the caller will + * free the BdrvChild themselves (this may be important in a + * transactional context, where it may only be freed on commit). + */ +static void bdrv_replace_child_noperm(BdrvChild **childp, + BlockDriverState *new_bs, + bool free_empty_child) { + BdrvChild *child = *childp; BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; int drain_saldo; @@ -2705,6 +2781,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } child->bs = new_bs; + if (!new_bs) { + *childp = NULL; + } if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); @@ -2734,21 +2813,25 @@ static void bdrv_replace_child_noperm(BdrvChild *child, bdrv_parent_drained_end_single(child); drain_saldo++; } -} - -static void bdrv_child_free(void *opaque) -{ - BdrvChild *c = opaque; - g_free(c->name); - g_free(c); + if (free_empty_child && !child->bs) { + bdrv_child_free(child); + } } -static void bdrv_remove_empty_child(BdrvChild *child) +/** + * Free the given @child. + * + * The child must be empty (i.e. `child->bs == NULL`) and it must be + * unused (i.e. not in a children list). + */ +static void bdrv_child_free(BdrvChild *child) { assert(!child->bs); - QLIST_SAFE_REMOVE(child, next); - bdrv_child_free(child); + assert(!child->next.le_prev); /* not in children list */ + + g_free(child->name); + g_free(child); } typedef struct BdrvAttachChildCommonState { @@ -2763,27 +2846,35 @@ static void bdrv_attach_child_common_abort(void *opaque) BdrvChild *child = *s->child; BlockDriverState *bs = child->bs; - bdrv_replace_child_noperm(child, NULL); + /* + * Pass free_empty_child=false, because we still need the child + * for the AioContext operations on the parent below; those + * BdrvChildClass methods all work on a BdrvChild object, so we + * need to keep it as an empty shell (after this function, it will + * not be attached to any parent, and it will not have a .bs). + */ + bdrv_replace_child_noperm(s->child, NULL, false); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort); } if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) { - GSList *ignore = g_slist_prepend(NULL, child); + GSList *ignore; + /* No need to ignore `child`, because it has been detached already */ + ignore = NULL; child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore, &error_abort); g_slist_free(ignore); - ignore = g_slist_prepend(NULL, child); - child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); + ignore = NULL; + child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore); g_slist_free(ignore); } bdrv_unref(bs); - bdrv_remove_empty_child(child); - *s->child = NULL; + bdrv_child_free(child); } static TransactionActionDrv bdrv_attach_child_common_drv = { @@ -2855,13 +2946,15 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs, if (ret < 0) { error_propagate(errp, local_err); - bdrv_remove_empty_child(new_child); + bdrv_child_free(new_child); return ret; } } bdrv_ref(child_bs); - bdrv_replace_child_noperm(new_child, child_bs); + bdrv_replace_child_noperm(&new_child, child_bs, true); + /* child_bs was non-NULL, so new_child must not have been freed */ + assert(new_child != NULL); *child = new_child; @@ -2913,21 +3006,14 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs, return ret; } - QLIST_INSERT_HEAD(&parent_bs->children, *child, next); - /* - * child is removed in bdrv_attach_child_common_abort(), so don't care to - * abort this change separately. - */ - return 0; } -static void bdrv_detach_child(BdrvChild *child) +static void bdrv_detach_child(BdrvChild **childp) { - BlockDriverState *old_bs = child->bs; + BlockDriverState *old_bs = (*childp)->bs; - bdrv_replace_child_noperm(child, NULL); - bdrv_remove_empty_child(child); + bdrv_replace_child_noperm(childp, NULL, true); if (old_bs) { /* @@ -3033,7 +3119,7 @@ void bdrv_root_unref_child(BdrvChild *child) BlockDriverState *child_bs; child_bs = child->bs; - bdrv_detach_child(child); + bdrv_detach_child(&child); bdrv_unref(child_bs); } @@ -4843,6 +4929,7 @@ static bool should_update_child(BdrvChild *c, BlockDriverState *to) typedef struct BdrvRemoveFilterOrCowChild { BdrvChild *child; + BlockDriverState *bs; bool is_backing; } BdrvRemoveFilterOrCowChild; @@ -4851,7 +4938,6 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque) BdrvRemoveFilterOrCowChild *s = opaque; BlockDriverState *parent_bs = s->child->opaque; - QLIST_INSERT_HEAD(&parent_bs->children, s->child, next); if (s->is_backing) { parent_bs->backing = s->child; } else { @@ -4873,10 +4959,19 @@ static void bdrv_remove_filter_or_cow_child_commit(void *opaque) bdrv_child_free(s->child); } +static void bdrv_remove_filter_or_cow_child_clean(void *opaque) +{ + BdrvRemoveFilterOrCowChild *s = opaque; + + /* Drop the bs reference after the transaction is done */ + bdrv_unref(s->bs); + g_free(s); +} + static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = { .abort = bdrv_remove_filter_or_cow_child_abort, .commit = bdrv_remove_filter_or_cow_child_commit, - .clean = g_free, + .clean = bdrv_remove_filter_or_cow_child_clean, }; /* @@ -4887,31 +4982,41 @@ static void bdrv_remove_file_or_backing_child(BlockDriverState *bs, BdrvChild *child, Transaction *tran) { + BdrvChild **childp; BdrvRemoveFilterOrCowChild *s; - assert(child == bs->backing || child == bs->file); - if (!child) { return; } + /* + * Keep a reference to @bs so @childp will stay valid throughout the + * transaction (required by bdrv_replace_child_tran()) + */ + bdrv_ref(bs); + if (child == bs->backing) { + childp = &bs->backing; + } else if (child == bs->file) { + childp = &bs->file; + } else { + g_assert_not_reached(); + } + if (child->bs) { - bdrv_replace_child_tran(child, NULL, tran); + /* + * Pass free_empty_child=false, we will free the child in + * bdrv_remove_filter_or_cow_child_commit() + */ + bdrv_replace_child_tran(childp, NULL, tran, false); } s = g_new(BdrvRemoveFilterOrCowChild, 1); *s = (BdrvRemoveFilterOrCowChild) { .child = child, - .is_backing = (child == bs->backing), + .bs = bs, + .is_backing = (childp == &bs->backing), }; tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s); - - QLIST_SAFE_REMOVE(child, next); - if (s->is_backing) { - bs->backing = NULL; - } else { - bs->file = NULL; - } } /* @@ -4932,6 +5037,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, { BdrvChild *c, *next; + assert(to != NULL); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { @@ -4947,7 +5054,12 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, c->name, from->node_name); return -EPERM; } - bdrv_replace_child_tran(c, to, tran); + + /* + * Passing a pointer to the local variable @c is fine here, because + * @to is not NULL, and so &c will not be attached to the transaction. + */ + bdrv_replace_child_tran(&c, to, tran, true); } return 0; @@ -4962,6 +5074,8 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, * * With @detach_subchain=true @to must be in a backing chain of @from. In this * case backing link of the cow-parent of @to is removed. + * + * @to must not be NULL. */ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to, @@ -4974,6 +5088,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, BlockDriverState *to_cow_parent = NULL; int ret; + assert(to != NULL); + if (detach_subchain) { assert(bdrv_chain_contains(from, to)); assert(from != to); @@ -5029,6 +5145,9 @@ out: return ret; } +/** + * Replace node @from by @to (where neither may be NULL). + */ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, Error **errp) { @@ -5096,7 +5215,9 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); - bdrv_replace_child_tran(child, new_bs, tran); + bdrv_replace_child_tran(&child, new_bs, tran, true); + /* @new_bs must have been non-NULL, so @child must not have been freed */ + assert(child != NULL); found = g_hash_table_new(NULL, NULL); refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); diff --git a/block/file-posix.c b/block/file-posix.c index 7a27c83060..b283093e5b 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -167,6 +167,7 @@ typedef struct BDRVRawState { int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; + bool force_alignment; bool drop_cache; bool check_cache_dropped; struct { @@ -351,6 +352,17 @@ static bool dio_byte_aligned(int fd) return false; } +static bool raw_needs_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + return true; + } + + return s->force_alignment; +} + /* Check if read is allowed with given memory buffer and length. * * This function is used to check O_DIRECT memory buffer and request alignment. @@ -728,9 +740,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->has_discard = true; s->has_write_zeroes = true; - if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { - s->needs_alignment = true; - } if (fstat(s->fd, &st) < 0) { ret = -errno; @@ -784,9 +793,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, * so QEMU makes sure all IO operations on the device are aligned * to sector size, or else FreeBSD will reject them with EINVAL. */ - s->needs_alignment = true; + s->force_alignment = true; } #endif + s->needs_alignment = raw_needs_alignment(bs); #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { @@ -1251,7 +1261,9 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; struct stat st; + s->needs_alignment = raw_needs_alignment(bs); raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); diff --git a/block/nvme.c b/block/nvme.c index e4f336d79c..fa360b9b3c 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -206,8 +206,9 @@ static void nvme_free_req_queue_cb(void *opaque) NVMeQueuePair *q = opaque; qemu_mutex_lock(&q->lock); - while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { - /* Retry all pending requests */ + while (q->free_req_head != -1 && + qemu_co_enter_next(&q->free_req_queue, &q->lock)) { + /* Retry waiting requests */ } qemu_mutex_unlock(&q->lock); } diff --git a/block/stream.c b/block/stream.c index 97bee482dc..e45113aed6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,8 +54,8 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); - BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base); - BlockDriverState *unfiltered_base = bdrv_skip_filters(base); + BlockDriverState *base; + BlockDriverState *unfiltered_base; Error *local_err = NULL; int ret = 0; @@ -63,6 +63,9 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; + base = bdrv_filter_or_cow_bs(s->above_base); + unfiltered_base = bdrv_skip_filters(base); + if (bdrv_cow_child(unfiltered_bs)) { const char *base_id = NULL, *base_fmt = NULL; if (unfiltered_base) { diff --git a/block/vvfat.c b/block/vvfat.c index 05e78e3c27..5dacc6cfac 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1279,8 +1279,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); - ret = 0; + qemu_opts_del(opts); + + return 0; + fail: + g_free(s->qcow_filename); + s->qcow_filename = NULL; + g_free(s->cluster_buffer); + s->cluster_buffer = NULL; + g_free(s->used_clusters); + s->used_clusters = NULL; + qemu_opts_del(opts); return ret; } @@ -3118,7 +3128,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) int size = sector2cluster(s, s->sector_count); QDict *options; - s->used_clusters = calloc(size, 1); + s->used_clusters = g_malloc0(size); array_init(&(s->commits), sizeof(commit_t)); @@ -3166,8 +3176,6 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) return 0; err: - g_free(s->qcow_filename); - s->qcow_filename = NULL; return ret; } diff --git a/chardev/wctablet.c b/chardev/wctablet.c index 95e005f5a5..e8b292c43c 100644 --- a/chardev/wctablet.c +++ b/chardev/wctablet.c @@ -320,7 +320,6 @@ static void wctablet_chr_finalize(Object *obj) TabletChardev *tablet = WCTABLET_CHARDEV(obj); qemu_input_handler_unregister(tablet->hs); - g_free(tablet); } static void wctablet_chr_open(Chardev *chr, diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt index 8d6d53a5a2..8ec653f81c 100644 --- a/docs/COLO-FT.txt +++ b/docs/COLO-FT.txt @@ -209,9 +209,9 @@ children.0=childs0 \ 3. On Secondary VM's QEMU monitor, issue command -{'execute':'qmp_capabilities'} -{'execute': 'nbd-server-start', 'arguments': {'addr': {'type': 'inet', 'data': {'host': '0.0.0.0', 'port': '9999'} } } } -{'execute': 'nbd-server-add', 'arguments': {'device': 'parent0', 'writable': true } } +{"execute":"qmp_capabilities"} +{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } } +{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } } Note: a. The qmp command nbd-server-start and nbd-server-add must be run @@ -222,11 +222,11 @@ Note: will be merged into the parent disk on failover. 4. On Primary VM's QEMU monitor, issue command: -{'execute':'qmp_capabilities'} -{'execute': 'human-monitor-command', 'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{'parent': 'colo-disk0', 'node': 'replication0' } } -{'execute': 'migrate-set-capabilities', 'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments': {'uri': 'tcp:127.0.0.2:9998' } } +{"execute":"qmp_capabilities"} +{"execute": "human-monitor-command", "arguments": {"command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{"parent": "colo-disk0", "node": "replication0" } } +{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments": {"uri": "tcp:127.0.0.2:9998" } } Note: a. There should be only one NBD Client for each primary disk. @@ -249,59 +249,59 @@ if you want to resume the replication, follow "Secondary resume replication" == Primary Failover == The Secondary died, resume on the Primary -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'child': 'children.1'} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_del replication0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'comp0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'iothread1' } } -{'execute': 'object-del', 'arguments':{ 'id': 'm0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire0' } } -{'execute': 'object-del', 'arguments':{ 'id': 'redire1' } } -{'execute': 'x-colo-lost-heartbeat' } +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "child": "children.1"} } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_del replication0" } } +{"execute": "object-del", "arguments":{ "id": "comp0" } } +{"execute": "object-del", "arguments":{ "id": "iothread1" } } +{"execute": "object-del", "arguments":{ "id": "m0" } } +{"execute": "object-del", "arguments":{ "id": "redire0" } } +{"execute": "object-del", "arguments":{ "id": "redire1" } } +{"execute": "x-colo-lost-heartbeat" } == Secondary Failover == The Primary died, resume on the Secondary and prepare to become the new Primary -{'execute': 'nbd-server-stop'} -{'execute': 'x-colo-lost-heartbeat'} +{"execute": "nbd-server-stop"} +{"execute": "x-colo-lost-heartbeat"} -{'execute': 'object-del', 'arguments':{ 'id': 'f2' } } -{'execute': 'object-del', 'arguments':{ 'id': 'f1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red1' } } -{'execute': 'chardev-remove', 'arguments':{ 'id': 'red0' } } +{"execute": "object-del", "arguments":{ "id": "f2" } } +{"execute": "object-del", "arguments":{ "id": "f1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red1" } } +{"execute": "chardev-remove", "arguments":{ "id": "red0" } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'mirror0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9003' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare1', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '0.0.0.0', 'port': '9004' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare0-0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9001' } }, 'server': false } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': true } } } } -{'execute': 'chardev-add', 'arguments':{ 'id': 'compare_out0', 'backend': {'type': 'socket', 'data': {'addr': { 'type': 'inet', 'data': { 'host': '127.0.0.1', 'port': '9005' } }, 'server': false } } } } +{"execute": "chardev-add", "arguments":{ "id": "mirror0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9003" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare1", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "0.0.0.0", "port": "9004" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare0-0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9001" } }, "server": false } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": true } } } } +{"execute": "chardev-add", "arguments":{ "id": "compare_out0", "backend": {"type": "socket", "data": {"addr": { "type": "inet", "data": { "host": "127.0.0.1", "port": "9005" } }, "server": false } } } } == Primary resume replication == Resume replication after new Secondary is up. Start the new Secondary (Steps 2 and 3 above), then on the Primary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.2:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.2:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync'} } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync"} } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.2,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire1", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": "compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.2:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.2:9998" } } Note: If this Primary previously was a Secondary, then we need to insert the filters before the filter-rewriter by using the -"'insert': 'before', 'position': 'id=rew0'" Options. See below. +""insert": "before", "position": "id=rew0"" Options. See below. == Secondary resume replication == Become Primary and resume replication after new Secondary is up. Note @@ -309,23 +309,23 @@ that now 127.0.0.1 is the Secondary and 127.0.0.2 is the Primary. Start the new Secondary (Steps 2 and 3 above, but with primary_ip=127.0.0.2), then on the old Secondary: -{'execute': 'drive-mirror', 'arguments':{ 'device': 'colo-disk0', 'job-id': 'resync', 'target': 'nbd://127.0.0.1:9999/parent0', 'mode': 'existing', 'format': 'raw', 'sync': 'full'} } +{"execute": "drive-mirror", "arguments":{ "device": "colo-disk0", "job-id": "resync", "target": "nbd://127.0.0.1:9999/parent0", "mode": "existing", "format": "raw", "sync": "full"} } Wait until disk is synced, then: -{'execute': 'stop'} -{'execute': 'block-job-cancel', 'arguments':{ 'device': 'resync' } } +{"execute": "stop"} +{"execute": "block-job-cancel", "arguments":{ "device": "resync" } } -{'execute': 'human-monitor-command', 'arguments':{ 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0'}} -{'execute': 'x-blockdev-change', 'arguments':{ 'parent': 'colo-disk0', 'node': 'replication0' } } +{"execute": "human-monitor-command", "arguments":{ "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=127.0.0.1,file.port=9999,file.export=parent0,node-name=replication0"}} +{"execute": "x-blockdev-change", "arguments":{ "parent": "colo-disk0", "node": "replication0" } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-mirror', 'id': 'm0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'tx', 'outdev': 'mirror0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire0', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'indev': 'compare_out' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'filter-redirector', 'id': 'redire1', 'props': { 'insert': 'before', 'position': 'id=rew0', 'netdev': 'hn0', 'queue': 'rx', 'outdev': 'compare0' } } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'iothread', 'id': 'iothread1' } } -{'execute': 'object-add', 'arguments':{ 'qom-type': 'colo-compare', 'id': 'comp0', 'props': { 'primary_in': 'compare0-0', 'secondary_in': 'compare1', 'outdev': 'compare_out0', 'iothread': 'iothread1' } } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-mirror", "id": "m0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "tx", "outdev": "mirror0" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire0", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "indev": "compare_out" } } +{"execute": "object-add", "arguments":{ "qom-type": "filter-redirector", "id": "redire1", "insert": "before", "position": "id=rew0", "netdev": "hn0", "queue": "rx", "outdev": "compare0" } } +{"execute": "object-add", "arguments":{ "qom-type": "iothread", "id": "iothread1" } } +{"execute": "object-add", "arguments":{ "qom-type": "colo-compare", "id": "comp0", "primary_in": "compare0-0", "secondary_in": "compare1", "outdev": "compare_out0", "iothread": "iothread1" } } -{'execute': 'migrate-set-capabilities', 'arguments':{ 'capabilities': [ {'capability': 'x-colo', 'state': true } ] } } -{'execute': 'migrate', 'arguments':{ 'uri': 'tcp:127.0.0.1:9998' } } +{"execute": "migrate-set-capabilities", "arguments":{ "capabilities": [ {"capability": "x-colo", "state": true } ] } } +{"execute": "migrate", "arguments":{ "uri": "tcp:127.0.0.1:9998" } } == TODO == 1. Support shared storage. diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst index bcb1549721..c29a4b8fe6 100644 --- a/docs/about/build-platforms.rst +++ b/docs/about/build-platforms.rst @@ -54,10 +54,12 @@ Those hosts are officially supported, with various accelerators: * - x86 - hax, hvf (64 bit only), kvm, nvmm, tcg, whpx (64 bit only), xen -Other host architectures are not supported. It is possible to build QEMU on an -unsupported host architecture using the configure ``--enable-tcg-interpreter`` -option to enable the experimental TCI support, but note that this is very slow -and is not recommended. +Other host architectures are not supported. It is possible to build QEMU system +emulation on an unsupported host architecture using the configure +``--enable-tcg-interpreter`` option to enable the TCI support, but note that +this is very slow and is not recommended for normal use. QEMU user emulation +requires host-specific support for signal handling, therefore TCI won't help +on unsupported host architectures. Non-supported architectures may be removed in the future following the :ref:`deprecation process<Deprecated features>`. diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 5e514fb443..ff7488cb63 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -192,6 +192,12 @@ as short-form boolean values, and passed to plugins as ``arg_name=on``. However, short-form booleans are deprecated and full explicit ``arg_name=on`` form is preferred. +``-drive if=none`` for the sifive_u OTP device (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Using ``-drive if=none`` to configure the OTP device of the sifive_u +RISC-V machine is deprecated. Use ``-drive if=pflash`` instead. + QEMU Machine Protocol (QMP) commands ------------------------------------ @@ -239,6 +245,31 @@ single ``bitmap``, the new ``block-export-add`` uses a list of ``bitmaps``. Member ``values`` in return value elements with meta-type ``enum`` is deprecated. Use ``members`` instead. +``drive-backup`` (since 6.2) +'''''''''''''''''''''''''''' + +Use ``blockdev-backup`` in combination with ``blockdev-add`` instead. +This change primarily separates the creation/opening process of the backup +target with explicit, separate steps. ``blockdev-backup`` uses mostly the +same arguments as ``drive-backup``, except the ``format`` and ``mode`` +options are removed in favor of using explicit ``blockdev-create`` and +``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for +details. + +Incorrectly typed ``device_add`` arguments (since 6.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, QEMU +incorrectly accepts certain invalid arguments: Any object or list arguments are +silently ignored. Other argument types are not checked, but an implicit +conversion happens, so that e.g. string values can be assigned to integer +device properties or vice versa. + +This is a bug in QEMU that will be fixed in the future so that previously +accepted incorrect commands will return an error. Users should make sure that +all arguments passed to ``device_add`` are consistent with the documented +property types. + System accelerators ------------------- diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index 9d0d90c90d..d42c3341de 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -658,8 +658,8 @@ enforce that any failure to open the backing image (including if the backing file is missing or an incorrect format was specified) is an error when ``-u`` is not used. -qemu-img amend to adjust backing file (removed in 6.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' +``qemu-img amend`` to adjust backing file (removed in 6.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''''''' The use of ``qemu-img amend`` to modify the name or format of a qcow2 backing image was never fully documented or tested, and interferes @@ -670,8 +670,8 @@ backing chain should be performed with ``qemu-img rebase -u`` either before or after the remaining changes being performed by amend, as appropriate. -qemu-img backing file without format (removed in 6.1) -''''''''''''''''''''''''''''''''''''''''''''''''''''' +``qemu-img`` backing file without format (removed in 6.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''' The use of ``qemu-img create``, ``qemu-img rebase``, or ``qemu-img convert`` to create or modify an image that depends on a backing file diff --git a/docs/block-replication.txt b/docs/block-replication.txt index 108e9166a8..b0f23761c6 100644 --- a/docs/block-replication.txt +++ b/docs/block-replication.txt @@ -79,7 +79,7 @@ Primary | || Secondary disk <--------- hidden-disk 5 <--------- || | | || | | || '-------------------------' - || drive-backup sync=none 6 + || blockdev-backup sync=none 6 1) The disk on the primary is represented by a block device with two children, providing replication between a primary disk and the host that @@ -101,7 +101,7 @@ should support bdrv_make_empty() and backing file. that is modified by the primary VM. It should also start as an empty disk, and the driver supports bdrv_make_empty() and backing file. -6) The drive-backup job (sync=none) is run to allow hidden-disk to buffer +6) The blockdev-backup job (sync=none) is run to allow hidden-disk to buffer any state that would otherwise be lost by the speculative write-through of the NBD server into the secondary disk. So before block replication, the primary disk and secondary disk should contain the same data. @@ -156,15 +156,15 @@ Primary: children.0.driver=raw Run qmp command in primary qemu: - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1" } } - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'node': 'nbd_client1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "node": "nbd_client1" } } Note: @@ -189,21 +189,21 @@ Secondary: vote-threshold=1,children.0=childs1 Then run qmp command in secondary qemu: - { 'execute': 'nbd-server-start', - 'arguments': { - 'addr': { - 'type': 'inet', - 'data': { - 'host': 'xxx', - 'port': 'xxx' + { "execute": "nbd-server-start", + "arguments": { + "addr": { + "type": "inet", + "data": { + "host": "xxx", + "port": "xxx" } } } } - { 'execute': 'nbd-server-add', - 'arguments': { - 'device': 'colo1', - 'writable': true + { "execute": "nbd-server-add", + "arguments": { + "device": "colo1", + "writable": true } } @@ -223,22 +223,22 @@ After Failover: Primary: The secondary host is down, so we should run the following qmp command to remove the nbd child from the quorum: - { 'execute': 'x-blockdev-change', - 'arguments': { - 'parent': 'colo1', - 'child': 'children.1' + { "execute": "x-blockdev-change", + "arguments": { + "parent": "colo1", + "child": "children.1" } } - { 'execute': 'human-monitor-command', - 'arguments': { - 'command-line': 'drive_del xxxx' + { "execute": "human-monitor-command", + "arguments": { + "command-line": "drive_del xxxx" } } Note: there is no qmp command to remove the blockdev now Secondary: The primary host is down, so we should do the following thing: - { 'execute': 'nbd-server-stop' } + { "execute": "nbd-server-stop" } Promote Secondary to Primary: see COLO-FT.txt diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst index 7a83f5fc0d..431caba7aa 100644 --- a/docs/devel/build-system.rst +++ b/docs/devel/build-system.rst @@ -121,11 +121,11 @@ process for: 1) executables, which include: - - Tools - qemu-img, qemu-nbd, qga (guest agent), etc + - Tools - ``qemu-img``, ``qemu-nbd``, ``qga`` (guest agent), etc - - System emulators - qemu-system-$ARCH + - System emulators - ``qemu-system-$ARCH`` - - Userspace emulators - qemu-$ARCH + - Userspace emulators - ``qemu-$ARCH`` - Unit tests diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc index 277975e4ad..db3f571d5f 100644 --- a/docs/devel/ci-jobs.rst.inc +++ b/docs/devel/ci-jobs.rst.inc @@ -49,3 +49,10 @@ S390X_RUNNER_AVAILABLE If you've got access to an IBM Z host that can be used as a gitlab-CI runner, you can set this variable to enable the tests that require this kind of host. The runner should be tagged with "s390x". + +CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you've got access to a CentOS Stream 8 x86_64 host that can be +used as a gitlab-CI runner, you can set this variable to enable the +tests that require this kind of host. The runner should be tagged with +both "centos_stream_8" and "x86_64". diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 7c25177c5d..afd937535e 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -45,3 +45,6 @@ modifying QEMU's source code. vfio-migration qapi-code-gen writing-monitor-commands + trivial-patches + submitting-a-patch + submitting-a-pull-request diff --git a/docs/devel/modules.rst b/docs/devel/modules.rst index 066f347b89..8e999c4fa4 100644 --- a/docs/devel/modules.rst +++ b/docs/devel/modules.rst @@ -1,5 +1,5 @@ ============ -Qemu modules +QEMU modules ============ .. kernel-doc:: include/qemu/module.h diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst index e5758a79ab..e4801751f2 100644 --- a/docs/devel/multi-process.rst +++ b/docs/devel/multi-process.rst @@ -187,9 +187,9 @@ desired, in which the emulation application should only be allowed to access the files or devices the VM it's running on behalf of can access. #### qemu-io model -Qemu-io is a test harness used to test changes to the QEMU block backend -object code. (e.g., the code that implements disk images for disk driver -emulation) Qemu-io is not a device emulation application per se, but it +``qemu-io`` is a test harness used to test changes to the QEMU block backend +object code (e.g., the code that implements disk images for disk driver +emulation). ``qemu-io`` is not a device emulation application per se, but it does compile the QEMU block objects into a separate binary from the main QEMU one. This could be useful for disk device emulation, since its emulation applications will need to include the QEMU block objects. @@ -641,7 +641,7 @@ the CPU that issued the MMIO. +==========+========================+ | rid | range MMIO is within | +----------+------------------------+ -| offset | offset withing *rid* | +| offset | offset within *rid* | +----------+------------------------+ | type | e.g., load or store | +----------+------------------------+ diff --git a/docs/devel/multi-thread-tcg.rst b/docs/devel/multi-thread-tcg.rst index 5b446ee08b..c9541a7b20 100644 --- a/docs/devel/multi-thread-tcg.rst +++ b/docs/devel/multi-thread-tcg.rst @@ -228,7 +228,7 @@ Emulated hardware state Currently thanks to KVM work any access to IO memory is automatically protected by the global iothread mutex, also known as the BQL (Big -Qemu Lock). Any IO region that doesn't use global mutex is expected to +QEMU Lock). Any IO region that doesn't use global mutex is expected to do its own locking. However IO memory isn't the only way emulated hardware state can be diff --git a/docs/devel/qapi-code-gen.rst b/docs/devel/qapi-code-gen.rst index 38f2d7aad3..a3b5473089 100644 --- a/docs/devel/qapi-code-gen.rst +++ b/docs/devel/qapi-code-gen.rst @@ -956,15 +956,16 @@ definition must have documentation. Definition documentation starts with a line naming the definition, followed by an optional overview, a description of each argument (for commands and events), member (for structs and unions), branch (for -alternates), or value (for enums), and finally optional tagged -sections. +alternates), or value (for enums), a description of each feature (if +any), and finally optional tagged sections. -Descriptions of arguments can span multiple lines. The description -text can start on the line following the '\@argname:', in which case it -must not be indented at all. It can also start on the same line as -the '\@argname:'. In this case if it spans multiple lines then second -and subsequent lines must be indented to line up with the first -character of the first line of the description:: +The description of an argument or feature 'name' starts with +'\@name:'. The description text can start on the line following the +'\@name:', in which case it must not be indented at all. It can also +start on the same line as the '\@name:'. In this case if it spans +multiple lines then second and subsequent lines must be indented to +line up with the first character of the first line of the +description:: # @argone: # This is a two line description @@ -986,6 +987,12 @@ The number of spaces between the ':' and the text is not significant. Extensions added after the definition was first released carry a '(since x.y.z)' comment. +The feature descriptions must be preceded by a line "Features:", like +this:: + + # Features: + # @feature: Description text + A tagged section starts with one of the following words: "Note:"/"Notes:", "Since:", "Example"/"Examples", "Returns:", "TODO:". The section ends with the start of a new section. @@ -1000,12 +1007,6 @@ multiline argument descriptions. A 'Since: x.y.z' tagged section lists the release that introduced the definition. -The text of a section can start on a new line, in -which case it must not be indented at all. It can also start -on the same line as the 'Note:', 'Returns:', etc tag. In this -case if it spans multiple lines then second and subsequent -lines must be indented to match the first. - An 'Example' or 'Examples' section is automatically rendered entirely as literal fixed-width text. In other sections, the text is formatted, and rST markup can be used. diff --git a/docs/devel/qgraph.rst b/docs/devel/qgraph.rst index db44d71002..43342d9d65 100644 --- a/docs/devel/qgraph.rst +++ b/docs/devel/qgraph.rst @@ -14,7 +14,7 @@ support that device. Using only libqos APIs, the test has to manually take care of covering all the setups, and build the correct command line. -This also introduces backward compability issues: if a device/driver command +This also introduces backward compatibility issues: if a device/driver command line name is changed, all tests that use that will not work properly anymore and need to be adjusted. diff --git a/docs/devel/stable-process.rst b/docs/devel/stable-process.rst index e541b983fa..c21fb86645 100644 --- a/docs/devel/stable-process.rst +++ b/docs/devel/stable-process.rst @@ -1,3 +1,5 @@ +.. _stable-process: + QEMU and the stable process =========================== diff --git a/docs/devel/style.rst b/docs/devel/style.rst index 260e3263fa..9c5c0fffd9 100644 --- a/docs/devel/style.rst +++ b/docs/devel/style.rst @@ -1,3 +1,5 @@ +.. _coding-style: + ================= QEMU Coding Style ================= @@ -686,7 +688,7 @@ Rationale: hex numbers are hard to read in logs when there is no 0x prefix, especially when (occasionally) the representation doesn't contain any letters and especially in one line with other decimal numbers. Number groups are allowed to not use '0x' because for some things notations like %x.%x.%x are used not -only in Qemu. Also dumping raw data bytes with '0x' is less readable. +only in QEMU. Also dumping raw data bytes with '0x' is less readable. '#' printf flag --------------- diff --git a/docs/devel/submitting-a-patch.rst b/docs/devel/submitting-a-patch.rst new file mode 100644 index 0000000000..e51259eb9c --- /dev/null +++ b/docs/devel/submitting-a-patch.rst @@ -0,0 +1,562 @@ +.. _submitting-a-patch: + +Submitting a Patch +================== + +QEMU welcomes contributions of code (either fixing bugs or adding new +functionality). However, we get a lot of patches, and so we have some +guidelines about submitting patches. If you follow these, you'll help +make our task of code review easier and your patch is likely to be +committed faster. + +This page seems very long, so if you are only trying to post a quick +one-shot fix, the bare minimum we ask is that: + +- You **must** provide a Signed-off-by: line (this is a hard + requirement because it's how you say "I'm legally okay to contribute + this and happy for it to go into QEMU", modeled after the `Linux kernel + <http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297>`__ + policy.) ``git commit -s`` or ``git format-patch -s`` will add one. +- All contributions to QEMU must be **sent as patches** to the + qemu-devel `mailing list <MailingLists>`__. Patch contributions + should not be posted on the bug tracker, posted on forums, or + externally hosted and linked to. (We have other mailing lists too, + but all patches must go to qemu-devel, possibly with a Cc: to another + list.) ``git send-email`` (`step-by-step setup + guide <https://git-send-email.io/>`__ and `hints and + tips <https://elixir.bootlin.com/linux/latest/source/Documentation/process/email-clients.rst>`__) + works best for delivering the patch without mangling it, but + attachments can be used as a last resort on a first-time submission. +- You must read replies to your message, and be willing to act on them. + Note, however, that maintainers are often willing to manually fix up + first-time contributions, since there is a learning curve involved in + making an ideal patch submission. + +You do not have to subscribe to post (list policy is to reply-to-all to +preserve CCs and keep non-subscribers in the loop on the threads they +start), although you may find it easier as a subscriber to pick up good +ideas from other posts. If you do subscribe, be prepared for a high +volume of email, often over one thousand messages in a week. The list is +moderated; first-time posts from an email address (whether or not you +subscribed) may be subject to some delay while waiting for a moderator +to whitelist your address. + +The larger your contribution is, or if you plan on becoming a long-term +contributor, then the more important the rest of this page becomes. +Reading the table of contents below should already give you an idea of +the basic requirements. Use the table of contents as a reference, and +read the parts that you have doubts about. + +.. contents:: Table of Contents + +.. _writing_your_patches: + +Writing your Patches +-------------------- + +.. _use_the_qemu_coding_style: + +Use the QEMU coding style +~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can run run *scripts/checkpatch.pl <patchfile>* before submitting to +check that you are in compliance with our coding standards. Be aware +that ``checkpatch.pl`` is not infallible, though, especially where C +preprocessor macros are involved; use some common sense too. See also: + +- :ref:`coding-style` +- `Automate a checkpatch run on + commit <https://blog.vmsplice.net/2011/03/how-to-automatically-run-checkpatchpl.html>`__ + +.. _base_patches_against_current_git_master: + +Base patches against current git master +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's no point submitting a patch which is based on a released version +of QEMU because development will have moved on from then and it probably +won't even apply to master. We only apply selected bugfixes to release +branches and then only as backports once the code has gone into master. + +It is also okay to base patches on top of other on-going work that is +not yet part of the git master branch. To aid continuous integration +tools, such as `patchew <http://patchew.org/QEMU/>`__, you should `add a +tag <https://lists.gnu.org/archive/html/qemu-devel/2017-08/msg01288.html>`__ +line ``Based-on: $MESSAGE_ID`` to your cover letter to make the series +dependency obvious. + +.. _split_up_long_patches: + +Split up long patches +~~~~~~~~~~~~~~~~~~~~~ + +Split up longer patches into a patch series of logical code changes. +Each change should compile and execute successfully. For instance, don't +add a file to the makefile in patch one and then add the file itself in +patch two. (This rule is here so that people can later use tools like +`git bisect <http://git-scm.com/docs/git-bisect>`__ without hitting +points in the commit history where QEMU doesn't work for reasons +unrelated to the bug they're chasing.) Put documentation first, not +last, so that someone reading the series can do a clean-room evaluation +of the documentation, then validate that the code matched the +documentation. A commit message that mentions "Also, ..." is often a +good candidate for splitting into multiple patches. For more thoughts on +properly splitting patches and writing good commit messages, see `this +advice from +OpenStack <https://wiki.openstack.org/wiki/GitCommitMessages>`__. + +.. _make_code_motion_patches_easy_to_review: + +Make code motion patches easy to review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a series requires large blocks of code motion, there are tricks for +making the refactoring easier to review. Split up the series so that +semantic changes (or even function renames) are done in a separate patch +from the raw code motion. Use a one-time setup of ``git config +diff.renames true;`` ``git config diff.algorithm patience`` (refer to +`git-config <http://git-scm.com/docs/git-config>`__). The 'diff.renames' +property ensures file rename patches will be given in a more compact +representation that focuses only on the differences across the file +rename, instead of showing the entire old file as a deletion and the new +file as an insertion. Meanwhile, the 'diff.algorithm' property ensures +that extracting a non-contiguous subset of one file into a new file, but +where all extracted parts occur in the same order both before and after +the patch, will reduce churn in trying to treat unrelated ``}`` lines in +the original file as separating hunks of changes. + +Ideally, a code motion patch can be reviewed by doing:: + + git format-patch --stdout -1 > patch; + diff -u <(sed -n 's/^-//p' patch) <(sed -n 's/^\+//p' patch) + +to focus on the few changes that weren't wholesale code motion. + +.. _dont_include_irrelevant_changes: + +Don't include irrelevant changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In particular, don't include formatting, coding style or whitespace +changes to bits of code that would otherwise not be touched by the +patch. (It's OK to fix coding style issues in the immediate area (few +lines) of the lines you're changing.) If you think a section of code +really does need a reindent or other large-scale style fix, submit this +as a separate patch which makes no semantic changes; don't put it in the +same patch as your bug fix. + +For smaller patches in less frequently changed areas of QEMU, consider +using the :ref:`trivial-patches` process. + +.. _write_a_meaningful_commit_message: + +Write a meaningful commit message +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Commit messages should be meaningful and should stand on their own as a +historical record of why the changes you applied were necessary or +useful. + +QEMU follows the usual standard for git commit messages: the first line +(which becomes the email subject line) is "subsystem: single line +summary of change". Whether the "single line summary of change" starts +with a capital is a matter of taste, but we prefer that the summary does +not end in a dot. Look at ``git shortlog -30`` for an idea of sample +subject lines. Then there is a blank line and a more detailed +description of the patch, another blank and your Signed-off-by: line. +Please do not use lines that are longer than 76 characters in your +commit message (so that the text still shows up nicely with "git show" +in a 80-columns terminal window). + +The body of the commit message is a good place to document why your +change is important. Don't include comments like "This is a suggestion +for fixing this bug" (they can go below the ``---`` line in the email so +they don't go into the final commit message). Make sure the body of the +commit message can be read in isolation even if the reader's mailer +displays the subject line some distance apart (that is, a body that +starts with "... so that" as a continuation of the subject line is +harder to follow). + +If your patch fixes a commit that is already in the repository, please +add an additional line with "Fixes: <at-least-12-digits-of-SHA-commit-id> +("Fixed commit subject")" below the patch description / before your +"Signed-off-by:" line in the commit message. + +If your patch fixes a bug in the gitlab bug tracker, please add a line +with "Resolves: <URL-of-the-bug>" to the commit message, too. Gitlab can +close bugs automatically once commits with the "Resolved:" keyword get +merged into the master branch of the project. And if your patch addresses +a bug in another public bug tracker, you can also use a line with +"Buglink: <URL-of-the-bug>" for reference here, too. + +Example:: + + Fixes: 14055ce53c2d ("s390x/tcg: avoid overflows in time2tod/tod2time") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/42 + Buglink: https://bugs.launchpad.net/qemu/+bug/1804323`` + +Some other tags that are used in commit messages include "Message-Id:" +"Tested-by:", "Acked-by:", "Reported-by:", "Suggested-by:". See ``git +log`` for these keywords for example usage. + +.. _test_your_patches: + +Test your patches +~~~~~~~~~~~~~~~~~ + +Although QEMU has `continuous integration +services <Testing#Continuous_Integration>`__ that attempt to test +patches submitted to the list, it still saves everyone time if you have +already tested that your patch compiles and works. Because QEMU is such +a large project, it's okay to use configure arguments to limit what is +built for faster turnaround during your development time; but it is +still wise to also check that your patches work with a full build before +submitting a series, especially if your changes might have an unintended +effect on other areas of the code you don't normally experiment with. +See `Testing <Testing>`__ for more details on what tests are available. +Also, it is a wise idea to include a testsuite addition as part of your +patches - either to ensure that future changes won't regress your new +feature, or to add a test which exposes the bug that the rest of your +series fixes. Keeping separate commits for the test and the fix allows +reviewers to rebase the test to occur first to prove it catches the +problem, then again to place it last in the series so that bisection +doesn't land on a known-broken state. + +.. _submitting_your_patches: + +Submitting your Patches +----------------------- + +.. _if_you_cannot_send_patch_emails: + +If you cannot send patch emails +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In rare cases it may not be possible to send properly formatted patch +emails. You can use `sourcehut <https://sourcehut.org/>`__ to send your +patches to the QEMU mailing list by following these steps: + +#. Register or sign in to your account +#. Add your SSH public key in `meta \| + keys <https://meta.sr.ht/keys>`__. +#. Publish your git branch using **git push git@git.sr.ht:~USERNAME/qemu + HEAD** +#. Send your patches to the QEMU mailing list using the web-based + ``git-send-email`` UI at https://git.sr.ht/~USERNAME/qemu/send-email + +`This video +<https://spacepub.space/videos/watch/ad258d23-0ac6-488c-83fc-2bacf578de3a>`__ +shows the web-based ``git-send-email`` workflow. Documentation is +available `here +<https://man.sr.ht/git.sr.ht/#sending-patches-upstream>`__. + +.. _cc_the_relevant_maintainer: + +CC the relevant maintainer +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches both to the mailing list and CC the maintainer(s) of the +files you are modifying. look in the MAINTAINERS file to find out who +that is. Also try using scripts/get_maintainer.pl from the repository +for learning the most common committers for the files you touched. + +Example:: + + ~/src/qemu/scripts/get_maintainer.pl -f hw/ide/core.c + +In fact, you can automate this, via a one-time setup of ``git config +sendemail.cccmd 'scripts/get_maintainer.pl --nogit-fallback'`` (Refer to +`git-config <http://git-scm.com/docs/git-config>`__.) + +.. _do_not_send_as_an_attachment: + +Do not send as an attachment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Send patches inline so they are easy to reply to with review comments. +Do not put patches in attachments. + +.. _use_git_format_patch: + +Use ``git format-patch`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the right diff format. +`git format-patch <http://git-scm.com/docs/git-format-patch>`__ will +produce patch emails in the right format (check the documentation to +find out how to drive it). You can then edit the cover letter before +using ``git send-email`` to mail the files to the mailing list. (We +recommend `git send-email <http://git-scm.com/docs/git-send-email>`__ +because mail clients often mangle patches by wrapping long lines or +messing up whitespace. Some distributions do not include send-email in a +default install of git; you may need to download additional packages, +such as 'git-email' on Fedora-based systems.) Patch series need a cover +letter, with shallow threading (all patches in the series are +in-reply-to the cover letter, but not to each other); single unrelated +patches do not need a cover letter (but if you do send a cover letter, +use ``--numbered`` so the cover and the patch have distinct subject lines). +Patches are easier to find if they start a new top-level thread, rather +than being buried in-reply-to another existing thread. + +.. _avoid_posting_large_binary_blob: + +Avoid posting large binary blob +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you added binaries to the repository, consider producing the patch +emails using ``git format-patch --no-binary`` and include a link to a +git repository to fetch the original commit. + +.. _patch_emails_must_include_a_signed_off_by_line: + +Patch emails must include a ``Signed-off-by:`` line +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For more information see `SubmittingPatches 1.12 +<http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297>`__. +This is vital or we will not be able to apply your patch! Please use +your real name to sign a patch (not an alias or acronym). + +If you wrote the patch, make sure your "From:" and "Signed-off-by:" +lines use the same spelling. It's okay if you subscribe or contribute to +the list via more than one address, but using multiple addresses in one +commit just confuses things. If someone else wrote the patch, git will +include a "From:" line in the body of the email (different from your +envelope From:) that will give credit to the correct author; but again, +that author's Signed-off-by: line is mandatory, with the same spelling. + +.. _include_a_meaningful_cover_letter: + +Include a meaningful cover letter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a requirement for any series with multiple patches (as it aids +continuous integration), but optional for an isolated patch. The cover +letter explains the overall goal of such a series, and also provides a +convenient 0/N email for others to reply to the series as a whole. A +one-time setup of ``git config format.coverletter auto`` (refer to +`git-config <http://git-scm.com/docs/git-config>`__) will generate the +cover letter as needed. + +When reviewers don't know your goal at the start of their review, they +may object to early changes that don't make sense until the end of the +series, because they do not have enough context yet at that point of +their review. A series where the goal is unclear also risks a higher +number of review-fix cycles because the reviewers haven't bought into +the idea yet. If the cover letter can explain these points to the +reviewer, the process will be smoother patches will get merged faster. +Make sure your cover letter includes a diffstat of changes made over the +entire series; potential reviewers know what files they are interested +in, and they need an easy way determine if your series touches them. + +.. _use_the_rfc_tag_if_needed: + +Use the RFC tag if needed +~~~~~~~~~~~~~~~~~~~~~~~~~ + +For example, "[PATCH RFC v2]". ``git format-patch --subject-prefix=RFC`` +can help. + +"RFC" means "Request For Comments" and is a statement that you don't +intend for your patchset to be applied to master, but would like some +review on it anyway. Reasons for doing this include: + +- the patch depends on some pending kernel changes which haven't yet + been accepted, so the QEMU patch series is blocked until that + dependency has been dealt with, but is worth reviewing anyway +- the patch set is not finished yet (perhaps it doesn't cover all use + cases or work with all targets) but you want early review of a major + API change or design structure before continuing + +In general, since it's asking other people to do review work on a +patchset that the submitter themselves is saying shouldn't be applied, +it's best to: + +- use it sparingly +- in the cover letter, be clear about why a patch is an RFC, what areas + of the patchset you're looking for review on, and why reviewers + should care + +.. _consider_whether_your_patch_is_applicable_for_stable: + +Consider whether your patch is applicable for stable +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patch fixes a severe issue or a regression, it may be applicable +for stable. In that case, consider adding ``Cc: qemu-stable@nongnu.org`` +to your patch to notify the stable maintainers. + +For more details on how QEMU's stable process works, refer to the +:ref:`stable-process` page. + +.. _participating_in_code_review: + +Participating in Code Review +---------------------------- + +All patches submitted to the QEMU project go through a code review +process before they are accepted. Some areas of code that are well +maintained may review patches quickly, lesser-loved areas of code may +have a longer delay. + +.. _stay_around_to_fix_problems_raised_in_code_review: + +Stay around to fix problems raised in code review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Not many patches get into QEMU straight away -- it is quite common that +developers will identify bugs, or suggest a cleaner approach, or even +just point out code style issues or commit message typos. You'll need to +respond to these, and then send a second version of your patches with +the issues fixed. This takes a little time and effort on your part, but +if you don't do it then your changes will never get into QEMU. It's also +just polite -- it is quite disheartening for a developer to spend time +reviewing your code and suggesting improvements, only to find that +you're not going to do anything further and it was all wasted effort. + +When replying to comments on your patches **reply to all and not just +the sender** -- keeping discussion on the mailing list means everybody +can follow it. + +.. _pay_attention_to_review_comments: + +Pay attention to review comments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Someone took their time to review your work, and it pays to respect that +effort; repeatedly submitting a series without addressing all comments +from the previous round tends to alienate reviewers and stall your +patch. Reviewers aren't always perfect, so it is okay if you want to +argue that your code was correct in the first place instead of blindly +doing everything the reviewer asked. On the other hand, if someone +pointed out a potential issue during review, then even if your code +turns out to be correct, it's probably a sign that you should improve +your commit message and/or comments in the code explaining why the code +is correct. + +If you fix issues that are raised during review **resend the entire +patch series** not just the one patch that was changed. This allows +maintainers to easily apply the fixed series without having to manually +identify which patches are relevant. Send the new version as a complete +fresh email or series of emails -- don't try to make it a followup to +version 1. (This helps automatic patch email handling tools distinguish +between v1 and v2 emails.) + +.. _when_resending_patches_add_a_version_tag: + +When resending patches add a version tag +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All patches beyond the first version should include a version tag -- for +example, "[PATCH v2]". This means people can easily identify whether +they're looking at the most recent version. (The first version of a +patch need not say "v1", just [PATCH] is sufficient.) For patch series, +the version applies to the whole series -- even if you only change one +patch, you resend the entire series and mark it as "v2". Don't try to +track versions of different patches in the series separately. `git +format-patch <http://git-scm.com/docs/git-format-patch>`__ and `git +send-email <http://git-scm.com/docs/git-send-email>`__ both understand +the ``-v2`` option to make this easier. Send each new revision as a new +top-level thread, rather than burying it in-reply-to an earlier +revision, as many reviewers are not looking inside deep threads for new +patches. + +.. _include_version_history_in_patchset_revisions: + +Include version history in patchset revisions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For later versions of patches, include a summary of changes from +previous versions, but not in the commit message itself. In an email +formatted as a git patch, the commit message is the part above the ``---`` +line, and this will go into the git changelog when the patch is +committed. This part should be a self-contained description of what this +version of the patch does, written to make sense to anybody who comes +back to look at this commit in git in six months' time. The part below +the ``---`` line and above the patch proper (git format-patch puts the +diffstat here) is a good place to put remarks for people reading the +patch email, and this is where the "changes since previous version" +summary belongs. The `git-publish +<https://github.com/stefanha/git-publish>`__ script can help with +tracking a good summary across versions. Also, the `git-backport-diff +<https://github.com/codyprime/git-scripts>`__ script can help focus +reviewers on what changed between revisions. + +.. _tips_and_tricks: + +Tips and Tricks +--------------- + +.. _proper_use_of_reviewed_by_tags_can_aid_review: + +Proper use of Reviewed-by: tags can aid review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When reviewing a large series, a reviewer can reply to some of the +patches with a Reviewed-by tag, stating that they are happy with that +patch in isolation (sometimes conditional on minor cleanup, like fixing +whitespace, that doesn't affect code content). You should then update +those commit messages by hand to include the Reviewed-by tag, so that in +the next revision, reviewers can spot which patches were already clean +from the previous round. Conversely, if you significantly modify a patch +that was previously reviewed, remove the reviewed-by tag out of the +commit message, as well as listing the changes from the previous +version, to make it easier to focus a reviewer's attention to your +changes. + +.. _if_your_patch_seems_to_have_been_ignored: + +If your patch seems to have been ignored +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your patchset has received no replies you should "ping" it after a +week or two, by sending an email as a reply-to-all to the patch mail, +including the word "ping" and ideally also a link to the page for the +patch on `patchew <https://patchew.org/QEMU/>`__ or +`lore.kernel.org <https://lore.kernel.org/qemu-devel/>`__. It's worth +double-checking for reasons why your patch might have been ignored +(forgot to CC the maintainer? annoyed people by failing to respond to +review comments on an earlier version?), but often for less-maintained +areas of QEMU patches do just slip through the cracks. If your ping is +also ignored, ping again after another week or so. As the submitter, you +are the person with the most motivation to get your patch applied, so +you have to be persistent. + +.. _is_my_patch_in: + +Is my patch in? +~~~~~~~~~~~~~~~ + +QEMU has some Continuous Integration machines that try to catch patch +submission problems as soon as possible. `patchew +<http://patchew.org/QEMU/>`__ includes a web interface for tracking the +status of various threads that have been posted to the list, and may +send you an automated mail if it detected a problem with your patch. + +Once your patch has had enough review on list, the maintainer for that +area of code will send notification to the list that they are including +your patch in a particular staging branch. Periodically, the maintainer +then takes care of :ref:`submitting-a-pull-request` +for aggregating topic branches into mainline QEMU. Generally, you do not +need to send a pull request unless you have contributed enough patches +to become a maintainer over a particular section of code. Maintainers +may further modify your commit, by resolving simple merge conflicts or +fixing minor typos pointed out during review, but will always add a +Signed-off-by line in addition to yours, indicating that it went through +their tree. Occasionally, the maintainer's pull request may hit more +difficult merge conflicts, where you may be requested to help rebase and +resolve the problems. It may take a couple of weeks between when your +patch first had a positive review to when it finally lands in qemu.git; +release cycle freezes may extend that time even longer. + +.. _return_the_favor: + +Return the favor +~~~~~~~~~~~~~~~~ + +Peer review only works if everyone chips in a bit of review time. If +everyone submitted more patches than they reviewed, we would have a +patch backlog. A good goal is to try to review at least as many patches +from others as what you submit. Don't worry if you don't know the code +base as well as a maintainer; it's perfectly fine to admit when your +review is weak because you are unfamiliar with the code. diff --git a/docs/devel/submitting-a-pull-request.rst b/docs/devel/submitting-a-pull-request.rst new file mode 100644 index 0000000000..c9d1e8afd9 --- /dev/null +++ b/docs/devel/submitting-a-pull-request.rst @@ -0,0 +1,77 @@ +.. _submitting-a-pull-request: + +Submitting a Pull Request +========================= + +QEMU welcomes contributions of code, but we generally expect these to be +sent as simple patch emails to the mailing list (see our page on +:ref:`submitting-a-patch` +for more details). Generally only existing submaintainers of a tree +will need to submit pull requests, although occasionally for a large +patch series we might ask a submitter to send a pull request. This page +documents our recommendations on pull requests for those people. + +A good rule of thumb is not to send a pull request unless somebody asks +you to. + +**Resend the patches with the pull request** as emails which are +threaded as follow-ups to the pull request itself. The simplest way to +do this is to use ``git format-patch --cover-letter`` to create the +emails, and then edit the cover letter to include the pull request +details that ``git request-pull`` outputs. + +**Use PULL as the subject line tag** in both the cover letter and the +retransmitted patch mails (for example, by using +``--subject-prefix=PULL`` in your ``git format-patch`` command). This +helps people to filter in or out the resulting emails (especially useful +if they are only CC'd on one email out of the set). + +**Each patch must have your own Signed-off-by: line** as well as that of +the original author if the patch was not written by you. This is because +with a pull request you're now indicating that the patch has passed via +you rather than directly from the original author. + +**Don't forget to add Reviewed-by: and Acked-by: lines**. When other +people have reviewed the patches you're putting in the pull request, +make sure you've copied their signoffs across. (If you use the `patches +tool <https://github.com/stefanha/patches>`__ to add patches from email +directly to your git repo it will include the tags automatically; if +you're updating patches manually or in some other way you'll need to +edit the commit messages by hand.) + +**Don't send pull requests for code that hasn't passed review**. A pull +request says these patches are ready to go into QEMU now, so they must +have passed the standard code review processes. In particular if you've +corrected issues in one round of code review, you need to send your +fixed patch series as normal to the list; you can't put it in a pull +request until it's gone through. (Extremely trivial fixes may be OK to +just fix in passing, but if in doubt err on the side of not.) + +**Test before sending**. This is an obvious thing to say, but make sure +everything builds (including that it compiles at each step of the patch +series) and that "make check" passes before sending out the pull +request. As a submaintainer you're one of QEMU's lines of defense +against bad code, so double check the details. + +**All pull requests must be signed**. If your key is not already signed +by members of the QEMU community, you should make arrangements to attend +a `KeySigningParty <https://wiki.qemu.org/KeySigningParty>`__ (for +example at KVM Forum) or make alternative arrangements to have your key +signed by an attendee. Key signing requires meeting another community +member \*in person\* so please make appropriate arrangements. By +"signed" here we mean that the pullreq email should quote a tag which is +a GPG-signed tag (as created with 'gpg tag -s ...'). + +**Pull requests not for master should say "not for master" and have +"PULL SUBSYSTEM whatever" in the subject tag**. If your pull request is +targeting a stable branch or some submaintainer tree, please include the +string "not for master" in the cover letter email, and make sure the +subject tag is "PULL SUBSYSTEM s390/block/whatever" rather than just +"PULL". This allows it to be automatically filtered out of the set of +pull requests that should be applied to master. + +You might be interested in the `make-pullreq +<https://git.linaro.org/people/peter.maydell/misc-scripts.git/tree/make-pullreq>`__ +script which automates some of this process for you and includes a few +sanity checks. Note that you must edit it to configure it suitably for +your local situation! diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst index 60c59023e5..755343c7dd 100644 --- a/docs/devel/testing.rst +++ b/docs/devel/testing.rst @@ -564,11 +564,11 @@ exploiting a QEMU security bug to compromise the host. QEMU binaries ~~~~~~~~~~~~~ -By default, qemu-system-x86_64 is searched in $PATH to run the guest. If there -isn't one, or if it is older than 2.10, the test won't work. In this case, +By default, ``qemu-system-x86_64`` is searched in $PATH to run the guest. If +there isn't one, or if it is older than 2.10, the test won't work. In this case, provide the QEMU binary in env var: ``QEMU=/path/to/qemu-2.10+``. -Likewise the path to qemu-img can be set in QEMU_IMG environment variable. +Likewise the path to ``qemu-img`` can be set in QEMU_IMG environment variable. Make jobs ~~~~~~~~~ @@ -650,7 +650,7 @@ supported. To start the fuzzer, run tests/image-fuzzer/runner.py -c '[["qemu-img", "info", "$test_img"]]' /tmp/test qcow2 -Alternatively, some command different from "qemu-img info" can be tested, by +Alternatively, some command different from ``qemu-img info`` can be tested, by changing the ``-c`` option. Integration tests using the Avocado Framework diff --git a/docs/devel/trivial-patches.rst b/docs/devel/trivial-patches.rst new file mode 100644 index 0000000000..9380c730f7 --- /dev/null +++ b/docs/devel/trivial-patches.rst @@ -0,0 +1,52 @@ +.. _trivial-patches: + +Trivial Patches +=============== + +Overview +-------- + +Trivial patches that change just a few lines of code sometimes languish +on the mailing list even though they require only a small amount of +review. This is often the case for patches that do not fall under an +actively maintained subsystem and therefore fall through the cracks. + +The trivial patches team take on the task of reviewing and building pull +requests for patches that: + +- Do not fall under an actively maintained subsystem. +- Are single patches or short series (max 2-4 patches). +- Only touch a few lines of code. + +**You should hint that your patch is a candidate by CCing +qemu-trivial@nongnu.org.** + +Repositories +------------ + +Since the trivial patch team rotates maintainership there is only one +active repository at a time: + +- git://github.com/vivier/qemu.git trivial-patches - `browse <https://github.com/vivier/qemu/tree/trivial-patches>`__ + +Workflow +-------- + +The trivial patches team rotates the duty of collecting trivial patches +amongst its members. A team member's job is to: + +1. Identify trivial patches on the development mailing list. +2. Review trivial patches, merge them into a git tree, and reply to state + that the patch is queued. +3. Send pull requests to the development mailing list once a week. + +A single team member can be on duty as long as they like. The suggested +time is 1 week before handing off to the next member. + +Team +---- + +If you would like to join the trivial patches team, contact Laurent +Vivier. The current team includes: + +- `Laurent Vivier <mailto:laurent@vivier.eu>`__ diff --git a/docs/devel/ui.rst b/docs/devel/ui.rst index 06c7d622ce..17fb667dec 100644 --- a/docs/devel/ui.rst +++ b/docs/devel/ui.rst @@ -1,8 +1,8 @@ ================= -Qemu UI subsystem +QEMU UI subsystem ================= -Qemu Clipboard +QEMU Clipboard -------------- .. kernel-doc:: include/ui/clipboard.h diff --git a/docs/devel/writing-monitor-commands.rst b/docs/devel/writing-monitor-commands.rst index b3e2c8481d..1693822f8f 100644 --- a/docs/devel/writing-monitor-commands.rst +++ b/docs/devel/writing-monitor-commands.rst @@ -677,7 +677,7 @@ return a single text string:: The ``HumanReadableText`` struct is intended to be used for all commands, under the ``x-`` name prefix that are returning unstructured -text targetted at humans. It should never be used for commands outside +text targeted at humans. It should never be used for commands outside the ``x-`` name prefix, as those should be using structured QAPI types. Implementing the QMP command diff --git a/docs/hyperv.txt b/docs/hyperv.txt index 5d99fd9a72..0417c183a3 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -195,7 +195,7 @@ The enlightenment allows to use Hyper-V SynIC with hardware APICv/AVIC enabled. Normally, Hyper-V SynIC disables these hardware feature and suggests the guest to use paravirtualized AutoEOI feature. Note: enabling this feature on old hardware (without APICv/AVIC support) may -have negative effect on guest's performace. +have negative effect on guest's performance. 3.19. hv-no-nonarch-coresharing=on/off/auto =========================================== diff --git a/docs/image-fuzzer.txt b/docs/image-fuzzer.txt index 3e23ebec33..279cc8c807 100644 --- a/docs/image-fuzzer.txt +++ b/docs/image-fuzzer.txt @@ -51,10 +51,10 @@ assumes that core dumps will be generated in the current working directory. For comprehensive test results, please, set up your test environment properly. -Paths to binaries under test (SUTs) qemu-img and qemu-io are retrieved from -environment variables. If the environment check fails the runner will +Paths to binaries under test (SUTs) ``qemu-img`` and ``qemu-io`` are retrieved +from environment variables. If the environment check fails the runner will use SUTs installed in system paths. -qemu-img is required for creation of backing files, so it's mandatory to set +``qemu-img`` is required for creation of backing files, so it's mandatory to set the related environment variable if it's not installed in the system path. For details about environment variables see qemu-iotests/check. diff --git a/docs/interop/bitmaps.rst b/docs/interop/bitmaps.rst index 059ad67929..1de46febdc 100644 --- a/docs/interop/bitmaps.rst +++ b/docs/interop/bitmaps.rst @@ -539,12 +539,11 @@ other partial disk images on top of a base image to reconstruct a full backup from the point in time at which the incremental backup was issued. The "Push Model" here references the fact that QEMU is "pushing" the modified -blocks out to a destination. We will be using the `drive-backup -<qemu-qmp-ref.html#index-drive_002dbackup>`_ and `blockdev-backup -<qemu-qmp-ref.html#index-blockdev_002dbackup>`_ QMP commands to create both +blocks out to a destination. We will be using the `blockdev-backup +<qemu-qmp-ref.html#index-blockdev_002dbackup>`_ QMP command to create both full and incremental backups. -Both of these commands are jobs, which have their own QMP API for querying and +The command is a background job, which has its own QMP API for querying and management documented in `Background jobs <qemu-qmp-ref.html#Background-jobs>`_. @@ -557,6 +556,10 @@ create a new incremental backup chain attached to a drive. This example creates a new, full backup of "drive0" and accompanies it with a new, empty bitmap that records writes from this point in time forward. +The target can be created with the help of `blockdev-add +<qemu-qmp-ref.html#index-blockdev_002dadd>`_ or `blockdev-create +<qemu-qmp-ref.html#index-blockdev_002dcreate>`_ command. + .. note:: Any new writes that happen after this command is issued, even while the backup job runs, will be written locally and not to the backup destination. These writes will be recorded in the bitmap @@ -576,12 +579,11 @@ new, empty bitmap that records writes from this point in time forward. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -664,12 +666,11 @@ use a transaction to reset the bitmap while making a new full backup: } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.new_full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } } ] @@ -728,19 +729,35 @@ Example: First Incremental Backup $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue an incremental backup command: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -785,20 +802,36 @@ Example: Second Incremental Backup $ qemu-img create -f qcow2 drive0.inc1.qcow2 \ -b drive0.inc0.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc1.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. The only difference here is that we have changed the target image below. .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc1.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -866,20 +899,36 @@ image: file for you, but you lose control over format options like compatibility and preallocation presets. +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc2.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a new incremental backup command. Apart from the new destination image, there is no difference from the last two examples. .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc2.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -930,6 +979,38 @@ point in time. $ qemu-img create -f qcow2 drive0.full.qcow2 64G $ qemu-img create -f qcow2 drive1.full.qcow2 64G +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.full.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.full.qcow2" + } + } + } + + <- { "return": {} } + #. Create a full (anchor) backup for each drive, with accompanying bitmaps: .. code-block:: QMP @@ -953,21 +1034,19 @@ point in time. } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", - "target": "/path/to/drive0.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target0", + "sync": "full" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", - "target": "/path/to/drive1.full.qcow2", - "sync": "full", - "format": "qcow2" + "target": "target1", + "sync": "full" } } ] @@ -1016,6 +1095,38 @@ point in time. $ qemu-img create -f qcow2 drive1.inc0.qcow2 \ -b drive1.full.qcow2 -F qcow2 +#. Add target block nodes: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target1", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive1.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Issue a multi-drive incremental push backup transaction: .. code-block:: QMP @@ -1025,25 +1136,21 @@ point in time. "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }, ] @@ -1119,19 +1226,35 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Attempt to create an incremental backup via QMP: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1164,6 +1287,19 @@ described above. This example demonstrates the single-job failure case: "event": "BLOCK_JOB_COMPLETED" } +#. Remove target node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-del", + "arguments": { + "node-name": "target0", + } + } + + <- { "return": {} } + #. Delete the failed image, and re-create it. .. code:: bash @@ -1172,20 +1308,36 @@ described above. This example demonstrates the single-job failure case: $ qemu-img create -f qcow2 drive0.inc0.qcow2 \ -b drive0.full.qcow2 -F qcow2 +#. Add target block node: + + .. code-block:: QMP + + -> { + "execute": "blockdev-add", + "arguments": { + "node-name": "target0", + "driver": "qcow2", + "file": { + "driver": "file", + "filename": "drive0.inc0.qcow2" + } + } + } + + <- { "return": {} } + #. Retry the command after fixing the underlying problem, such as freeing up space on the backup volume: .. code-block:: QMP -> { - "execute": "drive-backup", + "execute": "blockdev-backup", "arguments": { "device": "drive0", "bitmap": "bitmap0", - "target": "drive0.inc0.qcow2", - "format": "qcow2", - "sync": "incremental", - "mode": "existing" + "target": "target0", + "sync": "incremental" } } @@ -1210,7 +1362,8 @@ described above. This example demonstrates the single-job failure case: Example: Partial Transactional Failures ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -QMP commands like `drive-backup <qemu-qmp-ref.html#index-drive_002dbackup>`_ +QMP commands like `blockdev-backup +<qemu-qmp-ref.html#index-blockdev_002dbackup>`_ conceptually only start a job, and so transactions containing these commands may succeed even if the job it created later fails. This might have surprising interactions with notions of how a "transaction" ought to behave. @@ -1240,25 +1393,21 @@ and one succeeds: "arguments": { "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } @@ -1375,25 +1524,21 @@ applied: }, "actions": [ { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive0", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive0.inc0.qcow2" + "target": "target0" } }, { - "type": "drive-backup", + "type": "blockdev-backup", "data": { "device": "drive1", "bitmap": "bitmap0", - "format": "qcow2", - "mode": "existing", "sync": "incremental", - "target": "drive1.inc0.qcow2" + "target": "target1" } }] } diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst index 814c29bbe1..39e62c9915 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -116,8 +116,8 @@ QEMU block layer supports. (3) ``drive-mirror`` (and ``blockdev-mirror``): Synchronize a running disk to another image. -(4) ``drive-backup`` (and ``blockdev-backup``): Point-in-time (live) copy - of a block device to a destination. +(4) ``blockdev-backup`` (and the deprecated ``drive-backup``): + Point-in-time (live) copy of a block device to a destination. .. _`Interacting with a QEMU instance`: @@ -555,13 +555,14 @@ Currently, there are four different kinds: (3) ``none`` -- Synchronize only the new writes from this point on. - .. note:: In the case of ``drive-backup`` (or ``blockdev-backup``), - the behavior of ``none`` synchronization mode is different. - Normally, a ``backup`` job consists of two parts: Anything - that is overwritten by the guest is first copied out to - the backup, and in the background the whole image is - copied from start to end. With ``sync=none``, it's only - the first part. + .. note:: In the case of ``blockdev-backup`` (or deprecated + ``drive-backup``), the behavior of ``none`` + synchronization mode is different. Normally, a + ``backup`` job consists of two parts: Anything that is + overwritten by the guest is first copied out to the + backup, and in the background the whole image is copied + from start to end. With ``sync=none``, it's only the + first part. (4) ``incremental`` -- Synchronize content that is described by the dirty bitmap @@ -928,19 +929,22 @@ Shutdown the guest, by issuing the ``quit`` QMP command:: } -Live disk backup --- ``drive-backup`` and ``blockdev-backup`` -------------------------------------------------------------- +Live disk backup --- ``blockdev-backup`` and the deprecated``drive-backup`` +--------------------------------------------------------------------------- -The ``drive-backup`` (and its newer equivalent ``blockdev-backup``) allows +The ``blockdev-backup`` (and the deprecated ``drive-backup``) allows you to create a point-in-time snapshot. -In this case, the point-in-time is when you *start* the ``drive-backup`` -(or its newer equivalent ``blockdev-backup``) command. +In this case, the point-in-time is when you *start* the +``blockdev-backup`` (or deprecated ``drive-backup``) command. QMP invocation for ``drive-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note that ``drive-backup`` command is deprecated since QEMU 6.2 and +will be removed in future. + Yet again, starting afresh with our example disk image chain:: [A] <-- [B] <-- [C] <-- [D] @@ -965,11 +969,22 @@ will be issued, indicating the live block device job operation has completed, and no further action is required. +Moving from the deprecated ``drive-backup`` to newer ``blockdev-backup`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``blockdev-backup`` differs from ``drive-backup`` in how you specify +the backup target. With ``blockdev-backup`` you can't specify filename +as a target. Instead you use ``node-name`` of existing block node, +which you may add by ``blockdev-add`` or ``blockdev-create`` commands. +Correspondingly, ``blockdev-backup`` doesn't have ``mode`` and +``format`` arguments which don't apply to an existing block node. See +following sections for details and examples. + + Notes on ``blockdev-backup`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``blockdev-backup`` command is equivalent in functionality to -``drive-backup``, except that it operates at node-level in a Block Driver +The ``blockdev-backup`` command operates at node-level in a Block Driver State (BDS) graph. E.g. the sequence of actions to create a point-in-time backup diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt index 10ce098a29..bdb0f2a41a 100644 --- a/docs/interop/nbd.txt +++ b/docs/interop/nbd.txt @@ -1,4 +1,4 @@ -Qemu supports the NBD protocol, and has an internal NBD client (see +QEMU supports the NBD protocol, and has an internal NBD client (see block/nbd.c), an internal NBD server (see blockdev-nbd.c), and an external NBD server tool (see qemu-nbd.c). The common code is placed in nbd/*. @@ -7,11 +7,11 @@ The NBD protocol is specified here: https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md The following paragraphs describe some specific properties of NBD -protocol realization in Qemu. +protocol realization in QEMU. = Metadata namespaces = -Qemu supports the "base:allocation" metadata context as defined in the +QEMU supports the "base:allocation" metadata context as defined in the NBD protocol specification, and also defines an additional metadata namespace "qemu". diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt index 0463f761ef..f7dc304ff6 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.txt @@ -313,7 +313,7 @@ The fields of the bitmaps extension are: The number of bitmaps contained in the image. Must be greater than or equal to 1. - Note: Qemu currently only supports up to 65535 bitmaps per + Note: QEMU currently only supports up to 65535 bitmaps per image. 4 - 7: Reserved, must be zero. @@ -775,7 +775,7 @@ Structure of a bitmap directory entry: 2: extra_data_compatible This flags is meaningful when the extra data is unknown to the software (currently any extra data is - unknown to Qemu). + unknown to QEMU). If it is set, the bitmap may be used as expected, extra data must be left as is. If it is not set, the bitmap must not be used, but @@ -793,7 +793,7 @@ Structure of a bitmap directory entry: 17: granularity_bits Granularity bits. Valid values: 0 - 63. - Note: Qemu currently supports only values 9 - 31. + Note: QEMU currently supports only values 9 - 31. Granularity is calculated as granularity = 1 << granularity_bits @@ -804,7 +804,7 @@ Structure of a bitmap directory entry: 18 - 19: name_size Size of the bitmap name. Must be non-zero. - Note: Qemu currently doesn't support values greater than + Note: QEMU currently doesn't support values greater than 1023. 20 - 23: extra_data_size diff --git a/docs/multiseat.txt b/docs/multiseat.txt index 11850c96ff..2b297e979d 100644 --- a/docs/multiseat.txt +++ b/docs/multiseat.txt @@ -123,7 +123,7 @@ Background info is here: guest side with pci-bridge-seat ------------------------------- -Qemu version 2.4 and newer has a new pci-bridge-seat device which +QEMU version 2.4 and newer has a new pci-bridge-seat device which can be used instead of pci-bridge. Just swap the device name in the qemu command line above. The only difference between the two devices is the pci id. We can match the pci id instead of the device path diff --git a/docs/system/arm/orangepi.rst b/docs/system/arm/orangepi.rst index c55694dd91..83c7445197 100644 --- a/docs/system/arm/orangepi.rst +++ b/docs/system/arm/orangepi.rst @@ -128,7 +128,7 @@ Alternatively, you can also choose to build you own image with buildroot using the orangepi_pc_defconfig. Also see https://buildroot.org for more information. When using an image as an SD card, it must be resized to a power of two. This can be -done with the qemu-img command. It is recommended to only increase the image size +done with the ``qemu-img`` command. It is recommended to only increase the image size instead of shrinking it to a power of two, to avoid loss of data. For example, to prepare a downloaded Armbian image, first extract it and then increase its size to one gigabyte as follows: diff --git a/docs/system/authz.rst b/docs/system/authz.rst index 942af39602..55b7315e49 100644 --- a/docs/system/authz.rst +++ b/docs/system/authz.rst @@ -77,9 +77,7 @@ To create an instance of this driver via QMP: "arguments": { "qom-type": "authz-simple", "id": "authz0", - "props": { - "identity": "fred" - } + "identity": "fred" } } @@ -110,15 +108,13 @@ To create an instance of this class via QMP: "arguments": { "qom-type": "authz-list", "id": "authz0", - "props": { - "rules": [ - { "match": "fred", "policy": "allow", "format": "exact" }, - { "match": "bob", "policy": "allow", "format": "exact" }, - { "match": "danb", "policy": "deny", "format": "exact" }, - { "match": "dan*", "policy": "allow", "format": "glob" } - ], - "policy": "deny" - } + "rules": [ + { "match": "fred", "policy": "allow", "format": "exact" }, + { "match": "bob", "policy": "allow", "format": "exact" }, + { "match": "danb", "policy": "deny", "format": "exact" }, + { "match": "dan*", "policy": "allow", "format": "glob" } + ], + "policy": "deny" } } @@ -143,10 +139,8 @@ To create an instance of this class via QMP: "arguments": { "qom-type": "authz-list-file", "id": "authz0", - "props": { - "filename": "/etc/qemu/myvm-vnc.acl", - "refresh": true - } + "filename": "/etc/qemu/myvm-vnc.acl", + "refresh": true } } diff --git a/docs/system/cpu-models-x86.rst.inc b/docs/system/cpu-models-x86.rst.inc index 6e8be7d79b..7f6368f999 100644 --- a/docs/system/cpu-models-x86.rst.inc +++ b/docs/system/cpu-models-x86.rst.inc @@ -49,7 +49,7 @@ future OS and toolchains are likely to target newer ABIs. The table that follows illustrates which ABI compatibility levels can be satisfied by the QEMU CPU models. Note that the table only lists the long term stable CPU model versions (eg Haswell-v4). -In addition to whats listed, there are also many CPU model +In addition to what is listed, there are also many CPU model aliases which resolve to a different CPU model version, depending on the machine type is in use. diff --git a/docs/system/device-url-syntax.rst.inc b/docs/system/device-url-syntax.rst.inc index d15a021508..7dbc525fa8 100644 --- a/docs/system/device-url-syntax.rst.inc +++ b/docs/system/device-url-syntax.rst.inc @@ -15,7 +15,7 @@ These are specified using a special URL syntax. 'iqn.2008-11.org.linux-kvm[:<name>]' but this can also be set from the command line or a configuration file. - Since version Qemu 2.4 it is possible to specify a iSCSI request + Since version QEMU 2.4 it is possible to specify a iSCSI request timeout to detect stalled requests and force a reestablishment of the session. The timeout is specified in seconds. The default is 0 which means no timeout. Libiscsi 1.15.0 or greater is required for this diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index bff72d1c24..b5acb2a9c1 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -70,7 +70,7 @@ namespaces and additional features, the ``nvme-ns`` device must be used. The namespaces defined by the ``nvme-ns`` device will attach to the most recently defined ``nvme-bus`` that is created by the ``nvme`` device. Namespace -identifers are allocated automatically, starting from ``1``. +identifiers are allocated automatically, starting from ``1``. There are a number of parameters available: @@ -110,28 +110,32 @@ multipath I/O. This will create an NVM subsystem with two controllers. Having controllers linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters: -``shared`` (default: ``off``) +``shared`` (default: ``on`` since 6.2) Specifies that the namespace will be attached to all controllers in the - subsystem. If set to ``off`` (the default), the namespace will remain a - private namespace and may only be attached to a single controller at a time. + subsystem. If set to ``off``, the namespace will remain a private namespace + and may only be attached to a single controller at a time. Shared namespaces + are always automatically attached to all controllers (also when controllers + are hotplugged). ``detached`` (default: ``off``) If set to ``on``, the namespace will be be available in the subsystem, but - not attached to any controllers initially. + not attached to any controllers initially. A shared namespace with this set + to ``on`` will never be automatically attached to controllers. Thus, adding .. code-block:: console -drive file=nvm-1.img,if=none,id=nvm-1 - -device nvme-ns,drive=nvm-1,nsid=1,shared=on + -device nvme-ns,drive=nvm-1,nsid=1 -drive file=nvm-2.img,if=none,id=nvm-2 - -device nvme-ns,drive=nvm-2,nsid=3,detached=on + -device nvme-ns,drive=nvm-2,nsid=3,shared=off,detached=on -will cause NSID 1 will be a shared namespace (due to ``shared=on``) that is -initially attached to both controllers. NSID 3 will be a private namespace -(i.e. only attachable to a single controller at a time) and will not be -attached to any controller initially (due to ``detached=on``). +will cause NSID 1 will be a shared namespace that is initially attached to both +controllers. NSID 3 will be a private namespace due to ``shared=off`` and only +attachable to a single controller at a time. Additionally it will not be +attached to any controller initially (due to ``detached=on``) or to hotplugged +controllers. Optional Features ================= diff --git a/docs/system/gdb.rst b/docs/system/gdb.rst index bdb42dae2f..453eb73f6c 100644 --- a/docs/system/gdb.rst +++ b/docs/system/gdb.rst @@ -56,7 +56,7 @@ machine has more than one CPU, QEMU exposes each CPU cluster as a separate "inferior", where each CPU within the cluster is a separate "thread". Most QEMU machine types have identical CPUs, so there is a single cluster which has all the CPUs in it. A few machine types are -heterogenous and have multiple clusters: for example the ``sifive_u`` +heterogeneous and have multiple clusters: for example the ``sifive_u`` machine has a cluster with one E51 core and a second cluster with four U54 cores. Here the E51 is the only thread in the first inferior, and the U54 cores are all threads in the second inferior. diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst index 9aa161af1a..f8fade5ac2 100644 --- a/docs/system/i386/sgx.rst +++ b/docs/system/i386/sgx.rst @@ -20,13 +20,13 @@ report the same CPUID info to guest as on host for most of SGX CPUID. With reporting the same CPUID guest is able to use full capacity of SGX, and KVM doesn't need to emulate those info. -The guest's EPC base and size are determined by Qemu, and KVM needs Qemu to +The guest's EPC base and size are determined by QEMU, and KVM needs QEMU to notify such info to it before it can initialize SGX for guest. Virtual EPC ~~~~~~~~~~~ -By default, Qemu does not assign EPC to a VM, i.e. fully enabling SGX in a VM +By default, QEMU does not assign EPC to a VM, i.e. fully enabling SGX in a VM requires explicit allocation of EPC to the VM. Similar to other specialized memory types, e.g. hugetlbfs, EPC is exposed as a memory backend. @@ -35,12 +35,12 @@ prior to realizing the vCPUs themselves, which occurs long before generic devices are parsed and realized. This limitation means that EPC does not require -maxmem as EPC is not treated as {cold,hot}plugged memory. -Qemu does not artificially restrict the number of EPC sections exposed to a -guest, e.g. Qemu will happily allow you to create 64 1M EPC sections. Be aware +QEMU does not artificially restrict the number of EPC sections exposed to a +guest, e.g. QEMU will happily allow you to create 64 1M EPC sections. Be aware that some kernels may not recognize all EPC sections, e.g. the Linux SGX driver is hardwired to support only 8 EPC sections. -The following Qemu snippet creates two EPC sections, with 64M pre-allocated +The following QEMU snippet creates two EPC sections, with 64M pre-allocated to the VM and an additional 28M mapped but not allocated:: -object memory-backend-epc,id=mem1,size=64M,prealloc=on \ @@ -54,7 +54,7 @@ to physical EPC. Because physical EPC is protected via range registers, the size of the physical EPC must be a power of two (though software sees a subset of the full EPC, e.g. 92M or 128M) and the EPC must be naturally aligned. KVM SGX's virtual EPC is purely a software construct and only -requires the size and location to be page aligned. Qemu enforces the EPC +requires the size and location to be page aligned. QEMU enforces the EPC size is a multiple of 4k and will ensure the base of the EPC is 4k aligned. To simplify the implementation, EPC is always located above 4g in the guest physical address space. @@ -62,7 +62,7 @@ physical address space. Migration ~~~~~~~~~ -Qemu/KVM doesn't prevent live migrating SGX VMs, although from hardware's +QEMU/KVM doesn't prevent live migrating SGX VMs, although from hardware's perspective, SGX doesn't support live migration, since both EPC and the SGX key hierarchy are bound to the physical platform. However live migration can be supported in the sense if guest software stack can support recreating @@ -76,7 +76,7 @@ CPUID ~~~~~ Due to its myriad dependencies, SGX is currently not listed as supported -in any of Qemu's built-in CPU configuration. To expose SGX (and SGX Launch +in any of QEMU's built-in CPU configuration. To expose SGX (and SGX Launch Control) to a guest, you must either use ``-cpu host`` to pass-through the host CPU model, or explicitly enable SGX when using a built-in CPU model, e.g. via ``-cpu <model>,+sgx`` or ``-cpu <model>,+sgx,+sgxlc``. @@ -101,7 +101,7 @@ controlled via -cpu are prefixed with "sgx", e.g.:: sgx2 sgxlc -The following Qemu snippet passes through the host CPU but restricts access to +The following QEMU snippet passes through the host CPU but restricts access to the provision and EINIT token keys:: -cpu host,-sgx-provisionkey,-sgx-tokenkey @@ -112,11 +112,11 @@ in hardware cannot be forced on via '-cpu'. Virtualize SGX Launch Control ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Qemu SGX support for Launch Control (LC) is passive, in the sense that it -does not actively change the LC configuration. Qemu SGX provides the user +QEMU SGX support for Launch Control (LC) is passive, in the sense that it +does not actively change the LC configuration. QEMU SGX provides the user the ability to set/clear the CPUID flag (and by extension the associated IA32_FEATURE_CONTROL MSR bit in fw_cfg) and saves/restores the LE Hash MSRs -when getting/putting guest state, but Qemu does not add new controls to +when getting/putting guest state, but QEMU does not add new controls to directly modify the LC configuration. Similar to hardware behavior, locking the LC configuration to a non-Intel value is left to guest firmware. Unlike host bios setting for SGX launch control(LC), there is no special bios setting @@ -126,7 +126,7 @@ creating VM with SGX. Feature Control ~~~~~~~~~~~~~~~ -Qemu SGX updates the ``etc/msr_feature_control`` fw_cfg entry to set the SGX +QEMU SGX updates the ``etc/msr_feature_control`` fw_cfg entry to set the SGX (bit 18) and SGX LC (bit 17) flags based on their respective CPUID support, i.e. existing guest firmware will automatically set SGX and SGX LC accordingly, assuming said firmware supports fw_cfg.msr_feature_control. diff --git a/docs/system/images.rst b/docs/system/images.rst index 3d9144e625..d000bd6b6f 100644 --- a/docs/system/images.rst +++ b/docs/system/images.rst @@ -20,7 +20,7 @@ where myimage.img is the disk image filename and mysize is its size in kilobytes. You can add an ``M`` suffix to give the size in megabytes and a ``G`` suffix for gigabytes. -See the qemu-img invocation documentation for more information. +See the ``qemu-img`` invocation documentation for more information. .. _disk_005fimages_005fsnapshot_005fmode: diff --git a/docs/system/ppc/ppce500.rst b/docs/system/ppc/ppce500.rst index afc58f60f5..9beef39171 100644 --- a/docs/system/ppc/ppce500.rst +++ b/docs/system/ppc/ppce500.rst @@ -75,7 +75,7 @@ as the BIOS. QEMU follows below truth table to select which payload to execute: When both -bios and -kernel are present, QEMU loads U-Boot and U-Boot in turns automatically loads the kernel image specified by the -kernel parameter via U-Boot's built-in "bootm" command, hence a legacy uImage format is required in -such senario. +such scenario. Running Linux kernel -------------------- diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc index 16225710eb..e313784426 100644 --- a/docs/system/qemu-block-drivers.rst.inc +++ b/docs/system/qemu-block-drivers.rst.inc @@ -511,13 +511,13 @@ of an inet socket: |qemu_system| linux.img -hdb nbd+unix://?socket=/tmp/my_socket -In this case, the block device must be exported using qemu-nbd: +In this case, the block device must be exported using ``qemu-nbd``: .. parsed-literal:: qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 -The use of qemu-nbd allows sharing of a disk between several guests: +The use of ``qemu-nbd`` allows sharing of a disk between several guests: .. parsed-literal:: @@ -530,7 +530,7 @@ and then you can use it with two guests: |qemu_system| linux1.img -hdb nbd+unix://?socket=/tmp/my_socket |qemu_system| linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's +If the ``nbd-server`` uses named exports (supported since NBD 2.9.18, or with QEMU's own embedded NBD server), you must specify an export name in the URI: .. parsed-literal:: diff --git a/docs/system/riscv/shakti-c.rst b/docs/system/riscv/shakti-c.rst index a6035d42b0..fea57f7b6b 100644 --- a/docs/system/riscv/shakti-c.rst +++ b/docs/system/riscv/shakti-c.rst @@ -45,7 +45,7 @@ Shakti SDK can be used to generate the baremetal example UART applications. Binary would be generated in: software/examples/uart_applns/loopback/output/loopback.shakti -You could also download the precompiled example applicatons using below +You could also download the precompiled example applications using below commands. .. code-block:: bash diff --git a/docs/system/tls.rst b/docs/system/tls.rst index b0973afe1b..1a04674362 100644 --- a/docs/system/tls.rst +++ b/docs/system/tls.rst @@ -311,7 +311,7 @@ containing one or more usernames and random keys:: mkdir -m 0700 /tmp/keys psktool -u rich -p /tmp/keys/keys.psk -TLS-enabled servers such as qemu-nbd can use this directory like so:: +TLS-enabled servers such as ``qemu-nbd`` can use this directory like so:: qemu-nbd \ -t -x / \ diff --git a/docs/throttle.txt b/docs/throttle.txt index b5b78b7326..0a0453a5ee 100644 --- a/docs/throttle.txt +++ b/docs/throttle.txt @@ -273,11 +273,9 @@ A group can be created using the object-add QMP function: "arguments": { "qom-type": "throttle-group", "id": "group0", - "props": { - "limits" : { - "iops-total": 1000 - "bps-write": 2097152 - } + "limits" : { + "iops-total": 1000, + "bps-write": 2097152 } } } diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index c0a4443146..d663dd92bd 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -127,9 +127,9 @@ by the used format or see the format descriptions below for details. .. option:: -S SIZE Indicates the consecutive number of bytes that must contain only zeros - for qemu-img to create a sparse image during conversion. This value is rounded - down to the nearest 512 bytes. You may use the common size suffixes like - ``k`` for kilobytes. + for ``qemu-img`` to create a sparse image during conversion. This value is + rounded down to the nearest 512 bytes. You may use the common size suffixes + like ``k`` for kilobytes. .. option:: -t CACHE @@ -431,7 +431,7 @@ Command description: suppressed from the destination image. *SPARSE_SIZE* indicates the consecutive number of bytes (defaults to 4k) - that must contain only zeros for qemu-img to create a sparse image during + that must contain only zeros for ``qemu-img`` to create a sparse image during conversion. If *SPARSE_SIZE* is 0, the source will not be scanned for unallocated or zero sectors, and the destination image will always be fully allocated. @@ -447,7 +447,7 @@ Command description: If the ``-n`` option is specified, the target volume creation will be skipped. This is useful for formats such as ``rbd`` if the target volume has already been created with site specific options that cannot - be supplied through qemu-img. + be supplied through ``qemu-img``. Out of order writes can be enabled with ``-W`` to improve performance. This is only recommended for preallocated devices like host devices or other @@ -472,7 +472,7 @@ Command description: If the option *BACKING_FILE* is specified, then the image will record only the differences from *BACKING_FILE*. No size needs to be specified in this case. *BACKING_FILE* will never be modified unless you use the - ``commit`` monitor command (or qemu-img commit). + ``commit`` monitor command (or ``qemu-img commit``). If a relative path name is given, the backing file is looked up relative to the directory containing *FILENAME*. @@ -684,7 +684,7 @@ Command description: Safe mode This is the default mode and performs a real rebase operation. The - new backing file may differ from the old one and qemu-img rebase + new backing file may differ from the old one and ``qemu-img rebase`` will take care of keeping the guest-visible content of *FILENAME* unchanged. @@ -697,7 +697,7 @@ Command description: exists. Unsafe mode - qemu-img uses the unsafe mode if ``-u`` is specified. In this + ``qemu-img`` uses the unsafe mode if ``-u`` is specified. In this mode, only the backing file name and format of *FILENAME* is changed without any checks on the file contents. The user must take care of specifying the correct new backing file, or the guest-visible @@ -735,7 +735,7 @@ Command description: sizes accordingly. Failure to do so will result in data loss! When shrinking images, the ``--shrink`` option must be given. This informs - qemu-img that the user acknowledges all loss of data beyond the truncated + ``qemu-img`` that the user acknowledges all loss of data beyond the truncated image's end. After using this command to grow a disk image, you must use file system and diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst index 56e54cd441..6031f96893 100644 --- a/docs/tools/qemu-nbd.rst +++ b/docs/tools/qemu-nbd.rst @@ -31,14 +31,14 @@ driver options if ``--image-opts`` is specified. *dev* is an NBD device. -.. option:: --object type,id=ID,...props... +.. option:: --object type,id=ID,... Define a new instance of the *type* object class identified by *ID*. See the :manpage:`qemu(1)` manual page for full details of the properties supported. The common object types that it makes sense to define are the ``secret`` object, which is used to supply passwords and/or encryption keys, and the ``tls-creds`` object, which is used to supply TLS - credentials for the qemu-nbd server or client. + credentials for the ``qemu-nbd`` server or client. .. option:: -p, --port=PORT @@ -238,7 +238,7 @@ daemon: Expose the guest-visible contents of a qcow2 file via a block device /dev/nbd0 (and possibly creating /dev/nbd0p1 and friends for partitions found within), then disconnect the device when done. -Access to bind qemu-nbd to an /dev/nbd device generally requires root +Access to bind ``qemu-nbd`` to a /dev/nbd device generally requires root privileges, and may also require the execution of ``modprobe nbd`` to enable the kernel NBD client module. *CAUTION*: Do not use this method to mount filesystems from an untrusted guest image - a diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst index b8ef4486f1..3e5a9dc032 100644 --- a/docs/tools/qemu-storage-daemon.rst +++ b/docs/tools/qemu-storage-daemon.rst @@ -10,9 +10,10 @@ Synopsis Description ----------- -qemu-storage-daemon provides disk image functionality from QEMU, qemu-img, and -qemu-nbd in a long-running process controlled via QMP commands without running -a virtual machine. It can export disk images, run block job operations, and +``qemu-storage-daemon`` provides disk image functionality from QEMU, +``qemu-img``, and ``qemu-nbd`` in a long-running process controlled via QMP +commands without running a virtual machine. +It can export disk images, run block job operations, and perform other disk-related operations. The daemon is controlled via a QMP monitor and initial configuration from the command-line. diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index cc31402830..07ac0be551 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -136,8 +136,8 @@ Extended attribute (xattr) mapping By default the name of xattr's used by the client are passed through to the server file system. This can be a problem where either those xattr names are used by something on the server (e.g. selinux client/server confusion) or if the -virtiofsd is running in a container with restricted privileges where it cannot -access some attributes. +``virtiofsd`` is running in a container with restricted privileges where it +cannot access some attributes. Mapping syntax ~~~~~~~~~~~~~~ diff --git a/docs/u2f.txt b/docs/u2f.txt index 8f44994818..7f5813a0b7 100644 --- a/docs/u2f.txt +++ b/docs/u2f.txt @@ -21,7 +21,7 @@ The second factor is materialized by a device implementing the U2F protocol. In case of a USB U2F security key, it is a USB HID device that implements the U2F protocol. -In Qemu, the USB U2F key device offers a dedicated support of U2F, allowing +In QEMU, the USB U2F key device offers a dedicated support of U2F, allowing guest USB FIDO/U2F security keys operating in two possible modes: pass-through and emulated. @@ -94,7 +94,7 @@ static inline int cpu_gdb_index(CPUState *cpu) { #if defined(CONFIG_USER_ONLY) TaskState *ts = (TaskState *) cpu->opaque; - return ts->ts_tid; + return ts ? ts->ts_tid : -1; #else return cpu->cpu_index + 1; #endif diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 1ee2ba2c50..ebe08ed831 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -419,6 +419,20 @@ static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) s->pm.use_acpi_hotplug_bridge = value; } +static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.keep_pci_slot_hpc; +} + +static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.keep_pci_slot_hpc = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -428,6 +442,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s4 = 0; pm->s4_val = 2; pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -454,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, ich9_pm_get_acpi_pci_hotplug, ich9_pm_set_acpi_pci_hotplug); + object_property_add_bool(obj, "x-keep-pci-slot-hpc", + ich9_pm_get_keep_pci_slot_hpc, + ich9_pm_set_keep_pci_slot_hpc); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index f610a25d2e..30405b5113 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -222,9 +222,27 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo PCIDevice *dev = PCI_DEVICE(qdev); if (PCI_SLOT(dev->devfn) == slot) { if (!acpi_pcihp_pc_no_hotplug(s, dev)) { - hotplug_ctrl = qdev_get_hotplug_handler(qdev); - hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); - object_unparent(OBJECT(qdev)); + /* + * partially_hotplugged is used by virtio-net failover: + * failover has asked the guest OS to unplug the device + * but we need to keep some references to the device + * to be able to plug it back in case of failure so + * we don't execute hotplug_handler_unplug(). + */ + if (dev->partially_hotplugged) { + /* + * pending_deleted_event is set to true when + * virtio-net failover asks to unplug the device, + * and set to false here when the operation is done + * This is used by the migration loop to detect the + * end of the operation and really start the migration. + */ + qdev->pending_deleted_event = false; + } else { + hotplug_ctrl = qdev_get_hotplug_handler(qdev); + hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); + object_unparent(OBJECT(qdev)); + } } } } @@ -396,6 +414,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + /* + * pending_deleted_event is used by virtio-net failover to detect the + * end of the unplug operation, the flag is set to false in + * acpi_pcihp_eject_slot() when the operation is completed. + */ + pdev->qdev.pending_deleted_event = true; s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 369552ad45..30da05dfe0 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -49,6 +49,7 @@ #include "sysemu/runstate.h" #include "sysemu/tpm.h" #include "sysemu/kvm.h" +#include "sysemu/hvf.h" #include "hw/loader.h" #include "qapi/error.h" #include "qemu/bitops.h" @@ -1969,15 +1970,17 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->virt && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "Virtualization extensions to the guest CPU"); + if (vms->virt && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "Virtualization extensions to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } - if (vms->mte && kvm_enabled()) { - error_report("mach-virt: KVM does not support providing " - "MTE to the guest CPU"); + if (vms->mte && (kvm_enabled() || hvf_enabled())) { + error_report("mach-virt: %s does not support providing " + "MTE to the guest CPU", + kvm_enabled() ? "KVM" : "HVF"); exit(1); } diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 252c3a7a23..ee5a5352dc 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -222,7 +222,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) memory_region_transaction_commit(); while (j--) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); } goto fail_host_notifiers; } diff --git a/hw/block/fdc.c b/hw/block/fdc.c index fa933cd326..21d18ac2e3 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -61,6 +61,12 @@ } while (0) +/* Anonymous BlockBackend for empty drive */ +static BlockBackend *blk_create_empty_drive(void) +{ + return blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); +} + /********************************************************/ /* qdev floppy bus */ @@ -486,8 +492,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) } if (!dev->conf.blk) { - /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + dev->conf.blk = blk_create_empty_drive(); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); @@ -1161,7 +1166,19 @@ static FDrive *get_drv(FDCtrl *fdctrl, int unit) static FDrive *get_cur_drv(FDCtrl *fdctrl) { - return get_drv(fdctrl, fdctrl->cur_drv); + FDrive *cur_drv = get_drv(fdctrl, fdctrl->cur_drv); + + if (!cur_drv->blk) { + /* + * Kludge: empty drive line selected. Create an anonymous + * BlockBackend to avoid NULL deref with various BlockBackend + * API calls within this model (CVE-2021-20196). + * Due to the controller QOM model limitations, we don't + * attach the created to the controller device. + */ + cur_drv->blk = blk_create_empty_drive(); + } + return cur_drv; } /* Status A register : 0x00 (read-only) */ diff --git a/hw/char/escc.c b/hw/char/escc.c index 0fce4f6324..8755d8d34f 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -354,6 +354,17 @@ static void escc_reset(DeviceState *d) cs->rregs[j] = 0; cs->wregs[j] = 0; } + + /* + * ...but there is an exception. The "Transmit Interrupts and Transmit + * Buffer Empty Bit" section on page 50 of the ESCC datasheet says of + * the STATUS_TXEMPTY bit in R_STATUS: "After a hardware reset + * (including a hardware reset by software), or a channel reset, this + * bit is set to 1". The Sun PROM checks this bit early on startup and + * gets stuck in an infinite loop if it is not set. + */ + cs->rregs[R_STATUS] |= STATUS_TXEMPTY; + escc_reset_chn(cs); } } @@ -575,6 +586,20 @@ static void escc_mem_write(void *opaque, hwaddr addr, s->wregs[s->reg] = val; break; case W_TXCTRL1: + s->wregs[s->reg] = val; + /* + * The ESCC datasheet states that SPEC_ALLSENT is always set in + * sync mode, and set in async mode when all characters have + * cleared the transmitter. Since writes to SERIAL_DATA use the + * blocking qemu_chr_fe_write_all() function to write each + * character, the guest can never see the state when async data + * is in the process of being transmitted so we can set this bit + * unconditionally regardless of the state of the W_TXCTRL1 mode + * bits. + */ + s->rregs[R_SPEC] |= SPEC_ALLSENT; + escc_update_parameters(s); + break; case W_TXCTRL2: s->wregs[s->reg] = val; escc_update_parameters(s); diff --git a/hw/core/machine.c b/hw/core/machine.c index 26ec54e726..53a99abc56 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,6 +39,7 @@ GlobalProperty hw_compat_6_1[] = { { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, }; const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1); diff --git a/hw/core/numa.c b/hw/core/numa.c index 510d096a88..e6050b2273 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) PCDIMMDeviceInfo *pcdimm_info; VirtioPMEMDeviceInfo *vpi; VirtioMEMDeviceInfo *vmi; + SgxEPCDeviceInfo *se; for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; @@ -781,6 +782,12 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) node_mem[vmi->node].node_mem += vmi->size; node_mem[vmi->node].node_plugged_mem += vmi->size; break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; + /* TODO: once we support numa, assign to right node */ + node_mem[0].node_mem += se->size; + node_mem[0].node_plugged_mem += se->size; + break; default: g_assert_not_reached(); } diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 8cea84f2be..90851e730b 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -33,7 +33,6 @@ #include "hw/loader.h" #include "hw/qdev-properties.h" #include "qom/object.h" -#include "qapi/error.h" #define TYPE_ISA_VGA "isa-vga" OBJECT_DECLARE_SIMPLE_TYPE(ISAVGAState, ISA_VGA) @@ -62,15 +61,6 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) MemoryRegion *vga_io_memory; const MemoryRegionPortio *vga_ports, *vbe_ports; - /* - * make sure this device is not being added twice, if so - * exit without crashing qemu - */ - if (object_resolve_path_type("", TYPE_ISA_VGA, NULL)) { - error_setg(errp, "at most one %s device is permitted", TYPE_ISA_VGA); - return; - } - s->global_vmstate = true; vga_common_init(s, OBJECT(dev)); s->legacy_address_space = isa_address_space(isadev); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a3ad6abd33..a99c6e4fe3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1337,7 +1337,7 @@ static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) aml_append(table, scope); } -static Aml *build_q35_osc_method(void) +static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) { Aml *if_ctx; Aml *if_ctx2; @@ -1359,8 +1359,10 @@ static Aml *build_q35_osc_method(void) /* * Always allow native PME, AER (no dependencies) * Allow SHPC (PCI bridges can have SHPC controller) + * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled. */ - aml_append(if_ctx, aml_and(a_ctrl, aml_int(0x1F), a_ctrl)); + aml_append(if_ctx, aml_and(a_ctrl, + aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl)); if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); /* Unknown revision */ @@ -1449,7 +1451,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); + aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en)); aml_append(sb_scope, dev); if (mcfg_valid) { aml_append(sb_scope, build_q35_dram_controller(&mcfg)); @@ -1565,7 +1567,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, build_q35_osc_method()); + + /* Expander bridges do not have ACPI PCI Hot-plug enabled */ + aml_append(dev, build_q35_osc_method(true)); } else { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 294499ee20..f584449d8d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3629,6 +3629,12 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported); + if (s->scalable_mode) { + vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; + vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; + } + if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 3d5487fe2c..a6c788049b 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -388,6 +388,8 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 /* Rsvd field masks for spte */ +#define VTD_SPTE_SNP 0x800ULL + #define VTD_SPTE_PAGE_L1_RSVD_MASK(aw, dt_supported) \ dt_supported ? \ (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ diff --git a/hw/i386/microvm-dt.c b/hw/i386/microvm-dt.c index 875ba91963..9c3c4995b4 100644 --- a/hw/i386/microvm-dt.c +++ b/hw/i386/microvm-dt.c @@ -143,6 +143,8 @@ static void dt_add_pcie(MicrovmMachineState *mms) nr_pcie_buses = PCIE_ECAM_SIZE / PCIE_MMCFG_SIZE_MIN; qemu_fdt_setprop_cells(mms->fdt, nodename, "bus-range", 0, nr_pcie_buses - 1); + + g_free(nodename); } static void dt_add_ioapic(MicrovmMachineState *mms, SysBusDevice *dev) @@ -327,12 +329,17 @@ void dt_setup_microvm(MicrovmMachineState *mms) dt_setup_sys_bus(mms); /* add to fw_cfg */ - fprintf(stderr, "%s: add etc/fdt to fw_cfg\n", __func__); + if (debug) { + fprintf(stderr, "%s: add etc/fdt to fw_cfg\n", __func__); + } fw_cfg_add_file(x86ms->fw_cfg, "etc/fdt", mms->fdt, size); if (debug) { fprintf(stderr, "%s: writing microvm.fdt\n", __func__); - g_file_set_contents("microvm.fdt", mms->fdt, size, NULL); + if (!g_file_set_contents("microvm.fdt", mms->fdt, size, NULL)) { + fprintf(stderr, "%s: writing microvm.fdt failed\n", __func__); + return; + } int ret = system("dtc -I dtb -O dts microvm.fdt"); if (ret != 0) { fprintf(stderr, "%s: oops, dtc not installed?\n", __func__); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2592a82148..a2ef40ecbc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_1[] = { { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "false" }, }; const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1); @@ -107,6 +108,7 @@ GlobalProperty pc_compat_6_0[] = { { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, + { "ICH9-LPC", "x-keep-pci-slot-hpc", "true" }, }; const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 797e09500b..e1e100316d 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -137,6 +137,7 @@ static void pc_q35_init(MachineState *machine) DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); bool acpi_pcihp; + bool keep_pci_slot_hpc; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -242,8 +243,12 @@ static void pc_q35_init(MachineState *machine) ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL); - if (acpi_pcihp) { - object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug", + keep_pci_slot_hpc = object_property_get_bool(OBJECT(lpc), + "x-keep-pci-slot-hpc", + NULL); + + if (!keep_pci_slot_hpc && acpi_pcihp) { + object_register_sugar_prop(TYPE_PCIE_SLOT, "x-native-hotplug", "false", true); } diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c index 55e2217eae..e508827e78 100644 --- a/hw/i386/sgx-epc.c +++ b/hw/i386/sgx-epc.c @@ -154,6 +154,7 @@ static void sgx_epc_class_init(ObjectClass *oc, void *data) dc->realize = sgx_epc_realize; dc->unrealize = sgx_epc_unrealize; dc->desc = "SGX EPC section"; + dc->user_creatable = false; device_class_set_props(dc, sgx_epc_properties); mdc->get_addr = sgx_epc_md_get_addr; diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index 11607568b6..8fef3dd8fa 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -21,6 +21,8 @@ #include "qapi/qapi-commands-misc-target.h" #include "exec/address-spaces.h" #include "sysemu/hw_accel.h" +#include "sysemu/reset.h" +#include <sys/ioctl.h> #define SGX_MAX_EPC_SECTIONS 8 #define SGX_CPUID_EPC_INVALID 0x0 @@ -29,6 +31,11 @@ #define SGX_CPUID_EPC_SECTION 0x1 #define SGX_CPUID_EPC_MASK 0xF +#define SGX_MAGIC 0xA4 +#define SGX_IOC_VEPC_REMOVE_ALL _IO(SGX_MAGIC, 0x04) + +#define RETRY_NUM 2 + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) { return (low & MAKE_64BIT_MASK(12, 20)) + @@ -59,6 +66,46 @@ static uint64_t sgx_calc_host_epc_section_size(void) return size; } +static void sgx_epc_reset(void *opaque) +{ + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + HostMemoryBackend *hostmem; + SGXEPCDevice *epc; + int failures; + int fd, i, j, r; + static bool warned = false; + + /* + * The second pass is needed to remove SECS pages that could not + * be removed during the first. + */ + for (i = 0; i < RETRY_NUM; i++) { + failures = 0; + for (j = 0; j < pcms->sgx_epc.nr_sections; j++) { + epc = pcms->sgx_epc.sections[j]; + hostmem = MEMORY_BACKEND(epc->hostmem); + fd = memory_region_get_fd(host_memory_backend_get_memory(hostmem)); + + r = ioctl(fd, SGX_IOC_VEPC_REMOVE_ALL); + if (r == -ENOTTY && !warned) { + warned = true; + warn_report("kernel does not support SGX_IOC_VEPC_REMOVE_ALL"); + warn_report("SGX might operate incorrectly in the guest after reset"); + break; + } else if (r > 0) { + /* SECS pages remain */ + failures++; + if (i == 1) { + error_report("cannot reset vEPC section %d", j); + } + } + } + if (!failures) { + break; + } + } +} + SGXInfo *qmp_query_sgx_capabilities(Error **errp) { SGXInfo *info = NULL; @@ -190,4 +237,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) } memory_region_set_size(&sgx_epc->mr, sgx_epc->size); + + /* register the reset callback for sgx epc */ + qemu_register_reset(sgx_epc_reset, NULL); } diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c index 3f24707838..9f5f815db9 100644 --- a/hw/intc/arm_gicv3.c +++ b/hw/intc/arm_gicv3.c @@ -186,7 +186,9 @@ static void gicv3_redist_update_noirqset(GICv3CPUState *cs) * interrupt has reduced in priority and any other interrupt could * now be the new best one). */ - if (!seenbetter && cs->hppi.prio != 0xff && cs->hppi.irq < GIC_INTERNAL) { + if (!seenbetter && cs->hppi.prio != 0xff && + (cs->hppi.irq < GIC_INTERNAL || + cs->hppi.irq >= GICV3_LPI_INTID_START)) { gicv3_full_update_noirqset(cs->gic); } } @@ -354,7 +356,7 @@ static void arm_gicv3_post_load(GICv3State *s) * pending interrupt, but don't set IRQ or FIQ lines. */ for (i = 0; i < s->num_cpu; i++) { - gicv3_redist_update_lpi(&s->cpu[i]); + gicv3_redist_update_lpi_only(&s->cpu[i]); } gicv3_full_update_noirqset(s); /* Repopulate the cache of GICv3CPUState pointers for target CPUs */ @@ -387,17 +389,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) return; } - if (s->nb_redist_regions != 1) { - error_setg(errp, "VGICv3 redist region number(%d) not equal to 1", - s->nb_redist_regions); - return; - } - - gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, gicv3_set_irq, gic_ops); gicv3_init_cpuif(s); } diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 223db16fec..9884d2e39b 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -250,21 +250,11 @@ static const VMStateDescription vmstate_gicv3 = { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp) + const MemoryRegionOps *ops) { SysBusDevice *sbd = SYS_BUS_DEVICE(s); - int rdist_capacity = 0; int i; - - for (i = 0; i < s->nb_redist_regions; i++) { - rdist_capacity += s->redist_region_count[i]; - } - if (rdist_capacity < s->num_cpu) { - error_setg(errp, "Capacity of the redist regions(%d) " - "is less than number of vcpus(%d)", - rdist_capacity, s->num_cpu); - return; - } + int cpuidx; /* For the GIC, also expose incoming GPIO lines for PPIs for each CPU. * GPIO array layout is thus: @@ -293,14 +283,20 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, "gicv3_dist", 0x10000); sysbus_init_mmio(sbd, &s->iomem_dist); - s->iomem_redist = g_new0(MemoryRegion, s->nb_redist_regions); + s->redist_regions = g_new0(GICv3RedistRegion, s->nb_redist_regions); + cpuidx = 0; for (i = 0; i < s->nb_redist_regions; i++) { char *name = g_strdup_printf("gicv3_redist_region[%d]", i); + GICv3RedistRegion *region = &s->redist_regions[i]; + + region->gic = s; + region->cpuidx = cpuidx; + cpuidx += s->redist_region_count[i]; - memory_region_init_io(&s->iomem_redist[i], OBJECT(s), - ops ? &ops[1] : NULL, s, name, + memory_region_init_io(®ion->iomem, OBJECT(s), + ops ? &ops[1] : NULL, region, name, s->redist_region_count[i] * GICV3_REDIST_SIZE); - sysbus_init_mmio(sbd, &s->iomem_redist[i]); + sysbus_init_mmio(sbd, ®ion->iomem); g_free(name); } } @@ -308,7 +304,7 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) { GICv3State *s = ARM_GICV3_COMMON(dev); - int i; + int i, rdist_capacity, cpuidx; /* revision property is actually reserved and currently used only in order * to keep the interface compatible with GICv2 code, avoiding extra @@ -350,12 +346,22 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) return; } + rdist_capacity = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + rdist_capacity += s->redist_region_count[i]; + } + if (rdist_capacity < s->num_cpu) { + error_setg(errp, "Capacity of the redist regions(%d) " + "is less than number of vcpus(%d)", + rdist_capacity, s->num_cpu); + return; + } + s->cpu = g_new0(GICv3CPUState, s->num_cpu); for (i = 0; i < s->num_cpu; i++) { CPUState *cpu = qemu_get_cpu(i); uint64_t cpu_affid; - int last; s->cpu[i].cpu = cpu; s->cpu[i].gic = s; @@ -375,7 +381,6 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) * PLPIS == 0 (physical LPIs not supported) */ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); - last = (i == s->num_cpu - 1); /* The CPU mp-affinity property is in MPIDR register format; squash * the affinity bytes into 32 bits as the GICR_TYPER has them. @@ -384,13 +389,22 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) (cpu_affid & 0xFFFFFF); s->cpu[i].gicr_typer = (cpu_affid << 32) | (1 << 24) | - (i << 8) | - (last << 4); + (i << 8); if (s->lpi_enable) { s->cpu[i].gicr_typer |= GICR_TYPER_PLPIS; } } + + /* + * Now go through and set GICR_TYPER.Last for the final + * redistributor in each region. + */ + cpuidx = 0; + for (i = 0; i < s->nb_redist_regions; i++) { + cpuidx += s->redist_region_count[i]; + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } } static void arm_gicv3_finalize(Object *obj) diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 3fe5de8ad7..85fc369e55 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -351,7 +351,8 @@ static uint32_t maintenance_interrupt_state(GICv3CPUState *cs) /* Scan list registers and fill in the U, NP and EOI bits */ eoi_maintenance_interrupt_state(cs, &value); - if (cs->ich_hcr_el2 & (ICH_HCR_EL2_LRENPIE | ICH_HCR_EL2_EOICOUNT_MASK)) { + if ((cs->ich_hcr_el2 & ICH_HCR_EL2_LRENPIE) && + (cs->ich_hcr_el2 & ICH_HCR_EL2_EOICOUNT_MASK)) { value |= ICH_MISR_EL2_LRENP; } @@ -653,7 +654,7 @@ static uint64_t icv_iar_read(CPUARMState *env, const ARMCPRegInfo *ri) if (thisgrp == grp && icv_hppi_can_preempt(cs, lr)) { intid = ich_lr_vintid(lr); - if (intid < INTID_SECURE) { + if (!gicv3_intid_is_special(intid)) { icv_activate_irq(cs, idx, grp); } else { /* Interrupt goes from Pending to Invalid */ @@ -997,7 +998,7 @@ static uint64_t icc_iar0_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir0_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1020,7 +1021,7 @@ static uint64_t icc_iar1_read(CPUARMState *env, const ARMCPRegInfo *ri) intid = icc_hppir1_value(cs, env); } - if (!(intid >= INTID_SECURE && intid <= INTID_SPURIOUS)) { + if (!gicv3_intid_is_special(intid)) { icc_activate_irq(cs, intid); } @@ -1265,8 +1266,7 @@ static void icv_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, trace_gicv3_icv_eoir_write(ri->crm == 8 ? 0 : 1, gicv3_redist_affid(cs), value); - if (irq >= GICV3_MAXIRQ) { - /* Also catches special interrupt numbers and LPIs */ + if (gicv3_intid_is_special(irq)) { return; } diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c index 84bcbb5f56..c929a9cb5c 100644 --- a/hw/intc/arm_gicv3_its.c +++ b/hw/intc/arm_gicv3_its.c @@ -896,13 +896,14 @@ static bool its_writel(GICv3ITSState *s, hwaddr offset, switch (offset) { case GITS_CTLR: - s->ctlr |= (value & ~(s->ctlr)); - - if (s->ctlr & ITS_CTLR_ENABLED) { + if (value & R_GITS_CTLR_ENABLED_MASK) { + s->ctlr |= ITS_CTLR_ENABLED; extract_table_params(s); extract_cmdq_params(s); s->creadr = 0; process_cmdq(s); + } else { + s->ctlr &= ~ITS_CTLR_ENABLED; } break; case GITS_CBASER: diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c index 7d7f3882e7..90b85f1e25 100644 --- a/hw/intc/arm_gicv3_its_common.c +++ b/hw/intc/arm_gicv3_its_common.c @@ -50,8 +50,6 @@ static int gicv3_its_post_load(void *opaque, int version_id) static const VMStateDescription vmstate_its = { .name = "arm_gicv3_its", - .version_id = 1, - .minimum_version_id = 1, .pre_save = gicv3_its_pre_save, .post_load = gicv3_its_post_load, .priority = MIG_PRI_GICV3_ITS, diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 5c09f00dec..5ec5ff9ef6 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -787,11 +787,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) return; } - gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); for (i = 0; i < s->num_cpu; i++) { ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); @@ -829,7 +825,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) KVM_VGIC_V3_ADDR_TYPE_DIST, s->dev_fd, 0); if (!multiple_redist_region_allowed) { - kvm_arm_register_device(&s->iomem_redist[0], -1, + kvm_arm_register_device(&s->redist_regions[0].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST, s->dev_fd, 0); } else { @@ -842,7 +838,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) uint64_t addr_ormask = i | ((uint64_t)s->redist_region_count[i] << 52); - kvm_arm_register_device(&s->iomem_redist[i], -1, + kvm_arm_register_device(&s->redist_regions[i].iomem, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, s->dev_fd, addr_ormask); diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c index 7072bfcbb1..c8ff3eca08 100644 --- a/hw/intc/arm_gicv3_redist.c +++ b/hw/intc/arm_gicv3_redist.c @@ -256,9 +256,10 @@ static MemTxResult gicr_writel(GICv3CPUState *cs, hwaddr offset, cs->gicr_ctlr |= GICR_CTLR_ENABLE_LPIS; /* Check for any pending interr in pending table */ gicv3_redist_update_lpi(cs); - gicv3_redist_update(cs); } else { cs->gicr_ctlr &= ~GICR_CTLR_ENABLE_LPIS; + /* cs->hppi might have been an LPI; recalculate */ + gicv3_redist_update(cs); } } return MEMTX_OK; @@ -425,22 +426,24 @@ static MemTxResult gicr_writell(GICv3CPUState *cs, hwaddr offset, MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -482,22 +485,24 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, unsigned size, MemTxAttrs attrs) { - GICv3State *s = opaque; + GICv3RedistRegion *region = opaque; + GICv3State *s = region->gic; GICv3CPUState *cs; MemTxResult r; int cpuidx; assert((offset & (size - 1)) == 0); - /* This region covers all the redistributor pages; there are - * (for GICv3) two 64K pages per CPU. At the moment they are - * all contiguous (ie in this one region), though we might later - * want to allow splitting of redistributor pages into several - * blocks so we can support more CPUs. + /* + * There are (for GICv3) two 64K redistributor pages per CPU. + * In some cases the redistributor pages for all CPUs are not + * contiguous (eg on the virt board they are split into two + * parts if there are too many CPUs to all fit in the same place + * in the memory map); if so then the GIC has multiple MemoryRegions + * for the redistributors. */ - cpuidx = offset / 0x20000; - offset %= 0x20000; - assert(cpuidx < s->num_cpu); + cpuidx = region->cpuidx + offset / GICV3_REDIST_SIZE; + offset %= GICV3_REDIST_SIZE; cs = &s->cpu[cpuidx]; @@ -567,7 +572,7 @@ static void gicv3_redist_check_lpi_priority(GICv3CPUState *cs, int irq) } } -void gicv3_redist_update_lpi(GICv3CPUState *cs) +void gicv3_redist_update_lpi_only(GICv3CPUState *cs) { /* * This function scans the LPI pending table and for each pending @@ -610,6 +615,12 @@ void gicv3_redist_update_lpi(GICv3CPUState *cs) } } +void gicv3_redist_update_lpi(GICv3CPUState *cs) +{ + gicv3_redist_update_lpi_only(cs); + gicv3_redist_update(cs); +} + void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level) { /* @@ -647,6 +658,7 @@ void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level) */ if (level) { gicv3_redist_check_lpi_priority(cs, irq); + gicv3_redist_update(cs); } else { if (irq == cs->hpplpi.irq) { gicv3_redist_update_lpi(cs); @@ -669,8 +681,6 @@ void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level) /* set/clear the pending bit for this irq */ gicv3_redist_lpi_pending(cs, irq, level); - - gicv3_redist_update(cs); } void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level) diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index a0369dace7..b9c37453b0 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -412,6 +412,19 @@ FIELD(MAPC, RDBASE, 16, 32) /* Functions internal to the emulated GICv3 */ /** + * gicv3_intid_is_special: + * @intid: interrupt ID + * + * Return true if @intid is a special interrupt ID (1020 to + * 1023 inclusive). This corresponds to the GIC spec pseudocode + * IsSpecial() function. + */ +static inline bool gicv3_intid_is_special(int intid) +{ + return intid >= INTID_SECURE && intid <= INTID_SPURIOUS; +} + +/** * gicv3_redist_update: * @cs: GICv3CPUState for this redistributor * @@ -463,7 +476,24 @@ void gicv3_dist_set_irq(GICv3State *s, int irq, int level); void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level); void gicv3_redist_process_lpi(GICv3CPUState *cs, int irq, int level); void gicv3_redist_lpi_pending(GICv3CPUState *cs, int irq, int level); +/** + * gicv3_redist_update_lpi: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate the highest priority + * pending LPI and also the overall highest priority pending interrupt. + */ void gicv3_redist_update_lpi(GICv3CPUState *cs); +/** + * gicv3_redist_update_lpi_only: + * @cs: GICv3CPUState + * + * Scan the LPI pending table and recalculate cs->hpplpi only, + * without calling gicv3_redist_update() to recalculate the overall + * highest priority pending interrupt. This should be called after + * an incoming migration has loaded new state. + */ +void gicv3_redist_update_lpi_only(GICv3CPUState *cs); void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); void gicv3_init_cpuif(GICv3State *s); diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index a3a2560301..48b913aba6 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -181,7 +181,21 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MachineState *ms = MACHINE(qdev_get_machine()); - int nb_numa_nodes = ms->numa_state->num_nodes; + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } if (!dimm->hostmem) { error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); @@ -191,13 +205,6 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp) object_get_canonical_path_component(OBJECT(dimm->hostmem))); return; } - if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || - (!nb_numa_nodes && dimm->node)) { - error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" - PRIu32 "' which exceeds the number of numa nodes: %d", - dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); - return; - } if (ddc->realize) { ddc->realize(dimm, errp); diff --git a/hw/mips/bootloader.c b/hw/mips/bootloader.c index 6ec8314490..99991f8b2b 100644 --- a/hw/mips/bootloader.c +++ b/hw/mips/bootloader.c @@ -182,7 +182,11 @@ void bl_gen_write_ulong(uint32_t **p, target_ulong addr, target_ulong val) { bl_gen_load_ulong(p, BL_REG_K0, val); bl_gen_load_ulong(p, BL_REG_K1, addr); - bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + if (bootcpu_supports_isa(ISA_MIPS3)) { + bl_gen_sd(p, BL_REG_K0, BL_REG_K1, 0x0); + } else { + bl_gen_sw(p, BL_REG_K0, BL_REG_K1, 0x0); + } } void bl_gen_write_u32(uint32_t **p, target_ulong addr, uint32_t val) diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 0e3cca5511..59ca08b93a 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -777,14 +777,15 @@ static void boston_mach_init(MachineState *machine) exit(1); } } else if (machine->kernel_filename) { - uint64_t kernel_entry, kernel_high, kernel_size; + uint64_t kernel_entry, kernel_high; + ssize_t kernel_size; kernel_size = load_elf(machine->kernel_filename, NULL, cpu_mips_kseg0_to_phys, NULL, &kernel_entry, NULL, &kernel_high, NULL, 0, EM_MIPS, 1, 0); - if (kernel_size) { + if (kernel_size > 0) { int dt_size; g_autofree const void *dtb_file_data, *dtb_load_data; hwaddr dtb_paddr = QEMU_ALIGN_UP(kernel_high, 64 * KiB); diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c index 4ad4f50e08..eb39c64694 100644 --- a/hw/misc/macio/pmu.c +++ b/hw/misc/macio/pmu.c @@ -718,6 +718,7 @@ static const VMStateDescription vmstate_pmu = { }, .subsections = (const VMStateDescription * []) { &vmstate_pmu_adb, + NULL } }; diff --git a/hw/misc/sifive_u_otp.c b/hw/misc/sifive_u_otp.c index 18aa0bd55d..52fdb750c0 100644 --- a/hw/misc/sifive_u_otp.c +++ b/hw/misc/sifive_u_otp.c @@ -209,7 +209,14 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) TYPE_SIFIVE_U_OTP, SIFIVE_U_OTP_REG_SIZE); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio); - dinfo = drive_get_next(IF_NONE); + dinfo = drive_get_next(IF_PFLASH); + if (!dinfo) { + dinfo = drive_get_next(IF_NONE); + if (dinfo) { + warn_report("using \"-drive if=none\" for the OTP is deprecated, " + "use \"-drive if=pflash\" instead."); + } + } if (dinfo) { int ret; uint64_t perm; @@ -235,14 +242,10 @@ static void sifive_u_otp_realize(DeviceState *dev, Error **errp) if (blk_pread(s->blk, 0, s->fuse, filesize) != filesize) { error_setg(errp, "failed to read the initial flash content"); + return; } } } -} - -static void sifive_u_otp_reset(DeviceState *dev) -{ - SiFiveUOTPState *s = SIFIVE_U_OTP(dev); /* Initialize all fuses' initial value to 0xFFs */ memset(s->fuse, 0xff, sizeof(s->fuse)); @@ -259,13 +262,15 @@ static void sifive_u_otp_reset(DeviceState *dev) serial_data = s->serial; if (blk_pwrite(s->blk, index * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index); + error_setg(errp, "failed to write index<%d>", index); + return; } serial_data = ~(s->serial); if (blk_pwrite(s->blk, (index + 1) * SIFIVE_U_OTP_FUSE_WORD, &serial_data, SIFIVE_U_OTP_FUSE_WORD, 0) < 0) { - error_report("write error index<%d>", index + 1); + error_setg(errp, "failed to write index<%d>", index + 1); + return; } } @@ -279,7 +284,6 @@ static void sifive_u_otp_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, sifive_u_otp_properties); dc->realize = sifive_u_otp_realize; - dc->reset = sifive_u_otp_reset; } static const TypeInfo sifive_u_otp_info = { diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 0d888f29a6..30379d2ca4 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -232,10 +232,10 @@ fail: } static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index, - int last_index) + int vq_index_end) { net->dev.vq_index = vq_index; - net->dev.last_index = last_index; + net->dev.vq_index_end = vq_index_end; } static int vhost_net_start_one(struct vhost_net *net, @@ -326,11 +326,11 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, VirtIONet *n = VIRTIO_NET(dev); int nvhosts = data_queue_pairs + cvq; struct vhost_net *net; - int r, e, i, last_index = data_queue_pairs * 2; + int r, e, i, index_end = data_queue_pairs * 2; NetClientState *peer; - if (!cvq) { - last_index -= 1; + if (cvq) { + index_end += 1; } if (!k->set_guest_notifiers) { @@ -347,7 +347,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } net = get_vhost_net(peer); - vhost_net_set_vq_index(net, i * 2, last_index); + vhost_net_set_vq_index(net, i * 2, index_end); /* Suppress the masking guest notifiers on vhost user * because vhost user doesn't interrupt masking/unmasking diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 41f796a247..f65af4e9ef 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1441,6 +1441,7 @@ static void vmxnet3_activate_device(VMXNET3State *s) vmxnet3_setup_rx_filtering(s); /* Cache fields from shared memory */ s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); + assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); VMW_CFPRN("MTU is %u", s->mtu); s->max_rx_frags = @@ -1486,6 +1487,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Read rings memory locations for TX queues */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); + if (size > VMXNET3_TX_RING_MAX_SIZE) { + size = VMXNET3_TX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, sizeof(struct Vmxnet3_TxDesc), false); @@ -1496,6 +1500,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* TXC ring */ pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); + if (size > VMXNET3_TC_RING_MAX_SIZE) { + size = VMXNET3_TC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_TxCompDesc), true); VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); @@ -1537,6 +1544,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RX rings */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); + if (size > VMXNET3_RX_RING_MAX_SIZE) { + size = VMXNET3_RX_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, sizeof(struct Vmxnet3_RxDesc), false); VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", @@ -1546,6 +1556,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* RXC ring */ pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); + if (size > VMXNET3_RC_RING_MAX_SIZE) { + size = VMXNET3_RC_RING_MAX_SIZE; + } vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, sizeof(struct Vmxnet3_RxCompDesc), true); VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 6a571d18cf..5f573c417b 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4168,6 +4168,11 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, int i = 0; uint32_t nsid; + if (off >= sizeof(nslist)) { + trace_pci_nvme_err_invalid_log_page_offset(off, sizeof(nslist)); + return NVME_INVALID_FIELD | NVME_DNR; + } + memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index b7cf1494e7..8b5f98c761 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -465,12 +465,6 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) "linked to an nvme-subsys device"); return; } - - if (ns->params.shared) { - error_setg(errp, "shared requires that the nvme device is " - "linked to an nvme-subsys device"); - return; - } } else { /* * If this namespace belongs to a subsystem (through a link on the @@ -532,7 +526,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) static Property nvme_ns_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf), DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), - DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, false), + DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 495dcff5eb..fb58d63950 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -14,7 +14,7 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) { NvmeSubsystem *subsys = n->subsys; - int cntlid; + int cntlid, nsid; for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { if (!subsys->ctrls[cntlid]) { @@ -29,12 +29,20 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) subsys->ctrls[cntlid] = n; + for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { + NvmeNamespace *ns = subsys->namespaces[nsid]; + if (ns && ns->params.shared && !ns->params.detached) { + nvme_attach_ns(n, ns); + } + } + return cntlid; } void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) { subsys->ctrls[n->cntlid] = NULL; + n->cntlid = -1; } static void nvme_subsys_setup(NvmeSubsystem *subsys) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4a84e478ce..e5993c1ef5 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -2182,6 +2185,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ -2853,6 +2858,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index cf02f0d6a5..7beafd40a8 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power) { return ~0x0; } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 914a9bf3d1..d7d73a31e4 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -472,6 +497,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -481,6 +525,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -496,7 +541,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, @@ -509,6 +562,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && + ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } @@ -625,6 +688,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -643,7 +707,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -693,18 +756,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + pcie_cap_slot_do_unplug(dev); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -731,6 +785,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } @@ -874,8 +929,8 @@ void pcie_add_capability(PCIDevice *dev, uint16_t offset, uint16_t size) { assert(offset >= PCI_CONFIG_SPACE_SIZE); - assert(offset < offset + size); - assert(offset + size <= PCIE_CONFIG_SPACE_SIZE); + assert(offset < (uint16_t)(offset + size)); + assert((uint16_t)(offset + size) <= PCIE_CONFIG_SPACE_SIZE); assert(size >= 8); assert(pci_is_express(dev)); diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index da850e8dde..e95c1e5519 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -148,7 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), - DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true), + DEFINE_PROP_BOOL("x-native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 163c90388a..3b5fd749be 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1414,7 +1414,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, kvmppc_write_hpte(ptex, pte0, pte1); } else { if (pte0 & HPTE64_V_VALID) { - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); /* * When setting valid, we write PTE1 first. This ensures * proper synchronization with the reading code in @@ -1430,7 +1430,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, * ppc_hash64_pteg_search() */ smp_wmb(); - stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); } } } @@ -1438,7 +1438,7 @@ void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 15; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { @@ -1454,7 +1454,7 @@ static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1) { - hwaddr offset = ptex * HASH_PTE_SIZE_64 + 14; + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); if (!spapr->htab) { diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c index f8924270ef..4ee03c83e4 100644 --- a/hw/ppc/spapr_softmmu.c +++ b/hw/ppc/spapr_softmmu.c @@ -426,7 +426,7 @@ static void new_hpte_store(void *htab, uint64_t pteg, int slot, addr += slot * HASH_PTE_SIZE_64; stq_p(addr, pte0); - stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); + stq_p(addr + HPTE64_DW1, pte1); } static int rehash_hpte(PowerPCCPU *cpu, diff --git a/hw/rtc/meson.build b/hw/rtc/meson.build index 7cecdee5dd..8fd8d8f9a7 100644 --- a/hw/rtc/meson.build +++ b/hw/rtc/meson.build @@ -2,7 +2,7 @@ softmmu_ss.add(when: 'CONFIG_DS1338', if_true: files('ds1338.c')) softmmu_ss.add(when: 'CONFIG_M41T80', if_true: files('m41t80.c')) softmmu_ss.add(when: 'CONFIG_M48T59', if_true: files('m48t59.c')) -softmmu_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c')) +specific_ss.add(when: 'CONFIG_PL031', if_true: files('pl031.c')) softmmu_ss.add(when: 'CONFIG_TWL92230', if_true: files('twl92230.c')) softmmu_ss.add(when: ['CONFIG_ISA_BUS', 'CONFIG_M48T59'], if_true: files('m48t59-isa.c')) softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP', if_true: files('xlnx-zynqmp-rtc.c')) diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c index 2bbb2062ac..e7ced90b02 100644 --- a/hw/rtc/pl031.c +++ b/hw/rtc/pl031.c @@ -24,6 +24,7 @@ #include "qemu/log.h" #include "qemu/module.h" #include "trace.h" +#include "qapi/qapi-events-misc-target.h" #define RTC_DR 0x00 /* Data read register */ #define RTC_MR 0x04 /* Match register */ @@ -136,10 +137,17 @@ static void pl031_write(void * opaque, hwaddr offset, trace_pl031_write(offset, value); switch (offset) { - case RTC_LR: + case RTC_LR: { + struct tm tm; + s->tick_offset += value - pl031_get_count(s); + + qemu_get_timedate(&tm, s->tick_offset); + qapi_event_send_rtc_change(qemu_timedate_diff(&tm)); + pl031_set_alarm(s); break; + } case RTC_MR: s->mr = value; pl031_set_alarm(s); diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 84f935b549..58d0edbd56 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -894,6 +894,7 @@ void esp_hard_reset(ESPState *s) memset(s->wregs, 0, ESP_REGS); s->tchi_written = 0; s->ti_size = 0; + s->async_len = 0; fifo8_reset(&s->fifo); fifo8_reset(&s->cmdfifo); s->dma = 0; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index dd387b0d39..080046e3f5 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -551,6 +551,7 @@ static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); return 0; } } @@ -2239,6 +2240,7 @@ static void vfio_disconnect_container(VFIOGroup *group) if (QLIST_EMPTY(&container->group_list)) { VFIOAddressSpace *space = container->space; VFIOGuestIOMMU *giommu, *tmp; + VFIOHostDMAWindow *hostwin, *next; QLIST_REMOVE(container, next); @@ -2249,6 +2251,12 @@ static void vfio_disconnect_container(VFIOGroup *group) g_free(giommu); } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } + trace_vfio_disconnect_container(container->fd); close(container->fd); g_free(container); diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 0d8051426c..bcaf00e09f 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -645,7 +645,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); } - if (dev->vq_index + dev->nvqs != dev->last_index) { + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { return 0; } diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index c6962fcbfe..9a4f491b54 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -231,7 +231,7 @@ static void balloon_stats_poll_cb(void *opaque) return; } - virtqueue_push(s->svq, s->stats_vq_elem, s->stats_vq_offset); + virtqueue_push(s->svq, s->stats_vq_elem, 0); virtio_notify(vdev, s->svq); g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; @@ -438,7 +438,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) memory_region_unref(section.mr); } - virtqueue_push(vq, elem, offset); + virtqueue_push(vq, elem, 0); virtio_notify(vdev, vq); g_free(elem); virtio_balloon_pbp_free(&pbp); @@ -510,6 +510,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtQueue *vq = dev->free_page_vq; bool ret = true; + int i; while (dev->block_iothread) { qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock); @@ -544,12 +545,14 @@ static bool get_free_page_hints(VirtIOBalloon *dev) } if (elem->in_num && dev->free_page_hint_status == FREE_PAGE_HINT_S_START) { - qemu_guest_free_page_hint(elem->in_sg[0].iov_base, - elem->in_sg[0].iov_len); + for (i = 0; i < elem->in_num; i++) { + qemu_guest_free_page_hint(elem->in_sg[i].iov_base, + elem->in_sg[i].iov_len); + } } out: - virtqueue_push(vq, elem, 1); + virtqueue_push(vq, elem, 0); g_free(elem); return ret; } diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 7b3ebca178..72da12fea5 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -817,6 +817,17 @@ static char *virtio_mmio_bus_get_dev_path(DeviceState *dev) return path; } +static void virtio_mmio_vmstate_change(DeviceState *d, bool running) +{ + VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); + + if (running) { + virtio_mmio_start_ioeventfd(proxy); + } else { + virtio_mmio_stop_ioeventfd(proxy); + } +} + static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) { BusClass *bus_class = BUS_CLASS(klass); @@ -832,6 +843,7 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->pre_plugged = virtio_mmio_pre_plugged; + k->vmstate_change = virtio_mmio_vmstate_change; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cc69a9b881..ea7c079fb0 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -247,13 +247,10 @@ static void vring_packed_event_read(VirtIODevice *vdev, hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap); hwaddr off_flags = offsetof(VRingPackedDescEvent, flags); - address_space_read_cached(cache, off_flags, &e->flags, - sizeof(e->flags)); + e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags); /* Make sure flags is seen before off_wrap */ smp_rmb(); - address_space_read_cached(cache, off_off, &e->off_wrap, - sizeof(e->off_wrap)); - virtio_tswap16s(vdev, &e->off_wrap); + e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); virtio_tswap16s(vdev, &e->flags); } @@ -263,8 +260,7 @@ static void vring_packed_off_wrap_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, off_wrap); - virtio_tswap16s(vdev, &off_wrap); - address_space_write_cached(cache, off, &off_wrap, sizeof(off_wrap)); + virtio_stw_phys_cached(vdev, cache, off, off_wrap); address_space_cache_invalidate(cache, off, sizeof(off_wrap)); } @@ -273,8 +269,7 @@ static void vring_packed_flags_write(VirtIODevice *vdev, { hwaddr off = offsetof(VRingPackedDescEvent, flags); - virtio_tswap16s(vdev, &flags); - address_space_write_cached(cache, off, &flags, sizeof(flags)); + virtio_stw_phys_cached(vdev, cache, off, flags); address_space_cache_invalidate(cache, off, sizeof(flags)); } @@ -507,11 +502,9 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, MemoryRegionCache *cache, int i) { - address_space_read_cached(cache, - i * sizeof(VRingPackedDesc) + - offsetof(VRingPackedDesc, flags), - flags, sizeof(*flags)); - virtio_tswap16s(vdev, flags); + hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); + + *flags = virtio_lduw_phys_cached(vdev, cache, off); } static void vring_packed_desc_read(VirtIODevice *vdev, @@ -564,8 +557,7 @@ static void vring_packed_desc_write_flags(VirtIODevice *vdev, { hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags); - virtio_tswap16s(vdev, &desc->flags); - address_space_write_cached(cache, off, &desc->flags, sizeof(desc->flags)); + virtio_stw_phys_cached(vdev, cache, off, desc->flags); address_space_cache_invalidate(cache, off, sizeof(desc->flags)); } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 6bb2a0f7ec..35d8e93976 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -503,6 +503,7 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x00020000 #define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ +#define CF_NOIRQ 0x00100000 /* Generate an uninterruptible TB */ #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */ #define CF_CLUSTER_SHIFT 24 diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 610cba58fe..c57204ddad 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -21,7 +21,6 @@ static inline void gen_tb_start(const TranslationBlock *tb) { TCGv_i32 count; - tcg_ctx->exitreq_label = gen_new_label(); if (tb_cflags(tb) & CF_USE_ICOUNT) { count = tcg_temp_local_new_i32(); } else { @@ -42,7 +41,19 @@ static inline void gen_tb_start(const TranslationBlock *tb) icount_start_insn = tcg_last_op(); } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + /* + * Emit the check against icount_decr.u32 to see if we should exit + * unless we suppress the check with CF_NOIRQ. If we are using + * icount and have suppressed interruption the higher level code + * should have ensured we don't run more instructions than the + * budget. + */ + if (tb_cflags(tb) & CF_NOIRQ) { + tcg_ctx->exitreq_label = NULL; + } else { + tcg_ctx->exitreq_label = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label); + } if (tb_cflags(tb) & CF_USE_ICOUNT) { tcg_gen_st16_i32(count, cpu_env, @@ -74,8 +85,10 @@ static inline void gen_tb_end(const TranslationBlock *tb, int num_insns) tcgv_i32_arg(tcg_constant_i32(num_insns))); } - gen_set_label(tcg_ctx->exitreq_label); - tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + if (tcg_ctx->exitreq_label) { + gen_set_label(tcg_ctx->exitreq_label); + tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED); + } } #endif diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index f04f1791bd..7ca92843c6 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -56,6 +56,7 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool keep_pci_slot_hpc; bool use_acpi_hotplug_bridge; AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index aa4f0d6770..fc38e4b7dc 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -215,13 +215,23 @@ struct GICv3CPUState { bool seenbetter; }; +/* + * The redistributor pages might be split into more than one region + * on some machine types if there are many CPUs. + */ +typedef struct GICv3RedistRegion { + GICv3State *gic; + MemoryRegion iomem; + uint32_t cpuidx; /* index of first CPU this region covers */ +} GICv3RedistRegion; + struct GICv3State { /*< private >*/ SysBusDevice parent_obj; /*< public >*/ MemoryRegion iomem_dist; /* Distributor */ - MemoryRegion *iomem_redist; /* Redistributor Regions */ + GICv3RedistRegion *redist_regions; /* Redistributor Regions */ uint32_t *redist_region_count; /* redistributor count within each region */ uint32_t nb_redist_regions; /* number of redist regions */ @@ -306,6 +316,6 @@ struct ARMGICv3CommonClass { }; void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, - const MemoryRegionOps *ops, Error **errp); + const MemoryRegionOps *ops); #endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 5c4016b995..e7cdf2d5ec 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; + bool has_power; /* PCI config space */ uint8_t *config; @@ -908,5 +909,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 72622bd337..20d3066595 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -181,6 +181,7 @@ struct DeviceState { char *canonical_path; bool realized; bool pending_deleted_event; + int64_t pending_deleted_expires_ms; QDict *opts; int hotplugged; bool allow_unplug_during_migration; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 3fa0b554ef..58a73e7b7a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -74,8 +74,8 @@ struct vhost_dev { unsigned int nvqs; /* the first virtqueue which would be used by this vhost dev */ int vq_index; - /* the last vq index for the virtio device (not vhost) */ - int last_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; /* if non-zero, minimum required value for max_queues */ int num_queues; uint64_t features; diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 515d327cf1..e69efbd47f 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -27,6 +27,7 @@ #include "qemu/thread.h" #include "qemu/queue.h" #include "qemu/atomic.h" +#include "qemu/notify.h" #include "qemu/sys_membarrier.h" #ifdef __cplusplus @@ -66,6 +67,13 @@ struct rcu_reader_data { /* Data used for registry, protected by rcu_registry_lock */ QLIST_ENTRY(rcu_reader_data) node; + + /* + * NotifierList used to force an RCU grace period. Accessed under + * rcu_registry_lock. Note that the notifier is called _outside_ + * the thread! + */ + NotifierList force_rcu; }; extern __thread struct rcu_reader_data rcu_reader; @@ -180,6 +188,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock) #define RCU_READ_LOCK_GUARD() \ g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock() +/* + * Force-RCU notifiers tell readers that they should exit their + * read-side critical section. + */ +void rcu_add_force_rcu_notifier(Notifier *n); +void rcu_remove_force_rcu_notifier(Notifier *n); + #ifdef __cplusplus } #endif diff --git a/include/qemu/transactions.h b/include/qemu/transactions.h index 92c5965235..2f2060acd9 100644 --- a/include/qemu/transactions.h +++ b/include/qemu/transactions.h @@ -31,6 +31,9 @@ * tran_create(), call your "prepare" functions on it, and finally call * tran_abort() or tran_commit() to finalize the transaction by corresponding * finalization actions in reverse order. + * + * The clean() functions registered by the drivers in a transaction are called + * last, after all abort() or commit() functions have been called. */ #ifndef QEMU_TRANSACTIONS_H diff --git a/include/qom/object.h b/include/qom/object.h index faae0d841f..fae096f51c 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -1544,6 +1544,18 @@ Object *object_resolve_path_type(const char *path, const char *typename, bool *ambiguous); /** + * object_resolve_path_at: + * @parent: the object in which to resolve the path + * @path: the path to resolve + * + * This is like object_resolve_path(), except paths not starting with + * a slash are relative to @parent. + * + * Returns: The resolved object or NULL on path lookup failure. + */ +Object *object_resolve_path_at(Object *parent, const char *path); + +/** * object_resolve_path_component: * @parent: the object in which to resolve the path * @part: the component to resolve. diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 5da8c02d08..767f54c76d 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -3254,9 +3254,13 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * Otherwise, allocate a private page to hold them. */ if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { - abi_ulong tramp_page = target_mmap(0, TARGET_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tramp_page == -1) { + return -errno; + } + setup_sigtramp(tramp_page); target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); } diff --git a/linux-user/host/aarch64/host-signal.h b/linux-user/host/aarch64/host-signal.h index 0c0b08383a..9770b36dc1 100644 --- a/linux-user/host/aarch64/host-signal.h +++ b/linux-user/host/aarch64/host-signal.h @@ -35,6 +35,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.pc; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.pc = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { struct _aarch64_ctx *hdr; diff --git a/linux-user/host/aarch64/hostdep.h b/linux-user/host/aarch64/hostdep.h index a8d41a21ad..39299d798a 100644 --- a/linux-user/host/aarch64/hostdep.h +++ b/linux-user/host/aarch64/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - __u64 *pcreg = &uc->uc_mcontext.pc; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/alpha/host-signal.h b/linux-user/host/alpha/host-signal.h index e080be412f..f4c942948a 100644 --- a/linux-user/host/alpha/host-signal.h +++ b/linux-user/host/alpha/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.sc_pc; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.sc_pc = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint32_t *pc = (uint32_t *)host_signal_pc(uc); diff --git a/linux-user/host/arm/host-signal.h b/linux-user/host/arm/host-signal.h index efb165c0c5..6c095773c0 100644 --- a/linux-user/host/arm/host-signal.h +++ b/linux-user/host/arm/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.arm_pc; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.arm_pc = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { /* diff --git a/linux-user/host/arm/hostdep.h b/linux-user/host/arm/hostdep.h index 9276fe6ceb..86b137875a 100644 --- a/linux-user/host/arm/hostdep.h +++ b/linux-user/host/arm/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - unsigned long *pcreg = &uc->uc_mcontext.arm_pc; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/i386/host-signal.h b/linux-user/host/i386/host-signal.h index 4c8eef99ce..abe1ece5c9 100644 --- a/linux-user/host/i386/host-signal.h +++ b/linux-user/host/i386/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.gregs[REG_EIP]; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.gregs[REG_EIP] = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe diff --git a/linux-user/host/i386/hostdep.h b/linux-user/host/i386/hostdep.h index 073be74d87..ce7136501f 100644 --- a/linux-user/host/i386/hostdep.h +++ b/linux-user/host/i386/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - greg_t *pcreg = &uc->uc_mcontext.gregs[REG_EIP]; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/mips/host-signal.h b/linux-user/host/mips/host-signal.h index ef341f7c20..c666ed8c3f 100644 --- a/linux-user/host/mips/host-signal.h +++ b/linux-user/host/mips/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.pc; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.pc = pc; +} + #if defined(__misp16) || defined(__mips_micromips) #error "Unsupported encoding" #endif diff --git a/linux-user/host/ppc/host-signal.h b/linux-user/host/ppc/host-signal.h index a491c413dc..1d8e658ff7 100644 --- a/linux-user/host/ppc/host-signal.h +++ b/linux-user/host/ppc/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.regs->nip; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.regs->nip = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.regs->trap != 0x400 diff --git a/linux-user/host/ppc64/hostdep.h b/linux-user/host/ppc64/hostdep.h index 98979ad917..0c290dd904 100644 --- a/linux-user/host/ppc64/hostdep.h +++ b/linux-user/host/ppc64/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - unsigned long *pcreg = &uc->uc_mcontext.gp_regs[PT_NIP]; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/riscv/host-signal.h b/linux-user/host/riscv/host-signal.h index 3b168cb58b..a4f170efb0 100644 --- a/linux-user/host/riscv/host-signal.h +++ b/linux-user/host/riscv/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.__gregs[REG_PC]; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.__gregs[REG_PC] = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { /* diff --git a/linux-user/host/riscv/hostdep.h b/linux-user/host/riscv/hostdep.h index 2ba07456ae..7f67c22868 100644 --- a/linux-user/host/riscv/hostdep.h +++ b/linux-user/host/riscv/hostdep.h @@ -11,24 +11,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - unsigned long *pcreg = &uc->uc_mcontext.__gregs[REG_PC]; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/s390/host-signal.h b/linux-user/host/s390/host-signal.h index 26990e4893..a524f2ab00 100644 --- a/linux-user/host/s390/host-signal.h +++ b/linux-user/host/s390/host-signal.h @@ -16,6 +16,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.psw.addr; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.psw.addr = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint16_t *pinsn = (uint16_t *)host_signal_pc(uc); diff --git a/linux-user/host/s390x/hostdep.h b/linux-user/host/s390x/hostdep.h index 4f0171f36f..d801145854 100644 --- a/linux-user/host/s390x/hostdep.h +++ b/linux-user/host/s390x/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - unsigned long *pcreg = &uc->uc_mcontext.psw.addr; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/host/sparc/host-signal.h b/linux-user/host/sparc/host-signal.h index 5e71d33f8e..7342936071 100644 --- a/linux-user/host/sparc/host-signal.h +++ b/linux-user/host/sparc/host-signal.h @@ -20,6 +20,15 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) #endif } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ +#ifdef __arch64__ + uc->uc_mcontext.mc_gregs[MC_PC] = pc; +#else + uc->uc_mcontext.gregs[REG_PC] = pc; +#endif +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { uint32_t insn = *(uint32_t *)host_signal_pc(uc); diff --git a/linux-user/host/x86_64/host-signal.h b/linux-user/host/x86_64/host-signal.h index 883d2fcf65..c71d597eb2 100644 --- a/linux-user/host/x86_64/host-signal.h +++ b/linux-user/host/x86_64/host-signal.h @@ -15,6 +15,11 @@ static inline uintptr_t host_signal_pc(ucontext_t *uc) return uc->uc_mcontext.gregs[REG_RIP]; } +static inline void host_signal_set_pc(ucontext_t *uc, uintptr_t pc) +{ + uc->uc_mcontext.gregs[REG_RIP] = pc; +} + static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc) { return uc->uc_mcontext.gregs[REG_TRAPNO] == 0xe diff --git a/linux-user/host/x86_64/hostdep.h b/linux-user/host/x86_64/hostdep.h index a4fefb5114..9c62bd26bd 100644 --- a/linux-user/host/x86_64/hostdep.h +++ b/linux-user/host/x86_64/hostdep.h @@ -15,24 +15,4 @@ /* We have a safe-syscall.inc.S */ #define HAVE_SAFE_SYSCALL -#ifndef __ASSEMBLER__ - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -/* Adjust the signal context to rewind out of safe-syscall if we're in it */ -static inline void rewind_if_in_safe_syscall(void *puc) -{ - ucontext_t *uc = puc; - greg_t *pcreg = &uc->uc_mcontext.gregs[REG_RIP]; - - if (*pcreg > (uintptr_t)safe_syscall_start - && *pcreg < (uintptr_t)safe_syscall_end) { - *pcreg = (uintptr_t)safe_syscall_start; - } -} - -#endif /* __ASSEMBLER__ */ - #endif diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index 7193c3b226..f182d40190 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -637,6 +637,10 @@ IOCTL(LOOP_SET_STATUS64, IOC_W, MK_PTR(MK_STRUCT(STRUCT_loop_info64))) IOCTL(LOOP_GET_STATUS64, IOC_R, MK_PTR(MK_STRUCT(STRUCT_loop_info64))) IOCTL(LOOP_CHANGE_FD, 0, TYPE_INT) + IOCTL(LOOP_SET_CAPACITY, 0, TYPE_INT) + IOCTL(LOOP_SET_DIRECT_IO, 0, TYPE_INT) + IOCTL(LOOP_SET_BLOCK_SIZE, 0, TYPE_INT) + IOCTL(LOOP_CONFIGURE, IOC_W, MK_PTR(MK_STRUCT(STRUCT_loop_config))) IOCTL(LOOP_CTL_ADD, 0, TYPE_INT) IOCTL(LOOP_CTL_REMOVE, 0, TYPE_INT) diff --git a/linux-user/linux_loop.h b/linux-user/linux_loop.h index c69fea11e4..f80b96f1ff 100644 --- a/linux-user/linux_loop.h +++ b/linux-user/linux_loop.h @@ -96,6 +96,8 @@ struct loop_info64 { #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 #define LOOP_SET_DIRECT_IO 0x4C08 +#define LOOP_SET_BLOCK_SIZE 0x4C09 +#define LOOP_CONFIGURE 0x4C0A /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h index 6bc0390262..aaa9ffc0e2 100644 --- a/linux-user/safe-syscall.h +++ b/linux-user/safe-syscall.h @@ -127,6 +127,9 @@ #ifdef HAVE_SAFE_SYSCALL /* The core part of this function is implemented in assembly */ extern long safe_syscall_base(int *pending, long number, ...); +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; #define safe_syscall(...) \ ({ \ diff --git a/linux-user/signal.c b/linux-user/signal.c index 81c45bfce9..6d5e5b698c 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -31,6 +31,7 @@ #include "trace.h" #include "signal-common.h" #include "host-signal.h" +#include "safe-syscall.h" static struct target_sigaction sigact_table[TARGET_NSIG]; @@ -793,12 +794,20 @@ int queue_signal(CPUArchState *env, int sig, int si_type, return 1; /* indicates that the signal was queued */ } -#ifndef HAVE_SAFE_SYSCALL + +/* Adjust the signal context to rewind out of safe-syscall if we're in it */ static inline void rewind_if_in_safe_syscall(void *puc) { - /* Default version: never rewind */ -} +#ifdef HAVE_SAFE_SYSCALL + ucontext_t *uc = (ucontext_t *)puc; + uintptr_t pcreg = host_signal_pc(uc); + + if (pcreg > (uintptr_t)safe_syscall_start + && pcreg < (uintptr_t)safe_syscall_end) { + host_signal_set_pc(uc, (uintptr_t)safe_syscall_start); + } #endif +} static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 544f5b662f..f1cfcc8104 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -197,8 +197,10 @@ //#define DEBUG_ERESTARTSYS //#include <linux/msdos_fs.h> -#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct linux_dirent [2]) -#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct linux_dirent [2]) +#define VFAT_IOCTL_READDIR_BOTH \ + _IOC(_IOC_READ, 'r', 1, (sizeof(struct linux_dirent) + 256) * 2) +#define VFAT_IOCTL_READDIR_SHORT \ + _IOC(_IOC_READ, 'r', 2, (sizeof(struct linux_dirent) + 256) * 2) #undef _syscall0 #undef _syscall1 @@ -8137,6 +8139,159 @@ static int host_to_target_cpu_mask(const unsigned long *host_mask, return 0; } +#ifdef TARGET_NR_getdents +static int do_getdents(abi_long dirfd, abi_long arg2, abi_long count) +{ + g_autofree void *hdirp = NULL; + void *tdirp; + int hlen, hoff, toff; + int hreclen, treclen; + off64_t prev_diroff = 0; + + hdirp = g_try_malloc(count); + if (!hdirp) { + return -TARGET_ENOMEM; + } + +#ifdef EMULATE_GETDENTS_WITH_GETDENTS + hlen = sys_getdents(dirfd, hdirp, count); +#else + hlen = sys_getdents64(dirfd, hdirp, count); +#endif + + hlen = get_errno(hlen); + if (is_error(hlen)) { + return hlen; + } + + tdirp = lock_user(VERIFY_WRITE, arg2, count, 0); + if (!tdirp) { + return -TARGET_EFAULT; + } + + for (hoff = toff = 0; hoff < hlen; hoff += hreclen, toff += treclen) { +#ifdef EMULATE_GETDENTS_WITH_GETDENTS + struct linux_dirent *hde = hdirp + hoff; +#else + struct linux_dirent64 *hde = hdirp + hoff; +#endif + struct target_dirent *tde = tdirp + toff; + int namelen; + uint8_t type; + + namelen = strlen(hde->d_name); + hreclen = hde->d_reclen; + treclen = offsetof(struct target_dirent, d_name) + namelen + 2; + treclen = QEMU_ALIGN_UP(treclen, __alignof(struct target_dirent)); + + if (toff + treclen > count) { + /* + * If the host struct is smaller than the target struct, or + * requires less alignment and thus packs into less space, + * then the host can return more entries than we can pass + * on to the guest. + */ + if (toff == 0) { + toff = -TARGET_EINVAL; /* result buffer is too small */ + break; + } + /* + * Return what we have, resetting the file pointer to the + * location of the first record not returned. + */ + lseek64(dirfd, prev_diroff, SEEK_SET); + break; + } + + prev_diroff = hde->d_off; + tde->d_ino = tswapal(hde->d_ino); + tde->d_off = tswapal(hde->d_off); + tde->d_reclen = tswap16(treclen); + memcpy(tde->d_name, hde->d_name, namelen + 1); + + /* + * The getdents type is in what was formerly a padding byte at the + * end of the structure. + */ +#ifdef EMULATE_GETDENTS_WITH_GETDENTS + type = *((uint8_t *)hde + hreclen - 1); +#else + type = hde->d_type; +#endif + *((uint8_t *)tde + treclen - 1) = type; + } + + unlock_user(tdirp, arg2, toff); + return toff; +} +#endif /* TARGET_NR_getdents */ + +#if defined(TARGET_NR_getdents64) && defined(__NR_getdents64) +static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count) +{ + g_autofree void *hdirp = NULL; + void *tdirp; + int hlen, hoff, toff; + int hreclen, treclen; + off64_t prev_diroff = 0; + + hdirp = g_try_malloc(count); + if (!hdirp) { + return -TARGET_ENOMEM; + } + + hlen = get_errno(sys_getdents64(dirfd, hdirp, count)); + if (is_error(hlen)) { + return hlen; + } + + tdirp = lock_user(VERIFY_WRITE, arg2, count, 0); + if (!tdirp) { + return -TARGET_EFAULT; + } + + for (hoff = toff = 0; hoff < hlen; hoff += hreclen, toff += treclen) { + struct linux_dirent64 *hde = hdirp + hoff; + struct target_dirent64 *tde = tdirp + toff; + int namelen; + + namelen = strlen(hde->d_name) + 1; + hreclen = hde->d_reclen; + treclen = offsetof(struct target_dirent64, d_name) + namelen; + treclen = QEMU_ALIGN_UP(treclen, __alignof(struct target_dirent64)); + + if (toff + treclen > count) { + /* + * If the host struct is smaller than the target struct, or + * requires less alignment and thus packs into less space, + * then the host can return more entries than we can pass + * on to the guest. + */ + if (toff == 0) { + toff = -TARGET_EINVAL; /* result buffer is too small */ + break; + } + /* + * Return what we have, resetting the file pointer to the + * location of the first record not returned. + */ + lseek64(dirfd, prev_diroff, SEEK_SET); + break; + } + + prev_diroff = hde->d_off; + tde->d_ino = tswap64(hde->d_ino); + tde->d_off = tswap64(hde->d_off); + tde->d_reclen = tswap16(treclen); + tde->d_type = hde->d_type; + memcpy(tde->d_name, hde->d_name, namelen); + } + + unlock_user(tdirp, arg2, toff); + return toff; +} +#endif /* TARGET_NR_getdents64 */ + #if defined(TARGET_NR_pivot_root) && defined(__NR_pivot_root) _syscall2(int, pivot_root, const char *, new_root, const char *, put_old) #endif @@ -10227,162 +10382,11 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_getdents case TARGET_NR_getdents: -#ifdef EMULATE_GETDENTS_WITH_GETDENTS -#if TARGET_ABI_BITS == 32 && HOST_LONG_BITS == 64 - { - struct target_dirent *target_dirp; - struct linux_dirent *dirp; - abi_long count = arg3; - - dirp = g_try_malloc(count); - if (!dirp) { - return -TARGET_ENOMEM; - } - - ret = get_errno(sys_getdents(arg1, dirp, count)); - if (!is_error(ret)) { - struct linux_dirent *de; - struct target_dirent *tde; - int len = ret; - int reclen, treclen; - int count1, tnamelen; - - count1 = 0; - de = dirp; - if (!(target_dirp = lock_user(VERIFY_WRITE, arg2, count, 0))) - return -TARGET_EFAULT; - tde = target_dirp; - while (len > 0) { - reclen = de->d_reclen; - tnamelen = reclen - offsetof(struct linux_dirent, d_name); - assert(tnamelen >= 0); - treclen = tnamelen + offsetof(struct target_dirent, d_name); - assert(count1 + treclen <= count); - tde->d_reclen = tswap16(treclen); - tde->d_ino = tswapal(de->d_ino); - tde->d_off = tswapal(de->d_off); - memcpy(tde->d_name, de->d_name, tnamelen); - de = (struct linux_dirent *)((char *)de + reclen); - len -= reclen; - tde = (struct target_dirent *)((char *)tde + treclen); - count1 += treclen; - } - ret = count1; - unlock_user(target_dirp, arg2, ret); - } - g_free(dirp); - } -#else - { - struct linux_dirent *dirp; - abi_long count = arg3; - - if (!(dirp = lock_user(VERIFY_WRITE, arg2, count, 0))) - return -TARGET_EFAULT; - ret = get_errno(sys_getdents(arg1, dirp, count)); - if (!is_error(ret)) { - struct linux_dirent *de; - int len = ret; - int reclen; - de = dirp; - while (len > 0) { - reclen = de->d_reclen; - if (reclen > len) - break; - de->d_reclen = tswap16(reclen); - tswapls(&de->d_ino); - tswapls(&de->d_off); - de = (struct linux_dirent *)((char *)de + reclen); - len -= reclen; - } - } - unlock_user(dirp, arg2, ret); - } -#endif -#else - /* Implement getdents in terms of getdents64 */ - { - struct linux_dirent64 *dirp; - abi_long count = arg3; - - dirp = lock_user(VERIFY_WRITE, arg2, count, 0); - if (!dirp) { - return -TARGET_EFAULT; - } - ret = get_errno(sys_getdents64(arg1, dirp, count)); - if (!is_error(ret)) { - /* Convert the dirent64 structs to target dirent. We do this - * in-place, since we can guarantee that a target_dirent is no - * larger than a dirent64; however this means we have to be - * careful to read everything before writing in the new format. - */ - struct linux_dirent64 *de; - struct target_dirent *tde; - int len = ret; - int tlen = 0; - - de = dirp; - tde = (struct target_dirent *)dirp; - while (len > 0) { - int namelen, treclen; - int reclen = de->d_reclen; - uint64_t ino = de->d_ino; - int64_t off = de->d_off; - uint8_t type = de->d_type; - - namelen = strlen(de->d_name); - treclen = offsetof(struct target_dirent, d_name) - + namelen + 2; - treclen = QEMU_ALIGN_UP(treclen, sizeof(abi_long)); - - memmove(tde->d_name, de->d_name, namelen + 1); - tde->d_ino = tswapal(ino); - tde->d_off = tswapal(off); - tde->d_reclen = tswap16(treclen); - /* The target_dirent type is in what was formerly a padding - * byte at the end of the structure: - */ - *(((char *)tde) + treclen - 1) = type; - - de = (struct linux_dirent64 *)((char *)de + reclen); - tde = (struct target_dirent *)((char *)tde + treclen); - len -= reclen; - tlen += treclen; - } - ret = tlen; - } - unlock_user(dirp, arg2, ret); - } -#endif - return ret; + return do_getdents(arg1, arg2, arg3); #endif /* TARGET_NR_getdents */ #if defined(TARGET_NR_getdents64) && defined(__NR_getdents64) case TARGET_NR_getdents64: - { - struct linux_dirent64 *dirp; - abi_long count = arg3; - if (!(dirp = lock_user(VERIFY_WRITE, arg2, count, 0))) - return -TARGET_EFAULT; - ret = get_errno(sys_getdents64(arg1, dirp, count)); - if (!is_error(ret)) { - struct linux_dirent64 *de; - int len = ret; - int reclen; - de = dirp; - while (len > 0) { - reclen = de->d_reclen; - if (reclen > len) - break; - de->d_reclen = tswap16(reclen); - tswap64s((uint64_t *)&de->d_ino); - tswap64s((uint64_t *)&de->d_off); - de = (struct linux_dirent64 *)((char *)de + reclen); - len -= reclen; - } - } - unlock_user(dirp, arg2, ret); - } - return ret; + return do_getdents64(arg1, arg2, arg3); #endif /* TARGET_NR_getdents64 */ #if defined(TARGET_NR__newselect) case TARGET_NR__newselect: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index a5ce487dcc..0b13975937 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -437,11 +437,11 @@ struct target_dirent { }; struct target_dirent64 { - uint64_t d_ino; - int64_t d_off; - unsigned short d_reclen; + abi_ullong d_ino; + abi_llong d_off; + abi_ushort d_reclen; unsigned char d_type; - char d_name[256]; + char d_name[]; }; @@ -1219,6 +1219,10 @@ struct target_rtc_pll_info { #define TARGET_LOOP_SET_STATUS64 0x4C04 #define TARGET_LOOP_GET_STATUS64 0x4C05 #define TARGET_LOOP_CHANGE_FD 0x4C06 +#define TARGET_LOOP_SET_CAPACITY 0x4C07 +#define TARGET_LOOP_SET_DIRECT_IO 0x4C08 +#define TARGET_LOOP_SET_BLOCK_SIZE 0x4C09 +#define TARGET_LOOP_CONFIGURE 0x4C0A #define TARGET_LOOP_CTL_ADD 0x4C80 #define TARGET_LOOP_CTL_REMOVE 0x4C81 @@ -2714,7 +2718,7 @@ struct linux_dirent { long d_ino; unsigned long d_off; unsigned short d_reclen; - char d_name[256]; /* We must not include limits.h! */ + char d_name[]; }; struct linux_dirent64 { @@ -2722,7 +2726,7 @@ struct linux_dirent64 { int64_t d_off; unsigned short d_reclen; unsigned char d_type; - char d_name[256]; + char d_name[]; }; struct target_mq_attr { diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h index ba2c1518eb..c3b43f8022 100644 --- a/linux-user/syscall_types.h +++ b/linux-user/syscall_types.h @@ -201,6 +201,12 @@ STRUCT(loop_info64, MK_ARRAY(TYPE_CHAR, 32), /* lo_encrypt_key */ MK_ARRAY(TYPE_ULONGLONG, 2)) /* lo_init */ +STRUCT(loop_config, + TYPE_INT, /* fd */ + TYPE_INT, /* block_size */ + MK_STRUCT(STRUCT_loop_info64), /* info */ + MK_ARRAY(TYPE_ULONGLONG, 8)) /* __reserved */ + /* mag tape ioctls */ STRUCT(mtop, TYPE_SHORT, TYPE_INT) STRUCT(mtget, TYPE_LONG, TYPE_LONG, TYPE_LONG, TYPE_LONG, TYPE_LONG, diff --git a/meson.build b/meson.build index 9702fdce6d..96de1a6ef9 100644 --- a/meson.build +++ b/meson.build @@ -59,6 +59,12 @@ supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', 'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64'] cpu = host_machine.cpu_family() + +# Unify riscv* to a single family. +if cpu in ['riscv32', 'riscv64'] + cpu = 'riscv' +endif + targetos = host_machine.system() if cpu in ['x86', 'x86_64'] @@ -323,9 +329,7 @@ if not get_option('hax').disabled() endif endif if targetos == 'netbsd' - if cc.has_header_symbol('nvmm.h', 'nvmm_cpu_stop', required: get_option('nvmm')) - nvmm = cc.find_library('nvmm', required: get_option('nvmm')) - endif + nvmm = cc.find_library('nvmm', required: get_option('nvmm')) if nvmm.found() accelerators += 'CONFIG_NVMM' endif @@ -335,7 +339,7 @@ tcg_arch = config_host['ARCH'] if not get_option('tcg').disabled() if cpu not in supported_cpus if get_option('tcg_interpreter') - warning('Unsupported CPU @0@, will use TCG with TCI (experimental and slow)'.format(cpu)) + warning('Unsupported CPU @0@, will use TCG with TCI (slow)'.format(cpu)) else error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu)) endif @@ -566,13 +570,7 @@ endif spice_headers = spice.partial_dependency(compile_args: true, includes: true) rt = cc.find_library('rt', required: false) -libdl = not_found -if 'CONFIG_PLUGIN' in config_host - libdl = cc.find_library('dl', required: false) - if not cc.has_function('dlopen', dependencies: libdl) - error('dlopen not found') - endif -endif + libiscsi = not_found if not get_option('libiscsi').auto() or have_block libiscsi = dependency('libiscsi', version: '>=1.9.0', @@ -681,6 +679,9 @@ iconv = not_found curses = not_found if have_system and not get_option('curses').disabled() curses_test = ''' + #if defined(__APPLE__) || defined(__OpenBSD__) + #define _XOPEN_SOURCE_EXTENDED 1 + #endif #include <locale.h> #include <curses.h> #include <wchar.h> @@ -704,7 +705,7 @@ if have_system and not get_option('curses').disabled() endif endforeach msg = get_option('curses').enabled() ? 'curses library not found' : '' - curses_compile_args = ['-DNCURSES_WIDECHAR'] + curses_compile_args = ['-DNCURSES_WIDECHAR=1'] if curses.found() if cc.links(curses_test, args: curses_compile_args, dependencies: [curses]) curses = declare_dependency(compile_args: curses_compile_args, dependencies: [curses]) @@ -1201,6 +1202,11 @@ keyutils = dependency('libkeyutils', required: false, has_gettid = cc.has_function('gettid') +# libselinux +selinux = dependency('libselinux', + required: get_option('selinux'), + method: 'pkg-config', kwargs: static_kwargs) + # Malloc tests malloc = [] @@ -1479,6 +1485,7 @@ config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) config_host_data.set('CONFIG_SPICE', spice.found()) config_host_data.set('CONFIG_X11', x11.found()) config_host_data.set('CONFIG_CFI', get_option('cfi')) +config_host_data.set('CONFIG_SELINUX', selinux.found()) config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0]) config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1]) @@ -1547,8 +1554,6 @@ config_host_data.set('CONFIG_INOTIFY', cc.has_header_symbol('sys/inotify.h', 'inotify_init')) config_host_data.set('CONFIG_INOTIFY1', cc.has_header_symbol('sys/inotify.h', 'inotify_init1')) -config_host_data.set('CONFIG_IOVEC', - cc.has_header_symbol('sys/uio.h', 'struct iovec')) config_host_data.set('CONFIG_MACHINE_BSWAP_H', cc.has_header_symbol('machine/bswap.h', 'bswap32', prefix: '''#include <sys/endian.h> @@ -1561,8 +1566,6 @@ config_host_data.set('CONFIG_SYSMACROS', cc.has_header_symbol('sys/sysmacros.h', 'makedev')) config_host_data.set('HAVE_OPTRESET', cc.has_header_symbol('getopt.h', 'optreset')) -config_host_data.set('HAVE_UTMPX', - cc.has_header_symbol('utmpx.h', 'struct utmpx')) config_host_data.set('HAVE_IPPROTO_MPTCP', cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP')) @@ -1574,6 +1577,14 @@ config_host_data.set('HAVE_STRUCT_STAT_ST_ATIM', cc.has_member('struct stat', 'st_atim', prefix: '#include <sys/stat.h>')) +# has_type +config_host_data.set('CONFIG_IOVEC', + cc.has_type('struct iovec', + prefix: '#include <sys/uio.h>')) +config_host_data.set('HAVE_UTMPX', + cc.has_type('struct utmpx', + prefix: '#include <utmpx.h>')) + config_host_data.set('CONFIG_EVENTFD', cc.links(''' #include <sys/eventfd.h> int main(void) { return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); }''')) @@ -1615,7 +1626,7 @@ config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + ''' #include <stddef.h> int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }''')) -config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(''' +config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + ''' #include <pthread.h> static void *f(void *p) { return NULL; } @@ -1626,7 +1637,7 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(''' pthread_setname_np(thread, "QEMU"); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(''' +config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_prefix + ''' #include <pthread.h> static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; } @@ -1662,8 +1673,10 @@ config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + ''' have_l2tpv3 = false if not get_option('l2tpv3').disabled() and have_system - have_l2tpv3 = (cc.has_header_symbol('sys/socket.h', 'struct mmsghdr') - and cc.has_header('linux/ip.h')) + have_l2tpv3 = cc.has_type('struct mmsghdr', + prefix: gnu_source_prefix + ''' + #include <sys/socket.h> + #include <linux/ip.h>''') endif config_host_data.set('CONFIG_L2TPV3', have_l2tpv3) @@ -1689,7 +1702,7 @@ config_host_data.set('CONFIG_NETMAP', have_netmap) # xfs headers will not try to redefine structs from linux headers # if this macro is set. config_host_data.set('HAVE_FSXATTR', cc.links(''' - #include <linux/fs.h>' + #include <linux/fs.h> struct fsxattr foo; int main(void) { return 0; @@ -3054,7 +3067,8 @@ if have_tools qemu_io = executable('qemu-io', files('qemu-io.c'), dependencies: [block, qemuutil], install: true) qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'), - dependencies: [blockdev, qemuutil, gnutls], install: true) + dependencies: [blockdev, qemuutil, gnutls, selinux], + install: true) subdir('storage-daemon') subdir('contrib/rdmacm-mux') @@ -3290,7 +3304,7 @@ endif summary_info += {'TCG support': config_all.has_key('CONFIG_TCG')} if config_all.has_key('CONFIG_TCG') if get_option('tcg_interpreter') - summary_info += {'TCG backend': 'TCI (TCG with bytecode interpreter, experimental and slow)'} + summary_info += {'TCG backend': 'TCI (TCG with bytecode interpreter, slow)'} else summary_info += {'TCG backend': 'native (@0@)'.format(cpu)} endif @@ -3430,6 +3444,7 @@ summary_info += {'libdaxctl support': libdaxctl} summary_info += {'libudev': libudev} # Dummy dependency, keep .found() summary_info += {'FUSE lseek': fuse_lseek.found()} +summary_info += {'selinux': selinux} summary(summary_info, bool_yn: true, section: 'Dependencies') if not supported_cpus.contains(cpu) diff --git a/meson_options.txt b/meson_options.txt index e740dce2a5..e392323732 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -59,7 +59,7 @@ option('xen_pci_passthrough', type: 'feature', value: 'auto', option('tcg', type: 'feature', value: 'auto', description: 'TCG support') option('tcg_interpreter', type: 'boolean', value: false, - description: 'TCG with bytecode interpreter (experimental and slow)') + description: 'TCG with bytecode interpreter (slow)') option('cfi', type: 'boolean', value: 'false', description: 'Control-Flow Integrity (CFI)') option('cfi_debug', type: 'boolean', value: 'false', @@ -201,3 +201,6 @@ option('slirp', type: 'combo', value: 'auto', option('fdt', type: 'combo', value: 'auto', choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], description: 'Whether and how to find the libfdt library') + +option('selinux', type: 'feature', value: 'auto', + description: 'SELinux support in qemu-nbd') diff --git a/nbd/server.c b/nbd/server.c index 6d03e8a4b4..4630dd7322 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016-2020 Red Hat, Inc. + * Copyright (C) 2016-2021 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> * * Network Block Device Server Side @@ -879,7 +879,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, if (!*query) { if (client->opt == NBD_OPT_LIST_META_CONTEXT) { meta->allocation_depth = meta->exp->allocation_depth; - memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + if (meta->exp->nr_export_bitmaps) { + memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + } } trace_nbd_negotiate_meta_query_parse("empty"); return true; @@ -894,7 +896,8 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, if (nbd_strshift(&query, "dirty-bitmap:")) { trace_nbd_negotiate_meta_query_parse("dirty-bitmap:"); if (!*query) { - if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + if (client->opt == NBD_OPT_LIST_META_CONTEXT && + meta->exp->nr_export_bitmaps) { memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); } trace_nbd_negotiate_meta_query_parse("empty"); @@ -1024,7 +1027,9 @@ static int nbd_negotiate_meta_queries(NBDClient *client, /* enable all known contexts */ meta->base_allocation = true; meta->allocation_depth = meta->exp->allocation_depth; - memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + if (meta->exp->nr_export_bitmaps) { + memset(meta->bitmaps, 1, meta->exp->nr_export_bitmaps); + } } else { for (i = 0; i < nb_queries; ++i) { ret = nbd_negotiate_meta_query(client, meta, errp); @@ -1413,6 +1418,9 @@ static int nbd_receive_request(NBDClient *client, NBDRequest *request, if (ret < 0) { return ret; } + if (ret == 0) { + return -EIO; + } /* Request [ 0 .. 3] magic (NBD_REQUEST_MAGIC) @@ -2501,16 +2509,8 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, if (request->flags & NBD_CMD_FLAG_FAST_ZERO) { flags |= BDRV_REQ_NO_FALLBACK; } - ret = 0; - /* FIXME simplify this when blk_pwrite_zeroes switches to 64-bit */ - while (ret >= 0 && request->len) { - int align = client->check_align ?: 1; - int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES, - align)); - ret = blk_pwrite_zeroes(exp->common.blk, request->from, len, flags); - request->len -= len; - request->from += len; - } + ret = blk_pwrite_zeroes(exp->common.blk, request->from, request->len, + flags); return nbd_send_generic_reply(client, request->handle, ret, "writing to file failed", errp); @@ -2524,16 +2524,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, "flush failed", errp); case NBD_CMD_TRIM: - ret = 0; - /* FIXME simplify this when blk_co_pdiscard switches to 64-bit */ - while (ret >= 0 && request->len) { - int align = client->check_align ?: 1; - int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES, - align)); - ret = blk_co_pdiscard(exp->common.blk, request->from, len); - request->len -= len; - request->from += len; - } + ret = blk_co_pdiscard(exp->common.blk, request->from, request->len); if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) { ret = blk_co_flush(exp->common.blk); } diff --git a/net/colo-compare.c b/net/colo-compare.c index b8876d7fd9..b966e7e514 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -209,7 +209,8 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) pkt->tcp_seq = ntohl(tcphd->th_seq); pkt->tcp_ack = ntohl(tcphd->th_ack); - *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack; + /* Need to consider ACK will bigger than uint32_t MAX */ + *max_ack = pkt->tcp_ack - *max_ack > 0 ? pkt->tcp_ack : *max_ack; pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data + (tcphd->th_off << 2); pkt->payload_size = pkt->size - pkt->header_size; @@ -413,7 +414,8 @@ static void colo_compare_tcp(CompareState *s, Connection *conn) * can ensure that the packet's payload is acknowledged by * primary and secondary. */ - uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack; + uint32_t min_ack = conn->pack - conn->sack > 0 ? + conn->sack : conn->pack; pri: if (g_queue_is_empty(&conn->primary_list)) { @@ -805,7 +807,7 @@ static int compare_chr_send(CompareState *s, } if (!size) { - return 0; + return -1; } entry = g_slice_new(SendEntry); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 49ab322511..25dd6dd975 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -170,9 +170,17 @@ static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc, return true; } +/** Dummy receive in case qemu falls back to userland tap networking */ +static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + return 0; +} + static NetClientInfo net_vhost_vdpa_info = { .type = NET_CLIENT_DRIVER_VHOST_VDPA, .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, .has_ufo = vhost_vdpa_has_ufo, @@ -214,7 +222,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) { unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); - struct vhost_vdpa_config *config; + g_autofree struct vhost_vdpa_config *config = NULL; __virtio16 *max_queue_pairs; uint64_t features; int ret; @@ -260,8 +268,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; + if (!opts->vhostdev) { + error_setg(errp, "vdpa character device not specified with vhostdev"); + return -1; + } - vdpa_device_fd = qemu_open_old(opts->vhostdev, O_RDWR); + vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); if (vdpa_device_fd == -1) { return -errno; } diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex dea01da468..26f863b609 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios-microvm.bin b/pc-bios/bios-microvm.bin Binary files differindex ca6375d65c..ddeb24391d 100644 --- a/pc-bios/bios-microvm.bin +++ b/pc-bios/bios-microvm.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex e9fa99f006..6e6cde8c7f 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/vgabios-ati.bin b/pc-bios/vgabios-ati.bin Binary files differindex 0de9c0d3c1..fe497ea4f2 100644 --- a/pc-bios/vgabios-ati.bin +++ b/pc-bios/vgabios-ati.bin diff --git a/pc-bios/vgabios-bochs-display.bin b/pc-bios/vgabios-bochs-display.bin Binary files differindex 76e076f029..7ca04402e9 100644 --- a/pc-bios/vgabios-bochs-display.bin +++ b/pc-bios/vgabios-bochs-display.bin diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 0f16ad199d..020dfe6c56 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex 420f9ed331..0cc20678a3 100644 --- a/pc-bios/vgabios-qxl.bin +++ b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-ramfb.bin b/pc-bios/vgabios-ramfb.bin Binary files differindex b9b2e230a5..a7c5ae7c8f 100644 --- a/pc-bios/vgabios-ramfb.bin +++ b/pc-bios/vgabios-ramfb.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex b59256572d..abeea373d3 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex c00414ab83..806647a009 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex 5454aceda8..39e3093667 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex ae82887d73..c3a66af50e 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/plugins/meson.build b/plugins/meson.build index aeb386ebae..b3de57853b 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -2,9 +2,9 @@ plugin_ldflags = [] # Modules need more symbols than just those in plugins/qemu-plugins.symbols if not enable_modules if 'CONFIG_HAS_LD_DYNAMIC_LIST' in config_host - plugin_ldflags = ['-Wl,--dynamic-list=' + (meson.project_build_root() / 'qemu-plugins-ld.symbols')] + plugin_ldflags = ['-Wl,--dynamic-list=qemu-plugins-ld.symbols'] elif 'CONFIG_HAS_LD_EXPORTED_SYMBOLS_LIST' in config_host - plugin_ldflags = ['-Wl,-exported_symbols_list,' + (meson.project_build_root() / 'qemu-plugins-ld64.symbols')] + plugin_ldflags = ['-Wl,-exported_symbols_list,qemu-plugins-ld64.symbols'] endif endif diff --git a/python/qemu/aqmp/protocol.py b/python/qemu/aqmp/protocol.py index ae1df24026..5190b33b13 100644 --- a/python/qemu/aqmp/protocol.py +++ b/python/qemu/aqmp/protocol.py @@ -79,7 +79,11 @@ class ConnectError(AQMPError): self.exc: Exception = exc def __str__(self) -> str: - return f"{self.error_message}: {self.exc!s}" + cause = str(self.exc) + if not cause: + # If there's no error string, use the exception name. + cause = exception_summary(self.exc) + return f"{self.error_message}: {cause}" class StateError(AQMPError): @@ -623,13 +627,21 @@ class AsyncProtocol(Generic[T]): def _done(task: Optional['asyncio.Future[Any]']) -> bool: return task is not None and task.done() - # NB: We can't rely on _bh_tasks being done() here, it may not - # yet have had a chance to run and gather itself. + # Are we already in an error pathway? If either of the tasks are + # already done, or if we have no tasks but a reader/writer; we + # must be. + # + # NB: We can't use _bh_tasks to check for premature task + # completion, because it may not yet have had a chance to run + # and gather itself. tasks = tuple(filter(None, (self._writer_task, self._reader_task))) error_pathway = _done(self._reader_task) or _done(self._writer_task) + if not tasks: + error_pathway |= bool(self._reader) or bool(self._writer) try: - # Try to flush the writer, if possible: + # Try to flush the writer, if possible. + # This *may* cause an error and force us over into the error path. if not error_pathway: await self._bh_flush_writer() except BaseException as err: @@ -639,7 +651,7 @@ class AsyncProtocol(Generic[T]): self.logger.debug("%s:\n%s\n", emsg, pretty_traceback()) raise finally: - # Cancel any still-running tasks: + # Cancel any still-running tasks (Won't raise): if self._writer_task is not None and not self._writer_task.done(): self.logger.debug("Cancelling writer task.") self._writer_task.cancel() @@ -652,7 +664,7 @@ class AsyncProtocol(Generic[T]): self.logger.debug("Waiting for tasks to complete ...") await asyncio.wait(tasks) - # Lastly, close the stream itself. (May raise): + # Lastly, close the stream itself. (*May raise*!): await self._bh_close_stream(error_pathway) self.logger.debug("Disconnected.") diff --git a/python/qemu/aqmp/qmp_client.py b/python/qemu/aqmp/qmp_client.py index f987da02eb..8105e29fa8 100644 --- a/python/qemu/aqmp/qmp_client.py +++ b/python/qemu/aqmp/qmp_client.py @@ -639,9 +639,12 @@ class QMPClient(AsyncProtocol[Message], Events): if sock.family != socket.AF_UNIX: raise AQMPError("Sending file descriptors requires a UNIX socket.") - # Void the warranty sticker. - # Access to sendmsg in asyncio is scheduled for removal in Python 3.11. - sock = sock._sock # pylint: disable=protected-access + if not hasattr(sock, 'sendmsg'): + # We need to void the warranty sticker. + # Access to sendmsg is scheduled for removal in Python 3.11. + # Find the real backing socket to use it anyway. + sock = sock._sock # pylint: disable=protected-access + sock.sendmsg( [b' '], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, struct.pack('@i', fd))] diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index a487c39745..67ab06ca2b 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -133,19 +133,18 @@ class QEMUMachine: self._wrapper = wrapper self._qmp_timer = qmp_timer - self._name = name or "qemu-%d" % os.getpid() + self._name = name or f"qemu-{os.getpid()}-{id(self):02x}" + self._temp_dir: Optional[str] = None self._base_temp_dir = base_temp_dir - self._sock_dir = sock_dir or self._base_temp_dir + self._sock_dir = sock_dir self._log_dir = log_dir if monitor_address is not None: self._monitor_address = monitor_address - self._remove_monitor_sockfile = False else: self._monitor_address = os.path.join( - self._sock_dir, f"{self._name}-monitor.sock" + self.sock_dir, f"{self._name}-monitor.sock" ) - self._remove_monitor_sockfile = True self._console_log_path = console_log if self._console_log_path: @@ -163,14 +162,13 @@ class QEMUMachine: self._qmp_set = True # Enable QMP monitor by default. self._qmp_connection: Optional[QEMUMonitorProtocol] = None self._qemu_full_args: Tuple[str, ...] = () - self._temp_dir: Optional[str] = None self._launched = False self._machine: Optional[str] = None self._console_index = 0 self._console_set = False self._console_device_type: Optional[str] = None self._console_address = os.path.join( - self._sock_dir, f"{self._name}-console.sock" + self.sock_dir, f"{self._name}-console.sock" ) self._console_socket: Optional[socket.socket] = None self._remove_files: List[str] = [] @@ -315,8 +313,7 @@ class QEMUMachine: self._remove_files.append(self._console_address) if self._qmp_set: - if self._remove_monitor_sockfile: - assert isinstance(self._monitor_address, str) + if isinstance(self._monitor_address, str): self._remove_files.append(self._monitor_address) self._qmp_connection = QEMUMonitorProtocol( self._monitor_address, @@ -330,6 +327,14 @@ class QEMUMachine: self._qemu_log_path = os.path.join(self.log_dir, self._name + ".log") self._qemu_log_file = open(self._qemu_log_path, 'wb') + self._iolog = None + self._qemu_full_args = tuple(chain( + self._wrapper, + [self._binary], + self._base_args, + self._args + )) + def _post_launch(self) -> None: if self._qmp_connection: self._qmp.accept(self._qmp_timer) @@ -344,9 +349,6 @@ class QEMUMachine: Called to cleanup the VM instance after the process has exited. May also be called after a failed launch. """ - # Comprehensive reset for the failed launch case: - self._early_cleanup() - try: self._close_qmp_connection() except Exception as err: # pylint: disable=broad-except @@ -393,13 +395,18 @@ class QEMUMachine: if self._launched: raise QEMUMachineError('VM already launched') - self._iolog = None - self._qemu_full_args = () try: self._launch() - self._launched = True except: - self._post_shutdown() + # We may have launched the process but it may + # have exited before we could connect via QMP. + # Assume the VM didn't launch or is exiting. + # If we don't wait for the process, exitcode() may still be + # 'None' by the time control is ceded back to the caller. + if self._launched: + self.wait() + else: + self._post_shutdown() LOG.debug('Error launching VM') if self._qemu_full_args: @@ -413,12 +420,6 @@ class QEMUMachine: Launch the VM and establish a QMP connection """ self._pre_launch() - self._qemu_full_args = tuple( - chain(self._wrapper, - [self._binary], - self._base_args, - self._args) - ) LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args)) # Cleaning up of this subprocess is guaranteed by _do_shutdown. @@ -429,6 +430,7 @@ class QEMUMachine: stderr=subprocess.STDOUT, shell=False, close_fds=False) + self._launched = True self._post_launch() def _close_qmp_connection(self) -> None: @@ -460,8 +462,8 @@ class QEMUMachine: """ Perform any cleanup that needs to happen before the VM exits. - May be invoked by both soft and hard shutdown in failover scenarios. - Called additionally by _post_shutdown for comprehensive cleanup. + This method may be called twice upon shutdown, once each by soft + and hard shutdown in failover scenarios. """ # If we keep the console socket open, we may deadlock waiting # for QEMU to exit, while QEMU is waiting for the socket to @@ -817,6 +819,15 @@ class QEMUMachine: return self._temp_dir @property + def sock_dir(self) -> str: + """ + Returns the directory used for sockfiles by this machine. + """ + if self._sock_dir: + return self._sock_dir + return self.temp_dir + + @property def log_dir(self) -> str: """ Returns a directory to be used for writing logs diff --git a/qapi/block-core.json b/qapi/block-core.json index 33e8507d10..1d3dd9cb48 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1709,6 +1709,9 @@ # The operation can be stopped before it has completed using the # block-job-cancel command. # +# Features: +# @deprecated: This command is deprecated. Use @blockdev-backup instead. +# # Returns: - nothing on success # - If @device is not a valid block device, GenericError # @@ -1724,7 +1727,7 @@ # ## { 'command': 'drive-backup', 'boxed': true, - 'data': 'DriveBackup' } + 'data': 'DriveBackup', 'features': ['deprecated'] } ## # @blockdev-backup: diff --git a/qapi/machine.json b/qapi/machine.json index 17794ef681..067e3f5378 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1417,107 +1417,143 @@ # # Query interrupt statistics # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: interrupt statistics # # Since: 6.2 ## { 'command': 'x-query-irq', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-jit: # # Query TCG compiler statistics # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: TCG compiler statistics # # Since: 6.2 ## { 'command': 'x-query-jit', 'returns': 'HumanReadableText', - 'if': 'CONFIG_TCG' } + 'if': 'CONFIG_TCG', + 'features': [ 'unstable' ] } ## # @x-query-numa: # # Query NUMA topology information # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: topology information # # Since: 6.2 ## { 'command': 'x-query-numa', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-opcount: # # Query TCG opcode counters # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: TCG opcode counters # # Since: 6.2 ## { 'command': 'x-query-opcount', 'returns': 'HumanReadableText', - 'if': 'CONFIG_TCG' } + 'if': 'CONFIG_TCG', + 'features': [ 'unstable' ] } ## # @x-query-profile: # # Query TCG profiling information # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: profile information # # Since: 6.2 ## { 'command': 'x-query-profile', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-ramblock: # # Query system ramblock information # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: system ramblock information # # Since: 6.2 ## { 'command': 'x-query-ramblock', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-rdma: # # Query RDMA state # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: RDMA state # # Since: 6.2 ## { 'command': 'x-query-rdma', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-roms: # # Query information on the registered ROMS # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: registered ROMs # # Since: 6.2 ## { 'command': 'x-query-roms', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } ## # @x-query-usb: # # Query information on the USB devices # +# Features: +# @unstable: This command is meant for debugging. +# # Returns: USB device information # # Since: 6.2 ## { 'command': 'x-query-usb', - 'returns': 'HumanReadableText' } + 'returns': 'HumanReadableText', + 'features': [ 'unstable' ] } diff --git a/qapi/qom.json b/qapi/qom.json index ccd1167808..eeb5395ff3 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -769,6 +769,10 @@ # @reduced-phys-bits: number of bits in physical addresses that become # unavailable when SEV is enabled # +# @kernel-hashes: if true, add hashes of kernel/initrd/cmdline to a +# designated guest firmware page for measured boot +# with -kernel (default: false) (since 6.2) +# # Since: 2.12 ## { 'struct': 'SevGuestProperties', @@ -778,7 +782,8 @@ '*policy': 'uint32', '*handle': 'uint32', '*cbitpos': 'uint32', - 'reduced-phys-bits': 'uint32' } } + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool' } } ## # @ObjectType: diff --git a/qapi/transaction.json b/qapi/transaction.json index d175b5f863..381a2df782 100644 --- a/qapi/transaction.json +++ b/qapi/transaction.json @@ -54,6 +54,10 @@ # @blockdev-snapshot-sync: since 1.1 # @drive-backup: Since 1.6 # +# Features: +# @deprecated: Member @drive-backup is deprecated. Use member +# @blockdev-backup instead. +# # Since: 1.1 ## { 'enum': 'TransactionActionKind', @@ -62,7 +66,7 @@ 'block-dirty-bitmap-disable', 'block-dirty-bitmap-merge', 'blockdev-backup', 'blockdev-snapshot', 'blockdev-snapshot-internal-sync', 'blockdev-snapshot-sync', - 'drive-backup' ] } + { 'name': 'drive-backup', 'features': [ 'deprecated' ] } ] } ## # @AbortWrapper: diff --git a/qemu-nbd.c b/qemu-nbd.c index 9d895ba24b..c6c20df68a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -47,6 +47,10 @@ #include "trace/control.h" #include "qemu-version.h" +#ifdef CONFIG_SELINUX +#include <selinux/selinux.h> +#endif + #ifdef __linux__ #define HAVE_NBD_DEVICE 1 #else @@ -64,6 +68,7 @@ #define QEMU_NBD_OPT_FORK 263 #define QEMU_NBD_OPT_TLSAUTHZ 264 #define QEMU_NBD_OPT_PID_FILE 265 +#define QEMU_NBD_OPT_SELINUX_LABEL 266 #define MBR_SIZE 512 @@ -116,6 +121,9 @@ static void usage(const char *name) " --fork fork off the server process and exit the parent\n" " once the server is running\n" " --pid-file=PATH store the server's process ID in the given file\n" +#ifdef CONFIG_SELINUX +" --selinux-label=LABEL set SELinux process label on listening socket\n" +#endif #if HAVE_NBD_DEVICE "\n" "Kernel NBD client support:\n" @@ -454,6 +462,7 @@ static const char *socket_activation_validate_opts(const char *device, const char *sockpath, const char *address, const char *port, + const char *selinux, bool list) { if (device != NULL) { @@ -472,6 +481,10 @@ static const char *socket_activation_validate_opts(const char *device, return "TCP port number can't be set when using socket activation"; } + if (selinux != NULL) { + return "SELinux label can't be set when using socket activation"; + } + if (list) { return "List mode is incompatible with socket activation"; } @@ -534,6 +547,8 @@ int main(int argc, char **argv) { "trace", required_argument, NULL, 'T' }, { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK }, { "pid-file", required_argument, NULL, QEMU_NBD_OPT_PID_FILE }, + { "selinux-label", required_argument, NULL, + QEMU_NBD_OPT_SELINUX_LABEL }, { NULL, 0, NULL, 0 } }; int ch; @@ -560,6 +575,7 @@ int main(int argc, char **argv) int old_stderr = -1; unsigned socket_activation; const char *pid_file_name = NULL; + const char *selinux_label = NULL; BlockExportOptions *export_opts; #ifdef CONFIG_POSIX @@ -749,6 +765,9 @@ int main(int argc, char **argv) case QEMU_NBD_OPT_PID_FILE: pid_file_name = optarg; break; + case QEMU_NBD_OPT_SELINUX_LABEL: + selinux_label = optarg; + break; } } @@ -788,6 +807,7 @@ int main(int argc, char **argv) /* Using socket activation - check user didn't use -p etc. */ const char *err_msg = socket_activation_validate_opts(device, sockpath, bindto, port, + selinux_label, list); if (err_msg != NULL) { error_report("%s", err_msg); @@ -827,6 +847,18 @@ int main(int argc, char **argv) } } + if (selinux_label) { +#ifdef CONFIG_SELINUX + if (sockpath == NULL && device == NULL) { + error_report("--selinux-label is not permitted without --socket"); + exit(EXIT_FAILURE); + } +#else + error_report("SELinux support not enabled in this binary"); + exit(EXIT_FAILURE); +#endif + } + if (list) { saddr = nbd_build_socket_address(sockpath, bindto, port); return qemu_nbd_client_list(saddr, tlscreds, bindto); @@ -940,6 +972,13 @@ int main(int argc, char **argv) } else { backlog = MIN(shared, SOMAXCONN); } +#ifdef CONFIG_SELINUX + if (selinux_label && setsockcreatecon_raw(selinux_label) == -1) { + error_report("Cannot set SELinux socket create context to %s: %s", + selinux_label, strerror(errno)); + exit(EXIT_FAILURE); + } +#endif saddr = nbd_build_socket_address(sockpath, bindto, port); if (qio_net_listener_open_sync(server, saddr, backlog, &local_err) < 0) { @@ -947,6 +986,13 @@ int main(int argc, char **argv) error_report_err(local_err); exit(EXIT_FAILURE); } +#ifdef CONFIG_SELINUX + if (selinux_label && setsockcreatecon_raw(NULL) == -1) { + error_report("Cannot clear SELinux socket create context: %s", + strerror(errno)); + exit(EXIT_FAILURE); + } +#endif } else { size_t i; /* See comment in check_socket_activation above. */ diff --git a/qemu-options.hx b/qemu-options.hx index 7749f59300..ae2c6dbbfc 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5189,7 +5189,7 @@ SRST -object secret,id=sec0,keyid=secmaster0,format=base64,\\ data=$SECRET,iv=$(<iv.b64) - ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file]`` + ``-object sev-guest,id=id,cbitpos=cbitpos,reduced-phys-bits=val,[sev-device=string,policy=policy,handle=handle,dh-cert-file=file,session-file=file,kernel-hashes=on|off]`` Create a Secure Encrypted Virtualization (SEV) guest object, which can be used to provide the guest memory encryption support on AMD processors. @@ -5229,6 +5229,10 @@ SRST session with the guest owner to negotiate keys used for attestation. The file must be encoded in base64. + The ``kernel-hashes`` adds the hashes of given kernel/initrd/ + cmdline to a designated guest firmware page for measured Linux + boot with -kernel. The default is off. (Since 6.2) + e.g to launch a SEV guest .. parsed-literal:: diff --git a/qom/object.c b/qom/object.c index 6be710bc40..4f0677cca9 100644 --- a/qom/object.c +++ b/qom/object.c @@ -2144,6 +2144,17 @@ Object *object_resolve_path(const char *path, bool *ambiguous) return object_resolve_path_type(path, TYPE_OBJECT, ambiguous); } +Object *object_resolve_path_at(Object *parent, const char *path) +{ + g_auto(GStrv) parts = g_strsplit(path, "/", 0); + + if (*path == '/') { + return object_resolve_abs_path(object_get_root(), parts + 1, + TYPE_OBJECT); + } + return object_resolve_abs_path(parent, parts, TYPE_OBJECT); +} + typedef struct StringProperty { char *(*get)(Object *, Error **); diff --git a/roms/seabios b/roms/seabios -Subproject 64f37cc530f144e53c190c9e8209a51b58fd5c4 +Subproject 2dd4b9b3f84019668719344b40dba79d681be41 diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml new file mode 100644 index 0000000000..42b0471634 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/build-environment.yml @@ -0,0 +1,51 @@ +--- +- name: Installation of extra packages to build QEMU + hosts: all + tasks: + - name: Extra check for CentOS Stream 8 + lineinfile: + path: /etc/redhat-release + line: CentOS Stream release 8 + state: present + check_mode: yes + register: centos_stream_8 + + - name: Enable PowerTools repo on CentOS Stream 8 + ini_file: + path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo + section: powertools + option: enabled + value: "1" + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 + + - name: Install basic packages to build QEMU on CentOS Stream 8 + dnf: + name: + - device-mapper-multipath-devel + - glusterfs-api-devel + - gnutls-devel + - libcap-ng-devel + - libcurl-devel + - libfdt-devel + - libiscsi-devel + - libpmem-devel + - librados-devel + - librbd-devel + - libseccomp-devel + - libssh-devel + - libxkbcommon-devel + - ninja-build + - numactl-devel + - python3-sphinx + - redhat-rpm-config + - snappy-devel + - spice-server-devel + - systemd-devel + state: present + when: + - ansible_facts['distribution'] == 'CentOS' + - ansible_facts['distribution_major_version'] == '8' + - centos_stream_8 diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure new file mode 100755 index 0000000000..048e80dc49 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/configure @@ -0,0 +1,208 @@ +#!/bin/sh -e +# +# Configuration for QEMU based on CentOS Stream 8 x86_64 builds +# +# The "configure" command line is based on: +# +# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec +# +# But, because the SPEC file contains a number of conditionals and +# variable and expansions only available at RPM build time, this version +# was initially generated from an actual RPM build on an x86_64 platform. +# +# From that initial version, options that are required or are a +# consequence of non-upstream patches have been adapted. One example +# is "--without-default-devices" which is *not* present here, given +# that patches adding downstream specific devices are not available. +# +../configure \ +--prefix="/usr" \ +--libdir="/usr/lib64" \ +--datadir="/usr/share" \ +--sysconfdir="/etc" \ +--interp-prefix=/usr/qemu-%M \ +--localstatedir="/var" \ +--docdir="/usr/share/doc" \ +--libexecdir="/usr/libexec" \ +--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ +--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \ +--with-suffix="qemu-kvm" \ +--firmwarepath=/usr/share/qemu-firmware \ +--with-git=meson \ +--with-git-submodules=update \ +--target-list="x86_64-softmmu" \ +--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \ +--audio-drv-list="" \ +--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \ +--with-coroutine=ucontext \ +--with-git=git \ +--tls-priority=@QEMU,SYSTEM \ +--disable-attr \ +--disable-auth-pam \ +--disable-avx2 \ +--disable-avx512f \ +--disable-bochs \ +--disable-bpf \ +--disable-brlapi \ +--disable-bsd-user \ +--disable-bzip2 \ +--disable-cap-ng \ +--disable-capstone \ +--disable-cfi \ +--disable-cfi-debug \ +--disable-cloop \ +--disable-cocoa \ +--disable-coroutine-pool \ +--disable-crypto-afalg \ +--disable-curl \ +--disable-curses \ +--disable-debug-info \ +--disable-debug-mutex \ +--disable-debug-tcg \ +--disable-dmg \ +--disable-docs \ +--disable-fuse \ +--disable-fuse-lseek \ +--disable-gcrypt \ +--disable-gio \ +--disable-glusterfs \ +--disable-gnutls \ +--disable-gtk \ +--disable-guest-agent \ +--disable-guest-agent-msi \ +--disable-hax \ +--disable-hvf \ +--disable-iconv \ +--disable-kvm \ +--disable-libdaxctl \ +--disable-libiscsi \ +--disable-libnfs \ +--disable-libpmem \ +--disable-libssh \ +--disable-libudev \ +--disable-libusb \ +--disable-libxml2 \ +--disable-linux-aio \ +--disable-linux-io-uring \ +--disable-linux-user \ +--disable-live-block-migration \ +--disable-lto \ +--disable-lzfse \ +--disable-lzo \ +--disable-malloc-trim \ +--disable-membarrier \ +--disable-modules \ +--disable-module-upgrades \ +--disable-mpath \ +--disable-multiprocess \ +--disable-netmap \ +--disable-nettle \ +--disable-numa \ +--disable-nvmm \ +--disable-opengl \ +--disable-parallels \ +--disable-pie \ +--disable-pvrdma \ +--disable-qcow1 \ +--disable-qed \ +--disable-qom-cast-debug \ +--disable-rbd \ +--disable-rdma \ +--disable-replication \ +--disable-rng-none \ +--disable-safe-stack \ +--disable-sanitizers \ +--disable-sdl \ +--disable-sdl-image \ +--disable-seccomp \ +--disable-slirp-smbd \ +--disable-smartcard \ +--disable-snappy \ +--disable-sparse \ +--disable-spice \ +--disable-strip \ +--disable-system \ +--disable-tcg \ +--disable-tools \ +--disable-tpm \ +--disable-u2f \ +--disable-usb-redir \ +--disable-user \ +--disable-vde \ +--disable-vdi \ +--disable-vhost-crypto \ +--disable-vhost-kernel \ +--disable-vhost-net \ +--disable-vhost-scsi \ +--disable-vhost-user \ +--disable-vhost-user-blk-server \ +--disable-vhost-vdpa \ +--disable-vhost-vsock \ +--disable-virglrenderer \ +--disable-virtfs \ +--disable-virtiofsd \ +--disable-vnc \ +--disable-vnc-jpeg \ +--disable-vnc-png \ +--disable-vnc-sasl \ +--disable-vte \ +--disable-vvfat \ +--disable-werror \ +--disable-whpx \ +--disable-xen \ +--disable-xen-pci-passthrough \ +--disable-xfsctl \ +--disable-xkbcommon \ +--disable-zstd \ +--enable-attr \ +--enable-avx2 \ +--enable-cap-ng \ +--enable-capstone \ +--enable-coroutine-pool \ +--enable-curl \ +--enable-debug-info \ +--enable-docs \ +--enable-fdt \ +--enable-gcrypt \ +--enable-glusterfs \ +--enable-gnutls \ +--enable-guest-agent \ +--enable-iconv \ +--enable-kvm \ +--enable-libiscsi \ +--enable-libpmem \ +--enable-libssh \ +--enable-libusb \ +--enable-libudev \ +--enable-linux-aio \ +--enable-lzo \ +--enable-malloc-trim \ +--enable-modules \ +--enable-mpath \ +--enable-numa \ +--enable-opengl \ +--enable-pie \ +--enable-rbd \ +--enable-rdma \ +--enable-seccomp \ +--enable-snappy \ +--enable-smartcard \ +--enable-spice \ +--enable-system \ +--enable-tcg \ +--enable-tools \ +--enable-tpm \ +--enable-trace-backend=dtrace \ +--enable-usb-redir \ +--enable-virtiofsd \ +--enable-vhost-kernel \ +--enable-vhost-net \ +--enable-vhost-user \ +--enable-vhost-user-blk-server \ +--enable-vhost-vdpa \ +--enable-vhost-vsock \ +--enable-vnc \ +--enable-vnc-png \ +--enable-vnc-sasl \ +--enable-werror \ +--enable-xkbcommon diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado new file mode 100755 index 0000000000..7aeecbcfb8 --- /dev/null +++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado @@ -0,0 +1,70 @@ +#!/bin/sh -e +# +# Runs a previously vetted list of tests, either marked explicitly for +# KVM and x86_64, or tests that are generic enough to be valid for all +# targets. Such a test list can be generated with: +# +# ./tests/venv/bin/avocado list --filter-by-tags-include-empty \ +# --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \ +# tests/avocado/ +# +# This is almost the complete list of avocado based tests available at +# the time this was compile, with the following exceptions: +# +# * Require machine type "x-remote": +# - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64 +# +# * Needs superuser privileges: +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_virtiofsd_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_pre_launch_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_launch_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_post_mount_set_up +# - tests/avocado/virtiofs_submounts.py:VirtiofsSubmountsTest.test_two_runs +# +# * Requires display type "egl-headless": +# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl +# - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl +# +# * Test is marked (unconditionally) to be skipped: +# - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types +# +make get-vm-images +./tests/venv/bin/avocado run \ + --job-results-dir=tests/results/ \ + tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \ + tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \ + tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \ + tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \ + tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \ + tests/avocado/hotplug_cpu.py:HotPlugCPU.test \ + tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \ + tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \ + tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \ + tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \ + tests/avocado/migration.py:Migration.test_migration_with_exec \ + tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \ + tests/avocado/migration.py:Migration.test_migration_with_unix \ + tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \ + tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \ + tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \ + tests/avocado/version.py:Version.test_qmp_human_info_version \ + tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \ + tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \ + tests/avocado/vnc.py:Vnc.test_change_password \ + tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \ + tests/avocado/vnc.py:Vnc.test_no_vnc \ + tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \ + tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \ + tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README new file mode 100644 index 0000000000..e3eadfe3ea --- /dev/null +++ b/scripts/ci/org.centos/stream/README @@ -0,0 +1,17 @@ +This directory contains scripts for generating a build of QEMU that +closely matches the CentOS Stream[1] builds of the qemu-kvm package. + +To have the environment ready to configure, build QEMU and run tests, +please start with a CentOS Stream machine and: + + * apply the generic "build-environment.yml" playbook located at + scripts/ci/setup + + * apply the "build-environment.yml" in the directory following the + CentOS Stream version (such as "8"). + +This currently only covers CentOS Stream 8 environments and +packages[2]. + +[1] https://www.centos.org/centos-stream/ +[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml index 581c1c75d1..599896cc5b 100644 --- a/scripts/ci/setup/build-environment.yml +++ b/scripts/ci/setup/build-environment.yml @@ -114,3 +114,41 @@ when: - ansible_facts['distribution'] == 'Ubuntu' - ansible_facts['distribution_version'] == '20.04' + + - name: Install basic packages to build QEMU on EL8 + dnf: + # This list of packages start with tests/docker/dockerfiles/centos8.docker + # but only include files that are common to all distro variants and present + # in the standard repos (no add-ons) + name: + - bzip2 + - bzip2-devel + - dbus-daemon + - diffutils + - gcc + - gcc-c++ + - genisoimage + - gettext + - git + - glib2-devel + - libaio-devel + - libepoxy-devel + - libgcrypt-devel + - lzo-devel + - make + - mesa-libEGL-devel + - nettle-devel + - nmap-ncat + - perl-Test-Harness + - pixman-devel + - python36 + - rdma-core-devel + - spice-glib-devel + - spice-server + - systemtap-sdt-devel + - tar + - zlib-devel + state: present + when: + - ansible_facts['distribution_file_variety'] == 'RedHat' + - ansible_facts['distribution_version'] == '8' diff --git a/scripts/device-crash-test b/scripts/device-crash-test index 8331c057b8..7fbd99158b 100755 --- a/scripts/device-crash-test +++ b/scripts/device-crash-test @@ -36,6 +36,7 @@ from itertools import chain sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python')) from qemu.machine import QEMUMachine +from qemu.aqmp import ConnectError logger = logging.getLogger('device-crash-test') dbg = logger.debug @@ -175,7 +176,6 @@ ERROR_RULE_LIST = [ {'log':r"Multiple VT220 operator consoles are not supported"}, {'log':r"core 0 already populated"}, {'log':r"could not find stage1 bootloader"}, - {'log':r"No '.*' bus found for device"}, # other exitcode=1 failures not listed above will just generate INFO messages: {'exitcode':1, 'loglevel':logging.INFO}, @@ -317,9 +317,7 @@ class QemuBinaryInfo(object): try: vm.launch() mi['runnable'] = True - except KeyboardInterrupt: - raise - except: + except Exception: dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info()) dbg("log: %r", vm.get_log()) mi['runnable'] = False @@ -355,14 +353,14 @@ def checkOneCase(args, testcase): '-device', qemuOptsEscape(device)] cmdline = ' '.join([binary] + args) dbg("will launch QEMU: %s", cmdline) - vm = QEMUMachine(binary=binary, args=args) + vm = QEMUMachine(binary=binary, args=args, qmp_timer=15) + exc = None exc_traceback = None try: vm.launch() - except KeyboardInterrupt: - raise - except: + except Exception as this_exc: + exc = this_exc exc_traceback = traceback.format_exc() dbg("Exception while running test case") finally: @@ -370,8 +368,9 @@ def checkOneCase(args, testcase): ec = vm.exitcode() log = vm.get_log() - if exc_traceback is not None or ec != 0: - return {'exc_traceback':exc_traceback, + if exc is not None or ec != 0: + return {'exc': exc, + 'exc_traceback':exc_traceback, 'exitcode':ec, 'log':log, 'testcase':testcase, @@ -459,6 +458,17 @@ def logFailure(f, level): for l in f['log'].strip().split('\n'): logger.log(level, "log: %s", l) logger.log(level, "exit code: %r", f['exitcode']) + + # If the Exception is merely a QMP connect error, + # reduce the logging level for its traceback to + # improve visual clarity. + if isinstance(f.get('exc'), ConnectError): + logger.log(level, "%s.%s: %s", + type(f['exc']).__module__, + type(f['exc']).__qualname__, + str(f['exc'])) + level = logging.DEBUG + if f['exc_traceback']: logger.log(level, "exception:") for l in f['exc_traceback'].split('\n'): @@ -503,6 +513,12 @@ def main(): lvl = logging.WARN logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s') + if not args.debug: + # Async QMP, when in use, is chatty about connection failures. + # This script knowingly generates a ton of connection errors. + # Silence this logger. + logging.getLogger('qemu.aqmp.qmp_client').setLevel(logging.CRITICAL) + fatal_failures = [] wl_stats = {} skipped = 0 diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 55b8a78560..7a17ff4218 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -13,8 +13,7 @@ meson_options_help() { printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-slirp[=CHOICE] Whether and how to find the slirp library' printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' - printf "%s\n" ' --enable-tcg-interpreter TCG with bytecode interpreter (experimental and' - printf "%s\n" ' slow)' + printf "%s\n" ' --enable-tcg-interpreter TCG with bytecode interpreter (slow)' printf "%s\n" ' --enable-trace-backends=CHOICE' printf "%s\n" ' Set available tracing backends [log] (choices:' printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' @@ -73,6 +72,7 @@ meson_options_help() { printf "%s\n" ' sdl SDL user interface' printf "%s\n" ' sdl-image SDL Image support for icons' printf "%s\n" ' seccomp seccomp support' + printf "%s\n" ' selinux SELinux support in qemu-nbd' printf "%s\n" ' smartcard CA smartcard emulation support' printf "%s\n" ' snappy snappy compression support' printf "%s\n" ' sparse sparse checker' @@ -216,6 +216,8 @@ _meson_option_parse() { --disable-sdl-image) printf "%s" -Dsdl_image=disabled ;; --enable-seccomp) printf "%s" -Dseccomp=enabled ;; --disable-seccomp) printf "%s" -Dseccomp=disabled ;; + --enable-selinux) printf "%s" -Dselinux=enabled ;; + --disable-selinux) printf "%s" -Dselinux=disabled ;; --enable-slirp) printf "%s" -Dslirp=enabled ;; --disable-slirp) printf "%s" -Dslirp=disabled ;; --enable-slirp=*) quote_sh "-Dslirp=$2" ;; diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 314f8b439c..3524c04c2a 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -912,7 +912,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, */ if (!cpu->can_do_io) { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(cpu); cpu_loop_exit_restore(cpu, ra); } /* @@ -946,7 +946,7 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, cpu_loop_exit(cpu); } else { /* Force execution of one insn next time. */ - cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu); + cpu->cflags_next_tb = 1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(cpu); mmap_unlock(); cpu_loop_exit_noexc(cpu); } diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index f8b3a4cd82..01f3834db5 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -593,8 +593,8 @@ const char *qdev_set_id(DeviceState *dev, char *id, Error **errp) if (prop) { dev->id = id; } else { - g_free(id); error_setg(errp, "Duplicate device ID '%s'", id); + g_free(id); return NULL; } } else { @@ -871,15 +871,9 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) static DeviceState *find_device_state(const char *id, Error **errp) { - Object *obj; + Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); DeviceState *dev; - if (id[0] == '/') { - obj = object_resolve_path(id, NULL); - } else { - obj = object_resolve_path_component(qdev_get_peripheral(), id); - } - if (!obj) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", id); @@ -943,7 +937,9 @@ void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); if (dev != NULL) { - if (dev->pending_deleted_event) { + if (dev->pending_deleted_event && + (dev->pending_deleted_expires_ms == 0 || + dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { error_setg(errp, "Device %s is already in the " "process of unplug", id); return; diff --git a/softmmu/vl.c b/softmmu/vl.c index 1159a64bce..620a1f1367 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -3766,6 +3766,7 @@ void qemu_init(int argc, char **argv, char **envp) if (vmstate_dump_file) { /* dump and exit */ + module_load_qom_all(); dump_vmstate_json_to_file(vmstate_dump_file); exit(0); } diff --git a/target/arm/psci.c b/target/arm/psci.c index b279c0b9a4..6709e28013 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -27,13 +27,15 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type) { - /* - * Return true if the exception type matches the configured PSCI conduit. - * This is called before the SMC/HVC instruction is executed, to decide - * whether we should treat it as a PSCI call or with the architecturally + /* Return true if the r0/x0 value indicates a PSCI call and + * the exception type matches the configured PSCI conduit. This is + * called before the SMC/HVC instruction is executed, to decide whether + * we should treat it as a PSCI call or with the architecturally * defined behaviour for an SMC or HVC (which might be UNDEF or trap * to EL2 or to EL3). */ + CPUARMState *env = &cpu->env; + uint64_t param = is_a64(env) ? env->xregs[0] : env->regs[0]; switch (excp_type) { case EXCP_HVC: @@ -50,7 +52,27 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type) return false; } - return true; + switch (param) { + case QEMU_PSCI_0_2_FN_PSCI_VERSION: + case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case QEMU_PSCI_0_2_FN_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN_SYSTEM_RESET: + case QEMU_PSCI_0_2_FN_SYSTEM_OFF: + case QEMU_PSCI_0_1_FN_CPU_ON: + case QEMU_PSCI_0_2_FN_CPU_ON: + case QEMU_PSCI_0_2_FN64_CPU_ON: + case QEMU_PSCI_0_1_FN_CPU_OFF: + case QEMU_PSCI_0_2_FN_CPU_OFF: + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: + case QEMU_PSCI_0_1_FN_MIGRATE: + case QEMU_PSCI_0_2_FN_MIGRATE: + return true; + default: + return false; + } } void arm_handle_psci_call(ARMCPU *cpu) @@ -172,9 +194,10 @@ void arm_handle_psci_call(ARMCPU *cpu) break; case QEMU_PSCI_0_1_FN_MIGRATE: case QEMU_PSCI_0_2_FN_MIGRATE: - default: ret = QEMU_PSCI_RET_NOT_SUPPORTED; break; + default: + g_assert_not_reached(); } err: diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 14c996f968..9af261eea3 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -750,7 +750,11 @@ nvmm_vcpu_loop(CPUState *cpu) nvmm_vcpu_pre_run(cpu); if (qatomic_read(&cpu->exit_request)) { +#if NVMM_USER_VERSION >= 2 nvmm_vcpu_stop(vcpu); +#else + qemu_cpu_kick_self(); +#endif } /* Read exit_request before the kernel reads the immediate exit flag */ @@ -767,6 +771,7 @@ nvmm_vcpu_loop(CPUState *cpu) switch (exit->reason) { case NVMM_VCPU_EXIT_NONE: break; +#if NVMM_USER_VERSION >= 2 case NVMM_VCPU_EXIT_STOPPED: /* * The kernel cleared the immediate exit flag; cpu->exit_request @@ -775,6 +780,7 @@ nvmm_vcpu_loop(CPUState *cpu) smp_wmb(); qcpu->stop = true; break; +#endif case NVMM_VCPU_EXIT_MEMORY: ret = nvmm_handle_mem(mach, vcpu); break; @@ -888,8 +894,12 @@ nvmm_ipi_signal(int sigcpu) { if (current_cpu) { struct qemu_vcpu *qcpu = get_qemu_vcpu(current_cpu); +#if NVMM_USER_VERSION >= 2 struct nvmm_vcpu *vcpu = &qcpu->vcpu; nvmm_vcpu_stop(vcpu); +#else + qcpu->stop = true; +#endif } } diff --git a/target/i386/sev.c b/target/i386/sev.c index eede07f11d..025ff7a6f8 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -37,6 +37,7 @@ #include "qapi/qmp/qerror.h" #include "exec/confidential-guest-support.h" #include "hw/i386/pc.h" +#include "exec/address-spaces.h" #define TYPE_SEV_GUEST "sev-guest" OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) @@ -62,6 +63,7 @@ struct SevGuestState { char *session_file; uint32_t cbitpos; uint32_t reduced_phys_bits; + bool kernel_hashes; /* runtime state */ uint32_t handle; @@ -109,9 +111,19 @@ typedef struct QEMU_PACKED SevHashTable { SevHashTableEntry cmdline; SevHashTableEntry initrd; SevHashTableEntry kernel; - uint8_t padding[]; } SevHashTable; +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. + */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + static SevGuestState *sev_guest; static Error *sev_mig_blocker; @@ -327,6 +339,20 @@ sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) sev->sev_device = g_strdup(value); } +static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +{ + SevGuestState *sev = SEV_GUEST(obj); + + return sev->kernel_hashes; +} + +static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +{ + SevGuestState *sev = SEV_GUEST(obj); + + sev->kernel_hashes = value; +} + static void sev_guest_class_init(ObjectClass *oc, void *data) { @@ -345,6 +371,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_session_file); object_class_property_set_description(oc, "session-file", "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); } static void @@ -1196,18 +1227,35 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) uint8_t *data; SevHashTableDescriptor *area; SevHashTable *ht; + PaddedSevHashTable *padded_ht; uint8_t cmdline_hash[HASH_SIZE]; uint8_t initrd_hash[HASH_SIZE]; uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - int aligned_len; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_guest->kernel_hashes) { + return false; + } if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but OVMF has no hash table guid"); + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); return false; } area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } /* * Calculate hash of kernel command-line with the terminating null byte. If @@ -1248,7 +1296,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) * Populate the hashes table in the guest's memory at the OVMF-designated * area for the SEV hashes table */ - ht = qemu_map_ram_ptr(NULL, area->base); + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; ht->len = sizeof(*ht); @@ -1265,18 +1319,17 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ht->kernel.len = sizeof(ht->kernel); memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash)); - /* When calling sev_encrypt_flash, the length has to be 16 byte aligned */ - aligned_len = ROUND_UP(ht->len, 16); - if (aligned_len != ht->len) { - /* zero the excess data so the measurement can be reliably calculated */ - memset(ht->padding, 0, aligned_len - ht->len); - } + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (sev_encrypt_flash((uint8_t *)ht, aligned_len, errp) < 0) { - return false; + if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + ret = false; } - return true; + address_space_unmap(&address_space_memory, padded_ht, + mapped_len, true, mapped_len); + + return ret; } static void diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index 19832c4b46..da9fe99ff8 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -786,7 +786,7 @@ static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) { - hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + 16; + hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; if (cpu->vhyp) { PPCVirtualHypervisorClass *vhc = @@ -803,7 +803,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) { - hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + 15; + hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; if (cpu->vhyp) { PPCVirtualHypervisorClass *vhc = diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index c5b2f97ff7..1496955d38 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -97,6 +97,11 @@ void ppc_hash64_finalize(PowerPCCPU *cpu); #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL +/* PTE offsets */ +#define HPTE64_DW1 (HASH_PTE_SIZE_64 / 2) +#define HPTE64_DW1_R (HPTE64_DW1 + 6) +#define HPTE64_DW1_C (HPTE64_DW1 + 7) + /* Format changes for ARCH v3 */ #define HPTE64_V_COMMON_BITS 0x000fffffffffffffULL #define HPTE64_R_3_0_SSIZE_SHIFT 58 diff --git a/target/riscv/machine.c b/target/riscv/machine.c index 7b4c739564..ad8248ebfd 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -76,20 +76,50 @@ static bool hyper_needed(void *opaque) return riscv_has_ext(env, RVH); } -static bool vector_needed(void *opaque) -{ - RISCVCPU *cpu = opaque; - CPURISCVState *env = &cpu->env; +static const VMStateDescription vmstate_hyper = { + .name = "cpu/hyper", + .version_id = 1, + .minimum_version_id = 1, + .needed = hyper_needed, + .fields = (VMStateField[]) { + VMSTATE_UINTTL(env.hstatus, RISCVCPU), + VMSTATE_UINTTL(env.hedeleg, RISCVCPU), + VMSTATE_UINTTL(env.hideleg, RISCVCPU), + VMSTATE_UINTTL(env.hcounteren, RISCVCPU), + VMSTATE_UINTTL(env.htval, RISCVCPU), + VMSTATE_UINTTL(env.htinst, RISCVCPU), + VMSTATE_UINTTL(env.hgatp, RISCVCPU), + VMSTATE_UINT64(env.htimedelta, RISCVCPU), - return riscv_has_ext(env, RVV); -} + VMSTATE_UINT64(env.vsstatus, RISCVCPU), + VMSTATE_UINTTL(env.vstvec, RISCVCPU), + VMSTATE_UINTTL(env.vsscratch, RISCVCPU), + VMSTATE_UINTTL(env.vsepc, RISCVCPU), + VMSTATE_UINTTL(env.vscause, RISCVCPU), + VMSTATE_UINTTL(env.vstval, RISCVCPU), + VMSTATE_UINTTL(env.vsatp, RISCVCPU), -static bool pointermasking_needed(void *opaque) + VMSTATE_UINTTL(env.mtval2, RISCVCPU), + VMSTATE_UINTTL(env.mtinst, RISCVCPU), + + VMSTATE_UINTTL(env.stvec_hs, RISCVCPU), + VMSTATE_UINTTL(env.sscratch_hs, RISCVCPU), + VMSTATE_UINTTL(env.sepc_hs, RISCVCPU), + VMSTATE_UINTTL(env.scause_hs, RISCVCPU), + VMSTATE_UINTTL(env.stval_hs, RISCVCPU), + VMSTATE_UINTTL(env.satp_hs, RISCVCPU), + VMSTATE_UINT64(env.mstatus_hs, RISCVCPU), + + VMSTATE_END_OF_LIST() + } +}; + +static bool vector_needed(void *opaque) { RISCVCPU *cpu = opaque; CPURISCVState *env = &cpu->env; - return riscv_has_ext(env, RVJ); + return riscv_has_ext(env, RVV); } static const VMStateDescription vmstate_vector = { @@ -108,6 +138,14 @@ static const VMStateDescription vmstate_vector = { } }; +static bool pointermasking_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + CPURISCVState *env = &cpu->env; + + return riscv_has_ext(env, RVJ); +} + static const VMStateDescription vmstate_pointermasking = { .name = "cpu/pointer_masking", .version_id = 1, @@ -126,44 +164,6 @@ static const VMStateDescription vmstate_pointermasking = { } }; -static const VMStateDescription vmstate_hyper = { - .name = "cpu/hyper", - .version_id = 1, - .minimum_version_id = 1, - .needed = hyper_needed, - .fields = (VMStateField[]) { - VMSTATE_UINTTL(env.hstatus, RISCVCPU), - VMSTATE_UINTTL(env.hedeleg, RISCVCPU), - VMSTATE_UINTTL(env.hideleg, RISCVCPU), - VMSTATE_UINTTL(env.hcounteren, RISCVCPU), - VMSTATE_UINTTL(env.htval, RISCVCPU), - VMSTATE_UINTTL(env.htinst, RISCVCPU), - VMSTATE_UINTTL(env.hgatp, RISCVCPU), - VMSTATE_UINT64(env.htimedelta, RISCVCPU), - - VMSTATE_UINT64(env.vsstatus, RISCVCPU), - VMSTATE_UINTTL(env.vstvec, RISCVCPU), - VMSTATE_UINTTL(env.vsscratch, RISCVCPU), - VMSTATE_UINTTL(env.vsepc, RISCVCPU), - VMSTATE_UINTTL(env.vscause, RISCVCPU), - VMSTATE_UINTTL(env.vstval, RISCVCPU), - VMSTATE_UINTTL(env.vsatp, RISCVCPU), - - VMSTATE_UINTTL(env.mtval2, RISCVCPU), - VMSTATE_UINTTL(env.mtinst, RISCVCPU), - - VMSTATE_UINTTL(env.stvec_hs, RISCVCPU), - VMSTATE_UINTTL(env.sscratch_hs, RISCVCPU), - VMSTATE_UINTTL(env.sepc_hs, RISCVCPU), - VMSTATE_UINTTL(env.scause_hs, RISCVCPU), - VMSTATE_UINTTL(env.stval_hs, RISCVCPU), - VMSTATE_UINTTL(env.satp_hs, RISCVCPU), - VMSTATE_UINT64(env.mstatus_hs, RISCVCPU), - - VMSTATE_END_OF_LIST() - } -}; - const VMStateDescription vmstate_riscv_cpu = { .name = "cpu", .version_id = 3, diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 3153d053e9..ca3845d023 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -674,11 +674,6 @@ QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096); #define SIGP_STAT_INVALID_ORDER 0x00000002UL #define SIGP_STAT_RECEIVER_CHECK 0x00000001UL -/* SIGP SET ARCHITECTURE modes */ -#define SIGP_MODE_ESA_S390 0 -#define SIGP_MODE_Z_ARCH_TRANS_ALL_PSW 1 -#define SIGP_MODE_Z_ARCH_TRANS_CUR_PSW 2 - /* SIGP order code mask corresponding to bit positions 56-63 */ #define SIGP_ORDER_MASK 0x000000ff diff --git a/tcg/README b/tcg/README index c2e7762a37..bc15cc3b32 100644 --- a/tcg/README +++ b/tcg/README @@ -254,6 +254,12 @@ t0 = t1 ? clz(t1) : t2 t0 = t1 ? ctz(t1) : t2 +* ctpop_i32/i64 t0, t1 + +t0 = number of bits set in t1 +With "ctpop" short for "count population", matching +the function name used in include/qemu/host-utils.h. + ********* Shifts/Rotates * shl_i32/i64 t0, t1, t2 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 633b8a37ba..9d322cdba6 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -2523,8 +2523,13 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2); return; case TCG_TYPE_V128: - /* regs 2; size 8; align 16 */ - tcg_out_vldst(s, INSN_VLD1 | 0xae0, arg, arg1, arg2); + /* + * We have only 8-byte alignment for the stack per the ABI. + * Rather than dynamically re-align the stack, it's easier + * to simply not request alignment beyond that. So: + * regs 2; size 8; align 8 + */ + tcg_out_vldst(s, INSN_VLD1 | 0xad0, arg, arg1, arg2); return; default: g_assert_not_reached(); @@ -2543,8 +2548,8 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2); return; case TCG_TYPE_V128: - /* regs 2; size 8; align 16 */ - tcg_out_vldst(s, INSN_VST1 | 0xae0, arg, arg1, arg2); + /* See tcg_out_ld re alignment: regs 2; size 8; align 8 */ + tcg_out_vldst(s, INSN_VST1 | 0xad0, arg, arg1, arg2); return; default: g_assert_not_reached(); diff --git a/tcg/optimize.c b/tcg/optimize.c index dbb2d46e88..2397f2cf93 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -1365,7 +1365,7 @@ static bool fold_extract2(OptContext *ctx, TCGOp *op) v2 <<= 64 - shr; } else { v1 = (uint32_t)v1 >> shr; - v2 = (int32_t)v2 << (32 - shr); + v2 = (uint64_t)((int32_t)v2 << (32 - shr)); } return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2); } diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 8938c446c8..57e803e339 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2699,7 +2699,8 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece, static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, unsigned vece, - const TCGArg *args, const int *const_args) + const TCGArg args[TCG_MAX_OP_ARGS], + const int const_args[TCG_MAX_OP_ARGS]) { TCGType type = vecl + TCG_TYPE_V64; TCGArg a0 = args[0], a1 = args[1], a2 = args[2]; @@ -3061,7 +3061,13 @@ static void temp_allocate_frame(TCGContext *s, TCGTemp *ts) g_assert_not_reached(); } - assert(align <= TCG_TARGET_STACK_ALIGN); + /* + * Assume the stack is sufficiently aligned. + * This affects e.g. ARM NEON, where we have 8 byte stack alignment + * and do not require 16 byte vector alignment. This seems slightly + * easier than fully parameterizing the above switch statement. + */ + align = MIN(TCG_TARGET_STACK_ALIGN, align); off = ROUND_UP(s->current_frame_offset, align); /* If we've exhausted the stack frame, restart with a smaller TB. */ diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py index 9ca1515c3b..642d2e49e3 100644 --- a/tests/avocado/tcg_plugins.py +++ b/tests/avocado/tcg_plugins.py @@ -131,7 +131,7 @@ class PluginKernelNormal(PluginKernelBase): suffix=".log") self.run_vm(kernel_path, kernel_command_line, - "tests/plugin/libmem.so,arg=both", plugin_log.name, + "tests/plugin/libmem.so,inline=true,callback=true", plugin_log.name, console_pattern, args=('-icount', 'shift=1')) diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT Binary files differindex 281fc82c03..c1965f6051 100644 --- a/tests/data/acpi/q35/DSDT +++ b/tests/data/acpi/q35/DSDT diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat Binary files differindex 8c1e05a11a..f24d4874bf 100644 --- a/tests/data/acpi/q35/DSDT.acpihmat +++ b/tests/data/acpi/q35/DSDT.acpihmat diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge Binary files differindex 6f1464b6c7..424d51bd1c 100644 --- a/tests/data/acpi/q35/DSDT.bridge +++ b/tests/data/acpi/q35/DSDT.bridge diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp Binary files differindex f8337ff519..f1275606f6 100644 --- a/tests/data/acpi/q35/DSDT.cphp +++ b/tests/data/acpi/q35/DSDT.cphp diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm Binary files differindex fe5820d93d..76e451e829 100644 --- a/tests/data/acpi/q35/DSDT.dimmpxm +++ b/tests/data/acpi/q35/DSDT.dimmpxm diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt Binary files differindex 6317410658..6ad2411d0e 100644 --- a/tests/data/acpi/q35/DSDT.ipmibt +++ b/tests/data/acpi/q35/DSDT.ipmibt diff --git a/tests/data/acpi/q35/DSDT.ivrs b/tests/data/acpi/q35/DSDT.ivrs Binary files differindex b0eafe90e5..cad26e3f0c 100644 --- a/tests/data/acpi/q35/DSDT.ivrs +++ b/tests/data/acpi/q35/DSDT.ivrs diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp Binary files differindex 9bc11518fc..4e9cb3dc68 100644 --- a/tests/data/acpi/q35/DSDT.memhp +++ b/tests/data/acpi/q35/DSDT.memhp diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 Binary files differindex 713288a12e..eb5a1c7171 100644 --- a/tests/data/acpi/q35/DSDT.mmio64 +++ b/tests/data/acpi/q35/DSDT.mmio64 diff --git a/tests/data/acpi/q35/DSDT.multi-bridge b/tests/data/acpi/q35/DSDT.multi-bridge Binary files differindex a24c713d22..45808eb03b 100644 --- a/tests/data/acpi/q35/DSDT.multi-bridge +++ b/tests/data/acpi/q35/DSDT.multi-bridge diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet Binary files differindex e8202e6ddf..83d1aa00ac 100644 --- a/tests/data/acpi/q35/DSDT.nohpet +++ b/tests/data/acpi/q35/DSDT.nohpet diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem Binary files differindex 151e7cf429..050aaa237b 100644 --- a/tests/data/acpi/q35/DSDT.numamem +++ b/tests/data/acpi/q35/DSDT.numamem diff --git a/tests/data/acpi/q35/DSDT.tis.tpm12 b/tests/data/acpi/q35/DSDT.tis.tpm12 Binary files differindex c96b5277a1..0ebdf6fbd7 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm12 +++ b/tests/data/acpi/q35/DSDT.tis.tpm12 diff --git a/tests/data/acpi/q35/DSDT.tis.tpm2 b/tests/data/acpi/q35/DSDT.tis.tpm2 Binary files differindex c92d4d29c7..dcbb7f0af3 100644 --- a/tests/data/acpi/q35/DSDT.tis.tpm2 +++ b/tests/data/acpi/q35/DSDT.tis.tpm2 diff --git a/tests/data/acpi/q35/DSDT.xapic b/tests/data/acpi/q35/DSDT.xapic Binary files differindex 119fc90f1f..17552ce363 100644 --- a/tests/data/acpi/q35/DSDT.xapic +++ b/tests/data/acpi/q35/DSDT.xapic diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 7a63a3b7f7..f1a0c5db7a 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -150,6 +150,9 @@ docker-image-debian-sparc64-cross: docker-image-debian10 # The native build should never use the registry docker-image-debian-native: DOCKER_REGISTRY= +# base images should not add a local user +docker-image-debian10: NOUSER=1 +docker-image-debian11: NOUSER=1 # # The build rule for hexagon-cross is special in so far for most of diff --git a/tests/docker/dockerfiles/centos8.docker b/tests/docker/dockerfiles/centos8.docker index 46398c61ee..7f135f8e8c 100644 --- a/tests/docker/dockerfiles/centos8.docker +++ b/tests/docker/dockerfiles/centos8.docker @@ -51,6 +51,7 @@ ENV PACKAGES \ libpng-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libslirp-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/fedora-i386-cross.docker b/tests/docker/dockerfiles/fedora-i386-cross.docker index f62a71ce22..13328e6081 100644 --- a/tests/docker/dockerfiles/fedora-i386-cross.docker +++ b/tests/docker/dockerfiles/fedora-i386-cross.docker @@ -8,6 +8,7 @@ ENV PACKAGES \ gcc \ git \ libffi-devel.i686 \ + libselinux-devel.i686 \ libtasn1-devel.i686 \ libzstd-devel.i686 \ make \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index eec1add7f6..c6fd7e1113 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -53,6 +53,7 @@ ENV PACKAGES \ libpng-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libslirp-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/opensuse-leap.docker b/tests/docker/dockerfiles/opensuse-leap.docker index 5a8bee0289..3bbdb67f4f 100644 --- a/tests/docker/dockerfiles/opensuse-leap.docker +++ b/tests/docker/dockerfiles/opensuse-leap.docker @@ -55,6 +55,7 @@ ENV PACKAGES \ libpulse-devel \ librbd-devel \ libseccomp-devel \ + libselinux-devel \ libspice-server-devel \ libssh-devel \ libtasn1-devel \ diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker index 0880bf3e29..450fd06d0d 100644 --- a/tests/docker/dockerfiles/ubuntu1804.docker +++ b/tests/docker/dockerfiles/ubuntu1804.docker @@ -60,6 +60,7 @@ ENV PACKAGES \ libsdl2-dev \ libsdl2-image-dev \ libseccomp-dev \ + libselinux-dev \ libsnappy-dev \ libspice-protocol-dev \ libspice-server-dev \ diff --git a/tests/docker/dockerfiles/ubuntu2004.docker b/tests/docker/dockerfiles/ubuntu2004.docker index 39de63d012..15a026be09 100644 --- a/tests/docker/dockerfiles/ubuntu2004.docker +++ b/tests/docker/dockerfiles/ubuntu2004.docker @@ -60,6 +60,7 @@ ENV PACKAGES \ libsdl2-dev \ libsdl2-image-dev \ libseccomp-dev \ + libselinux-dev \ libslirp-dev \ libsnappy-dev \ libspice-protocol-dev \ diff --git a/tests/plugin/syscall.c b/tests/plugin/syscall.c index 484b48de49..96040c578f 100644 --- a/tests/plugin/syscall.c +++ b/tests/plugin/syscall.c @@ -70,19 +70,17 @@ static void vcpu_syscall_ret(qemu_plugin_id_t id, unsigned int vcpu_idx, } g_mutex_unlock(&lock); } else { - g_autofree gchar *out; - out = g_strdup_printf("syscall #%" PRIi64 " returned -> %" PRIi64 "\n", - num, ret); + g_autofree gchar *out = g_strdup_printf( + "syscall #%" PRIi64 " returned -> %" PRIi64 "\n", num, ret); qemu_plugin_outs(out); } } static void print_entry(gpointer val, gpointer user_data) { - g_autofree gchar *out; SyscallStats *entry = (SyscallStats *) val; int64_t syscall_num = entry->num; - out = g_strdup_printf( + g_autofree gchar *out = g_strdup_printf( "%-13" PRIi64 "%-6" PRIi64 " %" PRIi64 "\n", syscall_num, entry->calls, entry->errors); qemu_plugin_outs(out); diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 5fb65b4bef..567bf1da67 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -251,7 +251,16 @@ class TestParallelOps(iotests.QMPTestCase): speed=1024) self.assert_qmp(result, 'return', {}) - for job in pending_jobs: + # Do this in reverse: After unthrottling them, some jobs may finish + # before we have unthrottled all of them. This will drain their + # subgraph, and this will make jobs above them advance (despite those + # jobs on top being throttled). In the worst case, all jobs below the + # top one are finished before we can unthrottle it, and this makes it + # advance so far that it completes before we can unthrottle it - which + # results in an error. + # Starting from the top (i.e. in reverse) does not have this problem: + # When a job finishes, the ones below it are not advanced. + for job in reversed(pending_jobs): result = self.vm.qmp('block-job-set-speed', device=job, speed=0) self.assert_qmp(result, 'return', {}) diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142 index 69fd10ef51..86d65a2d1a 100755 --- a/tests/qemu-iotests/142 +++ b/tests/qemu-iotests/142 @@ -350,6 +350,35 @@ info block backing-file" echo "$hmp_cmds" | run_qemu -drive "$files","$ids" | grep "Cache" +echo +echo "--- Alignment after changing O_DIRECT ---" +echo + +# Directly test the protocol level: Can unaligned requests succeed even if +# O_DIRECT was only enabled through a reopen and vice versa? + +# Ensure image size is a multiple of the sector size (required for O_DIRECT) +$QEMU_IMG create -f file "$TEST_IMG" 1M | _filter_img_create + +# And write some data (not strictly necessary, but it feels better to actually +# have something to be read) +$QEMU_IO -f file -c 'write 0 4096' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO --cache=writeback -f file $TEST_IMG <<EOF | _filter_qemu_io +read 42 42 +reopen -o cache.direct=on +read 42 42 +reopen -o cache.direct=off +read 42 42 +EOF +$QEMU_IO --cache=none -f file $TEST_IMG <<EOF | _filter_qemu_io +read 42 42 +reopen -o cache.direct=off +read 42 42 +reopen -o cache.direct=on +read 42 42 +EOF + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/142.out b/tests/qemu-iotests/142.out index a92b948edd..e20cfc8eb8 100644 --- a/tests/qemu-iotests/142.out +++ b/tests/qemu-iotests/142.out @@ -747,4 +747,22 @@ cache.no-flush=on on backing-file Cache mode: writeback Cache mode: writeback, direct Cache mode: writeback, ignore flushes + +--- Alignment after changing O_DIRECT --- + +Formatting 'TEST_DIR/t.IMGFMT', fmt=file size=1048576 +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 42/42 bytes at offset 42 +42 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/149 b/tests/qemu-iotests/149 index 328fd05a4c..d49646ca60 100755 --- a/tests/qemu-iotests/149 +++ b/tests/qemu-iotests/149 @@ -230,6 +230,18 @@ def create_image(config, size_mb): fn.truncate(size_mb * 1024 * 1024) +def check_cipher_support(config, output): + """Check the output of qemu-img or qemu-io for mention of the respective + cipher algorithm being unsupported, and if so, skip this test. + (Returns `output` for convenience.)""" + + if 'Unsupported cipher algorithm' in output: + iotests.notrun('Unsupported cipher algorithm ' + f'{config.cipher}-{config.keylen}-{config.mode}; ' + 'consider configuring qemu with a different crypto ' + 'backend') + return output + def qemu_img_create(config, size_mb): """Create and format a disk image with LUKS using qemu-img""" @@ -253,7 +265,8 @@ def qemu_img_create(config, size_mb): "%dM" % size_mb] iotests.log("qemu-img " + " ".join(args), filters=[iotests.filter_test_dir]) - iotests.log(iotests.qemu_img_pipe(*args), filters=[iotests.filter_test_dir]) + iotests.log(check_cipher_support(config, iotests.qemu_img_pipe(*args)), + filters=[iotests.filter_test_dir]) def qemu_io_image_args(config, dev=False): """Get the args for access an image or device with qemu-io""" @@ -279,8 +292,8 @@ def qemu_io_write_pattern(config, pattern, offset_mb, size_mb, dev=False): args = ["-c", "write -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)] args.extend(qemu_io_image_args(config, dev)) iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir]) - iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir, - iotests.filter_qemu_io]) + iotests.log(check_cipher_support(config, iotests.qemu_io(*args)), + filters=[iotests.filter_test_dir, iotests.filter_qemu_io]) def qemu_io_read_pattern(config, pattern, offset_mb, size_mb, dev=False): @@ -291,8 +304,8 @@ def qemu_io_read_pattern(config, pattern, offset_mb, size_mb, dev=False): args = ["-c", "read -P 0x%x %dM %dM" % (pattern, offset_mb, size_mb)] args.extend(qemu_io_image_args(config, dev)) iotests.log("qemu-io " + " ".join(args), filters=[iotests.filter_test_dir]) - iotests.log(iotests.qemu_io(*args), filters=[iotests.filter_test_dir, - iotests.filter_qemu_io]) + iotests.log(check_cipher_support(config, iotests.qemu_io(*args)), + filters=[iotests.filter_test_dir, iotests.filter_qemu_io]) def test_once(config, qemu_img=False): diff --git a/tests/qemu-iotests/206 b/tests/qemu-iotests/206 index c3cdad4ce4..10eff343f7 100755 --- a/tests/qemu-iotests/206 +++ b/tests/qemu-iotests/206 @@ -162,8 +162,8 @@ with iotests.FilePath('t.qcow2') as disk_path, \ 'encrypt': { 'format': 'luks', 'key-secret': 'keysec0', - 'cipher-alg': 'twofish-128', - 'cipher-mode': 'ctr', + 'cipher-alg': 'aes-128', + 'cipher-mode': 'cbc', 'ivgen-alg': 'plain64', 'ivgen-hash-alg': 'md5', 'hash-alg': 'sha1', diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out index 3593e8e9c2..80cd274223 100644 --- a/tests/qemu-iotests/206.out +++ b/tests/qemu-iotests/206.out @@ -97,7 +97,7 @@ Format specific information: === Successful image creation (encrypted) === -{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "encrypt": {"cipher-alg": "twofish-128", "cipher-mode": "ctr", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": {"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}} +{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "encrypt": {"cipher-alg": "aes-128", "cipher-mode": "cbc", "format": "luks", "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0"}, "file": {"driver": "file", "filename": "TEST_DIR/PID-t.qcow2"}, "size": 33554432}}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -115,10 +115,10 @@ Format specific information: encrypt: ivgen alg: plain64 hash alg: sha1 - cipher alg: twofish-128 + cipher alg: aes-128 uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX format: luks - cipher mode: ctr + cipher mode: cbc slots: [0]: active: true diff --git a/tests/qemu-iotests/210 b/tests/qemu-iotests/210 index 5a62ed4dd1..a4dcc5fe59 100755 --- a/tests/qemu-iotests/210 +++ b/tests/qemu-iotests/210 @@ -83,8 +83,8 @@ with iotests.FilePath('t.luks') as disk_path, \ }, 'size': size, 'key-secret': 'keysec0', - 'cipher-alg': 'twofish-128', - 'cipher-mode': 'ctr', + 'cipher-alg': 'aes-128', + 'cipher-mode': 'cbc', 'ivgen-alg': 'plain64', 'ivgen-hash-alg': 'md5', 'hash-alg': 'sha1', diff --git a/tests/qemu-iotests/210.out b/tests/qemu-iotests/210.out index 55c0844370..96d9f749dd 100644 --- a/tests/qemu-iotests/210.out +++ b/tests/qemu-iotests/210.out @@ -59,7 +59,7 @@ Format specific information: {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} -{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"cipher-alg": "twofish-128", "cipher-mode": "ctr", "driver": "luks", "file": {"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0", "size": 67108864}}} +{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"cipher-alg": "aes-128", "cipher-mode": "cbc", "driver": "luks", "file": {"driver": "file", "filename": "TEST_DIR/PID-t.luks"}, "hash-alg": "sha1", "iter-time": 10, "ivgen-alg": "plain64", "ivgen-hash-alg": "md5", "key-secret": "keysec0", "size": 67108864}}} {"return": {}} {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} @@ -71,9 +71,9 @@ encrypted: yes Format specific information: ivgen alg: plain64 hash alg: sha1 - cipher alg: twofish-128 + cipher alg: aes-128 uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX - cipher mode: ctr + cipher mode: cbc slots: [0]: active: true diff --git a/tests/qtest/am53c974-test.c b/tests/qtest/am53c974-test.c index 9b1e4211bd..d214a912b3 100644 --- a/tests/qtest/am53c974-test.c +++ b/tests/qtest/am53c974-test.c @@ -223,6 +223,34 @@ static void test_inflight_cancel_ok(void) qtest_quit(s); } +static void test_reset_before_transfer_ok(void) +{ + QTestState *s = qtest_init( + "-device am53c974,id=scsi " + "-device scsi-hd,drive=disk0 -drive " + "id=disk0,if=none,file=null-co://,format=raw -nodefaults"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xc000); + qtest_outl(s, 0xcf8, 0x80001004); + qtest_outw(s, 0xcfc, 0x01); + qtest_outl(s, 0xc007, 0x2500); + qtest_outl(s, 0xc00a, 0x410000); + qtest_outl(s, 0xc00a, 0x410000); + qtest_outw(s, 0xc00b, 0x0200); + qtest_outw(s, 0xc040, 0x03); + qtest_outw(s, 0xc009, 0x00); + qtest_outw(s, 0xc00b, 0x00); + qtest_outw(s, 0xc009, 0x00); + qtest_outw(s, 0xc00b, 0x00); + qtest_outw(s, 0xc009, 0x00); + qtest_outw(s, 0xc003, 0x1000); + qtest_outw(s, 0xc00b, 0x1000); + qtest_outl(s, 0xc00b, 0x9000); + qtest_outw(s, 0xc00b, 0x1000); + qtest_quit(s); +} + int main(int argc, char **argv) { const char *arch = qtest_get_arch(); @@ -248,6 +276,8 @@ int main(int argc, char **argv) test_cancelled_request_ok); qtest_add_func("am53c974/test_inflight_cancel_ok", test_inflight_cancel_ok); + qtest_add_func("am53c974/test_reset_before_transfer_ok", + test_reset_before_transfer_ok); } return g_test_run(); diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c index 26b69f7c5c..8f6eee84a4 100644 --- a/tests/qtest/fdc-test.c +++ b/tests/qtest/fdc-test.c @@ -32,6 +32,9 @@ /* TODO actually test the results and get rid of this */ #define qmp_discard_response(...) qobject_unref(qmp(__VA_ARGS__)) +#define DRIVE_FLOPPY_BLANK \ + "-drive if=floppy,file=null-co://,file.read-zeroes=on,format=raw,size=1440k" + #define TEST_IMAGE_SIZE 1440 * 1024 #define FLOPPY_BASE 0x3f0 @@ -546,6 +549,40 @@ static void fuzz_registers(void) } } +static bool qtest_check_clang_sanitizer(void) +{ +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) + return true; +#else + g_test_skip("QEMU not configured using --enable-sanitizers"); + return false; +#endif +} +static void test_cve_2021_20196(void) +{ + QTestState *s; + + if (!qtest_check_clang_sanitizer()) { + return; + } + + s = qtest_initf("-nographic -m 32M -nodefaults " DRIVE_FLOPPY_BLANK); + + qtest_outw(s, 0x3f4, 0x0500); + qtest_outb(s, 0x3f5, 0x00); + qtest_outb(s, 0x3f5, 0x00); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outb(s, 0x3f5, 0x00); + qtest_outw(s, 0x3f1, 0x0400); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outw(s, 0x3f4, 0x0000); + qtest_outb(s, 0x3f5, 0x00); + qtest_outb(s, 0x3f5, 0x01); + qtest_outw(s, 0x3f1, 0x0500); + qtest_outb(s, 0x3f5, 0x00); + qtest_quit(s); +} + int main(int argc, char **argv) { int fd; @@ -576,6 +613,7 @@ int main(int argc, char **argv) qtest_add_func("/fdc/read_no_dma_18", test_read_no_dma_18); qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); qtest_add_func("/fdc/fuzz-registers", fuzz_registers); + qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); ret = g_test_run(); diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target index 5e65b1590d..ba2fde5ff1 100644 --- a/tests/tcg/ppc64le/Makefile.target +++ b/tests/tcg/ppc64le/Makefile.target @@ -9,18 +9,12 @@ PPC64LE_TESTS=bcdsub endif bcdsub: CFLAGS += -mpower8-vector -PPC64LE_TESTS += byte_reverse ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),) +PPC64LE_TESTS += byte_reverse +endif +byte_reverse: CFLAGS += -mcpu=power10 run-byte_reverse: QEMU_OPTS+=-cpu POWER10 run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10 -else -byte_reverse: - $(call skip-test, "BUILD of $@", "missing compiler support") -run-byte_reverse: - $(call skip-test, "RUN of byte_reverse", "not built") -run-plugin-byte_reverse-with-%: - $(call skip-test, "RUN of byte_reverse ($*)", "not built") -endif PPC64LE_TESTS += signal_save_restore_xer diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c index cbe0c99049..b02450e25a 100644 --- a/tests/unit/test-smp-parse.c +++ b/tests/unit/test-smp-parse.c @@ -23,6 +23,8 @@ #define MIN_CPUS 1 /* set the min CPUs supported by the machine as 1 */ #define MAX_CPUS 512 /* set the max CPUs supported by the machine as 512 */ +#define SMP_MACHINE_NAME "TEST-SMP" + /* * Used to define the generic 3-level CPU topology hierarchy * -sockets/cores/threads @@ -75,14 +77,6 @@ typedef struct SMPTestData { const char *expect_error; } SMPTestData; -/* Type info of the tested machine */ -static const TypeInfo smp_machine_info = { - .name = TYPE_MACHINE, - .parent = TYPE_OBJECT, - .class_size = sizeof(MachineClass), - .instance_size = sizeof(MachineState), -}; - /* * List all the possible valid sub-collections of the generic 5 * topology parameters (i.e. cpus/maxcpus/sockets/cores/threads), @@ -315,13 +309,13 @@ static struct SMPTestData data_generic_invalid[] = { * should tweak the supported min CPUs to 2 for testing */ .config = SMP_CONFIG_GENERIC(T, 1, F, 0, F, 0, F, 0, F, 0), .expect_error = "Invalid SMP CPUs 1. The min CPUs supported " - "by machine '(null)' is 2", + "by machine '" SMP_MACHINE_NAME "' is 2", }, { /* config: -smp 512 * should tweak the supported max CPUs to 511 for testing */ .config = SMP_CONFIG_GENERIC(T, 512, F, 0, F, 0, F, 0, F, 0), .expect_error = "Invalid SMP CPUs 512. The max CPUs supported " - "by machine '(null)' is 511", + "by machine '" SMP_MACHINE_NAME "' is 511", }, }; @@ -480,14 +474,17 @@ static void unsupported_params_init(MachineClass *mc, SMPTestData *data) } } -/* Reset the related machine properties before each sub-test */ -static void smp_machine_class_init(MachineClass *mc) +static void machine_base_class_init(ObjectClass *oc, void *data) { + MachineClass *mc = MACHINE_CLASS(oc); + mc->min_cpus = MIN_CPUS; mc->max_cpus = MAX_CPUS; mc->smp_props.prefer_sockets = true; mc->smp_props.dies_supported = false; + + mc->name = g_strdup(SMP_MACHINE_NAME); } static void test_generic(void) @@ -498,8 +495,6 @@ static void test_generic(void) SMPTestData *data = &(SMPTestData){{ }}; int i; - smp_machine_class_init(mc); - for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) { *data = data_generic_valid[i]; unsupported_params_init(mc, data); @@ -512,7 +507,7 @@ static void test_generic(void) smp_parse_test(ms, data, true); } - /* Reset the supported min CPUs and max CPUs */ + /* Force invalid min CPUs and max CPUs */ mc->min_cpus = 2; mc->max_cpus = 511; @@ -523,6 +518,10 @@ static void test_generic(void) smp_parse_test(ms, data, false); } + /* Reset the supported min CPUs and max CPUs */ + mc->min_cpus = MIN_CPUS; + mc->max_cpus = MAX_CPUS; + object_unref(obj); } @@ -535,7 +534,7 @@ static void test_with_dies(void) unsigned int num_dies = 2; int i; - smp_machine_class_init(mc); + /* Force the SMP compat properties */ mc->smp_props.dies_supported = true; for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) { @@ -575,15 +574,30 @@ static void test_with_dies(void) smp_parse_test(ms, data, false); } + /* Restore the SMP compat properties */ + mc->smp_props.dies_supported = false; + object_unref(obj); } +/* Type info of the tested machine */ +static const TypeInfo smp_machine_types[] = { + { + .name = TYPE_MACHINE, + .parent = TYPE_OBJECT, + .class_init = machine_base_class_init, + .class_size = sizeof(MachineClass), + .instance_size = sizeof(MachineState), + } +}; + +DEFINE_TYPES(smp_machine_types) + int main(int argc, char *argv[]) { - g_test_init(&argc, &argv, NULL); - module_call_init(MODULE_INIT_QOM); - type_register_static(&smp_machine_info); + + g_test_init(&argc, &argv, NULL); g_test_add_func("/test-smp-parse/generic", test_generic); g_test_add_func("/test-smp-parse/with_dies", test_with_dies); diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include index f3a3a1c751..ae91f5043e 100644 --- a/tests/vm/Makefile.include +++ b/tests/vm/Makefile.include @@ -2,14 +2,22 @@ .PHONY: vm-build-all vm-clean-all +HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m)) + EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd) -IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64 +X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64 ifneq ($(GENISOIMAGE),) -IMAGES += ubuntu.i386 centos +X86_IMAGES += ubuntu.i386 centos ifneq ($(EFI_AARCH64),) -IMAGES += ubuntu.aarch64 centos.aarch64 +ARM64_IMAGES += ubuntu.aarch64 centos.aarch64 +endif endif + +ifeq ($(HOST_ARCH),x86_64) +IMAGES=$(X86_IMAGES) $(if $(USE_TCG),$(ARM64_IMAGES)) +else ifeq ($(HOST_ARCH),aarch64) +IMAGES=$(ARM64_IMAGES) $(if $(USE_TCG),$(X86_IMAGES)) endif IMAGES_DIR := $(HOME)/.cache/qemu-vm/images @@ -43,7 +51,7 @@ else endif @echo " vm-build-haiku.x86_64 - Build QEMU in Haiku VM" @echo "" - @echo " vm-build-all - Build QEMU in all VMs" + @echo " vm-build-all - Build QEMU in: $(IMAGES)" @echo " vm-clean-all - Clean up VM images" @echo @echo "For trouble-shooting:" @@ -52,21 +60,22 @@ endif @echo @echo "Special variables:" @echo " BUILD_TARGET=foo - Override the build target" - @echo " TARGET_LIST=a,b,c - Override target list in builds" - @echo ' EXTRA_CONFIGURE_OPTS="..."' - @echo " J=[0..9]* - Override the -jN parameter for make commands" @echo " DEBUG=1 - Enable verbose output on host and interactive debugging" + @echo ' EXTRA_CONFIGURE_OPTS="..." - Pass to configure step' + @echo " J=[0..9]* - Override the -jN parameter for make commands" @echo " LOG_CONSOLE=1 - Log console to file in: ~/.cache/qemu-vm " - @echo " V=1 - Enable verbose ouput on host and guest commands" - @echo " QEMU_LOCAL=1 - Use QEMU binary local to this build." + @echo " USE_TCG=1 - Use TCG for cross-arch images" @echo " QEMU=/path/to/qemu - Change path to QEMU binary" - @echo " QEMU_IMG=/path/to/qemu-img - Change path to qemu-img tool" ifeq ($(HAVE_PYTHON_YAML),yes) @echo " QEMU_CONFIG=/path/conf.yml - Change path to VM configuration .yml file." else @echo " (install python3-yaml to enable support for yaml file to configure a VM.)" endif @echo " See conf_example_*.yml for file format details." + @echo " QEMU_IMG=/path/to/qemu-img - Change path to qemu-img tool" + @echo " QEMU_LOCAL=1 - Use QEMU binary local to this build." + @echo " TARGET_LIST=a,b,c - Override target list in builds" + @echo " V=1 - Enable verbose ouput on host and guest commands" vm-build-all: $(addprefix vm-build-, $(IMAGES)) diff --git a/ui/curses.c b/ui/curses.c index e4f9588c3e..861d63244c 100644 --- a/ui/curses.c +++ b/ui/curses.c @@ -38,6 +38,10 @@ #include "ui/input.h" #include "sysemu/sysemu.h" +#if defined(__APPLE__) || defined(__OpenBSD__) +#define _XOPEN_SOURCE_EXTENDED 1 +#endif + /* KEY_EVENT is defined in wincon.h and in curses.h. Avoid redefinition. */ #undef KEY_EVENT #include <curses.h> diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 461da7712f..01e4e74ee3 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -41,15 +41,16 @@ void gd_gl_area_draw(VirtualConsole *vc) #ifdef CONFIG_GBM QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; #endif - int ww, wh, y1, y2; + int ww, wh, ws, y1, y2; if (!vc->gfx.gls) { return; } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area); - wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area); + ws = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); + ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area) * ws; + wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * ws; if (vc->gfx.scanout_mode) { if (!vc->gfx.guest_fb.framebuffer) { @@ -589,11 +589,11 @@ void gd_hw_gl_flushed(void *vcon) VirtualConsole *vc = vcon; QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; - graphic_hw_gl_block(vc->gfx.dcl.con, false); - graphic_hw_gl_flushed(vc->gfx.dcl.con); qemu_set_fd_handler(dmabuf->fence_fd, NULL, NULL, NULL); close(dmabuf->fence_fd); dmabuf->fence_fd = -1; + graphic_hw_gl_block(vc->gfx.dcl.con, false); + graphic_hw_gl_flushed(vc->gfx.dcl.con); } /** DisplayState Callbacks (opengl version) **/ @@ -838,10 +838,11 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, { VirtualConsole *vc = opaque; GtkDisplayState *s = vc->s; + GdkWindow *window; int x, y; int mx, my; int fbh, fbw; - int ww, wh; + int ww, wh, ws; if (!vc->gfx.ds) { return TRUE; @@ -850,8 +851,10 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x; fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y; - ww = gdk_window_get_width(gtk_widget_get_window(vc->gfx.drawing_area)); - wh = gdk_window_get_height(gtk_widget_get_window(vc->gfx.drawing_area)); + window = gtk_widget_get_window(vc->gfx.drawing_area); + ww = gdk_window_get_width(window); + wh = gdk_window_get_height(window); + ws = gdk_window_get_scale_factor(window); mx = my = 0; if (ww > fbw) { @@ -861,8 +864,8 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, my = (wh - fbh) / 2; } - x = (motion->x - mx) / vc->gfx.scale_x; - y = (motion->y - my) / vc->gfx.scale_y; + x = (motion->x - mx) / vc->gfx.scale_x * ws; + y = (motion->y - my) / vc->gfx.scale_y * ws; if (qemu_input_is_absolute()) { if (x < 0 || y < 0 || diff --git a/ui/vnc-clipboard.c b/ui/vnc-clipboard.c index 9f077965d0..67284b556c 100644 --- a/ui/vnc-clipboard.c +++ b/ui/vnc-clipboard.c @@ -316,8 +316,10 @@ void vnc_server_cut_text_caps(VncState *vs) caps[1] = 0; vnc_clipboard_send(vs, 2, caps); - vs->cbpeer.name = "vnc"; - vs->cbpeer.update.notify = vnc_clipboard_notify; - vs->cbpeer.request = vnc_clipboard_request; - qemu_clipboard_peer_register(&vs->cbpeer); + if (!vs->cbpeer.update.notify) { + vs->cbpeer.name = "vnc"; + vs->cbpeer.update.notify = vnc_clipboard_notify; + vs->cbpeer.request = vnc_clipboard_request; + qemu_clipboard_peer_register(&vs->cbpeer); + } } diff --git a/util/rcu.c b/util/rcu.c index 13ac0f75cb..c91da9f137 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -46,6 +46,7 @@ unsigned long rcu_gp_ctr = RCU_GP_LOCKED; QemuEvent rcu_gp_event; +static int in_drain_call_rcu; static QemuMutex rcu_registry_lock; static QemuMutex rcu_sync_lock; @@ -107,6 +108,8 @@ static void wait_for_readers(void) * get some extra futex wakeups. */ qatomic_set(&index->waiting, false); + } else if (qatomic_read(&in_drain_call_rcu)) { + notifier_list_notify(&index->force_rcu, NULL); } } @@ -339,8 +342,10 @@ void drain_call_rcu(void) * assumed. */ + qatomic_inc(&in_drain_call_rcu); call_rcu1(&rcu_drain.rcu, drain_rcu_callback); qemu_event_wait(&rcu_drain.drain_complete_event); + qatomic_dec(&in_drain_call_rcu); if (locked) { qemu_mutex_lock_iothread(); @@ -363,6 +368,20 @@ void rcu_unregister_thread(void) qemu_mutex_unlock(&rcu_registry_lock); } +void rcu_add_force_rcu_notifier(Notifier *n) +{ + qemu_mutex_lock(&rcu_registry_lock); + notifier_list_add(&rcu_reader.force_rcu, n); + qemu_mutex_unlock(&rcu_registry_lock); +} + +void rcu_remove_force_rcu_notifier(Notifier *n) +{ + qemu_mutex_lock(&rcu_registry_lock); + notifier_remove(n); + qemu_mutex_unlock(&rcu_registry_lock); +} + static void rcu_init_complete(void) { QemuThread thread; diff --git a/util/transactions.c b/util/transactions.c index d0bc9a3e73..2dbdedce95 100644 --- a/util/transactions.c +++ b/util/transactions.c @@ -61,11 +61,13 @@ void tran_abort(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->abort) { act->drv->abort(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } @@ -80,11 +82,13 @@ void tran_commit(Transaction *tran) { TransactionAction *act, *next; - QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { + QSLIST_FOREACH(act, &tran->actions, entry) { if (act->drv->commit) { act->drv->commit(act->opaque); } + } + QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) { if (act->drv->clean) { act->drv->clean(act->opaque); } |